August 15, 2015

Linux ftrace: see underlying semantics of a system call

I always wondered what's happening under the hood of a system call. strace is a simple tool to see, what syscalls are being called by an application (with lot's of performance impact on app), however to know what's really going on beyond syscall we need ftrace

ftrace is an internal tracer designed to help out developers and designers of systems to find what is going on inside the kernel. ftrace uses the debugfs file system to hold the control files as well as the files to display output

Kernel documentation on ftrace could be found here
Brenden Gregg's lwn article talks about using ftrace and his perf-tools

Below is an example trace of read(2) syscall using function_graph tracer
[root@mylinux1 tracing]# pwd
/sys/kernel/debug/tracing
[root@mylinux1 tracing]# echo SyS_read > set_graph_function
[root@mylinux1 tracing]# echo function_graph > current_tracer 
[root@mylinux1 tracing]# echo 1 > tracing_on 
Observe tracer output in trace or trace_pipe files. Below output shows kernel code path
taken for read(2) syscall on my Linux VM running 3.10.0-229 kernel
 
 0)               |  SyS_read() {
 0)   0.261 us    |    fget_light();
 0)               |    vfs_read() {
 0)               |      rw_verify_area() {
 0)               |        security_file_permission() {
 0)               |          selinux_file_permission() {
 0)   0.066 us    |            avc_policy_seqno();
 0)   0.581 us    |          }
 0)               |          __fsnotify_parent() {
 0)               |            dget_parent() {
 0)   0.580 us    |              _raw_spin_lock();
 0)   0.180 us    |              _raw_spin_unlock();
 0)   2.213 us    |            }
 0)               |            dput() {
 0)   0.265 us    |              _raw_spin_lock();
 0)   0.172 us    |              _raw_spin_unlock();
 0)   1.318 us    |            }
 0)   4.431 us    |          }
 0)   0.080 us    |          fsnotify();
 0)   6.372 us    |        }
 0)   6.803 us    |      }
 0)               |      do_sync_read() {
 0)               |        xfs_file_aio_read [xfs]() {
 0)   0.065 us    |          generic_segment_checks();
 0)               |          xfs_ilock [xfs]() {
 0)               |            down_read_nested() {
 0)   0.142 us    |              __might_sleep();
 0)   0.058 us    |              _cond_resched();
 0)   1.416 us    |            }
 0)   1.919 us    |          }
 0)               |          generic_file_aio_read() {
 0)   0.072 us    |            generic_segment_checks();
 0)   0.153 us    |            __might_sleep();
 0)   0.059 us    |            _cond_resched();
 0)   0.635 us    |            __find_get_page();
 0)   0.070 us    |            mark_page_accessed();
 0)               |            file_read_actor() {
 0)               |              do_page_fault() {
 0)               |                __do_page_fault() {
 0)   0.276 us    |                  down_read_trylock();
 0)   0.164 us    |                  __might_sleep();
 0)   0.064 us    |                  _cond_resched();
 0)   0.163 us    |                  find_vma();
 0)               |                  handle_mm_fault() {
 0)   0.499 us    |                    __mem_cgroup_count_vm_event();
 0)   0.112 us    |                    sync_mm_rss();
 0)               |                    anon_vma_prepare() {
 0)   0.203 us    |                      __might_sleep();
 0)   0.059 us    |                      _cond_resched();
 0)   1.284 us    |                    }
 0)               |                    alloc_pages_vma() {
 0)   0.095 us    |                      get_vma_policy();
 0)   0.070 us    |                      policy_nodemask();
 0)   0.081 us    |                      policy_zonelist();
 0)               |                      __alloc_pages_nodemask() {
 0)   0.136 us    |                        __might_sleep();
 0)   0.059 us    |                        _cond_resched();
 0)   0.069 us    |                        next_zones_zonelist();
 0)               |                        get_page_from_freelist() {
 0)   0.073 us    |                          next_zones_zonelist();
 0)   0.061 us    |                          __zone_watermark_ok();
 0)   0.060 us    |                          __mod_zone_page_state();
 0)               |                          zone_statistics() {
 0)   0.072 us    |                            __inc_zone_state();
 0)   0.064 us    |                            __inc_zone_state();
 0)   0.929 us    |                          }
 0)   0.060 us    |                          bad_range();
 0)               |                          mem_cgroup_bad_page_check() {
 0)               |                            lookup_page_cgroup_used() {
 0)   0.061 us    |                              lookup_page_cgroup();
 0)   0.694 us    |                            }
 0)   1.175 us    |                          }
 0)   5.841 us    |                        }
 0)   8.006 us    |                      }
 0) + 10.248 us   |                    }
 0)               |                    mem_cgroup_newpage_charge() {
 0)               |                      mem_cgroup_charge_common() {
 0)   0.549 us    |                        __mem_cgroup_try_charge();
 0)               |                        __mem_cgroup_commit_charge() {
 0)   0.095 us    |                          lookup_page_cgroup();
 0)   0.061 us    |                          mem_cgroup_charge_statistics.isra.26();
 0)   0.061 us    |                          memcg_check_events();
 0)   1.615 us    |                        }
 0)   3.113 us    |                      }
 0)   3.667 us    |                    }
 0)   0.497 us    |                    _raw_spin_lock();
 0)   0.058 us    |                    add_mm_counter_fast();
 0)               |                    page_add_new_anon_rmap() {
 0)               |                      __inc_zone_page_state() {
 0)   0.079 us    |                        __inc_zone_state();
 0)   0.586 us    |                      }
 0)               |                      lru_cache_add() {
 0)   0.062 us    |                        __lru_cache_add();
 0)   0.592 us    |                      }
 0)   2.218 us    |                    }
 0)   0.188 us    |                    _raw_spin_unlock();
 0) + 23.584 us   |                  }
 0)   0.182 us    |                  up_read();
 0) + 27.283 us   |                }
 0) + 27.703 us   |              }
 0) + 28.902 us   |            }
 0)   0.064 us    |            put_page();
 0)   0.143 us    |            __might_sleep();
 0)   0.058 us    |            _cond_resched();
 0)   0.443 us    |            __find_get_page();
 0)   0.063 us    |            put_page();
 0)               |            touch_atime() {
 0)               |              current_fs_time() {
 0)   0.071 us    |                current_kernel_time();
 0)   0.534 us    |              }
 0)   1.022 us    |            }
 0) + 36.860 us   |          }
 0)               |          xfs_iunlock [xfs]() {
 0)   0.197 us    |            up_read();
 0)   1.325 us    |          }
 0) + 41.959 us   |        }
 0) + 42.513 us   |      }
 0)               |      __fsnotify_parent() {
 0)               |        dget_parent() {
 0)   0.525 us    |          _raw_spin_lock();
 0)   0.200 us    |          _raw_spin_unlock();
 0)   2.022 us    |        }
 0)               |        dput() {
 0)   0.238 us    |          _raw_spin_lock();
 0)   0.201 us    |          _raw_spin_unlock();
 0)   1.299 us    |        }
 0)   4.228 us    |      }
 0)   0.069 us    |      fsnotify();
 0) + 55.277 us   |    }
 0) + 56.499 us   |  }

No comments:

Post a Comment