https://perf.wiki.kernel.org/index.php/Tutorial
http://os.51cto.com/art/201105/265133.htm
在 Linux 源代码中安装 perf
[root@localhost perf]# pwd
/usr/src/kernels/linux-2.6.32/tools/perf
[root@localhost perf]# yum install binutils-devel
[root@localhost perf]# make
[root@localhost perf]# make install
install -d -m 755 '/root/bin'
install perf '/root/bin'
[root@localhost perf]# mount -t debugfs none /sys/kernel/debug/
[root@localhost perf]# perf list List of pre-defined events (to be used in -e): cpu-cycles OR cycles [Hardware event] instructions [Hardware event] cache-references [Hardware event] cache-misses [Hardware event] branch-instructions OR branches [Hardware event] branch-misses [Hardware event] bus-cycles [Hardware event] cpu-clock [Software event] task-clock [Software event] page-faults OR faults [Software event] minor-faults [Software event] major-faults [Software event] context-switches OR cs [Software event] cpu-migrations OR migrations [Software event] L1-dcache-loads [Hardware cache event] L1-dcache-load-misses [Hardware cache event] L1-dcache-stores [Hardware cache event] L1-dcache-store-misses [Hardware cache event] L1-dcache-prefetches [Hardware cache event] L1-dcache-prefetch-misses [Hardware cache event] L1-icache-loads [Hardware cache event] L1-icache-load-misses [Hardware cache event] L1-icache-prefetches [Hardware cache event] L1-icache-prefetch-misses [Hardware cache event] LLC-loads [Hardware cache event] LLC-load-misses [Hardware cache event] LLC-stores [Hardware cache event] LLC-store-misses [Hardware cache event] LLC-prefetches [Hardware cache event] LLC-prefetch-misses [Hardware cache event] dTLB-loads [Hardware cache event] dTLB-load-misses [Hardware cache event] dTLB-stores [Hardware cache event] dTLB-store-misses [Hardware cache event] dTLB-prefetches [Hardware cache event] dTLB-prefetch-misses [Hardware cache event] iTLB-loads [Hardware cache event] iTLB-load-misses [Hardware cache event] branch-loads [Hardware cache event] branch-load-misses [Hardware cache event] rNNN [raw hardware event descriptor]
skb:kfree_skb [Tracepoint event] skb:skb_copy_datagram_iovec [Tracepoint event] block:block_rq_abort [Tracepoint event] block:block_rq_insert [Tracepoint event] block:block_rq_issue [Tracepoint event] block:block_rq_requeue [Tracepoint event] block:block_rq_complete [Tracepoint event] block:block_bio_bounce [Tracepoint event] block:block_bio_complete [Tracepoint event] block:block_bio_backmerge [Tracepoint event] block:block_bio_frontmerge [Tracepoint event] block:block_bio_queue [Tracepoint event] block:block_getrq [Tracepoint event] block:block_sleeprq [Tracepoint event] block:block_plug [Tracepoint event] block:block_unplug_timer [Tracepoint event] block:block_unplug_io [Tracepoint event] block:block_split [Tracepoint event] block:block_remap [Tracepoint event] block:block_rq_remap [Tracepoint event] kmem:kmalloc [Tracepoint event] kmem:kmem_cache_alloc [Tracepoint event] kmem:kmalloc_node [Tracepoint event] kmem:kmem_cache_alloc_node [Tracepoint event] kmem:kfree [Tracepoint event] kmem:kmem_cache_free [Tracepoint event] kmem:mm_page_free_direct [Tracepoint event] kmem:mm_pagevec_free [Tracepoint event] kmem:mm_page_alloc [Tracepoint event] kmem:mm_page_alloc_zone_locked [Tracepoint event] kmem:mm_page_pcpu_drain [Tracepoint event] kmem:mm_page_alloc_extfrag [Tracepoint event] power:power_start [Tracepoint event] power:power_end [Tracepoint event] power:power_frequency [Tracepoint event] module:module_load [Tracepoint event] module:module_free [Tracepoint event] module:module_get [Tracepoint event] module:module_put [Tracepoint event] module:module_request [Tracepoint event] workqueue:workqueue_insertion [Tracepoint event] workqueue:workqueue_execution [Tracepoint event] workqueue:workqueue_creation [Tracepoint event] workqueue:workqueue_destruction [Tracepoint event] timer:timer_init [Tracepoint event] timer:timer_start [Tracepoint event] timer:timer_expire_entry [Tracepoint event] timer:timer_expire_exit [Tracepoint event] timer:timer_cancel [Tracepoint event]
timer:hrtimer_init [Tracepoint event] timer:hrtimer_start [Tracepoint event] timer:hrtimer_expire_entry [Tracepoint event] timer:hrtimer_expire_exit [Tracepoint event] timer:hrtimer_cancel [Tracepoint event] timer:itimer_state [Tracepoint event] timer:itimer_expire [Tracepoint event] irq:irq_handler_entry [Tracepoint event] irq:irq_handler_exit [Tracepoint event] irq:softirq_entry [Tracepoint event] irq:softirq_exit [Tracepoint event] sched:sched_kthread_stop [Tracepoint event] sched:sched_kthread_stop_ret [Tracepoint event] sched:sched_wait_task [Tracepoint event] sched:sched_wakeup [Tracepoint event] sched:sched_wakeup_new [Tracepoint event] sched:sched_switch [Tracepoint event] sched:sched_migrate_task [Tracepoint event] sched:sched_process_free [Tracepoint event] sched:sched_process_exit [Tracepoint event] sched:sched_process_wait [Tracepoint event] sched:sched_process_fork [Tracepoint event] sched:sched_signal_send [Tracepoint event] sched:sched_stat_wait [Tracepoint event] sched:sched_stat_runtime [Tracepoint event] sched:sched_stat_sleep [Tracepoint event] sched:sched_stat_iowait [Tracepoint event] syscalls:sys_enter [Tracepoint event] syscalls:sys_exit [Tracepoint event]
[root@localhost perf]# perf --help
 usage: perf [--version] [--help] COMMAND [ARGS]

 The most commonly used perf commands are:
   annotate    Read perf.data (created by perf record) and display annotated code
   list        List all symbolic event types
   record      Run a command and record its profile into perf.data
   report      Read perf.data (created by perf record) and display the profile
   sched       Tool to trace/measure scheduler properties (latencies)
   stat        Run a command and gather performance counter statistics
   timechart   Tool to visualize total system behavior during a workload
   top         System profiling tool.
   trace       Read perf.data (created by perf record) and display trace output

 See 'perf help COMMAND' for more information on a specific command
usage: perf stat [<options>] <command>
-e, --event <event> event selector. use 'perf list' to list available events
-i, --inherit child tasks inherit counters
-p, --pid <n> stat events on existing pid
-a, --all-cpus system-wide collection from all CPUs
-c, --scale scale/normalize counters
-v, --verbose be more verbose (show counter open errors, etc)
-r, --repeat <n> repeat command and print average + stddev (max: 100)
-n, --null null run - dont start any counters
[root@localhost perf]# perf stat dd if=/dev/zero of=/dev/null count=1000000
1000000+0 records in
1000000+0 records out
512000000 bytes (512 MB) copied, 0.522033 seconds, 981 MB/s

 Performance counter stats for 'dd if=/dev/zero of=/dev/null count=1000000':

     514.982845  task-clock-msecs         #      0.978 CPUs
              2  context-switches         #      0.000 M/sec
              0  CPU-migrations           #      0.000 M/sec
            200  page-faults              #      0.000 M/sec
              0  cycles                   #      0.000 M/sec
              0  instructions             #      0.000 IPC
              0  cache-references         #      0.000 M/sec
              0  cache-misses             #      0.000 M/sec

    0.526756322  seconds time elapsed
[root@localhost perf]# perf stat -e r1a8 -a sleep 1

 Performance counter stats for 'sleep 1':

              0  raw 0x1a8                #      0.000 M/sec

    1.003503666  seconds time elapsed
[root@localhost perf]# perf stat -r 5 sleep 1

 Performance counter stats for 'sleep 1' (5 runs):

       3.083876  task-clock-msecs         #      0.003 CPUs    ( +-  13.690% )
              1  context-switches         #      0.000 M/sec   ( +-   0.000% )
              0  CPU-migrations           #      0.000 M/sec   ( +-     nan% )
            143  page-faults              #      0.046 M/sec   ( +-   0.171% )
              0  cycles                   #      0.000 M/sec   ( +-     nan% )
              0  instructions             #      0.000 IPC     ( +-     nan% )
              0  cache-references         #      0.000 M/sec   ( +-     nan% )
              0  cache-misses             #      0.000 M/sec   ( +-     nan% )

    1.005166673  seconds time elapsed   ( +-   0.050% )