forked from grate-driver/linux
-
Notifications
You must be signed in to change notification settings - Fork 0
/
perfmon.c
6819 lines (5691 loc) · 168 KB
/
perfmon.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* This file implements the perfmon-2 subsystem which is used
* to program the IA-64 Performance Monitoring Unit (PMU).
*
* The initial version of perfmon.c was written by
* Ganesh Venkitachalam, IBM Corp.
*
* Then it was modified for perfmon-1.x by Stephane Eranian and
* David Mosberger, Hewlett Packard Co.
*
* Version Perfmon-2.x is a rewrite of perfmon-1.x
* by Stephane Eranian, Hewlett Packard Co.
*
* Copyright (C) 1999-2005 Hewlett Packard Co
* Stephane Eranian <eranian@hpl.hp.com>
* David Mosberger-Tang <davidm@hpl.hp.com>
*
* More information about perfmon available at:
* http://www.hpl.hp.com/research/linux/perfmon
*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/sysctl.h>
#include <linux/list.h>
#include <linux/file.h>
#include <linux/poll.h>
#include <linux/vfs.h>
#include <linux/smp.h>
#include <linux/pagemap.h>
#include <linux/mount.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/rcupdate.h>
#include <linux/completion.h>
#include <linux/tracehook.h>
#include <linux/slab.h>
#include <asm/errno.h>
#include <asm/intrinsics.h>
#include <asm/page.h>
#include <asm/perfmon.h>
#include <asm/processor.h>
#include <asm/signal.h>
#include <asm/uaccess.h>
#include <asm/delay.h>
#ifdef CONFIG_PERFMON
/*
* perfmon context state
*/
#define PFM_CTX_UNLOADED 1 /* context is not loaded onto any task */
#define PFM_CTX_LOADED 2 /* context is loaded onto a task */
#define PFM_CTX_MASKED 3 /* context is loaded but monitoring is masked due to overflow */
#define PFM_CTX_ZOMBIE 4 /* owner of the context is closing it */
#define PFM_INVALID_ACTIVATION (~0UL)
#define PFM_NUM_PMC_REGS 64 /* PMC save area for ctxsw */
#define PFM_NUM_PMD_REGS 64 /* PMD save area for ctxsw */
/*
* depth of message queue
*/
#define PFM_MAX_MSGS 32
#define PFM_CTXQ_EMPTY(g) ((g)->ctx_msgq_head == (g)->ctx_msgq_tail)
/*
* type of a PMU register (bitmask).
* bitmask structure:
* bit0 : register implemented
* bit1 : end marker
* bit2-3 : reserved
* bit4 : pmc has pmc.pm
* bit5 : pmc controls a counter (has pmc.oi), pmd is used as counter
* bit6-7 : register type
* bit8-31: reserved
*/
#define PFM_REG_NOTIMPL 0x0 /* not implemented at all */
#define PFM_REG_IMPL 0x1 /* register implemented */
#define PFM_REG_END 0x2 /* end marker */
#define PFM_REG_MONITOR (0x1<<4|PFM_REG_IMPL) /* a PMC with a pmc.pm field only */
#define PFM_REG_COUNTING (0x2<<4|PFM_REG_MONITOR) /* a monitor + pmc.oi+ PMD used as a counter */
#define PFM_REG_CONTROL (0x4<<4|PFM_REG_IMPL) /* PMU control register */
#define PFM_REG_CONFIG (0x8<<4|PFM_REG_IMPL) /* configuration register */
#define PFM_REG_BUFFER (0xc<<4|PFM_REG_IMPL) /* PMD used as buffer */
#define PMC_IS_LAST(i) (pmu_conf->pmc_desc[i].type & PFM_REG_END)
#define PMD_IS_LAST(i) (pmu_conf->pmd_desc[i].type & PFM_REG_END)
#define PMC_OVFL_NOTIFY(ctx, i) ((ctx)->ctx_pmds[i].flags & PFM_REGFL_OVFL_NOTIFY)
/* i assumed unsigned */
#define PMC_IS_IMPL(i) (i< PMU_MAX_PMCS && (pmu_conf->pmc_desc[i].type & PFM_REG_IMPL))
#define PMD_IS_IMPL(i) (i< PMU_MAX_PMDS && (pmu_conf->pmd_desc[i].type & PFM_REG_IMPL))
/* XXX: these assume that register i is implemented */
#define PMD_IS_COUNTING(i) ((pmu_conf->pmd_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING)
#define PMC_IS_COUNTING(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING)
#define PMC_IS_MONITOR(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_MONITOR) == PFM_REG_MONITOR)
#define PMC_IS_CONTROL(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_CONTROL) == PFM_REG_CONTROL)
#define PMC_DFL_VAL(i) pmu_conf->pmc_desc[i].default_value
#define PMC_RSVD_MASK(i) pmu_conf->pmc_desc[i].reserved_mask
#define PMD_PMD_DEP(i) pmu_conf->pmd_desc[i].dep_pmd[0]
#define PMC_PMD_DEP(i) pmu_conf->pmc_desc[i].dep_pmd[0]
#define PFM_NUM_IBRS IA64_NUM_DBG_REGS
#define PFM_NUM_DBRS IA64_NUM_DBG_REGS
#define CTX_OVFL_NOBLOCK(c) ((c)->ctx_fl_block == 0)
#define CTX_HAS_SMPL(c) ((c)->ctx_fl_is_sampling)
#define PFM_CTX_TASK(h) (h)->ctx_task
#define PMU_PMC_OI 5 /* position of pmc.oi bit */
/* XXX: does not support more than 64 PMDs */
#define CTX_USED_PMD(ctx, mask) (ctx)->ctx_used_pmds[0] |= (mask)
#define CTX_IS_USED_PMD(ctx, c) (((ctx)->ctx_used_pmds[0] & (1UL << (c))) != 0UL)
#define CTX_USED_MONITOR(ctx, mask) (ctx)->ctx_used_monitors[0] |= (mask)
#define CTX_USED_IBR(ctx,n) (ctx)->ctx_used_ibrs[(n)>>6] |= 1UL<< ((n) % 64)
#define CTX_USED_DBR(ctx,n) (ctx)->ctx_used_dbrs[(n)>>6] |= 1UL<< ((n) % 64)
#define CTX_USES_DBREGS(ctx) (((pfm_context_t *)(ctx))->ctx_fl_using_dbreg==1)
#define PFM_CODE_RR 0 /* requesting code range restriction */
#define PFM_DATA_RR 1 /* requestion data range restriction */
#define PFM_CPUINFO_CLEAR(v) pfm_get_cpu_var(pfm_syst_info) &= ~(v)
#define PFM_CPUINFO_SET(v) pfm_get_cpu_var(pfm_syst_info) |= (v)
#define PFM_CPUINFO_GET() pfm_get_cpu_var(pfm_syst_info)
#define RDEP(x) (1UL<<(x))
/*
* context protection macros
* in SMP:
* - we need to protect against CPU concurrency (spin_lock)
* - we need to protect against PMU overflow interrupts (local_irq_disable)
* in UP:
* - we need to protect against PMU overflow interrupts (local_irq_disable)
*
* spin_lock_irqsave()/spin_unlock_irqrestore():
* in SMP: local_irq_disable + spin_lock
* in UP : local_irq_disable
*
* spin_lock()/spin_lock():
* in UP : removed automatically
* in SMP: protect against context accesses from other CPU. interrupts
* are not masked. This is useful for the PMU interrupt handler
* because we know we will not get PMU concurrency in that code.
*/
#define PROTECT_CTX(c, f) \
do { \
DPRINT(("spinlock_irq_save ctx %p by [%d]\n", c, task_pid_nr(current))); \
spin_lock_irqsave(&(c)->ctx_lock, f); \
DPRINT(("spinlocked ctx %p by [%d]\n", c, task_pid_nr(current))); \
} while(0)
#define UNPROTECT_CTX(c, f) \
do { \
DPRINT(("spinlock_irq_restore ctx %p by [%d]\n", c, task_pid_nr(current))); \
spin_unlock_irqrestore(&(c)->ctx_lock, f); \
} while(0)
#define PROTECT_CTX_NOPRINT(c, f) \
do { \
spin_lock_irqsave(&(c)->ctx_lock, f); \
} while(0)
#define UNPROTECT_CTX_NOPRINT(c, f) \
do { \
spin_unlock_irqrestore(&(c)->ctx_lock, f); \
} while(0)
#define PROTECT_CTX_NOIRQ(c) \
do { \
spin_lock(&(c)->ctx_lock); \
} while(0)
#define UNPROTECT_CTX_NOIRQ(c) \
do { \
spin_unlock(&(c)->ctx_lock); \
} while(0)
#ifdef CONFIG_SMP
#define GET_ACTIVATION() pfm_get_cpu_var(pmu_activation_number)
#define INC_ACTIVATION() pfm_get_cpu_var(pmu_activation_number)++
#define SET_ACTIVATION(c) (c)->ctx_last_activation = GET_ACTIVATION()
#else /* !CONFIG_SMP */
#define SET_ACTIVATION(t) do {} while(0)
#define GET_ACTIVATION(t) do {} while(0)
#define INC_ACTIVATION(t) do {} while(0)
#endif /* CONFIG_SMP */
#define SET_PMU_OWNER(t, c) do { pfm_get_cpu_var(pmu_owner) = (t); pfm_get_cpu_var(pmu_ctx) = (c); } while(0)
#define GET_PMU_OWNER() pfm_get_cpu_var(pmu_owner)
#define GET_PMU_CTX() pfm_get_cpu_var(pmu_ctx)
#define LOCK_PFS(g) spin_lock_irqsave(&pfm_sessions.pfs_lock, g)
#define UNLOCK_PFS(g) spin_unlock_irqrestore(&pfm_sessions.pfs_lock, g)
#define PFM_REG_RETFLAG_SET(flags, val) do { flags &= ~PFM_REG_RETFL_MASK; flags |= (val); } while(0)
/*
* cmp0 must be the value of pmc0
*/
#define PMC0_HAS_OVFL(cmp0) (cmp0 & ~0x1UL)
#define PFMFS_MAGIC 0xa0b4d889
/*
* debugging
*/
#define PFM_DEBUGGING 1
#ifdef PFM_DEBUGGING
#define DPRINT(a) \
do { \
if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d [%d] ", __func__, __LINE__, smp_processor_id(), task_pid_nr(current)); printk a; } \
} while (0)
#define DPRINT_ovfl(a) \
do { \
if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d [%d] ", __func__, __LINE__, smp_processor_id(), task_pid_nr(current)); printk a; } \
} while (0)
#endif
/*
* 64-bit software counter structure
*
* the next_reset_type is applied to the next call to pfm_reset_regs()
*/
typedef struct {
unsigned long val; /* virtual 64bit counter value */
unsigned long lval; /* last reset value */
unsigned long long_reset; /* reset value on sampling overflow */
unsigned long short_reset; /* reset value on overflow */
unsigned long reset_pmds[4]; /* which other pmds to reset when this counter overflows */
unsigned long smpl_pmds[4]; /* which pmds are accessed when counter overflow */
unsigned long seed; /* seed for random-number generator */
unsigned long mask; /* mask for random-number generator */
unsigned int flags; /* notify/do not notify */
unsigned long eventid; /* overflow event identifier */
} pfm_counter_t;
/*
* context flags
*/
typedef struct {
unsigned int block:1; /* when 1, task will blocked on user notifications */
unsigned int system:1; /* do system wide monitoring */
unsigned int using_dbreg:1; /* using range restrictions (debug registers) */
unsigned int is_sampling:1; /* true if using a custom format */
unsigned int excl_idle:1; /* exclude idle task in system wide session */
unsigned int going_zombie:1; /* context is zombie (MASKED+blocking) */
unsigned int trap_reason:2; /* reason for going into pfm_handle_work() */
unsigned int no_msg:1; /* no message sent on overflow */
unsigned int can_restart:1; /* allowed to issue a PFM_RESTART */
unsigned int reserved:22;
} pfm_context_flags_t;
#define PFM_TRAP_REASON_NONE 0x0 /* default value */
#define PFM_TRAP_REASON_BLOCK 0x1 /* we need to block on overflow */
#define PFM_TRAP_REASON_RESET 0x2 /* we need to reset PMDs */
/*
* perfmon context: encapsulates all the state of a monitoring session
*/
typedef struct pfm_context {
spinlock_t ctx_lock; /* context protection */
pfm_context_flags_t ctx_flags; /* bitmask of flags (block reason incl.) */
unsigned int ctx_state; /* state: active/inactive (no bitfield) */
struct task_struct *ctx_task; /* task to which context is attached */
unsigned long ctx_ovfl_regs[4]; /* which registers overflowed (notification) */
struct completion ctx_restart_done; /* use for blocking notification mode */
unsigned long ctx_used_pmds[4]; /* bitmask of PMD used */
unsigned long ctx_all_pmds[4]; /* bitmask of all accessible PMDs */
unsigned long ctx_reload_pmds[4]; /* bitmask of force reload PMD on ctxsw in */
unsigned long ctx_all_pmcs[4]; /* bitmask of all accessible PMCs */
unsigned long ctx_reload_pmcs[4]; /* bitmask of force reload PMC on ctxsw in */
unsigned long ctx_used_monitors[4]; /* bitmask of monitor PMC being used */
unsigned long ctx_pmcs[PFM_NUM_PMC_REGS]; /* saved copies of PMC values */
unsigned int ctx_used_ibrs[1]; /* bitmask of used IBR (speedup ctxsw in) */
unsigned int ctx_used_dbrs[1]; /* bitmask of used DBR (speedup ctxsw in) */
unsigned long ctx_dbrs[IA64_NUM_DBG_REGS]; /* DBR values (cache) when not loaded */
unsigned long ctx_ibrs[IA64_NUM_DBG_REGS]; /* IBR values (cache) when not loaded */
pfm_counter_t ctx_pmds[PFM_NUM_PMD_REGS]; /* software state for PMDS */
unsigned long th_pmcs[PFM_NUM_PMC_REGS]; /* PMC thread save state */
unsigned long th_pmds[PFM_NUM_PMD_REGS]; /* PMD thread save state */
unsigned long ctx_saved_psr_up; /* only contains psr.up value */
unsigned long ctx_last_activation; /* context last activation number for last_cpu */
unsigned int ctx_last_cpu; /* CPU id of current or last CPU used (SMP only) */
unsigned int ctx_cpu; /* cpu to which perfmon is applied (system wide) */
int ctx_fd; /* file descriptor used my this context */
pfm_ovfl_arg_t ctx_ovfl_arg; /* argument to custom buffer format handler */
pfm_buffer_fmt_t *ctx_buf_fmt; /* buffer format callbacks */
void *ctx_smpl_hdr; /* points to sampling buffer header kernel vaddr */
unsigned long ctx_smpl_size; /* size of sampling buffer */
void *ctx_smpl_vaddr; /* user level virtual address of smpl buffer */
wait_queue_head_t ctx_msgq_wait;
pfm_msg_t ctx_msgq[PFM_MAX_MSGS];
int ctx_msgq_head;
int ctx_msgq_tail;
struct fasync_struct *ctx_async_queue;
wait_queue_head_t ctx_zombieq; /* termination cleanup wait queue */
} pfm_context_t;
/*
* magic number used to verify that structure is really
* a perfmon context
*/
#define PFM_IS_FILE(f) ((f)->f_op == &pfm_file_ops)
#define PFM_GET_CTX(t) ((pfm_context_t *)(t)->thread.pfm_context)
#ifdef CONFIG_SMP
#define SET_LAST_CPU(ctx, v) (ctx)->ctx_last_cpu = (v)
#define GET_LAST_CPU(ctx) (ctx)->ctx_last_cpu
#else
#define SET_LAST_CPU(ctx, v) do {} while(0)
#define GET_LAST_CPU(ctx) do {} while(0)
#endif
#define ctx_fl_block ctx_flags.block
#define ctx_fl_system ctx_flags.system
#define ctx_fl_using_dbreg ctx_flags.using_dbreg
#define ctx_fl_is_sampling ctx_flags.is_sampling
#define ctx_fl_excl_idle ctx_flags.excl_idle
#define ctx_fl_going_zombie ctx_flags.going_zombie
#define ctx_fl_trap_reason ctx_flags.trap_reason
#define ctx_fl_no_msg ctx_flags.no_msg
#define ctx_fl_can_restart ctx_flags.can_restart
#define PFM_SET_WORK_PENDING(t, v) do { (t)->thread.pfm_needs_checking = v; } while(0);
#define PFM_GET_WORK_PENDING(t) (t)->thread.pfm_needs_checking
/*
* global information about all sessions
* mostly used to synchronize between system wide and per-process
*/
typedef struct {
spinlock_t pfs_lock; /* lock the structure */
unsigned int pfs_task_sessions; /* number of per task sessions */
unsigned int pfs_sys_sessions; /* number of per system wide sessions */
unsigned int pfs_sys_use_dbregs; /* incremented when a system wide session uses debug regs */
unsigned int pfs_ptrace_use_dbregs; /* incremented when a process uses debug regs */
struct task_struct *pfs_sys_session[NR_CPUS]; /* point to task owning a system-wide session */
} pfm_session_t;
/*
* information about a PMC or PMD.
* dep_pmd[]: a bitmask of dependent PMD registers
* dep_pmc[]: a bitmask of dependent PMC registers
*/
typedef int (*pfm_reg_check_t)(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
typedef struct {
unsigned int type;
int pm_pos;
unsigned long default_value; /* power-on default value */
unsigned long reserved_mask; /* bitmask of reserved bits */
pfm_reg_check_t read_check;
pfm_reg_check_t write_check;
unsigned long dep_pmd[4];
unsigned long dep_pmc[4];
} pfm_reg_desc_t;
/* assume cnum is a valid monitor */
#define PMC_PM(cnum, val) (((val) >> (pmu_conf->pmc_desc[cnum].pm_pos)) & 0x1)
/*
* This structure is initialized at boot time and contains
* a description of the PMU main characteristics.
*
* If the probe function is defined, detection is based
* on its return value:
* - 0 means recognized PMU
* - anything else means not supported
* When the probe function is not defined, then the pmu_family field
* is used and it must match the host CPU family such that:
* - cpu->family & config->pmu_family != 0
*/
typedef struct {
unsigned long ovfl_val; /* overflow value for counters */
pfm_reg_desc_t *pmc_desc; /* detailed PMC register dependencies descriptions */
pfm_reg_desc_t *pmd_desc; /* detailed PMD register dependencies descriptions */
unsigned int num_pmcs; /* number of PMCS: computed at init time */
unsigned int num_pmds; /* number of PMDS: computed at init time */
unsigned long impl_pmcs[4]; /* bitmask of implemented PMCS */
unsigned long impl_pmds[4]; /* bitmask of implemented PMDS */
char *pmu_name; /* PMU family name */
unsigned int pmu_family; /* cpuid family pattern used to identify pmu */
unsigned int flags; /* pmu specific flags */
unsigned int num_ibrs; /* number of IBRS: computed at init time */
unsigned int num_dbrs; /* number of DBRS: computed at init time */
unsigned int num_counters; /* PMC/PMD counting pairs : computed at init time */
int (*probe)(void); /* customized probe routine */
unsigned int use_rr_dbregs:1; /* set if debug registers used for range restriction */
} pmu_config_t;
/*
* PMU specific flags
*/
#define PFM_PMU_IRQ_RESEND 1 /* PMU needs explicit IRQ resend */
/*
* debug register related type definitions
*/
typedef struct {
unsigned long ibr_mask:56;
unsigned long ibr_plm:4;
unsigned long ibr_ig:3;
unsigned long ibr_x:1;
} ibr_mask_reg_t;
typedef struct {
unsigned long dbr_mask:56;
unsigned long dbr_plm:4;
unsigned long dbr_ig:2;
unsigned long dbr_w:1;
unsigned long dbr_r:1;
} dbr_mask_reg_t;
typedef union {
unsigned long val;
ibr_mask_reg_t ibr;
dbr_mask_reg_t dbr;
} dbreg_t;
/*
* perfmon command descriptions
*/
typedef struct {
int (*cmd_func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
char *cmd_name;
int cmd_flags;
unsigned int cmd_narg;
size_t cmd_argsize;
int (*cmd_getsize)(void *arg, size_t *sz);
} pfm_cmd_desc_t;
#define PFM_CMD_FD 0x01 /* command requires a file descriptor */
#define PFM_CMD_ARG_READ 0x02 /* command must read argument(s) */
#define PFM_CMD_ARG_RW 0x04 /* command must read/write argument(s) */
#define PFM_CMD_STOP 0x08 /* command does not work on zombie context */
#define PFM_CMD_NAME(cmd) pfm_cmd_tab[(cmd)].cmd_name
#define PFM_CMD_READ_ARG(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_READ)
#define PFM_CMD_RW_ARG(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_RW)
#define PFM_CMD_USE_FD(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_FD)
#define PFM_CMD_STOPPED(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_STOP)
#define PFM_CMD_ARG_MANY -1 /* cannot be zero */
typedef struct {
unsigned long pfm_spurious_ovfl_intr_count; /* keep track of spurious ovfl interrupts */
unsigned long pfm_replay_ovfl_intr_count; /* keep track of replayed ovfl interrupts */
unsigned long pfm_ovfl_intr_count; /* keep track of ovfl interrupts */
unsigned long pfm_ovfl_intr_cycles; /* cycles spent processing ovfl interrupts */
unsigned long pfm_ovfl_intr_cycles_min; /* min cycles spent processing ovfl interrupts */
unsigned long pfm_ovfl_intr_cycles_max; /* max cycles spent processing ovfl interrupts */
unsigned long pfm_smpl_handler_calls;
unsigned long pfm_smpl_handler_cycles;
char pad[SMP_CACHE_BYTES] ____cacheline_aligned;
} pfm_stats_t;
/*
* perfmon internal variables
*/
static pfm_stats_t pfm_stats[NR_CPUS];
static pfm_session_t pfm_sessions; /* global sessions information */
static DEFINE_SPINLOCK(pfm_alt_install_check);
static pfm_intr_handler_desc_t *pfm_alt_intr_handler;
static struct proc_dir_entry *perfmon_dir;
static pfm_uuid_t pfm_null_uuid = {0,};
static spinlock_t pfm_buffer_fmt_lock;
static LIST_HEAD(pfm_buffer_fmt_list);
static pmu_config_t *pmu_conf;
/* sysctl() controls */
pfm_sysctl_t pfm_sysctl;
EXPORT_SYMBOL(pfm_sysctl);
static ctl_table pfm_ctl_table[]={
{
.procname = "debug",
.data = &pfm_sysctl.debug,
.maxlen = sizeof(int),
.mode = 0666,
.proc_handler = proc_dointvec,
},
{
.procname = "debug_ovfl",
.data = &pfm_sysctl.debug_ovfl,
.maxlen = sizeof(int),
.mode = 0666,
.proc_handler = proc_dointvec,
},
{
.procname = "fastctxsw",
.data = &pfm_sysctl.fastctxsw,
.maxlen = sizeof(int),
.mode = 0600,
.proc_handler = proc_dointvec,
},
{
.procname = "expert_mode",
.data = &pfm_sysctl.expert_mode,
.maxlen = sizeof(int),
.mode = 0600,
.proc_handler = proc_dointvec,
},
{}
};
static ctl_table pfm_sysctl_dir[] = {
{
.procname = "perfmon",
.mode = 0555,
.child = pfm_ctl_table,
},
{}
};
static ctl_table pfm_sysctl_root[] = {
{
.procname = "kernel",
.mode = 0555,
.child = pfm_sysctl_dir,
},
{}
};
static struct ctl_table_header *pfm_sysctl_header;
static int pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
#define pfm_get_cpu_var(v) __ia64_per_cpu_var(v)
#define pfm_get_cpu_data(a,b) per_cpu(a, b)
static inline void
pfm_put_task(struct task_struct *task)
{
if (task != current) put_task_struct(task);
}
static inline void
pfm_reserve_page(unsigned long a)
{
SetPageReserved(vmalloc_to_page((void *)a));
}
static inline void
pfm_unreserve_page(unsigned long a)
{
ClearPageReserved(vmalloc_to_page((void*)a));
}
static inline unsigned long
pfm_protect_ctx_ctxsw(pfm_context_t *x)
{
spin_lock(&(x)->ctx_lock);
return 0UL;
}
static inline void
pfm_unprotect_ctx_ctxsw(pfm_context_t *x, unsigned long f)
{
spin_unlock(&(x)->ctx_lock);
}
/* forward declaration */
static const struct dentry_operations pfmfs_dentry_operations;
static struct dentry *
pfmfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data)
{
return mount_pseudo(fs_type, "pfm:", NULL, &pfmfs_dentry_operations,
PFMFS_MAGIC);
}
static struct file_system_type pfm_fs_type = {
.name = "pfmfs",
.mount = pfmfs_mount,
.kill_sb = kill_anon_super,
};
MODULE_ALIAS_FS("pfmfs");
DEFINE_PER_CPU(unsigned long, pfm_syst_info);
DEFINE_PER_CPU(struct task_struct *, pmu_owner);
DEFINE_PER_CPU(pfm_context_t *, pmu_ctx);
DEFINE_PER_CPU(unsigned long, pmu_activation_number);
EXPORT_PER_CPU_SYMBOL_GPL(pfm_syst_info);
/* forward declaration */
static const struct file_operations pfm_file_ops;
/*
* forward declarations
*/
#ifndef CONFIG_SMP
static void pfm_lazy_save_regs (struct task_struct *ta);
#endif
void dump_pmu_state(const char *);
static int pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
#include "perfmon_itanium.h"
#include "perfmon_mckinley.h"
#include "perfmon_montecito.h"
#include "perfmon_generic.h"
static pmu_config_t *pmu_confs[]={
&pmu_conf_mont,
&pmu_conf_mck,
&pmu_conf_ita,
&pmu_conf_gen, /* must be last */
NULL
};
static int pfm_end_notify_user(pfm_context_t *ctx);
static inline void
pfm_clear_psr_pp(void)
{
ia64_rsm(IA64_PSR_PP);
ia64_srlz_i();
}
static inline void
pfm_set_psr_pp(void)
{
ia64_ssm(IA64_PSR_PP);
ia64_srlz_i();
}
static inline void
pfm_clear_psr_up(void)
{
ia64_rsm(IA64_PSR_UP);
ia64_srlz_i();
}
static inline void
pfm_set_psr_up(void)
{
ia64_ssm(IA64_PSR_UP);
ia64_srlz_i();
}
static inline unsigned long
pfm_get_psr(void)
{
unsigned long tmp;
tmp = ia64_getreg(_IA64_REG_PSR);
ia64_srlz_i();
return tmp;
}
static inline void
pfm_set_psr_l(unsigned long val)
{
ia64_setreg(_IA64_REG_PSR_L, val);
ia64_srlz_i();
}
static inline void
pfm_freeze_pmu(void)
{
ia64_set_pmc(0,1UL);
ia64_srlz_d();
}
static inline void
pfm_unfreeze_pmu(void)
{
ia64_set_pmc(0,0UL);
ia64_srlz_d();
}
static inline void
pfm_restore_ibrs(unsigned long *ibrs, unsigned int nibrs)
{
int i;
for (i=0; i < nibrs; i++) {
ia64_set_ibr(i, ibrs[i]);
ia64_dv_serialize_instruction();
}
ia64_srlz_i();
}
static inline void
pfm_restore_dbrs(unsigned long *dbrs, unsigned int ndbrs)
{
int i;
for (i=0; i < ndbrs; i++) {
ia64_set_dbr(i, dbrs[i]);
ia64_dv_serialize_data();
}
ia64_srlz_d();
}
/*
* PMD[i] must be a counter. no check is made
*/
static inline unsigned long
pfm_read_soft_counter(pfm_context_t *ctx, int i)
{
return ctx->ctx_pmds[i].val + (ia64_get_pmd(i) & pmu_conf->ovfl_val);
}
/*
* PMD[i] must be a counter. no check is made
*/
static inline void
pfm_write_soft_counter(pfm_context_t *ctx, int i, unsigned long val)
{
unsigned long ovfl_val = pmu_conf->ovfl_val;
ctx->ctx_pmds[i].val = val & ~ovfl_val;
/*
* writing to unimplemented part is ignore, so we do not need to
* mask off top part
*/
ia64_set_pmd(i, val & ovfl_val);
}
static pfm_msg_t *
pfm_get_new_msg(pfm_context_t *ctx)
{
int idx, next;
next = (ctx->ctx_msgq_tail+1) % PFM_MAX_MSGS;
DPRINT(("ctx_fd=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail));
if (next == ctx->ctx_msgq_head) return NULL;
idx = ctx->ctx_msgq_tail;
ctx->ctx_msgq_tail = next;
DPRINT(("ctx=%p head=%d tail=%d msg=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, idx));
return ctx->ctx_msgq+idx;
}
static pfm_msg_t *
pfm_get_next_msg(pfm_context_t *ctx)
{
pfm_msg_t *msg;
DPRINT(("ctx=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail));
if (PFM_CTXQ_EMPTY(ctx)) return NULL;
/*
* get oldest message
*/
msg = ctx->ctx_msgq+ctx->ctx_msgq_head;
/*
* and move forward
*/
ctx->ctx_msgq_head = (ctx->ctx_msgq_head+1) % PFM_MAX_MSGS;
DPRINT(("ctx=%p head=%d tail=%d type=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, msg->pfm_gen_msg.msg_type));
return msg;
}
static void
pfm_reset_msgq(pfm_context_t *ctx)
{
ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0;
DPRINT(("ctx=%p msgq reset\n", ctx));
}
static void *
pfm_rvmalloc(unsigned long size)
{
void *mem;
unsigned long addr;
size = PAGE_ALIGN(size);
mem = vzalloc(size);
if (mem) {
//printk("perfmon: CPU%d pfm_rvmalloc(%ld)=%p\n", smp_processor_id(), size, mem);
addr = (unsigned long)mem;
while (size > 0) {
pfm_reserve_page(addr);
addr+=PAGE_SIZE;
size-=PAGE_SIZE;
}
}
return mem;
}
static void
pfm_rvfree(void *mem, unsigned long size)
{
unsigned long addr;
if (mem) {
DPRINT(("freeing physical buffer @%p size=%lu\n", mem, size));
addr = (unsigned long) mem;
while ((long) size > 0) {
pfm_unreserve_page(addr);
addr+=PAGE_SIZE;
size-=PAGE_SIZE;
}
vfree(mem);
}
return;
}
static pfm_context_t *
pfm_context_alloc(int ctx_flags)
{
pfm_context_t *ctx;
/*
* allocate context descriptor
* must be able to free with interrupts disabled
*/
ctx = kzalloc(sizeof(pfm_context_t), GFP_KERNEL);
if (ctx) {
DPRINT(("alloc ctx @%p\n", ctx));
/*
* init context protection lock
*/
spin_lock_init(&ctx->ctx_lock);
/*
* context is unloaded
*/
ctx->ctx_state = PFM_CTX_UNLOADED;
/*
* initialization of context's flags
*/
ctx->ctx_fl_block = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0;
ctx->ctx_fl_system = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0;
ctx->ctx_fl_no_msg = (ctx_flags & PFM_FL_OVFL_NO_MSG) ? 1: 0;
/*
* will move to set properties
* ctx->ctx_fl_excl_idle = (ctx_flags & PFM_FL_EXCL_IDLE) ? 1: 0;
*/
/*
* init restart semaphore to locked
*/
init_completion(&ctx->ctx_restart_done);
/*
* activation is used in SMP only
*/
ctx->ctx_last_activation = PFM_INVALID_ACTIVATION;
SET_LAST_CPU(ctx, -1);
/*
* initialize notification message queue
*/
ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0;
init_waitqueue_head(&ctx->ctx_msgq_wait);
init_waitqueue_head(&ctx->ctx_zombieq);
}
return ctx;
}
static void
pfm_context_free(pfm_context_t *ctx)
{
if (ctx) {
DPRINT(("free ctx @%p\n", ctx));
kfree(ctx);
}
}
static void
pfm_mask_monitoring(struct task_struct *task)
{
pfm_context_t *ctx = PFM_GET_CTX(task);
unsigned long mask, val, ovfl_mask;
int i;
DPRINT_ovfl(("masking monitoring for [%d]\n", task_pid_nr(task)));
ovfl_mask = pmu_conf->ovfl_val;
/*
* monitoring can only be masked as a result of a valid
* counter overflow. In UP, it means that the PMU still
* has an owner. Note that the owner can be different
* from the current task. However the PMU state belongs
* to the owner.
* In SMP, a valid overflow only happens when task is
* current. Therefore if we come here, we know that
* the PMU state belongs to the current task, therefore
* we can access the live registers.
*
* So in both cases, the live register contains the owner's
* state. We can ONLY touch the PMU registers and NOT the PSR.
*
* As a consequence to this call, the ctx->th_pmds[] array
* contains stale information which must be ignored
* when context is reloaded AND monitoring is active (see
* pfm_restart).
*/
mask = ctx->ctx_used_pmds[0];
for (i = 0; mask; i++, mask>>=1) {
/* skip non used pmds */
if ((mask & 0x1) == 0) continue;
val = ia64_get_pmd(i);
if (PMD_IS_COUNTING(i)) {
/*
* we rebuild the full 64 bit value of the counter
*/
ctx->ctx_pmds[i].val += (val & ovfl_mask);
} else {
ctx->ctx_pmds[i].val = val;
}
DPRINT_ovfl(("pmd[%d]=0x%lx hw_pmd=0x%lx\n",
i,
ctx->ctx_pmds[i].val,
val & ovfl_mask));
}
/*
* mask monitoring by setting the privilege level to 0
* we cannot use psr.pp/psr.up for this, it is controlled by
* the user
*
* if task is current, modify actual registers, otherwise modify
* thread save state, i.e., what will be restored in pfm_load_regs()
*/
mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER;
for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) {
if ((mask & 0x1) == 0UL) continue;
ia64_set_pmc(i, ctx->th_pmcs[i] & ~0xfUL);
ctx->th_pmcs[i] &= ~0xfUL;
DPRINT_ovfl(("pmc[%d]=0x%lx\n", i, ctx->th_pmcs[i]));
}
/*
* make all of this visible
*/
ia64_srlz_d();
}
/*
* must always be done with task == current
*
* context must be in MASKED state when calling
*/
static void
pfm_restore_monitoring(struct task_struct *task)
{
pfm_context_t *ctx = PFM_GET_CTX(task);
unsigned long mask, ovfl_mask;
unsigned long psr, val;
int i, is_system;