Skip to content

Commit

Permalink
perf: Add cgroup support
Browse files Browse the repository at this point in the history
This kernel patch adds the ability to filter monitoring based on
container groups (cgroups). This is for use in per-cpu mode only.

The cgroup to monitor is passed as a file descriptor in the pid
argument to the syscall. The file descriptor must refer to the
cgroup's directory in the cgroup filesystem. For instance, if the
cgroup name is foo and cgroupfs is mounted in /cgroup, then the
file descriptor is opened on /cgroup/foo. Cgroup mode is
activated by passing PERF_FLAG_PID_CGROUP in the flags argument
to the syscall.

For instance, to measure cgroup foo on CPU1, assuming
cgroupfs is mounted under /cgroup:

struct perf_event_attr attr;
int cgroup_fd, fd;

cgroup_fd = open("/cgroup/foo", O_RDONLY);
fd = perf_event_open(&attr, cgroup_fd, 1, -1, PERF_FLAG_PID_CGROUP);
close(cgroup_fd);

Signed-off-by: Stephane Eranian <eranian@google.com>
[ added perf_cgroup_{exit,attach} ]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4d590250.114ddf0a.689e.4482@mx.google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
  • Loading branch information
Stephane Eranian authored and Ingo Molnar committed Feb 16, 2011
1 parent d41d5a0 commit e5d1367
Show file tree
Hide file tree
Showing 6 changed files with 671 additions and 38 deletions.
1 change: 1 addition & 0 deletions include/linux/cgroup.h
Original file line number Diff line number Diff line change
Expand Up @@ -627,6 +627,7 @@ bool css_is_ancestor(struct cgroup_subsys_state *cg,
/* Get id and depth of css */
unsigned short css_id(struct cgroup_subsys_state *css);
unsigned short css_depth(struct cgroup_subsys_state *css);
struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id);

#else /* !CONFIG_CGROUPS */

Expand Down
4 changes: 4 additions & 0 deletions include/linux/cgroup_subsys.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,4 +65,8 @@ SUBSYS(net_cls)
SUBSYS(blkio)
#endif

#ifdef CONFIG_CGROUP_PERF
SUBSYS(perf)
#endif

/* */
33 changes: 30 additions & 3 deletions include/linux/perf_event.h
Original file line number Diff line number Diff line change
Expand Up @@ -464,13 +464,15 @@ enum perf_callchain_context {

#define PERF_FLAG_FD_NO_GROUP (1U << 0)
#define PERF_FLAG_FD_OUTPUT (1U << 1)
#define PERF_FLAG_PID_CGROUP (1U << 2) /* pid argument is a cgroupfs directory fd, per-cpu mode only */

#ifdef __KERNEL__
/*
* Kernel-internal data types and definitions:
*/

#ifdef CONFIG_PERF_EVENTS
# include <linux/cgroup.h>
# include <asm/perf_event.h>
# include <asm/local64.h>
#endif
Expand Down Expand Up @@ -716,6 +718,22 @@ struct swevent_hlist {
#define PERF_ATTACH_GROUP 0x02
#define PERF_ATTACH_TASK 0x04

#ifdef CONFIG_CGROUP_PERF
/*
* perf_cgroup_info keeps track of time_enabled for a cgroup.
* This is a per-cpu dynamically allocated data structure.
*/
struct perf_cgroup_info {
/* NOTE(review): presumably the accumulated enabled time for the cgroup on one cpu -- confirm */
u64 time;
/* NOTE(review): presumably the time reference at which 'time' was last updated -- confirm */
u64 timestamp;
};

/*
 * Per-cgroup state for perf: embeds the generic cgroup_subsys_state and
 * points at the per-cpu timing records (struct perf_cgroup_info).
 * NOTE(review): css is presumably kept as the first member so a css pointer
 * can be converted back to the enclosing perf_cgroup -- confirm at the users.
 */
struct perf_cgroup {
struct cgroup_subsys_state css;
struct perf_cgroup_info *info; /* timing info, one per cpu */
};
#endif

/**
* struct perf_event - performance event kernel representation:
*/
Expand Down Expand Up @@ -832,6 +850,11 @@ struct perf_event {
struct event_filter *filter;
#endif

#ifdef CONFIG_CGROUP_PERF
struct perf_cgroup *cgrp; /* cgroup event is attach to */
int cgrp_defer_enabled;
#endif

#endif /* CONFIG_PERF_EVENTS */
};

Expand Down Expand Up @@ -886,6 +909,7 @@ struct perf_event_context {
u64 generation;
int pin_count;
struct rcu_head rcu_head;
int nr_cgroups; /* cgroup events present */
};

/*
Expand All @@ -905,6 +929,9 @@ struct perf_cpu_context {
struct list_head rotation_list;
int jiffies_interval;
struct pmu *active_pmu;
#ifdef CONFIG_CGROUP_PERF
struct perf_cgroup *cgrp;
#endif
};

struct perf_output_handle {
Expand Down Expand Up @@ -1040,19 +1067,19 @@ perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
__perf_sw_event(event_id, nr, nmi, regs, addr);
}

extern atomic_t perf_task_events;
extern atomic_t perf_sched_events;

/*
 * Inline wrapper that passes @task to __perf_event_task_sched_in() through
 * COND_STMT, keyed on a global atomic counter.
 *
 * NOTE(review): COND_STMT is not visible here; presumably it runs the
 * statement only while the counter is non-zero -- confirm.
 * NOTE(review): this listing looks like a diff rendered without +/- markers;
 * the perf_task_events line is presumably the removed line and the
 * perf_sched_events line its replacement -- confirm against the real header.
 */
static inline void perf_event_task_sched_in(struct task_struct *task)
{
COND_STMT(&perf_task_events, __perf_event_task_sched_in(task));
COND_STMT(&perf_sched_events, __perf_event_task_sched_in(task));
}

/*
 * Inline wrapper invoked with the outgoing @task and the incoming @next.
 * It first reports one PERF_COUNT_SW_CONTEXT_SWITCHES software event
 * unconditionally, then passes both tasks to __perf_event_task_sched_out()
 * through COND_STMT, keyed on a global atomic counter.
 *
 * NOTE(review): COND_STMT is not visible here; presumably it runs the
 * statement only while the counter is non-zero -- confirm.
 * NOTE(review): this listing looks like a diff rendered without +/- markers;
 * the perf_task_events line is presumably the removed line and the
 * perf_sched_events line its replacement -- confirm against the real header.
 */
static inline
void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next)
{
/* nr = 1, nmi = 1, no register snapshot, no address */
perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0);

COND_STMT(&perf_task_events, __perf_event_task_sched_out(task, next));
COND_STMT(&perf_sched_events, __perf_event_task_sched_out(task, next));
}

extern void perf_event_mmap(struct vm_area_struct *vma);
Expand Down
10 changes: 10 additions & 0 deletions init/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -683,6 +683,16 @@ config CGROUP_MEM_RES_CTLR_SWAP_ENABLED
select this option (if, for some reason, they need to disable it
then noswapaccount does the trick).

config CGROUP_PERF
bool "Enable perf_event per-cpu per-container group (cgroup) monitoring"
depends on PERF_EVENTS && CGROUPS
help
This option extends the per-cpu mode to restrict monitoring to
threads which belong to the specified cgroup and run on the
designated cpu.

Say N if unsure.

menuconfig CGROUP_SCHED
bool "Group CPU scheduler"
depends on EXPERIMENTAL
Expand Down
23 changes: 23 additions & 0 deletions kernel/cgroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -4818,6 +4818,29 @@ css_get_next(struct cgroup_subsys *ss, int id,
return ret;
}

/*
 * cgroup_css_from_dir - map a file opened on a cgroupfs directory to a css
 * @f: open file to inspect
 * @id: subsystem index, valid range is [0, CGROUP_SUBSYS_COUNT)
 *
 * Returns the subsystem state stored in slot @id of the cgroup behind @f.
 * On failure an ERR_PTR() is returned instead:
 *   -EBADF   @f's inode does not use cgroup_dir_inode_operations
 *   -EINVAL  @id is outside the valid subsystem range
 *   -ENOENT  the cgroup has no state for subsystem @id
 *
 * This function does not take a reference on the returned css.
 */
struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id)
{
	struct cgroup_subsys_state *css;

	/* the file must be a directory of the cgroup filesystem */
	if (f->f_dentry->d_inode->i_op != &cgroup_dir_inode_operations)
		return ERR_PTR(-EBADF);

	if (id < 0 || id >= CGROUP_SUBSYS_COUNT)
		return ERR_PTR(-EINVAL);

	/* resolve the cgroup behind the dentry and pick the subsystem slot */
	css = __d_cgrp(f->f_dentry)->subsys[id];
	if (!css)
		return ERR_PTR(-ENOENT);

	return css;
}

#ifdef CONFIG_CGROUP_DEBUG
static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss,
struct cgroup *cont)
Expand Down
Loading

0 comments on commit e5d1367

Please sign in to comment.