1/*
2 * Copyright (c) 2012 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder.  You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Andreas Sandberg
38 */
39
40#ifndef __CPU_KVM_PERFEVENT_HH__
41#define __CPU_KVM_PERFEVENT_HH__
42
43#include <linux/perf_event.h>
44#include <sys/types.h>
45
46#include <inttypes.h>
47
48#include "config/have_perf_attr_exclude_host.hh"
49
/**
 * Configuration of a PerfEvent counter.
 *
 * Thin wrapper around a perf_event_attr structure. All setters return
 * a reference to the configuration object so that calls can be
 * chained.
 */
class PerfKvmCounterConfig
{
  public:
    /**
     * Initialize the PerfEvent counter configuration structure.
     *
     * PerfEvent groups counters into counter types, which abstract
     * hardware performance counters and give access to software
     * events. The type parameter selects the kind of counter; for
     * hardware performance counters it is typically
     * PERF_TYPE_HARDWARE, PERF_TYPE_HW_CACHE, or PERF_TYPE_RAW.
     *
     * The meaning of the config parameter depends on the counter
     * type. Possible values are listed in perf_event.h in the kernel
     * headers. For raw counters, it is the raw value written to the
     * performance counter configuration register (some bits dealing
     * with sampling and similar features are usually masked).
     *
     * @param type Counter type.
     * @param config Counter configuration.
     */
    PerfKvmCounterConfig(uint32_t type, uint64_t config);
    ~PerfKvmCounterConfig();

    /**
     * Set the initial sample period (overflow count) of an event.
     *
     * A period of 0 makes the event behave as a normal counting
     * event that never triggers overflows.
     *
     * @param period Number of counter events before the counter
     * overflows.
     * @return Reference to this configuration.
     */
    PerfKvmCounterConfig &
    samplePeriod(uint64_t period)
    {
        attr.sample_period = period;
        // Clear the freq flag alongside the period.
        attr.freq = 0;
        return *this;
    }

    /**
     * Set the number of samples that need to be triggered before
     * data is reported as available on the perf event FD. Defaults
     * to 0, which disables overflow reporting.
     *
     * @param events Number of overflows before signaling a wake up.
     * @return Reference to this configuration.
     */
    PerfKvmCounterConfig &
    wakeupEvents(uint32_t events)
    {
        attr.wakeup_events = events;
        // Clear the watermark flag alongside the event count.
        attr.watermark = 0;
        return *this;
    }

    /**
     * Don't start the performance counter automatically when
     * attaching it.
     *
     * @param val true to disable, false to enable the counter.
     * @return Reference to this configuration.
     */
    PerfKvmCounterConfig &
    disabled(bool val)
    {
        attr.disabled = val ? 1 : 0;
        return *this;
    }

    /**
     * Force the group to be active all the time (i.e., disallow
     * multiplexing).
     *
     * Only applies to group leaders.
     *
     * @param val true to pin the counter.
     * @return Reference to this configuration.
     */
    PerfKvmCounterConfig &
    pinned(bool val)
    {
        attr.pinned = val ? 1 : 0;
        return *this;
    }

    /**
     * Exclude events from the host (i.e., only include events from
     * the guest system).
     *
     * Intel CPUs seem to support this attribute from Linux 3.2 and
     * onwards. Non-x86 architectures currently ignore this attribute
     * (Linux 3.12-rc5).
     *
     * @warning This attribute is ignored if it isn't present in the
     * kernel headers or if the kernel doesn't support it.
     *
     * @param val true to exclude host events.
     * @return Reference to this configuration.
     */
    PerfKvmCounterConfig &
    exclude_host(bool val)
    {
        // The field is only written when the build configuration
        // reports that perf_event_attr has it; otherwise this call
        // leaves attr untouched.
#if HAVE_PERF_ATTR_EXCLUDE_HOST == 1
        attr.exclude_host = val;
#endif
        return *this;
    }

    /**
     * Exclude the hypervisor (i.e., only include events from the
     * guest system).
     *
     * @warning This attribute only seems to be ignored on Intel.
     *
     * @param val true to exclude hypervisor events.
     * @return Reference to this configuration.
     */
    PerfKvmCounterConfig &
    exclude_hv(bool val)
    {
        attr.exclude_hv = val ? 1 : 0;
        return *this;
    }

    /** The perf_event_attr structure that describes the counter. */
    struct perf_event_attr attr;
};
166
167/**
168 * An instance of a performance counter.
169 */
170class PerfKvmCounter
171{
172public:
173    /**
174     * Create and attach a new counter group.
175     *
176     * @param config Counter configuration
177     * @param tid Thread to sample (0 indicates current thread)
178     */
179    PerfKvmCounter(PerfKvmCounterConfig &config, pid_t tid);
180    /**
181     * Create and attach a new counter and make it a member of an
182     * exist counter group.
183     *
184     * @param config Counter configuration
185     * @param tid Thread to sample (0 indicates current thread)
186     * @param parent Group leader
187     */
188    PerfKvmCounter(PerfKvmCounterConfig &config,
189                pid_t tid, const PerfKvmCounter &parent);
190    /**
191     * Create a new counter, but don't attach it.
192     */
193    PerfKvmCounter();
194    ~PerfKvmCounter();
195
196
197    /**
198     * Attach a counter.
199     *
200     * @note This operation is only supported if the counter isn't
201     * already attached.
202     *
203     * @param config Counter configuration
204     * @param tid Thread to sample (0 indicates current thread)
205     */
206    void attach(PerfKvmCounterConfig &config, pid_t tid) {
207        attach(config, tid, -1);
208    }
209
210    /**
211     * Attach a counter and make it a member of an existing counter
212     * group.
213     *
214     * @note This operation is only supported if the counter isn't
215     * already attached.
216     *
217     * @param config Counter configuration
218     * @param tid Thread to sample (0 indicates current thread)
219     * @param parent Group leader
220     */
221    void attach(PerfKvmCounterConfig &config,
222                pid_t tid, const PerfKvmCounter &parent) {
223        attach(config, tid, parent.fd);
224    }
225
226    /** Detach a counter from PerfEvent. */
227    void detach();
228
229    /** Check if a counter is attached. */
230    bool attached() const { return fd != -1; }
231
232    /**
233     * Start counting.
234     *
235     * @note If this counter is a group leader, it will start the
236     * entire group.
237     */
238    void start();
239
240    /**
241     * Stop counting.
242     *
243     * @note If this counter is a group leader, it will stop the
244     * entire group.
245     */
246    void stop();
247
248    /**
249     * Update the period of an overflow counter.
250     *
251     * @warning This ioctl has some pretty bizarre semantics. It seems
252     * like the new period isn't effective until after the next
253     * counter overflow. If you use this method to change the sample
254     * period, you will see one sample with the old period and then
255     * start sampling with the new period. This problem was fixed for
256     * ARM in version 3.7 of the kernel.
257     *
258     * @warning This method doesn't work at all on some 2.6.3x kernels
259     * since it has inverted check for the return value when copying
260     * parameters from userspace.
261     *
262     * @param period Overflow period in events
263     */
264    void period(uint64_t period);
265
266    /**
267     * Enable a counter for a fixed number of events.
268     *
269     * When this method is called, perf event enables the counter if
270     * it was disabled. It then leaves the counter enabled until it
271     * has overflowed a refresh times.
272     *
273     * @note This does not update the period of the counter.
274     *
275     * @param refresh Number of overflows before disabling the
276     * counter.
277     */
278    void refresh(int refresh);
279
280    /**
281     * Read the current value of a counter.
282     */
283    uint64_t read() const;
284
285    /**
286     * Enable signal delivery to a thread on counter overflow.
287     *
288     * @param tid Thread to deliver signal to
289     * @param signal Signal to send upon overflow
290     */
291    void enableSignals(pid_t tid, int signal);
292
293    /**
294     * Enable signal delivery on counter overflow. Identical to
295     * enableSignals(pid_t) when called with the current TID as its
296     * parameter.
297     *
298     * @param signal Signal to send upon overflow
299     */
300    void enableSignals(int signal) { enableSignals(gettid(), signal); }
301
302private:
303    // Disallow copying
304    PerfKvmCounter(const PerfKvmCounter &that);
305    // Disallow assignment
306    PerfKvmCounter &operator=(const PerfKvmCounter &that);
307
308    void attach(PerfKvmCounterConfig &config, pid_t tid, int group_fd);
309
310    /**
311     * Get the TID of the current thread.
312     *
313     * @return Current thread's TID
314     */
315    pid_t gettid();
316
317    /**
318     * MMAP the PerfEvent file descriptor.
319     *
320     * @note We currently don't use the ring buffer, but PerfEvent
321     * requires this to be mapped for overflow handling to work.
322     *
323     * @note Overflow handling requires at least one buf_page to be
324     * mapped.
325     *
326     * @param pages number of pages in circular sample buffer. Must be
327     * an even power of 2.
328     */
329    void mmapPerf(int pages);
330
331    /** @{ */
332    /**
333     * PerfEvent fnctl interface.
334     *
335     * @param cmd fcntl command
336     * @param p1 Request parameter
337     *
338     * @return -1 on error (error number in errno), ioctl dependent
339     * value otherwise.
340     */
341    int fcntl(int cmd, long p1);
342    int fcntl(int cmd, void *p1) { return fcntl(cmd, (long)p1); }
343    /** @} */
344
345    /** @{ */
346    /**
347     * PerfEvent ioctl interface.
348     *
349     * @param request PerfEvent request
350     * @param p1 Optional request parameter
351     *
352     * @return -1 on error (error number in errno), ioctl dependent
353     * value otherwise.
354     */
355    int ioctl(int request, long p1);
356    int ioctl(int request, void *p1) { return ioctl(request, (long)p1); }
357    int ioctl(int request) { return ioctl(request, 0L); }
358    /** @} */
359
360    /**
361     * Perform a read from the counter file descriptor.
362     *
363     * @param buf Destination buffer
364     * @param size Amount of data to read
365     */
366    void read(void *buf, size_t size) const;
367
368    /**
369     * PerfEvent file descriptor associated with counter. -1 if not
370     * attached to PerfEvent.
371     */
372    int fd;
373
374    /** Memory mapped PerfEvent sample ring buffer */
375    struct perf_event_mmap_page *ringBuffer;
376    /** Total number of pages in ring buffer */
377    int ringNumPages;
378
379    /** Cached host page size */
380    long pageSize;
381};
382
383#endif
384