File perf-arm-spe-support-synthetic-events.patch of Package perf
1477
1
From: Tan Xiaojun <tanxiaojun@huawei.com>
2
Date: Sat, 30 May 2020 20:24:42 +0800
3
Subject: perf arm-spe: Support synthetic events
4
Git-commit: a54ca194981be3707213437a67792b88e08264fe
5
Patch-mainline: v5.8-rc1
6
References: SLE-14769
7
X-Info: adjust for context, no 72932371e78012cea96edb9e833d81f1c32dd892
8
X-Info: adjust for context, no core.attr (1fc632cef4ea137bc45fd0fc4cb902e374064163)
9
X-Info: adjust for context, no core.id (deaf321913a7b1d440c5cd5c7766d47381c9b21b)
10
11
After the commit ffd3d18c20b8 ("perf tools: Add ARM Statistical
12
Profiling Extensions (SPE) support") was merged, perf is able to
13
output raw data with option "--dump-raw-trace". However, it lacks
14
support for synthetic events, so it cannot output any statistical info.
15
16
This patch improves the "perf report" support for ARM SPE by adding four
17
types of synthetic events:
18
19
First level cache synthetic events, including L1 data cache accessing
20
and missing events;
21
Last level cache synthetic events, including last level cache
22
accessing and missing events;
23
TLB synthetic events, including TLB accessing and missing events;
24
Remote access events, which are used to account for load/store operations
25
directed to another socket.
26
27
Example usage:
28
29
$ perf record -c 1024 -e arm_spe_0/branch_filter=1,ts_enable=1,pct_enable=1,pa_enable=1,load_filter=1,jitter=1,store_filter=1,min_latency=0/ dd if=/dev/zero of=/dev/null count=10000
30
$ perf report --stdio
31
32
# Samples: 59 of event 'l1d-miss'
33
# Event count (approx.): 59
34
#
35
# Children Self Command Shared Object Symbol
36
# ........ ........ ....... ................. ..................................
37
#
38
23.73% 23.73% dd [kernel.kallsyms] [k] perf_iterate_ctx.constprop.135
39
20.34% 20.34% dd [kernel.kallsyms] [k] filemap_map_pages
40
5.08% 5.08% dd [kernel.kallsyms] [k] perf_event_mmap
41
5.08% 5.08% dd [kernel.kallsyms] [k] unlock_page_memcg
42
5.08% 5.08% dd [kernel.kallsyms] [k] unmap_page_range
43
3.39% 3.39% dd [kernel.kallsyms] [k] PageHuge
44
3.39% 3.39% dd [kernel.kallsyms] [k] release_pages
45
3.39% 3.39% dd ld-2.28.so [.] 0x0000000000008b5c
46
1.69% 1.69% dd [kernel.kallsyms] [k] __alloc_fd
47
[...]
48
49
# Samples: 3K of event 'l1d-access'
50
# Event count (approx.): 3980
51
#
52
# Children Self Command Shared Object Symbol
53
# ........ ........ ....... ................. ......................................
54
#
55
26.98% 26.98% dd [kernel.kallsyms] [k] ret_to_user
56
10.53% 10.53% dd [kernel.kallsyms] [k] fsnotify
57
7.51% 7.51% dd [kernel.kallsyms] [k] new_sync_read
58
4.57% 4.57% dd [kernel.kallsyms] [k] vfs_read
59
4.35% 4.35% dd [kernel.kallsyms] [k] vfs_write
60
3.69% 3.69% dd [kernel.kallsyms] [k] __fget_light
61
3.69% 3.69% dd [kernel.kallsyms] [k] rw_verify_area
62
3.44% 3.44% dd [kernel.kallsyms] [k] security_file_permission
63
2.76% 2.76% dd [kernel.kallsyms] [k] __fsnotify_parent
64
2.44% 2.44% dd [kernel.kallsyms] [k] ksys_write
65
2.24% 2.24% dd [kernel.kallsyms] [k] iov_iter_zero
66
2.19% 2.19% dd [kernel.kallsyms] [k] read_iter_zero
67
1.81% 1.81% dd dd [.] 0x0000000000002960
68
1.78% 1.78% dd dd [.] 0x0000000000002980
69
[...]
70
71
# Samples: 35 of event 'llc-miss'
72
# Event count (approx.): 35
73
#
74
# Children Self Command Shared Object Symbol
75
# ........ ........ ....... ................. ...........................
76
#
77
34.29% 34.29% dd [kernel.kallsyms] [k] filemap_map_pages
78
8.57% 8.57% dd [kernel.kallsyms] [k] unlock_page_memcg
79
8.57% 8.57% dd [kernel.kallsyms] [k] unmap_page_range
80
5.71% 5.71% dd [kernel.kallsyms] [k] PageHuge
81
5.71% 5.71% dd [kernel.kallsyms] [k] release_pages
82
5.71% 5.71% dd ld-2.28.so [.] 0x0000000000008b5c
83
2.86% 2.86% dd [kernel.kallsyms] [k] __queue_work
84
2.86% 2.86% dd [kernel.kallsyms] [k] __radix_tree_lookup
85
2.86% 2.86% dd [kernel.kallsyms] [k] copy_page
86
[...]
87
88
# Samples: 2 of event 'llc-access'
89
# Event count (approx.): 2
90
#
91
# Children Self Command Shared Object Symbol
92
# ........ ........ ....... ................. .............
93
#
94
50.00% 50.00% dd [kernel.kallsyms] [k] copy_page
95
50.00% 50.00% dd libc-2.28.so [.] _dl_addr
96
97
# Samples: 48 of event 'tlb-miss'
98
# Event count (approx.): 48
99
#
100
# Children Self Command Shared Object Symbol
101
# ........ ........ ....... ................. ..................................
102
#
103
20.83% 20.83% dd [kernel.kallsyms] [k] perf_iterate_ctx.constprop.135
104
12.50% 12.50% dd [kernel.kallsyms] [k] __arch_clear_user
105
10.42% 10.42% dd [kernel.kallsyms] [k] clear_page
106
4.17% 4.17% dd [kernel.kallsyms] [k] copy_page
107
4.17% 4.17% dd [kernel.kallsyms] [k] filemap_map_pages
108
2.08% 2.08% dd [kernel.kallsyms] [k] __alloc_fd
109
2.08% 2.08% dd [kernel.kallsyms] [k] __mod_memcg_state.part.70
110
2.08% 2.08% dd [kernel.kallsyms] [k] __queue_work
111
2.08% 2.08% dd [kernel.kallsyms] [k] __rcu_read_unlock
112
2.08% 2.08% dd [kernel.kallsyms] [k] d_path
113
2.08% 2.08% dd [kernel.kallsyms] [k] destroy_inode
114
2.08% 2.08% dd [kernel.kallsyms] [k] do_dentry_open
115
[...]
116
117
# Samples: 9K of event 'tlb-access'
118
# Event count (approx.): 9573
119
#
120
# Children Self Command Shared Object Symbol
121
# ........ ........ ....... ................. ......................................
122
#
123
25.79% 25.79% dd [kernel.kallsyms] [k] __arch_clear_user
124
11.22% 11.22% dd [kernel.kallsyms] [k] ret_to_user
125
8.56% 8.56% dd [kernel.kallsyms] [k] fsnotify
126
4.06% 4.06% dd [kernel.kallsyms] [k] new_sync_read
127
3.67% 3.67% dd [kernel.kallsyms] [k] el0_svc_common.constprop.2
128
3.04% 3.04% dd [kernel.kallsyms] [k] __fsnotify_parent
129
2.90% 2.90% dd [kernel.kallsyms] [k] vfs_write
130
2.82% 2.82% dd [kernel.kallsyms] [k] vfs_read
131
2.52% 2.52% dd libc-2.28.so [.] write
132
2.26% 2.26% dd [kernel.kallsyms] [k] security_file_permission
133
2.08% 2.08% dd [kernel.kallsyms] [k] ksys_write
134
1.96% 1.96% dd [kernel.kallsyms] [k] rw_verify_area
135
1.95% 1.95% dd [kernel.kallsyms] [k] read_iter_zero
136
[...]
137
138
# Samples: 9 of event 'branch-miss'
139
# Event count (approx.): 9
140
#
141
# Children Self Command Shared Object Symbol
142
# ........ ........ ....... ................. .........................
143
#
144
22.22% 22.22% dd libc-2.28.so [.] _dl_addr
145
11.11% 11.11% dd [kernel.kallsyms] [k] __arch_clear_user
146
11.11% 11.11% dd [kernel.kallsyms] [k] __arch_copy_from_user
147
11.11% 11.11% dd [kernel.kallsyms] [k] __dentry_kill
148
11.11% 11.11% dd [kernel.kallsyms] [k] __efistub_memcpy
149
11.11% 11.11% dd ld-2.28.so [.] 0x0000000000012b7c
150
11.11% 11.11% dd libc-2.28.so [.] 0x000000000002a980
151
11.11% 11.11% dd libc-2.28.so [.] 0x0000000000083340
152
153
# Samples: 29 of event 'remote-access'
154
# Event count (approx.): 29
155
#
156
# Children Self Command Shared Object Symbol
157
# ........ ........ ....... ................. ...........................
158
#
159
41.38% 41.38% dd [kernel.kallsyms] [k] filemap_map_pages
160
10.34% 10.34% dd [kernel.kallsyms] [k] unlock_page_memcg
161
10.34% 10.34% dd [kernel.kallsyms] [k] unmap_page_range
162
6.90% 6.90% dd [kernel.kallsyms] [k] release_pages
163
3.45% 3.45% dd [kernel.kallsyms] [k] PageHuge
164
3.45% 3.45% dd [kernel.kallsyms] [k] __queue_work
165
3.45% 3.45% dd [kernel.kallsyms] [k] page_add_file_rmap
166
3.45% 3.45% dd [kernel.kallsyms] [k] page_counter_try_charge
167
3.45% 3.45% dd [kernel.kallsyms] [k] page_remove_rmap
168
3.45% 3.45% dd [kernel.kallsyms] [k] xas_start
169
3.45% 3.45% dd ld-2.28.so [.] 0x0000000000002a1c
170
3.45% 3.45% dd ld-2.28.so [.] 0x0000000000008b5c
171
3.45% 3.45% dd ld-2.28.so [.] 0x00000000000093cc
172
173
Signed-off-by: Tan Xiaojun <tanxiaojun@huawei.com>
174
Tested-by: James Clark <james.clark@arm.com>
175
Cc: Adrian Hunter <adrian.hunter@intel.com>
176
Cc: Al Grant <al.grant@arm.com>
177
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
178
Cc: Andi Kleen <ak@linux.intel.com>
179
Cc: Ian Rogers <irogers@google.com>
180
Cc: Jin Yao <yao.jin@linux.intel.com>
181
Cc: Jiri Olsa <jolsa@redhat.com>
182
Cc: Leo Yan <leo.yan@linaro.org>
183
Cc: Mark Rutland <mark.rutland@arm.com>
184
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
185
Cc: Mike Leach <mike.leach@linaro.org>
186
Cc: Namhyung Kim <namhyung@kernel.org>
187
Cc: Peter Zijlstra <peterz@infradead.org>
188
Cc: Thomas Gleixner <tglx@linutronix.de>
189
Cc: Will Deacon <will@kernel.org>
190
Cc: linux-arm-kernel@lists.infradead.org
191
Link: http://lore.kernel.org/lkml/20200530122442.490-4-leo.yan@linaro.org
192
Signed-off-by: James Clark <james.clark@arm.com>
193
Signed-off-by: Leo Yan <leo.yan@linaro.org>
194
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
195
Signed-off-by: Tony Jones <tonyj@suse.de>
196
---
197
tools/perf/util/arm-spe-decoder/Build | 2 +-
198
tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 219 ++++++
199
tools/perf/util/arm-spe-decoder/arm-spe-decoder.h | 82 ++
200
.../util/arm-spe-decoder/arm-spe-pkt-decoder.h | 16 +
201
tools/perf/util/arm-spe.c | 821 +++++++++++++++++++--
202
5 files changed, 1097 insertions(+), 43 deletions(-)
203
204
diff --git a/tools/perf/util/arm-spe-decoder/Build b/tools/perf/util/arm-spe-decoder/Build
205
index 16efbc245028..f8dae13fc876 100644
206
--- a/tools/perf/util/arm-spe-decoder/Build
207
+++ b/tools/perf/util/arm-spe-decoder/Build
208
209
-perf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o
210
+perf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o arm-spe-decoder.o
211
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
212
new file mode 100644
213
index 000000000000..302a14d0aca9
214
--- /dev/null
215
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
216
217
+// SPDX-License-Identifier: GPL-2.0
218
+/*
219
+ * arm_spe_decoder.c: ARM SPE support
220
+ */
221
+
222
+#ifndef _GNU_SOURCE
223
+#define _GNU_SOURCE
224
+#endif
225
+#include <errno.h>
226
+#include <inttypes.h>
227
+#include <stdbool.h>
228
+#include <string.h>
229
+#include <stdint.h>
230
+#include <stdlib.h>
231
+#include <linux/compiler.h>
232
+#include <linux/zalloc.h>
233
+
234
+#include "../auxtrace.h"
235
+#include "../debug.h"
236
+#include "../util.h"
237
+
238
+#include "arm-spe-decoder.h"
239
+
240
+#ifndef BIT
241
+#define BIT(n) (1UL << (n))
242
+#endif
243
+
244
+static u64 arm_spe_calc_ip(int index, u64 payload)
245
+{
246
+ u8 *addr = (u8 *)&payload;
247
+ int ns, el;
248
+
249
+ /* Instruction virtual address or Branch target address */
250
+ if (index == SPE_ADDR_PKT_HDR_INDEX_INS ||
251
+ index == SPE_ADDR_PKT_HDR_INDEX_BRANCH) {
252
+ ns = addr[7] & SPE_ADDR_PKT_NS;
253
+ el = (addr[7] & SPE_ADDR_PKT_EL_MASK) >> SPE_ADDR_PKT_EL_OFFSET;
254
+
255
+ /* Fill highest byte for EL1 or EL2 (VHE) mode */
256
+ if (ns && (el == SPE_ADDR_PKT_EL1 || el == SPE_ADDR_PKT_EL2))
257
+ addr[7] = 0xff;
258
+ /* Clean highest byte for other cases */
259
+ else
260
+ addr[7] = 0x0;
261
+
262
+ /* Data access virtual address */
263
+ } else if (index == SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT) {
264
+
265
+ /* Fill highest byte if bits [48..55] are 0xff */
266
+ if (addr[6] == 0xff)
267
+ addr[7] = 0xff;
268
+ /* Otherwise, cleanup tags */
269
+ else
270
+ addr[7] = 0x0;
271
+
272
+ /* Data access physical address */
273
+ } else if (index == SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS) {
274
+ /* Cleanup byte 7 */
275
+ addr[7] = 0x0;
276
+ } else {
277
+ pr_err("unsupported address packet index: 0x%x\n", index);
278
+ }
279
+
280
+ return payload;
281
+}
282
+
283
+struct arm_spe_decoder *arm_spe_decoder_new(struct arm_spe_params *params)
284
+{
285
+ struct arm_spe_decoder *decoder;
286
+
287
+ if (!params->get_trace)
288
+ return NULL;
289
+
290
+ decoder = zalloc(sizeof(struct arm_spe_decoder));
291
+ if (!decoder)
292
+ return NULL;
293
+
294
+ decoder->get_trace = params->get_trace;
295
+ decoder->data = params->data;
296
+
297
+ return decoder;
298
+}
299
+
300
+void arm_spe_decoder_free(struct arm_spe_decoder *decoder)
301
+{
302
+ free(decoder);
303
+}
304
+
305
+static int arm_spe_get_data(struct arm_spe_decoder *decoder)
306
+{
307
+ struct arm_spe_buffer buffer = { .buf = 0, };
308
+ int ret;
309
+
310
+ pr_debug("Getting more data\n");
311
+ ret = decoder->get_trace(&buffer, decoder->data);
312
+ if (ret < 0)
313
+ return ret;
314
+
315
+ decoder->buf = buffer.buf;
316
+ decoder->len = buffer.len;
317
+
318
+ if (!decoder->len)
319
+ pr_debug("No more data\n");
320
+
321
+ return decoder->len;
322
+}
323
+
324
+static int arm_spe_get_next_packet(struct arm_spe_decoder *decoder)
325
+{
326
+ int ret;
327
+
328
+ do {
329
+ if (!decoder->len) {
330
+ ret = arm_spe_get_data(decoder);
331
+
332
+ /* Failed to read out trace data */
333
+ if (ret <= 0)
334
+ return ret;
335
+ }
336
+
337
+ ret = arm_spe_get_packet(decoder->buf, decoder->len,
338
+ &decoder->packet);
339
+ if (ret <= 0) {
340
+ /* Move forward for 1 byte */
341
+ decoder->buf += 1;
342
+ decoder->len -= 1;
343
+ return -EBADMSG;
344
+ }
345
+
346
+ decoder->buf += ret;
347
+ decoder->len -= ret;
348
+ } while (decoder->packet.type == ARM_SPE_PAD);
349
+
350
+ return 1;
351
+}
352
+
353
+static int arm_spe_read_record(struct arm_spe_decoder *decoder)
354
+{
355
+ int err;
356
+ int idx;
357
+ u64 payload, ip;
358
+
359
+ memset(&decoder->record, 0x0, sizeof(decoder->record));
360
+
361
+ while (1) {
362
+ err = arm_spe_get_next_packet(decoder);
363
+ if (err <= 0)
364
+ return err;
365
+
366
+ idx = decoder->packet.index;
367
+ payload = decoder->packet.payload;
368
+
369
+ switch (decoder->packet.type) {
370
+ case ARM_SPE_TIMESTAMP:
371
+ decoder->record.timestamp = payload;
372
+ return 1;
373
+ case ARM_SPE_END:
374
+ return 1;
375
+ case ARM_SPE_ADDRESS:
376
+ ip = arm_spe_calc_ip(idx, payload);
377
+ if (idx == SPE_ADDR_PKT_HDR_INDEX_INS)
378
+ decoder->record.from_ip = ip;
379
+ else if (idx == SPE_ADDR_PKT_HDR_INDEX_BRANCH)
380
+ decoder->record.to_ip = ip;
381
+ break;
382
+ case ARM_SPE_COUNTER:
383
+ break;
384
+ case ARM_SPE_CONTEXT:
385
+ break;
386
+ case ARM_SPE_OP_TYPE:
387
+ break;
388
+ case ARM_SPE_EVENTS:
389
+ if (payload & BIT(EV_L1D_REFILL))
390
+ decoder->record.type |= ARM_SPE_L1D_MISS;
391
+
392
+ if (payload & BIT(EV_L1D_ACCESS))
393
+ decoder->record.type |= ARM_SPE_L1D_ACCESS;
394
+
395
+ if (payload & BIT(EV_TLB_WALK))
396
+ decoder->record.type |= ARM_SPE_TLB_MISS;
397
+
398
+ if (payload & BIT(EV_TLB_ACCESS))
399
+ decoder->record.type |= ARM_SPE_TLB_ACCESS;
400
+
401
+ if ((idx == 1 || idx == 2 || idx == 3) &&
402
+ (payload & BIT(EV_LLC_MISS)))
403
+ decoder->record.type |= ARM_SPE_LLC_MISS;
404
+
405
+ if ((idx == 1 || idx == 2 || idx == 3) &&
406
+ (payload & BIT(EV_LLC_ACCESS)))
407
+ decoder->record.type |= ARM_SPE_LLC_ACCESS;
408
+
409
+ if ((idx == 1 || idx == 2 || idx == 3) &&
410
+ (payload & BIT(EV_REMOTE_ACCESS)))
411
+ decoder->record.type |= ARM_SPE_REMOTE_ACCESS;
412
+
413
+ if (payload & BIT(EV_MISPRED))
414
+ decoder->record.type |= ARM_SPE_BRANCH_MISS;
415
+
416
+ break;
417
+ case ARM_SPE_DATA_SOURCE:
418
+ break;
419
+ case ARM_SPE_BAD:
420
+ break;
421
+ case ARM_SPE_PAD:
422
+ break;
423
+ default:
424
+ pr_err("Get packet error!\n");
425
+ return -1;
426
+ }
427
+ }
428
+
429
+ return 0;
430
+}
431
+
432
+int arm_spe_decode(struct arm_spe_decoder *decoder)
433
+{
434
+ return arm_spe_read_record(decoder);
435
+}
436
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
437
new file mode 100644
438
index 000000000000..a5111a8d4360
439
--- /dev/null
440
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
441
442
+/* SPDX-License-Identifier: GPL-2.0 */
443
+/*
444
+ * arm_spe_decoder.h: Arm Statistical Profiling Extensions support
445
+ * Copyright (c) 2019-2020, Arm Ltd.
446
+ */
447
+
448
+#ifndef INCLUDE__ARM_SPE_DECODER_H__
449
+#define INCLUDE__ARM_SPE_DECODER_H__
450
+
451
+#include <stdbool.h>
452
+#include <stddef.h>
453
+#include <stdint.h>
454
+
455
+#include "arm-spe-pkt-decoder.h"
456
+
457
+enum arm_spe_events {
458
+ EV_EXCEPTION_GEN = 0,
459
+ EV_RETIRED = 1,
460
+ EV_L1D_ACCESS = 2,
461
+ EV_L1D_REFILL = 3,
462
+ EV_TLB_ACCESS = 4,
463
+ EV_TLB_WALK = 5,
464
+ EV_NOT_TAKEN = 6,
465
+ EV_MISPRED = 7,
466
+ EV_LLC_ACCESS = 8,
467
+ EV_LLC_MISS = 9,
468
+ EV_REMOTE_ACCESS = 10,
469
+ EV_ALIGNMENT = 11,
470
+ EV_PARTIAL_PREDICATE = 17,
471
+ EV_EMPTY_PREDICATE = 18,
472
+};
473
+
474
+enum arm_spe_sample_type {
475
+ ARM_SPE_L1D_ACCESS = 1 << 0,
476
+ ARM_SPE_L1D_MISS = 1 << 1,
477
+ ARM_SPE_LLC_ACCESS = 1 << 2,
478
+ ARM_SPE_LLC_MISS = 1 << 3,
479
+ ARM_SPE_TLB_ACCESS = 1 << 4,
480
+ ARM_SPE_TLB_MISS = 1 << 5,
481
+ ARM_SPE_BRANCH_MISS = 1 << 6,
482
+ ARM_SPE_REMOTE_ACCESS = 1 << 7,
483
+};
484
+
485
+struct arm_spe_record {
486
+ enum arm_spe_sample_type type;
487
+ int err;
488
+ u64 from_ip;
489
+ u64 to_ip;
490
+ u64 timestamp;
491
+};
492
+
493
+struct arm_spe_insn;
494
+
495
+struct arm_spe_buffer {
496
+ const unsigned char *buf;
497
+ size_t len;
498
+ u64 offset;
499
+ u64 trace_nr;
500
+};
501
+
502
+struct arm_spe_params {
503
+ int (*get_trace)(struct arm_spe_buffer *buffer, void *data);
504
+ void *data;
505
+};
506
+
507
+struct arm_spe_decoder {
508
+ int (*get_trace)(struct arm_spe_buffer *buffer, void *data);
509
+ void *data;
510
+ struct arm_spe_record record;
511
+
512
+ const unsigned char *buf;
513
+ size_t len;
514
+
515
+ struct arm_spe_pkt packet;
516
+};
517
+
518
+struct arm_spe_decoder *arm_spe_decoder_new(struct arm_spe_params *params);
519
+void arm_spe_decoder_free(struct arm_spe_decoder *decoder);
520
+
521
+int arm_spe_decode(struct arm_spe_decoder *decoder);
522
+
523
+#endif
524
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
525
index d786ef65113f..4c870521b8eb 100644
526
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
527
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
528
529
#define ARM_SPE_NEED_MORE_BYTES -1
530
#define ARM_SPE_BAD_PACKET -2
531
532
+#define ARM_SPE_PKT_MAX_SZ 16
533
+
534
enum arm_spe_pkt_type {
535
ARM_SPE_BAD,
536
ARM_SPE_PAD,
537
538
uint64_t payload;
539
};
540
541
+#define SPE_ADDR_PKT_HDR_INDEX_INS (0x0)
542
+#define SPE_ADDR_PKT_HDR_INDEX_BRANCH (0x1)
543
+#define SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT (0x2)
544
+#define SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS (0x3)
545
+
546
+#define SPE_ADDR_PKT_NS BIT(7)
547
+#define SPE_ADDR_PKT_CH BIT(6)
548
+#define SPE_ADDR_PKT_EL_OFFSET (5)
549
+#define SPE_ADDR_PKT_EL_MASK (0x3 << SPE_ADDR_PKT_EL_OFFSET)
550
+#define SPE_ADDR_PKT_EL0 (0)
551
+#define SPE_ADDR_PKT_EL1 (1)
552
+#define SPE_ADDR_PKT_EL2 (2)
553
+#define SPE_ADDR_PKT_EL3 (3)
554
+
555
const char *arm_spe_pkt_name(enum arm_spe_pkt_type);
556
557
int arm_spe_get_packet(const unsigned char *buf, size_t len,
558
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
559
index 235de3d0b062..3882a5360ada 100644
560
--- a/tools/perf/util/arm-spe.c
561
+++ b/tools/perf/util/arm-spe.c
562
563
* Copyright (c) 2017-2018, Arm Ltd.
564
*/
565
566
+#include <byteswap.h>
567
#include <endian.h>
568
#include <errno.h>
569
-#include <byteswap.h>
570
#include <inttypes.h>
571
-#include <unistd.h>
572
-#include <stdlib.h>
573
-#include <linux/kernel.h>
574
-#include <linux/types.h>
575
#include <linux/bitops.h>
576
+#include <linux/kernel.h>
577
#include <linux/log2.h>
578
+#include <linux/types.h>
579
#include <linux/zalloc.h>
580
+#include <stdlib.h>
581
+#include <unistd.h>
582
583
+#include "auxtrace.h"
584
#include "color.h"
585
+#include "debug.h"
586
+#include "evlist.h"
587
#include "evsel.h"
588
#include "machine.h"
589
#include "session.h"
590
-#include "debug.h"
591
-#include "auxtrace.h"
592
+#include "symbol.h"
593
+#include "thread.h"
594
+#include "thread-stack.h"
595
+#include "tool.h"
596
+//#include "util/synthetic-events.h"
597
+
598
#include "arm-spe.h"
599
+#include "arm-spe-decoder/arm-spe-decoder.h"
600
#include "arm-spe-decoder/arm-spe-pkt-decoder.h"
601
602
+#define MAX_TIMESTAMP (~0ULL)
603
+
604
struct arm_spe {
605
struct auxtrace auxtrace;
606
struct auxtrace_queues queues;
607
struct auxtrace_heap heap;
608
+ struct itrace_synth_opts synth_opts;
609
u32 auxtrace_type;
610
struct perf_session *session;
611
struct machine *machine;
612
u32 pmu_type;
613
+
614
+ u8 timeless_decoding;
615
+ u8 data_queued;
616
+
617
+ u8 sample_flc;
618
+ u8 sample_llc;
619
+ u8 sample_tlb;
620
+ u8 sample_branch;
621
+ u8 sample_remote_access;
622
+
623
+ u64 l1d_miss_id;
624
+ u64 l1d_access_id;
625
+ u64 llc_miss_id;
626
+ u64 llc_access_id;
627
+ u64 tlb_miss_id;
628
+ u64 tlb_access_id;
629
+ u64 branch_miss_id;
630
+ u64 remote_access_id;
631
+
632
+ u64 kernel_start;
633
+
634
+ unsigned long num_events;
635
};
636
637
struct arm_spe_queue {
638
- struct arm_spe *spe;
639
- unsigned int queue_nr;
640
- struct auxtrace_buffer *buffer;
641
- bool on_heap;
642
- bool done;
643
- pid_t pid;
644
- pid_t tid;
645
- int cpu;
646
+ struct arm_spe *spe;
647
+ unsigned int queue_nr;
648
+ struct auxtrace_buffer *buffer;
649
+ struct auxtrace_buffer *old_buffer;
650
+ union perf_event *event_buf;
651
+ bool on_heap;
652
+ bool done;
653
+ pid_t pid;
654
+ pid_t tid;
655
+ int cpu;
656
+ struct arm_spe_decoder *decoder;
657
+ u64 time;
658
+ u64 timestamp;
659
+ struct thread *thread;
660
};
661
662
static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
663
664
arm_spe_dump(spe, buf, len);
665
}
666
667
-static int arm_spe_process_event(struct perf_session *session __maybe_unused,
668
- union perf_event *event __maybe_unused,
669
- struct perf_sample *sample __maybe_unused,
670
- struct perf_tool *tool __maybe_unused)
671
+static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data)
672
+{
673
+ struct arm_spe_queue *speq = data;
674
+ struct auxtrace_buffer *buffer = speq->buffer;
675
+ struct auxtrace_buffer *old_buffer = speq->old_buffer;
676
+ struct auxtrace_queue *queue;
677
+
678
+ queue = &speq->spe->queues.queue_array[speq->queue_nr];
679
+
680
+ buffer = auxtrace_buffer__next(queue, buffer);
681
+ /* If no more data, drop the previous auxtrace_buffer and return */
682
+ if (!buffer) {
683
+ if (old_buffer)
684
+ auxtrace_buffer__drop_data(old_buffer);
685
+ b->len = 0;
686
+ return 0;
687
+ }
688
+
689
+ speq->buffer = buffer;
690
+
691
+ /* If the aux_buffer doesn't have data associated, try to load it */
692
+ if (!buffer->data) {
693
+ /* get the file desc associated with the perf data file */
694
+ int fd = perf_data__fd(speq->spe->session->data);
695
+
696
+ buffer->data = auxtrace_buffer__get_data(buffer, fd);
697
+ if (!buffer->data)
698
+ return -ENOMEM;
699
+ }
700
+
701
+ b->len = buffer->size;
702
+ b->buf = buffer->data;
703
+
704
+ if (b->len) {
705
+ if (old_buffer)
706
+ auxtrace_buffer__drop_data(old_buffer);
707
+ speq->old_buffer = buffer;
708
+ } else {
709
+ auxtrace_buffer__drop_data(buffer);
710
+ return arm_spe_get_trace(b, data);
711
+ }
712
+
713
+ return 0;
714
+}
715
+
716
+static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
717
+ unsigned int queue_nr)
718
+{
719
+ struct arm_spe_params params = { .get_trace = 0, };
720
+ struct arm_spe_queue *speq;
721
+
722
+ speq = zalloc(sizeof(*speq));
723
+ if (!speq)
724
+ return NULL;
725
+
726
+ speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
727
+ if (!speq->event_buf)
728
+ goto out_free;
729
+
730
+ speq->spe = spe;
731
+ speq->queue_nr = queue_nr;
732
+ speq->pid = -1;
733
+ speq->tid = -1;
734
+ speq->cpu = -1;
735
+
736
+ /* params set */
737
+ params.get_trace = arm_spe_get_trace;
738
+ params.data = speq;
739
+
740
+ /* create new decoder */
741
+ speq->decoder = arm_spe_decoder_new(&params);
742
+ if (!speq->decoder)
743
+ goto out_free;
744
+
745
+ return speq;
746
+
747
+out_free:
748
+ zfree(&speq->event_buf);
749
+ free(speq);
750
+
751
+ return NULL;
752
+}
753
+
754
+static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip)
755
+{
756
+ return ip >= spe->kernel_start ?
757
+ PERF_RECORD_MISC_KERNEL :
758
+ PERF_RECORD_MISC_USER;
759
+}
760
+
761
+static void arm_spe_prep_sample(struct arm_spe *spe,
762
+ struct arm_spe_queue *speq,
763
+ union perf_event *event,
764
+ struct perf_sample *sample)
765
+{
766
+ struct arm_spe_record *record = &speq->decoder->record;
767
+
768
+ if (!spe->timeless_decoding)
769
+ sample->time = speq->timestamp;
770
+
771
+ sample->ip = record->from_ip;
772
+ sample->cpumode = arm_spe_cpumode(spe, sample->ip);
773
+ sample->pid = speq->pid;
774
+ sample->tid = speq->tid;
775
+ sample->addr = record->to_ip;
776
+ sample->period = 1;
777
+ sample->cpu = speq->cpu;
778
+
779
+ event->sample.header.type = PERF_RECORD_SAMPLE;
780
+ event->sample.header.misc = sample->cpumode;
781
+ event->sample.header.size = sizeof(struct perf_event_header);
782
+}
783
+
784
+static inline int
785
+arm_spe_deliver_synth_event(struct arm_spe *spe,
786
+ struct arm_spe_queue *speq __maybe_unused,
787
+ union perf_event *event,
788
+ struct perf_sample *sample)
789
+{
790
+ int ret;
791
+
792
+ ret = perf_session__deliver_synth_event(spe->session, event, sample);
793
+ if (ret)
794
+ pr_err("ARM SPE: failed to deliver event, error %d\n", ret);
795
+
796
+ return ret;
797
+}
798
+
799
+static int
800
+arm_spe_synth_spe_events_sample(struct arm_spe_queue *speq,
801
+ u64 spe_events_id)
802
+{
803
+ struct arm_spe *spe = speq->spe;
804
+ union perf_event *event = speq->event_buf;
805
+ struct perf_sample sample = { .ip = 0, };
806
+
807
+ arm_spe_prep_sample(spe, speq, event, &sample);
808
+
809
+ sample.id = spe_events_id;
810
+ sample.stream_id = spe_events_id;
811
+
812
+ return arm_spe_deliver_synth_event(spe, speq, event, &sample);
813
+}
814
+
815
+static int arm_spe_sample(struct arm_spe_queue *speq)
816
+{
817
+ const struct arm_spe_record *record = &speq->decoder->record;
818
+ struct arm_spe *spe = speq->spe;
819
+ int err;
820
+
821
+ if (spe->sample_flc) {
822
+ if (record->type & ARM_SPE_L1D_MISS) {
823
+ err = arm_spe_synth_spe_events_sample(
824
+ speq, spe->l1d_miss_id);
825
+ if (err)
826
+ return err;
827
+ }
828
+
829
+ if (record->type & ARM_SPE_L1D_ACCESS) {
830
+ err = arm_spe_synth_spe_events_sample(
831
+ speq, spe->l1d_access_id);
832
+ if (err)
833
+ return err;
834
+ }
835
+ }
836
+
837
+ if (spe->sample_llc) {
838
+ if (record->type & ARM_SPE_LLC_MISS) {
839
+ err = arm_spe_synth_spe_events_sample(
840
+ speq, spe->llc_miss_id);
841
+ if (err)
842
+ return err;
843
+ }
844
+
845
+ if (record->type & ARM_SPE_LLC_ACCESS) {
846
+ err = arm_spe_synth_spe_events_sample(
847
+ speq, spe->llc_access_id);
848
+ if (err)
849
+ return err;
850
+ }
851
+ }
852
+
853
+ if (spe->sample_tlb) {
854
+ if (record->type & ARM_SPE_TLB_MISS) {
855
+ err = arm_spe_synth_spe_events_sample(
856
+ speq, spe->tlb_miss_id);
857
+ if (err)
858
+ return err;
859
+ }
860
+
861
+ if (record->type & ARM_SPE_TLB_ACCESS) {
862
+ err = arm_spe_synth_spe_events_sample(
863
+ speq, spe->tlb_access_id);
864
+ if (err)
865
+ return err;
866
+ }
867
+ }
868
+
869
+ if (spe->sample_branch && (record->type & ARM_SPE_BRANCH_MISS)) {
870
+ err = arm_spe_synth_spe_events_sample(speq,
871
+ spe->branch_miss_id);
872
+ if (err)
873
+ return err;
874
+ }
875
+
876
+ if (spe->sample_remote_access &&
877
+ (record->type & ARM_SPE_REMOTE_ACCESS)) {
878
+ err = arm_spe_synth_spe_events_sample(speq,
879
+ spe->remote_access_id);
880
+ if (err)
881
+ return err;
882
+ }
883
+
884
+ return 0;
885
+}
886
+
887
+static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
888
+{
889
+ struct arm_spe *spe = speq->spe;
890
+ int ret;
891
+
892
+ if (!spe->kernel_start)
893
+ spe->kernel_start = machine__kernel_start(spe->machine);
894
+
895
+ while (1) {
896
+ ret = arm_spe_decode(speq->decoder);
897
+ if (!ret) {
898
+ pr_debug("No data or all data has been processed.\n");
899
+ return 1;
900
+ }
901
+
902
+ /*
903
+ * An error was detected while decoding SPE trace data; continue to
904
+ * the next trace data and find out more records.
905
+ */
906
+ if (ret < 0)
907
+ continue;
908
+
909
+ ret = arm_spe_sample(speq);
910
+ if (ret)
911
+ return ret;
912
+
913
+ if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
914
+ *timestamp = speq->timestamp;
915
+ return 0;
916
+ }
917
+ }
918
+
919
+ return 0;
920
+}
921
+
922
+static int arm_spe__setup_queue(struct arm_spe *spe,
923
+ struct auxtrace_queue *queue,
924
+ unsigned int queue_nr)
925
+{
926
+ struct arm_spe_queue *speq = queue->priv;
927
+ struct arm_spe_record *record;
928
+
929
+ if (list_empty(&queue->head) || speq)
930
+ return 0;
931
+
932
+ speq = arm_spe__alloc_queue(spe, queue_nr);
933
+
934
+ if (!speq)
935
+ return -ENOMEM;
936
+
937
+ queue->priv = speq;
938
+
939
+ if (queue->cpu != -1)
940
+ speq->cpu = queue->cpu;
941
+
942
+ if (!speq->on_heap) {
943
+ int ret;
944
+
945
+ if (spe->timeless_decoding)
946
+ return 0;
947
+
948
+retry:
949
+ ret = arm_spe_decode(speq->decoder);
950
+
951
+ if (!ret)
952
+ return 0;
953
+
954
+ if (ret < 0)
955
+ goto retry;
956
+
957
+ record = &speq->decoder->record;
958
+
959
+ speq->timestamp = record->timestamp;
960
+ ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
961
+ if (ret)
962
+ return ret;
963
+ speq->on_heap = true;
964
+ }
965
+
966
+ return 0;
967
+}
968
+
969
+static int arm_spe__setup_queues(struct arm_spe *spe)
970
+{
971
+ unsigned int i;
972
+ int ret;
973
+
974
+ for (i = 0; i < spe->queues.nr_queues; i++) {
975
+ ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i);
976
+ if (ret)
977
+ return ret;
978
+ }
979
+
980
+ return 0;
981
+}
982
+
983
+static int arm_spe__update_queues(struct arm_spe *spe)
984
{
985
+ if (spe->queues.new_data) {
986
+ spe->queues.new_data = false;
987
+ return arm_spe__setup_queues(spe);
988
+ }
989
+
990
return 0;
991
}
992
993
+static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
994
+{
995
+ struct evsel *evsel;
996
+ struct evlist *evlist = spe->session->evlist;
997
+ bool timeless_decoding = true;
998
+
999
+ /*
1000
+ * Cycle through the list of events and clear timeless_decoding if
1001
+ * we find one with the time bit set.
1002
+ */
1003
+ evlist__for_each_entry(evlist, evsel) {
1004
+ if ((evsel->attr.sample_type & PERF_SAMPLE_TIME))
1005
+ timeless_decoding = false;
1006
+ }
1007
+
1008
+ return timeless_decoding;
1009
+}
1010
+
1011
+static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
1012
+ struct auxtrace_queue *queue)
1013
+{
1014
+ struct arm_spe_queue *speq = queue->priv;
1015
+ pid_t tid;
1016
+
1017
+ tid = machine__get_current_tid(spe->machine, speq->cpu);
1018
+ if (tid != -1) {
1019
+ speq->tid = tid;
1020
+ thread__zput(speq->thread);
1021
+ } else
1022
+ speq->tid = queue->tid;
1023
+
1024
+ if ((!speq->thread) && (speq->tid != -1)) {
1025
+ speq->thread = machine__find_thread(spe->machine, -1,
1026
+ speq->tid);
1027
+ }
1028
+
1029
+ if (speq->thread) {
1030
+ speq->pid = speq->thread->pid_;
1031
+ if (queue->cpu == -1)
1032
+ speq->cpu = speq->thread->cpu;
1033
+ }
1034
+}
1035
+
1036
+static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
1037
+{
1038
+ unsigned int queue_nr;
1039
+ u64 ts;
1040
+ int ret;
1041
+
1042
+ while (1) {
1043
+ struct auxtrace_queue *queue;
1044
+ struct arm_spe_queue *speq;
1045
+
1046
+ if (!spe->heap.heap_cnt)
1047
+ return 0;
1048
+
1049
+ if (spe->heap.heap_array[0].ordinal >= timestamp)
1050
+ return 0;
1051
+
1052
+ queue_nr = spe->heap.heap_array[0].queue_nr;
1053
+ queue = &spe->queues.queue_array[queue_nr];
1054
+ speq = queue->priv;
1055
+
1056
+ auxtrace_heap__pop(&spe->heap);
1057
+
1058
+ if (spe->heap.heap_cnt) {
1059
+ ts = spe->heap.heap_array[0].ordinal + 1;
1060
+ if (ts > timestamp)
1061
+ ts = timestamp;
1062
+ } else {
1063
+ ts = timestamp;
1064
+ }
1065
+
1066
+ arm_spe_set_pid_tid_cpu(spe, queue);
1067
+
1068
+ ret = arm_spe_run_decoder(speq, &ts);
1069
+ if (ret < 0) {
1070
+ auxtrace_heap__add(&spe->heap, queue_nr, ts);
1071
+ return ret;
1072
+ }
1073
+
1074
+ if (!ret) {
1075
+ ret = auxtrace_heap__add(&spe->heap, queue_nr, ts);
1076
+ if (ret < 0)
1077
+ return ret;
1078
+ } else {
1079
+ speq->on_heap = false;
1080
+ }
1081
+ }
1082
+
1083
+ return 0;
1084
+}
1085
+
1086
+static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid,
1087
+ u64 time_)
1088
+{
1089
+ struct auxtrace_queues *queues = &spe->queues;
1090
+ unsigned int i;
1091
+ u64 ts = 0;
1092
+
1093
+ for (i = 0; i < queues->nr_queues; i++) {
1094
+ struct auxtrace_queue *queue = &spe->queues.queue_array[i];
1095
+ struct arm_spe_queue *speq = queue->priv;
1096
+
1097
+ if (speq && (tid == -1 || speq->tid == tid)) {
1098
+ speq->time = time_;
1099
+ arm_spe_set_pid_tid_cpu(spe, queue);
1100
+ arm_spe_run_decoder(speq, &ts);
1101
+ }
1102
+ }
1103
+ return 0;
1104
+}
1105
+
1106
+static int arm_spe_process_event(struct perf_session *session,
1107
+ union perf_event *event,
1108
+ struct perf_sample *sample,
1109
+ struct perf_tool *tool)
1110
+{
1111
+ int err = 0;
1112
+ u64 timestamp;
1113
+ struct arm_spe *spe = container_of(session->auxtrace,
1114
+ struct arm_spe, auxtrace);
1115
+
1116
+ if (dump_trace)
1117
+ return 0;
1118
+
1119
+ if (!tool->ordered_events) {
1120
+ pr_err("SPE trace requires ordered events\n");
1121
+ return -EINVAL;
1122
+ }
1123
+
1124
+ if (sample->time && (sample->time != (u64) -1))
1125
+ timestamp = sample->time;
1126
+ else
1127
+ timestamp = 0;
1128
+
1129
+ if (timestamp || spe->timeless_decoding) {
1130
+ err = arm_spe__update_queues(spe);
1131
+ if (err)
1132
+ return err;
1133
+ }
1134
+
1135
+ if (spe->timeless_decoding) {
1136
+ if (event->header.type == PERF_RECORD_EXIT) {
1137
+ err = arm_spe_process_timeless_queues(spe,
1138
+ event->fork.tid,
1139
+ sample->time);
1140
+ }
1141
+ } else if (timestamp) {
1142
+ if (event->header.type == PERF_RECORD_EXIT) {
1143
+ err = arm_spe_process_queues(spe, timestamp);
1144
+ if (err)
1145
+ return err;
1146
+ }
1147
+ }
1148
+
1149
+ return err;
1150
+}
1151
+
1152
static int arm_spe_process_auxtrace_event(struct perf_session *session,
1153
union perf_event *event,
1154
struct perf_tool *tool __maybe_unused)
1155
{
1156
struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
1157
auxtrace);
1158
- struct auxtrace_buffer *buffer;
1159
- off_t data_offset;
1160
- int fd = perf_data__fd(session->data);
1161
- int err;
1162
1163
- if (perf_data__is_pipe(session->data)) {
1164
- data_offset = 0;
1165
- } else {
1166
- data_offset = lseek(fd, 0, SEEK_CUR);
1167
- if (data_offset == -1)
1168
- return -errno;
1169
- }
1170
+ if (!spe->data_queued) {
1171
+ struct auxtrace_buffer *buffer;
1172
+ off_t data_offset;
1173
+ int fd = perf_data__fd(session->data);
1174
+ int err;
1175
1176
- err = auxtrace_queues__add_event(&spe->queues, session, event,
1177
- data_offset, &buffer);
1178
- if (err)
1179
- return err;
1180
+ if (perf_data__is_pipe(session->data)) {
1181
+ data_offset = 0;
1182
+ } else {
1183
+ data_offset = lseek(fd, 0, SEEK_CUR);
1184
+ if (data_offset == -1)
1185
+ return -errno;
1186
+ }
1187
1188
- /* Dump here now we have copied a piped trace out of the pipe */
1189
- if (dump_trace) {
1190
- if (auxtrace_buffer__get_data(buffer, fd)) {
1191
- arm_spe_dump_event(spe, buffer->data,
1192
- buffer->size);
1193
- auxtrace_buffer__put_data(buffer);
1194
+ err = auxtrace_queues__add_event(&spe->queues, session, event,
1195
+ data_offset, &buffer);
1196
+ if (err)
1197
+ return err;
1198
+
1199
+ /* Dump here now we have copied a piped trace out of the pipe */
1200
+ if (dump_trace) {
1201
+ if (auxtrace_buffer__get_data(buffer, fd)) {
1202
+ arm_spe_dump_event(spe, buffer->data,
1203
+ buffer->size);
1204
+ auxtrace_buffer__put_data(buffer);
1205
+ }
1206
}
1207
}
1208
1209
1210
static int arm_spe_flush(struct perf_session *session __maybe_unused,
1211
struct perf_tool *tool __maybe_unused)
1212
{
1213
- return 0;
1214
+ struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
1215
+ auxtrace); /* NOTE(review): session and tool are both read by the added lines, so __maybe_unused on them looks stale */
1216
+ int ret;
1217
+
1218
+ if (dump_trace) /* raw-dump mode: nothing is decoded, so nothing to flush */
1219
+ return 0;
1220
+
1221
+ if (!tool->ordered_events) /* same requirement as arm_spe_process_event(), but without the pr_err() */
1222
+ return -EINVAL;
1223
+
1224
+ ret = arm_spe__update_queues(spe);
1225
+ if (ret < 0)
1226
+ return ret;
1227
+
1228
+ if (spe->timeless_decoding) /* tid -1 makes the timeless path process every queue */
1229
+ return arm_spe_process_timeless_queues(spe, -1,
1230
+ MAX_TIMESTAMP - 1);
1231
+
1232
+ return arm_spe_process_queues(spe, MAX_TIMESTAMP); /* timestamped path, called with MAX_TIMESTAMP as the limit */
1233
}
1234
1235
static void arm_spe_free_queue(void *priv)
1236
1237
1238
if (!speq)
1239
return;
1240
+ thread__zput(speq->thread);
1241
+ arm_spe_decoder_free(speq->decoder);
1242
+ zfree(&speq->event_buf);
1243
free(speq);
1244
}
1245
1246
1247
fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]);
1248
}
1249
1250
+struct arm_spe_synth { /* context for synthesizing one attr event into a session */
1251
+ struct perf_tool dummy_tool; /* handed to perf_event__synthesize_attr(); arm_spe_event_synth() gets back to this struct from it via container_of() */
1252
+ struct perf_session *session; /* session that arm_spe_event_synth() delivers the event to */
1253
+};
1254
+
1255
+/*
+ * Callback used with perf_event__synthesize_attr(): step from @tool back
+ * to the arm_spe_synth that embeds it and deliver @event to that session.
+ * @sample and @machine are unused.
+ */
+static int arm_spe_event_synth(struct perf_tool *tool,
+			       union perf_event *event,
+			       struct perf_sample *sample __maybe_unused,
+			       struct machine *machine __maybe_unused)
+{
+	struct arm_spe_synth *synth;
+
+	synth = container_of(tool, struct arm_spe_synth, dummy_tool);
+
+	return perf_session__deliver_synth_event(synth->session, event, NULL);
+}
1266
+
1267
+/*
+ * Synthesize a single attr event for @attr carrying the one sample id @id
+ * and feed it into @session through arm_spe_event_synth().
+ *
+ * Returns whatever perf_event__synthesize_attr() returns.
+ */
+static int arm_spe_synth_event(struct perf_session *session,
+			       struct perf_event_attr *attr, u64 id)
+{
+	struct arm_spe_synth synth;
+
+	/* Only the session pointer is set; the dummy tool stays zeroed. */
+	memset(&synth, 0, sizeof(synth));
+	synth.session = session;
+
+	return perf_event__synthesize_attr(&synth.dummy_tool, attr, 1, &id,
+					   arm_spe_event_synth);
+}
1278
+
1279
+/*
+ * Rename the first evsel in @evlist whose first sample id equals @id to a
+ * copy of @name.  Evsels without an id array are skipped; nothing happens
+ * when no evsel matches.
+ *
+ * The strdup() result is stored unchecked.
+ */
+static void arm_spe_set_event_name(struct evlist *evlist, u64 id,
+				   const char *name)
+{
+	struct evsel *pos;
+
+	evlist__for_each_entry(evlist, pos) {
+		if (!pos->id || pos->id[0] != id)
+			continue;
+
+		/* Drop any previous name before installing the new one. */
+		if (pos->name)
+			zfree(&pos->name);
+		pos->name = strdup(name);
+		return;
+	}
+}
1293
+
1294
+/*
+ * Synthesize one attr event with @attr and @id into @session and, on
+ * success, name the matching evsel in @evlist @name.
+ */
+static int arm_spe_synth_named_event(struct perf_session *session,
+				     struct evlist *evlist,
+				     struct perf_event_attr *attr, u64 id,
+				     const char *name)
+{
+	int err = arm_spe_synth_event(session, attr, id);
+
+	if (err)
+		return err;
+
+	arm_spe_set_event_name(evlist, id, name);
+	return 0;
+}
+
+/*
+ * Create the synthetic events selected in spe->synth_opts.
+ *
+ * The attr of each synthetic event is derived from the first evsel whose
+ * type equals spe->pmu_type; if there is none, nothing is synthesized and
+ * 0 is returned.  Ids are handed out consecutively, starting at a fixed
+ * offset from that evsel's first id, and each one is recorded in @spe.
+ *
+ * Returns 0 on success or the first error from event synthesis.  State
+ * already recorded in @spe is left in place on error.
+ */
+static int
+arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
+{
+	struct evlist *evlist = session->evlist;
+	struct evsel *spe_evsel = NULL;
+	struct evsel *pos;
+	struct perf_event_attr attr;
+	u64 id;
+	int err;
+
+	evlist__for_each_entry(evlist, pos) {
+		if (pos->attr.type == spe->pmu_type) {
+			spe_evsel = pos;
+			break;
+		}
+	}
+
+	if (!spe_evsel) {
+		pr_debug("No selected events with SPE trace data\n");
+		return 0;
+	}
+
+	memset(&attr, 0, sizeof(attr));
+	attr.size = sizeof(attr);
+	attr.type = PERF_TYPE_HARDWARE;
+	attr.sample_type = spe_evsel->attr.sample_type & PERF_SAMPLE_MASK;
+	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
+			    PERF_SAMPLE_PERIOD;
+
+	/* Samples carry a time only when decoding is not timeless. */
+	if (spe->timeless_decoding)
+		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
+	else
+		attr.sample_type |= PERF_SAMPLE_TIME;
+
+	/* Remaining settings are inherited from the SPE evsel. */
+	attr.exclude_user = spe_evsel->attr.exclude_user;
+	attr.exclude_kernel = spe_evsel->attr.exclude_kernel;
+	attr.exclude_hv = spe_evsel->attr.exclude_hv;
+	attr.exclude_host = spe_evsel->attr.exclude_host;
+	attr.exclude_guest = spe_evsel->attr.exclude_guest;
+	attr.sample_id_all = spe_evsel->attr.sample_id_all;
+	attr.read_format = spe_evsel->attr.read_format;
+
+	/* create new id val to be a fixed offset from evsel id */
+	id = spe_evsel->id[0] + 1000000000;
+
+	/* Substitute 1 if the sum came out as 0. */
+	if (!id)
+		id = 1;
+
+	if (spe->synth_opts.flc) {
+		spe->sample_flc = true;
+
+		/* Level 1 data cache miss */
+		err = arm_spe_synth_named_event(session, evlist, &attr, id,
+						"l1d-miss");
+		if (err)
+			return err;
+		spe->l1d_miss_id = id++;
+
+		/* Level 1 data cache access */
+		err = arm_spe_synth_named_event(session, evlist, &attr, id,
+						"l1d-access");
+		if (err)
+			return err;
+		spe->l1d_access_id = id++;
+	}
+
+	if (spe->synth_opts.llc) {
+		spe->sample_llc = true;
+
+		/* Last level cache miss */
+		err = arm_spe_synth_named_event(session, evlist, &attr, id,
+						"llc-miss");
+		if (err)
+			return err;
+		spe->llc_miss_id = id++;
+
+		/* Last level cache access */
+		err = arm_spe_synth_named_event(session, evlist, &attr, id,
+						"llc-access");
+		if (err)
+			return err;
+		spe->llc_access_id = id++;
+	}
+
+	if (spe->synth_opts.tlb) {
+		spe->sample_tlb = true;
+
+		/* TLB miss */
+		err = arm_spe_synth_named_event(session, evlist, &attr, id,
+						"tlb-miss");
+		if (err)
+			return err;
+		spe->tlb_miss_id = id++;
+
+		/* TLB access */
+		err = arm_spe_synth_named_event(session, evlist, &attr, id,
+						"tlb-access");
+		if (err)
+			return err;
+		spe->tlb_access_id = id++;
+	}
+
+	if (spe->synth_opts.branches) {
+		spe->sample_branch = true;
+
+		/* Branch miss */
+		err = arm_spe_synth_named_event(session, evlist, &attr, id,
+						"branch-miss");
+		if (err)
+			return err;
+		spe->branch_miss_id = id++;
+	}
+
+	if (spe->synth_opts.remote_access) {
+		spe->sample_remote_access = true;
+
+		/* Remote access */
+		err = arm_spe_synth_named_event(session, evlist, &attr, id,
+						"remote-access");
+		if (err)
+			return err;
+		spe->remote_access_id = id;
+	}
+
+	return 0;
+}
1427
+
1428
int arm_spe_process_auxtrace_info(union perf_event *event,
1429
struct perf_session *session)
1430
{
1431
struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
1432
- size_t min_sz = sizeof(u64) * ARM_SPE_PMU_TYPE;
1433
+ size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX;
1434
struct arm_spe *spe;
1435
int err;
1436
1437
1438
spe->auxtrace_type = auxtrace_info->type;
1439
spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
1440
1441
+ spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);
1442
spe->auxtrace.process_event = arm_spe_process_event;
1443
spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
1444
spe->auxtrace.flush_events = arm_spe_flush;
1445
1446
1447
arm_spe_print_info(&auxtrace_info->priv[0]);
1448
1449
+ if (dump_trace)
1450
+ return 0;
1451
+
1452
+ if (session->itrace_synth_opts && session->itrace_synth_opts->set)
1453
+ spe->synth_opts = *session->itrace_synth_opts;
1454
+ else
1455
+ itrace_synth_opts__set_default(&spe->synth_opts, false);
1456
+
1457
+ err = arm_spe_synth_events(spe, session);
1458
+ if (err)
1459
+ goto err_free_queues;
1460
+
1461
+ err = auxtrace_queues__process_index(&spe->queues, session);
1462
+ if (err)
1463
+ goto err_free_queues;
1464
+
1465
+ if (spe->queues.populated)
1466
+ spe->data_queued = true;
1467
+
1468
return 0;
1469
1470
+err_free_queues:
1471
+ auxtrace_queues__free(&spe->queues);
1472
+ session->auxtrace = NULL;
1473
err_free:
1474
free(spe);
1475
return err;
1476
1477