Merge: perf: sync with upstream v6.5
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/3026
Bugzilla: https://bugzilla.redhat.com/2233483

Rebase of the perf codebase from v6.3-aligned to v6.5.

Signed-off-by: Michael Petlan <mpetlan@redhat.com>
Approved-by: Tony Camuso <tcamuso@redhat.com>
Approved-by: Artem Savkov <asavkov@redhat.com>
Approved-by: David Arcari <darcari@redhat.com>
Signed-off-by: Scott Weaver <scweaver@redhat.com>

commit f3cff0333c
@@ -1312,6 +1312,11 @@
 #define PVR_VER_E500MC 0x8023
 #define PVR_VER_E5500 0x8024
 #define PVR_VER_E6500 0x8040
+#define PVR_VER_7450 0x8000
+#define PVR_VER_7455 0x8001
+#define PVR_VER_7447 0x8002
+#define PVR_VER_7447A 0x8003
+#define PVR_VER_7448 0x8004
 
 /*
  * For the 8xx processors, all of them report the same PVR family for
@@ -417,9 +417,9 @@ struct power_pmu mpc7450_pmu = {
 
 static int __init init_mpc7450_pmu(void)
 {
-    unsigned int pvr = mfspr(SPRN_PVR);
-
-    if (PVR_VER(pvr) != PVR_7450)
+    if (!pvr_version_is(PVR_VER_7450) && !pvr_version_is(PVR_VER_7455) &&
+        !pvr_version_is(PVR_VER_7447) && !pvr_version_is(PVR_VER_7447A) &&
+        !pvr_version_is(PVR_VER_7448))
         return -ENODEV;
 
     return register_power_pmu(&mpc7450_pmu);
@@ -374,7 +374,7 @@ static int amd_pmu_hw_config(struct perf_event *event)
 
     /* pass precise event sampling to ibs: */
     if (event->attr.precise_ip && get_ibs_caps())
-        return -ENOENT;
+        return forward_event_to_ibs(event);
 
     if (has_branch_stack(event) && !x86_pmu.lbr_nr)
         return -EOPNOTSUPP;

@@ -190,7 +190,7 @@ static struct perf_ibs *get_ibs_pmu(int type)
 }
 
 /*
- * Use IBS for precise event sampling:
+ * core pmu config -> IBS config
  *
  *    perf record -a -e cpu-cycles:p ...    # use ibs op counting cycle count
  *    perf record -a -e r076:p ...          # same as -e cpu-cycles:p
@@ -199,25 +199,9 @@ static struct perf_ibs *get_ibs_pmu(int type)
  * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl,
  * MSRC001_1033) is used to select either cycle or micro-ops counting
  * mode.
- *
- * The rip of IBS samples has skid 0. Thus, IBS supports precise
- * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the
- * rip is invalid when IBS was not able to record the rip correctly.
- * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then.
- *
  */
-static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
+static int core_pmu_ibs_config(struct perf_event *event, u64 *config)
 {
-    switch (event->attr.precise_ip) {
-    case 0:
-        return -ENOENT;
-    case 1:
-    case 2:
-        break;
-    default:
-        return -EOPNOTSUPP;
-    }
-
     switch (event->attr.type) {
     case PERF_TYPE_HARDWARE:
         switch (event->attr.config) {
@@ -243,22 +227,37 @@ static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
         return -EOPNOTSUPP;
     }
 
+/*
+ * The rip of IBS samples has skid 0. Thus, IBS supports precise
+ * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the
+ * rip is invalid when IBS was not able to record the rip correctly.
+ * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then.
+ */
+int forward_event_to_ibs(struct perf_event *event)
+{
+    u64 config = 0;
+
+    if (!event->attr.precise_ip || event->attr.precise_ip > 2)
+        return -EOPNOTSUPP;
+
+    if (!core_pmu_ibs_config(event, &config)) {
+        event->attr.type = perf_ibs_op.pmu.type;
+        event->attr.config = config;
+    }
+    return -ENOENT;
+}
+
 static int perf_ibs_init(struct perf_event *event)
 {
     struct hw_perf_event *hwc = &event->hw;
     struct perf_ibs *perf_ibs;
     u64 max_cnt, config;
     int ret;
 
     perf_ibs = get_ibs_pmu(event->attr.type);
-    if (perf_ibs) {
-        config = event->attr.config;
-    } else {
-        perf_ibs = &perf_ibs_op;
-        ret = perf_ibs_precise_event(event, &config);
-        if (ret)
-            return ret;
-    }
+    if (!perf_ibs)
+        return -ENOENT;
+
+    config = event->attr.config;
 
     if (event->pmu != &perf_ibs->pmu)
         return -ENOENT;
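Reviewer note: the forwarding above is transparent to userspace — a precise core event opened through perf_event_open() is retargeted to the IBS op PMU inside the kernel. A minimal illustrative sketch (hypothetical values, not part of this merge):

    /* Illustrative only: opening cpu-cycles with precise_ip set; on AMD,
     * perf_ibs_init()/forward_event_to_ibs() retarget this to the ibs_op PMU. */
    #include <linux/perf_event.h>
    #include <sys/syscall.h>
    #include <string.h>
    #include <unistd.h>

    static int open_precise_cycles(void)
    {
        struct perf_event_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_HARDWARE;         /* core PMU type as requested */
        attr.config = PERF_COUNT_HW_CPU_CYCLES;
        attr.precise_ip = 1;                    /* precise levels 1-2 get forwarded */
        attr.sample_period = 100000;

        /* pid=0, cpu=-1: profile this task on any CPU */
        return syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
    }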
@@ -349,6 +349,16 @@ static struct event_constraint intel_spr_event_constraints[] = {
     EVENT_CONSTRAINT_END
 };
 
+static struct extra_reg intel_gnr_extra_regs[] __read_mostly = {
+    INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0),
+    INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
+    INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
+    INTEL_UEVENT_EXTRA_REG(0x02c6, MSR_PEBS_FRONTEND, 0x9, FE),
+    INTEL_UEVENT_EXTRA_REG(0x03c6, MSR_PEBS_FRONTEND, 0x7fff1f, FE),
+    INTEL_UEVENT_EXTRA_REG(0x40ad, MSR_PEBS_FRONTEND, 0x7, FE),
+    INTEL_UEVENT_EXTRA_REG(0x04c2, MSR_PEBS_FRONTEND, 0x8, FE),
+    EVENT_EXTRA_END
+};
+
 EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
 EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
@@ -2451,7 +2461,7 @@ static void intel_pmu_disable_fixed(struct perf_event *event)
 
     intel_clear_masks(event, idx);
 
-    mask = 0xfULL << ((idx - INTEL_PMC_IDX_FIXED) * 4);
+    mask = intel_fixed_bits_by_idx(idx - INTEL_PMC_IDX_FIXED, INTEL_FIXED_BITS_MASK);
     cpuc->fixed_ctrl_val &= ~mask;
 }
 

@@ -2750,25 +2760,25 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
      * if requested:
      */
     if (!event->attr.precise_ip)
-        bits |= 0x8;
+        bits |= INTEL_FIXED_0_ENABLE_PMI;
     if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
-        bits |= 0x2;
+        bits |= INTEL_FIXED_0_USER;
     if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
-        bits |= 0x1;
+        bits |= INTEL_FIXED_0_KERNEL;
 
     /*
      * ANY bit is supported in v3 and up
      */
     if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
-        bits |= 0x4;
+        bits |= INTEL_FIXED_0_ANYTHREAD;
 
     idx -= INTEL_PMC_IDX_FIXED;
-    bits <<= (idx * 4);
-    mask = 0xfULL << (idx * 4);
+    bits = intel_fixed_bits_by_idx(idx, bits);
+    mask = intel_fixed_bits_by_idx(idx, INTEL_FIXED_BITS_MASK);
 
     if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip) {
-        bits |= ICL_FIXED_0_ADAPTIVE << (idx * 4);
-        mask |= ICL_FIXED_0_ADAPTIVE << (idx * 4);
+        bits |= intel_fixed_bits_by_idx(idx, ICL_FIXED_0_ADAPTIVE);
+        mask |= intel_fixed_bits_by_idx(idx, ICL_FIXED_0_ADAPTIVE);
     }
 
     cpuc->fixed_ctrl_val &= ~mask;
@@ -3983,6 +3993,13 @@ static int intel_pmu_hw_config(struct perf_event *event)
         struct perf_event *leader = event->group_leader;
         struct perf_event *sibling = NULL;
 
+        /*
+         * When this memload event is also the first event (no group
+         * exists yet), then there is no aux event before it.
+         */
+        if (leader == event)
+            return -ENODATA;
+
         if (!is_mem_loads_aux_event(leader)) {
             for_each_sibling_event(sibling, leader) {
                 if (is_mem_loads_aux_event(sibling))

@@ -4074,7 +4091,7 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data)
     if (x86_pmu.intel_cap.pebs_baseline) {
         arr[(*nr)++] = (struct perf_guest_switch_msr){
             .msr = MSR_PEBS_DATA_CFG,
-            .host = cpuc->pebs_data_cfg,
+            .host = cpuc->active_pebs_data_cfg,
             .guest = kvm_pmu->pebs_data_cfg,
         };
     }
@@ -5469,6 +5486,15 @@ pebs_is_visible(struct kobject *kobj, struct attribute *attr, int i)
     return x86_pmu.pebs ? attr->mode : 0;
 }
 
+static umode_t
+mem_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+    if (attr == &event_attr_mem_ld_aux.attr.attr)
+        return x86_pmu.flags & PMU_FL_MEM_LOADS_AUX ? attr->mode : 0;
+
+    return pebs_is_visible(kobj, attr, i);
+}
+
 static umode_t
 lbr_is_visible(struct kobject *kobj, struct attribute *attr, int i)
 {

@@ -5496,7 +5522,7 @@ static struct attribute_group group_events_td = {
 
 static struct attribute_group group_events_mem = {
     .name = "events",
-    .is_visible = pebs_is_visible,
+    .is_visible = mem_is_visible,
 };
 
 static struct attribute_group group_events_tsx = {
@@ -6486,6 +6512,11 @@ __init int intel_pmu_init(void)
 
     case INTEL_FAM6_SAPPHIRERAPIDS_X:
     case INTEL_FAM6_EMERALDRAPIDS_X:
+        x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
+        x86_pmu.extra_regs = intel_spr_extra_regs;
+        fallthrough;
+    case INTEL_FAM6_GRANITERAPIDS_X:
+    case INTEL_FAM6_GRANITERAPIDS_D:
         pmem = true;
         x86_pmu.late_ack = true;
         memcpy(hw_cache_event_ids, spr_hw_cache_event_ids, sizeof(hw_cache_event_ids));

@@ -6493,7 +6524,8 @@ __init int intel_pmu_init(void)
 
         x86_pmu.event_constraints = intel_spr_event_constraints;
         x86_pmu.pebs_constraints = intel_spr_pebs_event_constraints;
-        x86_pmu.extra_regs = intel_spr_extra_regs;
+        if (!x86_pmu.extra_regs)
+            x86_pmu.extra_regs = intel_gnr_extra_regs;
         x86_pmu.limit_period = spr_limit_period;
         x86_pmu.pebs_ept = 1;
         x86_pmu.pebs_aliases = NULL;

@@ -6502,7 +6534,6 @@ __init int intel_pmu_init(void)
         x86_pmu.flags |= PMU_FL_HAS_RSP_1;
         x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
         x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
-        x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
 
         x86_pmu.hw_config = hsw_hw_config;
         x86_pmu.get_event_constraints = spr_get_event_constraints;

@@ -6638,6 +6669,7 @@ __init int intel_pmu_init(void)
         pmu->pebs_constraints = intel_grt_pebs_event_constraints;
         pmu->extra_regs = intel_grt_extra_regs;
         if (is_mtl(boot_cpu_data.x86_model)) {
+            x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].extra_regs = intel_gnr_extra_regs;
             x86_pmu.pebs_latency_data = mtl_latency_data_small;
             extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
                 mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr;
@@ -678,6 +678,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
     X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &icx_cstates),
     X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &icx_cstates),
     X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &icx_cstates),
+    X86_MATCH_INTEL_FAM6_MODEL(GRANITERAPIDS_X, &icx_cstates),
+    X86_MATCH_INTEL_FAM6_MODEL(GRANITERAPIDS_D, &icx_cstates),
 
     X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &icl_cstates),
     X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &icl_cstates),
@@ -1229,12 +1229,14 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
                   struct perf_event *event, bool add)
 {
     struct pmu *pmu = event->pmu;
+
     /*
      * Make sure we get updated with the first PEBS
      * event. It will trigger also during removal, but
      * that does not hurt:
      */
-    bool update = cpuc->n_pebs == 1;
+    if (cpuc->n_pebs == 1)
+        cpuc->pebs_data_cfg = PEBS_UPDATE_DS_SW;
 
     if (needed_cb != pebs_needs_sched_cb(cpuc)) {
         if (!needed_cb)

@@ -1242,7 +1244,7 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
         else
             perf_sched_cb_dec(pmu);
 
-        update = true;
+        cpuc->pebs_data_cfg |= PEBS_UPDATE_DS_SW;
     }
 
     /*

@@ -1252,24 +1254,13 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
     if (x86_pmu.intel_cap.pebs_baseline && add) {
         u64 pebs_data_cfg;
 
-        /* Clear pebs_data_cfg and pebs_record_size for first PEBS. */
-        if (cpuc->n_pebs == 1) {
-            cpuc->pebs_data_cfg = 0;
-            cpuc->pebs_record_size = sizeof(struct pebs_basic);
-        }
-
         pebs_data_cfg = pebs_update_adaptive_cfg(event);
-
-        /* Update pebs_record_size if new event requires more data. */
-        if (pebs_data_cfg & ~cpuc->pebs_data_cfg) {
-            cpuc->pebs_data_cfg |= pebs_data_cfg;
-            adaptive_pebs_record_size_update();
-            update = true;
-        }
+        /*
+         * Be sure to update the thresholds when we change the record.
+         */
+        if (pebs_data_cfg & ~cpuc->pebs_data_cfg)
+            cpuc->pebs_data_cfg |= pebs_data_cfg | PEBS_UPDATE_DS_SW;
     }
-
-    if (update)
-        pebs_update_threshold(cpuc);
 }
 
 void intel_pmu_pebs_add(struct perf_event *event)
@@ -1326,9 +1317,17 @@ static void intel_pmu_pebs_via_pt_enable(struct perf_event *event)
     wrmsrl(base + idx, value);
 }
 
+static inline void intel_pmu_drain_large_pebs(struct cpu_hw_events *cpuc)
+{
+    if (cpuc->n_pebs == cpuc->n_large_pebs &&
+        cpuc->n_pebs != cpuc->n_pebs_via_pt)
+        intel_pmu_drain_pebs_buffer();
+}
+
 void intel_pmu_pebs_enable(struct perf_event *event)
 {
     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+    u64 pebs_data_cfg = cpuc->pebs_data_cfg & ~PEBS_UPDATE_DS_SW;
     struct hw_perf_event *hwc = &event->hw;
     struct debug_store *ds = cpuc->ds;
     unsigned int idx = hwc->idx;

@@ -1344,11 +1343,22 @@ void intel_pmu_pebs_enable(struct perf_event *event)
 
     if (x86_pmu.intel_cap.pebs_baseline) {
         hwc->config |= ICL_EVENTSEL_ADAPTIVE;
-        if (cpuc->pebs_data_cfg != cpuc->active_pebs_data_cfg) {
-            wrmsrl(MSR_PEBS_DATA_CFG, cpuc->pebs_data_cfg);
-            cpuc->active_pebs_data_cfg = cpuc->pebs_data_cfg;
+        if (pebs_data_cfg != cpuc->active_pebs_data_cfg) {
+            /*
+             * drain_pebs() assumes uniform record size;
+             * hence we need to drain when changing said
+             * size.
+             */
+            intel_pmu_drain_large_pebs(cpuc);
+            adaptive_pebs_record_size_update();
+            wrmsrl(MSR_PEBS_DATA_CFG, pebs_data_cfg);
+            cpuc->active_pebs_data_cfg = pebs_data_cfg;
         }
     }
+    if (cpuc->pebs_data_cfg & PEBS_UPDATE_DS_SW) {
+        cpuc->pebs_data_cfg = pebs_data_cfg;
+        pebs_update_threshold(cpuc);
+    }
 
     if (idx >= INTEL_PMC_IDX_FIXED) {
         if (x86_pmu.intel_cap.pebs_format < 5)
@@ -1391,9 +1401,7 @@ void intel_pmu_pebs_disable(struct perf_event *event)
     struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
     struct hw_perf_event *hwc = &event->hw;
 
-    if (cpuc->n_pebs == cpuc->n_large_pebs &&
-        cpuc->n_pebs != cpuc->n_pebs_via_pt)
-        intel_pmu_drain_pebs_buffer();
+    intel_pmu_drain_large_pebs(cpuc);
 
     cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
@@ -6068,6 +6068,17 @@ static struct intel_uncore_ops spr_uncore_mmio_ops = {
     .read_counter = uncore_mmio_read_counter,
 };
 
+static struct uncore_event_desc spr_uncore_imc_events[] = {
+    INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x01,umask=0x00"),
+    INTEL_UNCORE_EVENT_DESC(cas_count_read, "event=0x05,umask=0xcf"),
+    INTEL_UNCORE_EVENT_DESC(cas_count_read.scale, "6.103515625e-5"),
+    INTEL_UNCORE_EVENT_DESC(cas_count_read.unit, "MiB"),
+    INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x05,umask=0xf0"),
+    INTEL_UNCORE_EVENT_DESC(cas_count_write.scale, "6.103515625e-5"),
+    INTEL_UNCORE_EVENT_DESC(cas_count_write.unit, "MiB"),
+    { /* end: all zeroes */ },
+};
+
 static struct intel_uncore_type spr_uncore_imc = {
     SPR_UNCORE_COMMON_FORMAT(),
     .name = "imc",
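Reviewer note: the cas_count scale 6.103515625e-5 is 64/2^20 — each CAS event moves one 64-byte cache line, so the scale converts a raw count directly to MiB. A quick sanity check (illustrative, not part of the merge):

    #include <stdio.h>

    int main(void)
    {
        /* One DRAM CAS transfers a 64-byte cache line; scale converts counts to MiB. */
        double scale = 64.0 / (1024.0 * 1024.0);

        printf("%.9e\n", scale);    /* prints 6.103515625e-05 */
        return 0;
    }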
@@ -6075,6 +6086,7 @@ static struct intel_uncore_type spr_uncore_imc = {
     .fixed_ctr = SNR_IMC_MMIO_PMON_FIXED_CTR,
     .fixed_ctl = SNR_IMC_MMIO_PMON_FIXED_CTL,
     .ops = &spr_uncore_mmio_ops,
+    .event_descs = spr_uncore_imc_events,
 };
 
 static void spr_uncore_pci_enable_event(struct intel_uncore_box *box,

@@ -6138,6 +6150,7 @@ static struct intel_uncore_type spr_uncore_mdf = {
 };
 
 #define UNCORE_SPR_NUM_UNCORE_TYPES 12
+#define UNCORE_SPR_CHA 0
 #define UNCORE_SPR_IIO 1
 #define UNCORE_SPR_IMC 6
 #define UNCORE_SPR_UPI 8
|
|||
return max + 1;
|
||||
}
|
||||
|
||||
#define SPR_MSR_UNC_CBO_CONFIG 0x2FFE
|
||||
|
||||
void spr_uncore_cpu_init(void)
|
||||
{
|
||||
struct intel_uncore_type *type;
|
||||
u64 num_cbo;
|
||||
|
||||
uncore_msr_uncores = uncore_get_uncores(UNCORE_ACCESS_MSR,
|
||||
UNCORE_SPR_MSR_EXTRA_UNCORES,
|
||||
spr_msr_uncores);
|
||||
|
||||
type = uncore_find_type_by_id(uncore_msr_uncores, UNCORE_SPR_CHA);
|
||||
if (type) {
|
||||
/*
|
||||
* The value from the discovery table (stored in the type->num_boxes
|
||||
* of UNCORE_SPR_CHA) is incorrect on some SPR variants because of a
|
||||
* firmware bug. Using the value from SPR_MSR_UNC_CBO_CONFIG to replace it.
|
||||
*/
|
||||
rdmsrl(SPR_MSR_UNC_CBO_CONFIG, num_cbo);
|
||||
/*
|
||||
* The MSR doesn't work on the EMR XCC, but the firmware bug doesn't impact
|
||||
* the EMR XCC. Don't let the value from the MSR replace the existing value.
|
||||
*/
|
||||
if (num_cbo)
|
||||
type->num_boxes = num_cbo;
|
||||
}
|
||||
spr_uncore_iio_free_running.num_boxes = uncore_type_max_boxes(uncore_msr_uncores, UNCORE_SPR_IIO);
|
||||
}
|
||||
|
||||
|
|
|
@@ -70,6 +70,8 @@ static bool test_intel(int idx, void *data)
     case INTEL_FAM6_BROADWELL_X:
     case INTEL_FAM6_SAPPHIRERAPIDS_X:
     case INTEL_FAM6_EMERALDRAPIDS_X:
+    case INTEL_FAM6_GRANITERAPIDS_X:
+    case INTEL_FAM6_GRANITERAPIDS_D:
 
     case INTEL_FAM6_ATOM_SILVERMONT:
     case INTEL_FAM6_ATOM_SILVERMONT_D:
@@ -32,11 +32,21 @@
 #define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23)
 #define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL
 
+#define INTEL_FIXED_BITS_MASK 0xFULL
+#define INTEL_FIXED_BITS_STRIDE 4
+#define INTEL_FIXED_0_KERNEL (1ULL << 0)
+#define INTEL_FIXED_0_USER (1ULL << 1)
+#define INTEL_FIXED_0_ANYTHREAD (1ULL << 2)
+#define INTEL_FIXED_0_ENABLE_PMI (1ULL << 3)
+
 #define HSW_IN_TX (1ULL << 32)
 #define HSW_IN_TX_CHECKPOINTED (1ULL << 33)
 #define ICL_EVENTSEL_ADAPTIVE (1ULL << 34)
 #define ICL_FIXED_0_ADAPTIVE (1ULL << 32)
 
+#define intel_fixed_bits_by_idx(_idx, _bits) \
+    ((_bits) << ((_idx) * INTEL_FIXED_BITS_STRIDE))
+
 #define AMD64_EVENTSEL_INT_CORE_ENABLE (1ULL << 36)
 #define AMD64_EVENTSEL_GUESTONLY (1ULL << 40)
 #define AMD64_EVENTSEL_HOSTONLY (1ULL << 41)
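Reviewer note: these helpers replace the open-coded shifts like bits <<= (idx * 4) in intel_pmu_enable_fixed() earlier in this merge; each fixed counter owns a 4-bit control field in the fixed-counter control MSR. A standalone sketch of the arithmetic (illustrative only, macros copied from the hunk above):

    #include <stdio.h>

    #define INTEL_FIXED_BITS_MASK   0xFULL
    #define INTEL_FIXED_BITS_STRIDE 4
    #define INTEL_FIXED_0_KERNEL    (1ULL << 0)
    #define INTEL_FIXED_0_USER      (1ULL << 1)

    #define intel_fixed_bits_by_idx(_idx, _bits) \
        ((_bits) << ((_idx) * INTEL_FIXED_BITS_STRIDE))

    int main(void)
    {
        unsigned long long ctrl = 0;
        int idx = 2;    /* third fixed counter */

        /* count user+kernel on fixed counter 2: sets bits 9:8 */
        ctrl &= ~intel_fixed_bits_by_idx(idx, INTEL_FIXED_BITS_MASK);
        ctrl |= intel_fixed_bits_by_idx(idx, INTEL_FIXED_0_KERNEL | INTEL_FIXED_0_USER);

        printf("fixed_ctrl = 0x%llx\n", ctrl);    /* prints 0x300 */
        return 0;
    }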
@@ -121,6 +131,9 @@
 #define PEBS_DATACFG_LBRS BIT_ULL(3)
 #define PEBS_DATACFG_LBR_SHIFT 24
 
+/* Steal the highest bit of pebs_data_cfg for SW usage */
+#define PEBS_UPDATE_DS_SW BIT_ULL(63)
+
 /*
  * Intel "Architectural Performance Monitoring" CPUID
  * detection/enumeration details:

@@ -475,8 +488,10 @@ struct pebs_xmm {
 
 #ifdef CONFIG_X86_LOCAL_APIC
 extern u32 get_ibs_caps(void);
+extern int forward_event_to_ibs(struct perf_event *event);
 #else
 static inline u32 get_ibs_caps(void) { return 0; }
+static inline int forward_event_to_ibs(struct perf_event *event) { return -ENOENT; }
 #endif
 
 #ifdef CONFIG_PERF_EVENTS
@@ -289,7 +289,7 @@ bool mac_pton(const char *s, u8 *mac);
  *
  * Use tracing_on/tracing_off when you want to quickly turn on or off
  * tracing. It simply enables or disables the recording of the trace events.
- * This also corresponds to the user space /sys/kernel/debug/tracing/tracing_on
+ * This also corresponds to the user space /sys/kernel/tracing/tracing_on
  * file, which gives a means for the kernel and userspace to interact.
  * Place a tracing_off() in the kernel where you want tracing to end.
  * From user space, examine the trace, and then echo 1 > tracing_on
@@ -295,6 +295,8 @@ struct perf_event_pmu_context;
 
 struct perf_output_handle;
 
+#define PMU_NULL_DEV ((void *)(~0UL))
+
 /**
  * struct pmu - generic performance monitoring unit
  */

@@ -303,6 +305,7 @@ struct pmu {
 
     struct module *module;
     struct device *dev;
+    struct device *parent;
     const struct attribute_group **attr_groups;
     const struct attribute_group **attr_update;
     const char *name;

@@ -827,6 +830,14 @@ struct perf_event {
     void *security;
 #endif
     struct list_head sb_list;
+
+    /*
+     * Certain events gets forwarded to another pmu internally by over-
+     * writing kernel copy of event->attr.type without user being aware
+     * of it. event->orig_type contains original 'type' requested by
+     * user.
+     */
+    __u32 orig_type;
 #endif /* CONFIG_PERF_EVENTS */
 };
@@ -482,7 +482,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
  *    * This is how the trace record is structured and will
  *    * be saved into the ring buffer. These are the fields
  *    * that will be exposed to user-space in
- *    * /sys/kernel/debug/tracing/events/<*>/format.
+ *    * /sys/kernel/tracing/events/<*>/format.
  *    *
  *    * The declared 'local variable' is called '__entry'
  *    *

@@ -542,7 +542,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
  * tracepoint callback (this is used by programmatic plugins and
  * can also by used by generic instrumentation like SystemTap), and
  * it is also used to expose a structured trace record in
- * /sys/kernel/debug/tracing/events/.
+ * /sys/kernel/tracing/events/.
  *
  * A set of (un)registration functions can be passed to the variant
  * TRACE_EVENT_FN to perform any (un)registration work.
@@ -1339,7 +1339,8 @@ union perf_mem_data_src {
 #define PERF_MEM_LVLNUM_L2 0x02 /* L2 */
 #define PERF_MEM_LVLNUM_L3 0x03 /* L3 */
 #define PERF_MEM_LVLNUM_L4 0x04 /* L4 */
-/* 5-0x8 available */
+/* 5-0x7 available */
+#define PERF_MEM_LVLNUM_UNC 0x08 /* Uncached */
 #define PERF_MEM_LVLNUM_CXL 0x09 /* CXL */
 #define PERF_MEM_LVLNUM_IO 0x0a /* I/O */
 #define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */
@@ -6647,7 +6647,7 @@ static void perf_sigtrap(struct perf_event *event)
         return;
 
     send_sig_perf((void __user *)event->pending_addr,
-              event->attr.type, event->attr.sig_data);
+              event->orig_type, event->attr.sig_data);
 }
 
 /*
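Reviewer note: the effect of using event->orig_type is visible in the siginfo a SIGTRAP-based profiler receives — si_perf_type now reports the type the user asked for even when the kernel forwarded the event to another PMU. A hedged userspace sketch (assumes a glibc/kernel recent enough to expose TRAP_PERF and the si_perf_* fields):

    #define _GNU_SOURCE
    #include <signal.h>
    #include <stdio.h>
    #include <string.h>

    static void perf_sigtrap_handler(int sig, siginfo_t *info, void *ucontext)
    {
        (void)sig;
        (void)ucontext;
        /* si_perf_type now reflects the originally requested attr.type. */
        if (info->si_code == TRAP_PERF)
            fprintf(stderr, "perf event: type=%u sig_data=%llu\n",
                info->si_perf_type,
                (unsigned long long)info->si_perf_data);
    }

    int main(void)
    {
        struct sigaction sa;

        memset(&sa, 0, sizeof(sa));
        sa.sa_sigaction = perf_sigtrap_handler;
        sa.sa_flags = SA_SIGINFO;
        sigaction(SIGTRAP, &sa, NULL);
        /* ...open a perf event with attr.sigtrap = 1 and attr.sig_data set... */
        return 0;
    }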
@@ -9433,8 +9433,8 @@ __perf_event_account_interrupt(struct perf_event *event, int throttle)
         hwc->interrupts = 1;
     } else {
         hwc->interrupts++;
-        if (unlikely(throttle
-                 && hwc->interrupts >= max_samples_per_tick)) {
+        if (unlikely(throttle &&
+                 hwc->interrupts > max_samples_per_tick)) {
             __this_cpu_inc(perf_throttled_count);
             tick_dep_set_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);
             hwc->interrupts = MAX_INTERRUPTS;
@@ -9951,6 +9951,9 @@ static void sw_perf_event_destroy(struct perf_event *event)
     swevent_hlist_put();
 }
 
+static struct pmu perf_cpu_clock; /* fwd declaration */
+static struct pmu perf_task_clock;
+
 static int perf_swevent_init(struct perf_event *event)
 {
     u64 event_id = event->attr.config;

@@ -9966,7 +9969,10 @@ static int perf_swevent_init(struct perf_event *event)
 
     switch (event_id) {
     case PERF_COUNT_SW_CPU_CLOCK:
+        event->attr.type = perf_cpu_clock.type;
+        return -ENOENT;
     case PERF_COUNT_SW_TASK_CLOCK:
+        event->attr.type = perf_task_clock.type;
         return -ENOENT;
 
     default:
@@ -11097,7 +11103,7 @@ static void cpu_clock_event_read(struct perf_event *event)
 
 static int cpu_clock_event_init(struct perf_event *event)
 {
-    if (event->attr.type != PERF_TYPE_SOFTWARE)
+    if (event->attr.type != perf_cpu_clock.type)
         return -ENOENT;
 
     if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK)

@@ -11118,6 +11124,7 @@ static struct pmu perf_cpu_clock = {
     .task_ctx_nr = perf_sw_context,
 
     .capabilities = PERF_PMU_CAP_NO_NMI,
+    .dev = PMU_NULL_DEV,
 
     .event_init = cpu_clock_event_init,
     .add = cpu_clock_event_add,

@@ -11178,7 +11185,7 @@ static void task_clock_event_read(struct perf_event *event)
 
 static int task_clock_event_init(struct perf_event *event)
 {
-    if (event->attr.type != PERF_TYPE_SOFTWARE)
+    if (event->attr.type != perf_task_clock.type)
         return -ENOENT;
 
     if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK)

@@ -11199,6 +11206,7 @@ static struct pmu perf_task_clock = {
     .task_ctx_nr = perf_sw_context,
 
     .capabilities = PERF_PMU_CAP_NO_NMI,
+    .dev = PMU_NULL_DEV,
 
     .event_init = task_clock_event_init,
     .add = task_clock_event_add,
@@ -11378,6 +11386,7 @@ static int pmu_dev_alloc(struct pmu *pmu)
 
     dev_set_drvdata(pmu->dev, pmu);
     pmu->dev->bus = &pmu_bus;
+    pmu->dev->parent = pmu->parent;
     pmu->dev->release = pmu_dev_release;
 
     ret = dev_set_name(pmu->dev, "%s", pmu->name);
@@ -11426,31 +11435,31 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type)
         goto unlock;
 
     pmu->type = -1;
-    if (!name)
-        goto skip_type;
+    if (WARN_ONCE(!name, "Can not register anonymous pmu.\n")) {
+        ret = -EINVAL;
+        goto free_pdc;
+    }
+
     pmu->name = name;
 
-    if (type != PERF_TYPE_SOFTWARE) {
-        if (type >= 0)
-            max = type;
+    if (type >= 0)
+        max = type;
 
-        ret = idr_alloc(&pmu_idr, pmu, max, 0, GFP_KERNEL);
-        if (ret < 0)
-            goto free_pdc;
+    ret = idr_alloc(&pmu_idr, pmu, max, 0, GFP_KERNEL);
+    if (ret < 0)
+        goto free_pdc;
 
-        WARN_ON(type >= 0 && ret != type);
+    WARN_ON(type >= 0 && ret != type);
 
-        type = ret;
-    }
+    type = ret;
     pmu->type = type;
 
-    if (pmu_bus_running) {
+    if (pmu_bus_running && !pmu->dev) {
         ret = pmu_dev_alloc(pmu);
         if (ret)
             goto free_idr;
     }
 
-skip_type:
     ret = -ENOMEM;
     pmu->cpu_pmu_context = alloc_percpu(struct perf_cpu_pmu_context);
     if (!pmu->cpu_pmu_context)
@@ -11492,16 +11501,7 @@ skip_type:
     if (!pmu->event_idx)
         pmu->event_idx = perf_event_idx_default;
 
-    /*
-     * Ensure the TYPE_SOFTWARE PMUs are at the head of the list,
-     * since these cannot be in the IDR. This way the linear search
-     * is fast, provided a valid software event is provided.
-     */
-    if (type == PERF_TYPE_SOFTWARE || !name)
-        list_add_rcu(&pmu->entry, &pmus);
-    else
-        list_add_tail_rcu(&pmu->entry, &pmus);
-
+    list_add_rcu(&pmu->entry, &pmus);
     atomic_set(&pmu->exclusive_cnt, 0);
     ret = 0;
 unlock:
@@ -11510,12 +11510,13 @@ unlock:
     return ret;
 
 free_dev:
-    device_del(pmu->dev);
-    put_device(pmu->dev);
+    if (pmu->dev && pmu->dev != PMU_NULL_DEV) {
+        device_del(pmu->dev);
+        put_device(pmu->dev);
+    }
 
 free_idr:
-    if (pmu->type != PERF_TYPE_SOFTWARE)
-        idr_remove(&pmu_idr, pmu->type);
+    idr_remove(&pmu_idr, pmu->type);
 
 free_pdc:
     free_percpu(pmu->pmu_disable_count);
@@ -11536,9 +11537,8 @@ void perf_pmu_unregister(struct pmu *pmu)
     synchronize_rcu();
 
     free_percpu(pmu->pmu_disable_count);
-    if (pmu->type != PERF_TYPE_SOFTWARE)
-        idr_remove(&pmu_idr, pmu->type);
-    if (pmu_bus_running) {
+    idr_remove(&pmu_idr, pmu->type);
+    if (pmu_bus_running && pmu->dev && pmu->dev != PMU_NULL_DEV) {
         if (pmu->nr_addr_filters)
             device_remove_file(pmu->dev, &dev_attr_nr_addr_filters);
         device_del(pmu->dev);
@@ -11612,6 +11612,12 @@ static struct pmu *perf_init_event(struct perf_event *event)
 
     idx = srcu_read_lock(&pmus_srcu);
 
+    /*
+     * Save original type before calling pmu->event_init() since certain
+     * pmus overwrites event->attr.type to forward event to another pmu.
+     */
+    event->orig_type = event->attr.type;
+
     /* Try parent's PMU first: */
     if (event->parent && event->parent->pmu) {
         pmu = event->parent->pmu;
@@ -13651,8 +13657,8 @@ void __init perf_event_init(void)
     perf_event_init_all_cpus();
     init_srcu_struct(&pmus_srcu);
     perf_pmu_register(&perf_swevent, "software", PERF_TYPE_SOFTWARE);
-    perf_pmu_register(&perf_cpu_clock, NULL, -1);
-    perf_pmu_register(&perf_task_clock, NULL, -1);
+    perf_pmu_register(&perf_cpu_clock, "cpu_clock", -1);
+    perf_pmu_register(&perf_task_clock, "task_clock", -1);
     perf_tp_register();
     perf_event_init_cpu(smp_processor_id());
     register_reboot_notifier(&perf_reboot_notifier);

@@ -13695,7 +13701,7 @@ static int __init perf_event_sysfs_init(void)
         goto unlock;
 
     list_for_each_entry(pmu, &pmus, entry) {
-        if (!pmu->name || pmu->type < 0)
+        if (pmu->dev)
             continue;
 
         ret = pmu_dev_alloc(pmu);
@@ -329,5 +329,4 @@ static struct kunit_suite hw_breakpoint_test_suite = {
 
 kunit_test_suites(&hw_breakpoint_test_suite);
 
-MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Marco Elver <elver@google.com>");
@@ -219,7 +219,7 @@ config DYNAMIC_FTRACE
       enabled, and the functions not enabled will not affect
       performance of the system.
 
-      See the files in /sys/kernel/debug/tracing:
+      See the files in /sys/kernel/tracing:
         available_filter_functions
         set_ftrace_filter
         set_ftrace_notrace

@@ -279,7 +279,7 @@ config STACK_TRACER
     select KALLSYMS
     help
       This special tracer records the maximum stack footprint of the
-      kernel and displays it in /sys/kernel/debug/tracing/stack_trace.
+      kernel and displays it in /sys/kernel/tracing/stack_trace.
 
       This tracer works by hooking into every function call that the
       kernel executes, and keeping a maximum stack depth value and

@@ -319,7 +319,7 @@ config IRQSOFF_TRACER
       disabled by default and can be runtime (re-)started
       via:
 
-          echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
+          echo 0 > /sys/kernel/tracing/tracing_max_latency
 
       (Note that kernel size and overhead increase with this option
       enabled. This option and the preempt-off timing option can be

@@ -343,7 +343,7 @@ config PREEMPT_TRACER
       disabled by default and can be runtime (re-)started
       via:
 
-          echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
+          echo 0 > /sys/kernel/tracing/tracing_max_latency
 
       (Note that kernel size and overhead increase with this option
       enabled. This option and the irqs-off timing option can be

@@ -493,7 +493,7 @@ config TRACER_SNAPSHOT
       Allow tracing users to take snapshot of the current buffer using the
       ftrace interface, e.g.:
 
-          echo 1 > /sys/kernel/debug/tracing/snapshot
+          echo 1 > /sys/kernel/tracing/snapshot
           cat snapshot
 
 config TRACER_SNAPSHOT_PER_CPU_SWAP

@@ -505,7 +505,7 @@ config TRACER_SNAPSHOT_PER_CPU_SWAP
       full swap (all buffers). If this is set, then the following is
       allowed:
 
-          echo 1 > /sys/kernel/debug/tracing/per_cpu/cpu2/snapshot
+          echo 1 > /sys/kernel/tracing/per_cpu/cpu2/snapshot
 
       After which, only the tracing buffer for CPU 2 was swapped with
       the main tracing buffer, and the other CPU buffers remain the same.

@@ -552,7 +552,7 @@ config PROFILE_ANNOTATED_BRANCHES
       This tracer profiles all likely and unlikely macros
       in the kernel. It will display the results in:
 
-      /sys/kernel/debug/tracing/trace_stat/branch_annotated
+      /sys/kernel/tracing/trace_stat/branch_annotated
 
       Note: this will add a significant overhead; only turn this
       on if you need to profile the system's use of these macros.

@@ -565,7 +565,7 @@ config PROFILE_ALL_BRANCHES
       taken in the kernel is recorded whether it hit or miss.
       The results will be displayed in:
 
-      /sys/kernel/debug/tracing/trace_stat/branch_all
+      /sys/kernel/tracing/trace_stat/branch_all
 
       This option also enables the likely/unlikely profiler.
 

@@ -616,8 +616,8 @@ config BLK_DEV_IO_TRACE
       Tracing also is possible using the ftrace interface, e.g.:
 
         echo 1 > /sys/block/sda/sda1/trace/enable
-        echo blk > /sys/kernel/debug/tracing/current_tracer
-        cat /sys/kernel/debug/tracing/trace_pipe
+        echo blk > /sys/kernel/tracing/current_tracer
+        cat /sys/kernel/tracing/trace_pipe
 
       If unsure, say N.
@@ -21,7 +21,7 @@
  * Then:
  *
  * # insmod kernel/trace/kprobe_event_gen_test.ko
- * # cat /sys/kernel/debug/tracing/trace
+ * # cat /sys/kernel/tracing/trace
  *
  * You should see many instances of the "gen_kprobe_test" and
  * "gen_kretprobe_test" events in the trace buffer.

@@ -2724,7 +2724,7 @@ rb_check_timestamp(struct ring_buffer_per_cpu *cpu_buffer,
           sched_clock_stable() ? "" :
           "If you just came from a suspend/resume,\n"
           "please switch to the trace global clock:\n"
-          "  echo global > /sys/kernel/debug/tracing/trace_clock\n"
+          "  echo global > /sys/kernel/tracing/trace_clock\n"
           "or add trace_clock=global to the kernel command line\n");
 }
 

@@ -22,7 +22,7 @@
  * Then:
  *
  * # insmod kernel/trace/synth_event_gen_test.ko
- * # cat /sys/kernel/debug/tracing/trace
+ * # cat /sys/kernel/tracing/trace
  *
  * You should see several events in the trace buffer -
  * "create_synth_test", "empty_synth_test", and several instances of

@@ -1154,7 +1154,7 @@ void tracing_snapshot_instance(struct trace_array *tr)
  *
  * Note, make sure to allocate the snapshot with either
  * a tracing_snapshot_alloc(), or by doing it manually
- * with: echo 1 > /sys/kernel/debug/tracing/snapshot
+ * with: echo 1 > /sys/kernel/tracing/snapshot
  *
  * If the snapshot buffer is not allocated, it will stop tracing.
  * Basically making a permanent snapshot.
@@ -12,9 +12,9 @@ calls. Only the functions's names and the the call time are provided.
 
 Usage:
     Be sure that you have CONFIG_FUNCTION_TRACER
-    # mount -t debugfs nodev /sys/kernel/debug
-    # echo function > /sys/kernel/debug/tracing/current_tracer
-    $ cat /sys/kernel/debug/tracing/trace_pipe > ~/raw_trace_func
+    # mount -t tracefs nodev /sys/kernel/tracing
+    # echo function > /sys/kernel/tracing/current_tracer
+    $ cat /sys/kernel/tracing/trace_pipe > ~/raw_trace_func
     Wait some times but not too much, the script is a bit slow.
     Break the pipe (Ctrl + Z)
     $ scripts/tracing/draw_functrace.py < ~/raw_trace_func > draw_functrace
@@ -1,4 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __NR_fork
 #define __NR_fork 2
+#endif
+#ifndef __NR_execve
 #define __NR_execve 11
+#endif

@@ -1,4 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __NR_fork
 #define __NR_fork 57
+#endif
+#ifndef __NR_execve
 #define __NR_execve 59
+#endif
@@ -64,6 +64,7 @@ FEATURE_TESTS_BASIC :=      \
     lzma                    \
     get_cpuid               \
     bpf                     \
+    scandirat               \
     sched_getcpu            \
     sdt                     \
     setns                   \

@@ -79,6 +80,7 @@ FEATURE_TESTS_EXTRA :=      \
     compile-32              \
     compile-x32             \
    cplus-demangle           \
+    cxa-demangle            \
     gtk2                    \
     gtk2-infobar            \
     hello                   \
@@ -22,6 +22,7 @@ FILES=                          \
     test-libbfd-liberty.bin     \
     test-libbfd-liberty-z.bin   \
     test-cplus-demangle.bin     \
+    test-cxa-demangle.bin       \
     test-libcap.bin             \
     test-libelf.bin             \
     test-libelf-getphdrnum.bin  \

@@ -57,19 +58,13 @@ FILES=                         \
     test-lzma.bin               \
     test-bpf.bin                \
     test-libbpf.bin             \
-    test-libbpf-btf__load_from_kernel_by_id.bin \
-    test-libbpf-bpf_prog_load.bin \
-    test-libbpf-bpf_map_create.bin \
-    test-libbpf-bpf_object__next_program.bin \
-    test-libbpf-bpf_object__next_map.bin \
-    test-libbpf-bpf_program__set_insns.bin \
-    test-libbpf-btf__raw_data.bin \
     test-get_cpuid.bin          \
     test-sdt.bin                \
     test-cxx.bin                \
     test-gettid.bin             \
     test-jvmti.bin              \
     test-jvmti-cmlr.bin         \
+    test-scandirat.bin          \
     test-sched_getcpu.bin       \
     test-setns.bin              \
     test-libopencsd.bin         \

@@ -132,6 +127,9 @@ $(OUTPUT)test-get_current_dir_name.bin:
 $(OUTPUT)test-glibc.bin:
     $(BUILD)
 
+$(OUTPUT)test-scandirat.bin:
+    $(BUILD)
+
 $(OUTPUT)test-sched_getcpu.bin:
     $(BUILD)
 
@@ -207,7 +205,7 @@ $(OUTPUT)test-libtraceevent.bin:
     $(BUILD) -ltraceevent
 
 $(OUTPUT)test-libtracefs.bin:
-    $(BUILD) -ltracefs
+    $(BUILD) $(shell $(PKG_CONFIG) --cflags libtraceevent 2>/dev/null) -ltracefs
 
 $(OUTPUT)test-libcrypto.bin:
     $(BUILD) -lcrypto

@@ -261,6 +259,9 @@ $(OUTPUT)test-libbfd-liberty-z.bin:
 $(OUTPUT)test-cplus-demangle.bin:
     $(BUILD) -liberty
 
+$(OUTPUT)test-cxa-demangle.bin:
+    $(BUILDXX)
+
 $(OUTPUT)test-backtrace.bin:
     $(BUILD)
@@ -114,6 +114,10 @@
 # include "test-pthread-barrier.c"
 #undef main
 
+#define main main_test_scandirat
+# include "test-scandirat.c"
+#undef main
+
 #define main main_test_sched_getcpu
 # include "test-sched_getcpu.c"
 #undef main

@@ -202,6 +206,7 @@ int main(int argc, char *argv[])
     main_test_get_cpuid();
     main_test_bpf();
     main_test_libcrypto();
+    main_test_scandirat();
     main_test_sched_getcpu();
     main_test_sdt();
     main_test_setns();
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdlib.h>
+#include <cxxabi.h>
+
+int main(void)
+{
+    size_t len = 256;
+    char *output = (char*)malloc(len);
+    int status;
+
+    output = abi::__cxa_demangle("FieldName__9ClassNameFd", output, &len, &status);
+
+    printf("demangled symbol: {%s}\n", output);
+
+    return 0;
+}
@@ -1,8 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <bpf/bpf.h>
-
-int main(void)
-{
-    return bpf_map_create(0 /* map_type */, NULL /* map_name */, 0 /* key_size */,
-                  0 /* value_size */, 0 /* max_entries */, NULL /* opts */);
-}

@@ -1,8 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <bpf/libbpf.h>
-
-int main(void)
-{
-    bpf_object__next_map(NULL /* obj */, NULL /* prev */);
-    return 0;
-}

@@ -1,8 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <bpf/libbpf.h>
-
-int main(void)
-{
-    bpf_object__next_program(NULL /* obj */, NULL /* prev */);
-    return 0;
-}

@@ -1,9 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <bpf/bpf.h>
-
-int main(void)
-{
-    return bpf_prog_load(0 /* prog_type */, NULL /* prog_name */,
-                 NULL /* license */, NULL /* insns */,
-                 0 /* insn_cnt */, NULL /* opts */);
-}

@@ -1,8 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <bpf/libbpf.h>
-
-int main(void)
-{
-    bpf_program__set_insns(NULL /* prog */, NULL /* new_insns */, 0 /* new_insn_cnt */);
-    return 0;
-}

@@ -1,8 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <bpf/btf.h>
-
-int main(void)
-{
-    btf__load_from_kernel_by_id(20151128);
-    return 0;
-}

@@ -1,8 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <bpf/btf.h>
-
-int main(void)
-{
-    btf__raw_data(NULL /* btf_ro */, NULL /* size */);
-    return 0;
-}
@@ -1,6 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <bpf/libbpf.h>
 
+#if !defined(LIBBPF_MAJOR_VERSION) || (LIBBPF_MAJOR_VERSION < 1)
+#error At least libbpf 1.0 is required for Linux tools.
+#endif
+
 int main(void)
 {
     return bpf_object__open("test") ? 0 : -1;
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <dirent.h>
+
+int main(void)
+{
+    // expects non-NULL, arg3 is 'restrict' so "pointers" have to be different
+    return scandirat(/*dirfd=*/ 0, /*dirp=*/ (void *)1, /*namelist=*/ (void *)2, /*filter=*/ (void *)3, /*compar=*/ (void *)4);
+}
+
+#undef _GNU_SOURCE
@@ -12,8 +12,10 @@
                  + __GNUC_PATCHLEVEL__)
 #endif
 
-#if GCC_VERSION >= 70000 && !defined(__CHECKER__)
-# define __fallthrough __attribute__ ((fallthrough))
+#if __has_attribute(__fallthrough__)
+# define fallthrough __attribute__((__fallthrough__))
+#else
+# define fallthrough do {} while (0)  /* fallthrough */
 #endif
 
 #if GCC_VERSION >= 40300
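Reviewer note: the renamed macro is used the same way the kernel's own fallthrough is — as a statement marking a deliberate drop-through between switch cases. A small sketch (illustrative; the macro definition is copied from the hunk above):

    #include <stdio.h>

    /* Stand-in for the tools/include definition shown above. */
    #if __has_attribute(__fallthrough__)
    # define fallthrough __attribute__((__fallthrough__))
    #else
    # define fallthrough do {} while (0)  /* fallthrough */
    #endif

    static const char *classify(int precise_ip)
    {
        switch (precise_ip) {
        case 1:
            fallthrough;    /* levels 1 and 2 are handled identically */
        case 2:
            return "precise";
        default:
            return "not precise";
        }
    }

    int main(void)
    {
        printf("%s\n", classify(1));
        return 0;
    }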
@@ -186,10 +186,6 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
 })
 
 
-#ifndef __fallthrough
-# define __fallthrough
-#endif
-
 /* Indirect macros required for expanded argument pasting, eg. __LINE__. */
 #define ___PASTE(a, b) a##b
 #define __PASTE(a, b) ___PASTE(a, b)
@@ -7,8 +7,19 @@
 #ifndef _LINUX_CORESIGHT_PMU_H
 #define _LINUX_CORESIGHT_PMU_H
 
+#include <linux/bits.h>
+
 #define CORESIGHT_ETM_PMU_NAME "cs_etm"
-#define CORESIGHT_ETM_PMU_SEED 0x10
+
+/*
+ * The legacy Trace ID system based on fixed calculation from the cpu
+ * number. This has been replaced by drivers using a dynamic allocation
+ * system - but need to retain the legacy algorithm for backward comparibility
+ * in certain situations:-
+ * a) new perf running on older systems that generate the legacy mapping
+ * b) older tools that may not update at the same time as the kernel.
+ */
+#define CORESIGHT_LEGACY_CPU_TRACE_ID(cpu) (0x10 + (cpu * 2))
 
 /*
  * Below are the definition of bit offsets for perf option, and works as

@@ -34,15 +45,16 @@
 #define ETM4_CFG_BIT_RETSTK 12
 #define ETM4_CFG_BIT_VMID_OPT 15
 
-static inline int coresight_get_trace_id(int cpu)
-{
-    /*
-     * A trace ID of value 0 is invalid, so let's start at some
-     * random value that fits in 7 bits and go from there. Since
-     * the common convention is to have data trace IDs be I(N) + 1,
-     * set instruction trace IDs as a function of the CPU number.
-     */
-    return (CORESIGHT_ETM_PMU_SEED + (cpu * 2));
-}
+/*
+ * Interpretation of the PERF_RECORD_AUX_OUTPUT_HW_ID payload.
+ * Used to associate a CPU with the CoreSight Trace ID.
+ * [07:00] - Trace ID - uses 8 bits to make value easy to read in file.
+ * [59:08] - Unused (SBZ)
+ * [63:60] - Version
+ */
+#define CS_AUX_HW_ID_TRACE_ID_MASK GENMASK_ULL(7, 0)
+#define CS_AUX_HW_ID_VERSION_MASK GENMASK_ULL(63, 60)
+
+#define CS_AUX_HW_ID_CURR_VERSION 0
 
 #endif
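Reviewer note: a sketch of how the legacy ID formula and the HW_ID payload fields defined above fit together (illustrative; GENMASK_ULL is open-coded here since this runs in userspace):

    #include <stdio.h>
    #include <stdint.h>

    /* Open-coded equivalents of the kernel definitions above. */
    #define CORESIGHT_LEGACY_CPU_TRACE_ID(cpu) (0x10 + (cpu * 2))
    #define CS_AUX_HW_ID_TRACE_ID_MASK         0xffULL          /* bits [07:00] */
    #define CS_AUX_HW_ID_VERSION_MASK          (0xfULL << 60)   /* bits [63:60] */

    int main(void)
    {
        uint64_t payload = 0x0000000000000014ULL;    /* version 0, trace ID 0x14 */
        unsigned int trace_id = payload & CS_AUX_HW_ID_TRACE_ID_MASK;
        unsigned int version = (payload & CS_AUX_HW_ID_VERSION_MASK) >> 60;

        printf("cpu2 legacy id = 0x%x\n", CORESIGHT_LEGACY_CPU_TRACE_ID(2)); /* 0x14 */
        printf("payload: version=%u trace_id=0x%x\n", version, trace_id);
        return 0;
    }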
@@ -1339,7 +1339,8 @@ union perf_mem_data_src {
 #define PERF_MEM_LVLNUM_L2 0x02 /* L2 */
 #define PERF_MEM_LVLNUM_L3 0x03 /* L3 */
 #define PERF_MEM_LVLNUM_L4 0x04 /* L4 */
-/* 5-0x8 available */
+/* 5-0x7 available */
+#define PERF_MEM_LVLNUM_UNC 0x08 /* Uncached */
 #define PERF_MEM_LVLNUM_CXL 0x09 /* CXL */
 #define PERF_MEM_LVLNUM_IO 0x0a /* I/O */
 #define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */
@@ -14,7 +14,7 @@ struct cgroupfs_cache_entry {
 };
 
 /* just cache last used one */
-static struct cgroupfs_cache_entry cached;
+static struct cgroupfs_cache_entry *cached;
 
 int cgroupfs_find_mountpoint(char *buf, size_t maxlen, const char *subsys)
 {

@@ -24,9 +24,9 @@ int cgroupfs_find_mountpoint(char *buf, size_t maxlen, const char *subsys)
     char *p, *path;
     char mountpoint[PATH_MAX];
 
-    if (!strcmp(cached.subsys, subsys)) {
-        if (strlen(cached.mountpoint) < maxlen) {
-            strcpy(buf, cached.mountpoint);
+    if (cached && !strcmp(cached->subsys, subsys)) {
+        if (strlen(cached->mountpoint) < maxlen) {
+            strcpy(buf, cached->mountpoint);
             return 0;
         }
         return -1;

@@ -91,8 +91,13 @@ int cgroupfs_find_mountpoint(char *buf, size_t maxlen, const char *subsys)
     free(line);
     fclose(fp);
 
-    strncpy(cached.subsys, subsys, sizeof(cached.subsys) - 1);
-    strcpy(cached.mountpoint, mountpoint);
+    if (!cached)
+        cached = calloc(1, sizeof(*cached));
+
+    if (cached) {
+        strncpy(cached->subsys, subsys, sizeof(cached->subsys) - 1);
+        strcpy(cached->mountpoint, mountpoint);
+    }
 
     if (mountpoint[0] && strlen(mountpoint) < maxlen) {
         strcpy(buf, mountpoint);
@@ -1,4 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <assert.h>
 #include <ctype.h>
 #include <errno.h>
 #include <limits.h>

@@ -10,6 +11,7 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
+#include <pthread.h>
 #include <unistd.h>
 #include <sys/mount.h>

@@ -43,7 +45,7 @@
 #define BPF_FS_MAGIC 0xcafe4a11
 #endif
 
-static const char * const sysfs__fs_known_mountpoints[] = {
+static const char * const sysfs__known_mountpoints[] = {
     "/sys",
     0,
 };
@@ -86,87 +88,89 @@ static const char * const bpf_fs__known_mountpoints[] = {
 };
 
 struct fs {
-    const char *name;
-    const char * const *mounts;
-    char path[PATH_MAX];
-    bool found;
-    bool checked;
-    long magic;
-};
-
-enum {
-    FS__SYSFS = 0,
-    FS__PROCFS = 1,
-    FS__DEBUGFS = 2,
-    FS__TRACEFS = 3,
-    FS__HUGETLBFS = 4,
-    FS__BPF_FS = 5,
+    const char * const name;
+    const char * const * const mounts;
+    char *path;
+    pthread_mutex_t mount_mutex;
+    const long magic;
 };
 
 #ifndef TRACEFS_MAGIC
 #define TRACEFS_MAGIC 0x74726163
 #endif
 
-static struct fs fs__entries[] = {
-    [FS__SYSFS] = {
-        .name = "sysfs",
-        .mounts = sysfs__fs_known_mountpoints,
-        .magic = SYSFS_MAGIC,
-        .checked = false,
-    },
-    [FS__PROCFS] = {
-        .name = "proc",
-        .mounts = procfs__known_mountpoints,
-        .magic = PROC_SUPER_MAGIC,
-        .checked = false,
-    },
-    [FS__DEBUGFS] = {
-        .name = "debugfs",
-        .mounts = debugfs__known_mountpoints,
-        .magic = DEBUGFS_MAGIC,
-        .checked = false,
-    },
-    [FS__TRACEFS] = {
-        .name = "tracefs",
-        .mounts = tracefs__known_mountpoints,
-        .magic = TRACEFS_MAGIC,
-        .checked = false,
-    },
-    [FS__HUGETLBFS] = {
-        .name = "hugetlbfs",
-        .mounts = hugetlbfs__known_mountpoints,
-        .magic = HUGETLBFS_MAGIC,
-        .checked = false,
-    },
-    [FS__BPF_FS] = {
-        .name = "bpf",
-        .mounts = bpf_fs__known_mountpoints,
-        .magic = BPF_FS_MAGIC,
-        .checked = false,
-    },
-};
+static void fs__init_once(struct fs *fs);
+static const char *fs__mountpoint(const struct fs *fs);
+static const char *fs__mount(struct fs *fs);
+
+#define FS(lower_name, fs_name, upper_name) \
+    static struct fs fs__##lower_name = { \
+        .name = #fs_name, \
+        .mounts = lower_name##__known_mountpoints, \
+        .magic = upper_name##_MAGIC, \
+        .mount_mutex = PTHREAD_MUTEX_INITIALIZER, \
+    }; \
+ \
+    static void lower_name##_init_once(void) \
+    { \
+        struct fs *fs = &fs__##lower_name; \
+ \
+        fs__init_once(fs); \
+    } \
+ \
+    const char *lower_name##__mountpoint(void) \
+    { \
+        static pthread_once_t init_once = PTHREAD_ONCE_INIT; \
+        struct fs *fs = &fs__##lower_name; \
+ \
+        pthread_once(&init_once, lower_name##_init_once); \
+        return fs__mountpoint(fs); \
+    } \
+ \
+    const char *lower_name##__mount(void) \
+    { \
+        const char *mountpoint = lower_name##__mountpoint(); \
+        struct fs *fs = &fs__##lower_name; \
+ \
+        if (mountpoint) \
+            return mountpoint; \
+ \
+        return fs__mount(fs); \
+    } \
+ \
+    bool lower_name##__configured(void) \
+    { \
+        return lower_name##__mountpoint() != NULL; \
+    }
+
+FS(sysfs, sysfs, SYSFS);
+FS(procfs, procfs, PROC_SUPER);
+FS(debugfs, debugfs, DEBUGFS);
+FS(tracefs, tracefs, TRACEFS);
+FS(hugetlbfs, hugetlbfs, HUGETLBFS);
+FS(bpf_fs, bpf, BPF_FS);
 
 static bool fs__read_mounts(struct fs *fs)
 {
-    bool found = false;
     char type[100];
     FILE *fp;
+    char path[PATH_MAX + 1];
 
     fp = fopen("/proc/mounts", "r");
     if (fp == NULL)
-        return NULL;
+        return false;
 
-    while (!found &&
-           fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n",
-              fs->path, type) == 2) {
+    while (fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n",
+              path, type) == 2) {
 
-        if (strcmp(type, fs->name) == 0)
-            found = true;
+        if (strcmp(type, fs->name) == 0) {
+            fs->path = strdup(path);
+            fclose(fp);
+            return fs->path != NULL;
+        }
     }
 
     fclose(fp);
-    fs->checked = true;
-    return fs->found = found;
+    return false;
 }
@@ -188,8 +192,9 @@ static bool fs__check_mounts(struct fs *fs)
     ptr = fs->mounts;
     while (*ptr) {
         if (fs__valid_mount(*ptr, fs->magic) == 0) {
-            fs->found = true;
-            strcpy(fs->path, *ptr);
+            fs->path = strdup(*ptr);
+            if (!fs->path)
+                return false;
             return true;
         }
         ptr++;
@@ -227,43 +232,26 @@ static bool fs__env_override(struct fs *fs)
     if (!override_path)
         return false;
 
-    fs->found = true;
-    fs->checked = true;
-    strncpy(fs->path, override_path, sizeof(fs->path) - 1);
-    fs->path[sizeof(fs->path) - 1] = '\0';
+    fs->path = strdup(override_path);
+    if (!fs->path)
+        return false;
     return true;
 }
 
-static const char *fs__get_mountpoint(struct fs *fs)
+static void fs__init_once(struct fs *fs)
 {
-    if (fs__env_override(fs))
-        return fs->path;
-
-    if (fs__check_mounts(fs))
-        return fs->path;
-
-    if (fs__read_mounts(fs))
-        return fs->path;
-
-    return NULL;
+    if (!fs__env_override(fs) &&
+        !fs__check_mounts(fs) &&
+        !fs__read_mounts(fs)) {
+        assert(!fs->path);
+    } else {
+        assert(fs->path);
+    }
 }
 
-static const char *fs__mountpoint(int idx)
+static const char *fs__mountpoint(const struct fs *fs)
 {
-    struct fs *fs = &fs__entries[idx];
-
-    if (fs->found)
-        return (const char *)fs->path;
-
-    /* the mount point was already checked for the mount point
-     * but and did not exist, so return NULL to avoid scanning again.
-     * This makes the found and not found paths cost equivalent
-     * in case of multiple calls.
-     */
-    if (fs->checked)
-        return NULL;
-
-    return fs__get_mountpoint(fs);
+    return fs->path;
 }
 
 static const char *mount_overload(struct fs *fs)
@@ -278,45 +266,29 @@ static const char *mount_overload(struct fs *fs)
     return getenv(upper_name) ?: *fs->mounts;
 }
 
-static const char *fs__mount(int idx)
+static const char *fs__mount(struct fs *fs)
 {
-    struct fs *fs = &fs__entries[idx];
     const char *mountpoint;
 
-    if (fs__mountpoint(idx))
-        return (const char *)fs->path;
+    pthread_mutex_lock(&fs->mount_mutex);
+
+    /* Check if path found inside the mutex to avoid races with other callers of mount. */
+    mountpoint = fs__mountpoint(fs);
+    if (mountpoint)
+        goto out;
 
     mountpoint = mount_overload(fs);
 
-    if (mount(NULL, mountpoint, fs->name, 0, NULL) < 0)
-        return NULL;
-
-    return fs__check_mounts(fs) ? fs->path : NULL;
+    if (mount(NULL, mountpoint, fs->name, 0, NULL) == 0 &&
+        fs__valid_mount(mountpoint, fs->magic) == 0) {
+        fs->path = strdup(mountpoint);
+        mountpoint = fs->path;
+    }
+out:
+    pthread_mutex_unlock(&fs->mount_mutex);
+    return mountpoint;
 }
 
-#define FS(name, idx) \
-const char *name##__mountpoint(void) \
-{ \
-    return fs__mountpoint(idx); \
-} \
- \
-const char *name##__mount(void) \
-{ \
-    return fs__mount(idx); \
-} \
- \
-bool name##__configured(void) \
-{ \
-    return name##__mountpoint() != NULL; \
-}
-
-FS(sysfs, FS__SYSFS);
-FS(procfs, FS__PROCFS);
-FS(debugfs, FS__DEBUGFS);
-FS(tracefs, FS__TRACEFS);
-FS(hugetlbfs, FS__HUGETLBFS);
-FS(bpf_fs, FS__BPF_FS);
-
 int filename__read_int(const char *filename, int *value)
 {
     char line[64];
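Reviewer note: the generated per-filesystem accessors keep their old names, so callers are unchanged; only the first call now runs the lookup under pthread_once(), and mounting is serialized by the per-fs mutex. A hedged usage sketch:

    #include <stdio.h>

    /* Declarations normally provided by the library's fs.h header. */
    extern const char *tracefs__mountpoint(void);
    extern const char *tracefs__mount(void);

    int main(void)
    {
        /* First call resolves (env override, known mounts, /proc/mounts) once. */
        const char *path = tracefs__mountpoint();

        if (!path)
            path = tracefs__mount();    /* try to mount it if not found */

        printf("tracefs at %s\n", path ? path : "(unavailable)");
        return 0;
    }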
@@ -13,17 +13,12 @@
 
 #include "tracing_path.h"
 
-static char tracing_mnt[PATH_MAX] = "/sys/kernel/debug";
-static char tracing_path[PATH_MAX] = "/sys/kernel/debug/tracing";
-static char tracing_events_path[PATH_MAX] = "/sys/kernel/debug/tracing/events";
+static char tracing_path[PATH_MAX] = "/sys/kernel/tracing";
 
 static void __tracing_path_set(const char *tracing, const char *mountpoint)
 {
-    snprintf(tracing_mnt, sizeof(tracing_mnt), "%s", mountpoint);
     snprintf(tracing_path, sizeof(tracing_path), "%s/%s",
          mountpoint, tracing);
-    snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s%s",
-         mountpoint, tracing, "events");
 }
 
 static const char *tracing_path_tracefs_mount(void)

@@ -149,15 +144,15 @@ int tracing_path__strerror_open_tp(int err, char *buf, size_t size,
         /* sdt markers */
         if (!strncmp(filename, "sdt_", 4)) {
             snprintf(buf, size,
-                "Error:\tFile %s/%s not found.\n"
+                "Error:\tFile %s/events/%s not found.\n"
                 "Hint:\tSDT event cannot be directly recorded on.\n"
                 "\tPlease first use 'perf probe %s:%s' before recording it.\n",
-                tracing_events_path, filename, sys, name);
+                tracing_path, filename, sys, name);
         } else {
             snprintf(buf, size,
-                "Error:\tFile %s/%s not found.\n"
+                "Error:\tFile %s/events/%s not found.\n"
                 "Hint:\tPerhaps this kernel misses some CONFIG_ setting to enable this feature?.\n",
-                tracing_events_path, filename);
+                tracing_path, filename);
         }
         break;
     }
@@ -169,9 +164,9 @@ int tracing_path__strerror_open_tp(int err, char *buf, size_t size,
         break;
     case EACCES: {
         snprintf(buf, size,
-             "Error:\tNo permissions to read %s/%s\n"
+             "Error:\tNo permissions to read %s/events/%s\n"
             "Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n",
-             tracing_events_path, filename, tracing_path_mount());
+             tracing_path, filename, tracing_path_mount());
     }
         break;
     default:

@ -7,7 +7,10 @@
#ifndef __API_IO__
#define __API_IO__

#include <errno.h>
#include <poll.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

struct io {

@ -21,6 +24,8 @@ struct io {
	char *end;
	/* Currently accessed data pointer. */
	char *data;
	/* Read timeout, 0 implies no timeout. */
	int timeout_ms;
	/* Set true on end of file or a read error. */
	bool eof;
};

@ -33,6 +38,7 @@ static inline void io__init(struct io *io, int fd,
	io->buf = buf;
	io->end = buf;
	io->data = buf;
	io->timeout_ms = 0;
	io->eof = false;
}

@ -45,7 +51,29 @@ static inline int io__get_char(struct io *io)
		return -1;

	if (ptr == io->end) {
		ssize_t n = read(io->fd, io->buf, io->buf_len);
		ssize_t n;

		if (io->timeout_ms != 0) {
			struct pollfd pfds[] = {
				{
					.fd = io->fd,
					.events = POLLIN,
				},
			};

			n = poll(pfds, 1, io->timeout_ms);
			if (n == 0)
				errno = ETIMEDOUT;
			if (n > 0 && !(pfds[0].revents & POLLIN)) {
				errno = EIO;
				n = -1;
			}
			if (n <= 0) {
				io->eof = true;
				return -1;
			}
		}
		n = read(io->fd, io->buf, io->buf_len);

		if (n <= 0) {
			io->eof = true;

@ -112,4 +140,47 @@ static inline int io__get_dec(struct io *io, __u64 *dec)
	}
}

/* Read up to and including the first newline following the pattern of getline. */
static inline ssize_t io__getline(struct io *io, char **line_out, size_t *line_len_out)
{
	char buf[128];
	int buf_pos = 0;
	char *line = NULL, *temp;
	size_t line_len = 0;
	int ch = 0;

	/* TODO: reuse previously allocated memory. */
	free(*line_out);
	while (ch != '\n') {
		ch = io__get_char(io);

		if (ch < 0)
			break;

		if (buf_pos == sizeof(buf)) {
			temp = realloc(line, line_len + sizeof(buf));
			if (!temp)
				goto err_out;
			line = temp;
			memcpy(&line[line_len], buf, sizeof(buf));
			line_len += sizeof(buf);
			buf_pos = 0;
		}
		buf[buf_pos++] = (char)ch;
	}
	temp = realloc(line, line_len + buf_pos + 1);
	if (!temp)
		goto err_out;
	line = temp;
	memcpy(&line[line_len], buf, buf_pos);
	line[line_len + buf_pos] = '\0';
	line_len += buf_pos;
	*line_out = line;
	*line_len_out = line_len;
	return line_len;
err_out:
	free(line);
	return -ENOMEM;
}

#endif /* __API_IO__ */
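
/*
 * Editor's note: an illustrative sketch, not part of the patch, showing how
 * the new timeout_ms field and io__getline() compose. The dump_lines()
 * helper, the 500ms value and the use of stdout are assumptions made for
 * the example.
 */
#include <stdio.h>
#include <api/io.h>

static int dump_lines(int fd)
{
	struct io io;
	char buf[128];
	char *line = NULL;
	size_t line_len = 0;

	io__init(&io, fd, buf, sizeof(buf));
	io.timeout_ms = 500;	/* fail a read that stalls for more than 500ms */

	/* io__getline() returns the line length, or 0 once eof/error is hit. */
	while (io__getline(&io, &line, &line_len) > 0)
		fputs(line, stdout);

	free(line);
	return 0;
}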

@ -188,7 +188,7 @@ install_lib: libs
	cp -fpR $(LIBPERF_ALL) $(DESTDIR)$(libdir_SQ)

HDRS := bpf_perf.h core.h cpumap.h threadmap.h evlist.h evsel.h event.h mmap.h
INTERNAL_HDRS := cpumap.h evlist.h evsel.h lib.h mmap.h threadmap.h xyarray.h
INTERNAL_HDRS := cpumap.h evlist.h evsel.h lib.h mmap.h rc_check.h threadmap.h xyarray.h

INSTALL_HDRS_PFX := $(DESTDIR)$(prefix)/include/perf
INSTALL_HDRS := $(addprefix $(INSTALL_HDRS_PFX)/, $(HDRS))

@ -10,16 +10,21 @@
#include <ctype.h>
#include <limits.h>

static struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus)
void perf_cpu_map__set_nr(struct perf_cpu_map *map, int nr_cpus)
{
	struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(struct perf_cpu) * nr_cpus);
	RC_CHK_ACCESS(map)->nr = nr_cpus;
}

	if (cpus != NULL) {
struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus)
{
	RC_STRUCT(perf_cpu_map) *cpus = malloc(sizeof(*cpus) + sizeof(struct perf_cpu) * nr_cpus);
	struct perf_cpu_map *result;

	if (ADD_RC_CHK(result, cpus)) {
		cpus->nr = nr_cpus;
		refcount_set(&cpus->refcnt, 1);

	}
	return cpus;
	return result;
}

struct perf_cpu_map *perf_cpu_map__dummy_new(void)

@ -27,7 +32,7 @@ struct perf_cpu_map *perf_cpu_map__dummy_new(void)
	struct perf_cpu_map *cpus = perf_cpu_map__alloc(1);

	if (cpus)
		cpus->map[0].cpu = -1;
		RC_CHK_ACCESS(cpus)->map[0].cpu = -1;

	return cpus;
}

@ -35,23 +40,30 @@ struct perf_cpu_map *perf_cpu_map__dummy_new(void)
static void cpu_map__delete(struct perf_cpu_map *map)
{
	if (map) {
		WARN_ONCE(refcount_read(&map->refcnt) != 0,
		WARN_ONCE(refcount_read(perf_cpu_map__refcnt(map)) != 0,
			  "cpu_map refcnt unbalanced\n");
		free(map);
		RC_CHK_FREE(map);
	}
}

struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map)
{
	if (map)
		refcount_inc(&map->refcnt);
	return map;
	struct perf_cpu_map *result;

	if (RC_CHK_GET(result, map))
		refcount_inc(perf_cpu_map__refcnt(map));

	return result;
}

void perf_cpu_map__put(struct perf_cpu_map *map)
{
	if (map && refcount_dec_and_test(&map->refcnt))
		cpu_map__delete(map);
	if (map) {
		if (refcount_dec_and_test(perf_cpu_map__refcnt(map)))
			cpu_map__delete(map);
		else
			RC_CHK_PUT(map);
	}
}

static struct perf_cpu_map *cpu_map__default_new(void)

@ -68,7 +80,7 @@ static struct perf_cpu_map *cpu_map__default_new(void)
	int i;

	for (i = 0; i < nr_cpus; ++i)
		cpus->map[i].cpu = i;
		RC_CHK_ACCESS(cpus)->map[i].cpu = i;
	}

	return cpus;

@ -87,6 +99,11 @@ static int cmp_cpu(const void *a, const void *b)
	return cpu_a->cpu - cpu_b->cpu;
}

static struct perf_cpu __perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx)
{
	return RC_CHK_ACCESS(cpus)->map[idx];
}

static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, const struct perf_cpu *tmp_cpus)
{
	size_t payload_size = nr_cpus * sizeof(struct perf_cpu);

@ -94,15 +111,19 @@ static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, const struct perf_cpu
	int i, j;

	if (cpus != NULL) {
		memcpy(cpus->map, tmp_cpus, payload_size);
		qsort(cpus->map, nr_cpus, sizeof(struct perf_cpu), cmp_cpu);
		memcpy(RC_CHK_ACCESS(cpus)->map, tmp_cpus, payload_size);
		qsort(RC_CHK_ACCESS(cpus)->map, nr_cpus, sizeof(struct perf_cpu), cmp_cpu);
		/* Remove dups */
		j = 0;
		for (i = 0; i < nr_cpus; i++) {
			if (i == 0 || cpus->map[i].cpu != cpus->map[i - 1].cpu)
				cpus->map[j++].cpu = cpus->map[i].cpu;
			if (i == 0 ||
			    __perf_cpu_map__cpu(cpus, i).cpu !=
			    __perf_cpu_map__cpu(cpus, i - 1).cpu) {
				RC_CHK_ACCESS(cpus)->map[j++].cpu =
					__perf_cpu_map__cpu(cpus, i).cpu;
			}
		}
		cpus->nr = j;
		perf_cpu_map__set_nr(cpus, j);
		assert(j <= nr_cpus);
	}
	return cpus;

@ -257,26 +278,31 @@ out:
	return cpus;
}

static int __perf_cpu_map__nr(const struct perf_cpu_map *cpus)
{
	return RC_CHK_ACCESS(cpus)->nr;
}

struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx)
{
	struct perf_cpu result = {
		.cpu = -1
	};

	if (cpus && idx < cpus->nr)
		return cpus->map[idx];
	if (cpus && idx < __perf_cpu_map__nr(cpus))
		return __perf_cpu_map__cpu(cpus, idx);

	return result;
}

int perf_cpu_map__nr(const struct perf_cpu_map *cpus)
{
	return cpus ? cpus->nr : 1;
	return cpus ? __perf_cpu_map__nr(cpus) : 1;
}

bool perf_cpu_map__empty(const struct perf_cpu_map *map)
{
	return map ? map->map[0].cpu == -1 : true;
	return map ? __perf_cpu_map__cpu(map, 0).cpu == -1 : true;
}

int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu)

@ -287,10 +313,10 @@ int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu)
	return -1;

	low = 0;
	high = cpus->nr;
	high = __perf_cpu_map__nr(cpus);
	while (low < high) {
		int idx = (low + high) / 2;
		struct perf_cpu cpu_at_idx = cpus->map[idx];
		struct perf_cpu cpu_at_idx = __perf_cpu_map__cpu(cpus, idx);

		if (cpu_at_idx.cpu == cpu.cpu)
			return idx;

@ -309,6 +335,32 @@ bool perf_cpu_map__has(const struct perf_cpu_map *cpus, struct perf_cpu cpu)
	return perf_cpu_map__idx(cpus, cpu) != -1;
}

bool perf_cpu_map__equal(const struct perf_cpu_map *lhs, const struct perf_cpu_map *rhs)
{
	int nr;

	if (lhs == rhs)
		return true;

	if (!lhs || !rhs)
		return false;

	nr = __perf_cpu_map__nr(lhs);
	if (nr != __perf_cpu_map__nr(rhs))
		return false;

	for (int idx = 0; idx < nr; idx++) {
		if (__perf_cpu_map__cpu(lhs, idx).cpu != __perf_cpu_map__cpu(rhs, idx).cpu)
			return false;
	}
	return true;
}

bool perf_cpu_map__has_any_cpu(const struct perf_cpu_map *map)
{
	return map && __perf_cpu_map__cpu(map, 0).cpu == -1;
}

struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map)
{
	struct perf_cpu result = {

@ -316,7 +368,9 @@ struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map)
	};

	// cpu_map__trim_new() qsort()s it, cpu_map__default_new() sorts it as well.
	return map->nr > 0 ? map->map[map->nr - 1] : result;
	return __perf_cpu_map__nr(map) > 0
	       ? __perf_cpu_map__cpu(map, __perf_cpu_map__nr(map) - 1)
	       : result;
}

/** Is 'b' a subset of 'a'. */

@ -324,15 +378,15 @@ bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu
{
	if (a == b || !b)
		return true;
	if (!a || b->nr > a->nr)
	if (!a || __perf_cpu_map__nr(b) > __perf_cpu_map__nr(a))
		return false;

	for (int i = 0, j = 0; i < a->nr; i++) {
		if (a->map[i].cpu > b->map[j].cpu)
	for (int i = 0, j = 0; i < __perf_cpu_map__nr(a); i++) {
		if (__perf_cpu_map__cpu(a, i).cpu > __perf_cpu_map__cpu(b, j).cpu)
			return false;
		if (a->map[i].cpu == b->map[j].cpu) {
		if (__perf_cpu_map__cpu(a, i).cpu == __perf_cpu_map__cpu(b, j).cpu) {
			j++;
			if (j == b->nr)
			if (j == __perf_cpu_map__nr(b))
				return true;
		}
	}

@ -362,27 +416,27 @@ struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
		return perf_cpu_map__get(other);
	}

	tmp_len = orig->nr + other->nr;
	tmp_len = __perf_cpu_map__nr(orig) + __perf_cpu_map__nr(other);
	tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu));
	if (!tmp_cpus)
		return NULL;

	/* Standard merge algorithm from wikipedia */
	i = j = k = 0;
	while (i < orig->nr && j < other->nr) {
		if (orig->map[i].cpu <= other->map[j].cpu) {
			if (orig->map[i].cpu == other->map[j].cpu)
	while (i < __perf_cpu_map__nr(orig) && j < __perf_cpu_map__nr(other)) {
		if (__perf_cpu_map__cpu(orig, i).cpu <= __perf_cpu_map__cpu(other, j).cpu) {
			if (__perf_cpu_map__cpu(orig, i).cpu == __perf_cpu_map__cpu(other, j).cpu)
				j++;
			tmp_cpus[k++] = orig->map[i++];
			tmp_cpus[k++] = __perf_cpu_map__cpu(orig, i++);
		} else
			tmp_cpus[k++] = other->map[j++];
			tmp_cpus[k++] = __perf_cpu_map__cpu(other, j++);
	}

	while (i < orig->nr)
		tmp_cpus[k++] = orig->map[i++];
	while (i < __perf_cpu_map__nr(orig))
		tmp_cpus[k++] = __perf_cpu_map__cpu(orig, i++);

	while (j < other->nr)
		tmp_cpus[k++] = other->map[j++];
	while (j < __perf_cpu_map__nr(other))
		tmp_cpus[k++] = __perf_cpu_map__cpu(other, j++);
	assert(k <= tmp_len);

	merged = cpu_map__trim_new(k, tmp_cpus);

@ -390,3 +444,38 @@ struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
	perf_cpu_map__put(orig);
	return merged;
}

struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig,
					     struct perf_cpu_map *other)
{
	struct perf_cpu *tmp_cpus;
	int tmp_len;
	int i, j, k;
	struct perf_cpu_map *merged = NULL;

	if (perf_cpu_map__is_subset(other, orig))
		return perf_cpu_map__get(orig);
	if (perf_cpu_map__is_subset(orig, other))
		return perf_cpu_map__get(other);

	tmp_len = max(__perf_cpu_map__nr(orig), __perf_cpu_map__nr(other));
	tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu));
	if (!tmp_cpus)
		return NULL;

	i = j = k = 0;
	while (i < __perf_cpu_map__nr(orig) && j < __perf_cpu_map__nr(other)) {
		if (__perf_cpu_map__cpu(orig, i).cpu < __perf_cpu_map__cpu(other, j).cpu)
			i++;
		else if (__perf_cpu_map__cpu(orig, i).cpu > __perf_cpu_map__cpu(other, j).cpu)
			j++;
		else {
			j++;
			tmp_cpus[k++] = __perf_cpu_map__cpu(orig, i++);
		}
	}
	if (k)
		merged = cpu_map__trim_new(k, tmp_cpus);
	free(tmp_cpus);
	return merged;
}
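
/*
 * Editor's note: an illustrative sketch, not part of the patch, of the set
 * semantics the new intersect helper provides. The cpu lists used here are
 * assumptions made for the example.
 */
#include <assert.h>
#include <perf/cpumap.h>

static void intersect_example(void)
{
	struct perf_cpu_map *a = perf_cpu_map__new("0-3");
	struct perf_cpu_map *b = perf_cpu_map__new("2-5");
	/* The result holds the common CPUs 2 and 3; inputs keep their references. */
	struct perf_cpu_map *both = perf_cpu_map__intersect(a, b);

	assert(perf_cpu_map__nr(both) == 2);
	assert(perf_cpu_map__has(both, (struct perf_cpu){ .cpu = 2 }));

	perf_cpu_map__put(both);
	perf_cpu_map__put(b);
	perf_cpu_map__put(a);
}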

@ -36,18 +36,33 @@ void perf_evlist__init(struct perf_evlist *evlist)
static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
					  struct perf_evsel *evsel)
{
	/*
	 * We already have cpus for evsel (via PMU sysfs) so
	 * keep it, if there's no target cpu list defined.
	 */
	if (evsel->system_wide) {
		/* System wide: set the cpu map of the evsel to all online CPUs. */
		perf_cpu_map__put(evsel->cpus);
		evsel->cpus = perf_cpu_map__new(NULL);
	} else if (evlist->has_user_cpus && evsel->is_pmu_core) {
		/*
		 * User requested CPUs on a core PMU, ensure the requested CPUs
		 * are valid by intersecting with those of the PMU.
		 */
		perf_cpu_map__put(evsel->cpus);
		evsel->cpus = perf_cpu_map__intersect(evlist->user_requested_cpus, evsel->own_cpus);
	} else if (!evsel->own_cpus || evlist->has_user_cpus ||
		   (!evsel->requires_cpu && perf_cpu_map__empty(evlist->user_requested_cpus))) {
		   (!evsel->requires_cpu && perf_cpu_map__has_any_cpu(evlist->user_requested_cpus))) {
		/*
		 * The PMU didn't specify a default cpu map, this isn't a core
		 * event and the user requested CPUs or the evlist user
		 * requested CPUs have the "any CPU" (aka dummy) CPU value, in
		 * which case use the user requested CPUs rather than the PMU
		 * ones.
		 */
		perf_cpu_map__put(evsel->cpus);
		evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus);
	} else if (evsel->cpus != evsel->own_cpus) {
		/*
		 * No user requested cpu map but the PMU cpu map doesn't match
		 * the evsel's. Reset it back to the PMU cpu map.
		 */
		perf_cpu_map__put(evsel->cpus);
		evsel->cpus = perf_cpu_map__get(evsel->own_cpus);
	}

@ -687,15 +702,14 @@ perf_evlist__next_mmap(struct perf_evlist *evlist, struct perf_mmap *map,

void __perf_evlist__set_leader(struct list_head *list, struct perf_evsel *leader)
{
	struct perf_evsel *first, *last, *evsel;
	struct perf_evsel *evsel;
	int n = 0;

	first = list_first_entry(list, struct perf_evsel, node);
	last = list_last_entry(list, struct perf_evsel, node);

	leader->nr_members = last->idx - first->idx + 1;

	__perf_evlist__for_each_entry(list, evsel)
	__perf_evlist__for_each_entry(list, evsel) {
		evsel->leader = leader;
		n++;
	}
	leader->nr_members = n;
}

void perf_evlist__set_leader(struct perf_evlist *evlist)

@ -704,7 +718,23 @@ void perf_evlist__set_leader(struct perf_evlist *evlist)
		struct perf_evsel *first = list_entry(evlist->entries.next,
						      struct perf_evsel, node);

		evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0;
		__perf_evlist__set_leader(&evlist->entries, first);
	}
}

int perf_evlist__nr_groups(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;
	int nr_groups = 0;

	perf_evlist__for_each_evsel(evlist, evsel) {
		/*
		 * evsels by default have a nr_members of 1, and they are their
		 * own leader. If the nr_members is >1 then this is an
		 * indication of a group.
		 */
		if (evsel->leader == evsel && evsel->nr_members > 1)
			nr_groups++;
	}
	return nr_groups;
}

@ -4,6 +4,7 @@

#include <linux/refcount.h>
#include <perf/cpumap.h>
#include <internal/rc_check.h>

/**
 * A sized, reference counted, sorted array of integers representing CPU

@ -12,7 +13,7 @@
 * gaps if CPU numbers were used. For events associated with a pid, rather than
 * a CPU, a single dummy map with an entry of -1 is used.
 */
struct perf_cpu_map {
DECLARE_RC_STRUCT(perf_cpu_map) {
	refcount_t refcnt;
	/** Length of the map array. */
	int nr;

@ -24,7 +25,14 @@ struct perf_cpu_map {
#define MAX_NR_CPUS 2048
#endif

struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus);
int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu);
bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu_map *b);

void perf_cpu_map__set_nr(struct perf_cpu_map *map, int nr_cpus);

static inline refcount_t *perf_cpu_map__refcnt(struct perf_cpu_map *map)
{
	return &RC_CHK_ACCESS(map)->refcnt;
}
#endif /* __LIBPERF_INTERNAL_CPUMAP_H */

@ -17,7 +17,6 @@ struct perf_mmap_param;
struct perf_evlist {
	struct list_head entries;
	int nr_entries;
	int nr_groups;
	bool has_user_cpus;
	bool needs_map_propagation;
	/**

@ -41,7 +41,14 @@ struct perf_sample_id {
struct perf_evsel {
	struct list_head node;
	struct perf_event_attr attr;
	/** The commonly used cpu map of CPUs the event should be opened upon, etc. */
	struct perf_cpu_map *cpus;
	/**
	 * The cpu map read from the PMU. For core PMUs this is the list of all
	 * CPUs the event can be opened upon. For other PMUs this is the default
	 * cpu map for opening the event on, for example, the first CPU on a
	 * socket for an uncore event.
	 */
	struct perf_cpu_map *own_cpus;
	struct perf_thread_map *threads;
	struct xyarray *fd;

@ -55,9 +62,9 @@ struct perf_evsel {
	int nr_members;
	/*
	 * system_wide is for events that need to be on every CPU, irrespective
	 * of user requested CPUs or threads. Map propagation will set cpus to
	 * this event's own_cpus, whereby they will contribute to evlist
	 * all_cpus.
	 * of user requested CPUs or threads. The main example of this is the
	 * dummy event. Map propagation will set cpus for this event to all CPUs
	 * as software PMU events, like dummy, have an empty CPU map.
	 */
	bool system_wide;
	/*

@ -65,6 +72,8 @@ struct perf_evsel {
	 * i.e. it cannot be the 'any CPU' value of -1.
	 */
	bool requires_cpu;
	/** Is the PMU for the event a core one? Affects the handling of own_cpus. */
	bool is_pmu_core;
	int idx;
};

@ -0,0 +1,102 @@
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
#ifndef __LIBPERF_INTERNAL_RC_CHECK_H
#define __LIBPERF_INTERNAL_RC_CHECK_H

#include <stdlib.h>
#include <linux/zalloc.h>

/*
 * Enable reference count checking implicitly with leak checking, which is
 * integrated into address sanitizer.
 */
#if defined(LEAK_SANITIZER) || defined(ADDRESS_SANITIZER)
#define REFCNT_CHECKING 1
#endif

/*
 * Shared reference count checking macros.
 *
 * Reference count checking is an approach to sanitizing the use of reference
 * counted structs. It leverages address and leak sanitizers to make sure gets
 * are paired with a put. Reference count checking adds a malloc-ed layer of
 * indirection on a get, and frees it on a put. A missed put will be reported as
 * a memory leak. A double put will be reported as a double free. Accessing
 * after a put will cause a use-after-free and/or a segfault.
 */

#ifndef REFCNT_CHECKING
/* Replaces "struct foo" so that the pointer may be interposed. */
#define DECLARE_RC_STRUCT(struct_name) \
	struct struct_name

/* Declare a reference counted struct variable. */
#define RC_STRUCT(struct_name) struct struct_name

/*
 * Interpose the indirection. Result will hold the indirection and object is the
 * reference counted struct.
 */
#define ADD_RC_CHK(result, object) (result = object, object)

/* Strip the indirection layer. */
#define RC_CHK_ACCESS(object) object

/* Frees the object and the indirection layer. */
#define RC_CHK_FREE(object) free(object)

/* A get operation adding the indirection layer. */
#define RC_CHK_GET(result, object) ADD_RC_CHK(result, object)

/* A put operation removing the indirection layer. */
#define RC_CHK_PUT(object) {}

#else

/* Replaces "struct foo" so that the pointer may be interposed. */
#define DECLARE_RC_STRUCT(struct_name) \
	struct original_##struct_name; \
	struct struct_name { \
		struct original_##struct_name *orig; \
	}; \
	struct original_##struct_name

/* Declare a reference counted struct variable. */
#define RC_STRUCT(struct_name) struct original_##struct_name

/*
 * Interpose the indirection. Result will hold the indirection and object is the
 * reference counted struct.
 */
#define ADD_RC_CHK(result, object) \
	( \
	object ? (result = malloc(sizeof(*result)), \
		  result ? (result->orig = object, result) \
			 : (result = NULL, NULL)) \
	       : (result = NULL, NULL) \
	)

/* Strip the indirection layer. */
#define RC_CHK_ACCESS(object) object->orig

/* Frees the object and the indirection layer. */
#define RC_CHK_FREE(object) \
	do { \
		zfree(&object->orig); \
		free(object); \
	} while(0)

/* A get operation adding the indirection layer. */
#define RC_CHK_GET(result, object) ADD_RC_CHK(result, (object ? object->orig : NULL))

/* A put operation removing the indirection layer. */
#define RC_CHK_PUT(object) \
	do { \
		if (object) { \
			object->orig = NULL; \
			free(object); \
		} \
	} while(0)

#endif

#endif /* __LIBPERF_INTERNAL_RC_CHECK_H */
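
/*
 * Editor's note: an illustrative sketch, not part of the patch, showing how a
 * struct adopts the macros above. The "example" struct and its helpers are
 * assumptions made for the example (and <linux/refcount.h> is assumed);
 * perf_cpu_map in cpumap.h follows the same pattern.
 */
#include <linux/refcount.h>
#include <internal/rc_check.h>

DECLARE_RC_STRUCT(example) {
	refcount_t refcnt;
	int value;
};

static struct example *example__new(int value)
{
	RC_STRUCT(example) *obj = malloc(sizeof(*obj));
	struct example *result;

	/* In checking builds this allocates the interposed indirection. */
	if (ADD_RC_CHK(result, obj)) {
		RC_CHK_ACCESS(result)->value = value;
		refcount_set(&RC_CHK_ACCESS(result)->refcnt, 1);
	}
	return result;
}

static void example__put(struct example *e)
{
	if (e && refcount_dec_and_test(&RC_CHK_ACCESS(e)->refcnt))
		RC_CHK_FREE(e);	/* frees both the struct and the indirection */
	else
		RC_CHK_PUT(e);	/* frees only this reference's indirection */
}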

@ -11,8 +11,16 @@ struct perf_cpu {
	int cpu;
};

struct perf_cache {
	int cache_lvl;
	int cache;
};

struct perf_cpu_map;

/**
 * perf_cpu_map__dummy_new - a map with a singular "any CPU"/dummy -1 value.
 */
LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__default_new(void);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list);

@ -20,12 +28,23 @@ LIBPERF_API struct perf_cpu_map *perf_cpu_map__read(FILE *file);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
						     struct perf_cpu_map *other);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig,
							 struct perf_cpu_map *other);
LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map);
LIBPERF_API struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx);
LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
/**
 * perf_cpu_map__empty - is map either empty or the "any CPU"/dummy value.
 */
LIBPERF_API bool perf_cpu_map__empty(const struct perf_cpu_map *map);
LIBPERF_API struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map);
LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, struct perf_cpu cpu);
LIBPERF_API bool perf_cpu_map__equal(const struct perf_cpu_map *lhs,
				     const struct perf_cpu_map *rhs);
/**
 * perf_cpu_map__has_any_cpu - Does the map contain the "any CPU"/dummy -1 value?
 */
LIBPERF_API bool perf_cpu_map__has_any_cpu(const struct perf_cpu_map *map);

#define perf_cpu_map__for_each_cpu(cpu, idx, cpus)		\
	for ((idx) = 0, (cpu) = perf_cpu_map__cpu(cpus, idx);	\

@ -70,6 +70,8 @@ struct perf_record_lost {
	__u64 lost;
};

#define PERF_RECORD_MISC_LOST_SAMPLES_BPF (1 << 15)

struct perf_record_lost_samples {
	struct perf_event_header header;
	__u64 lost;

@ -378,7 +380,8 @@ enum {
	PERF_STAT_CONFIG_TERM__AGGR_MODE	= 0,
	PERF_STAT_CONFIG_TERM__INTERVAL		= 1,
	PERF_STAT_CONFIG_TERM__SCALE		= 2,
	PERF_STAT_CONFIG_TERM__MAX		= 3,
	PERF_STAT_CONFIG_TERM__AGGR_LEVEL	= 3,
	PERF_STAT_CONFIG_TERM__MAX		= 4,
};

struct perf_record_stat_config_entry {

@ -47,4 +47,5 @@ LIBPERF_API struct perf_mmap *perf_evlist__next_mmap(struct perf_evlist *evlist,
	(pos) = perf_evlist__next_mmap((evlist), (pos), overwrite))

LIBPERF_API void perf_evlist__set_leader(struct perf_evlist *evlist);
LIBPERF_API int perf_evlist__nr_groups(struct perf_evlist *evlist);
#endif /* __LIBPERF_EVLIST_H */

@ -36,38 +36,40 @@ static int is_absolute_path(const char *path)
	return path[0] == '/';
}

static const char *get_pwd_cwd(void)
static const char *get_pwd_cwd(char *buf, size_t sz)
{
	static char cwd[PATH_MAX + 1];
	char *pwd;
	struct stat cwd_stat, pwd_stat;
	if (getcwd(cwd, PATH_MAX) == NULL)
	if (getcwd(buf, sz) == NULL)
		return NULL;
	pwd = getenv("PWD");
	if (pwd && strcmp(pwd, cwd)) {
		stat(cwd, &cwd_stat);
	if (pwd && strcmp(pwd, buf)) {
		stat(buf, &cwd_stat);
		if (!stat(pwd, &pwd_stat) &&
		    pwd_stat.st_dev == cwd_stat.st_dev &&
		    pwd_stat.st_ino == cwd_stat.st_ino) {
			strlcpy(cwd, pwd, PATH_MAX);
			strlcpy(buf, pwd, sz);
		}
	}
	return cwd;
	return buf;
}

static const char *make_nonrelative_path(const char *path)
static const char *make_nonrelative_path(char *buf, size_t sz, const char *path)
{
	static char buf[PATH_MAX + 1];

	if (is_absolute_path(path)) {
		if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX)
		if (strlcpy(buf, path, sz) >= sz)
			die("Too long path: %.*s", 60, path);
	} else {
		const char *cwd = get_pwd_cwd();
		const char *cwd = get_pwd_cwd(buf, sz);

		if (!cwd)
			die("Cannot determine the current working directory");
		if (snprintf(buf, PATH_MAX, "%s/%s", cwd, path) >= PATH_MAX)

		if (strlen(cwd) + strlen(path) + 2 >= sz)
			die("Too long path: %.*s", 60, path);

		strcat(buf, "/");
		strcat(buf, path);
	}
	return buf;
}

@ -133,8 +135,11 @@ static void add_path(char **out, const char *path)
	if (path && *path) {
		if (is_absolute_path(path))
			astrcat(out, path);
		else
			astrcat(out, make_nonrelative_path(path));
		else {
			char buf[PATH_MAX];

			astrcat(out, make_nonrelative_path(buf, sizeof(buf), path));
		}

		astrcat(out, ":");
	}

@ -16,6 +16,8 @@
void add_cmdname(struct cmdnames *cmds, const char *name, size_t len)
{
	struct cmdname *ent = malloc(sizeof(*ent) + len + 1);
	if (!ent)
		return;

	ent->len = len;
	memcpy(ent->name, name, len);

@ -66,7 +68,13 @@ void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes)
	while (ci < cmds->cnt && ei < excludes->cnt) {
		cmp = strcmp(cmds->names[ci]->name, excludes->names[ei]->name);
		if (cmp < 0) {
			cmds->names[cj++] = cmds->names[ci++];
			if (ci == cj) {
				ci++;
				cj++;
			} else {
				zfree(&cmds->names[cj]);
				cmds->names[cj++] = cmds->names[ci++];
			}
		} else if (cmp == 0) {
			ci++;
			ei++;

@ -74,10 +82,14 @@ void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes)
			ei++;
		}
	}

	while (ci < cmds->cnt)
		cmds->names[cj++] = cmds->names[ci++];

	if (ci != cj) {
		while (ci < cmds->cnt) {
			zfree(&cmds->names[cj]);
			cmds->names[cj++] = cmds->names[ci++];
		}
	}
	for (ci = cj; ci < cmds->cnt; ci++)
		zfree(&cmds->names[ci]);
	cmds->cnt = cj;
}

@ -250,11 +250,20 @@ $(MAN_HTML): $(OUTPUT)%.html : %.txt
	$(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
	mv $@+ $@

# Generate date from either KBUILD_BUILD_TIMESTAMP or git log of
# the doc input file
PERF_DATE = $(strip \
	$(if $(KBUILD_BUILD_TIMESTAMP), \
		$(shell date -u -d '$(KBUILD_BUILD_TIMESTAMP)' +%Y-%m-%d), \
		$(shell git log -1 --pretty="format:%cd" \
			--date=short --no-show-signature $<)))

ifdef USE_ASCIIDOCTOR
$(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : %.txt
	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
	$(ASCIIDOC) -b manpage -d manpage \
		$(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
		$(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) \
		-adocdate=$(PERF_DATE) -o $@+ $< && \
	mv $@+ $@
endif

@ -266,9 +275,7 @@ $(OUTPUT)%.xml : %.txt
	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
	$(ASCIIDOC) -b docbook -d manpage \
		$(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) \
		-aperf_date=$(shell git log -1 --pretty="format:%cd" \
			--date=short --no-show-signature $<) \
		-o $@+ $< && \
		-aperf_date=$(PERF_DATE) -o $@+ $< && \
	mv $@+ $@

XSLT = docbook.xsl

@ -116,6 +116,9 @@ include::itrace.txt[]
-M::
--disassembler-style=:: Set disassembler style for objdump.

--addr2line=<path>::
	Path to addr2line binary.

--objdump=<path>::
	Path to objdump binary.

@ -250,7 +250,13 @@ annotate.*::
	These are in control of addresses, jump function, source code
	in lines of assembly code from a specific program.

	annotate.disassembler_style:
	annotate.addr2line::
		addr2line binary to use for file names and line numbers.

	annotate.objdump::
		objdump binary to use for disassembly and annotations.

	annotate.disassembler_style::
		Use this to change the default disassembler style to some other value
		supported by binutils, such as "intel", see the '-M' option help in the
		'objdump' man page.

@ -663,7 +669,7 @@ llvm.*::
		"$CLANG_OPTIONS $PERF_BPF_INC_OPTIONS $KERNEL_INC_OPTIONS " \
		"-Wno-unused-value -Wno-pointer-sign " \
		"-working-directory $WORKING_DIR " \
		"-c \"$CLANG_SOURCE\" -target bpf $CLANG_EMIT_LLVM -O2 -o - $LLVM_OPTIONS_PIPE"
		"-c \"$CLANG_SOURCE\" --target=bpf $CLANG_EMIT_LLVM -O2 -o - $LLVM_OPTIONS_PIPE"

llvm.clang-opt::
	Options passed to clang.

@ -64,6 +64,12 @@ internal filtering.
If implemented, 'filter_description' should return a one-line description
of the filter, and optionally a longer description.

Do not assume the 'sample' argument is valid (dereferenceable)
after 'filter_event' and 'filter_event_early' return.

Do not assume data referenced by pointers in struct perf_dlfilter_sample
is valid (dereferenceable) after 'filter_event' and 'filter_event_early' return.

The perf_dlfilter_sample structure
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

@ -150,7 +156,8 @@ struct perf_dlfilter_fns {
	const char *(*srcline)(void *ctx, __u32 *line_number);
	struct perf_event_attr *(*attr)(void *ctx);
	__s32 (*object_code)(void *ctx, __u64 ip, void *buf, __u32 len);
	void *(*reserved[120])(void *);
	void (*al_cleanup)(void *ctx, struct perf_dlfilter_al *al);
	void *(*reserved[119])(void *);
};
----

@ -161,7 +168,8 @@ struct perf_dlfilter_fns {
'args' returns arguments from --dlarg options.

'resolve_address' provides information about 'address'. al->size must be set
before calling. Returns 0 on success, -1 otherwise.
before calling. Returns 0 on success, -1 otherwise. Call al_cleanup() (if present,
see below) when 'al' data is no longer needed.

'insn' returns instruction bytes and length.

@ -171,6 +179,12 @@ before calling. Returns 0 on success, -1 otherwise.

'object_code' reads object code and returns the number of bytes read.

'al_cleanup' must be called (if present, so check perf_dlfilter_fns.al_cleanup != NULL)
after resolve_address() to free any associated resources.
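
For example, a filter might resolve an address like this (an editor's
illustration rather than text from the dlfilter sources; the 'ctx' and
'address' values come from the surrounding filter callback):

----
	struct perf_dlfilter_al al = { .size = sizeof(al) };

	if (!perf_dlfilter_fns.resolve_address(ctx, address, &al)) {
		/* ... use al here ... */
		if (perf_dlfilter_fns.al_cleanup)
			perf_dlfilter_fns.al_cleanup(ctx, &al);
	}
----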

Do not assume pointers obtained via perf_dlfilter_fns are valid (dereferenceable)
after 'filter_event' and 'filter_event_early' return.

The perf_dlfilter_al structure
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

@ -197,9 +211,13 @@ struct perf_dlfilter_al {
	/* Below members are only populated by resolve_ip() */
	__u8 filtered; /* true if this sample event will be filtered out */
	const char *comm;
	void *priv; /* Private data. Do not change */
};
----

Do not assume data referenced by pointers in struct perf_dlfilter_al
is valid (dereferenceable) after 'filter_event' and 'filter_event_early' return.

perf_dlfilter_sample flags
~~~~~~~~~~~~~~~~~~~~~~~~~~

@ -58,7 +58,7 @@ There are a couple of variants of perf kvm:
  events.

  'perf kvm stat report' reports statistical data which includes events
  handled time, samples, and so on.
  handled sample, percent_sample, time, percent_time, max_t, min_t, mean_t.

  'perf kvm stat live' reports statistical data in a live mode (similar to
  record + report but with statistical data updated live at a given display

@ -82,6 +82,8 @@ OPTIONS
:GMEXAMPLESUBCMD: top
include::guest-files.txt[]

--stdio:: Use the stdio interface.

-v::
--verbose::
	Be more verbose (show counter open errors, etc).

@ -97,7 +99,10 @@ STAT REPORT OPTIONS
-k::
--key=<value>::
	Sorting key. Possible values: sample (default, sort by samples
	number), time (sort by average time).
	number), percent_sample (sort by sample percentage), time
	(sort by average time), percent_time (sort by time percentage),
	max_t (sort by maximum time), min_t (sort by minimum time), mean_t
	(sort by mean time).
-p::
--pid=::
	Analyze events only for given process ID(s) (comma separated list).

@ -36,6 +36,9 @@ COMMON OPTIONS
--input=<file>::
	Input file name. (default: perf.data unless stdin is a fifo)

--output=<file>::
	Output file name for perf lock contention and report.

-v::
--verbose::
	Be more verbose (show symbol address, etc).

@ -155,8 +158,10 @@ CONTENTION OPTIONS
--tid=<value>::
	Record events on existing thread ID (comma separated list).

-M::
--map-nr-entries=<value>::
	Maximum number of BPF map entries (default: 10240).
	Maximum number of BPF map entries (default: 16384).
	This will be aligned to a power of 2.

--max-stack=<value>::
	Maximum stack depth when collecting lock contention (default: 8).

@ -198,6 +203,11 @@ CONTENTION OPTIONS
	Note that it matches the substring so 'rq' would match both 'raw_spin_rq_lock'
	and 'irq_enter_rcu'.

-x::
--field-separator=<SEP>::
	Show results using a CSV-style output to make it easy to import directly
	into spreadsheets. Columns are separated by the string specified in SEP.

SEE ALSO
--------

@ -119,9 +119,12 @@ OPTIONS
	"perf report" to view group events together.

--filter=<filter>::
	Event filter. This option should follow an event selector (-e) which
	selects either tracepoint event(s) or a hardware trace PMU
	(e.g. Intel PT or CoreSight).
	Event filter. This option should follow an event selector (-e).
	If the event is a tracepoint, the filter string will be parsed by
	the kernel. If the event is a hardware trace PMU (e.g. Intel PT
	or CoreSight), it'll be processed as an address filter. Otherwise
	it means a general filter using BPF which can be applied for any
	kind of event.

	- tracepoint filters

@ -176,6 +179,57 @@ OPTIONS

	Multiple filters can be separated with space or comma.

	- bpf filters

	A BPF filter can access the sample data and make a decision based on the
	data. Users need to set an appropriate sample type to use the BPF
	filter. BPF filters need root privilege.

	The sample data field can be specified in lower case letters. Multiple
	filters can be separated with a comma. For example,

	  --filter 'period > 1000, cpu == 1'
	or
	  --filter 'mem_op == load || mem_op == store, mem_lvl > l1'

	The former filter only accepts samples with a period greater than 1000 AND
	a CPU number of 1. The latter accepts both load and store memory
	operations, but they should have a memory level above L1. Since the
	mem_op and mem_lvl fields come from the (memory) data_source, it'd only
	work with some events which set the data_source field.

	Also, the user should request collecting that information (with the -d
	option in the above case). Otherwise, the following message will be shown.

	  $ sudo perf record -e cycles --filter 'mem_op == load'
	  Error: cycles event does not have PERF_SAMPLE_DATA_SRC
	  Hint: please add -d option to perf record.
	  failed to set filter "BPF" on event cycles with 22 (Invalid argument)

	Essentially the BPF filter expression is:

	  <term> <operator> <value> (("," | "||") <term> <operator> <value>)*

	The <term> can be one of:
	  ip, id, tid, pid, cpu, time, addr, period, txn, weight, phys_addr,
	  code_pgsz, data_pgsz, weight1, weight2, weight3, ins_lat, retire_lat,
	  p_stage_cyc, mem_op, mem_lvl, mem_snoop, mem_remote, mem_lock,
	  mem_dtlb, mem_blk, mem_hops

	The <operator> can be one of:
	  ==, !=, >, >=, <, <=, &

	The <value> can be one of:
	  <number> (for any term)
	  na, load, store, pfetch, exec (for mem_op)
	  l1, l2, l3, l4, cxl, io, any_cache, lfb, ram, pmem (for mem_lvl)
	  na, none, hit, miss, hitm, fwd, peer (for mem_snoop)
	  remote (for mem_remote)
	  na, locked (for mem_lock)
	  na, l1_hit, l1_miss, l2_hit, l2_miss, any_hit, any_miss, walk, fault (for mem_dtlb)
	  na, by_data, by_addr (for mem_blk)
	  hops0, hops1, hops2, hops3 (for mem_hops)
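
	As a further illustration consistent with the grammar above (an
	editor's example, assuming the event provides data_source), the
	following keeps only load samples that hit a remote node:

	  --filter 'mem_op == load, mem_remote == remote'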
|
||||
|
||||
--exclude-perf::
|
||||
Don't record events issued by perf itself. This option should follow
|
||||
an event selector (-e) which selects tracepoint event(s). It adds a
|
||||
|
|
|
@ -117,6 +117,7 @@ OPTIONS
|
|||
- addr: (Full) virtual address of the sampled instruction
|
||||
- retire_lat: On X86, this reports pipeline stall of this instruction compared
|
||||
to the previous instruction in cycles. And currently supported only on X86
|
||||
- simd: Flags describing a SIMD operation. "e" for empty Arm SVE predicate. "p" for partial Arm SVE predicate
|
||||
|
||||
By default, comm, dso and symbol keys are used.
|
||||
(i.e. --sort comm,dso,symbol)
|
||||
|
@ -380,6 +381,9 @@ OPTIONS
|
|||
This allows to examine the path the program took to each sample.
|
||||
The data collection must have used -b (or -j) and -g.
|
||||
|
||||
--addr2line=<path>::
|
||||
Path to addr2line binary.
|
||||
|
||||
--objdump=<path>::
|
||||
Path to objdump binary.
|
||||
|
||||
|
|
|
@ -130,7 +130,7 @@ OPTIONS
|
|||
-F::
|
||||
--fields::
|
||||
Comma separated list of fields to print. Options are:
|
||||
comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
|
||||
comm, tid, pid, time, cpu, event, trace, ip, sym, dso, dsoff, addr, symoff,
|
||||
srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output,
|
||||
brstackinsn, brstackinsnlen, brstackoff, callindent, insn, insnlen, synth,
|
||||
phys_addr, metric, misc, srccode, ipc, data_page_size, code_page_size, ins_lat,
|
||||
|
|
|
@ -308,6 +308,14 @@ use --per-die in addition to -a. (system-wide). The output includes the
|
|||
die number and the number of online processors on that die. This is
|
||||
useful to gauge the amount of aggregation.
|
||||
|
||||
--per-cache::
|
||||
Aggregate counts per cache instance for system-wide mode measurements. By
|
||||
default, the aggregation happens for the cache level at the highest index
|
||||
in the system. To specify a particular level, mention the cache level
|
||||
alongside the option in the format [Ll][1-9][0-9]*. For example:
|
||||
Using option "--per-cache=l3" or "--per-cache=L3" will aggregate the
|
||||
information at the boundary of the level 3 cache in the system.
|
||||
|
||||
--per-core::
|
||||
Aggregate counts per physical processor for system-wide mode measurements. This
|
||||
is a useful mode to detect imbalance between physical cores. To enable this mode,
|
||||
|
@ -353,6 +361,15 @@ small group that need not have multiplexing is lowered. This option
|
|||
forbids the event merging logic from sharing events between groups and
|
||||
may be used to increase accuracy in this case.
|
||||
|
||||
--metric-no-threshold::
|
||||
Metric thresholds may increase the number of events necessary to
|
||||
compute whether a metric has exceeded its threshold expression. This
|
||||
may not be desirable, for example, as the events can introduce
|
||||
multiplexing. This option disables the adding of threshold expression
|
||||
events for a metric. However, if there are sufficient events to
|
||||
compute the threshold then the threshold is still computed and used to
|
||||
color the metric's computed value.
|
||||
|
||||
--quiet::
|
||||
Don't print output, warnings or messages. This is useful with perf stat
|
||||
record below to only write data to the perf.data file.
|
||||
|
@ -379,6 +396,14 @@ Aggregate counts per processor socket for system-wide mode measurements.
|
|||
--per-die::
|
||||
Aggregate counts per processor die for system-wide mode measurements.
|
||||
|
||||
--per-cache::
|
||||
Aggregate counts per cache instance for system-wide mode measurements. By
|
||||
default, the aggregation happens for the cache level at the highest index
|
||||
in the system. To specify a particular level, mention the cache level
|
||||
alongside the option in the format [Ll][1-9][0-9]*. For example: Using
|
||||
option "--per-cache=l3" or "--per-cache=L3" will aggregate the
|
||||
information at the boundary of the level 3 cache in the system.
|
||||
|
||||
--per-core::
|
||||
Aggregate counts per physical processor for system-wide mode measurements.
|
||||
|
||||
|
@ -389,15 +414,21 @@ For a group all metrics from the group are added.
|
|||
The events from the metrics are automatically measured.
|
||||
See perf list output for the possible metrics and metricgroups.
|
||||
|
||||
When threshold information is available for a metric, the
|
||||
color red is used to signify a metric has exceeded a threshold
|
||||
while green shows it hasn't. The default color means that
|
||||
no threshold information was available or the threshold
|
||||
couldn't be computed.
|
||||
|
||||
-A::
|
||||
--no-aggr::
|
||||
Do not aggregate counts across all monitored CPUs.
|
||||
|
||||
--topdown::
|
||||
Print complete top-down metrics supported by the CPU. This allows to
|
||||
determine bottle necks in the CPU pipeline for CPU bound workloads,
|
||||
by breaking the cycles consumed down into frontend bound, backend bound,
|
||||
bad speculation and retiring.
|
||||
Print top-down metrics supported by the CPU. This allows to determine
|
||||
bottle necks in the CPU pipeline for CPU bound workloads, by breaking
|
||||
the cycles consumed down into frontend bound, backend bound, bad
|
||||
speculation and retiring.
|
||||
|
||||
Frontend bound means that the CPU cannot fetch and decode instructions fast
|
||||
enough. Backend bound means that computation or memory access is the bottle
|
||||
|
@ -430,15 +461,18 @@ CPUs the workload runs on. If needed the CPUs can be forced using
|
|||
taskset.
|
||||
|
||||
--td-level::
|
||||
Print the top-down statistics that equal to or lower than the input level.
|
||||
It allows users to print the interested top-down metrics level instead of
|
||||
the complete top-down metrics.
|
||||
Print the top-down statistics that equal the input level. It allows
|
||||
users to print the interested top-down metrics level instead of the
|
||||
level 1 top-down metrics.
|
||||
|
||||
The availability of the top-down metrics level depends on the hardware. For
|
||||
example, Ice Lake only supports L1 top-down metrics. The Sapphire Rapids
|
||||
supports both L1 and L2 top-down metrics.
|
||||
As the higher levels gather more metrics and use more counters they
|
||||
will be less accurate. By convention a metric can be examined by
|
||||
appending '_group' to it and this will increase accuracy compared to
|
||||
gathering all metrics for a level. For example, level 1 analysis may
|
||||
highlight 'tma_frontend_bound'. This metric may be drilled into with
|
||||
'tma_frontend_bound_group' with
|
||||
'perf stat -M tma_frontend_bound_group...'.
|
||||
|
||||
Default: 0 means the max level that the current hardware support.
|
||||
Error out if the input is higher than the supported max level.
|
||||
|
||||
--no-merge::
|
||||
|
|
|
@ -161,6 +161,12 @@ Default is to monitor all CPUS.
|
|||
-M::
|
||||
--disassembler-style=:: Set disassembler style for objdump.
|
||||
|
||||
--addr2line=<path>::
|
||||
Path to addr2line binary.
|
||||
|
||||
--objdump=<path>::
|
||||
Path to objdump binary.
|
||||
|
||||
--prefix=PREFIX::
|
||||
--prefix-strip=N::
|
||||
Remove first N entries from source file path names in executables
|
||||
|
@ -248,6 +254,10 @@ Default is to monitor all CPUS.
|
|||
The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k
|
||||
Note that this feature may not be available on all processors.
|
||||
|
||||
--branch-history::
|
||||
Add the addresses of sampled taken branches to the callstack.
|
||||
This allows to examine the path the program took to each sample.
|
||||
|
||||
--raw-trace::
|
||||
When displaying traceevent output, do not use print fmt or plugins.
|
||||
|
||||
|
|
|
@ -1,46 +1,35 @@
|
|||
Using TopDown metrics in user space
|
||||
-----------------------------------
|
||||
Using TopDown metrics
|
||||
---------------------
|
||||
|
||||
Intel CPUs (since Sandy Bridge and Silvermont) support a TopDown
|
||||
methodology to break down CPU pipeline execution into 4 bottlenecks:
|
||||
frontend bound, backend bound, bad speculation, retiring.
|
||||
TopDown metrics break apart performance bottlenecks. Starting at level
|
||||
1 it is typical to get metrics on retiring, bad speculation, frontend
|
||||
bound, and backend bound. Higher levels provide more detail in to the
|
||||
level 1 bottlenecks, such as at level 2: core bound, memory bound,
|
||||
heavy operations, light operations, branch mispredicts, machine
|
||||
clears, fetch latency and fetch bandwidth. For more details see [1][2][3].
|
||||
|
||||
For more details on Topdown see [1][5]
|
||||
perf stat --topdown implements this using available metrics that vary
|
||||
per architecture.
|
||||
|
||||
Traditionally this was implemented by events in generic counters
|
||||
and specific formulas to compute the bottlenecks.
|
||||
% perf stat -a --topdown -I1000
|
||||
# time % tma_retiring % tma_backend_bound % tma_frontend_bound % tma_bad_speculation
|
||||
1.001141351 11.5 34.9 46.9 6.7
|
||||
2.006141972 13.4 28.1 50.4 8.1
|
||||
3.010162040 12.9 28.1 51.1 8.0
|
||||
4.014009311 12.5 28.6 51.8 7.2
|
||||
5.017838554 11.8 33.0 48.0 7.2
|
||||
5.704818971 14.0 27.5 51.3 7.3
|
||||
...
|
||||
|
||||
perf stat --topdown implements this.
|
||||
|
||||
Full Top Down includes more levels that can break down the
|
||||
bottlenecks further. This is not directly implemented in perf,
|
||||
but available in other tools that can run on top of perf,
|
||||
such as toplev[2] or vtune[3]
|
||||
|
||||
New Topdown features in Ice Lake
|
||||
===============================
|
||||
New Topdown features in Intel Ice Lake
|
||||
======================================
|
||||
|
||||
With Ice Lake CPUs the TopDown metrics are directly available as
|
||||
fixed counters and do not require generic counters. This allows
|
||||
to collect TopDown always in addition to other events.
|
||||
|
||||
% perf stat -a --topdown -I1000
|
||||
# time retiring bad speculation frontend bound backend bound
|
||||
1.001281330 23.0% 15.3% 29.6% 32.1%
|
||||
2.003009005 5.0% 6.8% 46.6% 41.6%
|
||||
3.004646182 6.7% 6.7% 46.0% 40.6%
|
||||
4.006326375 5.0% 6.4% 47.6% 41.0%
|
||||
5.007991804 5.1% 6.3% 46.3% 42.3%
|
||||
6.009626773 6.2% 7.1% 47.3% 39.3%
|
||||
7.011296356 4.7% 6.7% 46.2% 42.4%
|
||||
8.012951831 4.7% 6.7% 47.5% 41.1%
|
||||
...
|
||||
|
||||
This also enables measuring TopDown per thread/process instead
|
||||
of only per core.
|
||||
|
||||
Using TopDown through RDPMC in applications on Ice Lake
|
||||
======================================================
|
||||
Using TopDown through RDPMC in applications on Intel Ice Lake
|
||||
=============================================================
|
||||
|
||||
For more fine grained measurements it can be useful to
|
||||
access the new directly from user space. This is more complicated,
|
||||
|
@ -301,8 +290,8 @@ This "opens" a new measurement period.
|
|||
A program using RDPMC for TopDown should schedule such a reset
|
||||
regularly, as in every few seconds.
|
||||
|
||||
Limits on Ice Lake
|
||||
==================
|
||||
Limits on Intel Ice Lake
|
||||
========================
|
||||
|
||||
Four pseudo TopDown metric events are exposed for the end-users,
|
||||
topdown-retiring, topdown-bad-spec, topdown-fe-bound and topdown-be-bound.
|
||||
|
@ -318,8 +307,8 @@ a sampling read group. Since the SLOTS event must be the leader of a TopDown
|
|||
group, the second event of the group is the sampling event.
|
||||
For example, perf record -e '{slots, $sampling_event, topdown-retiring}:S'
|
||||
|
||||
Extension on Sapphire Rapids Server
|
||||
===================================
|
||||
Extension on Intel Sapphire Rapids Server
|
||||
=========================================
|
||||
The metrics counter is extended to support TMA method level 2 metrics.
|
||||
The lower half of the register is the TMA level 1 metrics (legacy).
|
||||
The upper half is also divided into four 8-bit fields for the new level 2
|
||||
|
@ -338,7 +327,6 @@ other four level 2 metrics by subtracting corresponding metrics as below.
|
|||
|
||||
|
||||
[1] https://software.intel.com/en-us/top-down-microarchitecture-analysis-method-win
|
||||
[2] https://github.com/andikleen/pmu-tools/wiki/toplev-manual
|
||||
[3] https://software.intel.com/en-us/intel-vtune-amplifier-xe
|
||||
[2] https://sites.google.com/site/analysismethods/yasin-pubs
|
||||
[3] https://perf.wiki.kernel.org/index.php/Top-Down_Analysis
|
||||
[4] https://github.com/andikleen/pmu-tools/tree/master/jevents
|
||||
[5] https://sites.google.com/site/analysismethods/yasin-pubs
|
||||
|
|
|
@@ -149,9 +149,9 @@ FEATURE_CHECK_LDFLAGS-libcrypto = -lcrypto
ifdef CSINCLUDES
LIBOPENCSD_CFLAGS := -I$(CSINCLUDES)
endif
OPENCSDLIBS := -lopencsd_c_api
OPENCSDLIBS := -lopencsd_c_api -lopencsd
ifeq ($(findstring -static,${LDFLAGS}),-static)
OPENCSDLIBS += -lopencsd -lstdc++
OPENCSDLIBS += -lstdc++
endif
ifdef CSLIBS
LIBOPENCSD_LDFLAGS := -L$(CSLIBS)
@@ -210,6 +210,12 @@ ifeq ($(call get-executable,$(BISON)),)
dummy := $(error Error: $(BISON) is missing on this system, please install it)
endif

ifeq ($(BUILD_BPF_SKEL),1)
ifeq ($(call get-executable,$(CLANG)),)
dummy := $(error $(CLANG) is missing on this system, please install it to be able to build with BUILD_BPF_SKEL=1)
endif
endif

ifneq ($(OUTPUT),)
ifeq ($(shell expr $(shell $(BISON) --version | grep bison | sed -e 's/.\+ \([0-9]\+\).\([0-9]\+\).\([0-9]\+\)/\1\2\3/g') \>\= 371), 1)
BISON_FILE_PREFIX_MAP := --file-prefix-map=$(OUTPUT)=
@@ -228,6 +234,7 @@ ifndef DEBUG
endif

ifeq ($(DEBUG),0)
CORE_CFLAGS += -DNDEBUG=1
ifeq ($(CC_NO_CLANG), 0)
CORE_CFLAGS += -O3
else
@@ -303,6 +310,7 @@ FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS)
FEATURE_CHECK_LDFLAGS-libaio = -lrt

FEATURE_CHECK_LDFLAGS-disassembler-four-args = -lbfd -lopcodes -ldl
FEATURE_CHECK_LDFLAGS-disassembler-init-styled = -lbfd -lopcodes -ldl

CORE_CFLAGS += -fno-omit-frame-pointer
CORE_CFLAGS += -ggdb3
@@ -413,7 +421,6 @@ endif

ifdef NO_LIBELF
NO_DWARF := 1
NO_DEMANGLE := 1
NO_LIBUNWIND := 1
NO_LIBDW_DWARF_UNWIND := 1
NO_LIBBPF := 1
@@ -427,15 +434,7 @@ else
LIBC_SUPPORT := 1
endif
ifeq ($(LIBC_SUPPORT),1)
msg := $(warning No libelf found. Disables 'probe' tool, jvmti and BPF support in 'perf record'. Please install libelf-dev, libelf-devel or elfutils-libelf-devel);

NO_LIBELF := 1
NO_DWARF := 1
NO_DEMANGLE := 1
NO_LIBUNWIND := 1
NO_LIBDW_DWARF_UNWIND := 1
NO_LIBBPF := 1
NO_JVMTI := 1
msg := $(error ERROR: No libelf found. Disables 'probe' tool, jvmti and BPF support. Please install libelf-dev, libelf-devel, elfutils-libelf-devel or build with NO_LIBELF=1.)
else
ifneq ($(filter s% -fsanitize=address%,$(EXTRA_CFLAGS),),)
ifneq ($(shell ldconfig -p | grep libasan >/dev/null 2>&1; echo $$?), 0)
@@ -477,10 +476,6 @@ else
endif # libelf support
endif # NO_LIBELF

ifeq ($(feature-glibc), 1)
CFLAGS += -DHAVE_GLIBC_SUPPORT
endif

ifeq ($(feature-libaio), 1)
ifndef NO_AIO
CFLAGS += -DHAVE_AIO_SUPPORT
@@ -491,6 +486,10 @@ ifdef NO_DWARF
NO_LIBDW_DWARF_UNWIND := 1
endif

ifeq ($(feature-scandirat), 1)
CFLAGS += -DHAVE_SCANDIRAT_SUPPORT
endif

ifeq ($(feature-sched_getcpu), 1)
CFLAGS += -DHAVE_SCHED_GETCPU_SUPPORT
endif
@@ -567,54 +566,17 @@ ifndef NO_LIBELF

# detecting libbpf without LIBBPF_DYNAMIC, so make VF=1 shows libbpf detection status
$(call feature_check,libbpf)

ifdef LIBBPF_DYNAMIC
ifeq ($(feature-libbpf), 1)
EXTLIBS += -lbpf
$(call detected,CONFIG_LIBBPF_DYNAMIC)

$(call feature_check,libbpf-btf__load_from_kernel_by_id)
ifeq ($(feature-libbpf-btf__load_from_kernel_by_id), 1)
CFLAGS += -DHAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID
endif
$(call feature_check,libbpf-bpf_prog_load)
ifeq ($(feature-libbpf-bpf_prog_load), 1)
CFLAGS += -DHAVE_LIBBPF_BPF_PROG_LOAD
endif
$(call feature_check,libbpf-bpf_object__next_program)
ifeq ($(feature-libbpf-bpf_object__next_program), 1)
CFLAGS += -DHAVE_LIBBPF_BPF_OBJECT__NEXT_PROGRAM
endif
$(call feature_check,libbpf-bpf_object__next_map)
ifeq ($(feature-libbpf-bpf_object__next_map), 1)
CFLAGS += -DHAVE_LIBBPF_BPF_OBJECT__NEXT_MAP
endif
$(call feature_check,libbpf-bpf_program__set_insns)
ifeq ($(feature-libbpf-bpf_program__set_insns), 1)
CFLAGS += -DHAVE_LIBBPF_BPF_PROGRAM__SET_INSNS
else
dummy := $(error Error: libbpf devel library needs to be >= 0.8.0 to build with LIBBPF_DYNAMIC, update or build statically with the version that comes with the kernel sources);
endif
$(call feature_check,libbpf-btf__raw_data)
ifeq ($(feature-libbpf-btf__raw_data), 1)
CFLAGS += -DHAVE_LIBBPF_BTF__RAW_DATA
endif
$(call feature_check,libbpf-bpf_map_create)
ifeq ($(feature-libbpf-bpf_map_create), 1)
CFLAGS += -DHAVE_LIBBPF_BPF_MAP_CREATE
endif
else
dummy := $(error Error: No libbpf devel library found, please install libbpf-devel);
dummy := $(error Error: No libbpf devel library found or older than v1.0, please install/update libbpf-devel);
endif
else
# Libbpf will be built as a static library from tools/lib/bpf.
LIBBPF_STATIC := 1
CFLAGS += -DHAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID
CFLAGS += -DHAVE_LIBBPF_BPF_PROG_LOAD
CFLAGS += -DHAVE_LIBBPF_BPF_OBJECT__NEXT_PROGRAM
CFLAGS += -DHAVE_LIBBPF_BPF_OBJECT__NEXT_MAP
CFLAGS += -DHAVE_LIBBPF_BPF_PROGRAM__SET_INSNS
CFLAGS += -DHAVE_LIBBPF_BTF__RAW_DATA
CFLAGS += -DHAVE_LIBBPF_BPF_MAP_CREATE
endif
endif
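The HAVE_LIBBPF_* flags set above become preprocessor guards in the C sources. A minimal standalone sketch of how such a guard is typically consumed; the function name and fallback behaviour here are illustrative assumptions, not perf's actual code:

#include <stdio.h>

/* Pretend Makefile.config probed a new-enough libbpf; in a real build this
 * arrives via CFLAGS, e.g. -DHAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID. */
#define HAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID

static int load_kernel_btf(unsigned int id)
{
#ifdef HAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID
	/* new API available: would call btf__load_from_kernel_by_id(id) */
	printf("loading BTF object %u via the new libbpf API\n", id);
	return 0;
#else
	/* feature check failed: degrade gracefully (illustrative fallback) */
	fprintf(stderr, "BTF id %u: libbpf too old, feature disabled\n", id);
	return -1;
#endif
}

int main(void)
{
	return load_kernel_btf(1) ? 1 : 0;
}

The static-libbpf branch above simply defines all of these macros unconditionally, since the in-tree tools/lib/bpf always provides the newer calls.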
@@ -715,6 +677,10 @@ ifdef BUILD_BPF_SKEL
CFLAGS += -DHAVE_BPF_SKEL
endif

ifndef GEN_VMLINUX_H
VMLINUX_H=$(src-perf)/util/bpf_skel/vmlinux/vmlinux.h
endif

dwarf-post-unwind := 1
dwarf-post-unwind-text := BUG

@@ -798,10 +764,6 @@ ifndef NO_LIBCRYPTO
endif
endif

ifdef NO_NEWT
NO_SLANG=1
endif

ifndef NO_SLANG
ifneq ($(feature-libslang), 1)
ifneq ($(feature-libslang-include-subdir), 1)
@@ -918,19 +880,17 @@ endif
ifneq ($(NO_JEVENTS),1)
NO_JEVENTS := 0
ifndef PYTHON
$(warning No python interpreter disabling jevent generation)
NO_JEVENTS := 1
$(error ERROR: No python interpreter needed for jevents generation. Install python or build with NO_JEVENTS=1.)
else
# jevents.py uses f-strings present in Python 3.6 released in Dec. 2016.
JEVENTS_PYTHON_GOOD := $(shell $(PYTHON) -c 'import sys;print("1" if(sys.version_info.major >= 3 and sys.version_info.minor >= 6) else "0")' 2> /dev/null)
ifneq ($(JEVENTS_PYTHON_GOOD), 1)
$(warning Python interpreter too old (older than 3.6) disabling jevent generation)
NO_JEVENTS := 1
$(error ERROR: Python interpreter needed for jevents generation too old (older than 3.6). Install a newer python or build with NO_JEVENTS=1.)
endif
endif
endif

ifndef NO_LIBBFD
ifdef BUILD_NONDISTRO
ifeq ($(feature-libbfd), 1)
EXTLIBS += -lbfd -lopcodes
else
@@ -945,15 +905,19 @@ ifndef NO_LIBBFD
ifeq ($(feature-libbfd-liberty), 1)
EXTLIBS += -lbfd -lopcodes -liberty
FEATURE_CHECK_LDFLAGS-disassembler-four-args += -liberty -ldl
FEATURE_CHECK_LDFLAGS-disassembler-init-styled += -liberty -ldl
else
ifeq ($(feature-libbfd-liberty-z), 1)
EXTLIBS += -lbfd -lopcodes -liberty -lz
FEATURE_CHECK_LDFLAGS-disassembler-four-args += -liberty -lz -ldl
FEATURE_CHECK_LDFLAGS-disassembler-init-styled += -liberty -lz -ldl
endif
endif
$(call feature_check,disassembler-four-args)
endif

CFLAGS += -DHAVE_LIBBFD_SUPPORT
CXXFLAGS += -DHAVE_LIBBFD_SUPPORT
ifeq ($(feature-libbfd-buildid), 1)
CFLAGS += -DHAVE_LIBBFD_BUILDID_SUPPORT
else
@@ -961,33 +925,26 @@ ifndef NO_LIBBFD
endif
endif

ifdef NO_DEMANGLE
CFLAGS += -DNO_DEMANGLE
else
ifdef HAVE_CPLUS_DEMANGLE_SUPPORT
EXTLIBS += -liberty
else
ifndef NO_DEMANGLE
$(call feature_check,cxa-demangle)
ifeq ($(feature-cxa-demangle), 1)
EXTLIBS += -lstdc++
CFLAGS += -DHAVE_CXA_DEMANGLE_SUPPORT
CXXFLAGS += -DHAVE_CXA_DEMANGLE_SUPPORT
$(call detected,CONFIG_CXX_DEMANGLE)
endif
ifdef BUILD_NONDISTRO
ifeq ($(filter -liberty,$(EXTLIBS)),)
$(call feature_check,cplus-demangle)

# we dont have neither HAVE_CPLUS_DEMANGLE_SUPPORT
# or any of 'bfd iberty z' trinity
ifeq ($(feature-cplus-demangle), 1)
EXTLIBS += -liberty
else
msg := $(warning No bfd.h/libbfd found, please install binutils-dev[el]/zlib-static/libiberty-dev to gain symbol demangling)
CFLAGS += -DNO_DEMANGLE
endif
endif
ifneq ($(filter -liberty,$(EXTLIBS)),)
CFLAGS += -DHAVE_CPLUS_DEMANGLE_SUPPORT
CXXFLAGS += -DHAVE_CPLUS_DEMANGLE_SUPPORT
endif
endif

ifneq ($(filter -liberty,$(EXTLIBS)),)
CFLAGS += -DHAVE_CPLUS_DEMANGLE_SUPPORT
endif
endif

ifneq ($(filter -lbfd,$(EXTLIBS)),)
CFLAGS += -DHAVE_LIBBFD_SUPPORT
endif

ifndef NO_ZLIB
@@ -1118,6 +1075,11 @@ ifndef NO_AUXTRACE
endif
endif

ifdef EXTRA_TESTS
$(call detected,CONFIG_EXTRA_TESTS)
CFLAGS += -DHAVE_EXTRA_TESTS
endif

ifndef NO_JVMTI
ifneq (,$(wildcard /usr/sbin/update-java-alternatives))
JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | awk '{print $$3}')
@@ -1181,7 +1143,7 @@ ifdef LIBCLANGLLVM
endif
endif

ifdef LIBPFM4
ifndef NO_LIBPFM4
$(call feature_check,libpfm4)
ifeq ($(feature-libpfm4), 1)
CFLAGS += -DHAVE_LIBPFM
@@ -1190,7 +1152,6 @@ ifdef LIBPFM4
$(call detected,CONFIG_LIBPFM4)
else
msg := $(warning libpfm4 not found, disables libpfm4 support. Please install libpfm4-dev);
NO_LIBPFM4 := 1
endif
endif

@@ -1208,7 +1169,7 @@ ifneq ($(NO_LIBTRACEEVENT),1)
CFLAGS += -DLIBTRACEEVENT_VERSION=$(LIBTRACEEVENT_VERSION_CPP)
$(call detected,CONFIG_LIBTRACEEVENT)
else
dummy := $(warning Warning: libtraceevent is missing limiting functionality, please install libtraceevent-dev/libtraceevent-devel)
dummy := $(error ERROR: libtraceevent is missing. Please install libtraceevent-dev/libtraceevent-devel or build with NO_LIBTRACEEVENT=1)
endif

$(call feature_check,libtracefs)
@@ -44,8 +44,6 @@ include ../scripts/utilities.mak
#
# Define WERROR=0 to disable treating any warnings as errors.
#
# Define NO_NEWT if you do not want TUI support. (deprecated)
#
# Define NO_SLANG if you do not want TUI support.
#
# Define GTK2 if you want GTK+ GUI support.
@@ -122,12 +120,20 @@ include ../scripts/utilities.mak
# generated from the kernel .tbl or unistd.h files and use, if available, libaudit
# for doing the conversions to/from strings/id.
#
# Define LIBPFM4 to enable libpfm4 events extension.
# Define NO_LIBPFM4 to disable libpfm4 events extension.
#
# Define NO_LIBDEBUGINFOD if you do not want support debuginfod
#
# Define BUILD_BPF_SKEL to enable BPF skeletons
#
# Define BUILD_NONDISTRO to enable building an linking against libbfd and
# libiberty distribution license incompatible libraries.
#
# Define EXTRA_TESTS to enable building extra tests useful mainly to perf
# developers, such as:
# x86 instruction decoder - new instructions test
#
# Define GEN_VMLINUX_H to generate vmlinux.h from the BTF.

# As per kernel Makefile, avoid funny character set dependencies
unexport LC_ALL
@@ -181,7 +187,6 @@ HOSTCC ?= gcc
HOSTLD ?= ld
HOSTAR ?= ar
CLANG ?= clang
LLVM_STRIP ?= llvm-strip

PKG_CONFIG = $(CROSS_COMPILE)pkg-config

@@ -194,6 +199,7 @@ FLEX ?= flex
BISON ?= bison
STRIP = strip
AWK = awk
READELF ?= readelf

# include Makefile.config by default and rule out
# non-config cases
@@ -375,7 +381,7 @@ ifndef NO_JVMTI
PROGRAMS += $(OUTPUT)$(LIBJVMTI)
endif

DLFILTERS := dlfilter-test-api-v0.so dlfilter-show-cycles.so
DLFILTERS := dlfilter-test-api-v0.so dlfilter-test-api-v2.so dlfilter-show-cycles.so
DLFILTERS := $(patsubst %,$(OUTPUT)dlfilters/%,$(DLFILTERS))

# what 'all' will build and 'install' will install, in perfexecdir
@@ -1050,25 +1056,65 @@ SKELETONS := $(SKEL_OUT)/bpf_prog_profiler.skel.h
SKELETONS += $(SKEL_OUT)/bperf_leader.skel.h $(SKEL_OUT)/bperf_follower.skel.h
SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h
SKELETONS += $(SKEL_OUT)/off_cpu.skel.h $(SKEL_OUT)/lock_contention.skel.h
SKELETONS += $(SKEL_OUT)/kwork_trace.skel.h
SKELETONS += $(SKEL_OUT)/kwork_trace.skel.h $(SKEL_OUT)/sample_filter.skel.h

$(SKEL_TMP_OUT) $(LIBAPI_OUTPUT) $(LIBBPF_OUTPUT) $(LIBPERF_OUTPUT) $(LIBSUBCMD_OUTPUT) $(LIBSYMBOL_OUTPUT):
$(Q)$(MKDIR) -p $@

ifdef BUILD_BPF_SKEL
BPFTOOL := $(SKEL_TMP_OUT)/bootstrap/bpftool
BPF_INCLUDE := -I$(SKEL_TMP_OUT)/.. -I$(LIBBPF_INCLUDE)
# Get Clang's default includes on this system, as opposed to those seen by
# '--target=bpf'. This fixes "missing" files on some architectures/distros,
# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
#
# Use '-idirafter': Don't interfere with include mechanics except where the
# build would have failed anyways.
define get_sys_includes
$(shell $(1) $(2) -v -E - </dev/null 2>&1 \
| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \
$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}')
endef

ifneq ($(CROSS_COMPILE),)
CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%))
endif

CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH))
BPF_INCLUDE := -I$(SKEL_TMP_OUT)/.. -I$(LIBBPF_INCLUDE) $(CLANG_SYS_INCLUDES)
TOOLS_UAPI_INCLUDE := -I$(srctree)/tools/include/uapi

$(BPFTOOL): | $(SKEL_TMP_OUT)
$(Q)CFLAGS= $(MAKE) -C ../bpf/bpftool \
OUTPUT=$(SKEL_TMP_OUT)/ bootstrap

VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \
# Paths to search for a kernel to generate vmlinux.h from.
VMLINUX_BTF_ELF_PATHS ?= $(if $(O),$(O)/vmlinux) \
$(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \
../../vmlinux \
/sys/kernel/btf/vmlinux \
/boot/vmlinux-$(shell uname -r)
VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))

# Paths to BTF information.
VMLINUX_BTF_BTF_PATHS ?= /sys/kernel/btf/vmlinux

# Filter out kernels that don't exist or without a BTF section.
VMLINUX_BTF_ELF_ABSPATHS ?= $(abspath $(wildcard $(VMLINUX_BTF_ELF_PATHS)))
VMLINUX_BTF_PATHS ?= $(shell for file in $(VMLINUX_BTF_ELF_ABSPATHS); \
do \
if [ -f $$file ] && ($(READELF) -S "$$file" | grep -q .BTF); \
then \
echo "$$file"; \
fi; \
done) \
$(wildcard $(VMLINUX_BTF_BTF_PATHS))

# Select the first as the source of vmlinux.h.
VMLINUX_BTF ?= $(firstword $(VMLINUX_BTF_PATHS))

ifeq ($(VMLINUX_H),)
ifeq ($(VMLINUX_BTF),)
$(error Missing bpftool input for generating vmlinux.h)
endif
endif

$(SKEL_OUT)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL)
ifeq ($(VMLINUX_H),)
@@ -1078,8 +1124,8 @@ else
endif

$(SKEL_TMP_OUT)/%.bpf.o: util/bpf_skel/%.bpf.c $(LIBBPF) $(SKEL_OUT)/vmlinux.h | $(SKEL_TMP_OUT)
$(QUIET_CLANG)$(CLANG) -g -O2 -target bpf -Wall -Werror $(BPF_INCLUDE) \
-c $(filter util/bpf_skel/%.bpf.c,$^) -o $@ && $(LLVM_STRIP) -g $@
$(QUIET_CLANG)$(CLANG) -g -O2 --target=bpf -Wall -Werror $(BPF_INCLUDE) $(TOOLS_UAPI_INCLUDE) \
-c $(filter util/bpf_skel/%.bpf.c,$^) -o $@

$(SKEL_OUT)/%.skel.h: $(SKEL_TMP_OUT)/%.bpf.o | $(BPFTOOL)
$(QUIET_GENSKEL)$(BPFTOOL) gen skeleton $< > $@
@@ -26,14 +26,14 @@ static int sample_ustack(struct perf_sample *sample,

sp = (unsigned long) regs[PERF_REG_ARM_SP];

map = maps__find(thread->maps, (u64)sp);
map = maps__find(thread__maps(thread), (u64)sp);
if (!map) {
pr_debug("failed to get stack map\n");
free(buf);
return -1;
}

stack_size = map->end - sp;
stack_size = map__end(map) - sp;
stack_size = stack_size > STACK_SIZE ? STACK_SIZE : stack_size;

memcpy(buf, (void *) sp, stack_size);
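The hunk above is one instance of a tree-wide conversion from direct member access (thread->maps, map->end) to accessor calls, which lets the underlying structs become reference-counted and opaque. A standalone sketch of the pattern with stand-in types; the real struct map/thread accessors live in perf's util headers:

#include <assert.h>

typedef unsigned long long u64;

/* Stand-in for perf's struct map; the real one is opaque to callers. */
struct map { u64 start, end; };

static inline u64 map__start(const struct map *m) { return m->start; }
static inline u64 map__end(const struct map *m)   { return m->end; }

int main(void)
{
	struct map m = { .start = 0x1000, .end = 0x2000 };
	u64 sp = 0x1800;

	/* old style was m.end - sp; the new style goes through accessors,
	 * so the struct layout can change without touching callers */
	u64 stack_size = map__end(&m) - sp;

	assert(stack_size == 0x800);
	return 0;
}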
@@ -14,6 +14,7 @@
#include "../../../util/debug.h"
#include "../../../util/evlist.h"
#include "../../../util/pmu.h"
#include "../../../util/pmus.h"
#include "cs-etm.h"
#include "arm-spe.h"
#include "hisi-ptt.h"
@@ -40,7 +41,7 @@ static struct perf_pmu **find_all_arm_spe_pmus(int *nr_spes, int *err)
return NULL;
}

arm_spe_pmus[*nr_spes] = perf_pmu__find(arm_spe_pmu_name);
arm_spe_pmus[*nr_spes] = perf_pmus__find(arm_spe_pmu_name);
if (arm_spe_pmus[*nr_spes]) {
pr_debug2("%s %d: arm_spe_pmu %d type %d name %s\n",
__func__, __LINE__, *nr_spes,
@@ -87,7 +88,7 @@ static struct perf_pmu **find_all_hisi_ptt_pmus(int *nr_ptts, int *err)
rewinddir(dir);
while ((dent = readdir(dir))) {
if (strstr(dent->d_name, HISI_PTT_PMU_NAME) && idx < *nr_ptts) {
hisi_ptt_pmus[idx] = perf_pmu__find(dent->d_name);
hisi_ptt_pmus[idx] = perf_pmus__find(dent->d_name);
if (hisi_ptt_pmus[idx])
idx++;
}
@@ -131,7 +132,7 @@ struct auxtrace_record
if (!evlist)
return NULL;

cs_etm_pmu = perf_pmu__find(CORESIGHT_ETM_PMU_NAME);
cs_etm_pmu = perf_pmus__find(CORESIGHT_ETM_PMU_NAME);
arm_spe_pmus = find_all_arm_spe_pmus(&nr_spes, err);
hisi_ptt_pmus = find_all_hisi_ptt_pmus(&nr_ptts, err);
@@ -25,7 +25,7 @@
#include "../../../util/evsel.h"
#include "../../../util/perf_api_probe.h"
#include "../../../util/evsel_config.h"
#include "../../../util/pmu.h"
#include "../../../util/pmus.h"
#include "../../../util/cs-etm.h"
#include <internal/lib.h> // page_size
#include "../../../util/session.h"
@@ -69,21 +69,29 @@ static const char * const metadata_ete_ro[] = {
static bool cs_etm_is_etmv4(struct auxtrace_record *itr, int cpu);
static bool cs_etm_is_ete(struct auxtrace_record *itr, int cpu);

static int cs_etm_set_context_id(struct auxtrace_record *itr,
struct evsel *evsel, int cpu)
static int cs_etm_validate_context_id(struct auxtrace_record *itr,
struct evsel *evsel, int cpu)
{
struct cs_etm_recording *ptr;
struct perf_pmu *cs_etm_pmu;
struct cs_etm_recording *ptr =
container_of(itr, struct cs_etm_recording, itr);
struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
char path[PATH_MAX];
int err = -EINVAL;
int err;
u32 val;
u64 contextid;
u64 contextid = evsel->core.attr.config &
(perf_pmu__format_bits(&cs_etm_pmu->format, "contextid") |
perf_pmu__format_bits(&cs_etm_pmu->format, "contextid1") |
perf_pmu__format_bits(&cs_etm_pmu->format, "contextid2"));

ptr = container_of(itr, struct cs_etm_recording, itr);
cs_etm_pmu = ptr->cs_etm_pmu;
if (!contextid)
return 0;

if (!cs_etm_is_etmv4(itr, cpu))
goto out;
/* Not supported in etmv3 */
if (!cs_etm_is_etmv4(itr, cpu)) {
pr_err("%s: contextid not supported in ETMv3, disable with %s/contextid=0/\n",
CORESIGHT_ETM_PMU_NAME, CORESIGHT_ETM_PMU_NAME);
return -EINVAL;
}

/* Get a handle on TRCIDR2 */
snprintf(path, PATH_MAX, "cpu%d/%s",
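The replacement code above derives contextid by masking attr.config with the PMU's sysfs format fields, so the validation only runs when the user actually asked for context-ID tracing. A standalone sketch of the format-bits idea; the bit position is an illustrative assumption, and the real helper is perf_pmu__format_bits():

#include <stdio.h>
#include <stdint.h>

/* Build a mask for config bits [lo, hi]; a sysfs format file such as
 * "contextid" names exactly such a range, e.g. "config:14". */
static uint64_t format_bits(int lo, int hi)
{
	uint64_t mask = 0;
	for (int b = lo; b <= hi; b++)
		mask |= 1ULL << b;
	return mask;
}

int main(void)
{
	uint64_t config = 1ULL << 14;		/* as if the user passed contextid=1 */
	uint64_t ctxtid = format_bits(14, 14);	/* illustrative bit position */

	if (config & ctxtid)
		printf("contextid requested, validate TRCIDR2 before recording\n");
	return 0;
}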
@@ -92,27 +100,13 @@ static int cs_etm_set_context_id(struct auxtrace_record *itr,

/* There was a problem reading the file, bailing out */
if (err != 1) {
pr_err("%s: can't read file %s\n",
CORESIGHT_ETM_PMU_NAME, path);
goto out;
pr_err("%s: can't read file %s\n", CORESIGHT_ETM_PMU_NAME,
path);
return err;
}

/* User has configured for PID tracing, respects it. */
contextid = evsel->core.attr.config &
(BIT(ETM_OPT_CTXTID) | BIT(ETM_OPT_CTXTID2));

/*
* If user doesn't configure the contextid format, parse PMU format and
* enable PID tracing according to the "contextid" format bits:
*
* If bit ETM_OPT_CTXTID is set, trace CONTEXTIDR_EL1;
* If bit ETM_OPT_CTXTID2 is set, trace CONTEXTIDR_EL2.
*/
if (!contextid)
contextid = perf_pmu__format_bits(&cs_etm_pmu->format,
"contextid");

if (contextid & BIT(ETM_OPT_CTXTID)) {
if (contextid &
perf_pmu__format_bits(&cs_etm_pmu->format, "contextid1")) {
/*
* TRCIDR2.CIDSIZE, bit [9-5], indicates whether contextID
* tracing is supported:
@@ -120,16 +114,15 @@ static int cs_etm_set_context_id(struct auxtrace_record *itr,
* 0b00100 Maximum of 32-bit Context ID size.
* All other values are reserved.
*/
val = BMVAL(val, 5, 9);
if (!val || val != 0x4) {
pr_err("%s: CONTEXTIDR_EL1 isn't supported\n",
CORESIGHT_ETM_PMU_NAME);
err = -EINVAL;
goto out;
if (BMVAL(val, 5, 9) != 0x4) {
pr_err("%s: CONTEXTIDR_EL1 isn't supported, disable with %s/contextid1=0/\n",
CORESIGHT_ETM_PMU_NAME, CORESIGHT_ETM_PMU_NAME);
return -EINVAL;
}
}

if (contextid & BIT(ETM_OPT_CTXTID2)) {
if (contextid &
perf_pmu__format_bits(&cs_etm_pmu->format, "contextid2")) {
/*
* TRCIDR2.VMIDOPT[30:29] != 0 and
* TRCIDR2.VMIDSIZE[14:10] == 0b00100 (32bit virtual contextid)
@@ -138,35 +131,34 @@ static int cs_etm_set_context_id(struct auxtrace_record *itr,
* Any value of VMIDSIZE >= 4 (i.e, > 32bit) is fine for us.
*/
if (!BMVAL(val, 29, 30) || BMVAL(val, 10, 14) < 4) {
pr_err("%s: CONTEXTIDR_EL2 isn't supported\n",
CORESIGHT_ETM_PMU_NAME);
err = -EINVAL;
goto out;
pr_err("%s: CONTEXTIDR_EL2 isn't supported, disable with %s/contextid2=0/\n",
CORESIGHT_ETM_PMU_NAME, CORESIGHT_ETM_PMU_NAME);
return -EINVAL;
}
}

/* All good, let the kernel know */
evsel->core.attr.config |= contextid;
err = 0;

out:
return err;
return 0;
}

static int cs_etm_set_timestamp(struct auxtrace_record *itr,
struct evsel *evsel, int cpu)
static int cs_etm_validate_timestamp(struct auxtrace_record *itr,
struct evsel *evsel, int cpu)
{
struct cs_etm_recording *ptr;
struct perf_pmu *cs_etm_pmu;
struct cs_etm_recording *ptr =
container_of(itr, struct cs_etm_recording, itr);
struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
char path[PATH_MAX];
int err = -EINVAL;
int err;
u32 val;

ptr = container_of(itr, struct cs_etm_recording, itr);
cs_etm_pmu = ptr->cs_etm_pmu;
if (!(evsel->core.attr.config &
perf_pmu__format_bits(&cs_etm_pmu->format, "timestamp")))
return 0;

if (!cs_etm_is_etmv4(itr, cpu))
goto out;
if (!cs_etm_is_etmv4(itr, cpu)) {
pr_err("%s: timestamp not supported in ETMv3, disable with %s/timestamp=0/\n",
CORESIGHT_ETM_PMU_NAME, CORESIGHT_ETM_PMU_NAME);
return -EINVAL;
}

/* Get a handle on TRCIRD0 */
snprintf(path, PATH_MAX, "cpu%d/%s",
@@ -177,7 +169,7 @@ static int cs_etm_set_timestamp(struct auxtrace_record *itr,
if (err != 1) {
pr_err("%s: can't read file %s\n",
CORESIGHT_ETM_PMU_NAME, path);
goto out;
return err;
}

/*
@@ -189,24 +181,21 @@ static int cs_etm_set_timestamp(struct auxtrace_record *itr,
*/
val &= GENMASK(28, 24);
if (!val) {
err = -EINVAL;
goto out;
return -EINVAL;
}

/* All good, let the kernel know */
evsel->core.attr.config |= (1 << ETM_OPT_TS);
err = 0;

out:
return err;
return 0;
}

#define ETM_SET_OPT_CTXTID (1 << 0)
#define ETM_SET_OPT_TS (1 << 1)
#define ETM_SET_OPT_MASK (ETM_SET_OPT_CTXTID | ETM_SET_OPT_TS)

static int cs_etm_set_option(struct auxtrace_record *itr,
struct evsel *evsel, u32 option)
/*
* Check whether the requested timestamp and contextid options should be
* available on all requested CPUs and if not, tell the user how to override.
* The kernel will silently disable any unavailable options so a warning here
* first is better. In theory the kernel could still disable the option for
* some other reason so this is best effort only.
*/
static int cs_etm_validate_config(struct auxtrace_record *itr,
struct evsel *evsel)
{
int i, err = -EINVAL;
struct perf_cpu_map *event_cpus = evsel->evlist->core.user_requested_cpus;
@@ -220,18 +209,11 @@ static int cs_etm_set_option(struct auxtrace_record *itr,
!perf_cpu_map__has(online_cpus, cpu))
continue;

if (option & BIT(ETM_OPT_CTXTID)) {
err = cs_etm_set_context_id(itr, evsel, i);
if (err)
goto out;
}
if (option & BIT(ETM_OPT_TS)) {
err = cs_etm_set_timestamp(itr, evsel, i);
if (err)
goto out;
}
if (option & ~(BIT(ETM_OPT_CTXTID) | BIT(ETM_OPT_TS)))
/* Nothing else is currently supported */
err = cs_etm_validate_context_id(itr, evsel, i);
if (err)
goto out;
err = cs_etm_validate_timestamp(itr, evsel, i);
if (err)
goto out;
}

@@ -319,13 +301,6 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
bool privileged = perf_event_paranoid_check(-1);
int err = 0;

ptr->evlist = evlist;
ptr->snapshot_mode = opts->auxtrace_snapshot_mode;

if (!record_opts__no_switch_events(opts) &&
perf_can_record_switch_events())
opts->record_switch_events = true;

evlist__for_each_entry(evlist, evsel) {
if (evsel->core.attr.type == cs_etm_pmu->type) {
if (cs_etm_evsel) {
@@ -333,11 +308,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
CORESIGHT_ETM_PMU_NAME);
return -EINVAL;
}
evsel->core.attr.freq = 0;
evsel->core.attr.sample_period = 1;
evsel->needs_auxtrace_mmap = true;
cs_etm_evsel = evsel;
opts->full_auxtrace = true;
}
}

@@ -345,6 +316,16 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
if (!cs_etm_evsel)
return 0;

ptr->evlist = evlist;
ptr->snapshot_mode = opts->auxtrace_snapshot_mode;

if (!record_opts__no_switch_events(opts) &&
perf_can_record_switch_events())
opts->record_switch_events = true;

cs_etm_evsel->needs_auxtrace_mmap = true;
opts->full_auxtrace = true;

ret = cs_etm_set_sink_attr(cs_etm_pmu, cs_etm_evsel);
if (ret)
return ret;
@@ -414,8 +395,8 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
}
}

/* We are in full trace mode but '-m,xyz' wasn't specified */
if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
/* Buffer sizes weren't specified with '-m,xyz' so give some defaults */
if (!opts->auxtrace_mmap_pages) {
if (privileged) {
opts->auxtrace_mmap_pages = MiB(4) / page_size;
} else {
@@ -423,7 +404,6 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
if (opts->mmap_pages == UINT_MAX)
opts->mmap_pages = KiB(256) / page_size;
}

}

if (opts->auxtrace_snapshot_mode)
@@ -437,38 +417,36 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
evlist__to_front(evlist, cs_etm_evsel);

/*
* In the case of per-cpu mmaps, we need the CPU on the
* AUX event. We also need the contextID in order to be notified
* get the CPU on the sample - need it to associate trace ID in the
* AUX_OUTPUT_HW_ID event, and the AUX event for per-cpu mmaps.
*/
evsel__set_sample_bit(cs_etm_evsel, CPU);

/*
* Also the case of per-cpu mmaps, need the contextID in order to be notified
* when a context switch happened.
*/
if (!perf_cpu_map__empty(cpus)) {
evsel__set_sample_bit(cs_etm_evsel, CPU);

err = cs_etm_set_option(itr, cs_etm_evsel,
BIT(ETM_OPT_CTXTID) | BIT(ETM_OPT_TS));
if (err)
goto out;
evsel__set_config_if_unset(cs_etm_pmu, cs_etm_evsel,
"timestamp", 1);
evsel__set_config_if_unset(cs_etm_pmu, cs_etm_evsel,
"contextid", 1);
}

/* Add dummy event to keep tracking */
if (opts->full_auxtrace) {
struct evsel *tracking_evsel;
err = parse_event(evlist, "dummy:u");
if (err)
goto out;
evsel = evlist__last(evlist);
evlist__set_tracking_event(evlist, evsel);
evsel->core.attr.freq = 0;
evsel->core.attr.sample_period = 1;

err = parse_event(evlist, "dummy:u");
if (err)
goto out;

tracking_evsel = evlist__last(evlist);
evlist__set_tracking_event(evlist, tracking_evsel);

tracking_evsel->core.attr.freq = 0;
tracking_evsel->core.attr.sample_period = 1;

/* In per-cpu case, always need the time of mmap events etc */
if (!perf_cpu_map__empty(cpus))
evsel__set_sample_bit(tracking_evsel, TIME);
}
/* In per-cpu case, always need the time of mmap events etc */
if (!perf_cpu_map__empty(cpus))
evsel__set_sample_bit(evsel, TIME);

err = cs_etm_validate_config(itr, cs_etm_evsel);
out:
return err;
}
@@ -659,8 +637,12 @@ static bool cs_etm_is_ete(struct auxtrace_record *itr, int cpu)
{
struct cs_etm_recording *ptr = container_of(itr, struct cs_etm_recording, itr);
struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
int trcdevarch = cs_etm_get_ro(cs_etm_pmu, cpu, metadata_ete_ro[CS_ETE_TRCDEVARCH]);
int trcdevarch;

if (!cs_etm_pmu_path_exists(cs_etm_pmu, cpu, metadata_ete_ro[CS_ETE_TRCDEVARCH]))
return false;

trcdevarch = cs_etm_get_ro(cs_etm_pmu, cpu, metadata_ete_ro[CS_ETE_TRCDEVARCH]);
/*
* ETE if ARCHVER is 5 (ARCHVER is 4 for ETM) and ARCHPART is 0xA13.
* See ETM_DEVARCH_ETE_ARCH in coresight-etm4x.h
@@ -675,8 +657,10 @@ static void cs_etm_save_etmv4_header(__u64 data[], struct auxtrace_record *itr,

/* Get trace configuration register */
data[CS_ETMV4_TRCCONFIGR] = cs_etmv4_get_config(itr);
/* Get traceID from the framework */
data[CS_ETMV4_TRCTRACEIDR] = coresight_get_trace_id(cpu);
/* traceID set to legacy version, in case new perf running on older system */
data[CS_ETMV4_TRCTRACEIDR] =
CORESIGHT_LEGACY_CPU_TRACE_ID(cpu) | CORESIGHT_TRACE_ID_UNUSED_FLAG;

/* Get read-only information from sysFS */
data[CS_ETMV4_TRCIDR0] = cs_etm_get_ro(cs_etm_pmu, cpu,
metadata_etmv4_ro[CS_ETMV4_TRCIDR0]);
@@ -694,8 +678,8 @@ static void cs_etm_save_etmv4_header(__u64 data[], struct auxtrace_record *itr,
data[CS_ETMV4_TS_SOURCE] = (__u64) cs_etm_get_ro_signed(cs_etm_pmu, cpu,
metadata_etmv4_ro[CS_ETMV4_TS_SOURCE]);
else {
pr_warning("[%03d] pmu file 'ts_source' not found. Fallback to safe value (-1)\n",
cpu);
pr_debug3("[%03d] pmu file 'ts_source' not found. Fallback to safe value (-1)\n",
cpu);
data[CS_ETMV4_TS_SOURCE] = (__u64) -1;
}
}
@@ -707,8 +691,10 @@ static void cs_etm_save_ete_header(__u64 data[], struct auxtrace_record *itr, in

/* Get trace configuration register */
data[CS_ETE_TRCCONFIGR] = cs_etmv4_get_config(itr);
/* Get traceID from the framework */
data[CS_ETE_TRCTRACEIDR] = coresight_get_trace_id(cpu);
/* traceID set to legacy version, in case new perf running on older system */
data[CS_ETE_TRCTRACEIDR] =
CORESIGHT_LEGACY_CPU_TRACE_ID(cpu) | CORESIGHT_TRACE_ID_UNUSED_FLAG;

/* Get read-only information from sysFS */
data[CS_ETE_TRCIDR0] = cs_etm_get_ro(cs_etm_pmu, cpu,
metadata_ete_ro[CS_ETE_TRCIDR0]);
@@ -729,8 +715,8 @@ static void cs_etm_save_ete_header(__u64 data[], struct auxtrace_record *itr, in
data[CS_ETE_TS_SOURCE] = (__u64) cs_etm_get_ro_signed(cs_etm_pmu, cpu,
metadata_ete_ro[CS_ETE_TS_SOURCE]);
else {
pr_warning("[%03d] pmu file 'ts_source' not found. Fallback to safe value (-1)\n",
cpu);
pr_debug3("[%03d] pmu file 'ts_source' not found. Fallback to safe value (-1)\n",
cpu);
data[CS_ETE_TS_SOURCE] = (__u64) -1;
}
}
@@ -764,9 +750,9 @@ static void cs_etm_get_metadata(int cpu, u32 *offset,
magic = __perf_cs_etmv3_magic;
/* Get configuration register */
info->priv[*offset + CS_ETM_ETMCR] = cs_etm_get_config(itr);
/* Get traceID from the framework */
/* traceID set to legacy value in case new perf running on old system */
info->priv[*offset + CS_ETM_ETMTRACEIDR] =
coresight_get_trace_id(cpu);
CORESIGHT_LEGACY_CPU_TRACE_ID(cpu) | CORESIGHT_TRACE_ID_UNUSED_FLAG;
/* Get read-only information from sysFS */
info->priv[*offset + CS_ETM_ETMCCER] =
cs_etm_get_ro(cs_etm_pmu, cpu,
@@ -895,7 +881,7 @@ struct auxtrace_record *cs_etm_record_init(int *err)
struct perf_pmu *cs_etm_pmu;
struct cs_etm_recording *ptr;

cs_etm_pmu = perf_pmu__find(CORESIGHT_ETM_PMU_NAME);
cs_etm_pmu = perf_pmus__find(CORESIGHT_ETM_PMU_NAME);

if (!cs_etm_pmu) {
*err = -EINVAL;
@@ -925,3 +911,22 @@ struct auxtrace_record *cs_etm_record_init(int *err)
out:
return NULL;
}

/*
* Set a default config to enable the user changed config tracking mechanism
* (CFG_CHG and evsel__set_config_if_unset()). If no default is set then user
* changes aren't tracked.
*/
struct perf_event_attr *
cs_etm_get_default_config(struct perf_pmu *pmu __maybe_unused)
{
struct perf_event_attr *attr;

attr = zalloc(sizeof(struct perf_event_attr));
if (!attr)
return NULL;

attr->sample_period = 1;

return attr;
}
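The recording-options hunk above replaces the hand-rolled cs_etm_set_option() with evsel__set_config_if_unset(), and the new cs_etm_get_default_config() exists precisely so that user-supplied terms are tracked and can win over the defaults. A standalone sketch of the set-if-unset idea with stand-in types; bit positions and field names here are illustrative:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

struct evsel_cfg {
	uint64_t config;
	uint64_t user_set_mask;	/* bits the user explicitly configured */
};

static void set_config_if_unset(struct evsel_cfg *c, uint64_t bit, bool on)
{
	if (c->user_set_mask & bit)
		return;			/* user's explicit choice wins */
	if (on)
		c->config |= bit;
	else
		c->config &= ~bit;
}

int main(void)
{
	/* user passed timestamp=0 explicitly (bit 28, illustrative) */
	struct evsel_cfg c = { .config = 0, .user_set_mask = 1ULL << 28 };

	set_config_if_unset(&c, 1ULL << 28, true);	/* respected: stays 0 */
	set_config_if_unset(&c, 1ULL << 14, true);	/* contextid: defaulted on */
	printf("config = %#llx\n", (unsigned long long)c.config);
	return 0;
}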
@@ -12,6 +12,7 @@
#include "arm-spe.h"
#include "hisi-ptt.h"
#include "../../../util/pmu.h"
#include "../../../util/cs-etm.h"

struct perf_event_attr
*perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
@@ -20,6 +21,7 @@ struct perf_event_attr
if (!strcmp(pmu->name, CORESIGHT_ETM_PMU_NAME)) {
/* add ETM default config here */
pmu->selectable = true;
return cs_etm_get_default_config(pmu);
#if defined(__aarch64__)
} else if (strstarts(pmu->name, ARM_SPE_PMU_NAME)) {
return arm_spe_pmu_default_config(pmu);
@@ -19,27 +19,28 @@ if ! test -r $input; then
exit 1
fi

create_table_from_c()
create_sc_table()
{
local sc nr last_sc
local sc nr max_nr

while read sc nr; do
printf "%s\n" " [$nr] = \"$sc\","
last_sc=$sc
max_nr=$nr
done

printf "%s\n" "#define SYSCALLTBL_ARM64_MAX_ID __NR_$last_sc"
echo "#define SYSCALLTBL_ARM64_MAX_ID $max_nr"
}

create_table()
{
echo "#include \"$input\""
echo "static const char *syscalltbl_arm64[] = {"
create_table_from_c
echo "static const char *const syscalltbl_arm64[] = {"
create_sc_table
echo "};"
}

$gcc -E -dM -x c -I $incpath/include/uapi $input \
|sed -ne 's/^#define __NR_//p' \
|sort -t' ' -k2 -n \
|awk '$2 ~ "__NR" && $3 !~ "__NR3264_" {
sub("^#define __NR(3264)?_", "");
print | "sort -k2 -n"}' \
|create_table
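For reference, the rewritten script still emits a plain C table of roughly the shape below. The syscall names and numbers shown are illustrative examples only, not the authoritative arm64 assignments:

#include <stdio.h>

/* Illustrative shape of the generated syscalltbl file. */
static const char *const syscalltbl_arm64[] = {
	[0] = "io_setup",
	[1] = "io_destroy",
	[63] = "read",
	[64] = "write",
};
#define SYSCALLTBL_ARM64_MAX_ID 64

int main(void)
{
	printf("%s\n", syscalltbl_arm64[63]);	/* prints "read" */
	return 0;
}

Emitting the numeric max_nr instead of __NR_$last_sc means the generated header no longer needs to include the kernel's unistd definitions to compile.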
@@ -26,14 +26,14 @@ static int sample_ustack(struct perf_sample *sample,

sp = (unsigned long) regs[PERF_REG_ARM64_SP];

map = maps__find(thread->maps, (u64)sp);
map = maps__find(thread__maps(thread), (u64)sp);
if (!map) {
pr_debug("failed to get stack map\n");
free(buf);
return -1;
}

stack_size = map->end - sp;
stack_size = map__end(map) - sp;
stack_size = stack_size > STACK_SIZE ? STACK_SIZE : stack_size;

memcpy(buf, (void *) sp, stack_size);
@@ -36,29 +36,6 @@ struct arm_spe_recording {
bool *wrapped;
};

static void arm_spe_set_timestamp(struct auxtrace_record *itr,
struct evsel *evsel)
{
struct arm_spe_recording *ptr;
struct perf_pmu *arm_spe_pmu;
struct evsel_config_term *term = evsel__get_config_term(evsel, CFG_CHG);
u64 user_bits = 0, bit;

ptr = container_of(itr, struct arm_spe_recording, itr);
arm_spe_pmu = ptr->arm_spe_pmu;

if (term)
user_bits = term->val.cfg_chg;

bit = perf_pmu__format_bits(&arm_spe_pmu->format, "ts_enable");

/* Skip if user has set it */
if (bit & user_bits)
return;

evsel->core.attr.config |= bit;
}

static size_t
arm_spe_info_priv_size(struct auxtrace_record *itr __maybe_unused,
struct evlist *evlist __maybe_unused)
@@ -238,7 +215,8 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
*/
if (!perf_cpu_map__empty(cpus)) {
evsel__set_sample_bit(arm_spe_evsel, CPU);
arm_spe_set_timestamp(itr, arm_spe_evsel);
evsel__set_config_if_unset(arm_spe_pmu, arm_spe_evsel,
"ts_enable", 1);
}

/*
@@ -479,7 +457,7 @@ static void arm_spe_recording_free(struct auxtrace_record *itr)
struct arm_spe_recording *sper =
container_of(itr, struct arm_spe_recording, itr);

free(sper->wrapped);
zfree(&sper->wrapped);
free(sper);
}

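The last hunk above swaps free() for zfree(), which also NULLs the pointer so a later double free becomes a harmless no-op. A standalone sketch of the idiom; this reimplements the tools/lib helper as a macro for illustration:

#include <stdlib.h>

/* Minimal stand-in for perf's zfree(): free and clear in one step. */
#define zfree(ptr) do { free(*(ptr)); *(ptr) = NULL; } while (0)

struct recording { int *wrapped; };

static void recording_free(struct recording *r)
{
	zfree(&r->wrapped);	/* r->wrapped is NULL afterwards */
	free(r);
}

int main(void)
{
	struct recording *r = calloc(1, sizeof(*r));

	if (!r)
		return 1;
	r->wrapped = calloc(4, sizeof(int));
	recording_free(r);
	return 0;
}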
@@ -29,8 +29,8 @@ static int _get_cpuid(char *buf, size_t sz, struct perf_cpu_map *cpus)
char path[PATH_MAX];
FILE *file;

scnprintf(path, PATH_MAX, "%s/devices/system/cpu/cpu%d"MIDR,
sysfs, cpus->map[cpu]);
scnprintf(path, PATH_MAX, "%s/devices/system/cpu/cpu%d" MIDR,
sysfs, RC_CHK_ACCESS(cpus)->map[cpu].cpu);

file = fopen(path, "r");
if (!file) {
@@ -11,7 +11,6 @@ define_exit_reasons_table(arm64_trap_exit_reasons, kvm_arm_exception_class);

const char *kvm_trap_exit_reason = "esr_ec";
const char *vcpu_id_str = "id";
const int decode_str_len = 20;
const char *kvm_exit_reason = "ret";
const char *kvm_entry_trace = "kvm:kvm_entry";
const char *kvm_exit_trace = "kvm:kvm_exit";
@@ -45,14 +44,14 @@ static bool event_begin(struct evsel *evsel,
struct perf_sample *sample __maybe_unused,
struct event_key *key __maybe_unused)
{
return !strcmp(evsel->name, kvm_entry_trace);
return evsel__name_is(evsel, kvm_entry_trace);
}

static bool event_end(struct evsel *evsel,
struct perf_sample *sample,
struct event_key *key)
{
if (!strcmp(evsel->name, kvm_exit_trace)) {
if (evsel__name_is(evsel, kvm_exit_trace)) {
event_get_key(evsel, sample, key);
return true;
}
@@ -20,7 +20,7 @@ struct perf_mem_event *perf_mem_events__ptr(int i)
return &perf_mem_events[i];
}

char *perf_mem_events__name(int i, char *pmu_name __maybe_unused)
const char *perf_mem_events__name(int i, const char *pmu_name __maybe_unused)
{
struct perf_mem_event *e = perf_mem_events__ptr(i);

@@ -3,6 +3,7 @@
#include <internal/cpumap.h>
#include "../../../util/cpumap.h"
#include "../../../util/pmu.h"
#include "../../../util/pmus.h"
#include <api/fs/fs.h>
#include <math.h>

@@ -10,15 +11,12 @@ static struct perf_pmu *pmu__find_core_pmu(void)
{
struct perf_pmu *pmu = NULL;

while ((pmu = perf_pmu__scan(pmu))) {
if (!is_pmu_core(pmu->name))
continue;

while ((pmu = perf_pmus__scan_core(pmu))) {
/*
* The cpumap should cover all CPUs. Otherwise, some CPUs may
* not support some events or have different event IDs.
*/
if (pmu->cpus->nr != cpu__max_cpu().cpu)
if (RC_CHK_ACCESS(pmu->cpus)->nr != cpu__max_cpu().cpu)
return NULL;

return pmu;
@@ -56,10 +54,11 @@ double perf_pmu__cpu_slots_per_cycle(void)
perf_pmu__pathname_scnprintf(path, sizeof(path),
pmu->name, "caps/slots");
/*
* The value of slots is not greater than 32 bits, but sysfs__read_int
* can't read value with 0x prefix, so use sysfs__read_ull instead.
* The value of slots is not greater than 32 bits, but
* filename__read_int can't read value with 0x prefix,
* so use filename__read_ull instead.
*/
sysfs__read_ull(path, &slots);
filename__read_ull(path, &slots);
}

return slots ? (double)slots : NAN;
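The first hunk above moves from scanning every PMU and filtering by name to the dedicated core-PMU iterator. A standalone sketch of the scan-style iterator pattern behind perf_pmus__scan_core(): pass NULL to start, the previous element to continue, and stop at NULL. Types and PMU names here are stand-ins:

#include <stdio.h>
#include <stddef.h>

struct pmu { const char *name; };

static struct pmu core_pmus[] = { { "armv8_pmuv3_0" }, { "armv8_pmuv3_1" } };
#define N_PMUS (sizeof(core_pmus) / sizeof(core_pmus[0]))

/* Illustrative reimplementation of the scan idiom over a fixed array. */
static struct pmu *pmus__scan_core(struct pmu *prev)
{
	size_t next = prev ? (size_t)(prev - core_pmus) + 1 : 0;
	return next < N_PMUS ? &core_pmus[next] : NULL;
}

int main(void)
{
	struct pmu *pmu = NULL;

	while ((pmu = pmus__scan_core(pmu)) != NULL)
		printf("core pmu: %s\n", pmu->name);
	return 0;
}

The caps/slots hunk in the same file is a related robustness fix: the sysfs value may carry a 0x prefix, which an int parser rejects, hence the switch to the unsigned-long-long reader.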
@@ -43,6 +43,20 @@ const char *const powerpc_triplets[] = {
NULL
};

const char *const riscv32_triplets[] = {
"riscv32-unknown-linux-gnu-",
"riscv32-linux-android-",
"riscv32-linux-gnu-",
NULL
};

const char *const riscv64_triplets[] = {
"riscv64-unknown-linux-gnu-",
"riscv64-linux-android-",
"riscv64-linux-gnu-",
NULL
};

const char *const s390_triplets[] = {
"s390-ibm-linux-",
"s390x-linux-gnu-",
@@ -130,7 +144,7 @@ static int lookup_triplets(const char *const *triplets, const char *name)
}

static int perf_env__lookup_binutils_path(struct perf_env *env,
const char *name, const char **path)
const char *name, char **path)
{
int idx;
const char *arch = perf_env__arch(env), *cross_env;
@@ -166,6 +180,10 @@ static int perf_env__lookup_binutils_path(struct perf_env *env,
path_list = arm64_triplets;
else if (!strcmp(arch, "powerpc"))
path_list = powerpc_triplets;
else if (!strcmp(arch, "riscv32"))
path_list = riscv32_triplets;
else if (!strcmp(arch, "riscv64"))
path_list = riscv64_triplets;
else if (!strcmp(arch, "sh"))
path_list = sh_triplets;
else if (!strcmp(arch, "s390"))
@@ -202,7 +220,7 @@ out_error:
return -1;
}

int perf_env__lookup_objdump(struct perf_env *env, const char **path)
int perf_env__lookup_objdump(struct perf_env *env, char **path)
{
/*
* For live mode, env->arch will be NULL and we can use
@@ -6,7 +6,7 @@

struct perf_env;

int perf_env__lookup_objdump(struct perf_env *env, const char **path);
int perf_env__lookup_objdump(struct perf_env *env, char **path);
bool perf_env__single_address_space(struct perf_env *env);

#endif /* ARCH_PERF_COMMON_H */
@@ -18,7 +18,7 @@ create_table()
{
local max_nr nr abi sc discard

echo 'static const char *syscalltbl_mips_n64[] = {'
echo 'static const char *const syscalltbl_mips_n64[] = {'
while read nr abi sc discard; do
printf '\t[%d] = "%s",\n' $nr $sc
max_nr=$nr
@@ -23,7 +23,7 @@ create_table()
max_nr=-1
nr=0

echo "static const char *syscalltbl_powerpc_${wordsize}[] = {"
echo "static const char *const syscalltbl_powerpc_${wordsize}[] = {"
while read nr abi sc discard; do
if [ "$max_nr" -lt "$nr" ]; then
printf '\t[%d] = "%s",\n' $nr $sc
@@ -26,14 +26,14 @@ static int sample_ustack(struct perf_sample *sample,

sp = (unsigned long) regs[PERF_REG_POWERPC_R1];

map = maps__find(thread->maps, (u64)sp);
map = maps__find(thread__maps(thread), (u64)sp);
if (!map) {
pr_debug("failed to get stack map\n");
free(buf);
return -1;
}

stack_size = map->end - sp;
stack_size = map__end(map) - sp;
stack_size = stack_size > STACK_SIZE ? STACK_SIZE : stack_size;

memcpy(buf, (void *) sp, stack_size);
@@ -45,6 +45,6 @@ int arch_get_runtimeparam(const struct pmu_metric *pm)
int count;
char path[PATH_MAX] = "/devices/hv_24x7/interface/";

atoi(pm->aggr_mode) == PerChip ? strcat(path, "sockets") : strcat(path, "coresperchip");
strcat(path, pm->aggr_mode == PerChip ? "sockets" : "coresperchip");
return sysfs__read_int(path, &count) < 0 ? 1 : count;
}
@@ -5,7 +5,7 @@
#include "util/debug.h"
#include "util/evsel.h"
#include "util/evlist.h"
#include "util/pmu.h"
#include "util/pmus.h"

#include "book3s_hv_exits.h"
#include "book3s_hcalls.h"
@@ -14,7 +14,6 @@
#define NR_TPS 4

const char *vcpu_id_str = "vcpu_id";
const int decode_str_len = 40;
const char *kvm_entry_trace = "kvm_hv:kvm_guest_enter";
const char *kvm_exit_trace = "kvm_hv:kvm_guest_exit";

@@ -61,13 +60,13 @@ static bool hcall_event_end(struct evsel *evsel,
struct perf_sample *sample __maybe_unused,
struct event_key *key __maybe_unused)
{
return (!strcmp(evsel->name, kvm_events_tp[3]));
return (evsel__name_is(evsel, kvm_events_tp[3]));
}

static bool hcall_event_begin(struct evsel *evsel,
struct perf_sample *sample, struct event_key *key)
{
if (!strcmp(evsel->name, kvm_events_tp[2])) {
if (evsel__name_is(evsel, kvm_events_tp[2])) {
hcall_event_get_key(evsel, sample, key);
return true;
}
@@ -80,7 +79,7 @@ static void hcall_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused,
{
const char *hcall_reason = get_hcall_exit_reason(key->key);

scnprintf(decode, decode_str_len, "%s", hcall_reason);
scnprintf(decode, KVM_EVENT_NAME_LEN, "%s", hcall_reason);
}

static struct kvm_events_ops hcall_events = {
@@ -205,7 +204,7 @@ int kvm_add_default_arch_event(int *argc, const char **argv)

parse_options(j, tmp, event_options, NULL, PARSE_OPT_KEEP_UNKNOWN);
if (!event) {
if (pmu_have_event("trace_imc", "trace_cycles")) {
if (perf_pmus__have_event("trace_imc", "trace_cycles")) {
argv[j++] = strdup("-e");
argv[j++] = strdup("trace_imc/trace_cycles/");
*argc += 2;
@@ -3,10 +3,10 @@
#include "mem-events.h"

/* PowerPC does not support 'ldlat' parameter. */
char *perf_mem_events__name(int i, char *pmu_name __maybe_unused)
const char *perf_mem_events__name(int i, const char *pmu_name __maybe_unused)
{
if (i == PERF_MEM_EVENTS__LOAD)
return (char *) "cpu/mem-loads/";
return "cpu/mem-loads/";

return (char *) "cpu/mem-stores/";
return "cpu/mem-stores/";
}
|
@ -250,19 +250,21 @@ int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain)
|
|||
if (!chain || chain->nr < 3)
|
||||
return skip_slot;
|
||||
|
||||
addr_location__init(&al);
|
||||
ip = chain->ips[1];
|
||||
|
||||
thread__find_symbol(thread, PERF_RECORD_MISC_USER, ip, &al);
|
||||
|
||||
if (al.map)
|
||||
dso = al.map->dso;
|
||||
dso = map__dso(al.map);
|
||||
|
||||
if (!dso) {
|
||||
pr_debug("%" PRIx64 " dso is NULL\n", ip);
|
||||
addr_location__exit(&al);
|
||||
return skip_slot;
|
||||
}
|
||||
|
||||
rc = check_return_addr(dso, al.map->start, ip);
|
||||
rc = check_return_addr(dso, map__start(al.map), ip);
|
||||
|
||||
pr_debug("[DSO %s, sym %s, ip 0x%" PRIx64 "] rc %d\n",
|
||||
dso->long_name, al.sym->name, ip, rc);
|
||||
|
@ -279,5 +281,7 @@ int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain)
|
|||
*/
|
||||
skip_slot = 3;
|
||||
}
|
||||
|
||||
addr_location__exit(&al);
|
||||
return skip_slot;
|
||||
}
|
||||
|
|
|
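The hunk above brackets the lookup with addr_location__init() and addr_location__exit(): once maps are reference-counted, every resolved location must be released on every return path, including the early error return. A standalone sketch of the pairing with a stand-in implementation:

#include <stdlib.h>

struct map { int refcnt; };

struct addr_location { struct map *map; };

static void addr_location__init(struct addr_location *al) { al->map = NULL; }

static void addr_location__exit(struct addr_location *al)
{
	if (al->map && --al->map->refcnt == 0)
		free(al->map);
	al->map = NULL;
}

/* Stand-in for a lookup that hands back a referenced map. */
static int resolve(struct addr_location *al)
{
	al->map = calloc(1, sizeof(*al->map));
	if (!al->map)
		return -1;
	al->map->refcnt = 1;
	return 0;
}

int main(void)
{
	struct addr_location al;

	addr_location__init(&al);
	if (resolve(&al) == 0) {
		/* ... use al.map ... */
	}
	addr_location__exit(&al);	/* runs on every return path */
	return 0;
}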
@@ -104,7 +104,7 @@ void arch__fix_tev_from_maps(struct perf_probe_event *pev,

lep_offset = PPC64_LOCAL_ENTRY_OFFSET(sym->arch_sym);

if (map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS)
if (map__dso(map)->symtab_type == DSO_BINARY_TYPE__KALLSYMS)
tev->point.offset += PPC64LE_LEP_OFFSET;
else if (lep_offset) {
if (pev->uprobes)
@@ -131,7 +131,7 @@ void arch__post_process_probe_trace_events(struct perf_probe_event *pev,
for (i = 0; i < ntevs; i++) {
tev = &pev->tevs[i];
map__for_each_symbol(map, sym, tmp) {
if (map->unmap_ip(map, sym->start) == tev->point.address) {
if (map__unmap_ip(map, sym->start) == tev->point.address) {
arch__fix_tev_from_maps(pev, tev, map, sym);
break;
}
@@ -39,7 +39,7 @@ static int s390_call__parse(struct arch *arch, struct ins_operands *ops,
target.addr = map__objdump_2mem(map, ops->target.addr);

if (maps__find_ams(ms->maps, &target) == 0 &&
map__rip_2objdump(target.ms.map, map->map_ip(target.ms.map, target.addr)) == ops->target.addr)
map__rip_2objdump(target.ms.map, map__map_ip(target.ms.map, target.addr)) == ops->target.addr)
ops->target.sym = target.ms.sym;

return 0;
@@ -18,7 +18,7 @@ create_table()
{
local max_nr nr abi sc discard

echo 'static const char *syscalltbl_s390_64[] = {'
echo 'static const char *const syscalltbl_s390_64[] = {'
while read nr abi sc discard; do
printf '\t[%d] = "%s",\n' $nr $sc
max_nr=$nr
@@ -6,5 +6,6 @@ perf-$(CONFIG_DWARF) += dwarf-regs.o
perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o

perf-y += machine.o
perf-y += pmu.o

perf-$(CONFIG_AUXTRACE) += auxtrace.o
@@ -19,7 +19,6 @@ define_exit_reasons_table(sie_diagnose_codes, diagnose_codes);
define_exit_reasons_table(sie_icpt_prog_codes, icpt_prog_codes);

const char *vcpu_id_str = "id";
const int decode_str_len = 40;
const char *kvm_exit_reason = "icptcode";
const char *kvm_entry_trace = "kvm:kvm_s390_sie_enter";
const char *kvm_exit_trace = "kvm:kvm_s390_sie_exit";
@@ -0,0 +1,23 @@
// SPDX-License-Identifier: GPL-2.0

/*
* Copyright IBM Corp. 2023
* Author(s): Thomas Richter <tmricht@linux.ibm.com>
*/

#include <string.h>

#include "../../../util/pmu.h"

#define S390_PMUPAI_CRYPTO "pai_crypto"
#define S390_PMUPAI_EXT "pai_ext"
#define S390_PMUCPUM_CF "cpum_cf"

struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu)
{
if (!strcmp(pmu->name, S390_PMUPAI_CRYPTO) ||
!strcmp(pmu->name, S390_PMUPAI_EXT) ||
!strcmp(pmu->name, S390_PMUCPUM_CF))
pmu->selectable = true;
return NULL;
}
@@ -1,46 +1,37 @@
// SPDX-License-Identifier: GPL-2.0
/*
* x86 instruction nmemonic table to parse disasm lines for annotate.
* This table is searched twice - one for exact match and another for
* match without a size suffix (b, w, l, q) in case of AT&T syntax.
*
* So this table should not have entries with the suffix unless it's
* a complete different instruction than ones without the suffix.
*/
static struct ins x86__instructions[] = {
{ .name = "adc", .ops = &mov_ops, },
{ .name = "adcb", .ops = &mov_ops, },
{ .name = "adcl", .ops = &mov_ops, },
{ .name = "add", .ops = &mov_ops, },
{ .name = "addl", .ops = &mov_ops, },
{ .name = "addq", .ops = &mov_ops, },
{ .name = "addsd", .ops = &mov_ops, },
{ .name = "addw", .ops = &mov_ops, },
{ .name = "and", .ops = &mov_ops, },
{ .name = "andb", .ops = &mov_ops, },
{ .name = "andl", .ops = &mov_ops, },
{ .name = "andpd", .ops = &mov_ops, },
{ .name = "andps", .ops = &mov_ops, },
{ .name = "andq", .ops = &mov_ops, },
{ .name = "andw", .ops = &mov_ops, },
{ .name = "bsr", .ops = &mov_ops, },
{ .name = "bt", .ops = &mov_ops, },
{ .name = "btr", .ops = &mov_ops, },
{ .name = "bts", .ops = &mov_ops, },
{ .name = "btsq", .ops = &mov_ops, },
{ .name = "call", .ops = &call_ops, },
{ .name = "callq", .ops = &call_ops, },
{ .name = "cmovbe", .ops = &mov_ops, },
{ .name = "cmove", .ops = &mov_ops, },
{ .name = "cmovae", .ops = &mov_ops, },
{ .name = "cmp", .ops = &mov_ops, },
{ .name = "cmpb", .ops = &mov_ops, },
{ .name = "cmpl", .ops = &mov_ops, },
{ .name = "cmpq", .ops = &mov_ops, },
{ .name = "cmpw", .ops = &mov_ops, },
{ .name = "cmpxch", .ops = &mov_ops, },
{ .name = "cmpxchg", .ops = &mov_ops, },
{ .name = "cs", .ops = &mov_ops, },
{ .name = "dec", .ops = &dec_ops, },
{ .name = "decl", .ops = &dec_ops, },
{ .name = "divsd", .ops = &mov_ops, },
{ .name = "divss", .ops = &mov_ops, },
{ .name = "gs", .ops = &mov_ops, },
{ .name = "imul", .ops = &mov_ops, },
{ .name = "inc", .ops = &dec_ops, },
{ .name = "incl", .ops = &dec_ops, },
{ .name = "ja", .ops = &jump_ops, },
{ .name = "jae", .ops = &jump_ops, },
{ .name = "jb", .ops = &jump_ops, },
@@ -54,7 +45,6 @@ static struct ins x86__instructions[] = {
{ .name = "jl", .ops = &jump_ops, },
{ .name = "jle", .ops = &jump_ops, },
{ .name = "jmp", .ops = &jump_ops, },
{ .name = "jmpq", .ops = &jump_ops, },
{ .name = "jna", .ops = &jump_ops, },
{ .name = "jnae", .ops = &jump_ops, },
{ .name = "jnb", .ops = &jump_ops, },
@@ -81,48 +71,32 @@ static struct ins x86__instructions[] = {
{ .name = "mov", .ops = &mov_ops, },
{ .name = "movapd", .ops = &mov_ops, },
{ .name = "movaps", .ops = &mov_ops, },
{ .name = "movb", .ops = &mov_ops, },
{ .name = "movdqa", .ops = &mov_ops, },
{ .name = "movdqu", .ops = &mov_ops, },
{ .name = "movl", .ops = &mov_ops, },
{ .name = "movq", .ops = &mov_ops, },
{ .name = "movsd", .ops = &mov_ops, },
{ .name = "movslq", .ops = &mov_ops, },
{ .name = "movss", .ops = &mov_ops, },
{ .name = "movupd", .ops = &mov_ops, },
{ .name = "movups", .ops = &mov_ops, },
{ .name = "movw", .ops = &mov_ops, },
{ .name = "movzbl", .ops = &mov_ops, },
{ .name = "movzwl", .ops = &mov_ops, },
{ .name = "mulsd", .ops = &mov_ops, },
{ .name = "mulss", .ops = &mov_ops, },
{ .name = "nop", .ops = &nop_ops, },
{ .name = "nopl", .ops = &nop_ops, },
{ .name = "nopw", .ops = &nop_ops, },
{ .name = "or", .ops = &mov_ops, },
{ .name = "orb", .ops = &mov_ops, },
{ .name = "orl", .ops = &mov_ops, },
{ .name = "orps", .ops = &mov_ops, },
{ .name = "orq", .ops = &mov_ops, },
{ .name = "pand", .ops = &mov_ops, },
{ .name = "paddq", .ops = &mov_ops, },
{ .name = "pcmpeqb", .ops = &mov_ops, },
{ .name = "por", .ops = &mov_ops, },
{ .name = "rclb", .ops = &mov_ops, },
{ .name = "rcll", .ops = &mov_ops, },
{ .name = "rcl", .ops = &mov_ops, },
{ .name = "ret", .ops = &ret_ops, },
{ .name = "retq", .ops = &ret_ops, },
{ .name = "sbb", .ops = &mov_ops, },
{ .name = "sbbl", .ops = &mov_ops, },
{ .name = "sete", .ops = &mov_ops, },
{ .name = "sub", .ops = &mov_ops, },
{ .name = "subl", .ops = &mov_ops, },
{ .name = "subq", .ops = &mov_ops, },
{ .name = "subsd", .ops = &mov_ops, },
{ .name = "subw", .ops = &mov_ops, },
{ .name = "test", .ops = &mov_ops, },
{ .name = "testb", .ops = &mov_ops, },
{ .name = "testl", .ops = &mov_ops, },
{ .name = "tzcnt", .ops = &mov_ops, },
{ .name = "ucomisd", .ops = &mov_ops, },
{ .name = "ucomiss", .ops = &mov_ops, },
{ .name = "vaddsd", .ops = &mov_ops, },
@@ -135,11 +109,9 @@ static struct ins x86__instructions[] = {
{ .name = "vsubsd", .ops = &mov_ops, },
{ .name = "vucomisd", .ops = &mov_ops, },
{ .name = "xadd", .ops = &mov_ops, },
{ .name = "xbeginl", .ops = &jump_ops, },
{ .name = "xbeginq", .ops = &jump_ops, },
{ .name = "xbegin", .ops = &jump_ops, },
{ .name = "xchg", .ops = &mov_ops, },
{ .name = "xor", .ops = &mov_ops, },
{ .name = "xorb", .ops = &mov_ops, },
{ .name = "xorpd", .ops = &mov_ops, },
{ .name = "xorps", .ops = &mov_ops, },
};
@@ -18,7 +18,7 @@ emit() {
syscall_macro "$nr" "$entry"
}

echo "static const char *syscalltbl_${arch}[] = {"
echo "static const char *const syscalltbl_${arch}[] = {"

sorted_table=$(mktemp /tmp/syscalltbl.XXXXXX)
grep '^[0-9]' "$in" | sort -n > $sorted_table
@@ -6,11 +6,15 @@ struct test_suite;

/* Tests */
int test__rdpmc(struct test_suite *test, int subtest);
#ifdef HAVE_EXTRA_TESTS
int test__insn_x86(struct test_suite *test, int subtest);
#endif
int test__intel_pt_pkt_decoder(struct test_suite *test, int subtest);
int test__intel_pt_hybrid_compat(struct test_suite *test, int subtest);
int test__bp_modify(struct test_suite *test, int subtest);
int test__x86_sample_parsing(struct test_suite *test, int subtest);
int test__amd_ibs_via_core_pmu(struct test_suite *test, int subtest);
int test__hybrid(struct test_suite *test, int subtest);

extern struct test_suite *arch_tests[];
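The HAVE_EXTRA_TESTS guard above pairs with the EXTRA_TESTS knob added to Makefile.config earlier in this diff: the declaration, the object, and the registration are all compiled out unless EXTRA_TESTS=1 was given. A standalone sketch of how such a guarded test typically ends up in the suite table; the types and registration shape here are illustrative stand-ins:

#include <stdio.h>
#include <stddef.h>

struct test_suite { const char *name; };

#ifdef HAVE_EXTRA_TESTS
static struct test_suite insn_x86 = { "x86 instruction decoder" };
#endif

/* NULL-terminated table, mirroring the arch_tests[] convention. */
static struct test_suite *arch_tests[] = {
#ifdef HAVE_EXTRA_TESTS
	&insn_x86,
#endif
	NULL,
};

int main(void)
{
	for (size_t i = 0; arch_tests[i]; i++)
		printf("test: %s\n", arch_tests[i]->name);
	return 0;
}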