android_kernel_samsung_hero.../drivers/soc/qcom/perf_event_l2.c

/* Copyright (c) 2015, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
* only version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#define pr_fmt(fmt) "l2 perfevents: " fmt
#include <linux/module.h>
#include <linux/bitops.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/irq.h>
#include <linux/list.h>
#include <linux/of.h>
#include <linux/acpi.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>
#include <soc/qcom/perf_event_l2.h>
#include <soc/qcom/kryo-l2-accessors.h>
/*
* The cache is made up of one or more slices, each of which has its own PMU.
* This structure represents one of the hardware PMUs.
*/
struct hml2_pmu {
struct list_head entry;
u32 cluster;
struct perf_event *events[MAX_L2_CTRS];
unsigned long used_mask[BITS_TO_LONGS(MAX_L2_CTRS)];
atomic64_t prev_count[MAX_L2_CTRS];
spinlock_t pmu_lock;
};
/*
* Aggregate PMU. Implements the core pmu functions and manages
* the hardware PMUs.
*/
struct l2cache_pmu {
u32 num_pmus;
struct list_head pmus;
struct pmu pmu;
int num_counters;
};
#define to_l2cache_pmu(p) (container_of(p, struct l2cache_pmu, pmu))
static struct l2cache_pmu l2cache_pmu = { 0 };
static u32 l2_cycle_ctr_idx;
static u32 l2_reset_mask;
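/*
* Map a counter index to its bit position in the enable/overflow control
* registers: the cycle counter uses the dedicated L2CYCLE_CTR_BIT, the
* event counters use their index directly.
*/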
static inline u32 idx_to_reg(u32 idx)
{
u32 bit;
if (idx == l2_cycle_ctr_idx)
bit = 1 << L2CYCLE_CTR_BIT;
else
bit = 1 << idx;
return bit;
}
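/*
* Find the slice (per-cluster hardware PMU) serving the given CPU, or the
* current CPU when cpu < 0 (task-mode events). The cpu >> 1 mapping assumes
* two CPUs per cluster, consistent with the rest of this driver.
*/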
static struct hml2_pmu *get_hml2_pmu(struct l2cache_pmu *system, int cpu)
{
u32 cluster;
struct hml2_pmu *slice;
if (cpu < 0)
cpu = smp_processor_id();
cluster = cpu >> 1;
list_for_each_entry(slice, &system->pmus, entry) {
if (slice->cluster == cluster)
return slice;
}
pr_err("L2 cluster not found for CPU %d\n", cpu);
return NULL;
}
static
void hml2_pmu__reset_on_slice(void *x)
{
/* Reset all ctrs */
set_l2_indirect_reg(L2PMCR, L2PMCR_RESET_ALL);
set_l2_indirect_reg(L2PMCNTENCLR, l2_reset_mask);
set_l2_indirect_reg(L2PMINTENCLR, l2_reset_mask);
set_l2_indirect_reg(L2PMOVSCLR, l2_reset_mask);
}
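/*
* Reset a slice's counters. The indirect L2 registers are presumably only
* reachable from a CPU within the owning cluster, so when running elsewhere
* the reset is issued via smp_call_function_single() on one of its CPUs.
*/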
static inline
void hml2_pmu__reset(struct hml2_pmu *slice)
{
int cpu;
int i;
if ((smp_processor_id() >> 1) == slice->cluster) {
hml2_pmu__reset_on_slice(NULL);
return;
}
cpu = slice->cluster << 1;
/* Call each cpu in the cluster until one works */
for (i = 0; i <= 1; i++) {
if (!smp_call_function_single(cpu | i, hml2_pmu__reset_on_slice,
NULL, 1))
return;
}
pr_err("Failed to reset on cluster %d\n", slice->cluster);
}
static inline
void hml2_pmu__init(struct hml2_pmu *slice)
{
hml2_pmu__reset(slice);
}
static inline
void hml2_pmu__enable(void)
{
isb();
set_l2_indirect_reg(L2PMCR, L2PMCR_GLOBAL_ENABLE);
}
static inline
void hml2_pmu__disable(void)
{
set_l2_indirect_reg(L2PMCR, L2PMCR_GLOBAL_DISABLE);
isb();
}
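/*
* Write a counter value. The cycle counter is 64 bits wide and split across
* L2PMCCNTR1 (high word) and L2PMCCNTR0 (low word); event counters are
* 32 bits wide.
*/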
static inline
void hml2_pmu__counter_set_value(u32 idx, u64 value)
{
u32 counter_reg;
if (idx == l2_cycle_ctr_idx) {
set_l2_indirect_reg(L2PMCCNTR1, (u32)(value >> 32));
set_l2_indirect_reg(L2PMCCNTR0, (u32)(value & 0xFFFFFFFF));
} else {
counter_reg = (idx * IA_L2_REG_OFFSET) + IA_L2PMXEVCNTR_BASE;
set_l2_indirect_reg(counter_reg, (u32)(value & 0xFFFFFFFF));
}
}
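/*
* Read a counter value. For the 64-bit cycle counter the high word is
* re-read until it is stable, so the combined value is a consistent
* snapshot.
*/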
static inline
u64 hml2_pmu__counter_get_value(u32 idx)
{
u64 value;
u32 counter_reg;
u32 hi, lo;
if (idx == l2_cycle_ctr_idx) {
do {
hi = get_l2_indirect_reg(L2PMCCNTR1);
lo = get_l2_indirect_reg(L2PMCCNTR0);
} while (hi != get_l2_indirect_reg(L2PMCCNTR1));
value = ((u64)hi << 32) | lo;
} else {
counter_reg = (idx * IA_L2_REG_OFFSET) + IA_L2PMXEVCNTR_BASE;
value = get_l2_indirect_reg(counter_reg);
}
return value;
}
static inline
void hml2_pmu__counter_enable(u32 idx)
{
u32 reg;
reg = get_l2_indirect_reg(L2PMCNTENSET);
reg |= idx_to_reg(idx);
set_l2_indirect_reg(L2PMCNTENSET, reg);
}
static inline
void hml2_pmu__counter_disable(u32 idx)
{
set_l2_indirect_reg(L2PMCNTENCLR, idx_to_reg(idx));
}
static inline
void hml2_pmu__counter_enable_interrupt(u32 idx)
{
u32 reg;
reg = get_l2_indirect_reg(L2PMINTENSET);
reg |= idx_to_reg(idx);
set_l2_indirect_reg(L2PMINTENSET, reg);
}
static inline
void hml2_pmu__counter_disable_interrupt(u32 idx)
{
set_l2_indirect_reg(L2PMINTENCLR, idx_to_reg(idx));
}
static inline
void hml2_pmu__set_evcntcr(u32 ctr, u32 val)
{
u32 evtcr_reg = (ctr * IA_L2_REG_OFFSET) + IA_L2PMXEVCNTCR_BASE;
set_l2_indirect_reg(evtcr_reg, val);
}
static inline
void hml2_pmu__set_ccntcr(u32 val)
{
set_l2_indirect_reg(L2PMCCNTCR, val);
}
static inline
void hml2_pmu__set_evtyper(u32 val, u32 ctr)
{
u32 evtype_reg = (ctr * IA_L2_REG_OFFSET) + IA_L2PMXEVTYPER_BASE;
set_l2_indirect_reg(evtype_reg, val);
}
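/*
* Program the event selection (RESR) registers. Each event group owns an
* 8-bit code field: groups 0-3 live in L2PMRESRL, higher groups in
* L2PMRESRH. The enable bit lives in L2PMRESRH and must be set whichever
* half is programmed, hence the extra read-modify-write below.
*/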
static
void hml2_pmu__set_evres(struct hml2_pmu *slice,
u32 event_group, u32 event_reg, u32 event_cc)
{
u32 group_reg;
u32 group_val;
u32 group_mask;
u32 resr_val;
u32 shift;
unsigned long iflags;
shift = 8 * (event_group & 3);
group_val = (event_cc & 0xff) << shift;
group_mask = ~(0xff << shift);
if (event_group <= 3) {
group_reg = L2PMRESRL;
} else {
group_reg = L2PMRESRH;
group_val |= L2PMRESRH_EN;
}
spin_lock_irqsave(&slice->pmu_lock, iflags);
resr_val = get_l2_indirect_reg(group_reg);
resr_val &= group_mask;
resr_val |= group_val;
set_l2_indirect_reg(group_reg, resr_val);
/* The enable bit has to be set in RESRH, if it's not set already */
if (group_reg != L2PMRESRH) {
resr_val = get_l2_indirect_reg(L2PMRESRH);
if (!(resr_val & L2PMRESRH_EN)) {
resr_val |= L2PMRESRH_EN;
set_l2_indirect_reg(L2PMRESRH, resr_val);
}
}
spin_unlock_irqrestore(&slice->pmu_lock, iflags);
}
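/*
* Configure the per-counter origin filter. In task mode counting is
* restricted to the current CPU within the cluster; in system mode a
* single CPU is tracked only for "trace" (L2_TRACECTR_PREFIX) events,
* otherwise requests from all origins are counted.
*/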
static void
hml2_pmu__set_evfilter_task_mode(int ctr)
{
u32 filter_reg = (ctr * IA_L2_REG_OFFSET) + IA_L2PMXEVFILTER_BASE;
u32 l2_orig_filter = L2PMXEVFILTER_SUFILTER_ALL |
L2PMXEVFILTER_ORGFILTER_IDINDEP;
u32 filter_val = l2_orig_filter | 1 << (smp_processor_id() % 2);
set_l2_indirect_reg(filter_reg, filter_val);
}
static void
hml2_pmu__set_evfilter_sys_mode(int ctr, int cpu, unsigned int is_tracectr)
{
u32 filter_reg = (ctr * IA_L2_REG_OFFSET) + IA_L2PMXEVFILTER_BASE;
u32 filter_val;
u32 l2_orig_filter = L2PMXEVFILTER_SUFILTER_ALL |
L2PMXEVFILTER_ORGFILTER_IDINDEP;
if (is_tracectr == 1)
filter_val = l2_orig_filter | 1 << (cpu % 2);
else
filter_val = l2_orig_filter | L2PMXEVFILTER_ORGFILTER_ALL;
set_l2_indirect_reg(filter_reg, filter_val);
}
static inline
void hml2_pmu__reset_ovsr(u32 idx)
{
set_l2_indirect_reg(L2PMOVSCLR, idx_to_reg(idx));
}
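/*
* Read the overflow status register and clear whatever was set, returning
* the snapshot so the interrupt handler can see which counters overflowed.
*/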
static inline
u32 hml2_pmu__getreset_ovsr(void)
{
u32 result = get_l2_indirect_reg(L2PMOVSSET);
set_l2_indirect_reg(L2PMOVSCLR, result);
return result;
}
static inline
int hml2_pmu__has_overflowed(u32 ovsr)
{
return (ovsr & l2_reset_mask) != 0;
}
static inline
int hml2_pmu__counter_has_overflowed(u32 ovsr, u32 idx)
{
return (ovsr & idx_to_reg(idx)) != 0;
}
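/*
* Fold the hardware counter into the perf event count. The cmpxchg loop
* guards against a concurrent update of prev_count (e.g. from the overflow
* interrupt); the delta is 64-bit for the cycle counter and unsigned 32-bit
* for the wrapping event counters.
*/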
static
void l2_cache__event_update_from_slice(struct perf_event *event,
struct hml2_pmu *slice)
{
struct hw_perf_event *hwc = &event->hw;
u64 delta64, prev, now;
u32 delta;
u32 idx = hwc->idx;
again:
prev = atomic64_read(&slice->prev_count[idx]);
now = hml2_pmu__counter_get_value(idx);
if (atomic64_cmpxchg(&slice->prev_count[idx], prev, now) != prev)
goto again;
if (idx == l2_cycle_ctr_idx) {
/*
* The cycle counter is 64 bits wide, so it needs separate
* handling of the 64-bit delta.
*/
delta64 = now - prev;
local64_add(delta64, &event->count);
local64_sub(delta64, &hwc->period_left);
} else {
/*
* 32-bit counters rely on unsigned 32-bit arithmetic so the
* delta is still correct when the counter wraps and now < prev.
*/
delta = now - prev;
local64_add(delta, &event->count);
local64_sub(delta, &hwc->period_left);
}
}
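/*
* Set up the next period for a counting (non-sampling) event: restart the
* counter near the top of its range (keeping prev_count in sync with the
* programmed value) so it overflows roughly every L2_CNT_PERIOD counts and
* is folded back into the event before it can wrap.
*/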
static
void l2_cache__slice_set_period(struct hml2_pmu *slice,
struct hw_perf_event *hwc)
{
u64 value = L2_MAX_PERIOD - (hwc->sample_period - 1);
u32 idx = hwc->idx;
u64 prev = atomic64_read(&slice->prev_count[idx]);
if (prev < value) {
value += prev;
atomic64_set(&slice->prev_count[idx], value);
} else {
value = prev;
}
hml2_pmu__reset_ovsr(idx);
hml2_pmu__counter_set_value(idx, value);
}
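/*
* Set up the next period for a sampling event, following the usual perf
* pattern: clamp period_left to the counter range and program the counter
* to -left so it overflows after "left" more events.
*/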
static
int l2_cache__event_set_period(struct perf_event *event,
struct hw_perf_event *hwc)
{
struct l2cache_pmu *system = to_l2cache_pmu(event->pmu);
struct hml2_pmu *slice = get_hml2_pmu(system, event->cpu);
s64 left = local64_read(&hwc->period_left);
s64 period = hwc->sample_period;
int ret = 0;
u32 idx;
if (unlikely(!slice))
return ret;
if (unlikely(left <= -period)) {
left = period;
local64_set(&hwc->period_left, left);
hwc->last_period = period;
ret = 1;
}
if (unlikely(left <= 0)) {
left += period;
local64_set(&hwc->period_left, left);
hwc->last_period = period;
ret = 1;
}
if (left > (s64)L2_MAX_PERIOD)
left = L2_MAX_PERIOD;
idx = hwc->idx;
atomic64_set(&slice->prev_count[idx], (u64)-left);
hml2_pmu__reset_ovsr(idx);
hml2_pmu__counter_set_value(idx, (u64)-left);
perf_event_update_userpage(event);
return ret;
}
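/*
* Allocate a counter on this slice. The cycle-counter raw code always maps
* to the dedicated cycle counter; other events take the first free event
* counter (the last index is reserved for the cycle counter).
*/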
static
int l2_cache__get_event_idx(struct hml2_pmu *slice,
struct hw_perf_event *hwc)
{
int idx;
if (hwc->config_base == L2CYCLE_CTR_RAW_CODE) {
if (test_and_set_bit(l2_cycle_ctr_idx, slice->used_mask))
return -EAGAIN;
return l2_cycle_ctr_idx;
}
for (idx = 0; idx < l2cache_pmu.num_counters - 1; idx++) {
if (!test_and_set_bit(idx, slice->used_mask))
return idx;
}
/* The counters are all in use. */
return -EAGAIN;
}
static
void l2_cache__event_disable(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
if (!(hwc->state & PERF_HES_STOPPED)) {
hml2_pmu__counter_disable_interrupt(hwc->idx);
hml2_pmu__counter_disable(hwc->idx);
}
}
static inline
int is_sampling(struct perf_event *event)
{
return event->attr.sample_type != 0;
}
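/*
* Per-slice overflow interrupt handler: snapshot and clear the overflow
* status, update each overflowed event and restart its period (going
* through the generic perf overflow path for sampling events).
*/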
static
irqreturn_t l2_cache__handle_irq(int irq_num, void *data)
{
struct hml2_pmu *slice = data;
u32 ovsr;
int idx;
struct pt_regs *regs;
ovsr = hml2_pmu__getreset_ovsr();
if (!hml2_pmu__has_overflowed(ovsr))
return IRQ_NONE;
regs = get_irq_regs();
for (idx = 0; idx < l2cache_pmu.num_counters; idx++) {
struct perf_event *event = slice->events[idx];
struct hw_perf_event *hwc;
struct perf_sample_data data;
if (!event)
continue;
if (!hml2_pmu__counter_has_overflowed(ovsr, idx))
continue;
l2_cache__event_update_from_slice(event, slice);
hwc = &event->hw;
if (is_sampling(event)) {
perf_sample_data_init(&data, 0, hwc->last_period);
if (!l2_cache__event_set_period(event, hwc))
continue;
if (perf_event_overflow(event, &data, regs))
l2_cache__event_disable(event);
} else {
l2_cache__slice_set_period(slice, hwc);
}
}
/*
* Handle the pending perf events.
*
* Note: this call *must* be run with interrupts disabled. For
* platforms that can have the PMU interrupts raised as an NMI, this
* will not work.
*/
irq_work_run();
return IRQ_HANDLED;
}
/*
* Implementation of abstract pmu functionality required by
* the core perf events code.
*/
static
void l2_cache__pmu_enable(struct pmu *pmu)
{
/* Ensure all programming commands are done before proceeding */
wmb();
hml2_pmu__enable();
}
static
void l2_cache__pmu_disable(struct pmu *pmu)
{
hml2_pmu__disable();
/* Ensure the basic counter unit is stopped before proceeding */
wmb();
}
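/*
* Validate and initialize a new event. The hardware cannot filter by
* privilege level, and pure counting events are given a default period
* (L2_CNT_PERIOD) so the overflow machinery works the same way for them.
*/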
static
int l2_cache__event_init(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
if (event->attr.type != l2cache_pmu.pmu.type)
return -ENOENT;
/* We cannot filter accurately so we just don't allow it. */
if (event->attr.exclude_user || event->attr.exclude_kernel ||
event->attr.exclude_hv || event->attr.exclude_idle)
return -EINVAL;
hwc->idx = -1;
hwc->config_base = event->attr.config;
/*
* For counting events use L2_CNT_PERIOD which allows for simplified
* math and proper handling of overflows in the presence of IRQs and
* SMP.
*/
if (hwc->sample_period == 0) {
hwc->sample_period = L2_CNT_PERIOD;
hwc->last_period = hwc->sample_period;
local64_set(&hwc->period_left, hwc->sample_period);
}
return 0;
}
static
void l2_cache__event_update(struct perf_event *event)
{
struct l2cache_pmu *system = to_l2cache_pmu(event->pmu);
struct hml2_pmu *slice;
struct hw_perf_event *hwc = &event->hw;
if (hwc->idx == -1)
return;
slice = get_hml2_pmu(system, event->cpu);
if (unlikely(!slice))
return;
l2_cache__event_update_from_slice(event, slice);
}
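/*
* Start an event: program its period, decode the raw config into
* prefix/reg/code/group fields (except for the cycle counter), set up the
* RESR and filter registers, then enable the counter and its interrupt.
*/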
static
void l2_cache__event_start(struct perf_event *event, int flags)
{
struct l2cache_pmu *system = to_l2cache_pmu(event->pmu);
struct hml2_pmu *slice;
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
u32 config;
u32 evt_prefix, event_reg, event_cc, event_group;
int is_tracectr = 0;
if (idx < 0)
return;
hwc->state = 0;
slice = get_hml2_pmu(system, event->cpu);
if (unlikely(!slice))
return;
if (is_sampling(event))
l2_cache__event_set_period(event, hwc);
else
l2_cache__slice_set_period(slice, hwc);
if (hwc->config_base == L2CYCLE_CTR_RAW_CODE) {
hml2_pmu__set_ccntcr(0x0);
goto out;
}
config = hwc->config_base;
evt_prefix = (config & EVENT_PREFIX_MASK) >> EVENT_PREFIX_SHIFT;
event_reg = (config & EVENT_REG_MASK) >> EVENT_REG_SHIFT;
event_cc = (config & EVENT_CC_MASK) >> EVENT_CC_SHIFT;
event_group = (config & EVENT_GROUP_MASK);
/* Check if user requested any special origin filtering. */
if (evt_prefix == L2_TRACECTR_PREFIX)
is_tracectr = 1;
hml2_pmu__set_evcntcr(idx, 0x0);
hml2_pmu__set_evtyper(event_group, idx);
hml2_pmu__set_evres(slice, event_group, event_reg, event_cc);
if (event->cpu < 0)
hml2_pmu__set_evfilter_task_mode(idx);
else
hml2_pmu__set_evfilter_sys_mode(idx, event->cpu, is_tracectr);
out:
hml2_pmu__counter_enable_interrupt(idx);
hml2_pmu__counter_enable(idx);
}
static
void l2_cache__event_stop(struct perf_event *event, int flags)
{
struct l2cache_pmu *system = to_l2cache_pmu(event->pmu);
struct hml2_pmu *slice;
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
if (idx < 0)
return;
if (!(hwc->state & PERF_HES_STOPPED)) {
slice = get_hml2_pmu(system, event->cpu);
if (unlikely(!slice))
return;
hml2_pmu__counter_disable_interrupt(idx);
hml2_pmu__counter_disable(idx);
if (flags & PERF_EF_UPDATE)
l2_cache__event_update(event);
hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
}
}
/* Look for a duplicate event already configured on this cluster */
static
int config_is_dup(struct hml2_pmu *slice, struct hw_perf_event *hwc)
{
int i;
struct hw_perf_event *hwc_i;
for (i = 0; i < MAX_L2_CTRS; i++) {
if (slice->events[i] == NULL)
continue;
hwc_i = &slice->events[i]->hw;
if (hwc->config_base == hwc_i->config_base)
return 1;
}
return 0;
}
/* Look for event with same R, G values already configured on this cluster */
static
int event_violates_column_exclusion(struct hml2_pmu *slice,
struct hw_perf_event *hwc)
{
int i;
struct hw_perf_event *hwc_i;
u32 r_g_mask = EVENT_REG_MASK | EVENT_GROUP_MASK;
u32 r_g_value = hwc->config_base & r_g_mask;
for (i = 0; i < MAX_L2_CTRS; i++) {
if (slice->events[i] == NULL)
continue;
hwc_i = &slice->events[i]->hw;
/*
* Identical event is not column exclusion - such as
* sampling event on all CPUs
*/
if (hwc->config_base == hwc_i->config_base)
continue;
if (r_g_value == (hwc_i->config_base & r_g_mask)) {
pr_err("column exclusion violation, events %lx, %lx\n",
hwc_i->config_base & L2_EVT_MASK,
hwc->config_base & L2_EVT_MASK);
return 1;
}
}
return 0;
}
static
int l2_cache__event_add(struct perf_event *event, int flags)
{
struct l2cache_pmu *system = to_l2cache_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
int idx;
int err = 0;
struct hml2_pmu *slice;
/*
* We need to disable the PMU while adding the event; otherwise
* the perf tick might kick in and re-add this event.
*/
perf_pmu_disable(event->pmu);
slice = get_hml2_pmu(system, event->cpu);
if (!slice) {
event->state = PERF_EVENT_STATE_OFF;
hwc->idx = -1;
goto out;
}
/*
* This checks for a duplicate event on the same cluster, which
* typically occurs in non-sampling mode when using perf -a,
* which generates events on each CPU. In this case, we don't
* want to permanently disable the event by setting its state to
* OFF, because if the other CPU is subsequently hotplugged, etc,
* we want the opportunity to start collecting on this event.
*/
if (config_is_dup(slice, hwc)) {
hwc->idx = -1;
goto out;
}
if (event_violates_column_exclusion(slice, hwc)) {
event->state = PERF_EVENT_STATE_OFF;
hwc->idx = -1;
goto out;
}
idx = l2_cache__get_event_idx(slice, hwc);
if (idx < 0) {
err = idx;
goto out;
}
hwc->idx = idx;
hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
slice->events[idx] = event;
atomic64_set(&slice->prev_count[idx], 0ULL);
if (flags & PERF_EF_START)
l2_cache__event_start(event, flags);
/* Propagate changes to the userspace mapping. */
perf_event_update_userpage(event);
out:
perf_pmu_enable(event->pmu);
return err;
}
static
void l2_cache__event_del(struct perf_event *event, int flags)
{
struct l2cache_pmu *system = to_l2cache_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
struct hml2_pmu *slice;
int idx = hwc->idx;
if (idx < 0)
return;
slice = get_hml2_pmu(system, event->cpu);
if (unlikely(!slice))
return;
l2_cache__event_stop(event, flags | PERF_EF_UPDATE);
slice->events[idx] = NULL;
clear_bit(idx, slice->used_mask);
perf_event_update_userpage(event);
}
static
void l2_cache__event_read(struct perf_event *event)
{
l2_cache__event_update(event);
}
static
int dummy_event_idx(struct perf_event *event)
{
return 0;
}
/* NRCCG format for perf RAW codes. */
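/*
* config is decoded as prefix(19:16), reg(15:12), code(11:4), group(3:0);
* e.g. a raw value of 0x2510 would select reg 2, code 0x51, group 0 (the
* value is only an illustration, the codes themselves are hardware
* specific).
*/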
PMU_FORMAT_ATTR(l2_prefix, "config:16-19");
PMU_FORMAT_ATTR(l2_reg, "config:12-15");
PMU_FORMAT_ATTR(l2_code, "config:4-11");
PMU_FORMAT_ATTR(l2_grp, "config:0-3");
static struct attribute *l2_cache_pmu_formats[] = {
&format_attr_l2_prefix.attr,
&format_attr_l2_reg.attr,
&format_attr_l2_code.attr,
&format_attr_l2_grp.attr,
NULL,
};
static struct attribute_group l2_cache_pmu_format_group = {
.name = "format",
.attrs = l2_cache_pmu_formats,
};
static const struct attribute_group *l2_cache_pmu_attr_grps[] = {
&l2_cache_pmu_format_group,
NULL,
};
/*
* Generic device handlers
*/
static const struct of_device_id l2_cache_pmu_of_match[] = {
{ .compatible = "qcom,qcom-l2cache-pmu", },
{}
};
MODULE_DEVICE_TABLE(of, l2_cache_pmu_of_match);
static int get_num_counters(void)
{
int val;
val = get_l2_indirect_reg(L2PMCR);
/*
* Read bits 15:11 of the L2PMCR and add 1
* for the cycle counter.
*/
return ((val >> PMCR_NUM_EV_SHIFT) & PMCR_NUM_EV_MASK) + 1;
}
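/*
* Probe: read the number of counters from L2PMCR, create one slice per
* entry in the "qcom,cpu-affinity" property, bind each slice's overflow
* interrupt to its cluster's CPUs, then register the aggregate PMU.
*/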
static int l2_cache_pmu_probe(struct platform_device *pdev)
{
int result, irq, err;
struct device_node *of_node;
struct hml2_pmu *slice;
u32 res_idx;
u32 affinity_cpu;
const u32 *affinity_arr;
int len = 0;
struct cpumask affinity_mask;
INIT_LIST_HEAD(&l2cache_pmu.pmus);
l2cache_pmu.pmu = (struct pmu) {
.task_ctx_nr = perf_hw_context,
.name = "l2cache",
.pmu_enable = l2_cache__pmu_enable,
.pmu_disable = l2_cache__pmu_disable,
.event_init = l2_cache__event_init,
.add = l2_cache__event_add,
.del = l2_cache__event_del,
.start = l2_cache__event_start,
.stop = l2_cache__event_stop,
.read = l2_cache__event_read,
.event_idx = dummy_event_idx,
.attr_groups = l2_cache_pmu_attr_grps,
.events_across_hotplug = 1,
};
l2cache_pmu.num_counters = get_num_counters();
l2_cycle_ctr_idx = l2cache_pmu.num_counters - 1;
l2_reset_mask = ((1 << (l2cache_pmu.num_counters - 1)) - 1) |
L2PM_CC_ENABLE;
of_node = pdev->dev.of_node;
affinity_arr = of_get_property(of_node, "qcom,cpu-affinity", &len);
if (!affinity_arr || len <= 0) {
dev_err(&pdev->dev,
"Error reading qcom,cpu-affinity property (%d)\n", len);
return -ENODEV;
}
len = len / sizeof(u32);
/* Read slice info and initialize each slice */
for (res_idx = 0; res_idx < len; res_idx++) {
slice = devm_kzalloc(&pdev->dev, sizeof(*slice), GFP_KERNEL);
if (!slice)
return -ENOMEM;
irq = platform_get_irq(pdev, res_idx);
if (irq <= 0) {
dev_err(&pdev->dev,
"Failed to get valid irq for slice %d\n",
res_idx);
return -ENODEV;
}
affinity_cpu = be32_to_cpup(&affinity_arr[res_idx]);
cpumask_clear(&affinity_mask);
cpumask_set_cpu(affinity_cpu, &affinity_mask);
cpumask_set_cpu(affinity_cpu + 1, &affinity_mask);
if (irq_set_affinity(irq, &affinity_mask)) {
dev_err(&pdev->dev,
"Unable to set irq affinity (irq=%d, cpu=%d)\n",
irq, affinity_cpu);
return -ENODEV;
}
err = devm_request_irq(
&pdev->dev, irq, l2_cache__handle_irq,
IRQF_NOBALANCING, "l2-cache-pmu", slice);
if (err) {
dev_err(&pdev->dev,
"Unable to request IRQ%d for L2 PMU counters\n",
irq);
return err;
}
slice->cluster = affinity_cpu >> 1;
slice->pmu_lock = __SPIN_LOCK_UNLOCKED(slice->pmu_lock);
hml2_pmu__init(slice);
list_add(&slice->entry, &l2cache_pmu.pmus);
l2cache_pmu.num_pmus++;
}
if (l2cache_pmu.num_pmus == 0) {
dev_err(&pdev->dev, "No hardware L2 PMUs found\n");
return -ENODEV;
}
result = perf_pmu_register(&l2cache_pmu.pmu,
l2cache_pmu.pmu.name, -1);
if (result < 0)
dev_err(&pdev->dev,
"Failed to register L2 cache PMU (%d)\n",
result);
else
dev_info(&pdev->dev,
"Registered L2 cache PMU using %d HW PMUs\n",
l2cache_pmu.num_pmus);
return result;
}
static int l2_cache_pmu_remove(struct platform_device *pdev)
{
perf_pmu_unregister(&l2cache_pmu.pmu);
return 0;
}
static struct platform_driver l2_cache_pmu_driver = {
.driver = {
.name = "l2cache-pmu",
.owner = THIS_MODULE,
.of_match_table = l2_cache_pmu_of_match,
},
.probe = l2_cache_pmu_probe,
.remove = l2_cache_pmu_remove,
};
static int __init register_l2_cache_pmu_driver(void)
{
return platform_driver_register(&l2_cache_pmu_driver);
}
device_initcall(register_l2_cache_pmu_driver);