/*
 * Copyright 2011 Tilera Corporation. All Rights Reserved.
 *
 *   This program is free software; you can redistribute it and/or
 *   modify it under the terms of the GNU General Public License
 *   as published by the Free Software Foundation, version 2.
 *
 *   This program is distributed in the hope that it will be useful, but
 *   WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 *   NON INFRINGEMENT.  See the GNU General Public License for
 *   more details.
 */

#include <linux/oprofile.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/percpu.h>
#include <linux/ptrace.h>
#include <linux/timex.h>
#include <asm/system.h>

#include <arch/chip.h>
#include <arch/interrupts.h>
#include <arch/spr_def.h>

#include "op_impl.h"

/*
 * Number of performance counters handled by this file: two base
 * counters, plus two auxiliary counters on chips that provide them.
 */
#if CHIP_HAS_AUX_PERF_COUNTERS()
#define TILE_NUM_PERF_COUNTERS	4
#else
#define TILE_NUM_PERF_COUNTERS	2
#endif

/*
 * Master configuration, one entry per counter.  Its fields are exposed
 * through oprofilefs by op_tile_create_files(); the per-cpu state below
 * is derived from this master by op_tile_cpu_setup().
 */
static struct op_counter_config master_ctr[TILE_NUM_PERF_COUNTERS];

/* Per-cpu counter state; this file reads and writes its reset_value. */
static DEFINE_PER_CPU(struct op_counter_config[TILE_NUM_PERF_COUNTERS], ctr);
/* Nonzero while profiling is active on the owning cpu. */
static DEFINE_PER_CPU(u32, op_counters_enabled);

/* Name handed to the oprofile core as ops->cpu_type. */
static char op_tile_cpu_type[] = "tile/" CHIP_ARCH_NAME;

/*
 * Return whether oprofile is enabled on this tile.
 */
int op_enabled(void)
{
	return __get_cpu_var(op_counters_enabled);
}

/*
 * The perf count interrupts are masked and unmasked explicitly,
 * and only here.  The normal irq_enable() does not enable them,
 * and irq_disable() does not disable them.  That lets these
 * routines drive the perf count interrupts orthogonally.
 */

/*
 * Unmask the perf count interrupt on the calling cpu, and the auxiliary
 * perf count interrupt as well on chips that have auxiliary counters.
 */
static inline void unmask_perf_interrupts(void)
{
	interrupt_mask_reset(INT_PERF_COUNT);
#if CHIP_HAS_AUX_PERF_COUNTERS()
	interrupt_mask_reset(INT_AUX_PERF_COUNT);
#endif
}

/*
 * Mask the perf count interrupt on the calling cpu, and the auxiliary
 * perf count interrupt as well on chips that have auxiliary counters.
 */
static inline void mask_perf_interrupts(void)
{
	interrupt_mask_set(INT_PERF_COUNT);
#if CHIP_HAS_AUX_PERF_COUNTERS()
	interrupt_mask_set(INT_AUX_PERF_COUNT);
#endif
}

/*
 * For each cpu, track how many cycles have been spent in the interrupt
 * handler, and when we started profiling.  This lets us compute if we
 * are pushing the limits of profiling.
 */
struct profile_time {
	/* Cycles charged to the perf interrupt handlers since profiling started. */
	u64 interrupt_cycles;
	/* get_cycles() value recorded when profiling was started on this cpu. */
	u64 profile_start_time;
};
/* One accounting record per cpu; reset in op_tile_cpu_start(). */
static DEFINE_PER_CPU(struct profile_time, profile_time);

/*
 * By just tracking time spent in the C code portion of the interrupt,
 * we don't track the assembler overhead, e.g. register save/restore.
 * Provide an approximation of that cost here, in cycles.  It is added
 * once per handled interrupt by ratelimit_oprofile().
 */
#define ASM_INTERRUPT_OVERHEAD 200

/*
 * Decide whether profiling is consuming too much of a cpu.
 *
 * interrupt_cycles: cycles spent in the perf interrupt handlers.
 * total_cycles:     cycles elapsed since profiling started.
 *
 * Returns 1 when more than 1000000000 cycles have elapsed in total and
 * the handlers account for more than half of them; returns 0 otherwise.
 */
static inline int too_much_oprofiling(u64 interrupt_cycles, u64 total_cycles)
{
	/* Too little history to judge: never trip during the warm-up window. */
	if (total_cycles <= 1000000000ULL)
		return 0;

	return interrupt_cycles > total_cycles / 2;
}

/*
 * Charge the time spent in the current perf interrupt to this cpu's
 * running total and check it against the elapsed profiling time.
 *
 * entry_time: get_cycles() value sampled on entry to the handler.
 *
 * The charge is the C-level handler time plus ASM_INTERRUPT_OVERHEAD.
 * When too_much_oprofiling() reports that the handlers are taking more
 * than half of the elapsed time, profiling is switched off on this cpu:
 * the enabled flag is cleared and the perf interrupts are masked.
 */
static void ratelimit_oprofile(u64 entry_time)
{
	struct profile_time *pt = &__get_cpu_var(profile_time);
	u64 now = get_cycles();
	u64 elapsed = now - pt->profile_start_time;
	u64 handler_cycles = (now - entry_time) + ASM_INTERRUPT_OVERHEAD;

	pt->interrupt_cycles += handler_cycles;

	if (!too_much_oprofiling(pt->interrupt_cycles, elapsed))
		return;

	pr_err("Disabling oprofiling on cpu %d;"
	       " more than %llu interrupt cycles out of %llu\n",
	       smp_processor_id(), pt->interrupt_cycles, elapsed);
	__get_cpu_var(op_counters_enabled) = 0;
	mask_perf_interrupts();
}

void op_handle_perf_interrupt(struct pt_regs *regs, int fault_num,
			      unsigned long perf_count_sts)
{
	u64 entry_time = get_cycles();

	__insn_mtspr(SPR_PERF_COUNT_STS, 0x3 & perf_count_sts);

	if (!__get_cpu_var(op_counters_enabled))
		return;

	if (perf_count_sts & 1) {
		oprofile_add_sample(regs, 0);
		__insn_mtspr(SPR_PERF_COUNT_0,
			     __get_cpu_var(ctr)[0].reset_value);
	}
	if (perf_count_sts & 2) {
		oprofile_add_sample(regs, 1);
		__insn_mtspr(SPR_PERF_COUNT_1,
			     __get_cpu_var(ctr)[1].reset_value);
	}
	ratelimit_oprofile(entry_time);
}

#if CHIP_HAS_AUX_PERF_COUNTERS()
/*
 * Handler for the auxiliary perf count interrupt; only built on chips
 * that have auxiliary counters.
 *
 * regs:           register state at the time of the interrupt, passed
 *                 through to oprofile_add_sample().
 * fault_num:      not used by this handler.
 * perf_count_sts: status word; bit 0 selects auxiliary counter 0
 *                 (event slot 2) and bit 1 selects auxiliary counter 1
 *                 (event slot 3).
 *
 * For every counter flagged in perf_count_sts a sample is recorded and
 * the counter is reloaded with this cpu's reset value.  Nothing beyond
 * the status write happens when profiling is disabled on this cpu.
 */
void op_handle_aux_perf_interrupt(struct pt_regs *regs, int fault_num,
				 unsigned long perf_count_sts)
{
	struct op_counter_config *cfg;
	u64 entry_time = get_cycles();

	/*
	 * Write the two reported status bits back to the status register
	 * (presumably this acknowledges them; confirm against the SPR docs).
	 */
	__insn_mtspr(SPR_AUX_PERF_COUNT_STS, 0x3 & perf_count_sts);

	if (!__get_cpu_var(op_counters_enabled))
		return;

	cfg = __get_cpu_var(ctr);

	if (perf_count_sts & 1) {
		oprofile_add_sample(regs, 2);
		__insn_mtspr(SPR_AUX_PERF_COUNT_0, cfg[2].reset_value);
	}

	if (perf_count_sts & 2) {
		oprofile_add_sample(regs, 3);
		__insn_mtspr(SPR_AUX_PERF_COUNT_1, cfg[3].reset_value);
	}

	/* Account for the time spent here; may turn profiling off. */
	ratelimit_oprofile(entry_time);
}
#endif

/*
 * Per-cpu half of op_tile_stop(): clear this cpu's enabled flag and
 * mask its perf interrupts, with preemption disabled throughout.
 *
 * dummy: unused; present to match the cross-call callback signature.
 */
static inline void op_tile_cpu_stop(void *dummy)
{
	get_cpu();
	__get_cpu_var(op_counters_enabled) = 0;
	mask_perf_interrupts();
	put_cpu();
}

/*
 * oprofile "stop" hook: turn profiling off on every online cpu,
 * including the calling one, and wait for all of them to finish.
 */
static void op_tile_stop(void)
{
	/*
	 * on_each_cpu() runs the remote and local invocations with
	 * preemption disabled, so the task cannot migrate between the
	 * cross-call and the local call and leave one cpu unprocessed.
	 */
	(void)on_each_cpu(op_tile_cpu_stop, NULL, 1);
}

/*
 * Per-cpu half of op_tile_start(): load the counters with this cpu's
 * reset values, mark profiling enabled, restart the interrupt-time
 * accounting and unmask the perf interrupts.
 *
 * dummy: unused; present to match the cross-call callback signature.
 *
 * NOTE(review): op_tile_cpu_setup() writes staggered initial values to
 * the counters, but the writes below replace them with the plain reset
 * values, so the stagger appears to be lost by the time profiling
 * starts -- confirm whether that is intended.
 */
static void op_tile_cpu_start(void *dummy)
{
	/* Stay on this cpu (preemption off) while touching per-cpu state. */
	get_cpu();
	/* enable the perf counter interrupt */
	__insn_mtspr(SPR_PERF_COUNT_0, 0);
	__insn_mtspr(SPR_PERF_COUNT_1, 0);
	__insn_mtspr(SPR_PERF_COUNT_STS, 0x3);
	__insn_mtspr(SPR_PERF_COUNT_0, __get_cpu_var(ctr)[0].reset_value);
	__insn_mtspr(SPR_PERF_COUNT_1, __get_cpu_var(ctr)[1].reset_value);
#if CHIP_HAS_AUX_PERF_COUNTERS()
	/* enable the auxiliary perf counter interrupt */
	__insn_mtspr(SPR_AUX_PERF_COUNT_0, 0);
	__insn_mtspr(SPR_AUX_PERF_COUNT_1, 0);
	__insn_mtspr(SPR_AUX_PERF_COUNT_STS, 0x3);
	__insn_mtspr(SPR_AUX_PERF_COUNT_0, __get_cpu_var(ctr)[2].reset_value);
	__insn_mtspr(SPR_AUX_PERF_COUNT_1, __get_cpu_var(ctr)[3].reset_value);
#endif
	/* From here on the interrupt handlers will record samples. */
	__get_cpu_var(op_counters_enabled) = 1;

	/*
	 * Save away the time that we enabled profiling, and clear
	 * the count of interrupt cycles seen to date.
	 */
	__get_cpu_var(profile_time).profile_start_time = get_cycles();
	__get_cpu_var(profile_time).interrupt_cycles = 0;

	/*
	 * Disable all interrupts while we unmask to make sure we don't
	 * end up with one interrupt masked and not the other.
	 */
	__insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 1);
	unmask_perf_interrupts();
	__insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0);

	put_cpu();
}

/*
 * oprofile "start" hook: begin profiling on every online cpu,
 * including the calling one, and wait for all of them to finish.
 *
 * Always returns 0.
 */
static int op_tile_start(void)
{
	/*
	 * on_each_cpu() runs the remote and local invocations with
	 * preemption disabled, so the task cannot migrate between the
	 * cross-call and the local call and leave one cpu unstarted.
	 */
	(void)on_each_cpu(op_tile_cpu_start, NULL, 1);
	return 0;
}

/*
 * oprofile "shutdown" hook.  Intentionally empty: it is installed in
 * oprofile_arch_init() but this driver has no work to do here.
 */
static void op_tile_shutdown(void)
{
	/* nothing to be done here */
}

/*
 * Translate one counter's master configuration into register values.
 *
 * master_ctr:     configuration of the counter (passed by value).
 * reset_val:      out; value to reload into the counter after each
 *                 overflow interrupt.
 * perf_count_ctl: out; control bits for this counter.
 * init_val:       out; value to load the first time, staggered per cpu.
 * cpu:            index of the cpu the values are computed for.
 *
 * All three outputs are 0 when the counter is not enabled.
 */
static void op_get_init_vals(struct op_counter_config master_ctr,
			     unsigned long *reset_val,
			     unsigned long *perf_count_ctl,
			     unsigned long *init_val,
			     int cpu)
{
	unsigned long ctl;
	unsigned long stagger;

	*perf_count_ctl = 0;
	*init_val = 0;
	*reset_val = 0;

	if (!master_ctr.enabled)
		return;

	/* The event selector occupies the low seven bits. */
	ctl = master_ctr.event & 0x7f;
	/* Bit 7 is set when user-level counting was not requested. */
	if (!master_ctr.user)
		ctl |= (1 << 7);
	/* Should we also disable HV counting in the following case? */
	if (!master_ctr.kernel)
		ctl |= (2 << 7);

	/* We take the interrupt on wrap from 0xffffffff -> 0 */
	*reset_val = 0 - master_ctr.count;

	/*
	 * Add a stagger the first time the event is triggered
	 * so that the cycle counter event doesn't fire in
	 * lock step and cause a bursty load on oprofiled.
	 */
	stagger = cpu * (master_ctr.count / NR_CPUS);
	*init_val = 0 - (master_ctr.count + stagger);

	*perf_count_ctl = ctl;
}

/*
 * Per-cpu half of op_tile_setup(): derive this cpu's counter values
 * from master_ctr, program the counter control and count registers,
 * and leave profiling marked as disabled on this cpu.
 *
 * dummy: unused; present to match the cross-call callback signature.
 */
static void op_tile_cpu_setup(void *dummy)
{
	/* Preemption stays off until put_cpu() so per-cpu accesses are stable. */
	int cpu = get_cpu();
	unsigned long perf_count_ctl[TILE_NUM_PERF_COUNTERS];
	unsigned long init_value[TILE_NUM_PERF_COUNTERS];
	int i;

	/* Compute control/initial values and remember each reset value. */
	for (i = 0; i < TILE_NUM_PERF_COUNTERS; i++) {
		unsigned long reset_val;
		op_get_init_vals(master_ctr[i], &reset_val,
				 &perf_count_ctl[i],
				 &init_value[i],
				 cpu);
		__get_cpu_var(ctr)[i].reset_value = reset_val;
	}
	/* just so we don't have any accidental interrupts */
	__insn_mtspr(SPR_PERF_COUNT_0, 0);
	__insn_mtspr(SPR_PERF_COUNT_1, 0);
	/* Counter 1's control bits go in the upper 16 bits, counter 0's below. */
	__insn_mtspr(SPR_PERF_COUNT_CTL,
		     (perf_count_ctl[1] << 16) | perf_count_ctl[0]);
	__insn_mtspr(SPR_PERF_COUNT_STS, 0x3);
	__insn_mtspr(SPR_PERF_COUNT_0, init_value[0]);
	__insn_mtspr(SPR_PERF_COUNT_1, init_value[1]);

#if CHIP_HAS_AUX_PERF_COUNTERS()
	/* Same sequence for the auxiliary counters (slots 2 and 3). */
	__insn_mtspr(SPR_AUX_PERF_COUNT_0, 0);
	__insn_mtspr(SPR_AUX_PERF_COUNT_1, 0);
	__insn_mtspr(SPR_AUX_PERF_COUNT_CTL,
		     (perf_count_ctl[3] << 16) | perf_count_ctl[2]);
	__insn_mtspr(SPR_AUX_PERF_COUNT_STS, 0x3);
	__insn_mtspr(SPR_AUX_PERF_COUNT_0, init_value[2]);
	__insn_mtspr(SPR_AUX_PERF_COUNT_1, init_value[3]);

	/*
	 * Some profiling events on the network switch (i.e.,
	 * TDN_CONGESTION, MDN_CONGESTION, VDN_BUBBLE) can occur on
	 * each of the 5 output ports of the switch. The direction is
	 * set with the *_EVT_PORT_SEL field, which is a 5-bit field
	 * in the SPR_PERF_COUNT_DN_CTL register. Each bit masks one
	 * of the directions. Here we set all the masks as enabled.
	 */
	__insn_mtspr(SPR_PERF_COUNT_DN_CTL, 0xFFFFFFFF);
#endif

	/* Setup alone does not start profiling; op_tile_cpu_start() does. */
	__get_cpu_var(op_counters_enabled) = 0;

	put_cpu();
}

/*
 * oprofile "setup" hook: program the counter registers on every online
 * cpu, including the calling one, and wait for all of them to finish.
 *
 * Always returns 0.
 */
static int op_tile_setup(void)
{
	/*
	 * Configure the registers on all cpus.  on_each_cpu() runs the
	 * remote and local invocations with preemption disabled, so the
	 * task cannot migrate between the cross-call and the local call
	 * and leave one cpu unconfigured.
	 */
	(void)on_each_cpu(op_tile_cpu_setup, NULL, 1);
	return 0;
}

static int op_tile_create_files(struct super_block *sb, struct dentry *root)
{
	int i;

	for (i = 0; i < TILE_NUM_PERF_COUNTERS; ++i) {
		struct dentry *dir;
		char buf[3];

		snprintf(buf, sizeof(buf), "%d", i);
		dir = oprofilefs_mkdir(sb, root, buf);

		oprofilefs_create_ulong(sb, dir, "enabled",
			&master_ctr[i].enabled);
		oprofilefs_create_ulong(sb, dir, "event",
			&master_ctr[i].event);
		oprofilefs_create_ulong(sb, dir, "count",
			&master_ctr[i].count);
		oprofilefs_create_ulong(sb, dir, "kernel",
			&master_ctr[i].kernel);
		oprofilefs_create_ulong(sb, dir, "user",
			&master_ctr[i].user);
		/* We don't use the unit mask */
		oprofilefs_create_ulong(sb, dir, "unit_mask",
			&master_ctr[i].unit_mask);
	}

	return 0;
}

/*
 * Architecture entry point for oprofile: fill in the operations table
 * with this driver's hooks.
 *
 * ops: operations table to populate.
 *
 * Always returns 0.
 */
int __init oprofile_arch_init(struct oprofile_operations *ops)
{
	/* Identification and backtrace support. */
	ops->cpu_type = op_tile_cpu_type;
	ops->backtrace = tile_backtrace;

	/* oprofilefs control files. */
	ops->create_files = op_tile_create_files;

	/* Profiling life cycle. */
	ops->setup = op_tile_setup;
	ops->start = op_tile_start;
	ops->stop = op_tile_stop;
	ops->shutdown = op_tile_shutdown;

	return 0;
}

/*
 * Architecture exit hook for oprofile.  Intentionally empty: this
 * driver has nothing to release.
 */
void oprofile_arch_exit(void)
{
}
