/*
 * Copyright 2011 Tilera Corporation. All Rights Reserved.
 *
 *   This program is free software; you can redistribute it and/or
 *   modify it under the terms of the GNU General Public License
 *   as published by the Free Software Foundation, version 2.
 *
 *   This program is distributed in the hope that it will be useful, but
 *   WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 *   NON INFRINGEMENT.  See the GNU General Public License for
 *   more details.
 *
 * A device driver used to track resource allocation calls between
 * user space and the hypervisor.  This interface is used by user
 * applications that require direct access to the Tile processor's
 * onboard I/O devices.  The iorpc driver is responsible for
 * tracking memory resource mapping and handling VA/PA translation.
 */

#include <hv/iorpc.h>
#include <linux/module.h>
#include <linux/cdev.h>
#include <linux/fs.h>
#include <linux/hugetlb.h>
#include <linux/io.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/uaccess.h>
#include <linux/mutex.h>
#include <asm/homecache.h>
#if CHIP_HAS_MMIO()
#include <asm/iorpc_globals.h>
#endif

/*
 * NOTE(review): the file header describes GPL version 2 terms while
 * the module license string below is "Dual BSD/GPL" -- confirm which
 * one is intended.
 */
MODULE_AUTHOR("Tilera Corporation");
MODULE_LICENSE("Dual BSD/GPL");

/* Driver name; used as the log prefix and as the chrdev region name. */
#define DRIVER_NAME_STRING "iorpc"

/*
 * Two-level stringification so that __LINE__ is macro-expanded to its
 * numeric value before being turned into a string literal.
 */
#define STRINGIFY(x) #x
#define TOSTRING(x)	 STRINGIFY(x)
/* Log prefix of the form "iorpc(<source line>): ". */
#define SIMPLE_MSG_LINE    DRIVER_NAME_STRING "(" TOSTRING(__LINE__) "): "

/* Always-enabled log helpers; FMT must be a string literal. */
#define INFO(FMT, ...) \
	pr_info(SIMPLE_MSG_LINE FMT, ## __VA_ARGS__)
#define WARNING(FMT, ...) \
	pr_warning(SIMPLE_MSG_LINE FMT, ## __VA_ARGS__)

/*
 * Debug tracing.  Only emits output when DEBUG_IORPC is defined;
 * otherwise it expands to nothing, so its arguments are not evaluated.
 */
#ifdef DEBUG_IORPC
#define TRACE(FMT, ...) pr_info(SIMPLE_MSG_LINE FMT, ## __VA_ARGS__)
#else
#define TRACE(...)
#endif

/*
 * Per-open state.  Allocated in iorpc_open(), stored in
 * filp->private_data, and freed in iorpc_release().
 */
struct iorpc_instance {
	/*
	 * Serialize access to this structure.  iorpc_write() holds it
	 * while translating a user page and appending it to page_list.
	 */
	struct mutex mutex;

	/* The HV file number, as returned by hv_dev_open(). */
	int fd;

	/*
	 * A list of pages currently registered with this instance
	 * (struct iorpc_page_list_entry).  Each entry holds a page
	 * reference that is dropped in iorpc_release().
	 */
	struct list_head page_list;

	/*
	 * Base PTE for MMIO mappings.  Only filled in when
	 * CHIP_HAS_MMIO(); iorpc_mmap() takes the pfn and lotar from it.
	 */
	HV_PTE mmio_base;
};

/* One user page pinned on behalf of an iorpc_instance. */
struct iorpc_page_list_entry {
	/* Linkage on iorpc_instance.page_list. */
	struct list_head list;
	/* Page obtained via get_user_pages(); released when the file closes. */
	struct page *page;
};

/* Number of minor devices; equals the number of entries in hv_dev_names[]. */
#define NUM_IORPC_DEVS 5

/*
 * Hypervisor device names corresponding to each minor number.
 *
 * Both the strings and the pointer table are const: the table is
 * never modified after build time, so it can live in read-only data.
 */
static const char *const hv_dev_names[NUM_IORPC_DEVS] = {
	"test/0/iorpc",
	"mpipe/0/iorpc",
	"crypto/0/iorpc",
	"trio/0/iorpc",
	"zip/0/iorpc",
};

/* The single cdev covering all NUM_IORPC_DEVS minors. */
static struct cdev iorpc_cdev;
/* First device number of the region allocated in iorpc_init(). */
static dev_t iorpc_dev;

/* Name passed to alloc_chrdev_region(). */
static const char driver_name[] = DRIVER_NAME_STRING;

/*
 * Size of the on-stack staging buffer used by iorpc_read() and
 * iorpc_write(); larger requests are staged in a kmalloc() buffer.
 */
#define MAX_RPC_STACK_BYTES 1024

/*
 * iorpc_open() - Open a new iorpc context.
 *
 * Every open() gets its own resource allocation context on the
 * hypervisor device selected by the cdev minor number.  The context
 * is stored in filp->private_data.
 *
 * Returns 0 on success.  If hv_dev_open() fails, its return value is
 * passed through when it lies in [GXIO_ERR_MIN, GXIO_ERR_MAX] and
 * -ENODEV is returned otherwise.  Returns -ENOMEM if the instance
 * cannot be allocated and -EIO if the MMIO base lookup fails.
 */
static int iorpc_open(struct inode *inode, struct file *filp)
{
	unsigned int minor = MINOR(inode->i_rdev);
	struct iorpc_instance *inst;
	int hv_fd;
	int err;

	TRACE("iorpc_open()\n");

	hv_fd = hv_dev_open((HV_VirtAddr)hv_dev_names[minor], 0);
	if (hv_fd < 0) {
		TRACE("hv_dev_open() failed.\n");
		/* Only GXIO error codes are handed back unchanged. */
		if (hv_fd < GXIO_ERR_MIN || hv_fd > GXIO_ERR_MAX)
			return -ENODEV;
		return hv_fd;
	}

	inst = kmalloc(sizeof(*inst), GFP_KERNEL);
	if (!inst) {
		TRACE("kmalloc() failure.\n");
		err = -ENOMEM;
		goto close_hv_fd;
	}

#if CHIP_HAS_MMIO()
	if (__iorpc_get_mmio_base(hv_fd, &inst->mmio_base)) {
		TRACE("get_mmio_base() failure.\n");
		err = -EIO;
		goto free_instance;
	}
#endif

	mutex_init(&inst->mutex);
	inst->fd = hv_fd;
	INIT_LIST_HEAD(&inst->page_list);

	filp->private_data = inst;

	return 0;

	/* Error unwind: undo the steps above in reverse order. */
#if CHIP_HAS_MMIO()
 free_instance:
	kfree(inst);
#endif

 close_hv_fd:
	hv_dev_close(hv_fd);
	return err;
}

/*
 * iorpc_release() - Release an iorpc instance.
 *
 * The hypervisor file handle is closed first, which in turn should
 * trigger any per-device reset and cleanup code that the hypervisor
 * driver requires.  Only after that are the page references taken by
 * iorpc_write() dropped and the instance itself freed.
 *
 * Always returns 0.
 */
static int iorpc_release(struct inode *inode, struct file *filp)
{
	struct iorpc_instance *instance = filp->private_data;
	struct list_head *pos;
	struct list_head *tmp;

	TRACE("iorpc_release()\n");

	/* Closing the HV device resets the device. */
	hv_dev_close(instance->fd);

	/*
	 * Drop every page the device used to reference.  The "safe"
	 * iterator is needed because each entry is freed as we go.
	 */
	list_for_each_safe(pos, tmp, &instance->page_list) {
		struct iorpc_page_list_entry *entry =
			list_entry(pos, struct iorpc_page_list_entry, list);

		TRACE("Release pfn %#lx\n", page_to_pfn(entry->page));
		page_cache_release(entry->page);
		kfree(entry);
	}

	filp->private_data = NULL;
	kfree(instance);

	return 0;
}

/*
 * iorpc_read() - Read RPC data from the hypervisor.
 *
 * The request is staged in a zeroed kernel buffer (on the stack for
 * up to MAX_RPC_STACK_BYTES, otherwise from kmalloc()), filled by
 * hv_dev_pread() at offset *f_pos, and then copied out to @buf.
 * All @count bytes are copied to user space regardless of the value
 * hv_dev_pread() returned.
 *
 * Returns the value from hv_dev_pread(), -ENOMEM if the staging
 * buffer cannot be allocated, or -EFAULT if the copy to user space
 * fails.
 */
static ssize_t iorpc_read(struct file *filp, char __user *buf,
			     size_t count, loff_t *f_pos)
{
	struct iorpc_instance *instance = filp->private_data;
	char stack_tmp[MAX_RPC_STACK_BYTES];
	char *heap_tmp = NULL;
	char *tmp = stack_tmp;
	ssize_t result;

	TRACE("iorpc_read()\n");

	/* Requests too large for the stack buffer go to the heap. */
	if (count > sizeof(stack_tmp)) {
		heap_tmp = kmalloc(count, GFP_KERNEL);
		if (!heap_tmp)
			return -ENOMEM;
		tmp = heap_tmp;
	}

	/* Avoid leaks to userspace. */
	memset(tmp, 0, count);

	result = hv_dev_pread(instance->fd, 0,
			      (HV_VirtAddr)tmp, count, *f_pos);
	TRACE("hv_dev_pread() returns %zd\n", result);

	/*
	 * hv_dev_pread should never have side effects; if it did we'd
	 * run into trouble because the copy_to_user() might fail
	 * after the HV call succeeded.
	 */
	if (copy_to_user(buf, tmp, count) != 0) {
		TRACE("EFAULT when copying hv_dev_pread() result.\n");
		result = -EFAULT;
	}

	/* kfree(NULL) is a no-op, so the stack case needs no check. */
	kfree(heap_tmp);

	return result;
}


/*
 * translate_mem_buffer() - Pin and translate a user buffer.
 *
 * Helper routine for translating and verifying pages that a user
 * application wishes to register with a hypervisor iorpc device.
 *
 * @params:    On entry, the 'user' view of the request (va, size,
 *             flags).  On success it is overwritten in place with the
 *             'kernel' view (cpa, size, pte, flags).  It is left
 *             untouched on failure.
 * @entry_out: On success, set to a newly allocated list entry that
 *             holds the pinned page; the caller owns both the entry
 *             and the page reference.  Set to NULL on failure.
 *
 * Returns 0 on success, -ENOMEM if the list entry cannot be
 * allocated, -EFAULT if the page cannot be pinned, or -EINVAL if the
 * buffer wraps around the address space or crosses a page boundary.
 */
static int translate_mem_buffer(iorpc_mem_buffer_t *params,
				struct iorpc_page_list_entry **entry_out)
{
	iorpc_mem_buffer_t translated;
	int count;
	struct vm_area_struct *vma;
	struct iorpc_page_list_entry *entry;
	unsigned long va;
	unsigned long page_size = PAGE_SIZE;
	unsigned long next_page_va;
	pte_t pte = { 0 };
	HV_PhysAddr pfn;
	int result;

	memset(&translated, 0, sizeof(translated));
	*entry_out = NULL;

	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
	if (entry == NULL)
		return -ENOMEM;

	/*
	 * We only allow registration of a single page; getting
	 * multiple pages to be physically contiguous is difficult
	 * anyway.
	 */
	va = (unsigned long)params->user.va;
	down_read(&current->mm->mmap_sem);
	count = get_user_pages(current, current->mm, va, 1 /* num_pages */,
			       1 /* writable */, 0, &entry->page, &vma);
	/*
	 * The vma pointer is only guaranteed to stay valid while
	 * mmap_sem is held, so decide on the page size before dropping
	 * the lock.  vma is only looked at when the lookup succeeded.
	 */
	if (count == 1)
		page_size = is_vm_hugetlb_page(vma) ? HPAGE_SIZE : PAGE_SIZE;
	up_read(&current->mm->mmap_sem);
	if (count != 1) {
		TRACE("get_user_pages() failure.\n");
		result = -EFAULT;
		goto get_user_pages_failed;
	}

	/*
	 * Reject buffers whose last byte wraps around the address
	 * space or lands beyond the end of the page containing va.
	 * A zero size also ends up here for any non-zero va.
	 */
	next_page_va = (va & ~(page_size - 1)) + page_size;
	if ((va + params->user.size - 1) < va ||
	    (va + params->user.size - 1) >= next_page_va) {
		TRACE("User buffer crossed page boundary.\n");
		result = -EINVAL; /* special error for 'spanned pages'? */
		goto va_overflow;
	}

	/* Figure out how the page is homed. */
	pte = pte_set_home(pte, page_home(entry->page));

	/* Fill in the translation and replace the old parameters. */
	pfn = page_to_pfn(entry->page);
	translated.kernel.cpa = PFN_PHYS(pfn) | (va & ~PAGE_MASK);
	translated.kernel.size = params->user.size;
	translated.kernel.pte = pte;
	translated.kernel.flags = params->user.flags;
	*params = translated;

	*entry_out = entry;
	return 0;

 va_overflow:
	/* Drop the reference taken by get_user_pages(). */
	page_cache_release(entry->page);
 get_user_pages_failed:
	kfree(entry);
	return result;
}


/* iorpc_write() - Write RPC data to the hypervisor. */
static ssize_t iorpc_write(struct file *filp, const char __user *buf,
			      size_t count, loff_t *f_pos)
{
	struct iorpc_instance *instance = filp->private_data;
	iorpc_offset_t off = {.offset = *f_pos};
	char stack_tmp[MAX_RPC_STACK_BYTES];
	char *heap_tmp = NULL;
	char *tmp;
	ssize_t result = 0;

	TRACE("iorpc_write()\n");

	if (count > sizeof(stack_tmp)) {
		heap_tmp = kmalloc(count, GFP_KERNEL);
		if (heap_tmp == NULL)
			return -ENOMEM;
		tmp = heap_tmp;
	} else {
		tmp = stack_tmp;
	}

	if (copy_from_user(tmp, buf, count)) {
		result = -EFAULT;
		goto end;
	}

	/*
	 * Perform any request-specific translation or resource
	 * allocation, as indicated by the opcode's 'format' field.
	 */
	switch (off.format) {
	case IORPC_FORMAT_NONE: {
		/* No special translation required, just do the op. */
		result = hv_dev_pwrite(instance->fd, 0,
				       (HV_VirtAddr)tmp, count, off.offset);
		break;
	}

	case IORPC_FORMAT_USER_MEM: {
		/* Translate VA to PA and get homing information. */
		struct iorpc_page_list_entry *entry;
		iorpc_mem_buffer_t *params = (iorpc_mem_buffer_t *)tmp;
		if (count < sizeof(*params)) {
			result = -EINVAL;
			break;
		}

		mutex_lock(&instance->mutex);

		result = translate_mem_buffer(params, &entry);
		if (result != 0) {
			mutex_unlock(&instance->mutex);
			TRACE("translate_mem_buffer() failed, return %zd\n",
			      result);
			break;
		}

		/* Mark format change and pass to the HV. */
		off.format = IORPC_FORMAT_KERNEL_MEM;
		result = hv_dev_pwrite(instance->fd, 0,
				       (HV_VirtAddr)tmp, count, off.offset);
		if (result < 0) {
			TRACE("hv_dev_pwrite() failed, canceling page.\n");
			page_cache_release(entry->page);
			kfree(entry);
		} else {
			/* Keep the page to be released later. */
			TRACE("Added pfn %#lx to page_list.\n",
			      page_to_pfn(entry->page));
			list_add_tail(&entry->list, &instance->page_list);
			if (!PageReserved(entry->page))
				SetPageDirty(entry->page);
		}

		mutex_unlock(&instance->mutex);
		break;
	}

	default:
		/* We handled all legal formats above, must be bad opcode. */
		result = GXIO_ERR_OPCODE;
		break;
	}

end:
	kfree(heap_tmp);

	TRACE("iorpc_write() returns %zd\n", result);
	return result;
}

#if CHIP_HAS_MMIO()
/*
 * iorpc_mmap() - Map part of the device's MMIO space into user space.
 *
 * Only shared mappings are accepted.  The byte offset derived from
 * vma->vm_pgoff and the mapping size are validated by
 * __iorpc_check_mmio_offset() before anything is mapped.  The target
 * pfn is the instance's MMIO base pfn plus vma->vm_pgoff.
 *
 * Returns 0 on success, -EINVAL for a non-shared mapping, or the
 * error from __iorpc_check_mmio_offset() or remap_pfn_range().
 */
static int iorpc_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct iorpc_instance *instance = file->private_data;
	size_t size = vma->vm_end - vma->vm_start;
	unsigned long offset = PFN_PHYS(vma->vm_pgoff);
	unsigned long pfn;
	pgprot_t prot;
	int err;

	if ((vma->vm_flags & VM_SHARED) == 0)
		return -EINVAL;

	/* Make sure the requested offset is allowed by the HV driver. */
	err = __iorpc_check_mmio_offset(instance->fd, offset, size);
	if (err != 0)
		return err;

	/* Merge the base HV_PTE with our pgprot and pfn offset. */
	prot = hv_pte_set_mode(vma->vm_page_prot, HV_PTE_MODE_MMIO);
	prot = hv_pte_set_lotar(prot, hv_pte_get_lotar(instance->mmio_base));
	pfn = hv_pte_get_pfn(instance->mmio_base) + vma->vm_pgoff;

	vma->vm_flags |= VM_LOCKED | VM_RESERVED;

	err = remap_pfn_range(vma, vma->vm_start, pfn, size, prot);
	if (err != 0) {
		TRACE("remap_pfn_range() failed.\n");
	}

	return err;
}
#endif /* CHIP_HAS_MMIO() */


/*
 * File operations shared by every iorpc minor.  The mmap hook is
 * only provided when CHIP_HAS_MMIO().
 */
static const struct file_operations iorpc_fops = {
	.owner   = THIS_MODULE,

	/* Per-open context setup and teardown. */
	.open    = iorpc_open,
	.release = iorpc_release,

	/* RPC data path. */
	.read    = iorpc_read,
	.write   = iorpc_write,
#if CHIP_HAS_MMIO()
	.mmap    = iorpc_mmap,
#endif
};

/*
 * iorpc_init() - Initialize the iorpc driver.
 *
 * Reserves a range of NUM_IORPC_DEVS device numbers starting at minor
 * 0 and registers a single cdev covering all of them.  The reserved
 * range is given back if the cdev cannot be added.
 *
 * Returns zero on success, or a negative error code.
 */
static int iorpc_init(void)
{
	int err;

	/* Allocate some major/minor numbers. */
	err = alloc_chrdev_region(&iorpc_dev, 0, NUM_IORPC_DEVS,
				  driver_name);
	if (err) {
		WARNING("Could not allocate iorpc major number.\n");
		return err;
	}

	/* Register the device. */
	cdev_init(&iorpc_cdev, &iorpc_fops);
	iorpc_cdev.owner = THIS_MODULE;

	err = cdev_add(&iorpc_cdev, iorpc_dev, NUM_IORPC_DEVS);
	if (err)
		goto release_region;

	return 0;

 release_region:
	WARNING("Failed to add iorpc cdev.\n");
	unregister_chrdev_region(iorpc_dev, NUM_IORPC_DEVS);
	return err;
}


/*
 * iorpc_cleanup() - Clean up the driver's module.
 *
 * Undoes iorpc_init() in reverse order: the cdev is deleted first,
 * then the device number range is handed back.
 */
static void iorpc_cleanup(void)
{
	cdev_del(&iorpc_cdev);
	unregister_chrdev_region(iorpc_dev, NUM_IORPC_DEVS);
}


/* Register the module's load and unload entry points. */
module_init(iorpc_init);
module_exit(iorpc_cleanup);
