/*
 * Copyright 2011 Tilera Corporation. All Rights Reserved.
 *
 *   This program is free software; you can redistribute it and/or
 *   modify it under the terms of the GNU General Public License
 *   as published by the Free Software Foundation, version 2.
 *
 *   This program is distributed in the hope that it will be useful, but
 *   WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 *   NON INFRINGEMENT.  See the GNU General Public License for
 *   more details.
 *
 * Routines for managing the direct user-to-HV zero-copy command
 * posting path.
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/cdev.h>
#include <linux/mm.h>
#include <linux/uaccess.h>
#include <linux/semaphore.h>
#include <linux/pagemap.h>
#include <linux/pci.h>
#include <linux/percpu.h>
#include <linux/hugetlb.h>

#include <asm/tilepci.h>
#include <asm/cacheflush.h>

#include "tilepci_endp.h"

/**********************************************************************/
/*                        HV Direct Zero-Copy Routines                */
/**********************************************************************/

/*
 * Per-open-file state for the direct-to-HV zero-copy interface.  One
 * instance is allocated in tlr_hv_direct_zc_open(), stored in
 * filp->private_data, and freed in tlr_hv_direct_release().
 */
struct direct_file {
	/* The PCI device we're working with. */
	struct tlr_pcie_dev *tlr;

	/*
	 * CPU number passed to tlr_hv_direct_zc_open().  The release
	 * path runs its unregistration work on this CPU.
	 */
	int registered_cpu;

	/*
	 * Number of credits reserved; maintained by
	 * set_credits_reserved().
	 */
	unsigned long credits_reserved;

	/* A list of commands we've reserved from the global ZC cmd list. */
	struct tlr_list reserved_cmds;

	/* That cpu's init_read data; copied out by TILEPCI_IOC_GET_HV_ZC. */
	struct pcie_tile_local_init_read *init_read;

	/*
	 * Table of I/O mem-registered huge pages, indexed by
	 * (user VA >> HPAGE_SHIFT).  A slot is in use when its va is
	 * non-NULL; page is the page obtained from get_user_pages()
	 * for that VA and is released when the slot is unregistered.
	 */
	struct {
		void *va;
		struct page *page;
	} iomem_pages[1 << (32 - HPAGE_SHIFT)];

	/*
	 * Bitmask of registered channels: channel N is bit (N % 32)
	 * of word (N / 32).
	 */
	u32 channel_mask[(PCIE_CHANNELS + 31) / 32];
};


/*
 * Handle an iomem registration request by getting the backing pages
 * and registering each of them with the hypervisor driver.
 *
 * range->size is rounded up in place to a multiple of HPAGE_SIZE.  The
 * range must start on a huge-page boundary, must not be empty or wrap,
 * and every huge page in it must map to a slot of df->iomem_pages.
 *
 * Returns 0 on success.  On failure returns TILEPCI_EFAULT (bad range,
 * page not obtainable, page not a huge page, or short write to the
 * hypervisor) or the negative value returned by hv_dev_pwrite(); in
 * that case every page registered by this call has been unregistered
 * and released again.
 *
 * NOTE(review): a slot that is already registered is overwritten
 * without releasing the page it held; confirm whether callers may
 * register the same range twice.
 *
 * This is largely copied from xgbe.c; we should unify the code.
 */
static int register_iomem(struct direct_file *df,
			  tilepci_iomem_address_t *range)
{
	const unsigned long nslots =
		sizeof(df->iomem_pages) / sizeof(df->iomem_pages[0]);
	struct mm_struct *mm = current->mm;
	int hv_fd = df->tlr->hv_channel_ctl_fd;
	unsigned long first;
	unsigned long npages;
	unsigned long i;

	/*
	 * Round up to huge page size and sanity check; if any of the
	 * range isn't huge pages we'll catch it later.
	 */
	range->size = (range->size + HPAGE_SIZE - 1) & ~(HPAGE_SIZE - 1);
	if ((range->va & (HPAGE_SIZE - 1)) ||
	    (range->size & (HPAGE_SIZE - 1)) ||
	    range->va + range->size <= range->va) {
		ERR("Bad VA range\n");
		return TILEPCI_EFAULT;
	}

	/*
	 * Every page index must fit in the iomem_pages table.  Compare
	 * before narrowing so a wide VA type can't be truncated into
	 * range.
	 */
	if ((range->va >> HPAGE_SHIFT) >= nslots ||
	    (range->size >> HPAGE_SHIFT) >
	    nslots - (range->va >> HPAGE_SHIFT)) {
		ERR("Bad VA range\n");
		return TILEPCI_EFAULT;
	}
	first = range->va >> HPAGE_SHIFT;
	npages = range->size >> HPAGE_SHIFT;

	/*
	 * We use get_user_pages to grab and lock the pages, so we need
	 * this semaphore.
	 */
	down_read(&mm->mmap_sem);

	/*
	 * Get and register our pages, one at a time.
	 */
	for (i = 0; i < npages; i++) {
		/* Unsigned so the shift can't reach a sign bit. */
		unsigned long index = first + i;
		unsigned long va = index << HPAGE_SHIFT;
		struct vm_area_struct *vma;
		struct page *page;
		int retval;

		/*
		 * First get the page.
		 */
		df->iomem_pages[index].page = NULL;
		retval = get_user_pages(current, mm, va,
					1, 1, 0, &df->iomem_pages[index].page,
					&vma);

		if (retval != 1 || !is_vm_hugetlb_page(vma)) {
			/* If we didn't get a large page, we fail. */
			ERR("Couldn't get page or wasn't huge\n");
			retval = TILEPCI_EFAULT;
		} else {
			/* We got a page, see if we can register it. */
			int err;
			pcie_iomem_address_t reg_ia;

			page = df->iomem_pages[index].page;

			reg_ia.va = (HV_VirtAddr) va;
			reg_ia.pa =
				((HV_PhysAddr)page_to_pfn(page)) << PAGE_SHIFT;
			reg_ia.size = HPAGE_SIZE;

			err = hv_dev_pwrite(
				hv_fd, 0, (HV_VirtAddr)&reg_ia,
				sizeof(reg_ia),
				PCIE_CHANNEL_CTL_IOMEM_REGISTER_OFF);
			/*
			 * Anything other than a full write is a failure;
			 * a short non-negative count must not be mistaken
			 * for success by the check below.
			 */
			if (err != sizeof(reg_ia))
				retval = (err < 0) ? err : TILEPCI_EFAULT;
		}

		/*
		 * If the last page failed, we need to back out everything
		 * we've done so far, and return an error.
		 */
		if (retval < 0) {
			page = df->iomem_pages[index].page;
			if (page)
				page_cache_release(page);
			if (i > 0) {
				/*
				 * Unregister and release everything we've
				 * successfully registered up to this point.
				 */
				tilepci_iomem_address_t unreg_ia;
				unsigned long j;

				unreg_ia.va = range->va;
				unreg_ia.size = i << HPAGE_SHIFT;
				hv_dev_pwrite(hv_fd, 0,
				      (HV_VirtAddr)&unreg_ia,
				      sizeof(unreg_ia),
				      PCIE_CHANNEL_CTL_IOMEM_UNREGISTER_OFF);

				for (j = 0; j < i; j++) {
					page_cache_release(
						df->iomem_pages[first + j].page);
					df->iomem_pages[first + j].va = NULL;
				}
			}

			up_read(&mm->mmap_sem);
			return retval;
		}

		/*
		 * Remember that we've successfully gotten this page.
		 */
		df->iomem_pages[index].va = (void *) va;
	}

	up_read(&mm->mmap_sem);
	return 0;
}


/*
 * Walk every slot of df->iomem_pages and, for each slot whose va is
 * set, ask the hypervisor driver to unregister that huge page, drop
 * our reference on the page and mark the slot free.  Intended for use
 * when the struct direct_file is being released.
 */
static void unregister_all_iomem(struct direct_file *df)
{
	const int nslots =
		sizeof(df->iomem_pages) / sizeof(df->iomem_pages[0]);
	int slot;

	for (slot = 0; slot < nslots; slot++) {
		pcie_iomem_address_t unreg;

		/* Skip slots that were never registered. */
		if (df->iomem_pages[slot].va == NULL)
			continue;

		unreg.va = (HV_VirtAddr) df->iomem_pages[slot].va;
		unreg.size = HPAGE_SIZE;

		/* Best effort: there is nothing useful to do on failure. */
		hv_dev_pwrite(df->tlr->hv_channel_ctl_fd, 0,
			      (HV_VirtAddr)&unreg, sizeof(unreg),
			      PCIE_CHANNEL_CTL_IOMEM_UNREGISTER_OFF);

		page_cache_release(df->iomem_pages[slot].page);
		df->iomem_pages[slot].va = NULL;
	}
}


/*
 * Allocate zero-copy command credits for use by a tile's direct-to-hv
 * interface.
 *
 * Sets the number of commands held on df->reserved_cmds to 'credits'
 * by moving commands between that list and the device-wide free list.
 *
 * Returns 0 on success, -EINVAL if 'credits' exceeds
 * TILEPCI_CMD_SLOTS, -EBUSY if the free list doesn't hold enough
 * commands to grow the reservation, or -EIO if a list unexpectedly
 * ran out of commands part way through.
 */
static int set_credits_reserved(struct direct_file *df, unsigned long credits)
{
	/* This implementation is based on __tlr_zc_cmd_q_ncmd_set(). */
	struct tlr_list *from_list;
	struct tlr_list *to_list;
	unsigned long count;
	unsigned long moved;
	unsigned long tlr_lock_flags;
	int shrinking;
	int ret = 0;

	if (credits > TILEPCI_CMD_SLOTS)
		return -EINVAL;

	spin_lock_irqsave(&df->tlr->zc_state.cmd_q_lock, tlr_lock_flags);

	/*
	 * Read credits_reserved only once the lock is held; it is
	 * updated under this lock, so computing the delta earlier could
	 * use a stale value if two callers race on the same file.
	 */
	shrinking = credits < df->credits_reserved;
	if (shrinking) {
		from_list = &df->reserved_cmds;
		to_list = &df->tlr->zc_state.cmd_q_free_list;
		count = df->credits_reserved - credits;
	} else {
		from_list = &df->tlr->zc_state.cmd_q_free_list;
		to_list = &df->reserved_cmds;
		count = credits - df->credits_reserved;
		if (__tlr_list_len(from_list) < count) {
			ret = -EBUSY;
			goto exit;
		}
	}

	/* Move the cmds. */
	for (moved = 0; moved < count; moved++) {
		struct tlr_zc_cmd *cmd = __tlr_list_rem_zc_cmd(from_list);
		if (cmd == NULL) {
			ERR("Unexpected NULL command during"
			    " set_credits_reserved().\n");
			ret = -EIO;
			break;
		}
		__tlr_list_add_zc_cmd(to_list, cmd);
	}

	if (ret == 0) {
		df->credits_reserved = credits;
	} else if (shrinking) {
		/* Account for the commands that did get moved. */
		df->credits_reserved -= moved;
	} else {
		df->credits_reserved += moved;
	}

 exit:
	spin_unlock_irqrestore(&df->tlr->zc_state.cmd_q_lock, tlr_lock_flags);
	return ret;
}


/*
 * Register a channel as in use by the direct-to-HV interface.  Do not
 * allow access to channels that are already used by the
 * zero-copy-device-file interface, since it would confuse the
 * per-channel reset flows if one interface reset and the other didn't
 * expect it.
 *
 * Returns 0 on success or if 'chan' is already registered on this
 * file, TILEPCI_ECHANNEL if 'chan' is outside every supported channel
 * range, -ENXIO if link_is_down() reports the link down for a channel
 * in the C2C range, -ERESTARTSYS if interrupted while waiting for a
 * queue mutex, or -EBUSY if the queue's open_count is non-zero.
 */
static int register_channel(struct direct_file *df, unsigned long chan)
{
	/* Unsigned so that bit 31 doesn't shift into a sign bit. */
	const unsigned int bit = 1U << (chan % 32);
	int ret = 0;
	int err;
	unsigned long irqflags;
	struct tlr_zc_cmd_q *q;

	if (!((chan >= FIRST_ZC_H2T_CHAN && chan <= LAST_ZC_H2T_CHAN) ||
	      (chan >= FIRST_ZC_T2H_CHAN && chan <= LAST_ZC_T2H_CHAN) ||
	      (chan >= TILEPCI_FIRST_C2C_SEND_CHAN &&
	       chan <= LAST_C2C_SEND_CHAN) ||
	      (chan >= TILEPCI_FIRST_C2C_RECV_CHAN &&
	       chan <= LAST_C2C_RECV_CHAN) ||
	      (chan >= TILEPCI_FIRST_DMA_READ_CHAN &&
	       chan <= LAST_DMA_READ_CHAN) ||
	      (chan >= TILEPCI_FIRST_DMA_WRITE_CHAN &&
	       chan <= LAST_DMA_WRITE_CHAN)))
		return TILEPCI_ECHANNEL;

	/* Return success if this one is already registered. */
	if (df->channel_mask[chan / 32] & bit)
		return 0;

	/*
	 * Don't allow registration if the link is currently down;
	 * this matches the convention of the device file based ZC
	 * API.
	 */
	if (chan >= PCIE_FIRST_C2C_SEND_CHANNEL &&
	    chan <= LAST_C2C_RECV_CHAN &&
	    link_is_down(df->tlr, chan))
		return -ENXIO;

	/*
	 * This is a legal zero-copy number; look up the ZC device
	 * struct and see if we can reserve it.
	 */
	q = df->tlr->zc_state.cmd_queues[chan];

	/* Grab the queue locks in the proper order (read then write). */
	err = down_interruptible(&q->rd_xfer_mutex);
	if (err != 0) {
		ret = -ERESTARTSYS;
		goto read_lock_failed;
	}
	err = down_interruptible(&q->wr_xfer_mutex);
	if (err != 0) {
		ret = -ERESTARTSYS;
		goto write_lock_failed;
	}

	/*
	 * Re-check now that we hold the queue mutexes: another caller
	 * may have registered this channel on this file while we were
	 * waiting, and direct_hv_count must only go up once per mask
	 * bit because unregistration decrements it once per bit.
	 */
	if (df->channel_mask[chan / 32] & bit)
		goto exit;

	/* Check to see if the device-file based interface is open. */
	if (q->open_count > 0) {
		ret = -EBUSY;
		goto exit;
	}

	/* ISSUE: wait for any in-flight channel resets? */

	/*
	 * Bump the direct_hv count so the device-file API won't be
	 * able to open; mark the registration bit.
	 */
	df->channel_mask[chan / 32] |= bit;

	spin_lock_irqsave(&q->direct_hv_lock, irqflags);
	q->direct_hv_count++;
	spin_unlock_irqrestore(&q->direct_hv_lock, irqflags);

 exit:
	up(&q->wr_xfer_mutex);
 write_lock_failed:
	up(&q->rd_xfer_mutex);
 read_lock_failed:
	return ret;
}


/*
 * Unregister all the channels associated with a direct-to-HV interface.
 *
 * For every channel whose bit is set in df->channel_mask, drop this
 * file's contribution to the queue's direct_hv_count; when the count
 * reaches zero, write the channel number to the hypervisor driver's
 * channel-reset offset.  Each bit is cleared once handled, so calling
 * this again on the same df is a no-op.
 */
static void unregister_all_channels(struct direct_file *df)
{
	unsigned long chan;
	unsigned long irqflags;

	for (chan = 0; chan < PCIE_CHANNELS; chan++) {
		/* Unsigned so that bit 31 doesn't shift into a sign bit. */
		const unsigned int bit = 1U << (chan % 32);
		struct tlr_zc_cmd_q *q;

		if (!(df->channel_mask[chan / 32] & bit))
			continue;

		q = df->tlr->zc_state.cmd_queues[chan];
		BUG_ON(q == NULL);

		spin_lock_irqsave(&q->direct_hv_lock, irqflags);
		BUG_ON(q->direct_hv_count == 0);
		q->direct_hv_count--;
		if (q->direct_hv_count == 0) {
			int res;
			struct tlr_pcie_dev *tlr = df->tlr;

			res = hv_dev_pwrite(tlr->hv_channel_ctl_fd,
				0,
				(HV_VirtAddr)&q->chan,
				sizeof(q->chan),
				PCIE_CHANNEL_CTL_CHANNEL_RESET_OFF);
			if (res != sizeof(q->chan))
				ERR("Direct-HV channel reset failed,"
				    " %d\n", q->chan);
		}
		spin_unlock_irqrestore(&q->direct_hv_lock, irqflags);

		/* This file no longer holds the channel. */
		df->channel_mask[chan / 32] &= ~bit;
	}

	/* ISSUE: wait for channel resets to complete. */

}


/*
 * Tear down everything a struct direct_file has registered: its iomem
 * pages, its reserved command credits, and its channels, in that
 * order.  'arg' is the struct direct_file pointer; the signature is
 * the one smp_call_function_single() expects, which is how the release
 * routine invokes this function.
 */
static void release_on_cpu(void *arg)
{
	struct direct_file *file_state = arg;

	unregister_all_iomem(file_state);
	/* Hand every reserved command back; the result is not checked. */
	(void)set_credits_reserved(file_state, 0);
	unregister_all_channels(file_state);
}


static long tlr_hv_direct_zc_ioctl(struct file *filp,
				   unsigned int cmd, unsigned long arg)
{
	struct direct_file *df = filp->private_data;

	switch (cmd) {
	case TILEPCI_IOC_GET_HV_ZC:

		if (copy_to_user((void __user *)arg, df->init_read,
				 sizeof(*df->init_read))) {
			EX_TRACE("IOC_GET_HV_ZC copy failed\n");
			return -EFAULT;
		}

		break;

	case TILEPCI_IOC_IOMEM_REGISTER:
	{
		tilepci_iomem_address_t iomem;
		if (copy_from_user(&iomem, (void __user *)arg,
				   sizeof(tilepci_iomem_address_t)))
			return -EFAULT;

		return register_iomem(df, &iomem);
	}

	case TILEPCI_IOC_SET_NCMD:
	{
		return set_credits_reserved(df, arg);
	}

	case TILEPCI_IOC_CHANNEL_REGISTER:
	{
		return register_channel(df, arg);
	}

	default:
		return -EINVAL;
	}
	return 0;
}

/*
 * Device release routine.  Runs release_on_cpu() for this file's
 * state, marks the file's CPU as available for a new open, and frees
 * the per-file structure.  Always returns 0.
 */
static int tlr_hv_direct_release(struct inode *inode, struct file *filp)
{
	struct direct_file *df = filp->private_data;
	struct tlr_pcie_dev *tlr = df->tlr;
	int cpu = df->registered_cpu;
	unsigned long flags;

	/*
	 * The unregistration has to happen on the tile that did the
	 * original registration, so hand the work to that CPU and wait
	 * for it to finish.  A pthread program may close the handle
	 * from a different thread long after the one that used it has
	 * exited, so we can't assume we are already running there.
	 */
	smp_call_function_single(cpu, release_on_cpu, (void *)df, 1);

	/* Let another task open this CPU from now on. */
	spin_lock_irqsave(&tlr->open_cpus_lock, flags);
	cpumask_clear_cpu(cpu, &tlr->open_cpus_mask);
	spin_unlock_irqrestore(&tlr->open_cpus_lock, flags);

	filp->private_data = NULL;
	kfree(df);

	return 0;
}


/*
 * File operations installed on a file by tlr_hv_direct_zc_open(); only
 * ioctl and release are provided.
 */
static const struct file_operations tlr_hv_direct_zc_ops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = tlr_hv_direct_zc_ioctl,
	.release = tlr_hv_direct_release,
};

/*
 * Open a direct-to-HV zero-copy file handle for 'cpu'.
 *
 * Claims 'cpu' in the device's open_cpus_mask, allocates the per-file
 * struct direct_file, and switches the file over to
 * tlr_hv_direct_zc_ops.  'init_read' is saved in the per-file state
 * and later copied out by the TILEPCI_IOC_GET_HV_ZC ioctl.
 *
 * Returns 0 on success, -ENXIO if tlr_is_ready() fails, -EBUSY if
 * 'cpu' already has a direct-to-hv handle open, or -ENOMEM if the
 * per-file state can't be allocated.
 */
int tlr_hv_direct_zc_open(struct inode *inode, struct file *filp, int cpu,
			  struct pcie_tile_local_init_read *init_read)
{
	unsigned long irqflags;
	struct direct_file *df;
	struct tlr_pcie_dev *tlr =
		container_of(inode->i_cdev, struct tlr_pcie_dev, cdev);

	if (!tlr_is_ready(tlr))
		return -ENXIO;

	/*
	 * Verify that this cpu doesn't already have a direct-to-hv
	 * file handle open.
	 */
	spin_lock_irqsave(&tlr->open_cpus_lock, irqflags);
	if (cpumask_test_cpu(cpu, &tlr->open_cpus_mask)) {
		spin_unlock_irqrestore(&tlr->open_cpus_lock, irqflags);
		return -EBUSY;
	}
	cpumask_set_cpu(cpu, &tlr->open_cpus_mask);
	spin_unlock_irqrestore(&tlr->open_cpus_lock, irqflags);

	/*
	 * Create our per-file data structure, save a pointer to the
	 * device information in it, and then attach it to our file struct.
	 */
	df = kmalloc(sizeof(*df), GFP_KERNEL);
	if (df == NULL) {
		/*
		 * Give the cpu back; no release will ever run for this
		 * failed open, so leaving the bit set would make every
		 * later open of this cpu fail with -EBUSY.
		 */
		spin_lock_irqsave(&tlr->open_cpus_lock, irqflags);
		cpumask_clear_cpu(cpu, &tlr->open_cpus_mask);
		spin_unlock_irqrestore(&tlr->open_cpus_lock, irqflags);
		return -ENOMEM;
	}
	memset(df, 0, sizeof(*df));
	df->tlr = tlr;
	df->registered_cpu = cpu;
	df->init_read = init_read;
	tlr_list_init(&df->reserved_cmds);
	filp->private_data = df;

	filp->f_op = &tlr_hv_direct_zc_ops;

	return 0;
}
