/*
 * Copyright 2011 Tilera Corporation. All Rights Reserved.
 *
 *   This program is free software; you can redistribute it and/or
 *   modify it under the terms of the GNU General Public License
 *   as published by the Free Software Foundation, version 2.
 *
 *   This program is distributed in the hope that it will be useful, but
 *   WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 *   NON INFRINGEMENT.  See the GNU General Public License for
 *   more details.
 */

#ifndef __TILEPCI_SHARED_CODE_H__
#define __TILEPCI_SHARED_CODE_H__

#include <linux/version.h>
#include <linux/sched.h>
#include <asm/uaccess.h>
#include <asm/page.h>

/*
 * This header is shared between two builds; the includer must define
 * at least one of TILEPCI_HOST or TILEPCI_ENDP before including it.
 */
#if !defined(TILEPCI_HOST) && !defined(TILEPCI_ENDP)
#error Shared code requires define of either TILEPCI_HOST or TILEPCI_ENDP
#endif

/* Boolean constants, provided only if nothing else has defined them. */
#if !defined(FALSE)
#define FALSE	(0)
#endif /* !defined(FALSE) */

#if !defined(TRUE)
#define TRUE	(1)
#endif /* !defined(TRUE) */


struct tlr_stream;
struct tlr_pcie_dev;

/* Feature Bits (each feature occupies its own bit). */
#define HV_FEATURE_SCATTER_GATHER		0x0001
#define HV_FEATURE_ZERO_COPY			0x0002

/*
 * Interrupt Status Bits: the APPS mask selects bits 16-31 and the
 * KERNEL mask selects bits 8-15 of a 32-bit status word.
 */
#define PCIE_INTR_MASK_APPS			0xFFFF0000
#define PCIE_INTR_MASK_KERNEL			0x0000FF00


/*
 * Minor devices are allocated as follows
 * 0-15		-- read & write old-style streams
 * 16		-- boot device
 * 17		-- lock device
 * 18		-- control device
 * ....
 * 128-191	-- Host to Tile devices
 * 192-255	-- Tile to Host devices
 */

/*
 * T2H -> Tile to Host direction
 * H2T -> Host to Tile direction
 */

/* Number of character stream devices. */
#define NUM_CHAR_STREAMS (TILEPCI_NUM_CHAR_STREAMS)

/*
 * Inclusive channel ranges used by the character streams, one range
 * per direction; each range holds NUM_CHAR_STREAMS channels.
 */
#define FIRST_PUBLIC_H2T_CHAN (TILEPCI_FIRST_CHAR_H2T_CHAN)
#define LAST_PUBLIC_H2T_CHAN  (FIRST_PUBLIC_H2T_CHAN + NUM_CHAR_STREAMS - 1)
#define FIRST_PUBLIC_T2H_CHAN (TILEPCI_FIRST_CHAR_T2H_CHAN)
#define LAST_PUBLIC_T2H_CHAN  (FIRST_PUBLIC_T2H_CHAN + NUM_CHAR_STREAMS - 1)

/*
 * Commands per character stream, and the total over all streams.
 * NOTE(review): the 4 + 4 presumably mirrors BUFFERS_PER_STREAM for
 * each of the two directions -- confirm.
 */
#define TLR_PUBLIC_STREAM_NCMD	(4 + 4)
#define TLR_PUBLIC_NCMD		(TLR_PUBLIC_STREAM_NCMD * NUM_CHAR_STREAMS)

/* Each channel can post up to this many buffers in each direction. */
#define BUFFERS_PER_STREAM 4

/* The size of each posted buffer, in bytes. */
#define BUFFER_SIZE 4096

/*
 * We keep an array containing a zero-copy command queue for all
 * available channels.  Only a subset of the channels are used by
 * zero-copy devices; the others may be NULL or be used by the
 * character streams.
 */
#define NUM_ZC_H2T_CHAN	(TILEPCI_NUM_ZC_H2T_CHAN)
#define NUM_ZC_T2H_CHAN	(TILEPCI_NUM_ZC_T2H_CHAN)

/* Inclusive channel ranges used by the zero-copy queues. */
#define FIRST_ZC_H2T_CHAN   (TILEPCI_FIRST_ZC_H2T_CHAN)
#define LAST_ZC_H2T_CHAN    (FIRST_ZC_H2T_CHAN + NUM_ZC_H2T_CHAN - 1)
#define FIRST_ZC_T2H_CHAN   (TILEPCI_FIRST_ZC_T2H_CHAN)
#define LAST_ZC_T2H_CHAN    (FIRST_ZC_T2H_CHAN + NUM_ZC_T2H_CHAN - 1)

/*
 * Minor-number space: total count, then the inclusive minor ranges of
 * the character streams and of the zero-copy devices per direction.
 */
#define NUM_MINOR_DEVICES    256
#define FIRST_PUBLIC_MINOR   (TILEPCI_FIRST_CHAR_STREAM_MINOR)
#define LAST_PUBLIC_MINOR    (FIRST_PUBLIC_MINOR + NUM_CHAR_STREAMS - 1)
#define FIRST_ZC_H2T_MINOR   (TILEPCI_FIRST_ZC_H2T_MINOR)
#define LAST_ZC_H2T_MINOR    (FIRST_ZC_H2T_MINOR + NUM_ZC_H2T_CHAN - 1)
#define FIRST_ZC_T2H_MINOR   (TILEPCI_FIRST_ZC_T2H_MINOR)
#define LAST_ZC_T2H_MINOR    (FIRST_ZC_T2H_MINOR + NUM_ZC_T2H_CHAN - 1)

/*
 * Last (inclusive) minor number and channel index of the chip-to-chip
 * send/receive ranges, each derived as FIRST + NUM - 1.
 */
#define LAST_C2C_SEND_MINOR \
  (TILEPCI_FIRST_C2C_SEND_MINOR + TILEPCI_NUM_C2C_SEND_CHAN - 1)
#define LAST_C2C_RECV_MINOR \
  (TILEPCI_FIRST_C2C_RECV_MINOR + TILEPCI_NUM_C2C_RECV_CHAN - 1)
#define LAST_C2C_SEND_CHAN \
  (TILEPCI_FIRST_C2C_SEND_CHAN + TILEPCI_NUM_C2C_SEND_CHAN - 1)
#define LAST_C2C_RECV_CHAN \
  (TILEPCI_FIRST_C2C_RECV_CHAN + TILEPCI_NUM_C2C_RECV_CHAN - 1)

/* Last (inclusive) channel index of the DMA read and write ranges. */
#define LAST_DMA_READ_CHAN \
  (TILEPCI_FIRST_DMA_READ_CHAN + TILEPCI_NUM_DMA_READ_CHAN - 1)
#define LAST_DMA_WRITE_CHAN \
  (TILEPCI_FIRST_DMA_WRITE_CHAN + TILEPCI_NUM_DMA_WRITE_CHAN - 1)

/*
 * Command counts for a zero-copy queue.
 * NOTE(review): judging by the names these are the counts at init time
 * and after open -- confirm against the users of these macros.
 */
#define TLR_ZC_CMD_Q_NCMD_INIT	(0)
#define TLR_ZC_CMD_Q_NCMD_OPEN	(4)

/* Reserved cookie values. */
#define TLR_COOKIE_UNKNOWN	(0)
#define TLR_COOKIE_INVALID	(-1)

/*
 * Address types used by the shared code: on the host these are the
 * kernel's dma_addr_t / unsigned long, on the endpoint both are the
 * hypervisor's HV_PhysAddr.
 */
#if defined(TILEPCI_HOST)
#define tlr_dma_addr_t	dma_addr_t
#define tlr_phys_addr_t	unsigned long
#elif defined(TILEPCI_ENDP)
#define tlr_dma_addr_t	HV_PhysAddr
#define tlr_phys_addr_t	HV_PhysAddr
#else
#error Architecture Undefined
#endif	/* if defined(TILEPCI_HOST) */

/* Size of tlr_zc_cmd.mapped_pages[]: pages in a maximum-length transfer. */
#define MAX_MAPPED_PAGES_PER_XFER	(TILEPCI_MAX_XFER_LEN / PAGE_SIZE)

/* Transfer direction: H2T is host-to-tile, T2H is tile-to-host. */
enum tlr_dir_e {
	TLR_DIR_UNKNOWN,
	TLR_DIR_H2T,
	TLR_DIR_T2H,
};

/* One fragment of a buffer tracked by struct tlr_mmap_state. */
struct tlr_buf_fragment {
	struct page *page;
};

/* Bookkeeping for a buffer that is made of fragments and shared by VMAs. */
struct tlr_mmap_state {
	/* Array of buffer fragments, TILEPCI_MMAP_GRANULARITY bytes each.*/
	struct tlr_buf_fragment *frags;
	int num_frags;              /* Number of entries in 'frags'. */
	int ref_cnt;                /* # of VMAs referencing us. */

	struct semaphore mutex;     /* Must hold this to modify the above. */
};

/*
 * Kind of zero-copy command (and of the queue that carries it):
 * host-to-tile, tile-to-host, chip-to-chip send/receive, or DMA
 * read/write.
 */
enum tlr_zc_cmd_type_e {
	TLR_ZC_CMD_UNKNOWN,
	TLR_ZC_CMD_H2T,
	TLR_ZC_CMD_T2H,
	TLR_ZC_CMD_C2C_SEND,
	TLR_ZC_CMD_C2C_RECV,
	TLR_DMA_READ,
	TLR_DMA_WRITE,
};

/*
 * State of a single zero-copy command.
 * NOTE(review): FREE/PEND/POST/COMP presumably track which of the
 * queue's free_q/pend_q/post_q/comp_q lists holds the command -- confirm.
 */
enum tlr_zc_cmd_st_e {
	TLR_ZC_CMD_ST_UNKNOWN,
	TLR_ZC_CMD_ST_FREE,
	TLR_ZC_CMD_ST_PEND,
	TLR_ZC_CMD_ST_POST,
	TLR_ZC_CMD_ST_COMP,
};

/*
 * A single zero-copy command.  It is linked onto a struct tlr_list via
 * 'list', records the user pages pinned for the transfer in
 * mapped_pages[] (nmapped_pages valid entries, at most
 * MAX_MAPPED_PAGES_PER_XFER), and carries the host- or tile-side
 * buffer command in 'cmd'.
 */
struct tlr_zc_cmd {
	struct list_head		 list;
	struct tlr_pcie_dev		*tlr;
	struct tlr_zc_cmd_q		*cmd_q;
	enum tlr_zc_cmd_type_e		 type;
	enum tlr_zc_cmd_st_e		 state;
	enum dma_data_direction		 dma_dir;
	dma_addr_t			 dma_addr;
	void				*usr_addr;
	uint32_t			 post_len;
	uint32_t			 comp_len;
	tilepci_cookie_t		 cookie;
	unsigned int			 flags;
	uint32_t			 nmapped_pages;
	struct page		      *mapped_pages[MAX_MAPPED_PAGES_PER_XFER];
#if defined(TILEPCI_HOST)
	struct pcie_host_buffer_cmd	 cmd;
#elif defined(TILEPCI_ENDP)
	struct pcie_tile_buffer_cmd	 cmd;
#else
#error Undefined Architecture
#endif
};
/* Size in bytes of one struct tlr_zc_cmd. */
#define SZ_TLR_ZC_CMD	(sizeof(struct tlr_zc_cmd))

/*
 * Each command queue can be reset independently.  When the
 * hypervisor first comes up, each queue is in reset and waiting for a
 * command with SoC.  When the user posts a command, we set the SoC
 * bit and give it to the hypervisor, entering the connected state.
 * Once connected, we can be reset either by closing the file or
 * issuing the 'start reset' ioctl.  Once in reset, we may or may not
 * discard completions; if the file handle is still open we need to
 * keep them for return to the user, and if the file handle is closed
 * or the user issues the 'finish reset' ioctl, we need to discard
 * completions until no commands are outstanding.
 */
/* Connection/reset state of one zero-copy command queue. */
enum tlr_zc_cmd_q_st_e {
	CMD_Q_ST_NEED_SOC,       /* In reset, ready for first cmd with SoC. */
	CMD_Q_ST_CONNECTED,      /* Connection is up and running. */
	CMD_Q_ST_RESET_STARTED,  /* In reset, but don't discard cpls yet. */
	CMD_Q_ST_RESET_DISCARD,  /* In reset, discard cpls until all return.*/
	CMD_Q_ST_CHIP_DOWN,      /* The whole chip is in reset. */
};

/* A kernel circular list head paired with a count of its entries. */
struct tlr_list {
	struct list_head	  q;	/* List head; empty when it points to itself. */
	uint32_t		  len;	/* Number of entries currently on 'q'. */
};

/*
 * Per-channel zero-copy command queue: four command lists
 * (free/pend/post/comp), the queue's reset state, request and
 * completion counters, wait queues, and mmap bookkeeping.
 */
struct tlr_zc_cmd_q {
	enum tlr_zc_cmd_type_e	 type;
	uint32_t		 chan;

	struct tlr_list		 free_q;
	struct tlr_list		 pend_q;
	struct tlr_list		 post_q;
	struct tlr_list		 comp_q;

	enum tlr_zc_cmd_q_st_e	 state;
	uint32_t		 nreq_tot;
	uint32_t		 ncomp_tot;

	/* This spinlock protects everything from the lists to here. */
	spinlock_t		 lock;

	/* Scratch array for read(); sized from ncmd_wanted when allocated. */
	tilepci_xfer_comp_t	*rd_xfer_comps;
	struct semaphore	 rd_xfer_mutex;

	/* Scratch array for write(); sized from ncmd_wanted when allocated. */
	tilepci_xfer_req_t	*wr_xfer_reqs;
	struct semaphore	 wr_xfer_mutex;

	/* The number of commands the user wants allocated to this queue. */
	uint32_t		 ncmd_wanted;

	/* The number we actually have right now.  This could be
	 * greater than ncmd_wanted; it will never be less than
	 * ncmd_wanted. */
	uint32_t		 ncmd_have;

	/* If passed a large number of completions to be filled, read
	 * should return once it reaches this many completed. */
	uint32_t		 ncmd_min_read;

	/* Length of comp_q at which the readers on comp_queue should
	 * be woken up. */
	uint32_t		 ncmd_comp_wakeup;

	struct tlr_pcie_dev	*tlr;
	wait_queue_head_t	 free_queue;
	wait_queue_head_t	 comp_queue;
	wait_queue_head_t	 reset_drain_queue;

	/* Request/completion counters (counts and bytes). */
	uint32_t		 nreq_cur;
	uint32_t		 ncomp_cur;
	uint64_t		 nreq_bytes_cur;
	uint64_t		 ncomp_bytes_cur;

	uint64_t		 nreq_bytes_tot;
	uint64_t		 ncomp_bytes_tot;

	uint32_t		 ncomp_mismatch;

	int open_count;
	int is_ready;
	int chip_reset_poison;

	/*
	 * We keep a separate spinlock on this counter so that we can
	 * decrement this count while in an smp_call.  This lock is
	 * only needed for write accesses.
	 */
	spinlock_t direct_hv_lock;
	int direct_hv_count;

	struct tlr_mmap_state mmap_state;

#if defined(TILEPCI_ENDP)
	u32 cpl_queue_id;	 /* The tile that receives completions. */
#endif
};

/*
 * Device-wide zero-copy state: the pool of command structures, the
 * global free list they are handed out from, and the per-channel
 * command queues.
 */
struct tlr_zc_state {
	struct tlr_pcie_dev *tlr;

	/* Command structures used to build command queues. */
	uint32_t		 ncmd;
	size_t			 cmds_sz;
	struct tlr_zc_cmd	*cmds;
	spinlock_t		 cmd_q_lock;  /* lock for free list */
	struct tlr_list		 cmd_q_free_list;

	/* The command queues.  Indexed by HV channel number, so some
	 * entries may be NULL if there isn't actually a command queue
	 * assigned to that channel. */
	struct tlr_zc_cmd_q *cmd_queues[PCIE_CHANNELS];
};

/* State of the receive (packet) queue and of its mmap users. */
struct tlr_packet_queue_state {
	/* Number of buffers allocated for the receive queue. */
	uint32_t num_elems;
	/* Size of a single buffer, in bytes. */
	uint32_t buf_size;

	/* Pointer to memory for the receive queue. */
	void *packet_queue_mem;
	/* DMA handle that goes with packet_queue_mem. */
	dma_addr_t packet_queue_mem_handle;

	/* mmap info. */
	int vmas;		/* # of VMAs referencing us. */
	struct semaphore mutex;
};

/*
 * We maintain separate state objects for each stream.  A stream owns a
 * ring of BUFFERS_PER_STREAM write buffers and a ring of
 * BUFFERS_PER_STREAM read buffers, plus the counters used to index them.
 */
struct tlr_stream {
	int index;
	int need_read_soc;
	int need_write_soc;
	struct tlr_pcie_dev *dev;
	int open_count;
	int is_ready;

#if defined(TILEPCI_ENDP)
	u32 cpl_queue_id;	    /* The tile that receives completions. */
	unsigned long buffer_page;  /* Use one page for all the buffers. */
#else
	/* Host side: one DMA address per ring buffer, per direction. */
	dma_addr_t write_dma_addrs[BUFFERS_PER_STREAM];
	dma_addr_t read_dma_addrs[BUFFERS_PER_STREAM];
#endif

	/*
	 * Poison bit indicating that the chip has rebooted but this
	 * channel hasn't been closed and reopened.
	 */
	int chip_reset_poison;

	/* Semaphore for guaranteeing only one writer at a time. */
	struct semaphore write_mutex;

	/* Queue of processes waiting for write() buffers. */
	wait_queue_head_t write_queue;

	/*
	 * Write-side state.  When opened, the stream allocates a ring
	 * of buffers to be used for all writes.  The buffers are
	 * posted to and returned by the device in-order, so we can
	 * always determine the next buffer address by keeping a count
	 * of the number of buffers posted thus far, and using it to
	 * index the ring.
	 */
	char *write_buffers[BUFFERS_PER_STREAM];
	size_t write_sizes[BUFFERS_PER_STREAM];
	u32 writes_posted;
	u32 writes_completed;

	/* Semaphore for guaranteeing only one reader at a time. */
	struct semaphore read_mutex;

	/* Queue of processes waiting for read() buffers. */
	wait_queue_head_t read_queue;

	/*
	 * Read-side state.  Buffers are managed just like write
	 * buffers (see above), but we also keep a count of the number
	 * of bytes already taken out of the oldest returned buffer.
	 * This is necessary so that an incoming large packet is
	 * properly handled by smaller reads.
	 */
	char *read_buffers[BUFFERS_PER_STREAM];
	size_t read_sizes[BUFFERS_PER_STREAM];
	u32 reads_completed;        /* Completions from iBound. */
	u32 reads_consumed;         /* Completely delivered to user. */
	size_t partial_read_bytes;
};



/* FIXME: Move these back to their original location? */
#if defined(TILEPCI_ENDP)

/*
 * Our PCI device structure, which contains several character devices.
 * This is the endpoint (TILEPCI_ENDP) flavor of the structure.
 */
struct tlr_pcie_dev {
	struct cdev cdev;
	dev_t first_dev;
	/* One entry per character stream; use tlr_get_stream()/tlr_set_stream(). */
	struct tlr_stream *streams[NUM_CHAR_STREAMS];

	/* All state information needed by the shared zero-copy code. */
	struct tlr_zc_state zc_state;

	/* Atomic access of "readiness".  */
	struct semaphore hv_ready_mutex;
	int is_hv_ready;

	/* Spin lock for atomic command Q access between tasklet instances. */
	spinlock_t cmd_queue_lock;


	/* HV communication interfaces */
	int link_index;
	int hv_channel_ctl_fd;
	struct pcie_tile_shm_state *shm_state;

	/*
	 * The HV irq number, and a lock to guard it while making sure
	 * it's the same on all cpus.
	 */
	spinlock_t irq_lock;
	int irq;

	/* Global host link index for C2C communication. */
	int host_link_index;

	/* Some BIOS info. */
	size_t max_payload_size;
	size_t max_read_size;
	int link_width;
	unsigned long long link_bar1_size;
	unsigned long long link_bar1_address;
	union tlr_prebooter_info prebooter_info;

	/* Application CSR interfaces */
	void *csr_memory;
	struct pcie_csr_write_notify_queue *csr_write_queue;
	wait_queue_head_t csr_wait_queue;
	struct semaphore csr_notify_mutex;

	/*
	 * Mask of CPUs that currently have the direct-to-HV zero-copy
	 * command interface open.
	 */
	spinlock_t open_cpus_lock;
	struct cpumask open_cpus_mask;

	/* Pages that we have mapped into the BAR1 memory window. */
	struct semaphore bar1_pages_mutex;
	struct {
		struct page *page;
		struct file *filp;
	}
	bar1_pages[PCIE_BAR1_PAGES];
};


#else

/*
 * Startpoint for config register restore.  Can't restore all configs
 * because some get changed after probe() call.
 */
#define CONFIG_SAVE_START_INDEX (PCI_INTERRUPT_LINE / sizeof(u32))

/*
 * Our PCI device structure, which contains several character devices.
 * This is the host flavor of the structure (used when TILEPCI_ENDP is
 * not defined).
 */
struct tlr_pcie_dev {
	struct pci_dev *pci_dev;
	dev_t first_dev;
	int link_index;
	struct tlr_board_s *board;
	int global_port_index;           /* Index of this port as probed. */

	struct cdev cdev;
	/* One entry per character stream; use tlr_get_stream()/tlr_set_stream(). */
	struct tlr_stream *streams[NUM_CHAR_STREAMS];

	/* All state information needed by the shared zero-copy code. */
	struct tlr_zc_state zc_state;

	/* Address of the card's MMIO region. */
	struct pcie_host_mmio_regs __iomem *regs;

	/* Address of the ibound debug status registers. */
	struct pcie_debug_status_regs __iomem *debug_regs;

	/* Address of the rshim registers. */
	u8 __iomem *rshim_regs;

	/*
	 * State related to device ready check.  The is_ready_lock
	 * guards the interrupt handler's read accesses to is_ready,
	 * and must also be held whenever is_ready is modified.  In
	 * addition, the reboot_mutex semaphore should be held
	 * whenever modifying is_ready, to prevent a race with some
	 * other process trying to reset the chip.
	 */
	spinlock_t is_ready_lock;
	int is_ready;
	u32 features;

	/* Command queue state. */
	spinlock_t cmd_queue_lock;
	uint32_t commands_posted;
	struct pcie_host_buffer_cmd *buffer_cmd_array;
	dma_addr_t buffer_cmd_handle;

	/* Completion queue state (no spinlock - used only by IRQ handler). */
	uint32_t completions_last;
	struct pcie_host_completion *completion_array;
	dma_addr_t completion_handle;

	/* Tile-to-host interrupt interface. */
	spinlock_t tile_intr_pending_lock;
	volatile uint32_t tile_intr_pending;
	wait_queue_head_t tile_intr_wait_queue;
	struct semaphore tile_intr_wait_mutex;

	/* State used by the dedicated tile internal DMA benchmarks. */
	unsigned long bench_page;
	dma_addr_t bench_dma_addr;

	/* Space to save config space whenever we reset the board. */
	u32 bars[6];
	u32 config_space[100];     /* config space is ~77 words */

	/* Chip information. */
	int chip_version;
	int chip_width;
	int chip_height;

	/* All state information needed by the receive queue. */
	struct tlr_packet_queue_state packet_queue_state;

	/* Address of the EPP registers. */
	struct  pcie_epp_regs_drv __iomem *epp_regs_drv;

	/* Address of the chip-to-chip registers. */
	pcie_c2c_regs_t __iomem *c2c_regs;
};

#endif

/* Name used as the prefix of the simple log messages below. */
#if defined(TILEPCI_HOST)
#define DRIVER_NAME_STRING "tilepci"
#else
#define DRIVER_NAME_STRING "tilepci_endp"
#endif

/*
 * Two-level stringification, so that __LINE__ is expanded to its
 * number before being turned into a string literal.
 */
#define STRINGIFY(x) #x
#define TOSTRING(x)	 STRINGIFY(x)
/* Message prefixes; MSG_LINE takes a name, a function and a cpu number. */
#define MSG_LINE     "%s(%s:" TOSTRING(__LINE__) "-%d):"
#define SIMPLE_MSG_LINE    DRIVER_NAME_STRING "(" TOSTRING(__LINE__) "): "

/*
 * printk() at level TYPE, prefixed by HDR and by
 * "PCI_DRV(<function>:<line>-<cpu>):", followed by the caller's format.
 */
#define TLR_TRACE(TYPE, HDR, FMT, ...)  printk(TYPE HDR MSG_LINE FMT,	\
					"PCI_DRV", __func__,	\
					smp_processor_id(),		\
					##__VA_ARGS__)
#define TLR_INFO(HDR, FMT, ...)  TLR_TRACE(KERN_INFO, HDR, FMT, ##__VA_ARGS__)

/* Legacy trace macros; they expand to nothing unless DRV_TRACE_LEGACY. */
#ifdef DRV_TRACE_LEGACY
#define INT_TRACE(FMT, ...) TLR_INFO("*** ", FMT, ##__VA_ARGS__)
#define WRK_TRACE(FMT, ...) TLR_INFO("### ", FMT, ##__VA_ARGS__)
#define CMD_TRACE(FMT, ...) TLR_INFO("^^^ ", FMT, ##__VA_ARGS__)
#define FOP_TRACE(FMT, ...) TLR_INFO("--> ", FMT, ##__VA_ARGS__)
#define EX_TRACE(FMT, ...) TLR_INFO("<-- ", FMT, ##__VA_ARGS__)
#define TRACE(FMT, ...) TLR_INFO("TRC ", FMT, ##__VA_ARGS__)
#else
#define INT_TRACE(FMT, ...)
#define WRK_TRACE(FMT, ...)
#define CMD_TRACE(FMT, ...)
#define FOP_TRACE(FMT, ...)
#define EX_TRACE(FMT, ...)
#define TRACE(FMT, ...)
#endif

/* Zero-copy trace macros; they expand to nothing unless DRV_TRACE_ZC. */
#ifdef DRV_TRACE_ZC
#define HID_INT_TRACE(FMT, ...) TLR_INFO("*** ", FMT, ##__VA_ARGS__)
#define HID_WRK_TRACE(FMT, ...) TLR_INFO("### ", FMT, ##__VA_ARGS__)
#define HID_CMD_TRACE(FMT, ...) TLR_INFO("^^^ ", FMT, ##__VA_ARGS__)
#define HID_FOP_TRACE(FMT, ...) TLR_INFO("--> ", FMT, ##__VA_ARGS__)
#define HID_EX_TRACE(FMT, ...)  TLR_INFO("<-- ", FMT, ##__VA_ARGS__)
#define HID_ERR_TRACE(FMT, ...) TLR_INFO("!!! ", FMT, ##__VA_ARGS__)
#else
#define HID_INT_TRACE(FMT, ...)
#define HID_WRK_TRACE(FMT, ...)
#define HID_CMD_TRACE(FMT, ...)
#define HID_FOP_TRACE(FMT, ...)
#define HID_EX_TRACE(FMT, ...)
#define HID_ERR_TRACE(FMT, ...)
#endif

/* Always-on messages, prefixed with "<driver name>(<line>): ". */
#define INFO(FMT, ...) \
	printk(KERN_INFO SIMPLE_MSG_LINE FMT, ## __VA_ARGS__)
#define WARNING(FMT, ...) \
	printk(KERN_WARNING SIMPLE_MSG_LINE FMT, ## __VA_ARGS__)

/*
 * Error messages.  On the endpoint ERR() only logs; on the host it
 * also formats the message into last_error_string while holding
 * last_error_lock.
 */
#if defined(TILEPCI_ENDP)
#define ERR(FMT, ...)	printk(KERN_ERR SIMPLE_MSG_LINE FMT, ## __VA_ARGS__)
#else
/* Store error messages into a 'last error' string. */
extern spinlock_t last_error_lock;
extern char last_error_string[256];

#define ERR(FMT, ...) do {                                   \
  unsigned long flags;                                       \
  printk(KERN_ERR SIMPLE_MSG_LINE FMT, ## __VA_ARGS__);      \
  spin_lock_irqsave(&last_error_lock, flags);                \
  snprintf(last_error_string, sizeof(last_error_string) - 1, \
    FMT, ## __VA_ARGS__);                                    \
  spin_unlock_irqrestore(&last_error_lock, flags);           \
} while (0)
#endif


/*
 * This macro gets the upper 32 bits of a 32 or 64 bit dma_addr_t, and
 * yields 0 on 32-bit dma_addr_t's.  The bizarre shift expression is
 * necessary to compile on 32-bit platforms: the shift count is derived
 * from sizeof(A), so it is 0 rather than 32 when A is only 32 bits wide.
 */
#define DMA_ADDR_HI32(A) \
  ((u32)((sizeof(A) == 8) ? ((A) >> (sizeof(A) * 8 - 32)) : 0))

/* This macro yields the low 32 bits of a 32 or 64 bit dma_addr_t. */
#define DMA_ADDR_LO32(A) ((u32) (A))

/* Generate a 64-bit address without shift warnings if 32-bit. */
#define DMA_ADDR_GEN(hi, lo) ((((u64)(hi)) << 32) | ((u64)(lo)))

/* Forward declaration: the list helpers below call it before its definition. */
static inline int tlr_zc_cmd_free(struct tlr_zc_cmd	*cmd);

/*
 * Accessors for getting and setting the stream object at index 'idx'
 * of a device's streams[] array.  No bounds check is performed.
 */
#define tlr_get_stream(tlr, idx)      ((tlr)->streams[idx])
#define tlr_set_stream(tlr, idx, str) ((tlr)->streams[idx] = (str))


/*
 * Set cmd->dma_addr, given a user-space buffer.  To do this, we need
 * to pin the user-space buffer into memory, verify that it's
 * contiguous in PA space, and map it to the PCI bus if necessary.
 *
 * @cmd:      command whose mapped_pages[], nmapped_pages and dma_addr
 *            are filled in.
 * @user_va:  user virtual address of the first byte of the buffer.
 * @size:     length of the buffer in bytes.
 * @writable: ignored; the pages are always requested writable.
 *
 * Returns 0 on success.  Returns -EINVAL if the buffer spans more than
 * MAX_MAPPED_PAGES_PER_XFER pages, if no page could be pinned, if the
 * pages are not contiguous, or (endpoint build) if the cache
 * flush/invalidate fails; -EFAULT if only part of the buffer could be
 * pinned; or the negative value returned by get_user_pages().  When a
 * failure happens after pages were pinned, they are released again and
 * cmd->nmapped_pages is reset to 0.
 */
static inline int
tlr_map_cmd(struct tlr_zc_cmd *cmd, unsigned long user_va, size_t size,
	    int writable)
{
	int ret = 0;
	unsigned long offset;
	unsigned long start_page_addr;
	unsigned long start_pfn;
	unsigned long end_pfn;
	unsigned long page_count;
	int nmapped_pages;
	int i;
	tlr_dma_addr_t prev_dma_addr;

	/* Determine which pages are being accessed and bump ref counts. */
	offset = user_va & ~PAGE_MASK;
	start_page_addr = user_va & PAGE_MASK;
	start_pfn = start_page_addr >> PAGE_SHIFT;
	end_pfn = (user_va + size - 1) >> PAGE_SHIFT;
	page_count = end_pfn - start_pfn + 1;

	if (page_count > MAX_MAPPED_PAGES_PER_XFER) {
		ret = -EINVAL;
		HID_ERR_TRACE("page_count %d MAX_MAPPED_PAGES_PER_XFER %d\n",
			      (int)page_count, (int)MAX_MAPPED_PAGES_PER_XFER);
		goto fail_params;
	}

	down_read(&current->mm->mmap_sem);
	/* We have to set writable so that all the pages are populated. */
	writable = 1;
	nmapped_pages = get_user_pages(current, current->mm,
				       start_page_addr,
				       page_count,
				       writable, 0,
				       &cmd->mapped_pages[0], NULL);
	up_read(&current->mm->mmap_sem);

	if (nmapped_pages < 0) {
		ret = nmapped_pages;
		HID_ERR_TRACE("get_user_pages %d\n", nmapped_pages);
		goto fail_get_pages;
	} else if (nmapped_pages == 0) {
		ret = -EINVAL;
		HID_ERR_TRACE("get_user_pages %d\n", nmapped_pages);
		goto fail_get_pages;
	}
	cmd->nmapped_pages = nmapped_pages;

	/*
	 * get_user_pages() may pin fewer pages than were asked for.  The
	 * transfer covers the whole buffer, so a short pin must not be
	 * accepted: the tail of the buffer would be accessed by DMA
	 * without being pinned or checked for contiguity.
	 */
	if ((unsigned long)nmapped_pages != page_count) {
		ret = -EFAULT;
		HID_ERR_TRACE("get_user_pages pinned %d of %d pages\n",
			      nmapped_pages, (int)page_count);
		goto fail_partial;
	}

#if defined(TILEPCI_HOST)
	/* Map the pages and verify that they're contiguous. */
	cmd->dma_addr = dma_map_page(&cmd->tlr->pci_dev->dev,
				     cmd->mapped_pages[0], 0, PAGE_SIZE,
				     cmd->dma_dir);
	prev_dma_addr = cmd->dma_addr;
	for (i = 1; i < nmapped_pages; i++) {
		tlr_dma_addr_t bus_addr;
		bus_addr = dma_map_page(&cmd->tlr->pci_dev->dev,
					cmd->mapped_pages[i], 0, PAGE_SIZE,
					cmd->dma_dir);
		if (bus_addr != prev_dma_addr + PAGE_SIZE) {
			/* Undo the mapping that broke contiguity ... */
			dma_unmap_page(&cmd->tlr->pci_dev->dev, bus_addr,
				       PAGE_SIZE, cmd->dma_dir);

			/* ... and the i contiguous mappings before it. */
			for (bus_addr = cmd->dma_addr;
			     bus_addr < cmd->dma_addr + i * PAGE_SIZE;
			     bus_addr += PAGE_SIZE) {
				dma_unmap_page(&cmd->tlr->pci_dev->dev,
					       bus_addr, PAGE_SIZE,
					       cmd->dma_dir);
			}

			ret = -EINVAL;
			HID_ERR_TRACE("dma_map_page() not contiguous\n");
			/* Dump the physical address of every pinned page. */
			for (i = 0; i < nmapped_pages; i++) {
				unsigned long long pa =
					page_to_phys(cmd->mapped_pages[i]);
				printk(KERN_INFO "  %llx\n", pa);
			}
			goto fail_contig;
		}
		prev_dma_addr = bus_addr;
	}
#else
	/* Just verify that the PAs are contiguous. */
	cmd->dma_addr = page_to_pa(cmd->mapped_pages[0]);
	prev_dma_addr = cmd->dma_addr;
	for (i = 1; i < nmapped_pages; i++) {
		tlr_dma_addr_t dma_addr = page_to_pa(cmd->mapped_pages[i]);
		if (dma_addr != prev_dma_addr + PAGE_SIZE) {
			ret = -EINVAL;
			HID_ERR_TRACE("non-contiguous PAs\n");
			goto fail_contig;
		}
		prev_dma_addr = dma_addr;
	}

	/*
	 * Take care of cache coherence.  An unknown command type is only
	 * logged; ret stays 0 and the mapping proceeds.
	 */
	if (cmd->type == TLR_ZC_CMD_H2T || cmd->type == TLR_ZC_CMD_C2C_RECV)
		ret = finv_user((void __user *)user_va, size);
	else if (cmd->type == TLR_ZC_CMD_T2H ||
		 cmd->type == TLR_ZC_CMD_C2C_SEND)
		ret = flush_user((void __user *)user_va, size);
	else
		ERR("Unknown cmd type\n");
	if (ret) {
		ret = -EINVAL;
		goto fail_contig;
	}
#endif

	/* Finally, apply the offset. */
	cmd->dma_addr += offset;

	return 0;

 fail_contig:
 fail_partial:
	/* Drop the references taken by get_user_pages(). */
	for (i = 0; i < cmd->nmapped_pages; i++)
		page_cache_release(cmd->mapped_pages[i]);
	cmd->nmapped_pages = 0;
 fail_get_pages:
 fail_params:
	return ret;
}

/*
 * Release every page recorded in cmd->mapped_pages[] and reset
 * cmd->nmapped_pages to 0.  Unless the page is reserved or the
 * transfer direction is DMA_TO_DEVICE, the page is marked dirty
 * before its reference is dropped.  Always returns 0.
 */
static inline int tlr_unmap_cmd(struct tlr_zc_cmd *cmd)
{
	int idx;

	for (idx = 0; idx < cmd->nmapped_pages; idx++) {
		struct page *pg = cmd->mapped_pages[idx];
		int may_be_written = (cmd->dma_dir != DMA_TO_DEVICE);

		if (may_be_written && !PageReserved(pg))
			SetPageDirty(pg);

		page_cache_release(pg);
	}

	cmd->nmapped_pages = 0;
	return 0;
}

/*
 * Functions provided by the host- or endpoint- specific code.  They
 * are only declared here; tlr_get_cpl_queue_id() supplies the value
 * stored in a queue's cpl_queue_id on the endpoint build.
 */
void post_cmds(struct tlr_zc_cmd_q *q);
int tlr_is_ready(struct tlr_pcie_dev *tlr);
int tlr_get_cpl_queue_id(struct tlr_pcie_dev *tlr);


/*****************************************************************************
 * BUFFER FUNCTIONS
 *  tlr_get_pages(start,size) - increments ref count on pages.
 *  tlr_put_pages(start,size) - decrements and frees pages.
 *  tlr_flush_buffer(tlr, addr ,len) - flush cache to physical memory
 *  tlr_inv_buffer(tlr, addr, len)   - invalidate buffers in cache
 ****************************************************************************/

/*
 * Take one extra reference on the pages backing the kernel virtual
 * range [start, start + size).  The range is walked in PAGE_SIZE
 * steps beginning at 'start' itself, so when 'start' is not
 * page-aligned the page holding the last byte may not be visited.
 */
static inline void tlr_get_pages(void *start, size_t size)
{
	void *last_byte = start + size - 1;
	void *cursor = start;

	while (cursor <= last_byte) {
		get_page(virt_to_page(cursor));
		cursor += PAGE_SIZE;
	}
}

/*
 * Drop one reference on the pages backing the kernel virtual range
 * [start, start + size), freeing any page whose count reaches zero.
 * The range is walked in PAGE_SIZE steps beginning at 'start' itself,
 * exactly as tlr_get_pages() does.
 */
static inline void tlr_put_pages(void *start, size_t size)
{
	void		*end;
	void		*p;

	end = start + size - 1;
	for (p = start; p <= end; p += PAGE_SIZE) {
		/*
		 * Use put_page() rather than put_page_testzero(): the
		 * latter only decrements and reports whether the count hit
		 * zero, so ignoring its result would leak the page.
		 */
		put_page(virt_to_page(p));
	}
}

/*
 * Make CPU writes to a buffer visible to the device.  On the endpoint
 * this calls flush_buffer() on the kernel virtual address of 'addr';
 * on the host it calls dma_sync_single_for_device() with
 * DMA_TO_DEVICE.  'tlr' is only used by the host build.
 */
static inline void tlr_flush_buffer(struct tlr_pcie_dev *tlr,
				    tlr_dma_addr_t addr, size_t len)
{
#if defined(TILEPCI_ENDP)
	void *kva = __va(addr);

	flush_buffer(kva, len);
#elif defined(TILEPCI_HOST)
	struct device *dev = &tlr->pci_dev->dev;

	dma_sync_single_for_device(dev, addr, len, DMA_TO_DEVICE);
#else
#error Architecture Undefined
#endif
}

/*
 * Make device writes to a buffer visible to the CPU.  On the endpoint
 * this calls inv_buffer() on the kernel virtual address of 'addr'; on
 * the host it calls dma_sync_single_for_cpu() with DMA_FROM_DEVICE.
 * 'tlr' is only used by the host build.
 */
static inline void tlr_inv_buffer(struct tlr_pcie_dev *tlr,
				  tlr_dma_addr_t addr, size_t len)
{
#if defined(TILEPCI_ENDP)
	void *kva = __va(addr);

	inv_buffer(kva, len);
#elif defined(TILEPCI_HOST)
	struct device *dev = &tlr->pci_dev->dev;

	dma_sync_single_for_cpu(dev, addr, len, DMA_FROM_DEVICE);
#else
#error Architecture Undefined
#endif
}


/*****************************************************************************
 * LIST MANIPULATION FUNCTIONS
 *
 *    This driver uses the kernel CIRCULAR LINKED LIST structure.  In this
 *  implementation, an empty list head points to itself, while a single item
 *  list will have its next and prev point to the same other object.
 *
 *    The struct tlr_list object is a standard list_head, with an additional
 *  length field.
 *
 *  tlr_list_splice_tail(*cur_list, *new_list, *lock) - will acquire the spin
 *    lock, then move the items from new_list onto the tail of cur_list,
 *    empty new_list, and unlock.
 *  tlr_list_add_zc_cmd(*list, *cmd, *lock) - will acquire the spin lock,
 *    then add the cmd to the tail of the specified list.
 *  tlr_inv_buffer(tlr, addr, len)   - invalidate buffers in cache
 *  tlr_alloc_blk_mem(tlr, size, *addr) - allocate a block of DMA visible
 *    memory
 *  tlr_free_blk_mem(tlr, size, *kern_addr, dma_addr) - free a block of DMA
 *    memory
 ****************************************************************************/

/* Reset 'l' to an empty list with a length of zero.  Always returns 0. */
static inline int tlr_list_init(struct tlr_list *l)
{
	l->len = 0;
	INIT_LIST_HEAD(&l->q);
	return 0;
}

/*
 * Append every entry of 'new_l' to the tail of 'cur_l', add new_l's
 * length to cur_l's, and leave 'new_l' empty.  No locking is done
 * here.  Always returns 0.
 */
static inline int __tlr_list_splice_tail(struct tlr_list *cur_l,
					 struct tlr_list *new_l)
{
	struct list_head *dst = &cur_l->q;
	struct list_head *src = &new_l->q;

	/* Nothing to link in when the source head points to itself. */
	if (src->next != src) {
		struct list_head *src_head = src->next;
		struct list_head *src_tail = src->prev;
		struct list_head *dst_tail = dst->prev;

		/* Hook the first source entry behind the current tail. */
		src_head->prev = dst_tail;
		dst_tail->next = src_head;

		/* Close the circle: last source entry wraps to the head. */
		src_tail->next = dst;
		dst->prev = src_tail;

		cur_l->len += new_l->len;
	}

	/* The source list is empty afterwards in either case. */
	tlr_list_init(new_l);

	return 0;
}

/*
 * Locked variant of __tlr_list_splice_tail(): performs the splice with
 * 'lock' held and interrupts disabled, and returns its result.
 */
static inline int tlr_list_splice_tail(struct tlr_list *cur_l,
				       struct tlr_list *new_l,
				       spinlock_t *lock)
{
	unsigned long irq_state;
	int rc;

	spin_lock_irqsave(lock, irq_state);
	rc = __tlr_list_splice_tail(cur_l, new_l);
	spin_unlock_irqrestore(lock, irq_state);

	return rc;
}

/*
 * Append 'cmd' to the tail of 'l' and bump the list length.  No
 * locking is done here.  Always returns 0.
 */
static inline int __tlr_list_add_zc_cmd(struct tlr_list		*l,
					    struct tlr_zc_cmd	*cmd)
{
	l->len++;
	list_add_tail(&cmd->list, &l->q);

	return 0;
}

/*
 * Locked variant of __tlr_list_add_zc_cmd(): appends 'cmd' to 'l' with
 * 'lock' held and interrupts disabled, and returns its result.
 */
static inline int tlr_list_add_zc_cmd(struct tlr_list		*l,
					  struct tlr_zc_cmd	*cmd,
					  spinlock_t		*lock)
{
	unsigned long irq_state;
	int rc;

	spin_lock_irqsave(lock, irq_state);
	rc = __tlr_list_add_zc_cmd(l, cmd);
	spin_unlock_irqrestore(lock, irq_state);

	return rc;
}

/*
 * Detach and return the first command on 'l', decrementing the list
 * length, or return NULL when the list is empty.  No locking is done
 * here.
 */
static inline struct tlr_zc_cmd *__tlr_list_rem_zc_cmd(struct tlr_list *l)
{
	struct list_head *first;

	if (list_empty(&l->q))
		return NULL;

	first = l->q.next;
	list_del(first);
	l->len--;

	return list_entry(first, struct tlr_zc_cmd, list);
}

/*
 * Locked variant of __tlr_list_rem_zc_cmd(): removes the first command
 * of 'l' with 'lock' held and interrupts disabled.  Returns the
 * command, or NULL when the list is empty.
 */
static inline struct tlr_zc_cmd *tlr_list_rem_zc_cmd(struct tlr_list	*l,
							spinlock_t	*lock)
{
	unsigned long irq_state;
	struct tlr_zc_cmd *head_cmd;

	spin_lock_irqsave(lock, irq_state);
	head_cmd = __tlr_list_rem_zc_cmd(l);
	spin_unlock_irqrestore(lock, irq_state);

	return head_cmd;
}

/*
 * Drain 'l': while its length is non-zero, remove the first command
 * and hand it to tlr_zc_cmd_free().  The whole drain runs with 'lock'
 * held and interrupts disabled.  Always returns 0.
 */
static inline int tlr_list_free_zc_cmds(struct tlr_list	*l,
					    spinlock_t	*lock)
{
	unsigned long irq_state;

	spin_lock_irqsave(lock, irq_state);
	for (; l->len > 0; ) {
		struct tlr_zc_cmd *victim = __tlr_list_rem_zc_cmd(l);

		tlr_zc_cmd_free(victim);
	}
	spin_unlock_irqrestore(lock, irq_state);

	return 0;
}

/* Return the recorded length of 'l' as an int.  No locking is done here. */
static inline int __tlr_list_len(struct tlr_list *l)
{
	return l->len;
}

/*
 * Locked variant of __tlr_list_len(): reads the length of 'l' with
 * 'lock' held and interrupts disabled.
 */
static inline int tlr_list_len(struct tlr_list *l, spinlock_t *lock)
{
	unsigned long irq_state;
	int count;

	spin_lock_irqsave(lock, irq_state);
	count = __tlr_list_len(l);
	spin_unlock_irqrestore(lock, irq_state);

	return count;
}

/*
 * Return 1 when the recorded length of 'l' is zero, 0 otherwise.  Only
 * the length field is consulted.  No locking is done here.
 */
static inline int __tlr_list_empty(struct tlr_list *l)
{
	int count = l->len;

	return count == 0;
}

/*
 * Locked emptiness test: return 1 when tlr_list_len() reports zero for
 * 'l' (read under 'lock'), 0 otherwise.
 */
static inline int tlr_list_empty(struct tlr_list *l, spinlock_t *lock)
{
	return tlr_list_len(l, lock) == 0;
}

/*
 * Zero 'cmd', bind it to queue 'q' and device 'tlr', put it into the
 * FREE state with the given type and cookie, pick the DMA direction
 * that matches the type for this build, and append it to 'list'.
 * When 'lock' is NULL the unlocked list helper is used; otherwise the
 * command is added with 'lock' held.  Always returns 0.
 */
static inline int tlr_zc_cmd_init(struct tlr_zc_cmd		*cmd,
				      struct tlr_zc_cmd_q	*q,
				      struct tlr_pcie_dev		*tlr,
				      enum tlr_zc_cmd_type_e	 type,
				      tilepci_cookie_t		 cookie,
				      struct tlr_list		*list,
				      spinlock_t		*lock)
{
	memset(cmd, 0, sizeof(*cmd));

	cmd->tlr = tlr;
	cmd->cmd_q = q;
	cmd->type = type;
	cmd->cookie = cookie;
	cmd->state = TLR_ZC_CMD_ST_FREE;
	cmd->flags = 0;
	cmd->dma_addr = 0;
	cmd->nmapped_pages = 0;

#if defined(TILEPCI_HOST)
	/* Host view: H2T data leaves memory, T2H data arrives in it. */
	if (cmd->type == TLR_ZC_CMD_H2T)
		cmd->dma_dir = DMA_TO_DEVICE;
	else if (cmd->type == TLR_ZC_CMD_T2H)
		cmd->dma_dir = DMA_FROM_DEVICE;
	else
		cmd->dma_dir = DMA_BIDIRECTIONAL;
#elif defined(TILEPCI_ENDP)
	/* Endpoint view: the directions are mirrored. */
	if (cmd->type == TLR_ZC_CMD_H2T ||
	    cmd->type == TLR_ZC_CMD_C2C_RECV)
		cmd->dma_dir = DMA_FROM_DEVICE;
	else if (cmd->type == TLR_ZC_CMD_T2H ||
		 cmd->type == TLR_ZC_CMD_C2C_SEND)
		cmd->dma_dir = DMA_TO_DEVICE;
	else
		cmd->dma_dir = DMA_BIDIRECTIONAL;
#else
#error Undefined Architecture
#endif

	if (lock != NULL)
		tlr_list_add_zc_cmd(list, cmd, lock);
	else
		__tlr_list_add_zc_cmd(list, cmd);

	return 0;
}

/*
 * Release the resources held by 'cmd'; this is just tlr_unmap_cmd(),
 * whose result is returned.
 */
static inline int tlr_zc_cmd_release(struct tlr_zc_cmd *cmd)
{
	int rc = tlr_unmap_cmd(cmd);

	return rc;
}

/*
 * Release 'cmd' and return it to its device's global free list
 * (zc_state.cmd_q_free_list, under zc_state.cmd_q_lock).  Always
 * returns 0.
 */
static inline int tlr_zc_cmd_free(struct tlr_zc_cmd	*cmd)
{
	struct tlr_pcie_dev *owner = cmd->tlr;

	tlr_zc_cmd_release(cmd);

	tlr_list_add_zc_cmd(&owner->zc_state.cmd_q_free_list, cmd,
			    &owner->zc_state.cmd_q_lock);

	return 0;
}

/*
 * Take one command from the device's global free list, initialize it
 * with the given type and cookie, and add it to q->free_q (under
 * q->lock).
 *
 * Returns the result of tlr_zc_cmd_init() on success, or -ENOMEM when
 * the global free list is empty, so that callers can tell that no
 * command was added to the queue.
 */
static inline int tlr_zc_cmd_new(enum tlr_zc_cmd_type_e	 type,
				 tilepci_cookie_t		 cookie,
				 struct tlr_zc_cmd_q		*q)
{
	struct tlr_zc_cmd	*cmd;
	struct tlr_pcie_dev		*tlr;

	tlr = q->tlr;

	cmd = tlr_list_rem_zc_cmd(&tlr->zc_state.cmd_q_free_list,
				  &tlr->zc_state.cmd_q_lock);
	if (cmd == NULL)
		return -ENOMEM;

	return tlr_zc_cmd_init(cmd, q, tlr, type, cookie,
			       &q->free_q, &q->lock);
}

/*
 * Free the queue's rd_xfer_comps and wr_xfer_reqs arrays and reset
 * both pointers to NULL.  Safe to call when either is already NULL.
 * Always returns 0.
 */
static inline int __tlr_zc_cmd_q_free_xfers(struct tlr_zc_cmd_q *q)
{
	/* kfree(NULL) is a no-op, so no guards are needed. */
	kfree(q->rd_xfer_comps);
	q->rd_xfer_comps = NULL;

	kfree(q->wr_xfer_reqs);
	q->wr_xfer_reqs = NULL;

	return 0;
}

/*
 * (Re)allocate the queue's rd_xfer_comps and wr_xfer_reqs arrays with
 * q->ncmd_wanted entries each.  Any existing arrays are freed first.
 * When ncmd_wanted is 0 both pointers are simply left NULL.
 *
 * The arrays are allocated with kcalloc(), which fails cleanly instead
 * of returning an undersized buffer if count * element size would
 * overflow, and which hands back zeroed memory.
 *
 * Returns 0 on success, or -ENOMEM with both pointers NULL.
 */
static inline int __tlr_zc_cmd_q_alloc_xfers(struct tlr_zc_cmd_q *q)
{
	uint32_t	 ncmd = q->ncmd_wanted;

	__tlr_zc_cmd_q_free_xfers(q);

	if (ncmd == 0)
		return 0;

	q->rd_xfer_comps = kcalloc(ncmd, sizeof(*q->rd_xfer_comps),
				   GFP_KERNEL);
	if (q->rd_xfer_comps == NULL)
		goto err_exit;

	q->wr_xfer_reqs = kcalloc(ncmd, sizeof(*q->wr_xfer_reqs),
				  GFP_KERNEL);
	if (q->wr_xfer_reqs == NULL)
		goto err_exit;

	return 0;

 err_exit:
	/* Drop whichever array did get allocated. */
	__tlr_zc_cmd_q_free_xfers(q);
	return -ENOMEM;
}

/*
 * Release command queue resources.  This should only be called when
 * the chip resets.
 *
 * Marks the queue as CHIP_DOWN and not ready, zeroes its command
 * counts, returns every command on the four lists to the device's
 * global free list, wakes all three wait queues, and frees the
 * transfer scratch arrays.  Always returns 0.
 */
static inline int tlr_zc_cmd_q_free(struct tlr_zc_cmd_q *q)
{
	int ret = 0;

	/* These fields are written without taking q->lock. */
	q->state = CMD_Q_ST_CHIP_DOWN;
	q->is_ready = FALSE;
	q->ncmd_wanted = 0;
	q->ncmd_have = 0;

	/* Each call takes and releases q->lock around its own drain. */
	tlr_list_free_zc_cmds(&q->free_q, &q->lock);
	tlr_list_free_zc_cmds(&q->pend_q, &q->lock);
	tlr_list_free_zc_cmds(&q->post_q, &q->lock);
	tlr_list_free_zc_cmds(&q->comp_q, &q->lock);

	wmb();  /* ensure visibility before waking others */
	wake_up_interruptible(&q->free_queue);
	wake_up_interruptible(&q->reset_drain_queue);
	wake_up_interruptible(&q->comp_queue);

	__tlr_zc_cmd_q_free_xfers(q);

	return ret;
}

/*
 * Initialize a command queue after chip reset.  This should only be
 * called on queues which have either just come up via
 * tlr_zc_cmd_q_startup(), or on queues which were cleaned up after a
 * chip reset via tlr_zc_cmd_q_free().
 *
 * Counters are reset, the transfer array pointers are cleared without
 * being freed, the four command lists are re-initialized, and the queue
 * is then moved to CMD_Q_ST_NEED_SOC and flagged ready.
 *
 * Always returns 0.
 */
static inline int tlr_zc_cmd_q_init(struct tlr_zc_cmd_q *q)
{
	/* Command and completion bookkeeping. */
	q->ncmd_wanted = 0;
	q->ncmd_have = 0;
	q->ncomp_tot = 0;
	q->nreq_tot = 0;
	q->ncmd_min_read = 1;
	q->ncmd_comp_wakeup = 1;
	q->rd_xfer_comps = NULL;
	q->wr_xfer_reqs = NULL;
	q->ncomp_mismatch = 0;

	/* Start with empty free/pending/posted/completed lists. */
	tlr_list_init(&q->free_q);
	tlr_list_init(&q->pend_q);
	tlr_list_init(&q->post_q);
	tlr_list_init(&q->comp_q);

	/* ensure visibility before starting up */
	wmb();

#if defined(TILEPCI_ENDP)
	q->cpl_queue_id = tlr_get_cpl_queue_id(q->tlr);
#endif
	/* Only now advertise the queue as usable. */
	q->state = CMD_Q_ST_NEED_SOC;
	q->is_ready = TRUE;

	return 0;
}

/*
 * Constructor for command queues.  This should be called once for
 * each queue when the driver initializes.
 *
 * The whole structure is zeroed, the queue's type, channel and owning
 * device are recorded, and its state is set to CMD_Q_ST_CHIP_DOWN.  The
 * semaphores, wait queues and spinlocks embedded in the queue are then
 * initialized.
 *
 * Always returns 0.
 */
static inline int tlr_zc_cmd_q_startup(struct tlr_zc_cmd_q *q,
				       enum tlr_zc_cmd_type_e type,
				       struct tlr_pcie_dev *tlr,
				       uint32_t chan)
{
	memset(q, 0, sizeof(*q));

	init_MUTEX(&q->mmap_state.mutex);

	/* Identity of the queue, plus its initial (chip down) state. */
	q->type = type;
	q->chan = chan;
	q->tlr = tlr;
	q->state = CMD_Q_ST_CHIP_DOWN;
	q->ncmd_min_read = 1;
	q->ncmd_comp_wakeup = 1;
	/* ensure visibility before starting up */
	wmb();

	/* Synchronization primitives. */
	init_MUTEX(&q->rd_xfer_mutex);
	init_MUTEX(&q->wr_xfer_mutex);
	init_waitqueue_head(&q->free_queue);
	init_waitqueue_head(&q->reset_drain_queue);
	init_waitqueue_head(&q->comp_queue);
	spin_lock_init(&q->lock);
	spin_lock_init(&q->direct_hv_lock);

	return 0;
}

/*
 * Take one command off the queue's free list, optionally sleeping until
 * one becomes available.
 *
 * @q:       queue to take the command from.
 * @wait:    when zero, an empty free list fails immediately; otherwise
 *           the caller sleeps interruptibly on q->free_queue until the
 *           free list is non-empty or q->chip_reset_poison is set.
 * @ret_cmd: receives the command, or NULL on any failure.
 *
 * On success the command's nmapped_pages and dma_addr are zeroed before
 * it is handed back.
 *
 * Returns 0 on success, -EAGAIN if the list was empty and @wait was
 * zero, -ERESTARTSYS if the sleep ended with a non-zero result, or
 * -ENXIO if the queue was poisoned while sleeping.
 */
static inline int tlr_zc_cmd_q_get_free(struct tlr_zc_cmd_q *q,
					    int wait,
					    struct tlr_zc_cmd **ret_cmd)
{
	struct tlr_zc_cmd *cmd;
	int ret = 0;

	for (;;) {
		cmd = tlr_list_rem_zc_cmd(&q->free_q, &q->lock);
		if (cmd != NULL)
			break;

		if (!wait) {
			ret = -EAGAIN;
			break;
		}

		ret = wait_event_interruptible(q->free_queue,
				       !tlr_list_empty(&q->free_q, &q->lock) ||
				       q->chip_reset_poison);
		if (ret != 0) {
			ret = -ERESTARTSYS;
			HID_ERR_TRACE("Exit  -ERESTARTSYS\n");
			break;
		}

		HID_FOP_TRACE("Woke from queue\n");

		/* We could have been poisoned while sleeping. */
		if (q->chip_reset_poison) {
			ret = -ENXIO;
			break;
		}
	}

	/* Hand out commands with no stale mapping information. */
	if (cmd != NULL) {
		cmd->nmapped_pages = 0;
		cmd->dma_addr = 0;
	}
	*ret_cmd = cmd;

	return ret;
}

/*
 * Take one command off the queue's completion list, optionally sleeping
 * until one is available.
 *
 * @q:           queue to take the completed command from.
 * @wait:        when zero, an empty completion list fails immediately;
 *               otherwise the caller sleeps interruptibly on
 *               q->comp_queue until the list is non-empty or
 *               q->chip_reset_poison is set.
 * @comp_wakeup: requested wakeup threshold; before each sleep
 *               q->ncmd_comp_wakeup is set to the smaller of this value
 *               and q->ncmd_min_read.
 * @ret_cmd:     receives the command, or NULL on any failure.
 *
 * Returns 0 on success, -EAGAIN if the list was empty and @wait was
 * zero, -ERESTARTSYS if the sleep ended with a non-zero result, or
 * -ENXIO if the queue was poisoned while sleeping.
 */
static inline int tlr_zc_cmd_q_get_comp(struct tlr_zc_cmd_q *q,
					    int wait,
					    uint32_t comp_wakeup,
					    struct tlr_zc_cmd **ret_cmd)
{
	struct tlr_zc_cmd *cmd;
	int ret = 0;

	for (;;) {
		cmd = tlr_list_rem_zc_cmd(&q->comp_q, &q->lock);
		if (cmd != NULL)
			break;

		if (!wait) {
			ret = -EAGAIN;
			break;
		}

		/* Never ask for more completions than ncmd_min_read. */
		q->ncmd_comp_wakeup = (comp_wakeup <= q->ncmd_min_read) ?
			comp_wakeup : q->ncmd_min_read;
		ret = wait_event_interruptible(q->comp_queue,
			       ((tlr_list_len(&q->comp_q, &q->lock) > 0) ||
				(q->chip_reset_poison)));
		if (ret != 0) {
			ret = -ERESTARTSYS;
			HID_ERR_TRACE("Exit  -ERESTARTSYS\n");
			break;
		}

		HID_FOP_TRACE("Woke from queue\n");

		/* We could have been poisoned while sleeping. */
		if (q->chip_reset_poison) {
			ret = -ENXIO;
			HID_ERR_TRACE("Exit -ENXIO\n");
			break;
		}
	}

	*ret_cmd = cmd;

	return ret;
}

/*
 * Return a command to queue 'q' without taking q->lock; the locked
 * wrapper tlr_zc_cmd_q_put_free() calls this with q->lock held.
 *
 * If the queue currently holds more commands than it wants, the command
 * goes back to the device-wide pool through tlr_zc_cmd_free() and
 * q->ncmd_have is decremented.  Otherwise the command is released,
 * added to q->free_q, and sleepers on q->free_queue are woken.
 *
 * Returns 0 in the surplus case, otherwise the result of
 * __tlr_list_add_zc_cmd().
 */
static inline int __tlr_zc_cmd_q_put_free(struct tlr_zc_cmd_q *q,
					      struct tlr_zc_cmd *cmd)
{
	int add_ret;

	if (q->ncmd_wanted < q->ncmd_have) {
		/* Surplus command: shrink the queue. */
		tlr_zc_cmd_free(cmd);
		q->ncmd_have--;
		return 0;
	}

	tlr_zc_cmd_release(cmd);
	add_ret = __tlr_list_add_zc_cmd(&q->free_q, cmd);

	/* ensure visibility before waking others */
	wmb();
	wake_up_interruptible(&q->free_queue);

	return add_ret;
}

/*
 * Locked variant of __tlr_zc_cmd_q_put_free(): runs it with q->lock
 * held and interrupts disabled, and returns its result.
 */
static inline int tlr_zc_cmd_q_put_free(struct tlr_zc_cmd_q *q,
					    struct tlr_zc_cmd *cmd)
{
	unsigned long irq_flags;
	int status;

	spin_lock_irqsave(&q->lock, irq_flags);
	status = __tlr_zc_cmd_q_put_free(q, cmd);
	spin_unlock_irqrestore(&q->lock, irq_flags);

	return status;
}

/*
 * Drain list 'l', passing every command on it to
 * __tlr_zc_cmd_q_put_free() for queue 'q'.
 *
 * The list length is sampled once up front and exactly that many
 * commands are removed.  No lock is taken here; the locked wrapper
 * tlr_zc_cmd_q_free_list() calls this with q->lock held.
 *
 * Always returns 0; the per-command results are ignored.
 */
static inline int __tlr_zc_cmd_q_free_list(struct tlr_zc_cmd_q *q,
					       struct tlr_list *l)
{
	uint32_t remaining = __tlr_list_len(l);

	while (remaining-- > 0) {
		struct tlr_zc_cmd *cmd = __tlr_list_rem_zc_cmd(l);

		__tlr_zc_cmd_q_put_free(q, cmd);
	}

	return 0;
}

/*
 * Locked variant of __tlr_zc_cmd_q_free_list(): runs it with q->lock
 * held and interrupts disabled, and returns its result.
 */
static inline int tlr_zc_cmd_q_free_list(struct tlr_zc_cmd_q *q,
					     struct tlr_list *l)
{
	unsigned long irq_flags;
	int status;

	spin_lock_irqsave(&q->lock, irq_flags);
	status = __tlr_zc_cmd_q_free_list(q, l);
	spin_unlock_irqrestore(&q->lock, irq_flags);

	return status;
}

/*
 * Splice list 'l' onto the tail of the queue's pending list using the
 * lock-free __tlr_list_splice_tail() helper, and return its result.
 */
static inline int __tlr_zc_cmd_q_pend_list(struct tlr_zc_cmd_q *q,
					     struct tlr_list *l)
{
	return __tlr_list_splice_tail(&q->pend_q, l);
}

/*
 * Splice list 'l' onto the tail of the queue's pending list through
 * tlr_list_splice_tail(), which is given q->lock, and return its result.
 */
static inline int tlr_zc_cmd_q_pend_list(struct tlr_zc_cmd_q *q,
					     struct tlr_list *l)
{
	return tlr_list_splice_tail(&q->pend_q, l, &q->lock);
}


/*
 * Process one completion for queue 'q' without taking q->lock; the
 * locked wrapper tlr_zc_cmd_q_comp() calls this with q->lock held.
 *
 * @q:        queue the completion belongs to.
 * @cmp:      completion descriptor (host or tile flavour, depending on
 *            the build); its tag, eop, overflow, reset and, on the
 *            endpoint, link_down fields are read.
 * @comp_len: number of bytes reported for this completion.
 *
 * The oldest command on q->post_q is removed and unmapped.  In
 * CMD_Q_ST_RESET_DISCARD state it is recycled straight away; otherwise
 * it is annotated with the completion details and moved to q->comp_q.
 *
 * Returns 0 on success, or -ENOBUFS when q->post_q held no command.
 */
#if defined(TILEPCI_HOST)
static inline int __tlr_zc_cmd_q_comp(struct tlr_zc_cmd_q *q,
				      pcie_host_completion_t *cmp,
				      uint32_t comp_len)
#elif defined(TILEPCI_ENDP)
static inline int __tlr_zc_cmd_q_comp(struct tlr_zc_cmd_q *q,
				      pcie_tile_completion_t *cmp,
				      uint32_t comp_len)
#else
#error Undefined Architecture
#endif
{
	struct tlr_zc_cmd *done = __tlr_list_rem_zc_cmd(&q->post_q);

	if (done == NULL) {
		HID_ERR_TRACE("Exit ENOBUFS\n");
		return -ENOBUFS;
	}

	tlr_unmap_cmd(done);

	if (q->state == CMD_Q_ST_RESET_DISCARD) {
		/*
		 * In discard mode, we simply free commands as they
		 * come back.  This is essentially what would happen
		 * if we ran the non-discard completion code and then
		 * returned the completion via the read() syscall.
		 */
		__tlr_zc_cmd_q_put_free(q, done);
		q->ncomp_tot++;
		q->ncomp_bytes_tot += comp_len;

		/*
		 * If we're discarding, we need to wake up any waiters
		 * when the last completion arrives.
		 */
		if (q->ncomp_tot == q->nreq_tot) {
			/* ensure visibility before waking others */
			wmb();
			wake_up_interruptible(&q->reset_drain_queue);
		}
		return 0;
	}

	/* Copy the completion details into the command. */
	done->comp_len = comp_len;
	done->cookie = cmp->tag;
	done->flags = 0;
	if (cmp->eop)
		done->flags |= TILEPCI_CPL_EOP;
	if (cmp->overflow)
		done->flags |= TILEPCI_CPL_OVERFLOW;
#if defined(TILEPCI_ENDP)
	if (cmp->link_down)
		done->flags |= TILEPCI_CPL_LINK_DOWN;
#endif
	if (cmp->reset) {
		/*
		 * If we thought things were happily connected, note
		 * that reset was initiated by the other side.
		 */
		if (q->state == CMD_Q_ST_CONNECTED)
			q->state = CMD_Q_ST_RESET_STARTED;
		done->flags |= TILEPCI_CPL_RESET;
	}

	/* Publish the command and wake readers once enough are queued. */
	__tlr_list_add_zc_cmd(&q->comp_q, done);
	if (__tlr_list_len(&q->comp_q) >= q->ncmd_comp_wakeup) {
		/* ensure visibility before waking others */
		wmb();
		wake_up_interruptible(&q->comp_queue);
	}

	/* Completion statistics, current and total. */
	q->ncomp_cur++;
	q->ncomp_tot++;
	q->ncomp_bytes_cur += comp_len;
	q->ncomp_bytes_tot += comp_len;

	return 0;
}

/*
 * Locked entry point for completion processing.
 *
 * Looks up the queue for 'channel' in zc_state->cmd_queues and runs
 * __tlr_zc_cmd_q_comp() on it with the queue's lock held and interrupts
 * disabled.
 *
 * Returns the result of __tlr_zc_cmd_q_comp(), or -ENXIO when no queue
 * is registered for the channel.
 *
 * NOTE(review): 'channel' is used as an array index without a range
 * check in this function -- confirm that callers validate it.
 */
#if defined(TILEPCI_HOST)
static inline int tlr_zc_cmd_q_comp(struct tlr_zc_state *zc_state,
				    int channel,
				    pcie_host_completion_t *cmp,
				    uint32_t comp_len)
#elif defined(TILEPCI_ENDP)
static inline int tlr_zc_cmd_q_comp(struct tlr_zc_state *zc_state,
				    int channel,
				    pcie_tile_completion_t *cmp,
				    uint32_t comp_len)
#else
#error Undefined Architecture
#endif
{
	struct tlr_zc_cmd_q *chan_q = zc_state->cmd_queues[channel];
	unsigned long irq_flags;
	int status;

	if (chan_q == NULL) {
		ERR("NULL ZC queue for channel %d\n", channel);
		return -ENXIO;
	}

	spin_lock_irqsave(&chan_q->lock, irq_flags);
	status = __tlr_zc_cmd_q_comp(chan_q, cmp, comp_len);
	spin_unlock_irqrestore(&chan_q->lock, irq_flags);

	return status;
}


/*
 * generic file ops
 *
 * Only prototypes are given here; the notes on individual functions
 * below are inferred from their names and signatures.
 */
/* NOTE(review): presumably sets up zero-copy state for 'tlr' -- confirm. */
int tlr_zc_init(struct tlr_pcie_dev *tlr);
/* NOTE(review): presumably tears down what tlr_zc_init() set up -- confirm. */
void tlr_zc_free(struct tlr_zc_state *zc_state);
/* NOTE(review): presumably invoked when the chip resets -- confirm. */
void tlr_zc_chip_reset(struct tlr_zc_state *zc_state);


/* NOTE(review): presumably open() handlers for channel 'chan' and for a
 * debug device -- confirm against the file_operations tables. */
int tlr_zc_open(struct tlr_zc_state *zc_state, struct file *filp, int chan);
int tlr_zc_debug_open(struct tlr_zc_state *zc_state, struct file *filp);

/* NOTE(review): meaning of 'count' and which semaphores are involved is
 * not visible from this header -- confirm in the implementation. */
void release_zc_semaphores(struct tlr_zc_state *zc_state, int count);
int grab_all_zc_semaphores(struct tlr_zc_state *zc_state);

/* Declared for endpoint (TILEPCI_ENDP) builds only. */
#if defined(TILEPCI_ENDP)
int link_is_down(struct tlr_pcie_dev *tlr, int chan);
#endif

/*
 * Compatibility defines for dealing with the different page faulting
 * paths before and after 2.6.18.
 */
/* Note: avoid KERNEL_VERSION() to avoid warnings from "sunifdef" */
#if LINUX_VERSION_CODE > 0x020612  /* 2.6.18 */
#define USE_VM_FAULT
#define RETURN_SIGBUS VM_FAULT_SIGBUS
#define RETURN_OOM VM_FAULT_OOM
#else
#define RETURN_SIGBUS NOPAGE_SIGBUS
#define RETURN_OOM NOPAGE_OOM
#endif

/*
 * Backwards compatibility with host-side PCI driver.
 *
 * Each typedef gives a "_t" alias to the struct tag of the same name.
 */
typedef struct tlr_zc_cmd tlr_zc_cmd_t;
typedef struct tlr_zc_cmd_q tlr_zc_cmd_q_t;
typedef struct tlr_stream tlr_stream_t;
typedef struct tlr_pcie_dev tlr_pcie_dev_t;

#endif /* !__TILEPCI_SHARED_CODE_H__ */
