/*
 * Copyright 2011 Tilera Corporation. All Rights Reserved.
 *
 *   This program is free software; you can redistribute it and/or
 *   modify it under the terms of the GNU General Public License
 *   as published by the Free Software Foundation, version 2.
 *
 *   This program is distributed in the hope that it will be useful, but
 *   WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 *   NON INFRINGEMENT.  See the GNU General Public License for
 *   more details.
 */

/**
 * @file
 *
 * Host and Tile interfaces for PCIe communication.
 */
#ifndef _SYS_HV_DRV_PCIE_CHANNEL_INTF_H
#define _SYS_HV_DRV_PCIE_CHANNEL_INTF_H
#include "drv_pcie_common.h"

#ifndef __ASSEMBLER__

#ifdef DRV_PCIE_CHANNEL_STANDALONE
/**
 * @mainpage PCIe Endpoint Driver Developers Reference
 *
 * This document describes the driver interfaces exported by the
 * hypervisor's PCIe subsystem when running in endpoint mode.  This
 * information is intended for developers working on operating system
 * drivers, including both host-side drivers for managing a TILExpress
 * card or tile-side drivers for connecting the hypervisor's PCIe
 * interfaces to user space applications.  Programmers who are
 * interested in writing user space applications that take advantage
 * of the Linux PCIe drivers provided with the MDE should see UG218,
 * "PCIe User Space Communication API".
 *
 * The hypervisor's PCIe subsystem defines the interfaces used to
 * communicate between the host machine and Tile-side client
 * supervisors.  The communication operations themselves are managed
 * by a set of dedicated hypervisor driver tiles, either one or three
 * per interface, which are responsible for handling communication
 * requests from either side and performing DMA operations to transfer
 * blocks of data over the PCIe link.  The hypervisor driver tiles
 * export a programming interface to both the host- and Tile-side
 * supervisors.  This interface interacts with operating system device
 * drivers on both sides; Tilera's MDE ships with Linux drivers for
 * both x86 Linux and Tile Linux that can properly interface with the
 * hypervisor's PCIe interfaces.
 *
 * On the host side, the PCIe interface is exported as a set of
 * memory-mapped control registers mapped to BAR0 of the Tile
 * processor's PCIe interface.  This MMIO region is comprised of
 * several subregions, each representing a different set of
 * communication mechanisms:
 *
 * - Offsets [0, sizeof(struct pcie_host_mmio_regs)] have a mapping of
 * the main control registers, which are used for device
 * initialization and channelized messaging.  Channelized messaging
 * allows the host and Tile to post messages to a set of 256
 * independent, unidirectional messaging streams.
 *
 * - Offsets [::PCIE_DEBUG_REG_OFFSET, ::PCIE_DEBUG_REG_OFFSET +
 * sizeof(struct pcie_ibound_debug_status_regs)] contain a set of
 * registers that can be used to collect debugging information about
 * the progress of channelized messaging operations.
 *
 * - Offsets [::PCIE_CSR_REG_OFFSET, ::PCIE_CSR_REG_OFFSET +
 * ::TILE_CSR_MEMORY_MAX_SIZE] map a set of "configuration and status
 * registers" that can be read or written by both host and Tile
 * applications.
 *
 * - A window beginning at offset ::PCIE_C2C_REG_OFFSET allows the
 * host machine to configure communication channels between multiple
 * Tile processor PCIe ports.
 *
 * The Tile side interface is initialized via a hypervisor device
 * filesystem interface.  Runtime communication operations are
 * performed via shared memory data structures that are accessible to
 * both the supervisor driver and the hypervisor dedicated tiles.
 *
 * This document contains the following sections:
 *
 * - @ref boot describes the process that a host driver should use to
 * boot or reset a PCIe card containing a TILE64 Processor.
 *
 * - @ref constants describes various constants that are built into the
 * PCIe subsystem.
 *
 * - @ref host describes the MMIO registers and host-memory structures
 * used to perform channel communication from the host machine.
 *
 * - @ref tile describes the hypervisor filesystem entries and
 * Tile-memory structures used to perform channel communication from
 * the Tile supervisor.
 *
 * - @ref c2c describes the host side interface for establishing
 * chip-to-chip communication channels.
 *
 * - @ref csr describes both the host and Tile-side interfaces for
 * creating a PCI-mapped application register space that can be used
 * for application configuration and status querying.
 *
 * - @ref barmem describes an interface for mapping Tile-side huge
 * pages into BAR1 so that those pages can be read or written from the
 * host, or used as targets for DMA read/write operations by other
 * PCIe devices.
 *
 * - @ref debug documents the channel debug registers, which the host
 * can read to obtain information about the work done by the
 * hypervisor's dedicated PCIe tile(s).
 *
 * - @ref epp describes the optional support for NetIO packet egress
 *   over PCIe into a host-memory ring buffer.
 */
#endif


/**
 * @defgroup boot Booting and Resetting the TILE64 via PCIe
 * @{
 *
 * This section describes the process used to boot and reset the
 * TILE64 via PCIe.  The boot process described below is necessary for
 * systems in which the boot file is driven over the PCIe link; it
 * will not be used in systems that boot from a ROM chip.  For example
 * reset and boot code, see the Linux host driver provided as part of
 * the Tilera MDE in lib/modules/src/.
 *
 * @section host_boot Booting via PCIe
 *
 * The following steps can be used to boot the Tile processor via
 * PCIe.  Booting is only possible after power cycling the Tile
 * processor or resetting it as described in @ref host_reset.
 *
 * - Make sure the chip is ready to boot by reading the upper two bits
 * of the configuration space subsystem ID register.  If the value of
 * these bits is ::SUBSYSTEM_FLAG_RESET, the chip has emerged from
 * reset and is waiting for a boot stream.  If the value is
 * ::SUBSYSTEM_FLAG_IBOUND, the chip is already booted, but can be
 * reset via PCIe as described in @ref host_reset.  If the value is
 * ::SUBSYSTEM_FLAG_BOOTED, the chip has been booted but may not have
 * loaded the hypervisor drivers necessary for PCIe reset.
 *
 * - Write the boot stream by writing individual 32-bit words to the
 * pcie_host_mmio_regs::status register.  Once the first word is
 * written, you must finish writing all the words or the chip will be
 * left in an undefined state.
 *
 * - Once the whole boot stream has been injected, wait for
 * ::TLR_REBOOT_TO_READY_WAIT_MS before performing any other
 * operations.
 *
 * @section host_reset Resetting the Processor via PCIe
 *
 * If the PCIe configuration space subsystem ID register contains the
 * ::SUBSYSTEM_FLAG_IBOUND bits, as described above, the chip may be
 * reset via PCIe.  To reset the chip, do the following:
 *
 * - Save the values of the configuration space registers.  Many
 * operating systems automatically save the basic config registers (up
 * through config word 14), but the other registers may be set by BIOS
 * and some operating systems (Linux 2.6.9, for instance) do not
 * restore them automatically.  To ensure that configuration space
 * can be restored after reset, be sure to save off any registers that
 * are not restored by your operating system.  In the sample Linux
 * driver, this means saving config words 15 through 100.
 *
 * - Write ::PCIE_STATUS_RESET_CHIP to the pcie_host_mmio_regs::status
 *   register.
 *
 * - Take down the PCI links associated with the device by putting the
 * PCI bus device associated with each link into the 'secondary reset'
 * state.
 *
 * - Wait for ::TLR_RESET_WAIT_MS milliseconds to allow the chip to
 *   come in and out of reset.
 *
 * - Take the links out of secondary reset.
 *
 * - Issue a config read to the vendor and device ID register to
 * ensure that the card has returned from reset.  If the config read
 * returns -1, the device has failed to reset or the PCIe link has not
 * been restored.
 *
 * - At this point, reading the upper two bits of the subsystem ID
 * register should return ::SUBSYSTEM_FLAG_RESET, indicating that the
 * chip has reset and is waiting for a boot stream.
 *
 * - Restore the config registers that were saved off before reset.
 *
 * This procedure will only work on cards that have been booted with
 * a hypervisor configuration that includes the 'pcie' driver.
 *
 * @section host_plx_reset Resetting the TILExpress-20G Card
 *
 * On the TILExpress-20G card, it is possible to reset the Tile
 * processor via a reset wire driven by the PLX bridge chip that
 * connects the host's 8-lane connection to the Tile processor's two
 * 4-lane links.  Using the PLX reset wire is the preferred method for
 * resetting a TILExpress-20G.  The primary advantage of this mechanism
 * is that it can be used at any time, regardless of whether the PCIe
 * dedicated tile code is running on the Tile processor.
 *
 * To use the PLX-driven reset wire, follow the reset sequence above
 * but replace the 'write ::PCIE_STATUS_RESET_CHIP to the
 * pcie_host_mmio_regs::status register' and 'Take down the PCI links'
 * steps with the following:
 *
 * - Take down the PCI links associated with the device by putting the
 * PCI bus device associated with each link into the 'secondary reset'
 * state.
 *
 * - Write ::PCIE_PLX_20G_RESET_ENABLE_BIT to offset
 * ::PCIE_PLX_20G_RESET_ENABLE_REG in the PLX's BAR0.
 *
 * - Write ::PCIE_PLX_20G_RESET_ASSERT_BIT to offset
 * ::PCIE_PLX_20G_RESET_ASSERT_REG in the PLX's BAR0, then read back
 * from the same address in order to flush the write to the device.
 *
 * - Wait ::PCIE_PLX_20G_RESET_HOLD_MS milliseconds.
 *
 * - Write ::PCIE_PLX_20G_RESET_DEASSERT_BIT to offset
 * ::PCIE_PLX_20G_RESET_ASSERT_REG in the PLX's BAR0, then read back
 * from the same address in order to flush the write to the device.
 *
 * After performing the above operations, the reset process can
 * continue at 'wait for ::TLR_RESET_WAIT_MS' above.
 */


/** Vendor ID used by all Tilera processors. */
#define TILERA_VENDOR_ID                 0x1a41

/** Device ID of the TILE64 processor. */
#define TILERA_TILE64_DEV_ID             0x1

/** Device ID of the TILEPro processor. */
#define TILERA_TILE_PRO_DEV_ID           0x2

/** How long to wait, in milliseconds, for the chip to come back after
    a reset. */
#define TLR_RESET_WAIT_MS                500

/** How long to wait, in milliseconds, between boot completion and the
    ready check. */
#define TLR_REBOOT_TO_READY_WAIT_MS      300

/** Device ID of the PLX switch found on a TILExpress-20G card. */
#define PCIE_PLX_20G_DEVICE_ID           0x8624

/** Address of the register that enables the reset driver on a
    TILExpress-20G. */
#define PCIE_PLX_20G_RESET_ENABLE_REG    0x630

/** Value written to ::PCIE_PLX_20G_RESET_ENABLE_REG to turn the reset
    driver on (bit 5 set). */
#define PCIE_PLX_20G_RESET_ENABLE_BIT    (1 << 5)

/** Address of the register used to assert or deassert the
    TILExpress-20G reset. */
#define PCIE_PLX_20G_RESET_ASSERT_REG    0x644

/** Value written to ::PCIE_PLX_20G_RESET_ASSERT_REG to assert reset
    (bit 11 clear, so the value is zero). */
#define PCIE_PLX_20G_RESET_ASSERT_BIT    (0 << 11)

/** Value written to ::PCIE_PLX_20G_RESET_ASSERT_REG to deassert reset
    (bit 11 set). */
#define PCIE_PLX_20G_RESET_DEASSERT_BIT  (1 << 11)

/** How long, in milliseconds, the TILExpress-20G should be held in
    reset. */
#define PCIE_PLX_20G_RESET_HOLD_MS       100

/** Offset within BAR0 at which the rshim registers are mapped after
    reset. */
#define PCIE_RSHIM_WINDOW_OFFSET         0x700000

/** Size of the rshim register window. */
#define PCIE_RSHIM_WINDOW_SIZE           0x100000

/** @} */


#endif /* __ASSEMBLER__ */

/**
 * @defgroup constants Constants
 * @{
 *
 * This section contains various constants related to the PCIe
 * subsystem.  Some of these values, for example the TILE64 device
 * IDs, are hardware constants.  Others, for example the number of
 * channels and outstanding commands, are software-defined.
 */

/** Channel number of the first host-to-tile communication channel. */
#define PCIE_FIRST_HOST_TO_TILE_CHANNEL  0

/** How many host-to-tile communication channels exist. */
#define PCIE_HOST_TO_TILE_CHANNELS       128

/** Channel number of the first tile-to-host communication channel. */
#define PCIE_FIRST_TILE_TO_HOST_CHANNEL  128

/** How many tile-to-host communication channels exist. */
#define PCIE_TILE_TO_HOST_CHANNELS       128

/** How many chip-to-chip send bindings, and how many receive
    bindings, are present on each link. */
#define PCIE_C2C_STREAM_COUNT            32

/** Channel number of the first chip-to-chip send channel. */
#define PCIE_FIRST_C2C_SEND_CHANNEL      256

/** How many chip-to-chip send channels exist. */
#define PCIE_C2C_SEND_CHANNELS           (PCIE_C2C_STREAM_COUNT)

/** Channel number of the first chip-to-chip receive channel; the
    receive channels start right after the send channels. */
#define PCIE_FIRST_C2C_RECV_CHANNEL \
        (PCIE_FIRST_C2C_SEND_CHANNEL + PCIE_C2C_SEND_CHANNELS)

/** How many chip-to-chip receive channels exist. */
#define PCIE_C2C_RECV_CHANNELS           (PCIE_C2C_STREAM_COUNT)

/** How many DMA channels exist in each direction. */
#define PCIE_DMA_CHANNEL_COUNT           32

/** Channel number of the first DMA read channel; the DMA read
    channels start right after the chip-to-chip receive channels. */
#define PCIE_FIRST_DMA_READ_CHANNEL \
        (PCIE_FIRST_C2C_RECV_CHANNEL + PCIE_C2C_RECV_CHANNELS)

/** How many channels are available for issuing DMA reads. */
#define PCIE_DMA_READ_CHANNELS           (PCIE_DMA_CHANNEL_COUNT)

/** Channel number of the first DMA write channel; the DMA write
    channels start right after the DMA read channels. */
#define PCIE_FIRST_DMA_WRITE_CHANNEL \
        (PCIE_FIRST_DMA_READ_CHANNEL + PCIE_DMA_READ_CHANNELS)

/** How many channels are available for issuing DMA writes. */
#define PCIE_DMA_WRITE_CHANNELS          (PCIE_DMA_CHANNEL_COUNT)


/** Total number of unidirectional communication channels: the sum of
    the host-to-tile, tile-to-host, chip-to-chip send/receive and DMA
    read/write channel counts. */
#define PCIE_CHANNELS \
        (PCIE_HOST_TO_TILE_CHANNELS + PCIE_TILE_TO_HOST_CHANNELS + \
         PCIE_C2C_SEND_CHANNELS + PCIE_C2C_RECV_CHANNELS + \
         PCIE_DMA_READ_CHANNELS + PCIE_DMA_WRITE_CHANNELS)

/**
 * Number of entries in each of the host and Tile buffer queues.
 * Either side may have at most PCIE_CMD_QUEUE_ENTRIES buffers posted
 * to the PCIe subsystem.  After that many buffers have been posted,
 * the driver has to wait for buffer slots to free up before it can
 * post any more.
 */
#define PCIE_CMD_QUEUE_ENTRIES           1024

#ifndef __ASSEMBLER__

/** Number of bytes occupied by the host's array of
    PCIE_CMD_QUEUE_ENTRIES pcie_host_buffer_cmd structures. */
#define PCIE_HOST_BUFFER_CMD_ARRAY_SIZE \
  (sizeof(struct pcie_host_buffer_cmd[PCIE_CMD_QUEUE_ENTRIES]))

/** Number of bytes occupied by the host's array of
    PCIE_CMD_QUEUE_ENTRIES pcie_host_completion structures. */
#define PCIE_HOST_COMPLETION_ARRAY_SIZE \
  (sizeof(struct pcie_host_completion[PCIE_CMD_QUEUE_ENTRIES]))

/** @} */


/** @defgroup profiling Profiling the Dedicated Tile
 * @{
 *
 * When compiled with profiling enabled, the PCIe subsystem dedicated
 * tile can execute DMA benchmarks and profile its internal execution.
 * The host side can use the tools in this section to trigger
 * benchmarking or start and stop profiling.
 */

/** Commands accepted by the dedicated tile profiler. */
enum pcie_host_profile_cmd {
  /** Clear internal profile counters. */
  PCIE_PROFILE_CLEAR = 0,

  /** Print profile counters to console. */
  PCIE_PROFILE_PRINT = 1,

  /** Run DMA tests and print results. */
  PCIE_PROFILE_RUN_DMAS = 2
};

/**
 * Register block through which the host controls profiling of the
 * PCIe subsystem's dedicated tile.  It consists of six consecutive
 * 32-bit registers.
 */
struct pcie_host_profile_regs
{
  /** Write to issue a ::pcie_host_profile_cmd. */
  uint32_t command;

  /** Low 32 bits of memory for benchmarking. */
  uint32_t dma_bench_lo32;

  /** High 32 bits of memory for benchmarking. */
  uint32_t dma_bench_hi32;

  /** Number of DMAs in benchmark. */
  uint32_t dma_bench_count;

  /** Size of DMAs in benchmark. */
  uint32_t dma_bench_size;

  /** Offset start address by N bytes. */
  uint32_t dma_bench_offset;
};

/** @} */

/**
 * @defgroup host Host-Side Channel Interface
 * @{
 *
 * This section describes the MMIO registers and host-memory structures
 * used by the host machine to perform channelized communication over
 * PCIe.  All the discussion in this section assumes that the Tile
 * processor has already been booted, either via an on-board bootrom
 * or by booting over PCIe as described in @ref boot.  Much of this
 * discussion centers around the ::pcie_host_mmio_regs structure,
 * which is mapped to offset 0 of BAR0.
 *
 * The PCIe subsystem provides ::PCIE_CHANNELS unidirectional
 * communication channels between the host and the Tile processor.
 * The first ::PCIE_HOST_TO_TILE_CHANNELS of these channels copy data
 * from the host to the Tile processor.  The remaining channels copy
 * data from the Tile processor to the host.  The host specifies the
 * send or receive buffers by 'posting' ::pcie_host_buffer_cmd
 * structures to the device.  When both a host-side buffer and a
 * Tile-side buffer have been posted to a particular channel, the PCIe
 * subsystem transfers the data, advances some operation counters, and
 * optionally sends interrupts to either side.
 *
 * The following sections describe how to initialize the PCIe channel
 * subsystem after the Tile processor has booted and how to perform
 * channel communication once the system is initialized.
 *
 * @section host_init Initializing the Device
 *
 * Upon successfully booting the Tile processor, the host driver needs
 * to wait for the PCIe subsystem to initialize, provide the addresses
 * of the buffer command and completion arrays to the PCIe subsystem,
 * and initialize various control registers.
 *
 * To wait for the PCIe subsystem to initialize, the host driver
 * should poll the value of the pcie_host_mmio_regs::status register.
 * The value will transition from ::PCIE_STATUS_NOT_READY to
 * ::PCIE_STATUS_RUNNING when the Tile processor is ready for the next
 * phase of initialization.  When the device is ready, the host driver
 * should use PCIE_VERSION_MATCH() to verify that
 * pcie_host_mmio_regs::version matches the expected subsystem version
 * number.
 *
 * Next, the host driver should allocate an array of contiguous,
 * bus-mapped memory to hold the buffer command array.  This array
 * must be 32-bit aligned and must contain at least sizeof(struct
 * pcie_host_buffer_cmd[PCIE_CMD_QUEUE_ENTRIES]) bytes.  Having
 * allocated the command array, the host should inform the PCIe
 * subsystem of its location by writing its 64-bit bus address into
 * pcie_host_mmio_regs::buffer_cmd_array_lo32 and
 * pcie_host_mmio_regs::buffer_cmd_array_hi32.
 *
 * Similarly, the host driver should also allocate an array of
 * contiguous, bus-mapped memory to hold the completion array.  This
 * array should be 32-bit aligned and it should contain at least
 * sizeof(struct pcie_host_completion[PCIE_CMD_QUEUE_ENTRIES]) bytes.
 * The PCIe subsystem is informed of its address by writing
 * pcie_host_mmio_regs::completion_array_lo32, etc.  Once the
 * buffer_cmd and completion arrays are initialized, the host can
 * start posting commands and updating
 * pcie_host_mmio_regs::buffer_cmd_posted_count.  Writing that
 * register while the buffers are invalid may result in DMA operations
 * to invalid addresses.
 *
 * The host driver should also be sure to initialize any control
 * registers necessary for communication operations.  In particular,
 * it must record the values of
 * pcie_host_mmio_regs::buffer_cmd_posted_count and
 * pcie_host_mmio_regs::completion_consumed_count.  These counters
 * have non-zero initial values to ensure the host drivers properly
 * handle the 32-bit wrap-around case.  The host can also initialize
 * the interrupt control registers as described in @ref
 * host_interrupt.
 *
 * @section host_operation Sending and Receiving Buffers
 *
 * The host driver triggers communication by posting buffers to the
 * PCIe subsystem.  Posting a buffer to a channel number <
 * ::PCIE_HOST_TO_TILE_CHANNELS causes the data in that buffer to be
 * written to the Tile processor when a corresponding receive buffer
 * is posted to the same channel on the Tile side.  Similarly, posting
 * a buffer to a channel number >= ::PCIE_HOST_TO_TILE_CHANNELS causes
 * data to be read from the Tile processor to the host buffer when a
 * corresponding send buffer is posted to the same channel on the Tile
 * side.
 *
 * In order to post a send or receive buffer to a channel, the host
 * fills the ::pcie_host_buffer_cmd structure at index
 * (buffer_cmd_posted_count % PCIE_CMD_QUEUE_ENTRIES).  Then, the host
 * informs the PCIe subsystem of the new command by writing to
 * pcie_host_mmio_regs::buffer_cmd_posted_count.  It is possible to
 * post multiple commands at once by filling in multiple array entries
 * and then advancing the buffer_cmd_posted_count register by more
 * than one.
 *
 * However, the host cannot submit more commands unless it knows that
 * there is space in the completion queue to hold their resultant
 * completions.  This implies that the host can post up to
 * ::PCIE_CMD_QUEUE_ENTRIES commands before it must process
 * completions.  Once that number of commands have been submitted, the
 * host must wait for completions to become available (as indicated by
 * pcie_host_mmio_regs::completion_posted_count).  Once it has
 * processed those completions, freeing up their queue entries, the
 * host can submit one more command for each completion that it
 * processed.  In summary, the host driver can have at most
 * ::PCIE_CMD_QUEUE_ENTRIES commands posted to the device at a time.
 *
 * The PCIe subsystem can complete commands to different channels out
 * of order.  Commands to the same channel are always completed in
 * order.  Any of the ::PCIE_CMD_QUEUE_ENTRIES buffers currently
 * posted to the device can be completed if and when a matching send
 * or receive buffer is posted on the Tile side.  This out-of-order
 * processing allows the driver to avoid head-of-line blocking
 * problems between multiple channels.  The PCIe subsystem continues
 * to process commands for channels that are making progress, even if
 * some set of channels stops posting buffers.  If the host driver
 * limits the number of commands that can be posted to each channel,
 * and any particular channel stops consuming buffers, then the buffer
 * command array continues processing and completing commands for
 * other channels.  The host can loop around the buffer command array
 * many times even though certain commands are not making progress.
 *
 * When a send or receive buffer posted by the host matches a buffer
 * posted by the Tile processor, the PCIe subsystem uses DMA to
 * transfer the data from the send buffer on one side to the receive
 * buffer on the other side.  Once the DMA is complete, the transfer
 * completion information is written to the completion queue.  The
 * ::pcie_host_completion structure at index
 * (pcie_host_mmio_regs::completion_posted_count %
 * ::PCIE_CMD_QUEUE_ENTRIES) is filled with the buffer address,
 * channel number, and size of the completed transfer.  If the buffer
 * command was a receive, the size is set to the actual transfer size,
 * that is, the size of the send buffer that was posted on the Tile
 * side.
 *
 * Once the completion information is written to the completion queue,
 * the PCIe subsystem may deliver an interrupt to the host to inform
 * it of the newly completed transfers (see @ref host_interrupt).  The
 * host can check to see how many transfers have completed by reading
 * pcie_host_mmio_regs::completion_posted_count and comparing it to a
 * previous value.  The host can then process any new completions and
 * potentially issue more commands if the host was waiting for
 * completion slots to become available before issuing buffer
 * commands.
 *
 * Both the host and Tile side driver interfaces allow the
 * specification of 4-bit 'tag' values in each buffer command.  The
 * tag values are copied into the completion structures; the
 * ::pcie_host_completion structure contains a copy of both the host-
 * and Tile- side tags.  The driver stack can use these bits to pass
 * small amounts of implementation-specific metadata.
 *
 * @section host_interrupt Interrupt Management
 *
 * The host machine can choose to receive interrupts when buffer
 * commands are completed.  Interrupts can be enabled by writing
 * ::PCIE_STATUS_RUNNING to the pcie_host_mmio_regs::status register.
 * Interrupts can be disabled by writing ::PCIE_STATUS_INTS_DISABLED
 * to that same register.
 *
 * The PCIe subsystem supports interrupt mitigation via two
 * mechanisms.  First, the host driver can write the
 * pcie_host_mmio_regs::interrupt_delay_us and
 * pcie_host_mmio_regs::interrupt_delay_op_cnt registers to inform the
 * PCIe subsystem that it should delay interrupts in an effort to
 * accumulate multiple transfers per interrupt.  The
 * interrupt_delay_us value sets the maximum number of microseconds
 * that an interrupt can be delayed.  The interrupt_delay_op_cnt
 * specifies the maximum number of operations that can be completed
 * before an interrupt must be delivered.
 *
 * The PCIe subsystem also mitigates interrupt overhead by requiring
 * that each interrupt must be acknowledged before another interrupt
 * can be sent.  The host driver acknowledges an interrupt by reading
 * the pcie_host_mmio_regs::interrupt_pending register.  The read
 * returns 1 if an interrupt has been sent.  The read also resets the
 * register value to 0 and reenables interrupt delivery.  Thus, the
 * PCIe subsystem will not deliver another interrupt until the first
 * has been acknowledged.
 */


/** A macro for defining the PCIe subsystem revision.
 *
 * The result packs Features into bits 16 and above, Major into bits
 * 8 through 15, and Minor into bits 0 through 7.  Major and Minor must
 * each fit in 8 bits; the macro does not mask them.
 *
 * Every argument is parenthesized in the expansion, so compound
 * expressions (for example several feature bits OR-ed together) are
 * shifted as a whole rather than being regrouped by operator
 * precedence.
 *
 * @param Features An identifier to be used by systems that add custom
 * features to the PCIe subsystem.  0 for the standard PCIe subsystem.
 * @param Major Major revision number, new values signal incompatible changes.
 * @param Minor Minor revision number, new values should be compatible.
 */
#define PCIE_VERSION_DEF(Features, Major, Minor) \
        (((Features) << 16) | ((Major) << 8) | (Minor))

/** Return the 'features' field (bits 16 and above) of version number x. */
#define PCIE_VERSION_FEATURES(x)  ((x) >> 16)

/** Return the 'major' field (bits 8 through 15) of version number x. */
#define PCIE_VERSION_MAJOR(x)     (((x) >> 8) & 0xff)

/** Return the 'minor' field (bits 0 through 7) of version number x. */
#define PCIE_VERSION_MINOR(x)     ((x) & 0xff)

/**
 * Test whether the host driver revision and the PCIe subsystem
 * revision are compatible.  Two version numbers are considered
 * compatible when their major numbers are equal; the features and
 * minor fields are not compared.
 */
#define PCIE_VERSION_MATCH(x, y)  \
  (PCIE_VERSION_MAJOR(x) == PCIE_VERSION_MAJOR(y))

/** Optional features that a PCIe subsystem build may include. */
enum pcie_optional_features {
  /** A network-packet delivery interface is present: packets are
      written into a large ring buffer in host memory.  It offers very
      high performance for small packets, at the cost of having no
      flexibility in buffer management.  See @ref epp for more
      information. */
  PCIE_FEATURE_EPP = 0,

  /** The chip-to-chip zero-copy API is supported. */
  PCIE_FEATURE_C2C = 1,
};

/** Major revision of the PCIe subsystem that this documentation
    describes. */
#define PCIE_HEADER_MAJOR  5

/** Minor revision of the Tile-side PCIe subsystem that this
    documentation describes. */
#define PCIE_HEADER_MINOR  1

/**
 * The PCIe subsystem version to which this documentation refers: the
 * ::PCIE_FEATURE_EPP feature bit combined with ::PCIE_HEADER_MAJOR
 * and ::PCIE_HEADER_MINOR.
 *
 * NOTE(review): only the EPP feature bit is set here;
 * ::PCIE_FEATURE_C2C is not included.  Confirm that this is intended.
 */
#define PCIE_HEADER_VERSION                     \
  PCIE_VERSION_DEF((1 << PCIE_FEATURE_EPP),     \
                   PCIE_HEADER_MAJOR,           \
                   PCIE_HEADER_MINOR)

/** Flag bit (bit 0) indicating that an HV PCIe completion is
    available. */
#define PCIE_HOST_INTR_HV_CPL_AVAIL  (1 << 0)


/**
 * The structure used to post buffers to the PCIe subsystem.  The host
 * provides an array of ::PCIE_CMD_QUEUE_ENTRIES of these structures to
 * the PCIe subsystem.  Buffer commands are then posted to the PCIe
 * subsystem by writing to the next entry in the array and advancing
 * pcie_host_mmio_regs::buffer_cmd_posted_count.  Posting a buffer
 * with 'channel' < ::PCIE_HOST_TO_TILE_CHANNELS causes the data in
 * that buffer to be copied to the Tile processor; posting a buffer
 * with 'channel' >= ::PCIE_HOST_TO_TILE_CHANNELS causes that buffer
 * to be filled with data from the Tile processor.
 *
 * The bit-fields that follow 'tag' (size, soc, must_eop, may_eop,
 * reserved and channel) add up to exactly 32 bits.
 */
typedef struct pcie_host_buffer_cmd
{
  uint32_t buffer_addr_lo32;  /**< Low 32 bits of the buffer bus address. */
  uint32_t buffer_addr_hi32;  /**< High 32 bits of the buffer bus address. */
  uint32_t tag;               /**< Copied from sender to receiver. */

  /** Size of the buffer, maximum 64k.  Any values larger than 64k
      will be treated as 64k.  The field is 17 bits wide so that the
      value 64k itself can be represented. */
  uint32_t size:17;

  /** Start of connection bit.  If a channel is in 'reset' mode, this
      bit must be set on a command from both the sending and receiving
      sides before data transmission can begin.  All commands that do
      not have this bit set are completed with no data transfer and
      with the reset bit set in the completion.  If the channel is not
      in reset mode, this bit is ignored. */
  uint32_t soc:1;

  /** Must end packet bit.  When a receive command is posted with this
      bit set, the PCIe subsystem consumes commands on the send side
      until it finds a command with this bit set.  When a send command
      is posted with this bit set, the PCIe subsystem consumes
      commands on the receive side until it finds a command with
      either this bit or the may_eop bit set. */
  uint32_t must_eop:1;

  /** May end packet bit.  When this bit is set in a receive command,
      the PCIe subsystem will allow that command to satisfy an
      end-of-packet request from the send side. */
  uint32_t may_eop:1;

  uint32_t reserved:4;        /**< Must be zero. */
  uint32_t channel:8;         /**< Channel to which the buffer is posted. */
} pcie_host_buffer_cmd_t;


/**
 * The structure used to inform the host that a communication
 * operation has completed.  The host provides an array of
 * ::PCIE_CMD_QUEUE_ENTRIES of these structures to the PCIe subsystem.
 * When a command completes, the PCIe subsystem writes an entry in
 * that array and advances
 * pcie_host_mmio_regs::completion_posted_count.  As with
 * ::pcie_host_buffer_cmd, a buffer posted with 'channel' <
 * ::PCIE_HOST_TO_TILE_CHANNELS had its data copied to the Tile
 * processor, while a buffer posted with 'channel' >=
 * ::PCIE_HOST_TO_TILE_CHANNELS was filled with data from the Tile
 * processor.
 *
 * The bit-fields that follow 'tag' (size, reset, eop, overflow,
 * reserved and channel) add up to exactly 32 bits.
 */
typedef struct pcie_host_completion
{
  uint32_t buffer_addr_lo32;  /**< Low 32 bits of the buffer bus address. */
  uint32_t buffer_addr_hi32;  /**< High 32 bits of the buffer bus address. */
  uint32_t tag;               /**< Copied from send command. */
  uint32_t size:17;           /**< Size of the actual data transfer. */

  /** Reset bit.  If set, the command was completed without data
      transfer because the channel is in reset mode and the command
      did not include the soc bit. */
  uint32_t reset:1;

  /** End of packet bit.  For receive completions, this bit is set if
      the sender or receiver forced end-of-packet via must_eop.  The
      value is undefined for send completions. */
  uint32_t eop:1;

  /** Overflow bit.  For receive completions, this bit is set if the
      sender's data overflows the buffer specified by this completion.
      If the eop bit is also set, some amount of send data has been
      dropped.  The value is undefined for send completions. */
  uint32_t overflow:1;

  uint32_t reserved:4;        /**< Undefined. */
  uint32_t channel:8;         /**< Channel to which the buffer was posted. */
} pcie_host_completion_t;

/**
 * The memory-mapped input/output (MMIO) registers mapped to BAR0 of
 * the PCIe interface.  When booted with a hypervisor configuration
 * that includes the 'pcie' device, the hypervisor PCIe subsystem maps
 * the following set of MMIO control registers in BAR 0.  These
 * registers allow the host machine to reset the TILE64, initialize it
 * for PCIe channel communication, perform communication operations,
 * and check for completion of those operations.
 */
struct pcie_host_mmio_regs
{
  /** When read, the status register returns a ::pcie_status
      value indicating the current state of the PCIe subsystem.
      Writing to the register allows the host to change the subsystem
      status; in particular writing ::PCIE_STATUS_INTS_DISABLED
      disables interrupts and writing ::PCIE_STATUS_RESET_CHIP causes
      the TILE64 to reset. */
  uint32_t status;

  /** When read, returns the hypervisor PCIe subsystem version number.
      The current revision number has major == 2; see
      PCIE_VERSION_DEF() for more information on how the version
      number is formatted.  After booting, host drivers should make
      sure that the subsystem version number is compatible. */
  uint32_t version;


  /** When read, returns non-zero if an interrupt is pending to the
      host.  If a channel completion has been posted, the
      ::PCIE_HOST_INTR_HV_CPL_AVAIL bit will be set.  The high 24 bits
      may be set if the Tile-side application has asserted interrupts
      via the @ref csr mechanism.  This register is read-to-reset; the
      act of reading will reset it to zero and it will remain 0 until
      another interrupt fires. */
  uint32_t interrupt_pending;

  /** The maximum number of microseconds that may pass before a
      deferred interrupt must be delivered to the host.  The initial
      value is 0 microseconds; writing the register sets the value. */
  uint32_t interrupt_delay_us;

  /** The maximum number of communication operations that may complete
      before a deferred interrupt must be delivered to the host.  The
      initial value is 0 operations; writing the register sets the
      value. */
  uint32_t interrupt_delay_op_cnt;


  /** Low 32 bits of the bus address of the
      pcie_host_buffer_cmd[PCIE_CMD_QUEUE_ENTRIES] in host memory. */
  uint32_t buffer_cmd_array_lo32;

  /** High 32 bits of the bus address of the
      pcie_host_buffer_cmd[PCIE_CMD_QUEUE_ENTRIES] in host memory. */
  uint32_t buffer_cmd_array_hi32;

  /** The number of pcie_host_buffer_cmd structures that have been
      posted to the device.  The host writes this register in order
      to indicate that more buffer commands have been posted to the
      device.  The initial value is 0xffff0000.  Do not write this
      register until both the buffer_cmd_array and completion_array
      addresses are valid. */
  uint32_t buffer_cmd_posted_count;


  /** Low 32 bits of the bus address of the
      pcie_host_completion[PCIE_CMD_QUEUE_ENTRIES] array in host
      memory. */
  uint32_t completion_array_lo32;

  /** High 32 bits of the bus address of the
      pcie_host_completion[PCIE_CMD_QUEUE_ENTRIES] array in host
      memory. */
  uint32_t completion_array_hi32;

  /** The number of pcie_host_completion structures that have been
      written by the device to the completion array.  This value wraps
      as a 32-bit counter.  The host reads this register in order to
      determine how many commands have been completed.  The initial
      value is 0xffff0000. */
  uint32_t completion_posted_count;

  /** Write a channel number to this register to force that channel
      into 'reset' mode.  Resetting a channel via this register will
      cause all host commands currently posted to the channel to
      complete-with-reset.  Once the previously posted host commands
      are returned, the channel enters reset mode and
      completes-with-reset all host and tile commands until both sides
      post a command with the soc bit set. */
  uint32_t channel_reset;

  /** Registers used to profile and benchmark the PCIe subsystem. */
  struct pcie_host_profile_regs profile_regs;
};


/** The set of values used with pcie_host_mmio_regs::status. */
enum pcie_status {
  /** Chip is not ready */
  PCIE_STATUS_NOT_READY = 0,
  /** Chip is running */
  PCIE_STATUS_RUNNING = 1,
  /** Chip has interrupts disabled */
  PCIE_STATUS_INTS_DISABLED = 2,
  /** Reset the chip */
  PCIE_STATUS_RESET_CHIP = 3,
  /** Halt the chip (cannot reboot). */
  PCIE_STATUS_HALT_CHIP = 4,
};

/** @} */


/**
 * @defgroup tile Tile-Side Channel Interface
 * @{
 *
 * This section describes the hypervisor interfaces and shared memory
 * data structures that allow Tile processor programs to communicate
 * with the host via PCIe.
 *
 * @section tile_init Initialization
 *
 * @section tile_operation Sending and Receiving Buffers
 */

/**
 * Bit flags that may be supplied in the 'flags' field of buffer
 * commands.  They let the driver ask for special-case handling, such
 * as marking packets that should not generate completions, or that
 * should generate completions without interrupting the completion
 * tile.
 */
enum pcie_tile_buffer_cmd_flags {
  /** Do not generate a completion. */
  PCIE_TILE_FLAG_NO_CPL = 0x1,
  /** Do not interrupt when completed. */
  PCIE_TILE_FLAG_NO_INT = 0x2,
};


/**
 * The structure used to post zero-copy buffers to the PCIe subsystem.
 * Based on channel number, commands can be either matched to host
 * zero-copy commands, or move data to/from a particular DMA address.
 */
typedef struct pcie_tile_buffer_cmd
{
  uint32_t buffer_addr_lo32;      /**< Low 32 bits of the buffer CPA. */

  /* The declared widths of the next three bit-fields total 32 bits
     (16 + 8 + 8). */
  uint32_t buffer_addr_hi16:16;   /**< Bits [47:32] of the buffer CPA. */
  uint32_t completion_queue_id:8; /**< Completion queue to notify when done.*/
  uint32_t reserved_1:8;          /**< Must be zero. */
  uint32_t tag;                   /**< Copied from sender to receiver. */

  /* The declared widths of the bit-fields from 'size' through
     'channel' total 32 bits (17 + 1 + 1 + 1 + 3 + 9). */

  /** Size of the buffer, maximum 64k.  Any values larger than 64k
     will be treated as 64k. */
  uint32_t size:17;

  /** Start of connection bit.  If a channel is in 'reset' mode, this
      bit must be set on a command from both the sending and receiving
      sides in order for transmission to begin.  All commands that do
      not have this bit set will be completed with no data transfer
      and the reset bit set in the completion.  If the channel is not
      in reset mode, this bit is ignored. */
  uint32_t soc:1;

  /** End of packet bit.  When a receive command is posted with this
      bit set, the PCIe subsystem will consume commands on the send
      side until it finds a command with this bit set.  When a send
      command is posted with this bit set, the PCIe subsystem will
      consume commands on the receive side until it finds a command
      with this bit or the may_eop bit set. */
  uint32_t must_eop:1;

  /** May end packet bit.  When this bit is set in a receive command,
      the PCIe subsystem will allow that command to satisfy an
      end-of-packet request from the send side. */
  uint32_t may_eop:1;

  uint32_t reserved_2:3;            /**< Must be zero. */
  uint32_t channel:9;             /**< Channel to which buffer is posted.*/

  uint32_t bus_addr_lo;           /**< Low 32 bits of PCIE DMA address. */
  uint32_t bus_addr_hi;           /**< High 32 bits of PCIE DMA address. */
}
pcie_tile_buffer_cmd_t;



/**
 * The shared memory data structure used to post buffer commands from
 * Tile programs to the PCIe subsystem.  The client supervisor is
 * responsible for providing a 64-kB aligned page to the hypervisor in
 * order to store this structure.  This data structure starts at
 * offset 0 in that page and the page is homed as specified by the
 * client.
 */
typedef struct pcie_tile_shm_state
{
  /** The number of pcie_tile_buffer_cmd structures that have been
      posted to the PCIe subsystem.  Tile programs write this field
      in order to indicate that more buffer commands have been posted
      to the commands array.  Be sure to issue an insn_mf() between
      writing the new command(s) and advancing this counter.  The
      client must initialize this value to 0xffff0000. */
  uint32_t buffer_cmd_posted_count;

  /** Buffer commands are posted to the PCIe subsystem by writing to
      the next entry in the array and advancing
      pcie_tile_shm_state::buffer_cmd_posted_count.  Posting a buffer
      with 'channel' < ::PCIE_HOST_TO_TILE_CHANNELS will cause that
      buffer to be filled with data from the host; posting a buffer
      with 'channel' >= ::PCIE_HOST_TO_TILE_CHANNELS will cause that
      buffer to be copied to the host. */
  pcie_tile_buffer_cmd_t buffer_cmd_array[PCIE_CMD_QUEUE_ENTRIES];

  /* ALL FIELDS BELOW THIS SHOULD NOT BE MODIFIED BY LINUX.  THESE MAY
     BE REMOVED IN A FUTURE RELEASE.*/

  /** A lock used to serialize access by multiple HV worker tiles.
      The attribute places this member on a 64-byte boundary
      (presumably a cache line, to separate the HV-only fields from
      the client-written ones -- confirm). */
  int32_t hv_cmd_array_lock __attribute__((__aligned__(64)));

  /** A count of how many commands HV workers have posted. */
  uint32_t hv_cmd_posted_count;

  /** A second command array used by the user-direct-to-HV command
      posting path. */
  pcie_tile_buffer_cmd_t hv_cmd_array[PCIE_CMD_QUEUE_ENTRIES];

}
pcie_tile_shm_state_t;


/**
 * The structure used to inform Tile processor programs that a
 * communication operation has completed.
 */
/* Note: any size change of this structure would require a change to
 * the instant intr handler. */
typedef struct pcie_tile_completion
{
  uint32_t buffer_addr_lo32;      /**< Low 32 bits of the buffer CPA. */

  uint32_t buffer_addr_hi16:16;   /**< Bits [47:32] of the buffer CPA. */
  uint32_t reserved_1:16;           /**< Undefined. */
  uint32_t tag;                   /**< Copied from send command. */

  /* The declared widths of the bit-fields from 'size' through
     'channel' total 32 bits (17 + 1 + 1 + 1 + 1 + 2 + 9). */
  uint32_t size:17;               /**< Size of the actual data transfer. */

  /** Reset bit.  If set, the command was completed without data
      transfer because the channel is in reset mode and the command
      did not include the soc bit. */
  uint32_t reset:1;

  /** End of packet bit.  For receive completions, this bit is set if
      the sender or receiver forced end-of-packet via must_eop.  The
      value is undefined for send completions. */
  uint32_t eop:1;

  /** Overflow bit.  For receive completions, this bit is set if the
      sender's data overflows the buffer specified by this completion.
      If the eop bit is also set, some amount of send data has been
      dropped.  The value is undefined for send completions. */
  uint32_t overflow:1;

  /** Link-is-down bit.  For chip-to-chip transactions, this bit is
      set if the command could not be processed because there is no
      link to the other chip. */
  uint32_t link_down:1;

  uint32_t reserved_2:2;            /**< Undefined. */
  uint32_t channel:9;             /**< Channel to which buffer was posted.*/
}
pcie_tile_completion_t;


/**
 * A per-tile completion queue, allocated by the hypervisor and
 * provided to each tile when it registers for PCIe communication.
 * The supervisor may read this structure, but it does not have write
 * permissions.
 */
struct pcie_tile_completion_queue
{
  /** The number of completions that have been posted to this
      queue.  The initial value is 0xffff0000.  Declared volatile so
      that every access is performed; presumably the counter is
      advanced asynchronously by the PCIe subsystem -- confirm. */
  volatile uint32_t completion_posted_count;

  /** An IRQ mask indicating which interrupt will be used to downcall
      to the supervisor when an interrupting completion arrives. */
  uint32_t irq_mask;

  /** An array of ::pcie_tile_completion_t structures, holding
      PCIE_CMD_QUEUE_ENTRIES entries. */
  pcie_tile_completion_t completion_array[PCIE_CMD_QUEUE_ENTRIES];
};


/** File offset, within the 'pcie/N/channel_ctl' device file, for
    initializing the PCIe subsystem as a whole. */
#define PCIE_CHANNEL_CTL_GLOBAL_INIT_OFF 0x1000

/**
 * A structure to be read from the PCIe subsystem at the start of
 * global initialization.  The driver should read offset
 * ::PCIE_CHANNEL_CTL_GLOBAL_INIT_OFF from the 'pcie/N/channel_ctl'
 * device file in order to fill in this structure.  Global
 * initialization should be done by only one tile and must precede
 * local initialization.
 */
struct pcie_tile_global_init_read
{
  /** The tile coordinates on which the ::pcie_tile_shm_state object
      will be homed.  All page table entries used to access that
      physical page should be set as uncacheable and OLOC'ed to this
      coordinate. */
  uint32_t shm_state_lotar;
};

/**
 * A structure to be written to the PCIe subsystem to finish global
 * initialization.  After reading the ::pcie_tile_global_init_read
 * structure, the driver should fill in this structure and write it
 * to ::PCIE_CHANNEL_CTL_GLOBAL_INIT_OFF within the
 * 'pcie/N/channel_ctl' device file.  Global initialization should be
 * done by only one tile and must precede local initialization.
 */
struct pcie_tile_global_init_write
{
  /** The CPA of a physical page to be used for the
      ::pcie_tile_shm_state structure.  This address must be aligned
      to a 64kB boundary.  The structure must be initialized before
      this structure is written; in particular the
      pcie_tile_shm_state::buffer_cmd_posted_count value must be
      initialized to 0xffff0000. */
  uint64_t shm_state_cpa;

  /** The tile on which the shm_state_cpa page is homed.  Will be
      removed when Linux supports OLOC to non-client tiles. */
  uint32_t shm_state_home;

  /** The CPA of a physical page to be used for the
      CSR backing memory.  This address must be aligned to the page
      size given in csr_memory_page_size.  The backing memory must be
      initialized before this structure is written. */
  uint64_t csr_memory_cpa;

  /** The page size for the CSR backing memory. */
  uint32_t csr_memory_page_size;

  /** The tile on which the csr_memory_cpa page is homed.  Will be
      removed when Linux supports OLOC to non-client tiles. */
  uint32_t csr_memory_home;

  /** The CPA of a physical page to be used for the
      CSR write queue.  This address must be aligned
      to a page boundary.  The write queue must be initialized before
      this structure is written. */
  uint64_t csr_write_queue_cpa;

  /** The tile on which the csr_write_queue_cpa page is homed.  Will be
      removed when Linux supports OLOC to non-client tiles. */
  uint32_t csr_write_queue_home;
};



/** File offset, within the 'pcie/N/channel_ctl' device file, for
    initializing per-tile PCIe subsystem state. */
#define PCIE_CHANNEL_CTL_LOCAL_INIT_OFF 0x2000

/**
 * A structure to be read from the PCIe subsystem in order to learn
 * about certain per-tile state.  To fill in this structure, the
 * driver should read from offset ::PCIE_CHANNEL_CTL_LOCAL_INIT_OFF in
 * the 'pcie/N/channel_ctl' device file.  This step should be repeated
 * on every tile that will process PCIe completions.
 */
struct pcie_tile_local_init_read
{
  /** Filled with the VA of the completion queue.  This structure is
      automatically mapped into this tile's page tables; see the
      description of drv_client_alloc() in drvintf.h for more
      information. */
  struct pcie_tile_completion_queue* completion_queue;

  /** Each tile is assigned a unique completion queue ID.  This ID is
      passed in the pcie_tile_buffer_cmd::completion_queue_id field to
      specify which completion queue should be notified when the
      command is completed.  The uint8_t type matches the 8-bit width
      of that bit-field. */
  uint8_t completion_queue_id;

  /** Filled with the VA of this tile's direct-to-HV completion
      queue. */
  struct pcie_tile_user_completion_queue* user_completion_queue;

  /** Filled with the fastcall number for user programs to use when
      posting ::pcie_tile_user_buffer_cmd structures. */
  int zc_post_fastcall;
};

/** Writing a channel number to this file offset in pcie/N/channel_ctl
    puts that channel into 'reset' mode.  Resetting a channel via this
    HV file offset will cause all tile commands currently posted to
    the channel to complete-with-reset.  Once the previously posted
    tile commands are returned, the channel will enter reset mode and
    will complete-with-reset all host and tile commands until both
    sides post a command with the soc bit set. */
#define PCIE_CHANNEL_CTL_CHANNEL_RESET_OFF 0x3000

/** Reading a byte from this offset fills the result buffer with 1 if
    the channel's transport link is down, and 0 if it is up.  The
    offset is 0x4000 plus N; presumably N is the channel number being
    queried -- confirm. */
#define PCIE_CHANNEL_CTL_LINK_DOWN_OFF(N) (0x4000 + (N))

/** @} */


/**
 * @defgroup tile_fast_zc Direct User-to-HV ZC Command Interface.
 * @{
 *
 *
 */

/** Offset at which to read or write the 32-bit number of credits
    reserved for use by the supervisor (i.e. non-direct-to-HV) command
    queue. */
#define PCIE_CHANNEL_CTL_CREDITS_OFF 0x5000

/** Offset at which to read or write a ::pcie_tile_channel_mask
    structure indicating which channels the direct-to-HV interface for
    this tile is allowed to use. */
#define PCIE_CHANNEL_CTL_CHANNEL_MASK_OFF 0x6000

/** Offset at which to register the packet memory with the HV PCIe.
    NOTE(review): presumably done by writing a ::pcie_iomem_address_t
    at this offset -- confirm. */
#define PCIE_CHANNEL_CTL_IOMEM_REGISTER_OFF 0x7000

/** Offset at which to unregister the packet memory from the HV PCIe.
    NOTE(review): presumably also takes a ::pcie_iomem_address_t --
    confirm. */
#define PCIE_CHANNEL_CTL_IOMEM_UNREGISTER_OFF 0x7100

/** Offset at which to obtain the global host link index. */
#define PCIE_CHANNEL_CTL_HOST_LINK_INDEX_OFF 0x8000

/** A bitmask for controlling which set of channels userspace can
    access via the direct-to-HV API.  It is read or written at
    ::PCIE_CHANNEL_CTL_CHANNEL_MASK_OFF. */
struct pcie_tile_channel_mask
{
  /** One bit for each of the 512 possible channel IDs (16 words of
      32 bits).  NOTE(review): the mapping of channel ID to word and
      bit position is not stated here -- confirm before relying on
      it. */
  uint32_t bits[16];
};

/** The command struct for direct-to-HV commands is slightly different
    from ::pcie_tile_buffer_cmd because it refers to VAs, not CPAs,
    and the completions are always returned to the tile that posted
    the command (so there is no completion queue ID field). */
struct pcie_tile_user_buffer_cmd
{
  uint32_t buffer_va;              /**< Buffer virtual address. */
  uint32_t tag;                   /**< Copied from sender to receiver. */

  /* The declared widths of the bit-fields from 'size' through
     'channel' total 32 bits (17 + 1 + 1 + 1 + 3 + 9). */

  /** Size of the buffer, maximum 64k.  Any values larger than 64k
     will be treated as 64k. */
  uint32_t size:17;

  /** Start of connection bit.  If a channel is in 'reset' mode, this
      bit must be set on a command from both the sending and receiving
      sides in order for transmission to begin.  All commands that do
      not have this bit set will be completed with no data transfer
      and the reset bit set in the completion.  If the channel is not
      in reset mode, this bit is ignored. */
  uint32_t soc:1;

  /** End of packet bit.  When a receive command is posted with this
      bit set, the PCIe subsystem will consume commands on the send
      side until it finds a command with this bit set.  When a send
      command is posted with this bit set, the PCIe subsystem will
      consume commands on the receive side until it finds a command
      with this bit or the may_eop bit set. */
  uint32_t must_eop:1;

  /** May end packet bit.  When this bit is set in a receive command,
      the PCIe subsystem will allow that command to satisfy an
      end-of-packet request from the send side. */
  uint32_t may_eop:1;

  uint32_t reserved_2:3;            /**< Must be zero. */
  uint32_t channel:9;             /**< Channel to which buffer is posted.*/

  uint32_t bus_addr_lo;           /**< Low 32 bits of PCIE DMA address. */
  uint32_t bus_addr_hi;           /**< High 32 bits of PCIE DMA address. */
};


/** The completion structure for direct-to-HV commands is slightly
    different because the driver does not return the VA with the
    completion.  The user is responsible for keeping track of the
    buffer VAs, using the per-channel in-order completion property. */
struct pcie_tile_user_completion
{
  uint32_t tag;                   /**< Copied from send command. */

  /* The declared widths of the bit-fields from 'size' through
     'channel' total 32 bits (17 + 1 + 1 + 1 + 1 + 2 + 9). */
  uint32_t size:17;               /**< Size of the actual data transfer. */

  /** Reset bit.  If set, the command was completed without data
      transfer because the channel is in reset mode and the command
      did not include the soc bit. */
  uint32_t reset:1;

  /** End of packet bit.  For receive completions, this bit is set if
      the sender or receiver forced end-of-packet via must_eop.  The
      value is undefined for send completions. */
  uint32_t eop:1;

  /** Overflow bit.  For receive completions, this bit is set if the
      sender's data overflows the buffer specified by this completion.
      If the eop bit is also set, some amount of send data has been
      dropped.  The value is undefined for send completions. */
  uint32_t overflow:1;

  /** Link-is-down bit.  For chip-to-chip transactions, this bit is
      set if the command could not be processed because there is no
      link to the other chip. */
  uint32_t link_down:1;

  uint32_t reserved_2:2;            /**< Undefined. */
  uint32_t channel:9;             /**< Channel to which buffer was posted.*/
};


/**
 * A per-tile completion queue for direct-to-HV commands, allocated by
 * the hypervisor and provided to each tile when it registers for PCIe
 * communication.  The supervisor may read this structure, but it does
 * not have write permissions.
 *
 * The Linux driver obtains the VA of this structure via
 * pcie_tile_local_init_read::user_completion_queue.
 */
struct pcie_tile_user_completion_queue
{
  /** The number of completions that have come back to this tile.  The
      initial value is 0xffff0000. */
  volatile uint32_t completion_count;

  /** The number of commands posted on this tile.  The initial value
      is 0xffff0000. */
  uint32_t command_count;

  /** An array of ::pcie_tile_user_completion structures, holding
      PCIE_CMD_QUEUE_ENTRIES entries. */
  struct pcie_tile_user_completion completion_array[PCIE_CMD_QUEUE_ENTRIES];
};

/**
 * An object for registering the packet memory with the hypervisor.
 */
typedef struct
{
  /** The virtual address of the memory; must be aligned to the huge
      page size. */
  uint32_t va;

  /** The physical address of the backing page. */
  uint64_t pa;

  /** Size (in bytes) of the memory area. */
  int size;
}
pcie_iomem_address_t;


/** @} */



/**
 * @defgroup csr Configuration and Status Register Emulation
 * @{
 *
 * The configuration and status register mechanism allows the host and
 * Tile side to communicate via a region of shared memory that is
 * accessible to both sides.  The host can read or write aligned
 * 32-bit words to the memory region by mapping BAR0 +
 * ::PCIE_CSR_REG_OFFSET into its virtual address space.  Similarly,
 * the Tile side driver provides the "backing memory" for this region
 * to the hypervisor as part of the ::pcie_tile_global_init_write
 * initialization structure.  All host read accesses are immediately
 * satisfied by reading the memory provided by the Tile-side
 * supervisor.
 *
 * When the host writes to the CSR memory region, the PCIe subsystem
 * does not apply that write operation to the backing memory.
 * Instead, it posts a ::pcie_csr_write_notify structure into the
 * ::pcie_csr_write_notify_queue that is provided by the Tile-side
 * supervisor during initialization.  Whenever a write is added to the
 * write queue, the Tile supervisor receives an interrupt that allows
 * it to wake up and process the write request.  The supervisor can
 * then apply the write operation as it chooses, perhaps by writing
 * the value to memory.  However, the supervisor is free to implement
 * other behavior if it chooses, for instance by having a particular
 * write address trigger a communication operation or by emulating a
 * write-one-to-clear type register.
 *
 * On occasion, the Tile-side application might need to deliver an
 * interrupt to the host-side supervisor.  The CSR hypervisor
 * interface enables this mechanism via a hypervisor filesystem
 * operation.  The top 24 bits of
 * ::pcie_host_mmio_regs::interrupt_pending are reserved for
 * application interrupt bits, and the Tile-side supervisor can assert
 * any of those bits by writing a 32-bit word to offset
 * ::PCIE_CTL_ASSERT_HOST_INTR_OFF, with the relevant bits in [0:23].
 * The hypervisor will OR those bits into the high 24 bits of
 * the interrupt word, and assert a host interrupt if non-zero.  When
 * the host reads the interrupt_pending register to discover which
 * interrupts are pending, all the bits are automatically cleared.
 */

/** The maximum mapping page size that is used for the CSR backing
    memory: 256 kB. */
#define TILE_CSR_MEMORY_MAX_SIZE (256 << 10)

/** Offset of the CSR region in the MMIO space (relative to BAR0). */
#define PCIE_CSR_REG_OFFSET 0x100000

/** The number of entries in the CSR write notification queue. */
#define PCIE_CSR_WRITE_QUEUE_ENTRIES 1024

/** The page size that is used for the CSR write queue: 16 kB. */
#define TILE_CSR_WRITE_QUEUE_PAGE_SIZE (16 << 10)

/**
 * The PCIe CSR write notification queue entry.  One entry describes a
 * single host write to the CSR memory region.  The four bit-fields
 * have declared widths totalling 32 bits (24 + 4 + 3 + 1).
 */
typedef struct pcie_csr_write_notify
{
  uint32_t offset:24;  /**< Offset of the write in the CSR memory. */
  uint32_t size:4;     /**< Width of the write in bytes. */
  uint32_t reserved:3; /**< Reserved. */
  uint32_t overflow:1; /**< Set when the queue has overflowed. */
  uint32_t value;      /**< Write value. */
} pcie_csr_write_notify_t;

/**
 * The PCIe CSR write notification queue.  Its backing page is supplied
 * to the hypervisor via pcie_tile_global_init_write::csr_write_queue_cpa.
 */
struct pcie_csr_write_notify_queue
{
  /** The number of notifications that have been posted.
      The initial value is 0xffff0000. */
  volatile uint32_t writes_posted;

  /** The number of notifications that have been consumed.
      The initial value is 0xffff0000.  NOTE(review): presumably the
      number of pending entries is writes_posted - writes_consumed --
      confirm. */
  volatile uint32_t writes_consumed;

  /** An array of ::pcie_csr_write_notify_t structures, holding
      ::PCIE_CSR_WRITE_QUEUE_ENTRIES entries. */
  pcie_csr_write_notify_t notify_array[PCIE_CSR_WRITE_QUEUE_ENTRIES];
};

/** File offset for the supervisor to assert a host interrupt.
    NOTE(review): this has the same numeric value as
    PCIE_CHANNEL_CTL_LINK_DOWN_OFF(0); presumably the two offsets
    apply to different hypervisor device files -- confirm. */
#define PCIE_CTL_ASSERT_HOST_INTR_OFF 0x4000

/** Mask for the HV interrupt conditions in the interrupt_pending word
    (the low 8 bits). */
#define PCIE_HOST_INTR_HV_MASK          0xff

/** Shift value for supervisor interrupt conditions in
    interrupt_pending; these occupy the bits above the low 8. */
#define PCIE_HOST_INTR_SV_SHIFT         8

/**
 * @}
 */


/**
 * @defgroup barmem BAR1 Memory Map Support.
 * @{
 *
 * On TilePro systems, the board can contain special entries that
 * cause the hardware to enable BAR1 after chip reset.  The following
 * interface allows client driver code to map huge page memory to back
 * that region, such that remote read and write requests act on that
 * memory.
 *
 * If a page is not mapped at a particular BAR offset, incoming writes
 * are discarded and incoming reads receive a response filled with
 * zeroed-out data.
 */

/** File offset for the supervisor to write a huge page mapping
    request, in the form of a ::pcie_barmem_config structure. */
#define PCIE_BARMEM_CONFIG_OFF 0x9000

/** Parameter struct written to ::PCIE_BARMEM_CONFIG_OFF to map or
    unmap memory at an offset within the BAR. */
struct pcie_barmem_config
{
  int is_map;                 /**< 1 if mapping, 0 if unmapping. */
  int bar;                    /**< Which BAR; must be 1. */
  uint64_t hpage_pa;          /**< PA to map, if is_map == 1. */
  uint32_t offset;            /**< Offset within the BAR. */
};

/**
 * @}
 */

/**
 * @defgroup epp NetIO Egress Support
 * @{
 *
 * Some variants of the PCIe driver provide support for transmitting
 * small packets from Tile NetIO applications into a ring buffer in
 * host memory.  If present, this capability is noted by the presence
 * of the ::PCIE_FEATURE_EPP flag in pcie_host_mmio_regs::version.
 *
 * The host interface to this feature is available as two sets of
 * register-mapped control registers.  The ::pcie_epp_regs_drv_t
 * structure, mapped at ::PCIE_EPP_REG_DRV_OFFSET, allows the host
 * supervisor to set packet ring-buffer parameters like entry size,
 * number of entries, and the host-memory address of the ring.
 *
 * The ::pcie_epp_regs_user_t structure, mapped at
 * ::PCIE_EPP_REG_USER_OFFSET allows a host program to see how many
 * packets have been produced and to notify the hypervisor when ring
 * entries have been consumed.
 */

/**
 * The maximum mapping page size that is used for the backing memory
 * of the fast PCIe interface control registers: 64 kB.
 */
#define TILE_PCIE_EPP_REG_MEMORY_MAX_SIZE (64 << 10)

/** Offset of the EPP registers (drv-visible set, ::pcie_epp_regs_drv_t)
    in the MMIO space. */
#define PCIE_EPP_REG_DRV_OFFSET 0x200000

/** Offset of the EPP registers (user-visible set,
    ::pcie_epp_regs_user_t) in the MMIO space. */
#define PCIE_EPP_REG_USER_OFFSET 0x201000

/**
 * Fast PCIe EPP control registers (host driver-visible set), mapped
 * at ::PCIE_EPP_REG_DRV_OFFSET.
 *
 * The host ring buffer size must be a power of two and a multiple
 * of the single buffer size.
 *
 * The write_index and read_index represent the absolute write and
 * read indices, respectively.  The relative indices are obtained by
 * taking the absolute index modulo the number of buffers in the ring.
 * The write index points to the next buffer to which the tile can DMA
 * and the read index points to the buffer beyond the last buffer to
 * which the Tile can write.  The unsigned difference
 * (write_index - read_index) always yields the number of buffers to
 * which data has been written but not yet retrieved.
 *
 * Because the write_index and read_index are generally mapped into
 * host user space, we need to split the EPP control registers into
 * two groups: the user-visible group containing the write_index and
 * read_index, and the host driver-visible group containing other
 * registers.
 */
typedef struct pcie_epp_regs_drv
{
  /** Low 32 bits of the base bus address of the
      host memory ring buffer. */
  uint32_t buffer_ring_lo32;

  /** High 32 bits of the base bus address of the
      host memory ring buffer. */
  uint32_t buffer_ring_hi32;

  /** Number of buffers in the host ring buffer. */
  uint32_t num_bufs;

  /** Size of a single buffer in bytes. */
  uint32_t buf_size;

} pcie_epp_regs_drv_t;

/**
 * User-visible set of fast PCIe EPP control registers, mapped at
 * ::PCIE_EPP_REG_USER_OFFSET.  tilepci.h defines structure
 * pcie_packet_queue_indices_reg_t that is exported to the user
 * application.  These two structs must match.
 */
typedef struct pcie_epp_regs_user
{
  /** Ring buffer write index (absolute, not reduced modulo the ring
      size). */
  uint32_t write_index;

  /** Ring buffer read index (absolute, not reduced modulo the ring
      size). */
  uint32_t read_index;

} pcie_epp_regs_user_t;

/** Index offsets of the fast I/O operations. */
typedef enum
{
  /** Send a packet buffer */
  PCIE_FASTIO_SENDV_PKT = 0,

  /** Total number of fast I/O indices */
  PCIE_FASTIO_NUM_INDEX = 1,
} pcie_fastio_index_t;

/**
 * @}
 */


/**
 * @defgroup c2c Chip-to-Chip Transfer Support
 * @{
 *
 * Chip-to-chip transfers allow multiple Tile processors to
 * communicate with each other via a zero-copy interface.  This
 * feature is only supported if the ::PCIE_FEATURE_C2C flag is
 * present.  The chip-to-chip API uses the @ref tile to post
 * communication commands between different PCIe ports.  It also
 * provides a separate host control interface that allows the host
 * driver to define the connection topology between 'send ports' and
 * 'receive ports' on each Tile PCIe link.
 *
 * The host interface is defined by the ::pcie_c2c_regs_t structure,
 * located at offset ::PCIE_C2C_REG_OFFSET of BAR0 of each PCI link.
 * This structure contains arrays of send ports and receive ports.
 * The host driver can arrange for send ports on one link to point at
 * receive ports on another link.  Once the driver fills in the BAR
 * offset and port index fields in the send and receive port
 * structures, it can write a 1 to the ready bit in the receive port
 * to create the communication channel.
 *
 * The host driver can also reset a communication channel by writing a
 * zero to the ready bit in the receive port structure.  After
 * clearing the ready bit, the driver should poll on the ready value
 * until the read result is zero.  Once the register goes to zero, the
 * communication channel is disconnected and the driver is free to
 * establish a different connection, reset the chip, and so forth.
 */

/** The offset, within BAR0, of the chip-to-chip binding registers
    (::pcie_c2c_regs_t). */
#define PCIE_C2C_REG_OFFSET 0x300000

/** The maximum number of commands that can be outstanding on a
    particular chip-to-chip command queue. */
#define PCIE_C2C_MAX_PENDING_CMDS 16

/**
 * Binding registers for one send port: they identify the remote
 * receive port to which the send port transmits data.
 */
typedef struct pcie_c2c_send_port
{
  /** BAR address on the destination card, low 32 bits. */
  uint32_t remote_bar_addr_lo;

  /** BAR address on the destination card, high 32 bits. */
  uint32_t remote_bar_addr_hi;

  /** Which receive port the data is sent to. */
  uint32_t remote_recv_port_index;
} pcie_c2c_send_port_t;


/**
 * Binding registers for one receive port: they identify the remote
 * send port from which the receive port receives data.
 */
typedef struct pcie_c2c_recv_port
{
  /** BAR address on the source card, low 32 bits. */
  uint32_t remote_bar_addr_lo;

  /** BAR address on the source card, high 32 bits. */
  uint32_t remote_bar_addr_hi;

  /** Which send port the data is received from. */
  uint32_t remote_send_port_index;

  /**
   * Indicates whether the binding fields (the remote BAR address and
   * the port index) are ready.  Writing a one to this register will
   * cause the PCIe subsystem to establish a connection with the
   * remote sender.  Writing a zero will tear down the connection.
   */
  uint32_t ready;
} pcie_c2c_recv_port_t;


/** The chip-to-chip control registers mapped at offset
    ::PCIE_C2C_REG_OFFSET of BAR0.  Contains ::PCIE_C2C_STREAM_COUNT
    send-port bindings and the same number of receive-port bindings,
    followed by the host link index and a set of internal registers. */
typedef struct
{
  /** Bindings for outbound data streams. */
  pcie_c2c_send_port_t send_ports[PCIE_C2C_STREAM_COUNT];

  /** Bindings for inbound data streams. */
  pcie_c2c_recv_port_t recv_ports[PCIE_C2C_STREAM_COUNT];

  /** Host link index. */
  uint32_t host_link_index;

  /* All registers below here are internal; the host should not access them.
     They are hidden from the declaration when __DOXYGEN__ is defined. */
  /* NOTE(review): these 64-bit fields follow a 32-bit field, so the compiler
     may insert padding ahead of them depending on PCIE_C2C_STREAM_COUNT and
     the target ABI -- confirm that every user of this layout agrees. */
#ifndef __DOXYGEN__
  uint64_t recv_param_target;
  uint64_t reset_request_target;
  uint64_t flush_target;
  uint64_t flush_ack_target;
  uint64_t link_status_target;
  uint64_t link_ack_target;
#endif
} pcie_c2c_regs_t;

#ifndef __DOXYGEN__
/** Offset of the EOP flag words that are written at the end of a
    chip-to-chip packet transfer; defined as 0x10000 past
    ::PCIE_C2C_REG_OFFSET. */
#define PCIE_C2C_EOP_METADATA_OFFSET (PCIE_C2C_REG_OFFSET + 0x10000)

/** Offset of the data-target regions to which chip-to-chip packets
    are DMAed; defined as 0x20000 past ::PCIE_C2C_REG_OFFSET. */
#define PCIE_C2C_DATA_OFFSET (PCIE_C2C_REG_OFFSET + 0x20000)
#endif /* __DOXYGEN__ */

/** Index offsets of the fast I/O operations. */
typedef enum
{
  /** Post a transfer command. */
  FASTPCI_FASTIO_POST_CMD = 0,

  /** Total number of fast I/O indices. */
  FASTPCI_FASTIO_NUM_INDEX = 1,
} fastpci_fastio_index_t;


/**
 * @}
 */

/**
 * @defgroup debug Dedicated Tile Debug Registers
 * @{
 *
 *  On occasion, developers attempting to debug a driver might want to
 *  find out what operations the hypervisor dedicated PCIe tiles are
 *  actually performing.  The PCIe interface provides a set of
 *  registers for reading various statistics about channelized
 *  messaging operations.  These debug registers are accessible via
 *  the ::pcie_debug_status_regs structure, an array of per-channel
 *  ::pcie_ibound_debug_status_regs entries, mapped at BAR0 +
 *  ::PCIE_DEBUG_REG_OFFSET.
 */

/**
 * The ibound debug status registers of a single channel, used for
 * reading various PCIe device-model state.
 */
typedef struct pcie_ibound_debug_status_regs
{
  /** Channel in reset state. */
  uint32_t in_reset;

  /** Tile issued SOC command. */
  uint32_t tile_has_soc;

  /** Host issued SOC command. */
  uint32_t host_has_soc;

  /** Number of host commands posted. */
  uint32_t host_cmd_posted;

  /** Number of tile commands posted. */
  uint32_t tile_cmd_posted;

  /** Number of completed DMAs. */
  uint32_t dma_completions;

  /** Number of host completions. */
  uint32_t host_completions;

  /** Number of tile completions. */
  uint32_t tile_completions;
} pcie_ibound_debug_status_regs_t;

/** Offset, within BAR0, of the debug registers region
    (struct pcie_debug_status_regs). */
#define PCIE_DEBUG_REG_OFFSET 0x10000

/**
 * The PCIe ibound debug status registers are mapped at offset
 * ::PCIE_DEBUG_REG_OFFSET of BAR0.  The region is laid out as an
 * array of PCIE_CHANNELS entries, each a per-channel
 * ::pcie_ibound_debug_status_regs_t.
 */
struct pcie_debug_status_regs
{
  /** Per-channel debug status registers of the PCIe subsystem. */
  pcie_ibound_debug_status_regs_t debug_status_regs[PCIE_CHANNELS];
};

/**
 * @}
 */

#endif /* __ASSEMBLER__ */

#endif /* _SYS_HV_DRV_PCIE_CHANNEL_INTF_H */
