From 57f0f512b273f60d52568b8c6b77e17f5636edc0 Mon Sep 17 00:00:00 2001 From: André Fabian Silva Delgado Date: Wed, 5 Aug 2015 17:04:01 -0300 Subject: Initial import --- arch/tile/include/hv/drv_mpipe_intf.h | 605 ++++++ arch/tile/include/hv/drv_mshim_intf.h | 50 + arch/tile/include/hv/drv_pcie_rc_intf.h | 38 + arch/tile/include/hv/drv_srom_intf.h | 41 + arch/tile/include/hv/drv_trio_intf.h | 199 ++ arch/tile/include/hv/drv_uart_intf.h | 33 + arch/tile/include/hv/drv_usb_host_intf.h | 39 + arch/tile/include/hv/drv_xgbe_impl.h | 300 +++ arch/tile/include/hv/drv_xgbe_intf.h | 615 ++++++ arch/tile/include/hv/hypervisor.h | 2598 ++++++++++++++++++++++++++ arch/tile/include/hv/iorpc.h | 714 +++++++ arch/tile/include/hv/netio_errors.h | 122 ++ arch/tile/include/hv/netio_intf.h | 2975 ++++++++++++++++++++++++++++++ arch/tile/include/hv/syscall_public.h | 42 + 14 files changed, 8371 insertions(+) create mode 100644 arch/tile/include/hv/drv_mpipe_intf.h create mode 100644 arch/tile/include/hv/drv_mshim_intf.h create mode 100644 arch/tile/include/hv/drv_pcie_rc_intf.h create mode 100644 arch/tile/include/hv/drv_srom_intf.h create mode 100644 arch/tile/include/hv/drv_trio_intf.h create mode 100644 arch/tile/include/hv/drv_uart_intf.h create mode 100644 arch/tile/include/hv/drv_usb_host_intf.h create mode 100644 arch/tile/include/hv/drv_xgbe_impl.h create mode 100644 arch/tile/include/hv/drv_xgbe_intf.h create mode 100644 arch/tile/include/hv/hypervisor.h create mode 100644 arch/tile/include/hv/iorpc.h create mode 100644 arch/tile/include/hv/netio_errors.h create mode 100644 arch/tile/include/hv/netio_intf.h create mode 100644 arch/tile/include/hv/syscall_public.h (limited to 'arch/tile/include/hv') diff --git a/arch/tile/include/hv/drv_mpipe_intf.h b/arch/tile/include/hv/drv_mpipe_intf.h new file mode 100644 index 000000000..c97e416dd --- /dev/null +++ b/arch/tile/include/hv/drv_mpipe_intf.h @@ -0,0 +1,605 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * Interface definitions for the mpipe driver. + */ + +#ifndef _SYS_HV_DRV_MPIPE_INTF_H +#define _SYS_HV_DRV_MPIPE_INTF_H + +#include +#include + + +/** Number of mPIPE instances supported */ +#define HV_MPIPE_INSTANCE_MAX (2) + +/** Number of buffer stacks (32). */ +#define HV_MPIPE_NUM_BUFFER_STACKS \ + (MPIPE_MMIO_INIT_DAT_GX36_1__BUFFER_STACK_MASK_WIDTH) + +/** Number of NotifRings (256). */ +#define HV_MPIPE_NUM_NOTIF_RINGS (MPIPE_NUM_NOTIF_RINGS) + +/** Number of NotifGroups (32). */ +#define HV_MPIPE_NUM_NOTIF_GROUPS (MPIPE_NUM_NOTIF_GROUPS) + +/** Number of buckets (4160). */ +#define HV_MPIPE_NUM_BUCKETS (MPIPE_NUM_BUCKETS) + +/** Number of "lo" buckets (4096). */ +#define HV_MPIPE_NUM_LO_BUCKETS 4096 + +/** Number of "hi" buckets (64). */ +#define HV_MPIPE_NUM_HI_BUCKETS \ + (HV_MPIPE_NUM_BUCKETS - HV_MPIPE_NUM_LO_BUCKETS) + +/** Number of edma rings (24). */ +#define HV_MPIPE_NUM_EDMA_RINGS \ + (MPIPE_MMIO_INIT_DAT_GX36_1__EDMA_POST_MASK_WIDTH) + + + + +/** A flag bit indicating a fixed resource allocation. */ +#define HV_MPIPE_ALLOC_FIXED 0x01 + +/** Offset for the config register MMIO region. */ +#define HV_MPIPE_CONFIG_MMIO_OFFSET \ + (MPIPE_MMIO_ADDR__REGION_VAL_CFG << MPIPE_MMIO_ADDR__REGION_SHIFT) + +/** Size of the config register MMIO region. */ +#define HV_MPIPE_CONFIG_MMIO_SIZE (64 * 1024) + +/** Offset for the config register MMIO region. */ +#define HV_MPIPE_FAST_MMIO_OFFSET \ + (MPIPE_MMIO_ADDR__REGION_VAL_IDMA << MPIPE_MMIO_ADDR__REGION_SHIFT) + +/** Size of the fast register MMIO region (IDMA, EDMA, buffer stack). */ +#define HV_MPIPE_FAST_MMIO_SIZE \ + ((MPIPE_MMIO_ADDR__REGION_VAL_BSM + 1 - MPIPE_MMIO_ADDR__REGION_VAL_IDMA) \ + << MPIPE_MMIO_ADDR__REGION_SHIFT) + + +/* + * Each type of resource allocation comes in quantized chunks, where + * XXX_BITS is the number of chunks, and XXX_RES_PER_BIT is the number + * of resources in each chunk. + */ + +/** Number of buffer stack chunks available (32). */ +#define HV_MPIPE_ALLOC_BUFFER_STACKS_BITS \ + MPIPE_MMIO_INIT_DAT_GX36_1__BUFFER_STACK_MASK_WIDTH + +/** Granularity of buffer stack allocation (1). */ +#define HV_MPIPE_ALLOC_BUFFER_STACKS_RES_PER_BIT \ + (HV_MPIPE_NUM_BUFFER_STACKS / HV_MPIPE_ALLOC_BUFFER_STACKS_BITS) + +/** Number of NotifRing chunks available (32). */ +#define HV_MPIPE_ALLOC_NOTIF_RINGS_BITS \ + MPIPE_MMIO_INIT_DAT_GX36_0__NOTIF_RING_MASK_WIDTH + +/** Granularity of NotifRing allocation (8). */ +#define HV_MPIPE_ALLOC_NOTIF_RINGS_RES_PER_BIT \ + (HV_MPIPE_NUM_NOTIF_RINGS / HV_MPIPE_ALLOC_NOTIF_RINGS_BITS) + +/** Number of NotifGroup chunks available (32). */ +#define HV_MPIPE_ALLOC_NOTIF_GROUPS_BITS \ + HV_MPIPE_NUM_NOTIF_GROUPS + +/** Granularity of NotifGroup allocation (1). */ +#define HV_MPIPE_ALLOC_NOTIF_GROUPS_RES_PER_BIT \ + (HV_MPIPE_NUM_NOTIF_GROUPS / HV_MPIPE_ALLOC_NOTIF_GROUPS_BITS) + +/** Number of lo bucket chunks available (16). */ +#define HV_MPIPE_ALLOC_LO_BUCKETS_BITS \ + MPIPE_MMIO_INIT_DAT_GX36_0__BUCKET_RELEASE_MASK_LO_WIDTH + +/** Granularity of lo bucket allocation (256). */ +#define HV_MPIPE_ALLOC_LO_BUCKETS_RES_PER_BIT \ + (HV_MPIPE_NUM_LO_BUCKETS / HV_MPIPE_ALLOC_LO_BUCKETS_BITS) + +/** Number of hi bucket chunks available (16). */ +#define HV_MPIPE_ALLOC_HI_BUCKETS_BITS \ + MPIPE_MMIO_INIT_DAT_GX36_0__BUCKET_RELEASE_MASK_HI_WIDTH + +/** Granularity of hi bucket allocation (4). */ +#define HV_MPIPE_ALLOC_HI_BUCKETS_RES_PER_BIT \ + (HV_MPIPE_NUM_HI_BUCKETS / HV_MPIPE_ALLOC_HI_BUCKETS_BITS) + +/** Number of eDMA ring chunks available (24). */ +#define HV_MPIPE_ALLOC_EDMA_RINGS_BITS \ + MPIPE_MMIO_INIT_DAT_GX36_1__EDMA_POST_MASK_WIDTH + +/** Granularity of eDMA ring allocation (1). */ +#define HV_MPIPE_ALLOC_EDMA_RINGS_RES_PER_BIT \ + (HV_MPIPE_NUM_EDMA_RINGS / HV_MPIPE_ALLOC_EDMA_RINGS_BITS) + + + + +/** Bit vector encoding which NotifRings are in a NotifGroup. */ +typedef struct +{ + /** The actual bits. */ + uint64_t ring_mask[4]; + +} gxio_mpipe_notif_group_bits_t; + + +/** Another name for MPIPE_LBL_INIT_DAT_BSTS_TBL_t. */ +typedef MPIPE_LBL_INIT_DAT_BSTS_TBL_t gxio_mpipe_bucket_info_t; + + + +/** Eight buffer stack ids. */ +typedef struct +{ + /** The stacks. */ + uint8_t stacks[8]; + +} gxio_mpipe_rules_stacks_t; + + +/** A destination mac address. */ +typedef struct +{ + /** The octets. */ + uint8_t octets[6]; + +} gxio_mpipe_rules_dmac_t; + + +/** A vlan. */ +typedef uint16_t gxio_mpipe_rules_vlan_t; + + + +/** Maximum number of characters in a link name. */ +#define GXIO_MPIPE_LINK_NAME_LEN 32 + + +/** Structure holding a link name. Only needed, and only typedef'ed, + * because the IORPC stub generator only handles types which are single + * words coming before the parameter name. */ +typedef struct +{ + /** The name itself. */ + char name[GXIO_MPIPE_LINK_NAME_LEN]; +} +_gxio_mpipe_link_name_t; + +/** Maximum number of characters in a symbol name. */ +#define GXIO_MPIPE_SYMBOL_NAME_LEN 128 + + +/** Structure holding a symbol name. Only needed, and only typedef'ed, + * because the IORPC stub generator only handles types which are single + * words coming before the parameter name. */ +typedef struct +{ + /** The name itself. */ + char name[GXIO_MPIPE_SYMBOL_NAME_LEN]; +} +_gxio_mpipe_symbol_name_t; + + +/** Structure holding a MAC address. */ +typedef struct +{ + /** The address. */ + uint8_t mac[6]; +} +_gxio_mpipe_link_mac_t; + + + +/** Request shared data permission -- that is, the ability to send and + * receive packets -- on the specified link. Other processes may also + * request shared data permission on the same link. + * + * No more than one of ::GXIO_MPIPE_LINK_DATA, ::GXIO_MPIPE_LINK_NO_DATA, + * or ::GXIO_MPIPE_LINK_EXCL_DATA may be specifed in a gxio_mpipe_link_open() + * call. If none are specified, ::GXIO_MPIPE_LINK_DATA is assumed. + */ +#define GXIO_MPIPE_LINK_DATA 0x00000001UL + +/** Do not request data permission on the specified link. + * + * No more than one of ::GXIO_MPIPE_LINK_DATA, ::GXIO_MPIPE_LINK_NO_DATA, + * or ::GXIO_MPIPE_LINK_EXCL_DATA may be specifed in a gxio_mpipe_link_open() + * call. If none are specified, ::GXIO_MPIPE_LINK_DATA is assumed. + */ +#define GXIO_MPIPE_LINK_NO_DATA 0x00000002UL + +/** Request exclusive data permission -- that is, the ability to send and + * receive packets -- on the specified link. No other processes may + * request data permission on this link, and if any process already has + * data permission on it, this open will fail. + * + * No more than one of ::GXIO_MPIPE_LINK_DATA, ::GXIO_MPIPE_LINK_NO_DATA, + * or ::GXIO_MPIPE_LINK_EXCL_DATA may be specifed in a gxio_mpipe_link_open() + * call. If none are specified, ::GXIO_MPIPE_LINK_DATA is assumed. + */ +#define GXIO_MPIPE_LINK_EXCL_DATA 0x00000004UL + +/** Request shared stats permission -- that is, the ability to read and write + * registers which contain link statistics, and to get link attributes -- + * on the specified link. Other processes may also request shared stats + * permission on the same link. + * + * No more than one of ::GXIO_MPIPE_LINK_STATS, ::GXIO_MPIPE_LINK_NO_STATS, + * or ::GXIO_MPIPE_LINK_EXCL_STATS may be specifed in a gxio_mpipe_link_open() + * call. If none are specified, ::GXIO_MPIPE_LINK_STATS is assumed. + */ +#define GXIO_MPIPE_LINK_STATS 0x00000008UL + +/** Do not request stats permission on the specified link. + * + * No more than one of ::GXIO_MPIPE_LINK_STATS, ::GXIO_MPIPE_LINK_NO_STATS, + * or ::GXIO_MPIPE_LINK_EXCL_STATS may be specifed in a gxio_mpipe_link_open() + * call. If none are specified, ::GXIO_MPIPE_LINK_STATS is assumed. + */ +#define GXIO_MPIPE_LINK_NO_STATS 0x00000010UL + +/** Request exclusive stats permission -- that is, the ability to read and + * write registers which contain link statistics, and to get link + * attributes -- on the specified link. No other processes may request + * stats permission on this link, and if any process already + * has stats permission on it, this open will fail. + * + * Requesting exclusive stats permission is normally a very bad idea, since + * it prevents programs like mpipe-stat from providing information on this + * link. Applications should only do this if they use MAC statistics + * registers, and cannot tolerate any of the clear-on-read registers being + * reset by other statistics programs. + * + * No more than one of ::GXIO_MPIPE_LINK_STATS, ::GXIO_MPIPE_LINK_NO_STATS, + * or ::GXIO_MPIPE_LINK_EXCL_STATS may be specifed in a gxio_mpipe_link_open() + * call. If none are specified, ::GXIO_MPIPE_LINK_STATS is assumed. + */ +#define GXIO_MPIPE_LINK_EXCL_STATS 0x00000020UL + +/** Request shared control permission -- that is, the ability to modify link + * attributes, and read and write MAC and MDIO registers -- on the + * specified link. Other processes may also request shared control + * permission on the same link. + * + * No more than one of ::GXIO_MPIPE_LINK_CTL, ::GXIO_MPIPE_LINK_NO_CTL, + * or ::GXIO_MPIPE_LINK_EXCL_CTL may be specifed in a gxio_mpipe_link_open() + * call. If none are specified, ::GXIO_MPIPE_LINK_CTL is assumed. + */ +#define GXIO_MPIPE_LINK_CTL 0x00000040UL + +/** Do not request control permission on the specified link. + * + * No more than one of ::GXIO_MPIPE_LINK_CTL, ::GXIO_MPIPE_LINK_NO_CTL, + * or ::GXIO_MPIPE_LINK_EXCL_CTL may be specifed in a gxio_mpipe_link_open() + * call. If none are specified, ::GXIO_MPIPE_LINK_CTL is assumed. + */ +#define GXIO_MPIPE_LINK_NO_CTL 0x00000080UL + +/** Request exclusive control permission -- that is, the ability to modify + * link attributes, and read and write MAC and MDIO registers -- on the + * specified link. No other processes may request control permission on + * this link, and if any process already has control permission on it, + * this open will fail. + * + * Requesting exclusive control permission is not always a good idea, since + * it prevents programs like mpipe-link from configuring the link. + * + * No more than one of ::GXIO_MPIPE_LINK_CTL, ::GXIO_MPIPE_LINK_NO_CTL, + * or ::GXIO_MPIPE_LINK_EXCL_CTL may be specifed in a gxio_mpipe_link_open() + * call. If none are specified, ::GXIO_MPIPE_LINK_CTL is assumed. + */ +#define GXIO_MPIPE_LINK_EXCL_CTL 0x00000100UL + +/** Set the desired state of the link to up, allowing any speeds which are + * supported by the link hardware, as part of this open operation; do not + * change the desired state of the link when it is closed or the process + * exits. No more than one of ::GXIO_MPIPE_LINK_AUTO_UP, + * ::GXIO_MPIPE_LINK_AUTO_UPDOWN, ::GXIO_MPIPE_LINK_AUTO_DOWN, or + * ::GXIO_MPIPE_LINK_AUTO_NONE may be specifed in a gxio_mpipe_link_open() + * call. If none are specified, ::GXIO_MPIPE_LINK_AUTO_UPDOWN is assumed. + */ +#define GXIO_MPIPE_LINK_AUTO_UP 0x00000200UL + +/** Set the desired state of the link to up, allowing any speeds which are + * supported by the link hardware, as part of this open operation; when the + * link is closed or this process exits, if no other process has the link + * open, set the desired state of the link to down. No more than one of + * ::GXIO_MPIPE_LINK_AUTO_UP, ::GXIO_MPIPE_LINK_AUTO_UPDOWN, + * ::GXIO_MPIPE_LINK_AUTO_DOWN, or ::GXIO_MPIPE_LINK_AUTO_NONE may be + * specifed in a gxio_mpipe_link_open() call. If none are specified, + * ::GXIO_MPIPE_LINK_AUTO_UPDOWN is assumed. + */ +#define GXIO_MPIPE_LINK_AUTO_UPDOWN 0x00000400UL + +/** Do not change the desired state of the link as part of the open + * operation; when the link is closed or this process exits, if no other + * process has the link open, set the desired state of the link to down. + * No more than one of ::GXIO_MPIPE_LINK_AUTO_UP, + * ::GXIO_MPIPE_LINK_AUTO_UPDOWN, ::GXIO_MPIPE_LINK_AUTO_DOWN, or + * ::GXIO_MPIPE_LINK_AUTO_NONE may be specifed in a gxio_mpipe_link_open() + * call. If none are specified, ::GXIO_MPIPE_LINK_AUTO_UPDOWN is assumed. + */ +#define GXIO_MPIPE_LINK_AUTO_DOWN 0x00000800UL + +/** Do not change the desired state of the link as part of the open + * operation; do not change the desired state of the link when it is + * closed or the process exits. No more than one of + * ::GXIO_MPIPE_LINK_AUTO_UP, ::GXIO_MPIPE_LINK_AUTO_UPDOWN, + * ::GXIO_MPIPE_LINK_AUTO_DOWN, or ::GXIO_MPIPE_LINK_AUTO_NONE may be + * specifed in a gxio_mpipe_link_open() call. If none are specified, + * ::GXIO_MPIPE_LINK_AUTO_UPDOWN is assumed. + */ +#define GXIO_MPIPE_LINK_AUTO_NONE 0x00001000UL + +/** Request that this open call not complete until the network link is up. + * The process will wait as long as necessary for this to happen; + * applications which wish to abandon waiting for the link after a + * specific time period should not specify this flag when opening a link, + * but should instead call gxio_mpipe_link_wait() afterward. The link + * must be opened with stats permission. Note that this flag by itself + * does not change the desired link state; if other open flags or previous + * link state changes have not requested a desired state of up, the open + * call will never complete. This flag is not available to kernel + * clients. + */ +#define GXIO_MPIPE_LINK_WAIT 0x00002000UL + + +/* + * Note: link attributes must fit in 24 bits, since we use the top 8 bits + * of the IORPC offset word for the channel number. + */ + +/** Determine whether jumbo frames may be received. If this attribute's + * value value is nonzero, the MAC will accept frames of up to 10240 bytes. + * If the value is zero, the MAC will only accept frames of up to 1544 + * bytes. The default value is zero. */ +#define GXIO_MPIPE_LINK_RECEIVE_JUMBO 0x010000 + +/** Determine whether to send pause frames on this link if the mPIPE packet + * FIFO is nearly full. If the value is zero, pause frames are not sent. + * If the value is nonzero, it is the delay value which will be sent in any + * pause frames which are output, in units of 512 bit times. + * + * Bear in mind that in almost all circumstances, the mPIPE packet FIFO + * will never fill up, since mPIPE will empty it as fast as or faster than + * the incoming data rate, by either delivering or dropping packets. The + * only situation in which this is not true is if the memory and cache + * subsystem is extremely heavily loaded, and mPIPE cannot perform DMA of + * packet data to memory in a timely fashion. In particular, pause frames + * will not be sent if packets cannot be delivered because + * NotifRings are full, buckets are full, or buffers are not available in + * a buffer stack. */ +#define GXIO_MPIPE_LINK_SEND_PAUSE 0x020000 + +/** Determine whether to suspend output on the receipt of pause frames. + * If the value is nonzero, mPIPE shim will suspend output on the link's + * channel when a pause frame is received. If the value is zero, pause + * frames will be ignored. The default value is zero. */ +#define GXIO_MPIPE_LINK_RECEIVE_PAUSE 0x030000 + +/** Interface MAC address. The value is a 6-byte MAC address, in the least + * significant 48 bits of the value; in other words, an address which would + * be printed as '12:34:56:78:90:AB' in IEEE 802 canonical format would + * be returned as 0x12345678ab. + * + * Depending upon the overall system design, a MAC address may or may not + * be available for each interface. Note that the interface's MAC address + * does not limit the packets received on its channel, although the + * classifier's rules could be configured to do that. Similarly, the MAC + * address is not used when transmitting packets, although applications + * could certainly decide to use the assigned address as a source MAC + * address when doing so. This attribute may only be retrieved with + * gxio_mpipe_link_get_attr(); it may not be modified. + */ +#define GXIO_MPIPE_LINK_MAC 0x040000 + +/** Determine whether to discard egress packets on link down. If this value + * is nonzero, packets sent on this link while the link is down will be + * discarded. If this value is zero, no packets will be sent on this link + * while it is down. The default value is one. */ +#define GXIO_MPIPE_LINK_DISCARD_IF_DOWN 0x050000 + +/** Possible link state. The value is a combination of link state flags, + * ORed together, that indicate link modes which are actually supported by + * the hardware. This attribute may only be retrieved with + * gxio_mpipe_link_get_attr(); it may not be modified. */ +#define GXIO_MPIPE_LINK_POSSIBLE_STATE 0x060000 + +/** Current link state. The value is a combination of link state flags, + * ORed together, that indicate the current state of the hardware. If the + * link is down, the value ANDed with ::GXIO_MPIPE_LINK_SPEED will be zero; + * if the link is up, the value ANDed with ::GXIO_MPIPE_LINK_SPEED will + * result in exactly one of the speed values, indicating the current speed. + * This attribute may only be retrieved with gxio_mpipe_link_get_attr(); it + * may not be modified. */ +#define GXIO_MPIPE_LINK_CURRENT_STATE 0x070000 + +/** Desired link state. The value is a conbination of flags, which specify + * the desired state for the link. With gxio_mpipe_link_set_attr(), this + * will, in the background, attempt to bring up the link using whichever of + * the requested flags are reasonable, or take down the link if the flags + * are zero. The actual link up or down operation may happen after this + * call completes. If the link state changes in the future, the system + * will continue to try to get back to the desired link state; for + * instance, if the link is brought up successfully, and then the network + * cable is disconnected, the link will go down. However, the desired + * state of the link is still up, so if the cable is reconnected, the link + * will be brought up again. + * + * With gxio_mpipe_link_set_attr(), this will indicate the desired state + * for the link, as set with a previous gxio_mpipe_link_set_attr() call, + * or implicitly by a gxio_mpipe_link_open() or link close operation. + * This may not reflect the current state of the link; to get that, use + * ::GXIO_MPIPE_LINK_CURRENT_STATE. + */ +#define GXIO_MPIPE_LINK_DESIRED_STATE 0x080000 + + + +/** Link can run, should run, or is running at 10 Mbps. */ +#define GXIO_MPIPE_LINK_10M 0x0000000000000001UL + +/** Link can run, should run, or is running at 100 Mbps. */ +#define GXIO_MPIPE_LINK_100M 0x0000000000000002UL + +/** Link can run, should run, or is running at 1 Gbps. */ +#define GXIO_MPIPE_LINK_1G 0x0000000000000004UL + +/** Link can run, should run, or is running at 10 Gbps. */ +#define GXIO_MPIPE_LINK_10G 0x0000000000000008UL + +/** Link can run, should run, or is running at 20 Gbps. */ +#define GXIO_MPIPE_LINK_20G 0x0000000000000010UL + +/** Link can run, should run, or is running at 25 Gbps. */ +#define GXIO_MPIPE_LINK_25G 0x0000000000000020UL + +/** Link can run, should run, or is running at 50 Gbps. */ +#define GXIO_MPIPE_LINK_50G 0x0000000000000040UL + +/** Link should run at the highest speed supported by the link and by + * the device connected to the link. Only usable as a value for + * the link's desired state; never returned as a value for the current + * or possible states. */ +#define GXIO_MPIPE_LINK_ANYSPEED 0x0000000000000800UL + +/** All legal link speeds. This value is provided for use in extracting + * the speed-related subset of the link state flags; it is not intended + * to be set directly as a value for one of the GXIO_MPIPE_LINK_xxx_STATE + * attributes. A link is up or is requested to be up if its current or + * desired state, respectively, ANDED with this value, is nonzero. */ +#define GXIO_MPIPE_LINK_SPEED_MASK 0x0000000000000FFFUL + +/** Link can run, should run, or is running in MAC loopback mode. This + * loops transmitted packets back to the receiver, inside the Tile + * Processor. */ +#define GXIO_MPIPE_LINK_LOOP_MAC 0x0000000000001000UL + +/** Link can run, should run, or is running in PHY loopback mode. This + * loops transmitted packets back to the receiver, inside the external + * PHY chip. */ +#define GXIO_MPIPE_LINK_LOOP_PHY 0x0000000000002000UL + +/** Link can run, should run, or is running in external loopback mode. + * This requires that an external loopback plug be installed on the + * Ethernet port. Note that only some links require that this be + * configured via the gxio_mpipe_link routines; other links can do + * external loopack with the plug and no special configuration. */ +#define GXIO_MPIPE_LINK_LOOP_EXT 0x0000000000004000UL + +/** All legal loopback types. */ +#define GXIO_MPIPE_LINK_LOOP_MASK 0x000000000000F000UL + +/** Link can run, should run, or is running in full-duplex mode. + * If neither ::GXIO_MPIPE_LINK_FDX nor ::GXIO_MPIPE_LINK_HDX are + * specified in a set of desired state flags, both are assumed. */ +#define GXIO_MPIPE_LINK_FDX 0x0000000000010000UL + +/** Link can run, should run, or is running in half-duplex mode. + * If neither ::GXIO_MPIPE_LINK_FDX nor ::GXIO_MPIPE_LINK_HDX are + * specified in a set of desired state flags, both are assumed. */ +#define GXIO_MPIPE_LINK_HDX 0x0000000000020000UL + + +/** An individual rule. */ +typedef struct +{ + /** The total size. */ + uint16_t size; + + /** The priority. */ + int16_t priority; + + /** The "headroom" in each buffer. */ + uint8_t headroom; + + /** The "tailroom" in each buffer. */ + uint8_t tailroom; + + /** The "capacity" of the largest buffer. */ + uint16_t capacity; + + /** The mask for converting a flow hash into a bucket. */ + uint16_t bucket_mask; + + /** The offset for converting a flow hash into a bucket. */ + uint16_t bucket_first; + + /** The buffer stack ids. */ + gxio_mpipe_rules_stacks_t stacks; + + /** The actual channels. */ + uint32_t channel_bits; + + /** The number of dmacs. */ + uint16_t num_dmacs; + + /** The number of vlans. */ + uint16_t num_vlans; + + /** The actual dmacs and vlans. */ + uint8_t dmacs_and_vlans[]; + +} gxio_mpipe_rules_rule_t; + + +/** A list of classifier rules. */ +typedef struct +{ + /** The offset to the end of the current rule. */ + uint16_t tail; + + /** The offset to the start of the current rule. */ + uint16_t head; + + /** The actual rules. */ + uint8_t rules[4096 - 4]; + +} gxio_mpipe_rules_list_t; + + + + +/** mPIPE statistics structure. These counters include all relevant + * events occurring on all links within the mPIPE shim. */ +typedef struct +{ + /** Number of ingress packets dropped for any reason. */ + uint64_t ingress_drops; + /** Number of ingress packets dropped because a buffer stack was empty. */ + uint64_t ingress_drops_no_buf; + /** Number of ingress packets dropped or truncated due to lack of space in + * the iPkt buffer. */ + uint64_t ingress_drops_ipkt; + /** Number of ingress packets dropped by the classifier or load balancer */ + uint64_t ingress_drops_cls_lb; + /** Total number of ingress packets. */ + uint64_t ingress_packets; + /** Total number of egress packets. */ + uint64_t egress_packets; + /** Total number of ingress bytes. */ + uint64_t ingress_bytes; + /** Total number of egress bytes. */ + uint64_t egress_bytes; +} +gxio_mpipe_stats_t; + + +#endif /* _SYS_HV_DRV_MPIPE_INTF_H */ diff --git a/arch/tile/include/hv/drv_mshim_intf.h b/arch/tile/include/hv/drv_mshim_intf.h new file mode 100644 index 000000000..c6ef3bdc5 --- /dev/null +++ b/arch/tile/include/hv/drv_mshim_intf.h @@ -0,0 +1,50 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * @file drv_mshim_intf.h + * Interface definitions for the Linux EDAC memory controller driver. + */ + +#ifndef _SYS_HV_INCLUDE_DRV_MSHIM_INTF_H +#define _SYS_HV_INCLUDE_DRV_MSHIM_INTF_H + +/** Number of memory controllers in the public API. */ +#define TILE_MAX_MSHIMS 4 + +/** Memory info under each memory controller. */ +struct mshim_mem_info +{ + uint64_t mem_size; /**< Total memory size in bytes. */ + uint8_t mem_type; /**< Memory type, DDR2 or DDR3. */ + uint8_t mem_ecc; /**< Memory supports ECC. */ +}; + +/** + * DIMM error structure. + * For now, only correctable errors are counted and the mshim doesn't record + * the error PA. HV takes panic upon uncorrectable errors. + */ +struct mshim_mem_error +{ + uint32_t sbe_count; /**< Number of single-bit errors. */ +}; + +/** Read this offset to get the memory info per mshim. */ +#define MSHIM_MEM_INFO_OFF 0x100 + +/** Read this offset to check DIMM error. */ +#define MSHIM_MEM_ERROR_OFF 0x200 + +#endif /* _SYS_HV_INCLUDE_DRV_MSHIM_INTF_H */ diff --git a/arch/tile/include/hv/drv_pcie_rc_intf.h b/arch/tile/include/hv/drv_pcie_rc_intf.h new file mode 100644 index 000000000..9bd2243be --- /dev/null +++ b/arch/tile/include/hv/drv_pcie_rc_intf.h @@ -0,0 +1,38 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * @file drv_pcie_rc_intf.h + * Interface definitions for the PCIE Root Complex. + */ + +#ifndef _SYS_HV_DRV_PCIE_RC_INTF_H +#define _SYS_HV_DRV_PCIE_RC_INTF_H + +/** File offset for reading the interrupt base number used for PCIE legacy + interrupts and PLX Gen 1 requirement flag */ +#define PCIE_RC_CONFIG_MASK_OFF 0 + + +/** + * Structure used for obtaining PCIe config information, read from the PCIE + * subsystem /ctl file at initialization + */ +typedef struct pcie_rc_config +{ + int intr; /**< interrupt number used for downcall */ + int plx_gen1; /**< flag for PLX Gen 1 configuration */ +} pcie_rc_config_t; + +#endif /* _SYS_HV_DRV_PCIE_RC_INTF_H */ diff --git a/arch/tile/include/hv/drv_srom_intf.h b/arch/tile/include/hv/drv_srom_intf.h new file mode 100644 index 000000000..6395faa6d --- /dev/null +++ b/arch/tile/include/hv/drv_srom_intf.h @@ -0,0 +1,41 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * @file drv_srom_intf.h + * Interface definitions for the SPI Flash ROM driver. + */ + +#ifndef _SYS_HV_INCLUDE_DRV_SROM_INTF_H +#define _SYS_HV_INCLUDE_DRV_SROM_INTF_H + +/** Read this offset to get the total device size. */ +#define SROM_TOTAL_SIZE_OFF 0xF0000000 + +/** Read this offset to get the device sector size. */ +#define SROM_SECTOR_SIZE_OFF 0xF0000004 + +/** Read this offset to get the device page size. */ +#define SROM_PAGE_SIZE_OFF 0xF0000008 + +/** Write this offset to flush any pending writes. */ +#define SROM_FLUSH_OFF 0xF1000000 + +/** Write this offset, plus the byte offset of the start of a sector, to + * erase a sector. Any write data is ignored, but there must be at least + * one byte of write data. Only applies when the driver is in MTD mode. + */ +#define SROM_ERASE_OFF 0xF2000000 + +#endif /* _SYS_HV_INCLUDE_DRV_SROM_INTF_H */ diff --git a/arch/tile/include/hv/drv_trio_intf.h b/arch/tile/include/hv/drv_trio_intf.h new file mode 100644 index 000000000..237e04dee --- /dev/null +++ b/arch/tile/include/hv/drv_trio_intf.h @@ -0,0 +1,199 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * Interface definitions for the trio driver. + */ + +#ifndef _SYS_HV_DRV_TRIO_INTF_H +#define _SYS_HV_DRV_TRIO_INTF_H + +#include + +/** The vendor ID for all Tilera processors. */ +#define TILERA_VENDOR_ID 0x1a41 + +/** The device ID for the Gx36 processor. */ +#define TILERA_GX36_DEV_ID 0x0200 + +/** Device ID for our internal bridge when running as RC. */ +#define TILERA_GX36_RC_DEV_ID 0x2000 + +/** Maximum number of TRIO interfaces. */ +#define TILEGX_NUM_TRIO 2 + +/** Gx36 has max 3 PCIe MACs per TRIO interface. */ +#define TILEGX_TRIO_PCIES 3 + +/** Specify port properties for a PCIe MAC. */ +struct pcie_port_property +{ + /** If true, the link can be configured in PCIe root complex mode. */ + uint8_t allow_rc: 1; + + /** If true, the link can be configured in PCIe endpoint mode. */ + uint8_t allow_ep: 1; + + /** If true, the link can be configured in StreamIO mode. */ + uint8_t allow_sio: 1; + + /** If true, the link is allowed to support 1-lane operation. Software + * will not consider it an error if the link comes up as a x1 link. */ + uint8_t allow_x1: 1; + + /** If true, the link is allowed to support 2-lane operation. Software + * will not consider it an error if the link comes up as a x2 link. */ + uint8_t allow_x2: 1; + + /** If true, the link is allowed to support 4-lane operation. Software + * will not consider it an error if the link comes up as a x4 link. */ + uint8_t allow_x4: 1; + + /** If true, the link is allowed to support 8-lane operation. Software + * will not consider it an error if the link comes up as a x8 link. */ + uint8_t allow_x8: 1; + + /** If true, this link is connected to a device which may or may not + * be present. */ + uint8_t removable: 1; + +}; + +/** Configurations can be issued to configure a char stream interrupt. */ +typedef enum pcie_stream_intr_config_sel_e +{ + /** Interrupt configuration for memory map regions. */ + MEM_MAP_SEL, + + /** Interrupt configuration for push DMAs. */ + PUSH_DMA_SEL, + + /** Interrupt configuration for pull DMAs. */ + PULL_DMA_SEL, +} +pcie_stream_intr_config_sel_t; + + +/** The mmap file offset (PA) of the TRIO config region. */ +#define HV_TRIO_CONFIG_OFFSET \ + ((unsigned long long)TRIO_MMIO_ADDRESS_SPACE__REGION_VAL_CFG << \ + TRIO_MMIO_ADDRESS_SPACE__REGION_SHIFT) + +/** The maximum size of the TRIO config region. */ +#define HV_TRIO_CONFIG_SIZE \ + (1ULL << TRIO_CFG_REGION_ADDR__REGION_SHIFT) + +/** Size of the config region mapped into client. We can't use + * TRIO_MMIO_ADDRESS_SPACE__OFFSET_WIDTH because it + * will require the kernel to allocate 4GB VA space + * from the VMALLOC region which has a total range + * of 4GB. + */ +#define HV_TRIO_CONFIG_IOREMAP_SIZE \ + ((uint64_t) 1 << TRIO_CFG_REGION_ADDR__PROT_SHIFT) + +/** The mmap file offset (PA) of a scatter queue region. */ +#define HV_TRIO_SQ_OFFSET(queue) \ + (((unsigned long long)TRIO_MMIO_ADDRESS_SPACE__REGION_VAL_MAP_SQ << \ + TRIO_MMIO_ADDRESS_SPACE__REGION_SHIFT) | \ + ((queue) << TRIO_MAP_SQ_REGION_ADDR__SQ_SEL_SHIFT)) + +/** The maximum size of a scatter queue region. */ +#define HV_TRIO_SQ_SIZE \ + (1ULL << TRIO_MAP_SQ_REGION_ADDR__SQ_SEL_SHIFT) + + +/** The "hardware MMIO region" of the first PIO region. */ +#define HV_TRIO_FIRST_PIO_REGION 8 + +/** The mmap file offset (PA) of a PIO region. */ +#define HV_TRIO_PIO_OFFSET(region) \ + (((unsigned long long)(region) + HV_TRIO_FIRST_PIO_REGION) \ + << TRIO_PIO_REGIONS_ADDR__REGION_SHIFT) + +/** The maximum size of a PIO region. */ +#define HV_TRIO_PIO_SIZE (1ULL << TRIO_PIO_REGIONS_ADDR__ADDR_WIDTH) + + +/** The mmap file offset (PA) of a push DMA region. */ +#define HV_TRIO_PUSH_DMA_OFFSET(ring) \ + (((unsigned long long)TRIO_MMIO_ADDRESS_SPACE__REGION_VAL_PUSH_DMA << \ + TRIO_MMIO_ADDRESS_SPACE__REGION_SHIFT) | \ + ((ring) << TRIO_PUSH_DMA_REGION_ADDR__RING_SEL_SHIFT)) + +/** The mmap file offset (PA) of a pull DMA region. */ +#define HV_TRIO_PULL_DMA_OFFSET(ring) \ + (((unsigned long long)TRIO_MMIO_ADDRESS_SPACE__REGION_VAL_PULL_DMA << \ + TRIO_MMIO_ADDRESS_SPACE__REGION_SHIFT) | \ + ((ring) << TRIO_PULL_DMA_REGION_ADDR__RING_SEL_SHIFT)) + +/** The maximum size of a DMA region. */ +#define HV_TRIO_DMA_REGION_SIZE \ + (1ULL << TRIO_PUSH_DMA_REGION_ADDR__RING_SEL_SHIFT) + + +/** The mmap file offset (PA) of a Mem-Map interrupt region. */ +#define HV_TRIO_MEM_MAP_INTR_OFFSET(map) \ + (((unsigned long long)TRIO_MMIO_ADDRESS_SPACE__REGION_VAL_MAP_MEM << \ + TRIO_MMIO_ADDRESS_SPACE__REGION_SHIFT) | \ + ((map) << TRIO_MAP_MEM_REGION_ADDR__MAP_SEL_SHIFT)) + +/** The maximum size of a Mem-Map interrupt region. */ +#define HV_TRIO_MEM_MAP_INTR_SIZE \ + (1ULL << TRIO_MAP_MEM_REGION_ADDR__MAP_SEL_SHIFT) + + +/** A flag bit indicating a fixed resource allocation. */ +#define HV_TRIO_ALLOC_FIXED 0x01 + +/** TRIO requires that all mappings have 4kB aligned start addresses. */ +#define HV_TRIO_PAGE_SHIFT 12 + +/** TRIO requires that all mappings have 4kB aligned start addresses. */ +#define HV_TRIO_PAGE_SIZE (1ull << HV_TRIO_PAGE_SHIFT) + + +/* Specify all PCIe port properties for a TRIO. */ +struct pcie_trio_ports_property +{ + struct pcie_port_property ports[TILEGX_TRIO_PCIES]; + + /** Set if this TRIO belongs to a Gx72 device. */ + uint8_t is_gx72; +}; + +/* Flags indicating traffic class. */ +#define HV_TRIO_FLAG_TC_SHIFT 4 +#define HV_TRIO_FLAG_TC_RMASK 0xf +#define HV_TRIO_FLAG_TC(N) \ + ((((N) & HV_TRIO_FLAG_TC_RMASK) + 1) << HV_TRIO_FLAG_TC_SHIFT) + +/* Flags indicating virtual functions. */ +#define HV_TRIO_FLAG_VFUNC_SHIFT 8 +#define HV_TRIO_FLAG_VFUNC_RMASK 0xff +#define HV_TRIO_FLAG_VFUNC(N) \ + ((((N) & HV_TRIO_FLAG_VFUNC_RMASK) + 1) << HV_TRIO_FLAG_VFUNC_SHIFT) + + +/* Flag indicating an ordered PIO region. */ +#define HV_TRIO_PIO_FLAG_ORDERED (1 << 16) + +/* Flags indicating special types of PIO regions. */ +#define HV_TRIO_PIO_FLAG_SPACE_SHIFT 17 +#define HV_TRIO_PIO_FLAG_SPACE_MASK (0x3 << HV_TRIO_PIO_FLAG_SPACE_SHIFT) +#define HV_TRIO_PIO_FLAG_CONFIG_SPACE (0x1 << HV_TRIO_PIO_FLAG_SPACE_SHIFT) +#define HV_TRIO_PIO_FLAG_IO_SPACE (0x2 << HV_TRIO_PIO_FLAG_SPACE_SHIFT) + + +#endif /* _SYS_HV_DRV_TRIO_INTF_H */ diff --git a/arch/tile/include/hv/drv_uart_intf.h b/arch/tile/include/hv/drv_uart_intf.h new file mode 100644 index 000000000..f5379e240 --- /dev/null +++ b/arch/tile/include/hv/drv_uart_intf.h @@ -0,0 +1,33 @@ +/* + * Copyright 2013 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * Interface definitions for the UART driver. + */ + +#ifndef _SYS_HV_DRV_UART_INTF_H +#define _SYS_HV_DRV_UART_INTF_H + +#include + +/** Number of UART ports supported. */ +#define TILEGX_UART_NR 2 + +/** The mmap file offset (PA) of the UART MMIO region. */ +#define HV_UART_MMIO_OFFSET 0 + +/** The maximum size of the UARTs MMIO region (64K Bytes). */ +#define HV_UART_MMIO_SIZE (1UL << 16) + +#endif /* _SYS_HV_DRV_UART_INTF_H */ diff --git a/arch/tile/include/hv/drv_usb_host_intf.h b/arch/tile/include/hv/drv_usb_host_intf.h new file mode 100644 index 000000000..24ce774a3 --- /dev/null +++ b/arch/tile/include/hv/drv_usb_host_intf.h @@ -0,0 +1,39 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * Interface definitions for the USB host driver. + */ + +#ifndef _SYS_HV_DRV_USB_HOST_INTF_H +#define _SYS_HV_DRV_USB_HOST_INTF_H + +#include + + +/** Offset for the EHCI register MMIO region. */ +#define HV_USB_HOST_MMIO_OFFSET_EHCI ((uint64_t) USB_HOST_HCCAPBASE_REG) + +/** Offset for the OHCI register MMIO region. */ +#define HV_USB_HOST_MMIO_OFFSET_OHCI ((uint64_t) USB_HOST_OHCD_HC_REVISION_REG) + +/** Size of the register MMIO region. This turns out to be the same for + * both EHCI and OHCI. */ +#define HV_USB_HOST_MMIO_SIZE ((uint64_t) 0x1000) + +/** The number of service domains supported by the USB host shim. */ +#define HV_USB_HOST_NUM_SVC_DOM 1 + + +#endif /* _SYS_HV_DRV_USB_HOST_INTF_H */ diff --git a/arch/tile/include/hv/drv_xgbe_impl.h b/arch/tile/include/hv/drv_xgbe_impl.h new file mode 100644 index 000000000..3a73b2b44 --- /dev/null +++ b/arch/tile/include/hv/drv_xgbe_impl.h @@ -0,0 +1,300 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * @file drivers/xgbe/impl.h + * Implementation details for the NetIO library. + */ + +#ifndef __DRV_XGBE_IMPL_H__ +#define __DRV_XGBE_IMPL_H__ + +#include +#include +#include + + +/** How many groups we have (log2). */ +#define LOG2_NUM_GROUPS (12) +/** How many groups we have. */ +#define NUM_GROUPS (1 << LOG2_NUM_GROUPS) + +/** Number of output requests we'll buffer per tile. */ +#define EPP_REQS_PER_TILE (32) + +/** Words used in an eDMA command without checksum acceleration. */ +#define EDMA_WDS_NO_CSUM 8 +/** Words used in an eDMA command with checksum acceleration. */ +#define EDMA_WDS_CSUM 10 +/** Total available words in the eDMA command FIFO. */ +#define EDMA_WDS_TOTAL 128 + + +/* + * FIXME: These definitions are internal and should have underscores! + * NOTE: The actual numeric values here are intentional and allow us to + * optimize the concept "if small ... else if large ... else ...", by + * checking for the low bit being set, and then for non-zero. + * These are used as array indices, so they must have the values (0, 1, 2) + * in some order. + */ +#define SIZE_SMALL (1) /**< Small packet queue. */ +#define SIZE_LARGE (2) /**< Large packet queue. */ +#define SIZE_JUMBO (0) /**< Jumbo packet queue. */ + +/** The number of "SIZE_xxx" values. */ +#define NETIO_NUM_SIZES 3 + + +/* + * Default numbers of packets for IPP drivers. These values are chosen + * such that CIPP1 will not overflow its L2 cache. + */ + +/** The default number of small packets. */ +#define NETIO_DEFAULT_SMALL_PACKETS 2750 +/** The default number of large packets. */ +#define NETIO_DEFAULT_LARGE_PACKETS 2500 +/** The default number of jumbo packets. */ +#define NETIO_DEFAULT_JUMBO_PACKETS 250 + + +/** Log2 of the size of a memory arena. */ +#define NETIO_ARENA_SHIFT 24 /* 16 MB */ +/** Size of a memory arena. */ +#define NETIO_ARENA_SIZE (1 << NETIO_ARENA_SHIFT) + + +/** A queue of packets. + * + * This structure partially defines a queue of packets waiting to be + * processed. The queue as a whole is written to by an interrupt handler and + * read by non-interrupt code; this data structure is what's touched by the + * interrupt handler. The other part of the queue state, the read offset, is + * kept in user space, not in hypervisor space, so it is in a separate data + * structure. + * + * The read offset (__packet_receive_read in the user part of the queue + * structure) points to the next packet to be read. When the read offset is + * equal to the write offset, the queue is empty; therefore the queue must + * contain one more slot than the required maximum queue size. + * + * Here's an example of all 3 state variables and what they mean. All + * pointers move left to right. + * + * @code + * I I V V V V I I I I + * 0 1 2 3 4 5 6 7 8 9 10 + * ^ ^ ^ ^ + * | | | + * | | __last_packet_plus_one + * | __buffer_write + * __packet_receive_read + * @endcode + * + * This queue has 10 slots, and thus can hold 9 packets (_last_packet_plus_one + * = 10). The read pointer is at 2, and the write pointer is at 6; thus, + * there are valid, unread packets in slots 2, 3, 4, and 5. The remaining + * slots are invalid (do not contain a packet). + */ +typedef struct { + /** Byte offset of the next notify packet to be written: zero for the first + * packet on the queue, sizeof (netio_pkt_t) for the second packet on the + * queue, etc. */ + volatile uint32_t __packet_write; + + /** Offset of the packet after the last valid packet (i.e., when any + * pointer is incremented to this value, it wraps back to zero). */ + uint32_t __last_packet_plus_one; +} +__netio_packet_queue_t; + + +/** A queue of buffers. + * + * This structure partially defines a queue of empty buffers which have been + * obtained via requests to the IPP. (The elements of the queue are packet + * handles, which are transformed into a full netio_pkt_t when the buffer is + * retrieved.) The queue as a whole is written to by an interrupt handler and + * read by non-interrupt code; this data structure is what's touched by the + * interrupt handler. The other parts of the queue state, the read offset and + * requested write offset, are kept in user space, not in hypervisor space, so + * they are in a separate data structure. + * + * The read offset (__buffer_read in the user part of the queue structure) + * points to the next buffer to be read. When the read offset is equal to the + * write offset, the queue is empty; therefore the queue must contain one more + * slot than the required maximum queue size. + * + * The requested write offset (__buffer_requested_write in the user part of + * the queue structure) points to the slot which will hold the next buffer we + * request from the IPP, once we get around to sending such a request. When + * the requested write offset is equal to the write offset, no requests for + * new buffers are outstanding; when the requested write offset is one greater + * than the read offset, no more requests may be sent. + * + * Note that, unlike the packet_queue, the buffer_queue places incoming + * buffers at decreasing addresses. This makes the check for "is it time to + * wrap the buffer pointer" cheaper in the assembly code which receives new + * buffers, and means that the value which defines the queue size, + * __last_buffer, is different than in the packet queue. Also, the offset + * used in the packet_queue is already scaled by the size of a packet; here we + * use unscaled slot indices for the offsets. (These differences are + * historical, and in the future it's possible that the packet_queue will look + * more like this queue.) + * + * @code + * Here's an example of all 4 state variables and what they mean. Remember: + * all pointers move right to left. + * + * V V V I I R R V V V + * 0 1 2 3 4 5 6 7 8 9 + * ^ ^ ^ ^ + * | | | | + * | | | __last_buffer + * | | __buffer_write + * | __buffer_requested_write + * __buffer_read + * @endcode + * + * This queue has 10 slots, and thus can hold 9 buffers (_last_buffer = 9). + * The read pointer is at 2, and the write pointer is at 6; thus, there are + * valid, unread buffers in slots 2, 1, 0, 9, 8, and 7. The requested write + * pointer is at 4; thus, requests have been made to the IPP for buffers which + * will be placed in slots 6 and 5 when they arrive. Finally, the remaining + * slots are invalid (do not contain a buffer). + */ +typedef struct +{ + /** Ordinal number of the next buffer to be written: 0 for the first slot in + * the queue, 1 for the second slot in the queue, etc. */ + volatile uint32_t __buffer_write; + + /** Ordinal number of the last buffer (i.e., when any pointer is decremented + * below zero, it is reloaded with this value). */ + uint32_t __last_buffer; +} +__netio_buffer_queue_t; + + +/** + * An object for providing Ethernet packets to a process. + */ +typedef struct __netio_queue_impl_t +{ + /** The queue of packets waiting to be received. */ + __netio_packet_queue_t __packet_receive_queue; + /** The intr bit mask that IDs this device. */ + unsigned int __intr_id; + /** Offset to queues of empty buffers, one per size. */ + uint32_t __buffer_queue[NETIO_NUM_SIZES]; + /** The address of the first EPP tile, or -1 if no EPP. */ + /* ISSUE: Actually this is always "0" or "~0". */ + uint32_t __epp_location; + /** The queue ID that this queue represents. */ + unsigned int __queue_id; + /** Number of acknowledgements received. */ + volatile uint32_t __acks_received; + /** Last completion number received for packet_sendv. */ + volatile uint32_t __last_completion_rcv; + /** Number of packets allowed to be outstanding. */ + uint32_t __max_outstanding; + /** First VA available for packets. */ + void* __va_0; + /** First VA in second range available for packets. */ + void* __va_1; + /** Padding to align the "__packets" field to the size of a netio_pkt_t. */ + uint32_t __padding[3]; + /** The packets themselves. */ + netio_pkt_t __packets[0]; +} +netio_queue_impl_t; + + +/** + * An object for managing the user end of a NetIO queue. + */ +typedef struct __netio_queue_user_impl_t +{ + /** The next incoming packet to be read. */ + uint32_t __packet_receive_read; + /** The next empty buffers to be read, one index per size. */ + uint8_t __buffer_read[NETIO_NUM_SIZES]; + /** Where the empty buffer we next request from the IPP will go, one index + * per size. */ + uint8_t __buffer_requested_write[NETIO_NUM_SIZES]; + /** PCIe interface flag. */ + uint8_t __pcie; + /** Number of packets left to be received before we send a credit update. */ + uint32_t __receive_credit_remaining; + /** Value placed in __receive_credit_remaining when it reaches zero. */ + uint32_t __receive_credit_interval; + /** First fast I/O routine index. */ + uint32_t __fastio_index; + /** Number of acknowledgements expected. */ + uint32_t __acks_outstanding; + /** Last completion number requested. */ + uint32_t __last_completion_req; + /** File descriptor for driver. */ + int __fd; +} +netio_queue_user_impl_t; + + +#define NETIO_GROUP_CHUNK_SIZE 64 /**< Max # groups in one IPP request */ +#define NETIO_BUCKET_CHUNK_SIZE 64 /**< Max # buckets in one IPP request */ + + +/** Internal structure used to convey packet send information to the + * hypervisor. FIXME: Actually, it's not used for that anymore, but + * netio_packet_send() still uses it internally. + */ +typedef struct +{ + uint16_t flags; /**< Packet flags (__NETIO_SEND_FLG_xxx) */ + uint16_t transfer_size; /**< Size of packet */ + uint32_t va; /**< VA of start of packet */ + __netio_pkt_handle_t handle; /**< Packet handle */ + uint32_t csum0; /**< First checksum word */ + uint32_t csum1; /**< Second checksum word */ +} +__netio_send_cmd_t; + + +/** Flags used in two contexts: + * - As the "flags" member in the __netio_send_cmd_t, above; used only + * for netio_pkt_send_{prepare,commit}. + * - As part of the flags passed to the various send packet fast I/O calls. + */ + +/** Need acknowledgement on this packet. Note that some code in the + * normal send_pkt fast I/O handler assumes that this is equal to 1. */ +#define __NETIO_SEND_FLG_ACK 0x1 + +/** Do checksum on this packet. (Only used with the __netio_send_cmd_t; + * normal packet sends use a special fast I/O index to denote checksumming, + * and multi-segment sends test the checksum descriptor.) */ +#define __NETIO_SEND_FLG_CSUM 0x2 + +/** Get a completion on this packet. Only used with multi-segment sends. */ +#define __NETIO_SEND_FLG_COMPLETION 0x4 + +/** Position of the number-of-extra-segments value in the flags word. + Only used with multi-segment sends. */ +#define __NETIO_SEND_FLG_XSEG_SHIFT 3 + +/** Width of the number-of-extra-segments value in the flags word. */ +#define __NETIO_SEND_FLG_XSEG_WIDTH 2 + +#endif /* __DRV_XGBE_IMPL_H__ */ diff --git a/arch/tile/include/hv/drv_xgbe_intf.h b/arch/tile/include/hv/drv_xgbe_intf.h new file mode 100644 index 000000000..2a20b266d --- /dev/null +++ b/arch/tile/include/hv/drv_xgbe_intf.h @@ -0,0 +1,615 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * @file drv_xgbe_intf.h + * Interface to the hypervisor XGBE driver. + */ + +#ifndef __DRV_XGBE_INTF_H__ +#define __DRV_XGBE_INTF_H__ + +/** + * An object for forwarding VAs and PAs to the hypervisor. + * @ingroup types + * + * This allows the supervisor to specify a number of areas of memory to + * store packet buffers. + */ +typedef struct +{ + /** The physical address of the memory. */ + HV_PhysAddr pa; + /** Page table entry for the memory. This is only used to derive the + * memory's caching mode; the PA bits are ignored. */ + HV_PTE pte; + /** The virtual address of the memory. */ + HV_VirtAddr va; + /** Size (in bytes) of the memory area. */ + int size; + +} +netio_ipp_address_t; + +/** The various pread/pwrite offsets into the hypervisor-level driver. + * @ingroup types + */ +typedef enum +{ + /** Inform the Linux driver of the address of the NetIO arena memory. + * This offset is actually only used to convey information from netio + * to the Linux driver; it never makes it from there to the hypervisor. + * Write-only; takes a uint32_t specifying the VA address. */ + NETIO_FIXED_ADDR = 0x5000000000000000ULL, + + /** Inform the Linux driver of the size of the NetIO arena memory. + * This offset is actually only used to convey information from netio + * to the Linux driver; it never makes it from there to the hypervisor. + * Write-only; takes a uint32_t specifying the VA size. */ + NETIO_FIXED_SIZE = 0x5100000000000000ULL, + + /** Register current tile with IPP. Write then read: write, takes a + * netio_input_config_t, read returns a pointer to a netio_queue_impl_t. */ + NETIO_IPP_INPUT_REGISTER_OFF = 0x6000000000000000ULL, + + /** Unregister current tile from IPP. Write-only, takes a dummy argument. */ + NETIO_IPP_INPUT_UNREGISTER_OFF = 0x6100000000000000ULL, + + /** Start packets flowing. Write-only, takes a dummy argument. */ + NETIO_IPP_INPUT_INIT_OFF = 0x6200000000000000ULL, + + /** Stop packets flowing. Write-only, takes a dummy argument. */ + NETIO_IPP_INPUT_UNINIT_OFF = 0x6300000000000000ULL, + + /** Configure group (typically we group on VLAN). Write-only: takes an + * array of netio_group_t's, low 24 bits of the offset is the base group + * number times the size of a netio_group_t. */ + NETIO_IPP_INPUT_GROUP_CFG_OFF = 0x6400000000000000ULL, + + /** Configure bucket. Write-only: takes an array of netio_bucket_t's, low + * 24 bits of the offset is the base bucket number times the size of a + * netio_bucket_t. */ + NETIO_IPP_INPUT_BUCKET_CFG_OFF = 0x6500000000000000ULL, + + /** Get/set a parameter. Read or write: read or write data is the parameter + * value, low 32 bits of the offset is a __netio_getset_offset_t. */ + NETIO_IPP_PARAM_OFF = 0x6600000000000000ULL, + + /** Get fast I/O index. Read-only; returns a 4-byte base index value. */ + NETIO_IPP_GET_FASTIO_OFF = 0x6700000000000000ULL, + + /** Configure hijack IP address. Packets with this IPv4 dest address + * go to bucket NETIO_NUM_BUCKETS - 1. Write-only: takes an IP address + * in some standard form. FIXME: Define the form! */ + NETIO_IPP_INPUT_HIJACK_CFG_OFF = 0x6800000000000000ULL, + + /** + * Offsets beyond this point are reserved for the supervisor (although that + * enforcement must be done by the supervisor driver itself). + */ + NETIO_IPP_USER_MAX_OFF = 0x6FFFFFFFFFFFFFFFULL, + + /** Register I/O memory. Write-only, takes a netio_ipp_address_t. */ + NETIO_IPP_IOMEM_REGISTER_OFF = 0x7000000000000000ULL, + + /** Unregister I/O memory. Write-only, takes a netio_ipp_address_t. */ + NETIO_IPP_IOMEM_UNREGISTER_OFF = 0x7100000000000000ULL, + + /* Offsets greater than 0x7FFFFFFF can't be used directly from Linux + * userspace code due to limitations in the pread/pwrite syscalls. */ + + /** Drain LIPP buffers. */ + NETIO_IPP_DRAIN_OFF = 0xFA00000000000000ULL, + + /** Supply a netio_ipp_address_t to be used as shared memory for the + * LEPP command queue. */ + NETIO_EPP_SHM_OFF = 0xFB00000000000000ULL, + + /* 0xFC... is currently unused. */ + + /** Stop IPP/EPP tiles. Write-only, takes a dummy argument. */ + NETIO_IPP_STOP_SHIM_OFF = 0xFD00000000000000ULL, + + /** Start IPP/EPP tiles. Write-only, takes a dummy argument. */ + NETIO_IPP_START_SHIM_OFF = 0xFE00000000000000ULL, + + /** Supply packet arena. Write-only, takes an array of + * netio_ipp_address_t values. */ + NETIO_IPP_ADDRESS_OFF = 0xFF00000000000000ULL, +} netio_hv_offset_t; + +/** Extract the base offset from an offset */ +#define NETIO_BASE_OFFSET(off) ((off) & 0xFF00000000000000ULL) +/** Extract the local offset from an offset */ +#define NETIO_LOCAL_OFFSET(off) ((off) & 0x00FFFFFFFFFFFFFFULL) + + +/** + * Get/set offset. + */ +typedef union +{ + struct + { + uint64_t addr:48; /**< Class-specific address */ + unsigned int class:8; /**< Class (e.g., NETIO_PARAM) */ + unsigned int opcode:8; /**< High 8 bits of NETIO_IPP_PARAM_OFF */ + } + bits; /**< Bitfields */ + uint64_t word; /**< Aggregated value to use as the offset */ +} +__netio_getset_offset_t; + +/** + * Fast I/O index offsets (must be contiguous). + */ +typedef enum +{ + NETIO_FASTIO_ALLOCATE = 0, /**< Get empty packet buffer */ + NETIO_FASTIO_FREE_BUFFER = 1, /**< Give buffer back to IPP */ + NETIO_FASTIO_RETURN_CREDITS = 2, /**< Give credits to IPP */ + NETIO_FASTIO_SEND_PKT_NOCK = 3, /**< Send a packet, no checksum */ + NETIO_FASTIO_SEND_PKT_CK = 4, /**< Send a packet, with checksum */ + NETIO_FASTIO_SEND_PKT_VEC = 5, /**< Send a vector of packets */ + NETIO_FASTIO_SENDV_PKT = 6, /**< Sendv one packet */ + NETIO_FASTIO_NUM_INDEX = 7, /**< Total number of fast I/O indices */ +} netio_fastio_index_t; + +/** 3-word return type for Fast I/O call. */ +typedef struct +{ + int err; /**< Error code. */ + uint32_t val0; /**< Value. Meaning depends upon the specific call. */ + uint32_t val1; /**< Value. Meaning depends upon the specific call. */ +} netio_fastio_rv3_t; + +/** 0-argument fast I/O call */ +int __netio_fastio0(uint32_t fastio_index); +/** 1-argument fast I/O call */ +int __netio_fastio1(uint32_t fastio_index, uint32_t arg0); +/** 3-argument fast I/O call, 2-word return value */ +netio_fastio_rv3_t __netio_fastio3_rv3(uint32_t fastio_index, uint32_t arg0, + uint32_t arg1, uint32_t arg2); +/** 4-argument fast I/O call */ +int __netio_fastio4(uint32_t fastio_index, uint32_t arg0, uint32_t arg1, + uint32_t arg2, uint32_t arg3); +/** 6-argument fast I/O call */ +int __netio_fastio6(uint32_t fastio_index, uint32_t arg0, uint32_t arg1, + uint32_t arg2, uint32_t arg3, uint32_t arg4, uint32_t arg5); +/** 9-argument fast I/O call */ +int __netio_fastio9(uint32_t fastio_index, uint32_t arg0, uint32_t arg1, + uint32_t arg2, uint32_t arg3, uint32_t arg4, uint32_t arg5, + uint32_t arg6, uint32_t arg7, uint32_t arg8); + +/** Allocate an empty packet. + * @param fastio_index Fast I/O index. + * @param size Size of the packet to allocate. + */ +#define __netio_fastio_allocate(fastio_index, size) \ + __netio_fastio1((fastio_index) + NETIO_FASTIO_ALLOCATE, size) + +/** Free a buffer. + * @param fastio_index Fast I/O index. + * @param handle Handle for the packet to free. + */ +#define __netio_fastio_free_buffer(fastio_index, handle) \ + __netio_fastio1((fastio_index) + NETIO_FASTIO_FREE_BUFFER, handle) + +/** Increment our receive credits. + * @param fastio_index Fast I/O index. + * @param credits Number of credits to add. + */ +#define __netio_fastio_return_credits(fastio_index, credits) \ + __netio_fastio1((fastio_index) + NETIO_FASTIO_RETURN_CREDITS, credits) + +/** Send packet, no checksum. + * @param fastio_index Fast I/O index. + * @param ackflag Nonzero if we want an ack. + * @param size Size of the packet. + * @param va Virtual address of start of packet. + * @param handle Packet handle. + */ +#define __netio_fastio_send_pkt_nock(fastio_index, ackflag, size, va, handle) \ + __netio_fastio4((fastio_index) + NETIO_FASTIO_SEND_PKT_NOCK, ackflag, \ + size, va, handle) + +/** Send packet, calculate checksum. + * @param fastio_index Fast I/O index. + * @param ackflag Nonzero if we want an ack. + * @param size Size of the packet. + * @param va Virtual address of start of packet. + * @param handle Packet handle. + * @param csum0 Shim checksum header. + * @param csum1 Checksum seed. + */ +#define __netio_fastio_send_pkt_ck(fastio_index, ackflag, size, va, handle, \ + csum0, csum1) \ + __netio_fastio6((fastio_index) + NETIO_FASTIO_SEND_PKT_CK, ackflag, \ + size, va, handle, csum0, csum1) + + +/** Format for the "csum0" argument to the __netio_fastio_send routines + * and LEPP. Note that this is currently exactly identical to the + * ShimProtocolOffloadHeader. + */ +typedef union +{ + struct + { + unsigned int start_byte:7; /**< The first byte to be checksummed */ + unsigned int count:14; /**< Number of bytes to be checksummed. */ + unsigned int destination_byte:7; /**< The byte to write the checksum to. */ + unsigned int reserved:4; /**< Reserved. */ + } bits; /**< Decomposed method of access. */ + unsigned int word; /**< To send out the IDN. */ +} __netio_checksum_header_t; + + +/** Sendv packet with 1 or 2 segments. + * @param fastio_index Fast I/O index. + * @param flags Ack/csum/notify flags in low 3 bits; number of segments minus + * 1 in next 2 bits; expected checksum in high 16 bits. + * @param confno Confirmation number to request, if notify flag set. + * @param csum0 Checksum descriptor; if zero, no checksum. + * @param va_F Virtual address of first segment. + * @param va_L Virtual address of last segment, if 2 segments. + * @param len_F_L Length of first segment in low 16 bits; length of last + * segment, if 2 segments, in high 16 bits. + */ +#define __netio_fastio_sendv_pkt_1_2(fastio_index, flags, confno, csum0, \ + va_F, va_L, len_F_L) \ + __netio_fastio6((fastio_index) + NETIO_FASTIO_SENDV_PKT, flags, confno, \ + csum0, va_F, va_L, len_F_L) + +/** Send packet on PCIe interface. + * @param fastio_index Fast I/O index. + * @param flags Ack/csum/notify flags in low 3 bits. + * @param confno Confirmation number to request, if notify flag set. + * @param csum0 Checksum descriptor; Hard wired 0, not needed for PCIe. + * @param va_F Virtual address of the packet buffer. + * @param va_L Virtual address of last segment, if 2 segments. Hard wired 0. + * @param len_F_L Length of the packet buffer in low 16 bits. + */ +#define __netio_fastio_send_pcie_pkt(fastio_index, flags, confno, csum0, \ + va_F, va_L, len_F_L) \ + __netio_fastio6((fastio_index) + PCIE_FASTIO_SENDV_PKT, flags, confno, \ + csum0, va_F, va_L, len_F_L) + +/** Sendv packet with 3 or 4 segments. + * @param fastio_index Fast I/O index. + * @param flags Ack/csum/notify flags in low 3 bits; number of segments minus + * 1 in next 2 bits; expected checksum in high 16 bits. + * @param confno Confirmation number to request, if notify flag set. + * @param csum0 Checksum descriptor; if zero, no checksum. + * @param va_F Virtual address of first segment. + * @param va_L Virtual address of last segment (third segment if 3 segments, + * fourth segment if 4 segments). + * @param len_F_L Length of first segment in low 16 bits; length of last + * segment in high 16 bits. + * @param va_M0 Virtual address of "middle 0" segment; this segment is sent + * second when there are three segments, and third if there are four. + * @param va_M1 Virtual address of "middle 1" segment; this segment is sent + * second when there are four segments. + * @param len_M0_M1 Length of middle 0 segment in low 16 bits; length of middle + * 1 segment, if 4 segments, in high 16 bits. + */ +#define __netio_fastio_sendv_pkt_3_4(fastio_index, flags, confno, csum0, va_F, \ + va_L, len_F_L, va_M0, va_M1, len_M0_M1) \ + __netio_fastio9((fastio_index) + NETIO_FASTIO_SENDV_PKT, flags, confno, \ + csum0, va_F, va_L, len_F_L, va_M0, va_M1, len_M0_M1) + +/** Send vector of packets. + * @param fastio_index Fast I/O index. + * @param seqno Number of packets transmitted so far on this interface; + * used to decide which packets should be acknowledged. + * @param nentries Number of entries in vector. + * @param va Virtual address of start of vector entry array. + * @return 3-word netio_fastio_rv3_t structure. The structure's err member + * is an error code, or zero if no error. The val0 member is the + * updated value of seqno; it has been incremented by 1 for each + * packet sent. That increment may be less than nentries if an + * error occurred, or if some of the entries in the vector contain + * handles equal to NETIO_PKT_HANDLE_NONE. The val1 member is the + * updated value of nentries; it has been decremented by 1 for each + * vector entry processed. Again, that decrement may be less than + * nentries (leaving the returned value positive) if an error + * occurred. + */ +#define __netio_fastio_send_pkt_vec(fastio_index, seqno, nentries, va) \ + __netio_fastio3_rv3((fastio_index) + NETIO_FASTIO_SEND_PKT_VEC, seqno, \ + nentries, va) + + +/** An egress DMA command for LEPP. */ +typedef struct +{ + /** Is this a TSO transfer? + * + * NOTE: This field is always 0, to distinguish it from + * lepp_tso_cmd_t. It must come first! + */ + uint8_t tso : 1; + + /** Unused padding bits. */ + uint8_t _unused : 3; + + /** Should this packet be sent directly from caches instead of DRAM, + * using hash-for-home to locate the packet data? + */ + uint8_t hash_for_home : 1; + + /** Should we compute a checksum? */ + uint8_t compute_checksum : 1; + + /** Is this the final buffer for this packet? + * + * A single packet can be split over several input buffers (a "gather" + * operation). This flag indicates that this is the last buffer + * in a packet. + */ + uint8_t end_of_packet : 1; + + /** Should LEPP advance 'comp_busy' when this DMA is fully finished? */ + uint8_t send_completion : 1; + + /** High bits of Client Physical Address of the start of the buffer + * to be egressed. + * + * NOTE: Only 6 bits are actually needed here, as CPAs are + * currently 38 bits. So two bits could be scavenged from this. + */ + uint8_t cpa_hi; + + /** The number of bytes to be egressed. */ + uint16_t length; + + /** Low 32 bits of Client Physical Address of the start of the buffer + * to be egressed. + */ + uint32_t cpa_lo; + + /** Checksum information (only used if 'compute_checksum'). */ + __netio_checksum_header_t checksum_data; + +} lepp_cmd_t; + + +/** A chunk of physical memory for a TSO egress. */ +typedef struct +{ + /** The low bits of the CPA. */ + uint32_t cpa_lo; + /** The high bits of the CPA. */ + uint16_t cpa_hi : 15; + /** Should this packet be sent directly from caches instead of DRAM, + * using hash-for-home to locate the packet data? + */ + uint16_t hash_for_home : 1; + /** The length in bytes. */ + uint16_t length; +} lepp_frag_t; + + +/** An LEPP command that handles TSO. */ +typedef struct +{ + /** Is this a TSO transfer? + * + * NOTE: This field is always 1, to distinguish it from + * lepp_cmd_t. It must come first! + */ + uint8_t tso : 1; + + /** Unused padding bits. */ + uint8_t _unused : 7; + + /** Size of the header[] array in bytes. It must be in the range + * [40, 127], which are the smallest header for a TCP packet over + * Ethernet and the maximum possible prepend size supported by + * hardware, respectively. Note that the array storage must be + * padded out to a multiple of four bytes so that the following + * LEPP command is aligned properly. + */ + uint8_t header_size; + + /** Byte offset of the IP header in header[]. */ + uint8_t ip_offset; + + /** Byte offset of the TCP header in header[]. */ + uint8_t tcp_offset; + + /** The number of bytes to use for the payload of each packet, + * except of course the last one, which may not have enough bytes. + * This means that each Ethernet packet except the last will have a + * size of header_size + payload_size. + */ + uint16_t payload_size; + + /** The length of the 'frags' array that follows this struct. */ + uint16_t num_frags; + + /** The actual frags. */ + lepp_frag_t frags[0 /* Variable-sized; num_frags entries. */]; + + /* + * The packet header template logically follows frags[], + * but you can't declare that in C. + * + * uint32_t header[header_size_in_words_rounded_up]; + */ + +} lepp_tso_cmd_t; + + +/** An LEPP completion ring entry. */ +typedef void* lepp_comp_t; + + +/** Maximum number of frags for one TSO command. This is adapted from + * linux's "MAX_SKB_FRAGS", and presumably over-estimates by one, for + * our page size of exactly 65536. We add one for a "body" fragment. + */ +#define LEPP_MAX_FRAGS (65536 / HV_DEFAULT_PAGE_SIZE_SMALL + 2 + 1) + +/** Total number of bytes needed for an lepp_tso_cmd_t. */ +#define LEPP_TSO_CMD_SIZE(num_frags, header_size) \ + (sizeof(lepp_tso_cmd_t) + \ + (num_frags) * sizeof(lepp_frag_t) + \ + (((header_size) + 3) & -4)) + +/** The size of the lepp "cmd" queue. */ +#define LEPP_CMD_QUEUE_BYTES \ + (((CHIP_L2_CACHE_SIZE() - 2 * CHIP_L2_LINE_SIZE()) / \ + (sizeof(lepp_cmd_t) + sizeof(lepp_comp_t))) * sizeof(lepp_cmd_t)) + +/** The largest possible command that can go in lepp_queue_t::cmds[]. */ +#define LEPP_MAX_CMD_SIZE LEPP_TSO_CMD_SIZE(LEPP_MAX_FRAGS, 128) + +/** The largest possible value of lepp_queue_t::cmd_{head, tail} (inclusive). + */ +#define LEPP_CMD_LIMIT \ + (LEPP_CMD_QUEUE_BYTES - LEPP_MAX_CMD_SIZE) + +/** The maximum number of completions in an LEPP queue. */ +#define LEPP_COMP_QUEUE_SIZE \ + ((LEPP_CMD_LIMIT + sizeof(lepp_cmd_t) - 1) / sizeof(lepp_cmd_t)) + +/** Increment an index modulo the queue size. */ +#define LEPP_QINC(var) \ + (var = __insn_mnz(var - (LEPP_COMP_QUEUE_SIZE - 1), var + 1)) + +/** A queue used to convey egress commands from the client to LEPP. */ +typedef struct +{ + /** Index of first completion not yet processed by user code. + * If this is equal to comp_busy, there are no such completions. + * + * NOTE: This is only read/written by the user. + */ + unsigned int comp_head; + + /** Index of first completion record not yet completed. + * If this is equal to comp_tail, there are no such completions. + * This index gets advanced (modulo LEPP_QUEUE_SIZE) whenever + * a command with the 'completion' bit set is finished. + * + * NOTE: This is only written by LEPP, only read by the user. + */ + volatile unsigned int comp_busy; + + /** Index of the first empty slot in the completion ring. + * Entries from this up to but not including comp_head (in ring order) + * can be filled in with completion data. + * + * NOTE: This is only read/written by the user. + */ + unsigned int comp_tail; + + /** Byte index of first command enqueued for LEPP but not yet processed. + * + * This is always divisible by sizeof(void*) and always <= LEPP_CMD_LIMIT. + * + * NOTE: LEPP advances this counter as soon as it no longer needs + * the cmds[] storage for this entry, but the transfer is not actually + * complete (i.e. the buffer pointed to by the command is no longer + * needed) until comp_busy advances. + * + * If this is equal to cmd_tail, the ring is empty. + * + * NOTE: This is only written by LEPP, only read by the user. + */ + volatile unsigned int cmd_head; + + /** Byte index of first empty slot in the command ring. This field can + * be incremented up to but not equal to cmd_head (because that would + * mean the ring is empty). + * + * This is always divisible by sizeof(void*) and always <= LEPP_CMD_LIMIT. + * + * NOTE: This is read/written by the user, only read by LEPP. + */ + volatile unsigned int cmd_tail; + + /** A ring of variable-sized egress DMA commands. + * + * NOTE: Only written by the user, only read by LEPP. + */ + char cmds[LEPP_CMD_QUEUE_BYTES] + __attribute__((aligned(CHIP_L2_LINE_SIZE()))); + + /** A ring of user completion data. + * NOTE: Only read/written by the user. + */ + lepp_comp_t comps[LEPP_COMP_QUEUE_SIZE] + __attribute__((aligned(CHIP_L2_LINE_SIZE()))); +} lepp_queue_t; + + +/** An internal helper function for determining the number of entries + * available in a ring buffer, given that there is one sentinel. + */ +static inline unsigned int +_lepp_num_free_slots(unsigned int head, unsigned int tail) +{ + /* + * One entry is reserved for use as a sentinel, to distinguish + * "empty" from "full". So we compute + * (head - tail - 1) % LEPP_QUEUE_SIZE, but without using a slow % operation. + */ + return (head - tail - 1) + ((head <= tail) ? LEPP_COMP_QUEUE_SIZE : 0); +} + + +/** Returns how many new comp entries can be enqueued. */ +static inline unsigned int +lepp_num_free_comp_slots(const lepp_queue_t* q) +{ + return _lepp_num_free_slots(q->comp_head, q->comp_tail); +} + +static inline int +lepp_qsub(int v1, int v2) +{ + int delta = v1 - v2; + return delta + ((delta >> 31) & LEPP_COMP_QUEUE_SIZE); +} + + +/** FIXME: Check this from linux, via a new "pwrite()" call. */ +#define LIPP_VERSION 1 + + +/** We use exactly two bytes of alignment padding. */ +#define LIPP_PACKET_PADDING 2 + +/** The minimum size of a "small" buffer (including the padding). */ +#define LIPP_SMALL_PACKET_SIZE 128 + +/* + * NOTE: The following two values should total to less than around + * 13582, to keep the total size used for "lipp_state_t" below 64K. + */ + +/** The maximum number of "small" buffers. + * This is enough for 53 network cpus with 128 credits. Note that + * if these are exhausted, we will fall back to using large buffers. + */ +#define LIPP_SMALL_BUFFERS 6785 + +/** The maximum number of "large" buffers. + * This is enough for 53 network cpus with 128 credits. + */ +#define LIPP_LARGE_BUFFERS 6785 + +#endif /* __DRV_XGBE_INTF_H__ */ diff --git a/arch/tile/include/hv/hypervisor.h b/arch/tile/include/hv/hypervisor.h new file mode 100644 index 000000000..e0e6af4e7 --- /dev/null +++ b/arch/tile/include/hv/hypervisor.h @@ -0,0 +1,2598 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * @file hypervisor.h + * The hypervisor's public API. + */ + +#ifndef _HV_HV_H +#define _HV_HV_H + +#include + +/* Linux builds want unsigned long constants, but assembler wants numbers */ +#ifdef __ASSEMBLER__ +/** One, for assembler */ +#define __HV_SIZE_ONE 1 +#elif !defined(__tile__) && CHIP_VA_WIDTH() > 32 +/** One, for 64-bit on host */ +#define __HV_SIZE_ONE 1ULL +#else +/** One, for Linux */ +#define __HV_SIZE_ONE 1UL +#endif + +/** The log2 of the span of a level-1 page table, in bytes. + */ +#define HV_LOG2_L1_SPAN 32 + +/** The span of a level-1 page table, in bytes. + */ +#define HV_L1_SPAN (__HV_SIZE_ONE << HV_LOG2_L1_SPAN) + +/** The log2 of the initial size of small pages, in bytes. + * See HV_DEFAULT_PAGE_SIZE_SMALL. + */ +#define HV_LOG2_DEFAULT_PAGE_SIZE_SMALL 16 + +/** The initial size of small pages, in bytes. This value should be verified + * at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_SMALL). + * It may also be modified when installing a new context. + */ +#define HV_DEFAULT_PAGE_SIZE_SMALL \ + (__HV_SIZE_ONE << HV_LOG2_DEFAULT_PAGE_SIZE_SMALL) + +/** The log2 of the initial size of large pages, in bytes. + * See HV_DEFAULT_PAGE_SIZE_LARGE. + */ +#define HV_LOG2_DEFAULT_PAGE_SIZE_LARGE 24 + +/** The initial size of large pages, in bytes. This value should be verified + * at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_LARGE). + * It may also be modified when installing a new context. + */ +#define HV_DEFAULT_PAGE_SIZE_LARGE \ + (__HV_SIZE_ONE << HV_LOG2_DEFAULT_PAGE_SIZE_LARGE) + +#if CHIP_VA_WIDTH() > 32 + +/** The log2 of the initial size of jumbo pages, in bytes. + * See HV_DEFAULT_PAGE_SIZE_JUMBO. + */ +#define HV_LOG2_DEFAULT_PAGE_SIZE_JUMBO 32 + +/** The initial size of jumbo pages, in bytes. This value should + * be verified at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_JUMBO). + * It may also be modified when installing a new context. + */ +#define HV_DEFAULT_PAGE_SIZE_JUMBO \ + (__HV_SIZE_ONE << HV_LOG2_DEFAULT_PAGE_SIZE_JUMBO) + +#endif + +/** The log2 of the granularity at which page tables must be aligned; + * in other words, the CPA for a page table must have this many zero + * bits at the bottom of the address. + */ +#define HV_LOG2_PAGE_TABLE_ALIGN 11 + +/** The granularity at which page tables must be aligned. + */ +#define HV_PAGE_TABLE_ALIGN (__HV_SIZE_ONE << HV_LOG2_PAGE_TABLE_ALIGN) + +/** Normal start of hypervisor glue in client physical memory. */ +#define HV_GLUE_START_CPA 0x10000 + +/** This much space is reserved at HV_GLUE_START_CPA + * for the hypervisor glue. The client program must start at + * some address higher than this, and in particular the address of + * its text section should be equal to zero modulo HV_PAGE_SIZE_LARGE + * so that relative offsets to the HV glue are correct. + */ +#define HV_GLUE_RESERVED_SIZE 0x10000 + +/** Each entry in the hv dispatch array takes this many bytes. */ +#define HV_DISPATCH_ENTRY_SIZE 32 + +/** Version of the hypervisor interface defined by this file */ +#define _HV_VERSION 13 + +/** Last version of the hypervisor interface with old hv_init() ABI. + * + * The change from version 12 to version 13 corresponds to launching + * the client by default at PL2 instead of PL1 (corresponding to the + * hv itself running at PL3 instead of PL2). To make this explicit, + * the hv_init() API was also extended so the client can report its + * desired PL, resulting in a more helpful failure diagnostic. If you + * call hv_init() with _HV_VERSION_OLD_HV_INIT and omit the client_pl + * argument, the hypervisor will assume client_pl = 1. + * + * Note that this is a deprecated solution and we do not expect to + * support clients of the Tilera hypervisor running at PL1 indefinitely. + */ +#define _HV_VERSION_OLD_HV_INIT 12 + +/* Index into hypervisor interface dispatch code blocks. + * + * Hypervisor calls are invoked from user space by calling code + * at an address HV_BASE_ADDRESS + (index) * HV_DISPATCH_ENTRY_SIZE, + * where index is one of these enum values. + * + * Normally a supervisor is expected to produce a set of symbols + * starting at HV_BASE_ADDRESS that obey this convention, but a user + * program could call directly through function pointers if desired. + * + * These numbers are part of the binary API and will not be changed + * without updating HV_VERSION, which should be a rare event. + */ + +/** reserved. */ +#define _HV_DISPATCH_RESERVED 0 + +/** hv_init */ +#define HV_DISPATCH_INIT 1 + +/** hv_install_context */ +#define HV_DISPATCH_INSTALL_CONTEXT 2 + +/** hv_sysconf */ +#define HV_DISPATCH_SYSCONF 3 + +/** hv_get_rtc */ +#define HV_DISPATCH_GET_RTC 4 + +/** hv_set_rtc */ +#define HV_DISPATCH_SET_RTC 5 + +/** hv_flush_asid */ +#define HV_DISPATCH_FLUSH_ASID 6 + +/** hv_flush_page */ +#define HV_DISPATCH_FLUSH_PAGE 7 + +/** hv_flush_pages */ +#define HV_DISPATCH_FLUSH_PAGES 8 + +/** hv_restart */ +#define HV_DISPATCH_RESTART 9 + +/** hv_halt */ +#define HV_DISPATCH_HALT 10 + +/** hv_power_off */ +#define HV_DISPATCH_POWER_OFF 11 + +/** hv_inquire_physical */ +#define HV_DISPATCH_INQUIRE_PHYSICAL 12 + +/** hv_inquire_memory_controller */ +#define HV_DISPATCH_INQUIRE_MEMORY_CONTROLLER 13 + +/** hv_inquire_virtual */ +#define HV_DISPATCH_INQUIRE_VIRTUAL 14 + +/** hv_inquire_asid */ +#define HV_DISPATCH_INQUIRE_ASID 15 + +/** hv_nanosleep */ +#define HV_DISPATCH_NANOSLEEP 16 + +/** hv_console_read_if_ready */ +#define HV_DISPATCH_CONSOLE_READ_IF_READY 17 + +/** hv_console_write */ +#define HV_DISPATCH_CONSOLE_WRITE 18 + +/** hv_downcall_dispatch */ +#define HV_DISPATCH_DOWNCALL_DISPATCH 19 + +/** hv_inquire_topology */ +#define HV_DISPATCH_INQUIRE_TOPOLOGY 20 + +/** hv_fs_findfile */ +#define HV_DISPATCH_FS_FINDFILE 21 + +/** hv_fs_fstat */ +#define HV_DISPATCH_FS_FSTAT 22 + +/** hv_fs_pread */ +#define HV_DISPATCH_FS_PREAD 23 + +/** hv_physaddr_read64 */ +#define HV_DISPATCH_PHYSADDR_READ64 24 + +/** hv_physaddr_write64 */ +#define HV_DISPATCH_PHYSADDR_WRITE64 25 + +/** hv_get_command_line */ +#define HV_DISPATCH_GET_COMMAND_LINE 26 + +/** hv_set_caching */ +#define HV_DISPATCH_SET_CACHING 27 + +/** hv_bzero_page */ +#define HV_DISPATCH_BZERO_PAGE 28 + +/** hv_register_message_state */ +#define HV_DISPATCH_REGISTER_MESSAGE_STATE 29 + +/** hv_send_message */ +#define HV_DISPATCH_SEND_MESSAGE 30 + +/** hv_receive_message */ +#define HV_DISPATCH_RECEIVE_MESSAGE 31 + +/** hv_inquire_context */ +#define HV_DISPATCH_INQUIRE_CONTEXT 32 + +/** hv_start_all_tiles */ +#define HV_DISPATCH_START_ALL_TILES 33 + +/** hv_dev_open */ +#define HV_DISPATCH_DEV_OPEN 34 + +/** hv_dev_close */ +#define HV_DISPATCH_DEV_CLOSE 35 + +/** hv_dev_pread */ +#define HV_DISPATCH_DEV_PREAD 36 + +/** hv_dev_pwrite */ +#define HV_DISPATCH_DEV_PWRITE 37 + +/** hv_dev_poll */ +#define HV_DISPATCH_DEV_POLL 38 + +/** hv_dev_poll_cancel */ +#define HV_DISPATCH_DEV_POLL_CANCEL 39 + +/** hv_dev_preada */ +#define HV_DISPATCH_DEV_PREADA 40 + +/** hv_dev_pwritea */ +#define HV_DISPATCH_DEV_PWRITEA 41 + +/** hv_flush_remote */ +#define HV_DISPATCH_FLUSH_REMOTE 42 + +/** hv_console_putc */ +#define HV_DISPATCH_CONSOLE_PUTC 43 + +/** hv_inquire_tiles */ +#define HV_DISPATCH_INQUIRE_TILES 44 + +/** hv_confstr */ +#define HV_DISPATCH_CONFSTR 45 + +/** hv_reexec */ +#define HV_DISPATCH_REEXEC 46 + +/** hv_set_command_line */ +#define HV_DISPATCH_SET_COMMAND_LINE 47 + +#if !CHIP_HAS_IPI() + +/** hv_clear_intr */ +#define HV_DISPATCH_CLEAR_INTR 48 + +/** hv_enable_intr */ +#define HV_DISPATCH_ENABLE_INTR 49 + +/** hv_disable_intr */ +#define HV_DISPATCH_DISABLE_INTR 50 + +/** hv_raise_intr */ +#define HV_DISPATCH_RAISE_INTR 51 + +/** hv_trigger_ipi */ +#define HV_DISPATCH_TRIGGER_IPI 52 + +#endif /* !CHIP_HAS_IPI() */ + +/** hv_store_mapping */ +#define HV_DISPATCH_STORE_MAPPING 53 + +/** hv_inquire_realpa */ +#define HV_DISPATCH_INQUIRE_REALPA 54 + +/** hv_flush_all */ +#define HV_DISPATCH_FLUSH_ALL 55 + +#if CHIP_HAS_IPI() +/** hv_get_ipi_pte */ +#define HV_DISPATCH_GET_IPI_PTE 56 +#endif + +/** hv_set_pte_super_shift */ +#define HV_DISPATCH_SET_PTE_SUPER_SHIFT 57 + +/** hv_console_set_ipi */ +#define HV_DISPATCH_CONSOLE_SET_IPI 63 + +/** One more than the largest dispatch value */ +#define _HV_DISPATCH_END 64 + + +#ifndef __ASSEMBLER__ + +#ifdef __KERNEL__ +#include +typedef u32 __hv32; /**< 32-bit value */ +typedef u64 __hv64; /**< 64-bit value */ +#else +#include +typedef uint32_t __hv32; /**< 32-bit value */ +typedef uint64_t __hv64; /**< 64-bit value */ +#endif + + +/** Hypervisor physical address. */ +typedef __hv64 HV_PhysAddr; + +#if CHIP_VA_WIDTH() > 32 +/** Hypervisor virtual address. */ +typedef __hv64 HV_VirtAddr; +#else +/** Hypervisor virtual address. */ +typedef __hv32 HV_VirtAddr; +#endif /* CHIP_VA_WIDTH() > 32 */ + +/** Hypervisor ASID. */ +typedef unsigned int HV_ASID; + +/** Hypervisor tile location for a memory access + * ("location overridden target"). + */ +typedef unsigned int HV_LOTAR; + +/** Hypervisor size of a page. */ +typedef unsigned long HV_PageSize; + +/** A page table entry. + */ +typedef struct +{ + __hv64 val; /**< Value of PTE */ +} HV_PTE; + +/** Hypervisor error code. */ +typedef int HV_Errno; + +#endif /* !__ASSEMBLER__ */ + +#define HV_OK 0 /**< No error */ +#define HV_EINVAL -801 /**< Invalid argument */ +#define HV_ENODEV -802 /**< No such device */ +#define HV_ENOENT -803 /**< No such file or directory */ +#define HV_EBADF -804 /**< Bad file number */ +#define HV_EFAULT -805 /**< Bad address */ +#define HV_ERECIP -806 /**< Bad recipients */ +#define HV_E2BIG -807 /**< Message too big */ +#define HV_ENOTSUP -808 /**< Service not supported */ +#define HV_EBUSY -809 /**< Device busy */ +#define HV_ENOSYS -810 /**< Invalid syscall */ +#define HV_EPERM -811 /**< No permission */ +#define HV_ENOTREADY -812 /**< Device not ready */ +#define HV_EIO -813 /**< I/O error */ +#define HV_ENOMEM -814 /**< Out of memory */ +#define HV_EAGAIN -815 /**< Try again */ + +#define HV_ERR_MAX -801 /**< Largest HV error code */ +#define HV_ERR_MIN -815 /**< Smallest HV error code */ + +#ifndef __ASSEMBLER__ + +/** Pass HV_VERSION to hv_init to request this version of the interface. */ +typedef enum { + HV_VERSION = _HV_VERSION, + HV_VERSION_OLD_HV_INIT = _HV_VERSION_OLD_HV_INIT, + +} HV_VersionNumber; + +/** Initializes the hypervisor. + * + * @param interface_version_number The version of the hypervisor interface + * that this program expects, typically HV_VERSION. + * @param chip_num Architecture number of the chip the client was built for. + * @param chip_rev_num Revision number of the chip the client was built for. + * @param client_pl Privilege level the client is built for + * (not required if interface_version_number == HV_VERSION_OLD_HV_INIT). + */ +void hv_init(HV_VersionNumber interface_version_number, + int chip_num, int chip_rev_num, int client_pl); + + +/** Queries we can make for hv_sysconf(). + * + * These numbers are part of the binary API and guaranteed not to change. + */ +typedef enum { + /** An invalid value; do not use. */ + _HV_SYSCONF_RESERVED = 0, + + /** The length of the glue section containing the hv_ procs, in bytes. */ + HV_SYSCONF_GLUE_SIZE = 1, + + /** The size of small pages, in bytes. */ + HV_SYSCONF_PAGE_SIZE_SMALL = 2, + + /** The size of large pages, in bytes. */ + HV_SYSCONF_PAGE_SIZE_LARGE = 3, + + /** Processor clock speed, in hertz. */ + HV_SYSCONF_CPU_SPEED = 4, + + /** Processor temperature, in degrees Kelvin. The value + * HV_SYSCONF_TEMP_KTOC may be subtracted from this to get degrees + * Celsius. If that Celsius value is HV_SYSCONF_OVERTEMP, this indicates + * that the temperature has hit an upper limit and is no longer being + * accurately tracked. + */ + HV_SYSCONF_CPU_TEMP = 5, + + /** Board temperature, in degrees Kelvin. The value + * HV_SYSCONF_TEMP_KTOC may be subtracted from this to get degrees + * Celsius. If that Celsius value is HV_SYSCONF_OVERTEMP, this indicates + * that the temperature has hit an upper limit and is no longer being + * accurately tracked. + */ + HV_SYSCONF_BOARD_TEMP = 6, + + /** Legal page size bitmask for hv_install_context(). + * For example, if 16KB and 64KB small pages are supported, + * it would return "HV_CTX_PG_SM_16K | HV_CTX_PG_SM_64K". + */ + HV_SYSCONF_VALID_PAGE_SIZES = 7, + + /** The size of jumbo pages, in bytes. + * If no jumbo pages are available, zero will be returned. + */ + HV_SYSCONF_PAGE_SIZE_JUMBO = 8, + +} HV_SysconfQuery; + +/** Offset to subtract from returned Kelvin temperature to get degrees + Celsius. */ +#define HV_SYSCONF_TEMP_KTOC 273 + +/** Pseudo-temperature value indicating that the temperature has + * pegged at its upper limit and is no longer accurate; note that this is + * the value after subtracting HV_SYSCONF_TEMP_KTOC. */ +#define HV_SYSCONF_OVERTEMP 999 + +/** Query a configuration value from the hypervisor. + * @param query Which value is requested (HV_SYSCONF_xxx). + * @return The requested value, or -1 the requested value is illegal or + * unavailable. + */ +long hv_sysconf(HV_SysconfQuery query); + + +/** Queries we can make for hv_confstr(). + * + * These numbers are part of the binary API and guaranteed not to change. + */ +typedef enum { + /** An invalid value; do not use. */ + _HV_CONFSTR_RESERVED = 0, + + /** Board part number. */ + HV_CONFSTR_BOARD_PART_NUM = 1, + + /** Board serial number. */ + HV_CONFSTR_BOARD_SERIAL_NUM = 2, + + /** Chip serial number. */ + HV_CONFSTR_CHIP_SERIAL_NUM = 3, + + /** Board revision level. */ + HV_CONFSTR_BOARD_REV = 4, + + /** Hypervisor software version. */ + HV_CONFSTR_HV_SW_VER = 5, + + /** The name for this chip model. */ + HV_CONFSTR_CHIP_MODEL = 6, + + /** Human-readable board description. */ + HV_CONFSTR_BOARD_DESC = 7, + + /** Human-readable description of the hypervisor configuration. */ + HV_CONFSTR_HV_CONFIG = 8, + + /** Human-readable version string for the boot image (for instance, + * who built it and when, what configuration file was used). */ + HV_CONFSTR_HV_CONFIG_VER = 9, + + /** Mezzanine part number. */ + HV_CONFSTR_MEZZ_PART_NUM = 10, + + /** Mezzanine serial number. */ + HV_CONFSTR_MEZZ_SERIAL_NUM = 11, + + /** Mezzanine revision level. */ + HV_CONFSTR_MEZZ_REV = 12, + + /** Human-readable mezzanine description. */ + HV_CONFSTR_MEZZ_DESC = 13, + + /** Control path for the onboard network switch. */ + HV_CONFSTR_SWITCH_CONTROL = 14, + + /** Chip revision level. */ + HV_CONFSTR_CHIP_REV = 15, + + /** CPU module part number. */ + HV_CONFSTR_CPUMOD_PART_NUM = 16, + + /** CPU module serial number. */ + HV_CONFSTR_CPUMOD_SERIAL_NUM = 17, + + /** CPU module revision level. */ + HV_CONFSTR_CPUMOD_REV = 18, + + /** Human-readable CPU module description. */ + HV_CONFSTR_CPUMOD_DESC = 19, + + /** Per-tile hypervisor statistics. When this identifier is specified, + * the hv_confstr call takes two extra arguments. The first is the + * HV_XY_TO_LOTAR of the target tile's coordinates. The second is + * a flag word. The only current flag is the lowest bit, which means + * "zero out the stats instead of retrieving them"; in this case the + * buffer and buffer length are ignored. */ + HV_CONFSTR_HV_STATS = 20 + +} HV_ConfstrQuery; + +/** Query a configuration string from the hypervisor. + * + * @param query Identifier for the specific string to be retrieved + * (HV_CONFSTR_xxx). Some strings may require or permit extra + * arguments to be appended which select specific objects to be + * described; see the string descriptions above. + * @param buf Buffer in which to place the string. + * @param len Length of the buffer. + * @return If query is valid, then the length of the corresponding string, + * including the trailing null; if this is greater than len, the string + * was truncated. If query is invalid, HV_EINVAL. If the specified + * buffer is not writable by the client, HV_EFAULT. + */ +int hv_confstr(HV_ConfstrQuery query, HV_VirtAddr buf, int len, ...); + +/** Tile coordinate */ +typedef struct +{ + /** X coordinate, relative to supervisor's top-left coordinate */ + int x; + + /** Y coordinate, relative to supervisor's top-left coordinate */ + int y; +} HV_Coord; + + +#if CHIP_HAS_IPI() + +/** Get the PTE for sending an IPI to a particular tile. + * + * @param tile Tile which will receive the IPI. + * @param pl Indicates which IPI registers: 0 = IPI_0, 1 = IPI_1. + * @param pte Filled with resulting PTE. + * @result Zero if no error, non-zero for invalid parameters. + */ +int hv_get_ipi_pte(HV_Coord tile, int pl, HV_PTE* pte); + +/** Configure the console interrupt. + * + * When the console client interrupt is enabled, the hypervisor will + * deliver the specified IPI to the client in the following situations: + * + * - The console has at least one character available for input. + * + * - The console can accept new characters for output, and the last call + * to hv_console_write() did not write all of the characters requested + * by the client. + * + * Note that in some system configurations, console interrupt will not + * be available; clients should be prepared for this routine to fail and + * to fall back to periodic console polling in that case. + * + * @param ipi Index of the IPI register which will receive the interrupt. + * @param event IPI event number for console interrupt. If less than 0, + * disable the console IPI interrupt. + * @param coord Tile to be targeted for console interrupt. + * @return 0 on success, otherwise, HV_EINVAL if illegal parameter, + * HV_ENOTSUP if console interrupt are not available. + */ +int hv_console_set_ipi(int ipi, int event, HV_Coord coord); + +#else /* !CHIP_HAS_IPI() */ + +/** A set of interrupts. */ +typedef __hv32 HV_IntrMask; + +/** The low interrupt numbers are reserved for use by the client in + * delivering IPIs. Any interrupt numbers higher than this value are + * reserved for use by HV device drivers. */ +#define HV_MAX_IPI_INTERRUPT 7 + +/** Enable a set of device interrupts. + * + * @param enab_mask Bitmap of interrupts to enable. + */ +void hv_enable_intr(HV_IntrMask enab_mask); + +/** Disable a set of device interrupts. + * + * @param disab_mask Bitmap of interrupts to disable. + */ +void hv_disable_intr(HV_IntrMask disab_mask); + +/** Clear a set of device interrupts. + * + * @param clear_mask Bitmap of interrupts to clear. + */ +void hv_clear_intr(HV_IntrMask clear_mask); + +/** Raise a set of device interrupts. + * + * @param raise_mask Bitmap of interrupts to raise. + */ +void hv_raise_intr(HV_IntrMask raise_mask); + +/** Trigger a one-shot interrupt on some tile + * + * @param tile Which tile to interrupt. + * @param interrupt Interrupt number to trigger; must be between 0 and + * HV_MAX_IPI_INTERRUPT. + * @return HV_OK on success, or a hypervisor error code. + */ +HV_Errno hv_trigger_ipi(HV_Coord tile, int interrupt); + +#endif /* !CHIP_HAS_IPI() */ + +/** Store memory mapping in debug memory so that external debugger can read it. + * A maximum of 16 entries can be stored. + * + * @param va VA of memory that is mapped. + * @param len Length of mapped memory. + * @param pa PA of memory that is mapped. + * @return 0 on success, -1 if the maximum number of mappings is exceeded. + */ +int hv_store_mapping(HV_VirtAddr va, unsigned int len, HV_PhysAddr pa); + +/** Given a client PA and a length, return its real (HV) PA. + * + * @param cpa Client physical address. + * @param len Length of mapped memory. + * @return physical address, or -1 if cpa or len is not valid. + */ +HV_PhysAddr hv_inquire_realpa(HV_PhysAddr cpa, unsigned int len); + +/** RTC return flag for no RTC chip present. + */ +#define HV_RTC_NO_CHIP 0x1 + +/** RTC return flag for low-voltage condition, indicating that battery had + * died and time read is unreliable. + */ +#define HV_RTC_LOW_VOLTAGE 0x2 + +/** Date/Time of day */ +typedef struct { +#if CHIP_WORD_SIZE() > 32 + __hv64 tm_sec; /**< Seconds, 0-59 */ + __hv64 tm_min; /**< Minutes, 0-59 */ + __hv64 tm_hour; /**< Hours, 0-23 */ + __hv64 tm_mday; /**< Day of month, 0-30 */ + __hv64 tm_mon; /**< Month, 0-11 */ + __hv64 tm_year; /**< Years since 1900, 0-199 */ + __hv64 flags; /**< Return flags, 0 if no error */ +#else + __hv32 tm_sec; /**< Seconds, 0-59 */ + __hv32 tm_min; /**< Minutes, 0-59 */ + __hv32 tm_hour; /**< Hours, 0-23 */ + __hv32 tm_mday; /**< Day of month, 0-30 */ + __hv32 tm_mon; /**< Month, 0-11 */ + __hv32 tm_year; /**< Years since 1900, 0-199 */ + __hv32 flags; /**< Return flags, 0 if no error */ +#endif +} HV_RTCTime; + +/** Read the current time-of-day clock. + * @return HV_RTCTime of current time (GMT). + */ +HV_RTCTime hv_get_rtc(void); + + +/** Set the current time-of-day clock. + * @param time time to reset time-of-day to (GMT). + */ +void hv_set_rtc(HV_RTCTime time); + +/** Installs a context, comprising a page table and other attributes. + * + * Once this service completes, page_table will be used to translate + * subsequent virtual address references to physical memory. + * + * Installing a context does not cause an implicit TLB flush. Before + * reusing an ASID value for a different address space, the client is + * expected to flush old references from the TLB with hv_flush_asid(). + * (Alternately, hv_flush_all() may be used to flush many ASIDs at once.) + * After invalidating a page table entry, changing its attributes, or + * changing its target CPA, the client is expected to flush old references + * from the TLB with hv_flush_page() or hv_flush_pages(). Making a + * previously invalid page valid does not require a flush. + * + * Specifying an invalid ASID, or an invalid CPA (client physical address) + * (either as page_table_pointer, or within the referenced table), + * or another page table data item documented as above as illegal may + * lead to client termination; since the validation of the table is + * done as needed, this may happen before the service returns, or at + * some later time, or never, depending upon the client's pattern of + * memory references. Page table entries which supply translations for + * invalid virtual addresses may result in client termination, or may + * be silently ignored. "Invalid" in this context means a value which + * was not provided to the client via the appropriate hv_inquire_* routine. + * + * To support changing the instruction VAs at the same time as + * installing the new page table, this call explicitly supports + * setting the "lr" register to a different address and then jumping + * directly to the hv_install_context() routine. In this case, the + * new page table does not need to contain any mapping for the + * hv_install_context address itself. + * + * At most one HV_CTX_PG_SM_* flag may be specified in "flags"; + * if multiple flags are specified, HV_EINVAL is returned. + * Specifying none of the flags results in using the default page size. + * All cores participating in a given client must request the same + * page size, or the results are undefined. + * + * @param page_table Root of the page table. + * @param access PTE providing info on how to read the page table. This + * value must be consistent between multiple tiles sharing a page table, + * and must also be consistent with any virtual mappings the client + * may be using to access the page table. + * @param asid HV_ASID the page table is to be used for. + * @param flags Context flags, denoting attributes or privileges of the + * current context (HV_CTX_xxx). + * @return Zero on success, or a hypervisor error code on failure. + */ +int hv_install_context(HV_PhysAddr page_table, HV_PTE access, HV_ASID asid, + __hv32 flags); + +#endif /* !__ASSEMBLER__ */ + +#define HV_CTX_DIRECTIO 0x1 /**< Direct I/O requests are accepted from + PL0. */ + +#define HV_CTX_PG_SM_4K 0x10 /**< Use 4K small pages, if available. */ +#define HV_CTX_PG_SM_16K 0x20 /**< Use 16K small pages, if available. */ +#define HV_CTX_PG_SM_64K 0x40 /**< Use 64K small pages, if available. */ +#define HV_CTX_PG_SM_MASK 0xf0 /**< Mask of all possible small pages. */ + +#ifndef __ASSEMBLER__ + + +/** Set the number of pages ganged together by HV_PTE_SUPER at a + * particular level of the page table. + * + * The current TILE-Gx hardware only supports powers of four + * (i.e. log2_count must be a multiple of two), and the requested + * "super" page size must be less than the span of the next level in + * the page table. The largest size that can be requested is 64GB. + * + * The shift value is initially "0" for all page table levels, + * indicating that the HV_PTE_SUPER bit is effectively ignored. + * + * If you change the count from one non-zero value to another, the + * hypervisor will flush the entire TLB and TSB to avoid confusion. + * + * @param level Page table level (0, 1, or 2) + * @param log2_count Base-2 log of the number of pages to gang together, + * i.e. how much to shift left the base page size for the super page size. + * @return Zero on success, or a hypervisor error code on failure. + */ +int hv_set_pte_super_shift(int level, int log2_count); + + +/** Value returned from hv_inquire_context(). */ +typedef struct +{ + /** Physical address of page table */ + HV_PhysAddr page_table; + + /** PTE which defines access method for top of page table */ + HV_PTE access; + + /** ASID associated with this page table */ + HV_ASID asid; + + /** Context flags */ + __hv32 flags; +} HV_Context; + +/** Retrieve information about the currently installed context. + * @return The data passed to the last successful hv_install_context call. + */ +HV_Context hv_inquire_context(void); + + +/** Flushes all translations associated with the named address space + * identifier from the TLB and any other hypervisor data structures. + * Translations installed with the "global" bit are not flushed. + * + * Specifying an invalid ASID may lead to client termination. "Invalid" + * in this context means a value which was not provided to the client + * via hv_inquire_asid(). + * + * @param asid HV_ASID whose entries are to be flushed. + * @return Zero on success, or a hypervisor error code on failure. +*/ +int hv_flush_asid(HV_ASID asid); + + +/** Flushes all translations associated with the named virtual address + * and page size from the TLB and other hypervisor data structures. Only + * pages visible to the current ASID are affected; note that this includes + * global pages in addition to pages specific to the current ASID. + * + * The supplied VA need not be aligned; it may be anywhere in the + * subject page. + * + * Specifying an invalid virtual address may lead to client termination, + * or may silently succeed. "Invalid" in this context means a value + * which was not provided to the client via hv_inquire_virtual. + * + * @param address Address of the page to flush. + * @param page_size Size of pages to assume. + * @return Zero on success, or a hypervisor error code on failure. + */ +int hv_flush_page(HV_VirtAddr address, HV_PageSize page_size); + + +/** Flushes all translations associated with the named virtual address range + * and page size from the TLB and other hypervisor data structures. Only + * pages visible to the current ASID are affected; note that this includes + * global pages in addition to pages specific to the current ASID. + * + * The supplied VA need not be aligned; it may be anywhere in the + * subject page. + * + * Specifying an invalid virtual address may lead to client termination, + * or may silently succeed. "Invalid" in this context means a value + * which was not provided to the client via hv_inquire_virtual. + * + * @param start Address to flush. + * @param page_size Size of pages to assume. + * @param size The number of bytes to flush. Any page in the range + * [start, start + size) will be flushed from the TLB. + * @return Zero on success, or a hypervisor error code on failure. + */ +int hv_flush_pages(HV_VirtAddr start, HV_PageSize page_size, + unsigned long size); + + +/** Flushes all non-global translations (if preserve_global is true), + * or absolutely all translations (if preserve_global is false). + * + * @param preserve_global Non-zero if we want to preserve "global" mappings. + * @return Zero on success, or a hypervisor error code on failure. +*/ +int hv_flush_all(int preserve_global); + + +/** Restart machine with optional restart command and optional args. + * @param cmd Const pointer to command to restart with, or NULL + * @param args Const pointer to argument string to restart with, or NULL + */ +void hv_restart(HV_VirtAddr cmd, HV_VirtAddr args); + + +/** Halt machine. */ +void hv_halt(void); + + +/** Power off machine. */ +void hv_power_off(void); + + +/** Re-enter virtual-is-physical memory translation mode and restart + * execution at a given address. + * @param entry Client physical address at which to begin execution. + * @return A hypervisor error code on failure; if the operation is + * successful the call does not return. + */ +int hv_reexec(HV_PhysAddr entry); + + +/** Chip topology */ +typedef struct +{ + /** Relative coordinates of the querying tile */ + HV_Coord coord; + + /** Width of the querying supervisor's tile rectangle. */ + int width; + + /** Height of the querying supervisor's tile rectangle. */ + int height; + +} HV_Topology; + +/** Returns information about the tile coordinate system. + * + * Each supervisor is given a rectangle of tiles it potentially controls. + * These tiles are labeled using a relative coordinate system with (0,0) as + * the upper left tile regardless of their physical location on the chip. + * + * This call returns both the size of that rectangle and the position + * within that rectangle of the querying tile. + * + * Not all tiles within that rectangle may be available to the supervisor; + * to get the precise set of available tiles, you must also call + * hv_inquire_tiles(HV_INQ_TILES_AVAIL, ...). + **/ +HV_Topology hv_inquire_topology(void); + +/** Sets of tiles we can retrieve with hv_inquire_tiles(). + * + * These numbers are part of the binary API and guaranteed not to change. + */ +typedef enum { + /** An invalid value; do not use. */ + _HV_INQ_TILES_RESERVED = 0, + + /** All available tiles within the supervisor's tile rectangle. */ + HV_INQ_TILES_AVAIL = 1, + + /** The set of tiles used for hash-for-home caching. */ + HV_INQ_TILES_HFH_CACHE = 2, + + /** The set of tiles that can be legally used as a LOTAR for a PTE. */ + HV_INQ_TILES_LOTAR = 3, + + /** The set of "shared" driver tiles that the hypervisor may + * periodically interrupt. */ + HV_INQ_TILES_SHARED = 4 +} HV_InqTileSet; + +/** Returns specific information about various sets of tiles within the + * supervisor's tile rectangle. + * + * @param set Which set of tiles to retrieve. + * @param cpumask Pointer to a returned bitmask (in row-major order, + * supervisor-relative) of tiles. The low bit of the first word + * corresponds to the tile at the upper left-hand corner of the + * supervisor's rectangle. In order for the supervisor to know the + * buffer length to supply, it should first call hv_inquire_topology. + * @param length Number of bytes available for the returned bitmask. + **/ +HV_Errno hv_inquire_tiles(HV_InqTileSet set, HV_VirtAddr cpumask, int length); + + +/** An identifier for a memory controller. Multiple memory controllers + * may be connected to one chip, and this uniquely identifies each one. + */ +typedef int HV_MemoryController; + +/** A range of physical memory. */ +typedef struct +{ + HV_PhysAddr start; /**< Starting address. */ + __hv64 size; /**< Size in bytes. */ + HV_MemoryController controller; /**< Which memory controller owns this. */ +} HV_PhysAddrRange; + +/** Returns information about a range of physical memory. + * + * hv_inquire_physical() returns one of the ranges of client + * physical addresses which are available to this client. + * + * The first range is retrieved by specifying an idx of 0, and + * successive ranges are returned with subsequent idx values. Ranges + * are ordered by increasing start address (i.e., as idx increases, + * so does start), do not overlap, and do not touch (i.e., the + * available memory is described with the fewest possible ranges). + * + * If an out-of-range idx value is specified, the returned size will be zero. + * A client can count the number of ranges by increasing idx until the + * returned size is zero. There will always be at least one valid range. + * + * Some clients might not be prepared to deal with more than one + * physical address range; they still ought to call this routine and + * issue a warning message if they're given more than one range, on the + * theory that whoever configured the hypervisor to provide that memory + * should know that it's being wasted. + */ +HV_PhysAddrRange hv_inquire_physical(int idx); + +/** Possible DIMM types. */ +typedef enum +{ + NO_DIMM = 0, /**< No DIMM */ + DDR2 = 1, /**< DDR2 */ + DDR3 = 2 /**< DDR3 */ +} HV_DIMM_Type; + +#ifdef __tilegx__ + +/** Log2 of minimum DIMM bytes supported by the memory controller. */ +#define HV_MSH_MIN_DIMM_SIZE_SHIFT 29 + +/** Max number of DIMMs contained by one memory controller. */ +#define HV_MSH_MAX_DIMMS 8 + +#else + +/** Log2 of minimum DIMM bytes supported by the memory controller. */ +#define HV_MSH_MIN_DIMM_SIZE_SHIFT 26 + +/** Max number of DIMMs contained by one memory controller. */ +#define HV_MSH_MAX_DIMMS 2 + +#endif + +/** Number of bits to right-shift to get the DIMM type. */ +#define HV_DIMM_TYPE_SHIFT 0 + +/** Bits to mask to get the DIMM type. */ +#define HV_DIMM_TYPE_MASK 0xf + +/** Number of bits to right-shift to get the DIMM size. */ +#define HV_DIMM_SIZE_SHIFT 4 + +/** Bits to mask to get the DIMM size. */ +#define HV_DIMM_SIZE_MASK 0xf + +/** Memory controller information. */ +typedef struct +{ + HV_Coord coord; /**< Relative tile coordinates of the port used by a + specified tile to communicate with this controller. */ + __hv64 speed; /**< Speed of this controller in bytes per second. */ +} HV_MemoryControllerInfo; + +/** Returns information about a particular memory controller. + * + * hv_inquire_memory_controller(coord,idx) returns information about a + * particular controller. Two pieces of information are returned: + * - The relative coordinates of the port on the controller that the specified + * tile would use to contact it. The relative coordinates may lie + * outside the supervisor's rectangle, i.e. the controller may not + * be attached to a node managed by the querying node's supervisor. + * In particular note that x or y may be negative. + * - The speed of the memory controller. (This is a not-to-exceed value + * based on the raw hardware data rate, and may not be achievable in + * practice; it is provided to give clients information on the relative + * performance of the available controllers.) + * + * Clients should avoid calling this interface with invalid values. + * A client who does may be terminated. + * @param coord Tile for which to calculate the relative port position. + * @param controller Index of the controller; identical to value returned + * from other routines like hv_inquire_physical. + * @return Information about the controller. + */ +HV_MemoryControllerInfo hv_inquire_memory_controller(HV_Coord coord, + int controller); + + +/** A range of virtual memory. */ +typedef struct +{ + HV_VirtAddr start; /**< Starting address. */ + __hv64 size; /**< Size in bytes. */ +} HV_VirtAddrRange; + +/** Returns information about a range of virtual memory. + * + * hv_inquire_virtual() returns one of the ranges of client + * virtual addresses which are available to this client. + * + * The first range is retrieved by specifying an idx of 0, and + * successive ranges are returned with subsequent idx values. Ranges + * are ordered by increasing start address (i.e., as idx increases, + * so does start), do not overlap, and do not touch (i.e., the + * available memory is described with the fewest possible ranges). + * + * If an out-of-range idx value is specified, the returned size will be zero. + * A client can count the number of ranges by increasing idx until the + * returned size is zero. There will always be at least one valid range. + * + * Some clients may well have various virtual addresses hardwired + * into themselves; for instance, their instruction stream may + * have been compiled expecting to live at a particular address. + * Such clients should use this interface to verify they've been + * given the virtual address space they expect, and issue a (potentially + * fatal) warning message otherwise. + * + * Note that the returned size is a __hv64, not a __hv32, so it is + * possible to express a single range spanning the entire 32-bit + * address space. + */ +HV_VirtAddrRange hv_inquire_virtual(int idx); + + +/** A range of ASID values. */ +typedef struct +{ + HV_ASID start; /**< First ASID in the range. */ + unsigned int size; /**< Number of ASIDs. Zero for an invalid range. */ +} HV_ASIDRange; + +/** Returns information about a range of ASIDs. + * + * hv_inquire_asid() returns one of the ranges of address + * space identifiers which are available to this client. + * + * The first range is retrieved by specifying an idx of 0, and + * successive ranges are returned with subsequent idx values. Ranges + * are ordered by increasing start value (i.e., as idx increases, + * so does start), do not overlap, and do not touch (i.e., the + * available ASIDs are described with the fewest possible ranges). + * + * If an out-of-range idx value is specified, the returned size will be zero. + * A client can count the number of ranges by increasing idx until the + * returned size is zero. There will always be at least one valid range. + */ +HV_ASIDRange hv_inquire_asid(int idx); + + +/** Waits for at least the specified number of nanoseconds then returns. + * + * NOTE: this deprecated function currently assumes a 750 MHz clock, + * and is thus not generally suitable for use. New code should call + * hv_sysconf(HV_SYSCONF_CPU_SPEED), compute a cycle count to wait for, + * and delay by looping while checking the cycle counter SPR. + * + * @param nanosecs The number of nanoseconds to sleep. + */ +void hv_nanosleep(int nanosecs); + + +/** Reads a character from the console without blocking. + * + * @return A value from 0-255 indicates the value successfully read. + * A negative value means no value was ready. + */ +int hv_console_read_if_ready(void); + + +/** Writes a character to the console, blocking if the console is busy. + * + * This call cannot fail. If the console is broken for some reason, + * output will simply vanish. + * @param byte Character to write. + */ +void hv_console_putc(int byte); + + +/** Writes a string to the console, blocking if the console is busy. + * @param bytes Pointer to characters to write. + * @param len Number of characters to write. + * @return Number of characters written, or HV_EFAULT if the buffer is invalid. + */ +int hv_console_write(HV_VirtAddr bytes, int len); + + +/** Dispatch the next interrupt from the client downcall mechanism. + * + * The hypervisor uses downcalls to notify the client of asynchronous + * events. Some of these events are hypervisor-created (like incoming + * messages). Some are regular interrupts which initially occur in + * the hypervisor, and are normally handled directly by the client; + * when these occur in a client's interrupt critical section, they must + * be delivered through the downcall mechanism. + * + * A downcall is initially delivered to the client as an INTCTRL_CL + * interrupt, where CL is the client's PL. Upon entry to the INTCTRL_CL + * vector, the client must immediately invoke the hv_downcall_dispatch + * service. This service will not return; instead it will cause one of + * the client's actual downcall-handling interrupt vectors to be entered. + * The EX_CONTEXT registers in the client will be set so that when the + * client irets, it will return to the code which was interrupted by the + * INTCTRL_CL interrupt. + * + * Under some circumstances, the firing of INTCTRL_CL can race with + * the lowering of a device interrupt. In such a case, the + * hv_downcall_dispatch service may issue an iret instruction instead + * of entering one of the client's actual downcall-handling interrupt + * vectors. This will return execution to the location that was + * interrupted by INTCTRL_CL. + * + * Any saving of registers should be done by the actual handling + * vectors; no registers should be changed by the INTCTRL_CL handler. + * In particular, the client should not use a jal instruction to invoke + * the hv_downcall_dispatch service, as that would overwrite the client's + * lr register. Note that the hv_downcall_dispatch service may overwrite + * one or more of the client's system save registers. + * + * The client must not modify the INTCTRL_CL_STATUS SPR. The hypervisor + * will set this register to cause a downcall to happen, and will clear + * it when no further downcalls are pending. + * + * When a downcall vector is entered, the INTCTRL_CL interrupt will be + * masked. When the client is done processing a downcall, and is ready + * to accept another, it must unmask this interrupt; if more downcalls + * are pending, this will cause the INTCTRL_CL vector to be reentered. + * Currently the following interrupt vectors can be entered through a + * downcall: + * + * INT_MESSAGE_RCV_DWNCL (hypervisor message available) + * INT_DEV_INTR_DWNCL (device interrupt) + * INT_DMATLB_MISS_DWNCL (DMA TLB miss) + * INT_SNITLB_MISS_DWNCL (SNI TLB miss) + * INT_DMATLB_ACCESS_DWNCL (DMA TLB access violation) + */ +void hv_downcall_dispatch(void); + +#endif /* !__ASSEMBLER__ */ + +/** We use actual interrupt vectors which never occur (they're only there + * to allow setting MPLs for related SPRs) for our downcall vectors. + */ +/** Message receive downcall interrupt vector */ +#define INT_MESSAGE_RCV_DWNCL INT_BOOT_ACCESS +/** DMA TLB miss downcall interrupt vector */ +#define INT_DMATLB_MISS_DWNCL INT_DMA_ASID +/** Static nework processor instruction TLB miss interrupt vector */ +#define INT_SNITLB_MISS_DWNCL INT_SNI_ASID +/** DMA TLB access violation downcall interrupt vector */ +#define INT_DMATLB_ACCESS_DWNCL INT_DMA_CPL +/** Device interrupt downcall interrupt vector */ +#define INT_DEV_INTR_DWNCL INT_WORLD_ACCESS + +#ifndef __ASSEMBLER__ + +/** Requests the inode for a specific full pathname. + * + * Performs a lookup in the hypervisor filesystem for a given filename. + * Multiple calls with the same filename will always return the same inode. + * If there is no such filename, HV_ENOENT is returned. + * A bad filename pointer may result in HV_EFAULT instead. + * + * @param filename Constant pointer to name of requested file + * @return Inode of requested file + */ +int hv_fs_findfile(HV_VirtAddr filename); + + +/** Data returned from an fstat request. + * Note that this structure should be no more than 40 bytes in size so + * that it can always be returned completely in registers. + */ +typedef struct +{ + int size; /**< Size of file (or HV_Errno on error) */ + unsigned int flags; /**< Flags (see HV_FS_FSTAT_FLAGS) */ +} HV_FS_StatInfo; + +/** Bitmask flags for fstat request */ +typedef enum +{ + HV_FS_ISDIR = 0x0001 /**< Is the entry a directory? */ +} HV_FS_FSTAT_FLAGS; + +/** Get stat information on a given file inode. + * + * Return information on the file with the given inode. + * + * IF the HV_FS_ISDIR bit is set, the "file" is a directory. Reading + * it will return NUL-separated filenames (no directory part) relative + * to the path to the inode of the directory "file". These can be + * appended to the path to the directory "file" after a forward slash + * to create additional filenames. Note that it is not required + * that all valid paths be decomposable into valid parent directories; + * a filesystem may validly have just a few files, none of which have + * HV_FS_ISDIR set. However, if clients may wish to enumerate the + * files in the filesystem, it is recommended to include all the + * appropriate parent directory "files" to give a consistent view. + * + * An invalid file inode will cause an HV_EBADF error to be returned. + * + * @param inode The inode number of the query + * @return An HV_FS_StatInfo structure + */ +HV_FS_StatInfo hv_fs_fstat(int inode); + + +/** Read data from a specific hypervisor file. + * On error, may return HV_EBADF for a bad inode or HV_EFAULT for a bad buf. + * Reads near the end of the file will return fewer bytes than requested. + * Reads at or beyond the end of a file will return zero. + * + * @param inode the hypervisor file to read + * @param buf the buffer to read data into + * @param length the number of bytes of data to read + * @param offset the offset into the file to read the data from + * @return number of bytes successfully read, or an HV_Errno code + */ +int hv_fs_pread(int inode, HV_VirtAddr buf, int length, int offset); + + +/** Read a 64-bit word from the specified physical address. + * The address must be 8-byte aligned. + * Specifying an invalid physical address will lead to client termination. + * @param addr The physical address to read + * @param access The PTE describing how to read the memory + * @return The 64-bit value read from the given address + */ +unsigned long long hv_physaddr_read64(HV_PhysAddr addr, HV_PTE access); + + +/** Write a 64-bit word to the specified physical address. + * The address must be 8-byte aligned. + * Specifying an invalid physical address will lead to client termination. + * @param addr The physical address to write + * @param access The PTE that says how to write the memory + * @param val The 64-bit value to write to the given address + */ +void hv_physaddr_write64(HV_PhysAddr addr, HV_PTE access, + unsigned long long val); + + +/** Get the value of the command-line for the supervisor, if any. + * This will not include the filename of the booted supervisor, but may + * include configured-in boot arguments or the hv_restart() arguments. + * If the buffer is not long enough the hypervisor will NUL the first + * character of the buffer but not write any other data. + * @param buf The virtual address to write the command-line string to. + * @param length The length of buf, in characters. + * @return The actual length of the command line, including the trailing NUL + * (may be larger than "length"). + */ +int hv_get_command_line(HV_VirtAddr buf, int length); + + +/** Set a new value for the command-line for the supervisor, which will + * be returned from subsequent invocations of hv_get_command_line() on + * this tile. + * @param buf The virtual address to read the command-line string from. + * @param length The length of buf, in characters; must be no more than + * HV_COMMAND_LINE_LEN. + * @return Zero if successful, or a hypervisor error code. + */ +HV_Errno hv_set_command_line(HV_VirtAddr buf, int length); + +/** Maximum size of a command line passed to hv_set_command_line(); note + * that a line returned from hv_get_command_line() could be larger than + * this.*/ +#define HV_COMMAND_LINE_LEN 256 + +/** Tell the hypervisor how to cache non-priority pages + * (its own as well as pages explicitly represented in page tables). + * Normally these will be represented as red/black pages, but + * when the supervisor starts to allocate "priority" pages in the PTE + * the hypervisor will need to start marking those pages as (e.g.) "red" + * and non-priority pages as either "black" (if they cache-alias + * with the existing priority pages) or "red/black" (if they don't). + * The bitmask provides information on which parts of the cache + * have been used for pinned pages so far on this tile; if (1 << N) + * appears in the bitmask, that indicates that a 4KB region of the + * cache starting at (N * 4KB) is in use by a "priority" page. + * The portion of cache used by a particular page can be computed + * by taking the page's PA, modulo CHIP_L2_CACHE_SIZE(), and setting + * all the "4KB" bits corresponding to the actual page size. + * @param bitmask A bitmap of priority page set values + */ +void hv_set_caching(unsigned long bitmask); + + +/** Zero out a specified number of pages. + * The va and size must both be multiples of 4096. + * Caches are bypassed and memory is directly set to zero. + * This API is implemented only in the magic hypervisor and is intended + * to provide a performance boost to the minimal supervisor by + * giving it a fast way to zero memory pages when allocating them. + * @param va Virtual address where the page has been mapped + * @param size Number of bytes (must be a page size multiple) + */ +void hv_bzero_page(HV_VirtAddr va, unsigned int size); + + +/** State object for the hypervisor messaging subsystem. */ +typedef struct +{ +#if CHIP_VA_WIDTH() > 32 + __hv64 opaque[2]; /**< No user-serviceable parts inside */ +#else + __hv32 opaque[2]; /**< No user-serviceable parts inside */ +#endif +} +HV_MsgState; + +/** Register to receive incoming messages. + * + * This routine configures the current tile so that it can receive + * incoming messages. It must be called before the client can receive + * messages with the hv_receive_message routine, and must be called on + * each tile which will receive messages. + * + * msgstate is the virtual address of a state object of type HV_MsgState. + * Once the state is registered, the client must not read or write the + * state object; doing so will cause undefined results. + * + * If this routine is called with msgstate set to 0, the client's message + * state will be freed and it will no longer be able to receive messages. + * Note that this may cause the loss of any as-yet-undelivered messages + * for the client. + * + * If another client attempts to send a message to a client which has + * not yet called hv_register_message_state, or which has freed its + * message state, the message will not be delivered, as if the client + * had insufficient buffering. + * + * This routine returns HV_OK if the registration was successful, and + * HV_EINVAL if the supplied state object is unsuitable. Note that some + * errors may not be detected during this routine, but might be detected + * during a subsequent message delivery. + * @param msgstate State object. + **/ +HV_Errno hv_register_message_state(HV_MsgState* msgstate); + +/** Possible message recipient states. */ +typedef enum +{ + HV_TO_BE_SENT, /**< Not sent (not attempted, or recipient not ready) */ + HV_SENT, /**< Successfully sent */ + HV_BAD_RECIP /**< Bad recipient coordinates (permanent error) */ +} HV_Recip_State; + +/** Message recipient. */ +typedef struct +{ + /** X coordinate, relative to supervisor's top-left coordinate */ + unsigned int x:11; + + /** Y coordinate, relative to supervisor's top-left coordinate */ + unsigned int y:11; + + /** Status of this recipient */ + HV_Recip_State state:10; +} HV_Recipient; + +/** Send a message to a set of recipients. + * + * This routine sends a message to a set of recipients. + * + * recips is an array of HV_Recipient structures. Each specifies a tile, + * and a message state; initially, it is expected that the state will + * be set to HV_TO_BE_SENT. nrecip specifies the number of recipients + * in the recips array. + * + * For each recipient whose state is HV_TO_BE_SENT, the hypervisor attempts + * to send that tile the specified message. In order to successfully + * receive the message, the receiver must be a valid tile to which the + * sender has access, must not be the sending tile itself, and must have + * sufficient free buffer space. (The hypervisor guarantees that each + * tile which has called hv_register_message_state() will be able to + * buffer one message from every other tile which can legally send to it; + * more space may be provided but is not guaranteed.) If an invalid tile + * is specified, the recipient's state is set to HV_BAD_RECIP; this is a + * permanent delivery error. If the message is successfully delivered + * to the recipient's buffer, the recipient's state is set to HV_SENT. + * Otherwise, the recipient's state is unchanged. Message delivery is + * synchronous; all attempts to send messages are completed before this + * routine returns. + * + * If no permanent delivery errors were encountered, the routine returns + * the number of messages successfully sent: that is, the number of + * recipients whose states changed from HV_TO_BE_SENT to HV_SENT during + * this operation. If any permanent delivery errors were encountered, + * the routine returns HV_ERECIP. In the event of permanent delivery + * errors, it may be the case that delivery was not attempted to all + * recipients; if any messages were successfully delivered, however, + * recipients' state values will be updated appropriately. + * + * It is explicitly legal to specify a recipient structure whose state + * is not HV_TO_BE_SENT; such a recipient is ignored. One suggested way + * of using hv_send_message to send a message to multiple tiles is to set + * up a list of recipients, and then call the routine repeatedly with the + * same list, each time accumulating the number of messages successfully + * sent, until all messages are sent, a permanent error is encountered, + * or the desired number of attempts have been made. When used in this + * way, the routine will deliver each message no more than once to each + * recipient. + * + * Note that a message being successfully delivered to the recipient's + * buffer space does not guarantee that it is received by the recipient, + * either immediately or at any time in the future; the recipient might + * never call hv_receive_message, or could register a different state + * buffer, losing the message. + * + * Specifying the same recipient more than once in the recipient list + * is an error, which will not result in an error return but which may + * or may not result in more than one message being delivered to the + * recipient tile. + * + * buf and buflen specify the message to be sent. buf is a virtual address + * which must be currently mapped in the client's page table; if not, the + * routine returns HV_EFAULT. buflen must be greater than zero and less + * than or equal to HV_MAX_MESSAGE_SIZE, and nrecip must be less than the + * number of tiles to which the sender has access; if not, the routine + * returns HV_EINVAL. + * @param recips List of recipients. + * @param nrecip Number of recipients. + * @param buf Address of message data. + * @param buflen Length of message data. + **/ +int hv_send_message(HV_Recipient *recips, int nrecip, + HV_VirtAddr buf, int buflen); + +/** Maximum hypervisor message size, in bytes */ +#define HV_MAX_MESSAGE_SIZE 28 + + +/** Return value from hv_receive_message() */ +typedef struct +{ + int msglen; /**< Message length in bytes, or an error code */ + __hv32 source; /**< Code identifying message sender (HV_MSG_xxx) */ +} HV_RcvMsgInfo; + +#define HV_MSG_TILE 0x0 /**< Message source is another tile */ +#define HV_MSG_INTR 0x1 /**< Message source is a driver interrupt */ + +/** Receive a message. + * + * This routine retrieves a message from the client's incoming message + * buffer. + * + * Multiple messages sent from a particular sending tile to a particular + * receiving tile are received in the order that they were sent; however, + * no ordering is guaranteed between messages sent by different tiles. + * + * Whenever the a client's message buffer is empty, the first message + * subsequently received will cause the client's MESSAGE_RCV_DWNCL + * interrupt vector to be invoked through the interrupt downcall mechanism + * (see the description of the hv_downcall_dispatch() routine for details + * on downcalls). + * + * Another message-available downcall will not occur until a call to + * this routine is made when the message buffer is empty, and a message + * subsequently arrives. Note that such a downcall could occur while + * this routine is executing. If the calling code does not wish this + * to happen, it is recommended that this routine be called with the + * INTCTRL_1 interrupt masked, or inside an interrupt critical section. + * + * msgstate is the value previously passed to hv_register_message_state(). + * buf is the virtual address of the buffer into which the message will + * be written; buflen is the length of the buffer. + * + * This routine returns an HV_RcvMsgInfo structure. The msglen member + * of that structure is the length of the message received, zero if no + * message is available, or HV_E2BIG if the message is too large for the + * specified buffer. If the message is too large, it is not consumed, + * and may be retrieved by a subsequent call to this routine specifying + * a sufficiently large buffer. A buffer which is HV_MAX_MESSAGE_SIZE + * bytes long is guaranteed to be able to receive any possible message. + * + * The source member of the HV_RcvMsgInfo structure describes the sender + * of the message. For messages sent by another client tile via an + * hv_send_message() call, this value is HV_MSG_TILE; for messages sent + * as a result of a device interrupt, this value is HV_MSG_INTR. + */ + +HV_RcvMsgInfo hv_receive_message(HV_MsgState msgstate, HV_VirtAddr buf, + int buflen); + + +/** Start remaining tiles owned by this supervisor. Initially, only one tile + * executes the client program; after it calls this service, the other tiles + * are started. This allows the initial tile to do one-time configuration + * of shared data structures without having to lock them against simultaneous + * access. + */ +void hv_start_all_tiles(void); + + +/** Open a hypervisor device. + * + * This service initializes an I/O device and its hypervisor driver software, + * and makes it available for use. The open operation is per-device per-chip; + * once it has been performed, the device handle returned may be used in other + * device services calls made by any tile. + * + * @param name Name of the device. A base device name is just a text string + * (say, "pcie"). If there is more than one instance of a device, the + * base name is followed by a slash and a device number (say, "pcie/0"). + * Some devices may support further structure beneath those components; + * most notably, devices which require control operations do so by + * supporting reads and/or writes to a control device whose name + * includes a trailing "/ctl" (say, "pcie/0/ctl"). + * @param flags Flags (HV_DEV_xxx). + * @return A positive integer device handle, or a negative error code. + */ +int hv_dev_open(HV_VirtAddr name, __hv32 flags); + + +/** Close a hypervisor device. + * + * This service uninitializes an I/O device and its hypervisor driver + * software, and makes it unavailable for use. The close operation is + * per-device per-chip; once it has been performed, the device is no longer + * available. Normally there is no need to ever call the close service. + * + * @param devhdl Device handle of the device to be closed. + * @return Zero if the close is successful, otherwise, a negative error code. + */ +int hv_dev_close(int devhdl); + + +/** Read data from a hypervisor device synchronously. + * + * This service transfers data from a hypervisor device to a memory buffer. + * When the service returns, the data has been written from the memory buffer, + * and the buffer will not be further modified by the driver. + * + * No ordering is guaranteed between requests issued from different tiles. + * + * Devices may choose to support both the synchronous and asynchronous read + * operations, only one of them, or neither of them. + * + * @param devhdl Device handle of the device to be read from. + * @param flags Flags (HV_DEV_xxx). + * @param va Virtual address of the target data buffer. This buffer must + * be mapped in the currently installed page table; if not, HV_EFAULT + * may be returned. + * @param len Number of bytes to be transferred. + * @param offset Driver-dependent offset. For a random-access device, this is + * often a byte offset from the beginning of the device; in other cases, + * like on a control device, it may have a different meaning. + * @return A non-negative value if the read was at least partially successful; + * otherwise, a negative error code. The precise interpretation of + * the return value is driver-dependent, but many drivers will return + * the number of bytes successfully transferred. + */ +int hv_dev_pread(int devhdl, __hv32 flags, HV_VirtAddr va, __hv32 len, + __hv64 offset); + +#define HV_DEV_NB_EMPTY 0x1 /**< Don't block when no bytes of data can + be transferred. */ +#define HV_DEV_NB_PARTIAL 0x2 /**< Don't block when some bytes, but not all + of the requested bytes, can be + transferred. */ +#define HV_DEV_NOCACHE 0x4 /**< The caller warrants that none of the + cache lines which might contain data + from the requested buffer are valid. + Useful with asynchronous operations + only. */ + +#define HV_DEV_ALLFLAGS (HV_DEV_NB_EMPTY | HV_DEV_NB_PARTIAL | \ + HV_DEV_NOCACHE) /**< All HV_DEV_xxx flags */ + +/** Write data to a hypervisor device synchronously. + * + * This service transfers data from a memory buffer to a hypervisor device. + * When the service returns, the data has been read from the memory buffer, + * and the buffer may be overwritten by the client; the data may not + * necessarily have been conveyed to the actual hardware I/O interface. + * + * No ordering is guaranteed between requests issued from different tiles. + * + * Devices may choose to support both the synchronous and asynchronous write + * operations, only one of them, or neither of them. + * + * @param devhdl Device handle of the device to be written to. + * @param flags Flags (HV_DEV_xxx). + * @param va Virtual address of the source data buffer. This buffer must + * be mapped in the currently installed page table; if not, HV_EFAULT + * may be returned. + * @param len Number of bytes to be transferred. + * @param offset Driver-dependent offset. For a random-access device, this is + * often a byte offset from the beginning of the device; in other cases, + * like on a control device, it may have a different meaning. + * @return A non-negative value if the write was at least partially successful; + * otherwise, a negative error code. The precise interpretation of + * the return value is driver-dependent, but many drivers will return + * the number of bytes successfully transferred. + */ +int hv_dev_pwrite(int devhdl, __hv32 flags, HV_VirtAddr va, __hv32 len, + __hv64 offset); + + +/** Interrupt arguments, used in the asynchronous I/O interfaces. */ +#if CHIP_VA_WIDTH() > 32 +typedef __hv64 HV_IntArg; +#else +typedef __hv32 HV_IntArg; +#endif + +/** Interrupt messages are delivered via the mechanism as normal messages, + * but have a message source of HV_DEV_INTR. The message is formatted + * as an HV_IntrMsg structure. + */ + +typedef struct +{ + HV_IntArg intarg; /**< Interrupt argument, passed to the poll/preada/pwritea + services */ + HV_IntArg intdata; /**< Interrupt-specific interrupt data */ +} HV_IntrMsg; + +/** Request an interrupt message when a device condition is satisfied. + * + * This service requests that an interrupt message be delivered to the + * requesting tile when a device becomes readable or writable, or when any + * data queued to the device via previous write operations from this tile + * has been actually sent out on the hardware I/O interface. Devices may + * choose to support any, all, or none of the available conditions. + * + * If multiple conditions are specified, only one message will be + * delivered. If the event mask delivered to that interrupt handler + * indicates that some of the conditions have not yet occurred, the + * client must issue another poll() call if it wishes to wait for those + * conditions. + * + * Only one poll may be outstanding per device handle per tile. If more than + * one tile is polling on the same device and condition, they will all be + * notified when it happens. Because of this, clients may not assume that + * the condition signaled is necessarily still true when they request a + * subsequent service; for instance, the readable data which caused the + * poll call to interrupt may have been read by another tile in the interim. + * + * The notification interrupt message could come directly, or via the + * downcall (intctrl1) method, depending on what the tile is doing + * when the condition is satisfied. Note that it is possible for the + * requested interrupt to be delivered after this service is called but + * before it returns. + * + * @param devhdl Device handle of the device to be polled. + * @param events Flags denoting the events which will cause the interrupt to + * be delivered (HV_DEVPOLL_xxx). + * @param intarg Value which will be delivered as the intarg member of the + * eventual interrupt message; the intdata member will be set to a + * mask of HV_DEVPOLL_xxx values indicating which conditions have been + * satisifed. + * @return Zero if the interrupt was successfully scheduled; otherwise, a + * negative error code. + */ +int hv_dev_poll(int devhdl, __hv32 events, HV_IntArg intarg); + +#define HV_DEVPOLL_READ 0x1 /**< Test device for readability */ +#define HV_DEVPOLL_WRITE 0x2 /**< Test device for writability */ +#define HV_DEVPOLL_FLUSH 0x4 /**< Test device for output drained */ + + +/** Cancel a request for an interrupt when a device event occurs. + * + * This service requests that no interrupt be delivered when the events + * noted in the last-issued poll() call happen. Once this service returns, + * the interrupt has been canceled; however, it is possible for the interrupt + * to be delivered after this service is called but before it returns. + * + * @param devhdl Device handle of the device on which to cancel polling. + * @return Zero if the poll was successfully canceled; otherwise, a negative + * error code. + */ +int hv_dev_poll_cancel(int devhdl); + + +/** Scatter-gather list for preada/pwritea calls. */ +typedef struct +#if CHIP_VA_WIDTH() <= 32 +__attribute__ ((packed, aligned(4))) +#endif +{ + HV_PhysAddr pa; /**< Client physical address of the buffer segment. */ + HV_PTE pte; /**< Page table entry describing the caching and location + override characteristics of the buffer segment. Some + drivers ignore this element and will require that + the NOCACHE flag be set on their requests. */ + __hv32 len; /**< Length of the buffer segment. */ +} HV_SGL; + +#define HV_SGL_MAXLEN 16 /**< Maximum number of entries in a scatter-gather + list */ + +/** Read data from a hypervisor device asynchronously. + * + * This service transfers data from a hypervisor device to a memory buffer. + * When the service returns, the read has been scheduled. When the read + * completes, an interrupt message will be delivered, and the buffer will + * not be further modified by the driver. + * + * The number of possible outstanding asynchronous requests is defined by + * each driver, but it is recommended that it be at least two requests + * per tile per device. + * + * No ordering is guaranteed between synchronous and asynchronous requests, + * even those issued on the same tile. + * + * The completion interrupt message could come directly, or via the downcall + * (intctrl1) method, depending on what the tile is doing when the read + * completes. Interrupts do not coalesce; one is delivered for each + * asynchronous I/O request. Note that it is possible for the requested + * interrupt to be delivered after this service is called but before it + * returns. + * + * Devices may choose to support both the synchronous and asynchronous read + * operations, only one of them, or neither of them. + * + * @param devhdl Device handle of the device to be read from. + * @param flags Flags (HV_DEV_xxx). + * @param sgl_len Number of elements in the scatter-gather list. + * @param sgl Scatter-gather list describing the memory to which data will be + * written. + * @param offset Driver-dependent offset. For a random-access device, this is + * often a byte offset from the beginning of the device; in other cases, + * like on a control device, it may have a different meaning. + * @param intarg Value which will be delivered as the intarg member of the + * eventual interrupt message; the intdata member will be set to the + * normal return value from the read request. + * @return Zero if the read was successfully scheduled; otherwise, a negative + * error code. Note that some drivers may choose to pre-validate + * their arguments, and may thus detect certain device error + * conditions at this time rather than when the completion notification + * occurs, but this is not required. + */ +int hv_dev_preada(int devhdl, __hv32 flags, __hv32 sgl_len, + HV_SGL sgl[/* sgl_len */], __hv64 offset, HV_IntArg intarg); + + +/** Write data to a hypervisor device asynchronously. + * + * This service transfers data from a memory buffer to a hypervisor + * device. When the service returns, the write has been scheduled. + * When the write completes, an interrupt message will be delivered, + * and the buffer may be overwritten by the client; the data may not + * necessarily have been conveyed to the actual hardware I/O interface. + * + * The number of possible outstanding asynchronous requests is defined by + * each driver, but it is recommended that it be at least two requests + * per tile per device. + * + * No ordering is guaranteed between synchronous and asynchronous requests, + * even those issued on the same tile. + * + * The completion interrupt message could come directly, or via the downcall + * (intctrl1) method, depending on what the tile is doing when the read + * completes. Interrupts do not coalesce; one is delivered for each + * asynchronous I/O request. Note that it is possible for the requested + * interrupt to be delivered after this service is called but before it + * returns. + * + * Devices may choose to support both the synchronous and asynchronous write + * operations, only one of them, or neither of them. + * + * @param devhdl Device handle of the device to be read from. + * @param flags Flags (HV_DEV_xxx). + * @param sgl_len Number of elements in the scatter-gather list. + * @param sgl Scatter-gather list describing the memory from which data will be + * read. + * @param offset Driver-dependent offset. For a random-access device, this is + * often a byte offset from the beginning of the device; in other cases, + * like on a control device, it may have a different meaning. + * @param intarg Value which will be delivered as the intarg member of the + * eventual interrupt message; the intdata member will be set to the + * normal return value from the write request. + * @return Zero if the write was successfully scheduled; otherwise, a negative + * error code. Note that some drivers may choose to pre-validate + * their arguments, and may thus detect certain device error + * conditions at this time rather than when the completion notification + * occurs, but this is not required. + */ +int hv_dev_pwritea(int devhdl, __hv32 flags, __hv32 sgl_len, + HV_SGL sgl[/* sgl_len */], __hv64 offset, HV_IntArg intarg); + + +/** Define a pair of tile and ASID to identify a user process context. */ +typedef struct +{ + /** X coordinate, relative to supervisor's top-left coordinate */ + unsigned int x:11; + + /** Y coordinate, relative to supervisor's top-left coordinate */ + unsigned int y:11; + + /** ASID of the process on this x,y tile */ + HV_ASID asid:10; +} HV_Remote_ASID; + +/** Flush cache and/or TLB state on remote tiles. + * + * @param cache_pa Client physical address to flush from cache (ignored if + * the length encoded in cache_control is zero, or if + * HV_FLUSH_EVICT_L2 is set, or if cache_cpumask is NULL). + * @param cache_control This argument allows you to specify a length of + * physical address space to flush (maximum HV_FLUSH_MAX_CACHE_LEN). + * You can "or" in HV_FLUSH_EVICT_L2 to flush the whole L2 cache. + * You can "or" in HV_FLUSH_EVICT_L1I to flush the whole L1I cache. + * HV_FLUSH_ALL flushes all caches. + * @param cache_cpumask Bitmask (in row-major order, supervisor-relative) of + * tile indices to perform cache flush on. The low bit of the first + * word corresponds to the tile at the upper left-hand corner of the + * supervisor's rectangle. If passed as a NULL pointer, equivalent + * to an empty bitmask. On chips which support hash-for-home caching, + * if passed as -1, equivalent to a mask containing tiles which could + * be doing hash-for-home caching. + * @param tlb_va Virtual address to flush from TLB (ignored if + * tlb_length is zero or tlb_cpumask is NULL). + * @param tlb_length Number of bytes of data to flush from the TLB. + * @param tlb_pgsize Page size to use for TLB flushes. + * tlb_va and tlb_length need not be aligned to this size. + * @param tlb_cpumask Bitmask for tlb flush, like cache_cpumask. + * If passed as a NULL pointer, equivalent to an empty bitmask. + * @param asids Pointer to an HV_Remote_ASID array of tile/ASID pairs to flush. + * @param asidcount Number of HV_Remote_ASID entries in asids[]. + * @return Zero for success, or else HV_EINVAL or HV_EFAULT for errors that + * are detected while parsing the arguments. + */ +int hv_flush_remote(HV_PhysAddr cache_pa, unsigned long cache_control, + unsigned long* cache_cpumask, + HV_VirtAddr tlb_va, unsigned long tlb_length, + unsigned long tlb_pgsize, unsigned long* tlb_cpumask, + HV_Remote_ASID* asids, int asidcount); + +/** Include in cache_control to ensure a flush of the entire L2. */ +#define HV_FLUSH_EVICT_L2 (1UL << 31) + +/** Include in cache_control to ensure a flush of the entire L1I. */ +#define HV_FLUSH_EVICT_L1I (1UL << 30) + +/** Maximum legal size to use for the "length" component of cache_control. */ +#define HV_FLUSH_MAX_CACHE_LEN ((1UL << 30) - 1) + +/** Use for cache_control to ensure a flush of all caches. */ +#define HV_FLUSH_ALL -1UL + +#else /* __ASSEMBLER__ */ + +/** Include in cache_control to ensure a flush of the entire L2. */ +#define HV_FLUSH_EVICT_L2 (1 << 31) + +/** Include in cache_control to ensure a flush of the entire L1I. */ +#define HV_FLUSH_EVICT_L1I (1 << 30) + +/** Maximum legal size to use for the "length" component of cache_control. */ +#define HV_FLUSH_MAX_CACHE_LEN ((1 << 30) - 1) + +/** Use for cache_control to ensure a flush of all caches. */ +#define HV_FLUSH_ALL -1 + +#endif /* __ASSEMBLER__ */ + +#ifndef __ASSEMBLER__ + +/** Return a 64-bit value corresponding to the PTE if needed */ +#define hv_pte_val(pte) ((pte).val) + +/** Cast a 64-bit value to an HV_PTE */ +#define hv_pte(val) ((HV_PTE) { val }) + +#endif /* !__ASSEMBLER__ */ + + +/** Bits in the size of an HV_PTE */ +#define HV_LOG2_PTE_SIZE 3 + +/** Size of an HV_PTE */ +#define HV_PTE_SIZE (1 << HV_LOG2_PTE_SIZE) + + +/* Bits in HV_PTE's low word. */ +#define HV_PTE_INDEX_PRESENT 0 /**< PTE is valid */ +#define HV_PTE_INDEX_MIGRATING 1 /**< Page is migrating */ +#define HV_PTE_INDEX_CLIENT0 2 /**< Page client state 0 */ +#define HV_PTE_INDEX_CLIENT1 3 /**< Page client state 1 */ +#define HV_PTE_INDEX_NC 4 /**< L1$/L2$ incoherent with L3$ */ +#define HV_PTE_INDEX_NO_ALLOC_L1 5 /**< Page is uncached in local L1$ */ +#define HV_PTE_INDEX_NO_ALLOC_L2 6 /**< Page is uncached in local L2$ */ +#define HV_PTE_INDEX_CACHED_PRIORITY 7 /**< Page is priority cached */ +#define HV_PTE_INDEX_PAGE 8 /**< PTE describes a page */ +#define HV_PTE_INDEX_GLOBAL 9 /**< Page is global */ +#define HV_PTE_INDEX_USER 10 /**< Page is user-accessible */ +#define HV_PTE_INDEX_ACCESSED 11 /**< Page has been accessed */ +#define HV_PTE_INDEX_DIRTY 12 /**< Page has been written */ + /* Bits 13-14 are reserved for + future use. */ +#define HV_PTE_INDEX_SUPER 15 /**< Pages ganged together for TLB */ +#define HV_PTE_INDEX_MODE 16 /**< Page mode; see HV_PTE_MODE_xxx */ +#define HV_PTE_MODE_BITS 3 /**< Number of bits in mode */ +#define HV_PTE_INDEX_CLIENT2 19 /**< Page client state 2 */ +#define HV_PTE_INDEX_LOTAR 20 /**< Page's LOTAR; must be high bits + of word */ +#define HV_PTE_LOTAR_BITS 12 /**< Number of bits in a LOTAR */ + +/* Bits in HV_PTE's high word. */ +#define HV_PTE_INDEX_READABLE 32 /**< Page is readable */ +#define HV_PTE_INDEX_WRITABLE 33 /**< Page is writable */ +#define HV_PTE_INDEX_EXECUTABLE 34 /**< Page is executable */ +#define HV_PTE_INDEX_PTFN 35 /**< Page's PTFN; must be high bits + of word */ +#define HV_PTE_PTFN_BITS 29 /**< Number of bits in a PTFN */ + +/* + * Legal values for the PTE's mode field + */ +/** Data is not resident in any caches; loads and stores access memory + * directly. + */ +#define HV_PTE_MODE_UNCACHED 1 + +/** Data is resident in the tile's local L1 and/or L2 caches; if a load + * or store misses there, it goes to memory. + * + * The copy in the local L1$/L2$ is not invalidated when the copy in + * memory is changed. + */ +#define HV_PTE_MODE_CACHE_NO_L3 2 + +/** Data is resident in the tile's local L1 and/or L2 caches. If a load + * or store misses there, it goes to an L3 cache in a designated tile; + * if it misses there, it goes to memory. + * + * If the NC bit is not set, the copy in the local L1$/L2$ is invalidated + * when the copy in the remote L3$ is changed. Otherwise, such + * invalidation will not occur. + * + * Chips for which CHIP_HAS_COHERENT_LOCAL_CACHE() is 0 do not support + * invalidation from an L3$ to another tile's L1$/L2$. If the NC bit is + * clear on such a chip, no copy is kept in the local L1$/L2$ in this mode. + */ +#define HV_PTE_MODE_CACHE_TILE_L3 3 + +/** Data is resident in the tile's local L1 and/or L2 caches. If a load + * or store misses there, it goes to an L3 cache in one of a set of + * designated tiles; if it misses there, it goes to memory. Which tile + * is chosen from the set depends upon a hash function applied to the + * physical address. This mode is not supported on chips for which + * CHIP_HAS_CBOX_HOME_MAP() is 0. + * + * If the NC bit is not set, the copy in the local L1$/L2$ is invalidated + * when the copy in the remote L3$ is changed. Otherwise, such + * invalidation will not occur. + * + * Chips for which CHIP_HAS_COHERENT_LOCAL_CACHE() is 0 do not support + * invalidation from an L3$ to another tile's L1$/L2$. If the NC bit is + * clear on such a chip, no copy is kept in the local L1$/L2$ in this mode. + */ +#define HV_PTE_MODE_CACHE_HASH_L3 4 + +/** Data is not resident in memory; accesses are instead made to an I/O + * device, whose tile coordinates are given by the PTE's LOTAR field. + * This mode is only supported on chips for which CHIP_HAS_MMIO() is 1. + * The EXECUTABLE bit may not be set in an MMIO PTE. + */ +#define HV_PTE_MODE_MMIO 5 + + +/* C wants 1ULL so it is typed as __hv64, but the assembler needs just numbers. + * The assembler can't handle shifts greater than 31, but treats them + * as shifts mod 32, so assembler code must be aware of which word + * the bit belongs in when using these macros. + */ +#ifdef __ASSEMBLER__ +#define __HV_PTE_ONE 1 /**< One, for assembler */ +#else +#define __HV_PTE_ONE 1ULL /**< One, for C */ +#endif + +/** Is this PTE present? + * + * If this bit is set, this PTE represents a valid translation or level-2 + * page table pointer. Otherwise, the page table does not contain a + * translation for the subject virtual pages. + * + * If this bit is not set, the other bits in the PTE are not + * interpreted by the hypervisor, and may contain any value. + */ +#define HV_PTE_PRESENT (__HV_PTE_ONE << HV_PTE_INDEX_PRESENT) + +/** Does this PTE map a page? + * + * If this bit is set in a level-0 page table, the entry should be + * interpreted as a level-2 page table entry mapping a jumbo page. + * + * If this bit is set in a level-1 page table, the entry should be + * interpreted as a level-2 page table entry mapping a large page. + * + * This bit should not be modified by the client while PRESENT is set, as + * doing so may race with the hypervisor's update of ACCESSED and DIRTY bits. + * + * In a level-2 page table, this bit is ignored and must be zero. + */ +#define HV_PTE_PAGE (__HV_PTE_ONE << HV_PTE_INDEX_PAGE) + +/** Does this PTE implicitly reference multiple pages? + * + * If this bit is set in the page table (either in the level-2 page table, + * or in a higher level page table in conjunction with the PAGE bit) + * then the PTE specifies a range of contiguous pages, not a single page. + * The hv_set_pte_super_shift() allows you to specify the count for + * each level of the page table. + * + * Note: this bit is not supported on TILEPro systems. + */ +#define HV_PTE_SUPER (__HV_PTE_ONE << HV_PTE_INDEX_SUPER) + +/** Is this a global (non-ASID) mapping? + * + * If this bit is set, the translations established by this PTE will + * not be flushed from the TLB by the hv_flush_asid() service; they + * will be flushed by the hv_flush_page() or hv_flush_pages() services. + * + * Setting this bit for translations which are identical in all page + * tables (for instance, code and data belonging to a client OS) can + * be very beneficial, as it will reduce the number of TLB misses. + * Note that, while it is not an error which will be detected by the + * hypervisor, it is an extremely bad idea to set this bit for + * translations which are _not_ identical in all page tables. + * + * This bit should not be modified by the client while PRESENT is set, as + * doing so may race with the hypervisor's update of ACCESSED and DIRTY bits. + * + * This bit is ignored in level-1 PTEs unless the Page bit is set. + */ +#define HV_PTE_GLOBAL (__HV_PTE_ONE << HV_PTE_INDEX_GLOBAL) + +/** Is this mapping accessible to users? + * + * If this bit is set, code running at any PL will be permitted to + * access the virtual addresses mapped by this PTE. Otherwise, only + * code running at PL 1 or above will be allowed to do so. + * + * This bit should not be modified by the client while PRESENT is set, as + * doing so may race with the hypervisor's update of ACCESSED and DIRTY bits. + * + * This bit is ignored in level-1 PTEs unless the Page bit is set. + */ +#define HV_PTE_USER (__HV_PTE_ONE << HV_PTE_INDEX_USER) + +/** Has this mapping been accessed? + * + * This bit is set by the hypervisor when the memory described by the + * translation is accessed for the first time. It is never cleared by + * the hypervisor, but may be cleared by the client. After the bit + * has been cleared, subsequent references are not guaranteed to set + * it again until the translation has been flushed from the TLB. + * + * This bit is ignored in level-1 PTEs unless the Page bit is set. + */ +#define HV_PTE_ACCESSED (__HV_PTE_ONE << HV_PTE_INDEX_ACCESSED) + +/** Is this mapping dirty? + * + * This bit is set by the hypervisor when the memory described by the + * translation is written for the first time. It is never cleared by + * the hypervisor, but may be cleared by the client. After the bit + * has been cleared, subsequent references are not guaranteed to set + * it again until the translation has been flushed from the TLB. + * + * This bit is ignored in level-1 PTEs unless the Page bit is set. + */ +#define HV_PTE_DIRTY (__HV_PTE_ONE << HV_PTE_INDEX_DIRTY) + +/** Migrating bit in PTE. + * + * This bit is guaranteed not to be inspected or modified by the + * hypervisor. The name is indicative of the suggested use by the client + * to tag pages whose L3 cache is being migrated from one cpu to another. + */ +#define HV_PTE_MIGRATING (__HV_PTE_ONE << HV_PTE_INDEX_MIGRATING) + +/** Client-private bit in PTE. + * + * This bit is guaranteed not to be inspected or modified by the + * hypervisor. + */ +#define HV_PTE_CLIENT0 (__HV_PTE_ONE << HV_PTE_INDEX_CLIENT0) + +/** Client-private bit in PTE. + * + * This bit is guaranteed not to be inspected or modified by the + * hypervisor. + */ +#define HV_PTE_CLIENT1 (__HV_PTE_ONE << HV_PTE_INDEX_CLIENT1) + +/** Client-private bit in PTE. + * + * This bit is guaranteed not to be inspected or modified by the + * hypervisor. + */ +#define HV_PTE_CLIENT2 (__HV_PTE_ONE << HV_PTE_INDEX_CLIENT2) + +/** Non-coherent (NC) bit in PTE. + * + * If this bit is set, the mapping that is set up will be non-coherent + * (also known as non-inclusive). This means that changes to the L3 + * cache will not cause a local copy to be invalidated. It is generally + * recommended only for read-only mappings. + * + * In level-1 PTEs, if the Page bit is clear, this bit determines how the + * level-2 page table is accessed. + */ +#define HV_PTE_NC (__HV_PTE_ONE << HV_PTE_INDEX_NC) + +/** Is this page prevented from filling the L1$? + * + * If this bit is set, the page described by the PTE will not be cached + * the local cpu's L1 cache. + * + * If CHIP_HAS_NC_AND_NOALLOC_BITS() is not true in for this chip, + * it is illegal to use this attribute, and may cause client termination. + * + * In level-1 PTEs, if the Page bit is clear, this bit + * determines how the level-2 page table is accessed. + */ +#define HV_PTE_NO_ALLOC_L1 (__HV_PTE_ONE << HV_PTE_INDEX_NO_ALLOC_L1) + +/** Is this page prevented from filling the L2$? + * + * If this bit is set, the page described by the PTE will not be cached + * the local cpu's L2 cache. + * + * If CHIP_HAS_NC_AND_NOALLOC_BITS() is not true in for this chip, + * it is illegal to use this attribute, and may cause client termination. + * + * In level-1 PTEs, if the Page bit is clear, this bit determines how the + * level-2 page table is accessed. + */ +#define HV_PTE_NO_ALLOC_L2 (__HV_PTE_ONE << HV_PTE_INDEX_NO_ALLOC_L2) + +/** Is this a priority page? + * + * If this bit is set, the page described by the PTE will be given + * priority in the cache. Normally this translates into allowing the + * page to use only the "red" half of the cache. The client may wish to + * then use the hv_set_caching service to specify that other pages which + * alias this page will use only the "black" half of the cache. + * + * If the Cached Priority bit is clear, the hypervisor uses the + * current hv_set_caching() value to choose how to cache the page. + * + * It is illegal to set the Cached Priority bit if the Non-Cached bit + * is set and the Cached Remotely bit is clear, i.e. if requests to + * the page map directly to memory. + * + * This bit is ignored in level-1 PTEs unless the Page bit is set. + */ +#define HV_PTE_CACHED_PRIORITY (__HV_PTE_ONE << \ + HV_PTE_INDEX_CACHED_PRIORITY) + +/** Is this a readable mapping? + * + * If this bit is set, code will be permitted to read from (e.g., + * issue load instructions against) the virtual addresses mapped by + * this PTE. + * + * It is illegal for this bit to be clear if the Writable bit is set. + * + * This bit is ignored in level-1 PTEs unless the Page bit is set. + */ +#define HV_PTE_READABLE (__HV_PTE_ONE << HV_PTE_INDEX_READABLE) + +/** Is this a writable mapping? + * + * If this bit is set, code will be permitted to write to (e.g., issue + * store instructions against) the virtual addresses mapped by this + * PTE. + * + * This bit is ignored in level-1 PTEs unless the Page bit is set. + */ +#define HV_PTE_WRITABLE (__HV_PTE_ONE << HV_PTE_INDEX_WRITABLE) + +/** Is this an executable mapping? + * + * If this bit is set, code will be permitted to execute from + * (e.g., jump to) the virtual addresses mapped by this PTE. + * + * This bit applies to any processor on the tile, if there are more + * than one. + * + * This bit is ignored in level-1 PTEs unless the Page bit is set. + */ +#define HV_PTE_EXECUTABLE (__HV_PTE_ONE << HV_PTE_INDEX_EXECUTABLE) + +/** The width of a LOTAR's x or y bitfield. */ +#define HV_LOTAR_WIDTH 11 + +/** Converts an x,y pair to a LOTAR value. */ +#define HV_XY_TO_LOTAR(x, y) ((HV_LOTAR)(((x) << HV_LOTAR_WIDTH) | (y))) + +/** Extracts the X component of a lotar. */ +#define HV_LOTAR_X(lotar) ((lotar) >> HV_LOTAR_WIDTH) + +/** Extracts the Y component of a lotar. */ +#define HV_LOTAR_Y(lotar) ((lotar) & ((1 << HV_LOTAR_WIDTH) - 1)) + +#ifndef __ASSEMBLER__ + +/** Define accessor functions for a PTE bit. */ +#define _HV_BIT(name, bit) \ +static __inline int \ +hv_pte_get_##name(HV_PTE pte) \ +{ \ + return (pte.val >> HV_PTE_INDEX_##bit) & 1; \ +} \ + \ +static __inline HV_PTE \ +hv_pte_set_##name(HV_PTE pte) \ +{ \ + pte.val |= 1ULL << HV_PTE_INDEX_##bit; \ + return pte; \ +} \ + \ +static __inline HV_PTE \ +hv_pte_clear_##name(HV_PTE pte) \ +{ \ + pte.val &= ~(1ULL << HV_PTE_INDEX_##bit); \ + return pte; \ +} + +/* Generate accessors to get, set, and clear various PTE flags. + */ +_HV_BIT(present, PRESENT) +_HV_BIT(page, PAGE) +_HV_BIT(super, SUPER) +_HV_BIT(client0, CLIENT0) +_HV_BIT(client1, CLIENT1) +_HV_BIT(client2, CLIENT2) +_HV_BIT(migrating, MIGRATING) +_HV_BIT(nc, NC) +_HV_BIT(readable, READABLE) +_HV_BIT(writable, WRITABLE) +_HV_BIT(executable, EXECUTABLE) +_HV_BIT(accessed, ACCESSED) +_HV_BIT(dirty, DIRTY) +_HV_BIT(no_alloc_l1, NO_ALLOC_L1) +_HV_BIT(no_alloc_l2, NO_ALLOC_L2) +_HV_BIT(cached_priority, CACHED_PRIORITY) +_HV_BIT(global, GLOBAL) +_HV_BIT(user, USER) + +#undef _HV_BIT + +/** Get the page mode from the PTE. + * + * This field generally determines whether and how accesses to the page + * are cached; the HV_PTE_MODE_xxx symbols define the legal values for the + * page mode. The NC, NO_ALLOC_L1, and NO_ALLOC_L2 bits modify this + * general policy. + */ +static __inline unsigned int +hv_pte_get_mode(const HV_PTE pte) +{ + return (((__hv32) pte.val) >> HV_PTE_INDEX_MODE) & + ((1 << HV_PTE_MODE_BITS) - 1); +} + +/** Set the page mode into a PTE. See hv_pte_get_mode. */ +static __inline HV_PTE +hv_pte_set_mode(HV_PTE pte, unsigned int val) +{ + pte.val &= ~(((1ULL << HV_PTE_MODE_BITS) - 1) << HV_PTE_INDEX_MODE); + pte.val |= val << HV_PTE_INDEX_MODE; + return pte; +} + +/** Get the page frame number from the PTE. + * + * This field contains the upper bits of the CPA (client physical + * address) of the target page; the complete CPA is this field with + * HV_LOG2_PAGE_TABLE_ALIGN zero bits appended to it. + * + * For all PTEs in the lowest-level page table, and for all PTEs with + * the Page bit set in all page tables, the CPA must be aligned modulo + * the relevant page size. + */ +static __inline unsigned long +hv_pte_get_ptfn(const HV_PTE pte) +{ + return pte.val >> HV_PTE_INDEX_PTFN; +} + +/** Set the page table frame number into a PTE. See hv_pte_get_ptfn. */ +static __inline HV_PTE +hv_pte_set_ptfn(HV_PTE pte, unsigned long val) +{ + pte.val &= ~(((1ULL << HV_PTE_PTFN_BITS)-1) << HV_PTE_INDEX_PTFN); + pte.val |= (__hv64) val << HV_PTE_INDEX_PTFN; + return pte; +} + +/** Get the client physical address from the PTE. See hv_pte_set_ptfn. */ +static __inline HV_PhysAddr +hv_pte_get_pa(const HV_PTE pte) +{ + return (__hv64) hv_pte_get_ptfn(pte) << HV_LOG2_PAGE_TABLE_ALIGN; +} + +/** Set the client physical address into a PTE. See hv_pte_get_ptfn. */ +static __inline HV_PTE +hv_pte_set_pa(HV_PTE pte, HV_PhysAddr pa) +{ + return hv_pte_set_ptfn(pte, pa >> HV_LOG2_PAGE_TABLE_ALIGN); +} + + +/** Get the remote tile caching this page. + * + * Specifies the remote tile which is providing the L3 cache for this page. + * + * This field is ignored unless the page mode is HV_PTE_MODE_CACHE_TILE_L3. + * + * In level-1 PTEs, if the Page bit is clear, this field determines how the + * level-2 page table is accessed. + */ +static __inline unsigned int +hv_pte_get_lotar(const HV_PTE pte) +{ + unsigned int lotar = ((__hv32) pte.val) >> HV_PTE_INDEX_LOTAR; + + return HV_XY_TO_LOTAR( (lotar >> (HV_PTE_LOTAR_BITS / 2)), + (lotar & ((1 << (HV_PTE_LOTAR_BITS / 2)) - 1)) ); +} + + +/** Set the remote tile caching a page into a PTE. See hv_pte_get_lotar. */ +static __inline HV_PTE +hv_pte_set_lotar(HV_PTE pte, unsigned int val) +{ + unsigned int x = HV_LOTAR_X(val); + unsigned int y = HV_LOTAR_Y(val); + + pte.val &= ~(((1ULL << HV_PTE_LOTAR_BITS)-1) << HV_PTE_INDEX_LOTAR); + pte.val |= (x << (HV_PTE_INDEX_LOTAR + HV_PTE_LOTAR_BITS / 2)) | + (y << HV_PTE_INDEX_LOTAR); + return pte; +} + +#endif /* !__ASSEMBLER__ */ + +/** Converts a client physical address to a ptfn. */ +#define HV_CPA_TO_PTFN(p) ((p) >> HV_LOG2_PAGE_TABLE_ALIGN) + +/** Converts a ptfn to a client physical address. */ +#define HV_PTFN_TO_CPA(p) (((HV_PhysAddr)(p)) << HV_LOG2_PAGE_TABLE_ALIGN) + +#if CHIP_VA_WIDTH() > 32 + +/* + * Note that we currently do not allow customizing the page size + * of the L0 pages, but fix them at 4GB, so we do not use the + * "_HV_xxx" nomenclature for the L0 macros. + */ + +/** Log number of HV_PTE entries in L0 page table */ +#define HV_LOG2_L0_ENTRIES (CHIP_VA_WIDTH() - HV_LOG2_L1_SPAN) + +/** Number of HV_PTE entries in L0 page table */ +#define HV_L0_ENTRIES (1 << HV_LOG2_L0_ENTRIES) + +/** Log size of L0 page table in bytes */ +#define HV_LOG2_L0_SIZE (HV_LOG2_PTE_SIZE + HV_LOG2_L0_ENTRIES) + +/** Size of L0 page table in bytes */ +#define HV_L0_SIZE (1 << HV_LOG2_L0_SIZE) + +#ifdef __ASSEMBLER__ + +/** Index in L0 for a specific VA */ +#define HV_L0_INDEX(va) \ + (((va) >> HV_LOG2_L1_SPAN) & (HV_L0_ENTRIES - 1)) + +#else + +/** Index in L1 for a specific VA */ +#define HV_L0_INDEX(va) \ + (((HV_VirtAddr)(va) >> HV_LOG2_L1_SPAN) & (HV_L0_ENTRIES - 1)) + +#endif + +#endif /* CHIP_VA_WIDTH() > 32 */ + +/** Log number of HV_PTE entries in L1 page table */ +#define _HV_LOG2_L1_ENTRIES(log2_page_size_large) \ + (HV_LOG2_L1_SPAN - log2_page_size_large) + +/** Number of HV_PTE entries in L1 page table */ +#define _HV_L1_ENTRIES(log2_page_size_large) \ + (1 << _HV_LOG2_L1_ENTRIES(log2_page_size_large)) + +/** Log size of L1 page table in bytes */ +#define _HV_LOG2_L1_SIZE(log2_page_size_large) \ + (HV_LOG2_PTE_SIZE + _HV_LOG2_L1_ENTRIES(log2_page_size_large)) + +/** Size of L1 page table in bytes */ +#define _HV_L1_SIZE(log2_page_size_large) \ + (1 << _HV_LOG2_L1_SIZE(log2_page_size_large)) + +/** Log number of HV_PTE entries in level-2 page table */ +#define _HV_LOG2_L2_ENTRIES(log2_page_size_large, log2_page_size_small) \ + (log2_page_size_large - log2_page_size_small) + +/** Number of HV_PTE entries in level-2 page table */ +#define _HV_L2_ENTRIES(log2_page_size_large, log2_page_size_small) \ + (1 << _HV_LOG2_L2_ENTRIES(log2_page_size_large, log2_page_size_small)) + +/** Log size of level-2 page table in bytes */ +#define _HV_LOG2_L2_SIZE(log2_page_size_large, log2_page_size_small) \ + (HV_LOG2_PTE_SIZE + \ + _HV_LOG2_L2_ENTRIES(log2_page_size_large, log2_page_size_small)) + +/** Size of level-2 page table in bytes */ +#define _HV_L2_SIZE(log2_page_size_large, log2_page_size_small) \ + (1 << _HV_LOG2_L2_SIZE(log2_page_size_large, log2_page_size_small)) + +#ifdef __ASSEMBLER__ + +#if CHIP_VA_WIDTH() > 32 + +/** Index in L1 for a specific VA */ +#define _HV_L1_INDEX(va, log2_page_size_large) \ + (((va) >> log2_page_size_large) & (_HV_L1_ENTRIES(log2_page_size_large) - 1)) + +#else /* CHIP_VA_WIDTH() > 32 */ + +/** Index in L1 for a specific VA */ +#define _HV_L1_INDEX(va, log2_page_size_large) \ + (((va) >> log2_page_size_large)) + +#endif /* CHIP_VA_WIDTH() > 32 */ + +/** Index in level-2 page table for a specific VA */ +#define _HV_L2_INDEX(va, log2_page_size_large, log2_page_size_small) \ + (((va) >> log2_page_size_small) & \ + (_HV_L2_ENTRIES(log2_page_size_large, log2_page_size_small) - 1)) + +#else /* __ASSEMBLER __ */ + +#if CHIP_VA_WIDTH() > 32 + +/** Index in L1 for a specific VA */ +#define _HV_L1_INDEX(va, log2_page_size_large) \ + (((HV_VirtAddr)(va) >> log2_page_size_large) & \ + (_HV_L1_ENTRIES(log2_page_size_large) - 1)) + +#else /* CHIP_VA_WIDTH() > 32 */ + +/** Index in L1 for a specific VA */ +#define _HV_L1_INDEX(va, log2_page_size_large) \ + (((HV_VirtAddr)(va) >> log2_page_size_large)) + +#endif /* CHIP_VA_WIDTH() > 32 */ + +/** Index in level-2 page table for a specific VA */ +#define _HV_L2_INDEX(va, log2_page_size_large, log2_page_size_small) \ + (((HV_VirtAddr)(va) >> log2_page_size_small) & \ + (_HV_L2_ENTRIES(log2_page_size_large, log2_page_size_small) - 1)) + +#endif /* __ASSEMBLER __ */ + +/** Position of the PFN field within the PTE (subset of the PTFN). */ +#define _HV_PTE_INDEX_PFN(log2_page_size) \ + (HV_PTE_INDEX_PTFN + (log2_page_size - HV_LOG2_PAGE_TABLE_ALIGN)) + +/** Length of the PFN field within the PTE (subset of the PTFN). */ +#define _HV_PTE_INDEX_PFN_BITS(log2_page_size) \ + (HV_PTE_INDEX_PTFN_BITS - (log2_page_size - HV_LOG2_PAGE_TABLE_ALIGN)) + +/** Converts a client physical address to a pfn. */ +#define _HV_CPA_TO_PFN(p, log2_page_size) ((p) >> log2_page_size) + +/** Converts a pfn to a client physical address. */ +#define _HV_PFN_TO_CPA(p, log2_page_size) \ + (((HV_PhysAddr)(p)) << log2_page_size) + +/** Converts a ptfn to a pfn. */ +#define _HV_PTFN_TO_PFN(p, log2_page_size) \ + ((p) >> (log2_page_size - HV_LOG2_PAGE_TABLE_ALIGN)) + +/** Converts a pfn to a ptfn. */ +#define _HV_PFN_TO_PTFN(p, log2_page_size) \ + ((p) << (log2_page_size - HV_LOG2_PAGE_TABLE_ALIGN)) + +#endif /* _HV_HV_H */ diff --git a/arch/tile/include/hv/iorpc.h b/arch/tile/include/hv/iorpc.h new file mode 100644 index 000000000..ddf160448 --- /dev/null +++ b/arch/tile/include/hv/iorpc.h @@ -0,0 +1,714 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ +#ifndef _HV_IORPC_H_ +#define _HV_IORPC_H_ + +/** + * + * Error codes and struct definitions for the IO RPC library. + * + * The hypervisor's IO RPC component provides a convenient way for + * driver authors to proxy system calls between user space, linux, and + * the hypervisor driver. The core of the system is a set of Python + * files that take ".idl" files as input and generates the following + * source code: + * + * - _rpc_call() routines for use in userspace IO libraries. These + * routines take an argument list specified in the .idl file, pack the + * arguments in to a buffer, and read or write that buffer via the + * Linux iorpc driver. + * + * - dispatch_read() and dispatch_write() routines that hypervisor + * drivers can use to implement most of their dev_pread() and + * dev_pwrite() methods. These routines decode the incoming parameter + * blob, permission check and translate parameters where appropriate, + * and then invoke a callback routine for whichever RPC call has + * arrived. The driver simply implements the set of callback + * routines. + * + * The IO RPC system also includes the Linux 'iorpc' driver, which + * proxies calls between the userspace library and the hypervisor + * driver. The Linux driver is almost entirely device agnostic; it + * watches for special flags indicating cases where a memory buffer + * address might need to be translated, etc. As a result, driver + * writers can avoid many of the problem cases related to registering + * hardware resources like memory pages or interrupts. However, the + * drivers must be careful to obey the conventions documented below in + * order to work properly with the generic Linux iorpc driver. + * + * @section iorpc_domains Service Domains + * + * All iorpc-based drivers must support a notion of service domains. + * A service domain is basically an application context - state + * indicating resources that are allocated to that particular app + * which it may access and (perhaps) other applications may not + * access. Drivers can support any number of service domains they + * choose. In some cases the design is limited by a number of service + * domains supported by the IO hardware; in other cases the service + * domains are a purely software concept and the driver chooses a + * maximum number of domains based on how much state memory it is + * willing to preallocate. + * + * For example, the mPIPE driver only supports as many service domains + * as are supported by the mPIPE hardware. This limitation is + * required because the hardware implements its own MMIO protection + * scheme to allow large MMIO mappings while still protecting small + * register ranges within the page that should only be accessed by the + * hypervisor. + * + * In contrast, drivers with no hardware service domain limitations + * (for instance the TRIO shim) can implement an arbitrary number of + * service domains. In these cases, each service domain is limited to + * a carefully restricted set of legal MMIO addresses if necessary to + * keep one application from corrupting another application's state. + * + * @section iorpc_conventions System Call Conventions + * + * The driver's open routine is responsible for allocating a new + * service domain for each hv_dev_open() call. By convention, the + * return value from open() should be the service domain number on + * success, or GXIO_ERR_NO_SVC_DOM if no more service domains are + * available. + * + * The implementations of hv_dev_pread() and hv_dev_pwrite() are + * responsible for validating the devhdl value passed up by the + * client. Since the device handle returned by hv_dev_open() should + * embed the positive service domain number, drivers should make sure + * that DRV_HDL2BITS(devhdl) is a legal service domain. If the client + * passes an illegal service domain number, the routine should return + * GXIO_ERR_INVAL_SVC_DOM. Once the service domain number has been + * validated, the driver can copy to/from the client buffer and call + * the dispatch_read() or dispatch_write() methods created by the RPC + * generator. + * + * The hv_dev_close() implementation should reset all service domain + * state and put the service domain back on a free list for + * reallocation by a future application. In most cases, this will + * require executing a hardware reset or drain flow and denying any + * MMIO regions that were created for the service domain. + * + * @section iorpc_data Special Data Types + * + * The .idl file syntax allows the creation of syscalls with special + * parameters that require permission checks or translations as part + * of the system call path. Because of limitations in the code + * generator, APIs are generally limited to just one of these special + * parameters per system call, and they are sometimes required to be + * the first or last parameter to the call. Special parameters + * include: + * + * @subsection iorpc_mem_buffer MEM_BUFFER + * + * The MEM_BUFFER() datatype allows user space to "register" memory + * buffers with a device. Registering memory accomplishes two tasks: + * Linux keeps track of all buffers that might be modified by a + * hardware device, and the hardware device drivers bind registered + * buffers to particular hardware resources like ingress NotifRings. + * The MEM_BUFFER() idl syntax can take extra flags like ALIGN_64KB, + * ALIGN_SELF_SIZE, and FLAGS indicating that memory buffers must have + * certain alignment or that the user should be able to pass a "memory + * flags" word specifying attributes like nt_hint or IO cache pinning. + * The parser will accept multiple MEM_BUFFER() flags. + * + * Implementations must obey the following conventions when + * registering memory buffers via the iorpc flow. These rules are a + * result of the Linux driver implementation, which needs to keep + * track of how many times a particular page has been registered with + * the hardware so that it can release the page when all those + * registrations are cleared. + * + * - Memory registrations that refer to a resource which has already + * been bound must return GXIO_ERR_ALREADY_INIT. Thus, it is an + * error to register memory twice without resetting (i.e. closing) the + * resource in between. This convention keeps the Linux driver from + * having to track which particular devices a page is bound to. + * + * - At present, a memory registration is only cleared when the + * service domain is reset. In this case, the Linux driver simply + * closes the HV device file handle and then decrements the reference + * counts of all pages that were previously registered with the + * device. + * + * - In the future, we may add a mechanism for unregistering memory. + * One possible implementation would require that the user specify + * which buffer is currently registered. The HV would then verify + * that that page was actually the one currently mapped and return + * success or failure to Linux, which would then only decrement the + * page reference count if the addresses were mapped. Another scheme + * might allow Linux to pass a token to the HV to be returned when the + * resource is unmapped. + * + * @subsection iorpc_interrupt INTERRUPT + * + * The INTERRUPT .idl datatype allows the client to bind hardware + * interrupts to a particular combination of IPI parameters - CPU, IPI + * PL, and event bit number. This data is passed via a special + * datatype so that the Linux driver can validate the CPU and PL and + * the HV generic iorpc code can translate client CPUs to real CPUs. + * + * @subsection iorpc_pollfd_setup POLLFD_SETUP + * + * The POLLFD_SETUP .idl datatype allows the client to set up hardware + * interrupt bindings which are received by Linux but which are made + * visible to user processes as state transitions on a file descriptor; + * this allows user processes to use Linux primitives, such as poll(), to + * await particular hardware events. This data is passed via a special + * datatype so that the Linux driver may recognize the pollable file + * descriptor and translate it to a set of interrupt target information, + * and so that the HV generic iorpc code can translate client CPUs to real + * CPUs. + * + * @subsection iorpc_pollfd POLLFD + * + * The POLLFD .idl datatype allows manipulation of hardware interrupt + * bindings set up via the POLLFD_SETUP datatype; common operations are + * resetting the state of the requested interrupt events, and unbinding any + * bound interrupts. This data is passed via a special datatype so that + * the Linux driver may recognize the pollable file descriptor and + * translate it to an interrupt identifier previously supplied by the + * hypervisor as the result of an earlier pollfd_setup operation. + * + * @subsection iorpc_blob BLOB + * + * The BLOB .idl datatype allows the client to write an arbitrary + * length string of bytes up to the hypervisor driver. This can be + * useful for passing up large, arbitrarily structured data like + * classifier programs. The iorpc stack takes care of validating the + * buffer VA and CPA as the data passes up to the hypervisor. Unlike + * MEM_BUFFER(), the buffer is not registered - Linux does not bump + * page refcounts and the HV driver should not reuse the buffer once + * the system call is complete. + * + * @section iorpc_translation Translating User Space Calls + * + * The ::iorpc_offset structure describes the formatting of the offset + * that is passed to pread() or pwrite() as part of the generated RPC code. + * When the user calls up to Linux, the rpc code fills in all the fields of + * the offset, including a 16-bit opcode, a 16 bit format indicator, and 32 + * bits of user-specified "sub-offset". The opcode indicates which syscall + * is being requested. The format indicates whether there is a "prefix + * struct" at the start of the memory buffer passed to pwrite(), and if so + * what data is in that prefix struct. These prefix structs are used to + * implement special datatypes like MEM_BUFFER() and INTERRUPT - we arrange + * to put data that needs translation and permission checks at the start of + * the buffer so that the Linux driver and generic portions of the HV iorpc + * code can easily access the data. The 32 bits of user-specified + * "sub-offset" are most useful for pread() calls where the user needs to + * also pass in a few bits indicating which register to read, etc. + * + * The Linux iorpc driver watches for system calls that contain prefix + * structs so that it can translate parameters and bump reference + * counts as appropriate. It does not (currently) have any knowledge + * of the per-device opcodes - it doesn't care what operation you're + * doing to mPIPE, so long as it can do all the generic book-keeping. + * The hv/iorpc.h header file defines all of the generic encoding bits + * needed to translate iorpc calls without knowing which particular + * opcode is being issued. + * + * @section iorpc_globals Global iorpc Calls + * + * Implementing mmap() required adding some special iorpc syscalls + * that are only called by the Linux driver, never by userspace. + * These include get_mmio_base() and check_mmio_offset(). These + * routines are described in globals.idl and must be included in every + * iorpc driver. By providing these routines in every driver, Linux's + * mmap implementation can easily get the PTE bits it needs and + * validate the PA offset without needing to know the per-device + * opcodes to perform those tasks. + * + * @section iorpc_kernel Supporting gxio APIs in the Kernel + * + * The iorpc code generator also supports generation of kernel code + * implementing the gxio APIs. This capability is currently used by + * the mPIPE network driver, and will likely be used by the TRIO root + * complex and endpoint drivers and perhaps an in-kernel crypto + * driver. Each driver that wants to instantiate iorpc calls in the + * kernel needs to generate a kernel version of the generate rpc code + * and (probably) copy any related gxio source files into the kernel. + * The mPIPE driver provides a good example of this pattern. + */ + +#ifdef __KERNEL__ +#include +#else +#include +#endif + +#if defined(__HV__) +#include +#elif defined(__KERNEL__) +#include +#include +#else +#include +#endif + + +/** Code indicating translation services required within the RPC path. + * These indicate whether there is a translatable struct at the start + * of the RPC buffer and what information that struct contains. + */ +enum iorpc_format_e +{ + /** No translation required, no prefix struct. */ + IORPC_FORMAT_NONE, + + /** No translation required, no prefix struct, no access to this + * operation from user space. */ + IORPC_FORMAT_NONE_NOUSER, + + /** Prefix struct contains user VA and size. */ + IORPC_FORMAT_USER_MEM, + + /** Prefix struct contains CPA, size, and homing bits. */ + IORPC_FORMAT_KERNEL_MEM, + + /** Prefix struct contains interrupt. */ + IORPC_FORMAT_KERNEL_INTERRUPT, + + /** Prefix struct contains user-level interrupt. */ + IORPC_FORMAT_USER_INTERRUPT, + + /** Prefix struct contains pollfd_setup (interrupt information). */ + IORPC_FORMAT_KERNEL_POLLFD_SETUP, + + /** Prefix struct contains user-level pollfd_setup (file descriptor). */ + IORPC_FORMAT_USER_POLLFD_SETUP, + + /** Prefix struct contains pollfd (interrupt cookie). */ + IORPC_FORMAT_KERNEL_POLLFD, + + /** Prefix struct contains user-level pollfd (file descriptor). */ + IORPC_FORMAT_USER_POLLFD, +}; + + +/** Generate an opcode given format and code. */ +#define IORPC_OPCODE(FORMAT, CODE) (((FORMAT) << 16) | (CODE)) + +/** The offset passed through the read() and write() system calls + combines an opcode with 32 bits of user-specified offset. */ +union iorpc_offset +{ +#ifndef __BIG_ENDIAN__ + uint64_t offset; /**< All bits. */ + + struct + { + uint16_t code; /**< RPC code. */ + uint16_t format; /**< iorpc_format_e */ + uint32_t sub_offset; /**< caller-specified offset. */ + }; + + uint32_t opcode; /**< Opcode combines code & format. */ +#else + uint64_t offset; /**< All bits. */ + + struct + { + uint32_t sub_offset; /**< caller-specified offset. */ + uint16_t format; /**< iorpc_format_e */ + uint16_t code; /**< RPC code. */ + }; + + struct + { + uint32_t padding; + uint32_t opcode; /**< Opcode combines code & format. */ + }; +#endif +}; + + +/** Homing and cache hinting bits that can be used by IO devices. */ +struct iorpc_mem_attr +{ + unsigned int lotar_x:4; /**< lotar X bits (or Gx page_mask). */ + unsigned int lotar_y:4; /**< lotar Y bits (or Gx page_offset). */ + unsigned int hfh:1; /**< Uses hash-for-home. */ + unsigned int nt_hint:1; /**< Non-temporal hint. */ + unsigned int io_pin:1; /**< Only fill 'IO' cache ways. */ +}; + +/** Set the nt_hint bit. */ +#define IORPC_MEM_BUFFER_FLAG_NT_HINT (1 << 0) + +/** Set the IO pin bit. */ +#define IORPC_MEM_BUFFER_FLAG_IO_PIN (1 << 1) + + +/** A structure used to describe memory registration. Different + protection levels describe memory differently, so this union + contains all the different possible descriptions. As a request + moves up the call chain, each layer translates from one + description format to the next. In particular, the Linux iorpc + driver translates user VAs into CPAs and homing parameters. */ +union iorpc_mem_buffer +{ + struct + { + uint64_t va; /**< User virtual address. */ + uint64_t size; /**< Buffer size. */ + unsigned int flags; /**< nt_hint, IO pin. */ + } + user; /**< Buffer as described by user apps. */ + + struct + { + unsigned long long cpa; /**< Client physical address. */ +#if defined(__KERNEL__) || defined(__HV__) + size_t size; /**< Buffer size. */ + HV_PTE pte; /**< PTE describing memory homing. */ +#else + uint64_t size; + uint64_t pte; +#endif + unsigned int flags; /**< nt_hint, IO pin. */ + } + kernel; /**< Buffer as described by kernel. */ + + struct + { + unsigned long long pa; /**< Physical address. */ + size_t size; /**< Buffer size. */ + struct iorpc_mem_attr attr; /**< Homing and locality hint bits. */ + } + hv; /**< Buffer parameters for HV driver. */ +}; + + +/** A structure used to describe interrupts. The format differs slightly + * for user and kernel interrupts. As with the mem_buffer_t, translation + * between the formats is done at each level. */ +union iorpc_interrupt +{ + struct + { + int cpu; /**< CPU. */ + int event; /**< evt_num */ + } + user; /**< Interrupt as described by user applications. */ + + struct + { + int x; /**< X coord. */ + int y; /**< Y coord. */ + int ipi; /**< int_num */ + int event; /**< evt_num */ + } + kernel; /**< Interrupt as described by the kernel. */ + +}; + + +/** A structure used to describe interrupts used with poll(). The format + * differs significantly for requests from user to kernel, and kernel to + * hypervisor. As with the mem_buffer_t, translation between the formats + * is done at each level. */ +union iorpc_pollfd_setup +{ + struct + { + int fd; /**< Pollable file descriptor. */ + } + user; /**< pollfd_setup as described by user applications. */ + + struct + { + int x; /**< X coord. */ + int y; /**< Y coord. */ + int ipi; /**< int_num */ + int event; /**< evt_num */ + } + kernel; /**< pollfd_setup as described by the kernel. */ + +}; + + +/** A structure used to describe previously set up interrupts used with + * poll(). The format differs significantly for requests from user to + * kernel, and kernel to hypervisor. As with the mem_buffer_t, translation + * between the formats is done at each level. */ +union iorpc_pollfd +{ + struct + { + int fd; /**< Pollable file descriptor. */ + } + user; /**< pollfd as described by user applications. */ + + struct + { + int cookie; /**< hv cookie returned by the pollfd_setup operation. */ + } + kernel; /**< pollfd as described by the kernel. */ + +}; + + +/** The various iorpc devices use error codes from -1100 to -1299. + * + * This range is distinct from netio (-700 to -799), the hypervisor + * (-800 to -899), tilepci (-900 to -999), ilib (-1000 to -1099), + * gxcr (-1300 to -1399) and gxpci (-1400 to -1499). + */ +enum gxio_err_e { + + /** Largest iorpc error number. */ + GXIO_ERR_MAX = -1101, + + + /********************************************************/ + /* Generic Error Codes */ + /********************************************************/ + + /** Bad RPC opcode - possible version incompatibility. */ + GXIO_ERR_OPCODE = -1101, + + /** Invalid parameter. */ + GXIO_ERR_INVAL = -1102, + + /** Memory buffer did not meet alignment requirements. */ + GXIO_ERR_ALIGNMENT = -1103, + + /** Memory buffers must be coherent and cacheable. */ + GXIO_ERR_COHERENCE = -1104, + + /** Resource already initialized. */ + GXIO_ERR_ALREADY_INIT = -1105, + + /** No service domains available. */ + GXIO_ERR_NO_SVC_DOM = -1106, + + /** Illegal service domain number. */ + GXIO_ERR_INVAL_SVC_DOM = -1107, + + /** Illegal MMIO address. */ + GXIO_ERR_MMIO_ADDRESS = -1108, + + /** Illegal interrupt binding. */ + GXIO_ERR_INTERRUPT = -1109, + + /** Unreasonable client memory. */ + GXIO_ERR_CLIENT_MEMORY = -1110, + + /** No more IOTLB entries. */ + GXIO_ERR_IOTLB_ENTRY = -1111, + + /** Invalid memory size. */ + GXIO_ERR_INVAL_MEMORY_SIZE = -1112, + + /** Unsupported operation. */ + GXIO_ERR_UNSUPPORTED_OP = -1113, + + /** Insufficient DMA credits. */ + GXIO_ERR_DMA_CREDITS = -1114, + + /** Operation timed out. */ + GXIO_ERR_TIMEOUT = -1115, + + /** No such device or object. */ + GXIO_ERR_NO_DEVICE = -1116, + + /** Device or resource busy. */ + GXIO_ERR_BUSY = -1117, + + /** I/O error. */ + GXIO_ERR_IO = -1118, + + /** Permissions error. */ + GXIO_ERR_PERM = -1119, + + + + /********************************************************/ + /* Test Device Error Codes */ + /********************************************************/ + + /** Illegal register number. */ + GXIO_TEST_ERR_REG_NUMBER = -1120, + + /** Illegal buffer slot. */ + GXIO_TEST_ERR_BUFFER_SLOT = -1121, + + + /********************************************************/ + /* MPIPE Error Codes */ + /********************************************************/ + + + /** Invalid buffer size. */ + GXIO_MPIPE_ERR_INVAL_BUFFER_SIZE = -1131, + + /** Cannot allocate buffer stack. */ + GXIO_MPIPE_ERR_NO_BUFFER_STACK = -1140, + + /** Invalid buffer stack number. */ + GXIO_MPIPE_ERR_BAD_BUFFER_STACK = -1141, + + /** Cannot allocate NotifRing. */ + GXIO_MPIPE_ERR_NO_NOTIF_RING = -1142, + + /** Invalid NotifRing number. */ + GXIO_MPIPE_ERR_BAD_NOTIF_RING = -1143, + + /** Cannot allocate NotifGroup. */ + GXIO_MPIPE_ERR_NO_NOTIF_GROUP = -1144, + + /** Invalid NotifGroup number. */ + GXIO_MPIPE_ERR_BAD_NOTIF_GROUP = -1145, + + /** Cannot allocate bucket. */ + GXIO_MPIPE_ERR_NO_BUCKET = -1146, + + /** Invalid bucket number. */ + GXIO_MPIPE_ERR_BAD_BUCKET = -1147, + + /** Cannot allocate eDMA ring. */ + GXIO_MPIPE_ERR_NO_EDMA_RING = -1148, + + /** Invalid eDMA ring number. */ + GXIO_MPIPE_ERR_BAD_EDMA_RING = -1149, + + /** Invalid channel number. */ + GXIO_MPIPE_ERR_BAD_CHANNEL = -1150, + + /** Bad configuration. */ + GXIO_MPIPE_ERR_BAD_CONFIG = -1151, + + /** Empty iqueue. */ + GXIO_MPIPE_ERR_IQUEUE_EMPTY = -1152, + + /** Empty rules. */ + GXIO_MPIPE_ERR_RULES_EMPTY = -1160, + + /** Full rules. */ + GXIO_MPIPE_ERR_RULES_FULL = -1161, + + /** Corrupt rules. */ + GXIO_MPIPE_ERR_RULES_CORRUPT = -1162, + + /** Invalid rules. */ + GXIO_MPIPE_ERR_RULES_INVALID = -1163, + + /** Classifier is too big. */ + GXIO_MPIPE_ERR_CLASSIFIER_TOO_BIG = -1170, + + /** Classifier is too complex. */ + GXIO_MPIPE_ERR_CLASSIFIER_TOO_COMPLEX = -1171, + + /** Classifier has bad header. */ + GXIO_MPIPE_ERR_CLASSIFIER_BAD_HEADER = -1172, + + /** Classifier has bad contents. */ + GXIO_MPIPE_ERR_CLASSIFIER_BAD_CONTENTS = -1173, + + /** Classifier encountered invalid symbol. */ + GXIO_MPIPE_ERR_CLASSIFIER_INVAL_SYMBOL = -1174, + + /** Classifier encountered invalid bounds. */ + GXIO_MPIPE_ERR_CLASSIFIER_INVAL_BOUNDS = -1175, + + /** Classifier encountered invalid relocation. */ + GXIO_MPIPE_ERR_CLASSIFIER_INVAL_RELOCATION = -1176, + + /** Classifier encountered undefined symbol. */ + GXIO_MPIPE_ERR_CLASSIFIER_UNDEF_SYMBOL = -1177, + + + /********************************************************/ + /* TRIO Error Codes */ + /********************************************************/ + + /** Cannot allocate memory map region. */ + GXIO_TRIO_ERR_NO_MEMORY_MAP = -1180, + + /** Invalid memory map region number. */ + GXIO_TRIO_ERR_BAD_MEMORY_MAP = -1181, + + /** Cannot allocate scatter queue. */ + GXIO_TRIO_ERR_NO_SCATTER_QUEUE = -1182, + + /** Invalid scatter queue number. */ + GXIO_TRIO_ERR_BAD_SCATTER_QUEUE = -1183, + + /** Cannot allocate push DMA ring. */ + GXIO_TRIO_ERR_NO_PUSH_DMA_RING = -1184, + + /** Invalid push DMA ring index. */ + GXIO_TRIO_ERR_BAD_PUSH_DMA_RING = -1185, + + /** Cannot allocate pull DMA ring. */ + GXIO_TRIO_ERR_NO_PULL_DMA_RING = -1186, + + /** Invalid pull DMA ring index. */ + GXIO_TRIO_ERR_BAD_PULL_DMA_RING = -1187, + + /** Cannot allocate PIO region. */ + GXIO_TRIO_ERR_NO_PIO = -1188, + + /** Invalid PIO region index. */ + GXIO_TRIO_ERR_BAD_PIO = -1189, + + /** Cannot allocate ASID. */ + GXIO_TRIO_ERR_NO_ASID = -1190, + + /** Invalid ASID. */ + GXIO_TRIO_ERR_BAD_ASID = -1191, + + + /********************************************************/ + /* MICA Error Codes */ + /********************************************************/ + + /** No such accelerator type. */ + GXIO_MICA_ERR_BAD_ACCEL_TYPE = -1220, + + /** Cannot allocate context. */ + GXIO_MICA_ERR_NO_CONTEXT = -1221, + + /** PKA command queue is full, can't add another command. */ + GXIO_MICA_ERR_PKA_CMD_QUEUE_FULL = -1222, + + /** PKA result queue is empty, can't get a result from the queue. */ + GXIO_MICA_ERR_PKA_RESULT_QUEUE_EMPTY = -1223, + + /********************************************************/ + /* GPIO Error Codes */ + /********************************************************/ + + /** Pin not available. Either the physical pin does not exist, or + * it is reserved by the hypervisor for system usage. */ + GXIO_GPIO_ERR_PIN_UNAVAILABLE = -1240, + + /** Pin busy. The pin exists, and is available for use via GXIO, but + * it has been attached by some other process or driver. */ + GXIO_GPIO_ERR_PIN_BUSY = -1241, + + /** Cannot access unattached pin. One or more of the pins being + * manipulated by this call are not attached to the requesting + * context. */ + GXIO_GPIO_ERR_PIN_UNATTACHED = -1242, + + /** Invalid I/O mode for pin. The wiring of the pin in the system + * is such that the I/O mode or electrical control parameters + * requested could cause damage. */ + GXIO_GPIO_ERR_PIN_INVALID_MODE = -1243, + + /** Smallest iorpc error number. */ + GXIO_ERR_MIN = -1299 +}; + + +#endif /* !_HV_IORPC_H_ */ diff --git a/arch/tile/include/hv/netio_errors.h b/arch/tile/include/hv/netio_errors.h new file mode 100644 index 000000000..e1591bff6 --- /dev/null +++ b/arch/tile/include/hv/netio_errors.h @@ -0,0 +1,122 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * Error codes returned from NetIO routines. + */ + +#ifndef __NETIO_ERRORS_H__ +#define __NETIO_ERRORS_H__ + +/** + * @addtogroup error + * + * @brief The error codes returned by NetIO functions. + * + * NetIO functions return 0 (defined as ::NETIO_NO_ERROR) on success, and + * a negative value if an error occurs. + * + * In cases where a NetIO function failed due to a error reported by + * system libraries, the error code will be the negation of the + * system errno at the time of failure. The @ref netio_strerror() + * function will deliver error strings for both NetIO and system error + * codes. + * + * @{ + */ + +/** The set of all NetIO errors. */ +typedef enum +{ + /** Operation successfully completed. */ + NETIO_NO_ERROR = 0, + + /** A packet was successfully retrieved from an input queue. */ + NETIO_PKT = 0, + + /** Largest NetIO error number. */ + NETIO_ERR_MAX = -701, + + /** The tile is not registered with the IPP. */ + NETIO_NOT_REGISTERED = -701, + + /** No packet was available to retrieve from the input queue. */ + NETIO_NOPKT = -702, + + /** The requested function is not implemented. */ + NETIO_NOT_IMPLEMENTED = -703, + + /** On a registration operation, the target queue already has the maximum + * number of tiles registered for it, and no more may be added. On a + * packet send operation, the output queue is full and nothing more can + * be queued until some of the queued packets are actually transmitted. */ + NETIO_QUEUE_FULL = -704, + + /** The calling process or thread is not bound to exactly one CPU. */ + NETIO_BAD_AFFINITY = -705, + + /** Cannot allocate memory on requested controllers. */ + NETIO_CANNOT_HOME = -706, + + /** On a registration operation, the IPP specified is not configured + * to support the options requested; for instance, the application + * wants a specific type of tagged headers which the configured IPP + * doesn't support. Or, the supplied configuration information is + * not self-consistent, or is out of range; for instance, specifying + * both NETIO_RECV and NETIO_NO_RECV, or asking for more than + * NETIO_MAX_SEND_BUFFERS to be preallocated. On a VLAN or bucket + * configure operation, the number of items, or the base item, was + * out of range. + */ + NETIO_BAD_CONFIG = -707, + + /** Too many tiles have registered to transmit packets. */ + NETIO_TOOMANY_XMIT = -708, + + /** Packet transmission was attempted on a queue which was registered + with transmit disabled. */ + NETIO_UNREG_XMIT = -709, + + /** This tile is already registered with the IPP. */ + NETIO_ALREADY_REGISTERED = -710, + + /** The Ethernet link is down. The application should try again later. */ + NETIO_LINK_DOWN = -711, + + /** An invalid memory buffer has been specified. This may be an unmapped + * virtual address, or one which does not meet alignment requirements. + * For netio_input_register(), this error may be returned when multiple + * processes specify different memory regions to be used for NetIO + * buffers. That can happen if these processes specify explicit memory + * regions with the ::NETIO_FIXED_BUFFER_VA flag, or if tmc_cmem_init() + * has not been called by a common ancestor of the processes. + */ + NETIO_FAULT = -712, + + /** Cannot combine user-managed shared memory and cache coherence. */ + NETIO_BAD_CACHE_CONFIG = -713, + + /** Smallest NetIO error number. */ + NETIO_ERR_MIN = -713, + +#ifndef __DOXYGEN__ + /** Used internally to mean that no response is needed; never returned to + * an application. */ + NETIO_NO_RESPONSE = 1 +#endif +} netio_error_t; + +/** @} */ + +#endif /* __NETIO_ERRORS_H__ */ diff --git a/arch/tile/include/hv/netio_intf.h b/arch/tile/include/hv/netio_intf.h new file mode 100644 index 000000000..8d20972ab --- /dev/null +++ b/arch/tile/include/hv/netio_intf.h @@ -0,0 +1,2975 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * NetIO interface structures and macros. + */ + +#ifndef __NETIO_INTF_H__ +#define __NETIO_INTF_H__ + +#include + +#ifdef __KERNEL__ +#include +#else +#include +#endif + +#if !defined(__HV__) && !defined(__BOGUX__) && !defined(__KERNEL__) +#include +#define netio_assert assert /**< Enable assertions from macros */ +#else +#define netio_assert(...) ((void)(0)) /**< Disable assertions from macros */ +#endif + +/* + * If none of these symbols are defined, we're building libnetio in an + * environment where we have pthreads, so we'll enable locking. + */ +#if !defined(__HV__) && !defined(__BOGUX__) && !defined(__KERNEL__) && \ + !defined(__NEWLIB__) +#define _NETIO_PTHREAD /**< Include a mutex in netio_queue_t below */ + +/* + * If NETIO_UNLOCKED is defined, we don't do use per-cpu locks on + * per-packet NetIO operations. We still do pthread locking on things + * like netio_input_register, though. This is used for building + * libnetio_unlocked. + */ +#ifndef NETIO_UNLOCKED + +/* Avoid PLT overhead by using our own inlined per-cpu lock. */ +#include +typedef int _netio_percpu_mutex_t; + +static __inline int +_netio_percpu_mutex_init(_netio_percpu_mutex_t* lock) +{ + *lock = 0; + return 0; +} + +static __inline int +_netio_percpu_mutex_lock(_netio_percpu_mutex_t* lock) +{ + while (__builtin_expect(__insn_tns(lock), 0)) + sched_yield(); + return 0; +} + +static __inline int +_netio_percpu_mutex_unlock(_netio_percpu_mutex_t* lock) +{ + *lock = 0; + return 0; +} + +#else /* NETIO_UNLOCKED */ + +/* Don't do any locking for per-packet NetIO operations. */ +typedef int _netio_percpu_mutex_t; +#define _netio_percpu_mutex_init(L) +#define _netio_percpu_mutex_lock(L) +#define _netio_percpu_mutex_unlock(L) + +#endif /* NETIO_UNLOCKED */ +#endif /* !__HV__, !__BOGUX, !__KERNEL__, !__NEWLIB__ */ + +/** How many tiles can register for a given queue. + * @ingroup setup */ +#define NETIO_MAX_TILES_PER_QUEUE 64 + + +/** Largest permissible queue identifier. + * @ingroup setup */ +#define NETIO_MAX_QUEUE_ID 255 + + +#ifndef __DOXYGEN__ + +/* Metadata packet checksum/ethertype flags. */ + +/** The L4 checksum has not been calculated. */ +#define _NETIO_PKT_NO_L4_CSUM_SHIFT 0 +#define _NETIO_PKT_NO_L4_CSUM_RMASK 1 +#define _NETIO_PKT_NO_L4_CSUM_MASK \ + (_NETIO_PKT_NO_L4_CSUM_RMASK << _NETIO_PKT_NO_L4_CSUM_SHIFT) + +/** The L3 checksum has not been calculated. */ +#define _NETIO_PKT_NO_L3_CSUM_SHIFT 1 +#define _NETIO_PKT_NO_L3_CSUM_RMASK 1 +#define _NETIO_PKT_NO_L3_CSUM_MASK \ + (_NETIO_PKT_NO_L3_CSUM_RMASK << _NETIO_PKT_NO_L3_CSUM_SHIFT) + +/** The L3 checksum is incorrect (or perhaps has not been calculated). */ +#define _NETIO_PKT_BAD_L3_CSUM_SHIFT 2 +#define _NETIO_PKT_BAD_L3_CSUM_RMASK 1 +#define _NETIO_PKT_BAD_L3_CSUM_MASK \ + (_NETIO_PKT_BAD_L3_CSUM_RMASK << _NETIO_PKT_BAD_L3_CSUM_SHIFT) + +/** The Ethernet packet type is unrecognized. */ +#define _NETIO_PKT_TYPE_UNRECOGNIZED_SHIFT 3 +#define _NETIO_PKT_TYPE_UNRECOGNIZED_RMASK 1 +#define _NETIO_PKT_TYPE_UNRECOGNIZED_MASK \ + (_NETIO_PKT_TYPE_UNRECOGNIZED_RMASK << \ + _NETIO_PKT_TYPE_UNRECOGNIZED_SHIFT) + +/* Metadata packet type flags. */ + +/** Where the packet type bits are; this field is the index into + * _netio_pkt_info. */ +#define _NETIO_PKT_TYPE_SHIFT 4 +#define _NETIO_PKT_TYPE_RMASK 0x3F + +/** How many VLAN tags the packet has, and, if we have two, which one we + * actually grouped on. A VLAN within a proprietary (Marvell or Broadcom) + * tag is counted here. */ +#define _NETIO_PKT_VLAN_SHIFT 4 +#define _NETIO_PKT_VLAN_RMASK 0x3 +#define _NETIO_PKT_VLAN_MASK \ + (_NETIO_PKT_VLAN_RMASK << _NETIO_PKT_VLAN_SHIFT) +#define _NETIO_PKT_VLAN_NONE 0 /* No VLAN tag. */ +#define _NETIO_PKT_VLAN_ONE 1 /* One VLAN tag. */ +#define _NETIO_PKT_VLAN_TWO_OUTER 2 /* Two VLAN tags, outer one used. */ +#define _NETIO_PKT_VLAN_TWO_INNER 3 /* Two VLAN tags, inner one used. */ + +/** Which proprietary tags the packet has. */ +#define _NETIO_PKT_TAG_SHIFT 6 +#define _NETIO_PKT_TAG_RMASK 0x3 +#define _NETIO_PKT_TAG_MASK \ + (_NETIO_PKT_TAG_RMASK << _NETIO_PKT_TAG_SHIFT) +#define _NETIO_PKT_TAG_NONE 0 /* No proprietary tags. */ +#define _NETIO_PKT_TAG_MRVL 1 /* Marvell HyperG.Stack tags. */ +#define _NETIO_PKT_TAG_MRVL_EXT 2 /* HyperG.Stack extended tags. */ +#define _NETIO_PKT_TAG_BRCM 3 /* Broadcom HiGig tags. */ + +/** Whether a packet has an LLC + SNAP header. */ +#define _NETIO_PKT_SNAP_SHIFT 8 +#define _NETIO_PKT_SNAP_RMASK 0x1 +#define _NETIO_PKT_SNAP_MASK \ + (_NETIO_PKT_SNAP_RMASK << _NETIO_PKT_SNAP_SHIFT) + +/* NOTE: Bits 9 and 10 are unused. */ + +/** Length of any custom data before the L2 header, in words. */ +#define _NETIO_PKT_CUSTOM_LEN_SHIFT 11 +#define _NETIO_PKT_CUSTOM_LEN_RMASK 0x1F +#define _NETIO_PKT_CUSTOM_LEN_MASK \ + (_NETIO_PKT_CUSTOM_LEN_RMASK << _NETIO_PKT_CUSTOM_LEN_SHIFT) + +/** The L4 checksum is incorrect (or perhaps has not been calculated). */ +#define _NETIO_PKT_BAD_L4_CSUM_SHIFT 16 +#define _NETIO_PKT_BAD_L4_CSUM_RMASK 0x1 +#define _NETIO_PKT_BAD_L4_CSUM_MASK \ + (_NETIO_PKT_BAD_L4_CSUM_RMASK << _NETIO_PKT_BAD_L4_CSUM_SHIFT) + +/** Length of the L2 header, in words. */ +#define _NETIO_PKT_L2_LEN_SHIFT 17 +#define _NETIO_PKT_L2_LEN_RMASK 0x1F +#define _NETIO_PKT_L2_LEN_MASK \ + (_NETIO_PKT_L2_LEN_RMASK << _NETIO_PKT_L2_LEN_SHIFT) + + +/* Flags in minimal packet metadata. */ + +/** We need an eDMA checksum on this packet. */ +#define _NETIO_PKT_NEED_EDMA_CSUM_SHIFT 0 +#define _NETIO_PKT_NEED_EDMA_CSUM_RMASK 1 +#define _NETIO_PKT_NEED_EDMA_CSUM_MASK \ + (_NETIO_PKT_NEED_EDMA_CSUM_RMASK << _NETIO_PKT_NEED_EDMA_CSUM_SHIFT) + +/* Data within the packet information table. */ + +/* Note that, for efficiency, code which uses these fields assumes that none + * of the shift values below are zero. See uses below for an explanation. */ + +/** Offset within the L2 header of the innermost ethertype (in halfwords). */ +#define _NETIO_PKT_INFO_ETYPE_SHIFT 6 +#define _NETIO_PKT_INFO_ETYPE_RMASK 0x1F + +/** Offset within the L2 header of the VLAN tag (in halfwords). */ +#define _NETIO_PKT_INFO_VLAN_SHIFT 11 +#define _NETIO_PKT_INFO_VLAN_RMASK 0x1F + +#endif + + +/** The size of a memory buffer representing a small packet. + * @ingroup egress */ +#define SMALL_PACKET_SIZE 256 + +/** The size of a memory buffer representing a large packet. + * @ingroup egress */ +#define LARGE_PACKET_SIZE 2048 + +/** The size of a memory buffer representing a jumbo packet. + * @ingroup egress */ +#define JUMBO_PACKET_SIZE (12 * 1024) + + +/* Common ethertypes. + * @ingroup ingress */ +/** @{ */ +/** The ethertype of IPv4. */ +#define ETHERTYPE_IPv4 (0x0800) +/** The ethertype of ARP. */ +#define ETHERTYPE_ARP (0x0806) +/** The ethertype of VLANs. */ +#define ETHERTYPE_VLAN (0x8100) +/** The ethertype of a Q-in-Q header. */ +#define ETHERTYPE_Q_IN_Q (0x9100) +/** The ethertype of IPv6. */ +#define ETHERTYPE_IPv6 (0x86DD) +/** The ethertype of MPLS. */ +#define ETHERTYPE_MPLS (0x8847) +/** @} */ + + +/** The possible return values of NETIO_PKT_STATUS. + * @ingroup ingress + */ +typedef enum +{ + /** No problems were detected with this packet. */ + NETIO_PKT_STATUS_OK, + /** The packet is undersized; this is expected behavior if the packet's + * ethertype is unrecognized, but otherwise the packet is likely corrupt. */ + NETIO_PKT_STATUS_UNDERSIZE, + /** The packet is oversized and some trailing bytes have been discarded. + This is expected behavior for short packets, since it's impossible to + precisely determine the amount of padding which may have been added to + them to make them meet the minimum Ethernet packet size. */ + NETIO_PKT_STATUS_OVERSIZE, + /** The packet was judged to be corrupt by hardware (for instance, it had + a bad CRC, or part of it was discarded due to lack of buffer space in + the I/O shim) and should be discarded. */ + NETIO_PKT_STATUS_BAD +} netio_pkt_status_t; + + +/** Log2 of how many buckets we have. */ +#define NETIO_LOG2_NUM_BUCKETS (10) + +/** How many buckets we have. + * @ingroup ingress */ +#define NETIO_NUM_BUCKETS (1 << NETIO_LOG2_NUM_BUCKETS) + + +/** + * @brief A group-to-bucket identifier. + * + * @ingroup setup + * + * This tells us what to do with a given group. + */ +typedef union { + /** The header broken down into bits. */ + struct { + /** Whether we should balance on L4, if available */ + unsigned int __balance_on_l4:1; + /** Whether we should balance on L3, if available */ + unsigned int __balance_on_l3:1; + /** Whether we should balance on L2, if available */ + unsigned int __balance_on_l2:1; + /** Reserved for future use */ + unsigned int __reserved:1; + /** The base bucket to use to send traffic */ + unsigned int __bucket_base:NETIO_LOG2_NUM_BUCKETS; + /** The mask to apply to the balancing value. This must be one less + * than a power of two, e.g. 0x3 or 0xFF. + */ + unsigned int __bucket_mask:NETIO_LOG2_NUM_BUCKETS; + /** Pad to 32 bits */ + unsigned int __padding:(32 - 4 - 2 * NETIO_LOG2_NUM_BUCKETS); + } bits; + /** To send out the IDN. */ + unsigned int word; +} +netio_group_t; + + +/** + * @brief A VLAN-to-bucket identifier. + * + * @ingroup setup + * + * This tells us what to do with a given VLAN. + */ +typedef netio_group_t netio_vlan_t; + + +/** + * A bucket-to-queue mapping. + * @ingroup setup + */ +typedef unsigned char netio_bucket_t; + + +/** + * A packet size can always fit in a netio_size_t. + * @ingroup setup + */ +typedef unsigned int netio_size_t; + + +/** + * @brief Ethernet standard (ingress) packet metadata. + * + * @ingroup ingress + * + * This is additional data associated with each packet. + * This structure is opaque and accessed through the @ref ingress. + * + * Also, the buffer population operation currently assumes that standard + * metadata is at least as large as minimal metadata, and will need to be + * modified if that is no longer the case. + */ +typedef struct +{ +#ifdef __DOXYGEN__ + /** This structure is opaque. */ + unsigned char opaque[24]; +#else + /** The overall ordinal of the packet */ + unsigned int __packet_ordinal; + /** The ordinal of the packet within the group */ + unsigned int __group_ordinal; + /** The best flow hash IPP could compute. */ + unsigned int __flow_hash; + /** Flags pertaining to checksum calculation, packet type, etc. */ + unsigned int __flags; + /** The first word of "user data". */ + unsigned int __user_data_0; + /** The second word of "user data". */ + unsigned int __user_data_1; +#endif +} +netio_pkt_metadata_t; + + +/** To ensure that the L3 header is aligned mod 4, the L2 header should be + * aligned mod 4 plus 2, since every supported L2 header is 4n + 2 bytes + * long. The standard way to do this is to simply add 2 bytes of padding + * before the L2 header. + */ +#define NETIO_PACKET_PADDING 2 + + + +/** + * @brief Ethernet minimal (egress) packet metadata. + * + * @ingroup egress + * + * This structure represents information about packets which have + * been processed by @ref netio_populate_buffer() or + * @ref netio_populate_prepend_buffer(). This structure is opaque + * and accessed through the @ref egress. + * + * @internal This structure is actually copied into the memory used by + * standard metadata, which is assumed to be large enough. + */ +typedef struct +{ +#ifdef __DOXYGEN__ + /** This structure is opaque. */ + unsigned char opaque[14]; +#else + /** The offset of the L2 header from the start of the packet data. */ + unsigned short l2_offset; + /** The offset of the L3 header from the start of the packet data. */ + unsigned short l3_offset; + /** Where to write the checksum. */ + unsigned char csum_location; + /** Where to start checksumming from. */ + unsigned char csum_start; + /** Flags pertaining to checksum calculation etc. */ + unsigned short flags; + /** The L2 length of the packet. */ + unsigned short l2_length; + /** The checksum with which to seed the checksum generator. */ + unsigned short csum_seed; + /** How much to checksum. */ + unsigned short csum_length; +#endif +} +netio_pkt_minimal_metadata_t; + + +#ifndef __DOXYGEN__ + +/** + * @brief An I/O notification header. + * + * This is the first word of data received from an I/O shim in a notification + * packet. It contains framing and status information. + */ +typedef union +{ + unsigned int word; /**< The whole word. */ + /** The various fields. */ + struct + { + unsigned int __channel:7; /**< Resource channel. */ + unsigned int __type:4; /**< Type. */ + unsigned int __ack:1; /**< Whether an acknowledgement is needed. */ + unsigned int __reserved:1; /**< Reserved. */ + unsigned int __protocol:1; /**< A protocol-specific word is added. */ + unsigned int __status:2; /**< Status of the transfer. */ + unsigned int __framing:2; /**< Framing of the transfer. */ + unsigned int __transfer_size:14; /**< Transfer size in bytes (total). */ + } bits; +} +__netio_pkt_notif_t; + + +/** + * Returns the base address of the packet. + */ +#define _NETIO_PKT_HANDLE_BASE(p) \ + ((unsigned char*)((p).word & 0xFFFFFFC0)) + +/** + * Returns the base address of the packet. + */ +#define _NETIO_PKT_BASE(p) \ + _NETIO_PKT_HANDLE_BASE(p->__packet) + +/** + * @brief An I/O notification packet (second word) + * + * This is the second word of data received from an I/O shim in a notification + * packet. This is the virtual address of the packet buffer, plus some flag + * bits. (The virtual address of the packet is always 256-byte aligned so we + * have room for 8 bits' worth of flags in the low 8 bits.) + * + * @internal + * NOTE: The low two bits must contain "__queue", so the "packet size" + * (SIZE_SMALL, SIZE_LARGE, or SIZE_JUMBO) can be determined quickly. + * + * If __addr or __offset are moved, _NETIO_PKT_BASE + * (defined right below this) must be changed. + */ +typedef union +{ + unsigned int word; /**< The whole word. */ + /** The various fields. */ + struct + { + /** Which queue the packet will be returned to once it is sent back to + the IPP. This is one of the SIZE_xxx values. */ + unsigned int __queue:2; + + /** The IPP handle of the sending IPP. */ + unsigned int __ipp_handle:2; + + /** Reserved for future use. */ + unsigned int __reserved:1; + + /** If 1, this packet has minimal (egress) metadata; otherwise, it + has standard (ingress) metadata. */ + unsigned int __minimal:1; + + /** Offset of the metadata within the packet. This value is multiplied + * by 64 and added to the base packet address to get the metadata + * address. Note that this field is aligned within the word such that + * you can easily extract the metadata address with a 26-bit mask. */ + unsigned int __offset:2; + + /** The top 24 bits of the packet's virtual address. */ + unsigned int __addr:24; + } bits; +} +__netio_pkt_handle_t; + +#endif /* !__DOXYGEN__ */ + + +/** + * @brief A handle for an I/O packet's storage. + * @ingroup ingress + * + * netio_pkt_handle_t encodes the concept of a ::netio_pkt_t with its + * packet metadata removed. It is a much smaller type that exists to + * facilitate applications where the full ::netio_pkt_t type is too + * large, such as those that cache enormous numbers of packets or wish + * to transmit packet descriptors over the UDN. + * + * Because there is no metadata, most ::netio_pkt_t operations cannot be + * performed on a netio_pkt_handle_t. It supports only + * netio_free_handle() (to free the buffer) and + * NETIO_PKT_CUSTOM_DATA_H() (to access a pointer to its contents). + * The application must acquire any additional metadata it wants from the + * original ::netio_pkt_t and record it separately. + * + * A netio_pkt_handle_t can be extracted from a ::netio_pkt_t by calling + * NETIO_PKT_HANDLE(). An invalid handle (analogous to NULL) can be + * created by assigning the value ::NETIO_PKT_HANDLE_NONE. A handle can + * be tested for validity with NETIO_PKT_HANDLE_IS_VALID(). + */ +typedef struct +{ + unsigned int word; /**< Opaque bits. */ +} netio_pkt_handle_t; + +/** + * @brief A packet descriptor. + * + * @ingroup ingress + * @ingroup egress + * + * This data structure represents a packet. The structure is manipulated + * through the @ref ingress and the @ref egress. + * + * While the contents of a netio_pkt_t are opaque, the structure itself is + * portable. This means that it may be shared between all tiles which have + * done a netio_input_register() call for the interface on which the pkt_t + * was initially received (via netio_get_packet()) or retrieved (via + * netio_get_buffer()). The contents of a netio_pkt_t can be transmitted to + * another tile via shared memory, or via a UDN message, or by other means. + * The destination tile may then use the pkt_t as if it had originally been + * received locally; it may read or write the packet's data, read its + * metadata, free the packet, send the packet, transfer the netio_pkt_t to + * yet another tile, and so forth. + * + * Once a netio_pkt_t has been transferred to a second tile, the first tile + * should not reference the original copy; in particular, if more than one + * tile frees or sends the same netio_pkt_t, the IPP's packet free lists will + * become corrupted. Note also that each tile which reads or modifies + * packet data must obey the memory coherency rules outlined in @ref input. + */ +typedef struct +{ +#ifdef __DOXYGEN__ + /** This structure is opaque. */ + unsigned char opaque[32]; +#else + /** For an ingress packet (one with standard metadata), this is the + * notification header we got from the I/O shim. For an egress packet + * (one with minimal metadata), this word is zero if the packet has not + * been populated, and nonzero if it has. */ + __netio_pkt_notif_t __notif_header; + + /** Virtual address of the packet buffer, plus state flags. */ + __netio_pkt_handle_t __packet; + + /** Metadata associated with the packet. */ + netio_pkt_metadata_t __metadata; +#endif +} +netio_pkt_t; + + +#ifndef __DOXYGEN__ + +#define __NETIO_PKT_NOTIF_HEADER(pkt) ((pkt)->__notif_header) +#define __NETIO_PKT_IPP_HANDLE(pkt) ((pkt)->__packet.bits.__ipp_handle) +#define __NETIO_PKT_QUEUE(pkt) ((pkt)->__packet.bits.__queue) +#define __NETIO_PKT_NOTIF_HEADER_M(mda, pkt) ((pkt)->__notif_header) +#define __NETIO_PKT_IPP_HANDLE_M(mda, pkt) ((pkt)->__packet.bits.__ipp_handle) +#define __NETIO_PKT_MINIMAL(pkt) ((pkt)->__packet.bits.__minimal) +#define __NETIO_PKT_QUEUE_M(mda, pkt) ((pkt)->__packet.bits.__queue) +#define __NETIO_PKT_FLAGS_M(mda, pkt) ((mda)->__flags) + +/* Packet information table, used by the attribute access functions below. */ +extern const uint16_t _netio_pkt_info[]; + +#endif /* __DOXYGEN__ */ + + +#ifndef __DOXYGEN__ +/* These macros are deprecated and will disappear in a future MDE release. */ +#define NETIO_PKT_GOOD_CHECKSUM(pkt) \ + NETIO_PKT_L4_CSUM_CORRECT(pkt) +#define NETIO_PKT_GOOD_CHECKSUM_M(mda, pkt) \ + NETIO_PKT_L4_CSUM_CORRECT_M(mda, pkt) +#endif /* __DOXYGEN__ */ + + +/* Packet attribute access functions. */ + +/** Return a pointer to the metadata for a packet. + * @ingroup ingress + * + * Calling this function once and passing the result to other retrieval + * functions with a "_M" suffix usually improves performance. This + * function must be called on an 'ingress' packet (i.e. one retrieved + * by @ref netio_get_packet(), on which @ref netio_populate_buffer() or + * @ref netio_populate_prepend_buffer have not been called). Use of this + * function on an 'egress' packet will cause an assertion failure. + * + * @param[in] pkt Packet on which to operate. + * @return A pointer to the packet's standard metadata. + */ +static __inline netio_pkt_metadata_t* +NETIO_PKT_METADATA(netio_pkt_t* pkt) +{ + netio_assert(!pkt->__packet.bits.__minimal); + return &pkt->__metadata; +} + + +/** Return a pointer to the minimal metadata for a packet. + * @ingroup egress + * + * Calling this function once and passing the result to other retrieval + * functions with a "_MM" suffix usually improves performance. This + * function must be called on an 'egress' packet (i.e. one on which + * @ref netio_populate_buffer() or @ref netio_populate_prepend_buffer() + * have been called, or one retrieved by @ref netio_get_buffer()). Use of + * this function on an 'ingress' packet will cause an assertion failure. + * + * @param[in] pkt Packet on which to operate. + * @return A pointer to the packet's standard metadata. + */ +static __inline netio_pkt_minimal_metadata_t* +NETIO_PKT_MINIMAL_METADATA(netio_pkt_t* pkt) +{ + netio_assert(pkt->__packet.bits.__minimal); + return (netio_pkt_minimal_metadata_t*) &pkt->__metadata; +} + + +/** Determine whether a packet has 'minimal' metadata. + * @ingroup pktfuncs + * + * This function will return nonzero if the packet is an 'egress' + * packet (i.e. one on which @ref netio_populate_buffer() or + * @ref netio_populate_prepend_buffer() have been called, or one + * retrieved by @ref netio_get_buffer()), and zero if the packet + * is an 'ingress' packet (i.e. one retrieved by @ref netio_get_packet(), + * which has not been converted into an 'egress' packet). + * + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the packet has minimal metadata. + */ +static __inline unsigned int +NETIO_PKT_IS_MINIMAL(netio_pkt_t* pkt) +{ + return pkt->__packet.bits.__minimal; +} + + +/** Return a handle for a packet's storage. + * @ingroup pktfuncs + * + * @param[in] pkt Packet on which to operate. + * @return A handle for the packet's storage. + */ +static __inline netio_pkt_handle_t +NETIO_PKT_HANDLE(netio_pkt_t* pkt) +{ + netio_pkt_handle_t h; + h.word = pkt->__packet.word; + return h; +} + + +/** A special reserved value indicating the absence of a packet handle. + * + * @ingroup pktfuncs + */ +#define NETIO_PKT_HANDLE_NONE ((netio_pkt_handle_t) { 0 }) + + +/** Test whether a packet handle is valid. + * + * Applications may wish to use the reserved value NETIO_PKT_HANDLE_NONE + * to indicate no packet at all. This function tests to see if a packet + * handle is a real handle, not this special reserved value. + * + * @ingroup pktfuncs + * + * @param[in] handle Handle on which to operate. + * @return One if the packet handle is valid, else zero. + */ +static __inline unsigned int +NETIO_PKT_HANDLE_IS_VALID(netio_pkt_handle_t handle) +{ + return handle.word != 0; +} + + + +/** Return a pointer to the start of the packet's custom header. + * A custom header may or may not be present, depending upon the IPP; its + * contents and alignment are also IPP-dependent. Currently, none of the + * standard IPPs supplied by Tilera produce a custom header. If present, + * the custom header precedes the L2 header in the packet buffer. + * @ingroup ingress + * + * @param[in] handle Handle on which to operate. + * @return A pointer to start of the packet. + */ +static __inline unsigned char* +NETIO_PKT_CUSTOM_DATA_H(netio_pkt_handle_t handle) +{ + return _NETIO_PKT_HANDLE_BASE(handle) + NETIO_PACKET_PADDING; +} + + +/** Return the length of the packet's custom header. + * A custom header may or may not be present, depending upon the IPP; its + * contents and alignment are also IPP-dependent. Currently, none of the + * standard IPPs supplied by Tilera produce a custom header. If present, + * the custom header precedes the L2 header in the packet buffer. + * + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The length of the packet's custom header, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_CUSTOM_HEADER_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + /* + * Note that we effectively need to extract a quantity from the flags word + * which is measured in words, and then turn it into bytes by shifting + * it left by 2. We do this all at once by just shifting right two less + * bits, and shifting the mask up two bits. + */ + return ((mda->__flags >> (_NETIO_PKT_CUSTOM_LEN_SHIFT - 2)) & + (_NETIO_PKT_CUSTOM_LEN_RMASK << 2)); +} + + +/** Return the length of the packet, starting with the custom header. + * A custom header may or may not be present, depending upon the IPP; its + * contents and alignment are also IPP-dependent. Currently, none of the + * standard IPPs supplied by Tilera produce a custom header. If present, + * the custom header precedes the L2 header in the packet buffer. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The length of the packet, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_CUSTOM_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return (__NETIO_PKT_NOTIF_HEADER(pkt).bits.__transfer_size - + NETIO_PACKET_PADDING); +} + + +/** Return a pointer to the start of the packet's custom header. + * A custom header may or may not be present, depending upon the IPP; its + * contents and alignment are also IPP-dependent. Currently, none of the + * standard IPPs supplied by Tilera produce a custom header. If present, + * the custom header precedes the L2 header in the packet buffer. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return A pointer to start of the packet. + */ +static __inline unsigned char* +NETIO_PKT_CUSTOM_DATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return NETIO_PKT_CUSTOM_DATA_H(NETIO_PKT_HANDLE(pkt)); +} + + +/** Return the length of the packet's L2 (Ethernet plus VLAN or SNAP) header. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The length of the packet's L2 header, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L2_HEADER_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + /* + * Note that we effectively need to extract a quantity from the flags word + * which is measured in words, and then turn it into bytes by shifting + * it left by 2. We do this all at once by just shifting right two less + * bits, and shifting the mask up two bits. We then add two bytes. + */ + return ((mda->__flags >> (_NETIO_PKT_L2_LEN_SHIFT - 2)) & + (_NETIO_PKT_L2_LEN_RMASK << 2)) + 2; +} + + +/** Return the length of the packet, starting with the L2 (Ethernet) header. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The length of the packet, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L2_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return (NETIO_PKT_CUSTOM_LENGTH_M(mda, pkt) - + NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda,pkt)); +} + + +/** Return a pointer to the start of the packet's L2 (Ethernet) header. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return A pointer to start of the packet. + */ +static __inline unsigned char* +NETIO_PKT_L2_DATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return (NETIO_PKT_CUSTOM_DATA_M(mda, pkt) + + NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda, pkt)); +} + + +/** Retrieve the length of the packet, starting with the L3 (generally, + * the IP) header. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return Length of the packet's L3 header and data, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L3_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return (NETIO_PKT_L2_LENGTH_M(mda, pkt) - + NETIO_PKT_L2_HEADER_LENGTH_M(mda,pkt)); +} + + +/** Return a pointer to the packet's L3 (generally, the IP) header. + * @ingroup ingress + * + * Note that we guarantee word alignment of the L3 header. + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return A pointer to the packet's L3 header. + */ +static __inline unsigned char* +NETIO_PKT_L3_DATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return (NETIO_PKT_L2_DATA_M(mda, pkt) + + NETIO_PKT_L2_HEADER_LENGTH_M(mda, pkt)); +} + + +/** Return the ordinal of the packet. + * @ingroup ingress + * + * Each packet is given an ordinal number when it is delivered by the IPP. + * In the medium term, the ordinal is unique and monotonically increasing, + * being incremented by 1 for each packet; the ordinal of the first packet + * delivered after the IPP starts is zero. (Since the ordinal is of finite + * size, given enough input packets, it will eventually wrap around to zero; + * in the long term, therefore, ordinals are not unique.) The ordinals + * handed out by different IPPs are not disjoint, so two packets from + * different IPPs may have identical ordinals. Packets dropped by the + * IPP or by the I/O shim are not assigned ordinals. + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The packet's per-IPP packet ordinal. + */ +static __inline unsigned int +NETIO_PKT_ORDINAL_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return mda->__packet_ordinal; +} + + +/** Return the per-group ordinal of the packet. + * @ingroup ingress + * + * Each packet is given a per-group ordinal number when it is + * delivered by the IPP. By default, the group is the packet's VLAN, + * although IPP can be recompiled to use different values. In + * the medium term, the ordinal is unique and monotonically + * increasing, being incremented by 1 for each packet; the ordinal of + * the first packet distributed to a particular group is zero. + * (Since the ordinal is of finite size, given enough input packets, + * it will eventually wrap around to zero; in the long term, + * therefore, ordinals are not unique.) The ordinals handed out by + * different IPPs are not disjoint, so two packets from different IPPs + * may have identical ordinals; similarly, packets distributed to + * different groups may have identical ordinals. Packets dropped by + * the IPP or by the I/O shim are not assigned ordinals. + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The packet's per-IPP, per-group ordinal. + */ +static __inline unsigned int +NETIO_PKT_GROUP_ORDINAL_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return mda->__group_ordinal; +} + + +/** Return the VLAN ID assigned to the packet. + * @ingroup ingress + * + * This value is usually contained within the packet header. + * + * This value will be zero if the packet does not have a VLAN tag, or if + * this value was not extracted from the packet. + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The packet's VLAN ID. + */ +static __inline unsigned short +NETIO_PKT_VLAN_ID_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + int vl = (mda->__flags >> _NETIO_PKT_VLAN_SHIFT) & _NETIO_PKT_VLAN_RMASK; + unsigned short* pkt_p; + int index; + unsigned short val; + + if (vl == _NETIO_PKT_VLAN_NONE) + return 0; + + pkt_p = (unsigned short*) NETIO_PKT_L2_DATA_M(mda, pkt); + index = (mda->__flags >> _NETIO_PKT_TYPE_SHIFT) & _NETIO_PKT_TYPE_RMASK; + + val = pkt_p[(_netio_pkt_info[index] >> _NETIO_PKT_INFO_VLAN_SHIFT) & + _NETIO_PKT_INFO_VLAN_RMASK]; + +#ifdef __TILECC__ + return (__insn_bytex(val) >> 16) & 0xFFF; +#else + return (__builtin_bswap32(val) >> 16) & 0xFFF; +#endif +} + + +/** Return the ethertype of the packet. + * @ingroup ingress + * + * This value is usually contained within the packet header. + * + * This value is reliable if @ref NETIO_PKT_ETHERTYPE_RECOGNIZED_M() + * returns true, and otherwise, may not be well defined. + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The packet's ethertype. + */ +static __inline unsigned short +NETIO_PKT_ETHERTYPE_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + unsigned short* pkt_p = (unsigned short*) NETIO_PKT_L2_DATA_M(mda, pkt); + int index = (mda->__flags >> _NETIO_PKT_TYPE_SHIFT) & _NETIO_PKT_TYPE_RMASK; + + unsigned short val = + pkt_p[(_netio_pkt_info[index] >> _NETIO_PKT_INFO_ETYPE_SHIFT) & + _NETIO_PKT_INFO_ETYPE_RMASK]; + + return __builtin_bswap32(val) >> 16; +} + + +/** Return the flow hash computed on the packet. + * @ingroup ingress + * + * For TCP and UDP packets, this hash is calculated by hashing together + * the "5-tuple" values, specifically the source IP address, destination + * IP address, protocol type, source port and destination port. + * The hash value is intended to be helpful for millions of distinct + * flows. + * + * For IPv4 or IPv6 packets which are neither TCP nor UDP, the flow hash is + * derived by hashing together the source and destination IP addresses. + * + * For MPLS-encapsulated packets, the flow hash is derived by hashing + * the first MPLS label. + * + * For all other packets the flow hash is computed from the source + * and destination Ethernet addresses. + * + * The hash is symmetric, meaning it produces the same value if the + * source and destination are swapped. The only exceptions are + * tunneling protocols 0x04 (IP in IP Encapsulation), 0x29 (Simple + * Internet Protocol), 0x2F (General Routing Encapsulation) and 0x32 + * (Encap Security Payload), which use only the destination address + * since the source address is not meaningful. + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The packet's 32-bit flow hash. + */ +static __inline unsigned int +NETIO_PKT_FLOW_HASH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return mda->__flow_hash; +} + + +/** Return the first word of "user data" for the packet. + * + * The contents of the user data words depend on the IPP. + * + * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the first + * word of user data contains the least significant bits of the 64-bit + * arrival cycle count (see @c get_cycle_count_low()). + * + * See the System Programmer's Guide for details. + * + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The packet's first word of "user data". + */ +static __inline unsigned int +NETIO_PKT_USER_DATA_0_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return mda->__user_data_0; +} + + +/** Return the second word of "user data" for the packet. + * + * The contents of the user data words depend on the IPP. + * + * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the second + * word of user data contains the most significant bits of the 64-bit + * arrival cycle count (see @c get_cycle_count_high()). + * + * See the System Programmer's Guide for details. + * + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The packet's second word of "user data". + */ +static __inline unsigned int +NETIO_PKT_USER_DATA_1_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return mda->__user_data_1; +} + + +/** Determine whether the L4 (TCP/UDP) checksum was calculated. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the L4 checksum was calculated. + */ +static __inline unsigned int +NETIO_PKT_L4_CSUM_CALCULATED_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return !(mda->__flags & _NETIO_PKT_NO_L4_CSUM_MASK); +} + + +/** Determine whether the L4 (TCP/UDP) checksum was calculated and found to + * be correct. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the checksum was calculated and is correct. + */ +static __inline unsigned int +NETIO_PKT_L4_CSUM_CORRECT_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return !(mda->__flags & + (_NETIO_PKT_BAD_L4_CSUM_MASK | _NETIO_PKT_NO_L4_CSUM_MASK)); +} + + +/** Determine whether the L3 (IP) checksum was calculated. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the L3 (IP) checksum was calculated. +*/ +static __inline unsigned int +NETIO_PKT_L3_CSUM_CALCULATED_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return !(mda->__flags & _NETIO_PKT_NO_L3_CSUM_MASK); +} + + +/** Determine whether the L3 (IP) checksum was calculated and found to be + * correct. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the checksum was calculated and is correct. + */ +static __inline unsigned int +NETIO_PKT_L3_CSUM_CORRECT_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return !(mda->__flags & + (_NETIO_PKT_BAD_L3_CSUM_MASK | _NETIO_PKT_NO_L3_CSUM_MASK)); +} + + +/** Determine whether the ethertype was recognized and L3 packet data was + * processed. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the ethertype was recognized and L3 packet data was + * processed. + */ +static __inline unsigned int +NETIO_PKT_ETHERTYPE_RECOGNIZED_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return !(mda->__flags & _NETIO_PKT_TYPE_UNRECOGNIZED_MASK); +} + + +/** Retrieve the status of a packet and any errors that may have occurred + * during ingress processing (length mismatches, CRC errors, etc.). + * @ingroup ingress + * + * Note that packets for which @ref NETIO_PKT_ETHERTYPE_RECOGNIZED() + * returns zero are always reported as underlength, as there is no a priori + * means to determine their length. Normally, applications should use + * @ref NETIO_PKT_BAD_M() instead of explicitly checking status with this + * function. + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The packet's status. + */ +static __inline netio_pkt_status_t +NETIO_PKT_STATUS_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return (netio_pkt_status_t) __NETIO_PKT_NOTIF_HEADER(pkt).bits.__status; +} + + +/** Report whether a packet is bad (i.e., was shorter than expected based on + * its headers, or had a bad CRC). + * @ingroup ingress + * + * Note that this function does not verify L3 or L4 checksums. + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the packet is bad and should be discarded. + */ +static __inline unsigned int +NETIO_PKT_BAD_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return ((NETIO_PKT_STATUS_M(mda, pkt) & 1) && + (NETIO_PKT_ETHERTYPE_RECOGNIZED_M(mda, pkt) || + NETIO_PKT_STATUS_M(mda, pkt) == NETIO_PKT_STATUS_BAD)); +} + + +/** Return the length of the packet, starting with the L2 (Ethernet) header. + * @ingroup egress + * + * @param[in] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + * @return The length of the packet, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L2_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt) +{ + return mmd->l2_length; +} + + +/** Return the length of the L2 (Ethernet) header. + * @ingroup egress + * + * @param[in] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + * @return The length of the packet's L2 header, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L2_HEADER_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, + netio_pkt_t* pkt) +{ + return mmd->l3_offset - mmd->l2_offset; +} + + +/** Return the length of the packet, starting with the L3 (IP) header. + * @ingroup egress + * + * @param[in] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + * @return Length of the packet's L3 header and data, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L3_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt) +{ + return (NETIO_PKT_L2_LENGTH_MM(mmd, pkt) - + NETIO_PKT_L2_HEADER_LENGTH_MM(mmd, pkt)); +} + + +/** Return a pointer to the packet's L3 (generally, the IP) header. + * @ingroup egress + * + * Note that we guarantee word alignment of the L3 header. + * + * @param[in] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + * @return A pointer to the packet's L3 header. + */ +static __inline unsigned char* +NETIO_PKT_L3_DATA_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt) +{ + return _NETIO_PKT_BASE(pkt) + mmd->l3_offset; +} + + +/** Return a pointer to the packet's L2 (Ethernet) header. + * @ingroup egress + * + * @param[in] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + * @return A pointer to start of the packet. + */ +static __inline unsigned char* +NETIO_PKT_L2_DATA_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt) +{ + return _NETIO_PKT_BASE(pkt) + mmd->l2_offset; +} + + +/** Retrieve the status of a packet and any errors that may have occurred + * during ingress processing (length mismatches, CRC errors, etc.). + * @ingroup ingress + * + * Note that packets for which @ref NETIO_PKT_ETHERTYPE_RECOGNIZED() + * returns zero are always reported as underlength, as there is no a priori + * means to determine their length. Normally, applications should use + * @ref NETIO_PKT_BAD() instead of explicitly checking status with this + * function. + * + * @param[in] pkt Packet on which to operate. + * @return The packet's status. + */ +static __inline netio_pkt_status_t +NETIO_PKT_STATUS(netio_pkt_t* pkt) +{ + netio_assert(!pkt->__packet.bits.__minimal); + + return (netio_pkt_status_t) __NETIO_PKT_NOTIF_HEADER(pkt).bits.__status; +} + + +/** Report whether a packet is bad (i.e., was shorter than expected based on + * its headers, or had a bad CRC). + * @ingroup ingress + * + * Note that this function does not verify L3 or L4 checksums. + * + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the packet is bad and should be discarded. + */ +static __inline unsigned int +NETIO_PKT_BAD(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_BAD_M(mda, pkt); +} + + +/** Return the length of the packet's custom header. + * A custom header may or may not be present, depending upon the IPP; its + * contents and alignment are also IPP-dependent. Currently, none of the + * standard IPPs supplied by Tilera produce a custom header. If present, + * the custom header precedes the L2 header in the packet buffer. + * @ingroup pktfuncs + * + * @param[in] pkt Packet on which to operate. + * @return The length of the packet's custom header, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_CUSTOM_HEADER_LENGTH(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda, pkt); +} + + +/** Return the length of the packet, starting with the custom header. + * A custom header may or may not be present, depending upon the IPP; its + * contents and alignment are also IPP-dependent. Currently, none of the + * standard IPPs supplied by Tilera produce a custom header. If present, + * the custom header precedes the L2 header in the packet buffer. + * @ingroup pktfuncs + * + * @param[in] pkt Packet on which to operate. + * @return The length of the packet, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_CUSTOM_LENGTH(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_CUSTOM_LENGTH_M(mda, pkt); +} + + +/** Return a pointer to the packet's custom header. + * A custom header may or may not be present, depending upon the IPP; its + * contents and alignment are also IPP-dependent. Currently, none of the + * standard IPPs supplied by Tilera produce a custom header. If present, + * the custom header precedes the L2 header in the packet buffer. + * @ingroup pktfuncs + * + * @param[in] pkt Packet on which to operate. + * @return A pointer to start of the packet. + */ +static __inline unsigned char* +NETIO_PKT_CUSTOM_DATA(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_CUSTOM_DATA_M(mda, pkt); +} + + +/** Return the length of the packet's L2 (Ethernet plus VLAN or SNAP) header. + * @ingroup pktfuncs + * + * @param[in] pkt Packet on which to operate. + * @return The length of the packet's L2 header, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L2_HEADER_LENGTH(netio_pkt_t* pkt) +{ + if (NETIO_PKT_IS_MINIMAL(pkt)) + { + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); + + return NETIO_PKT_L2_HEADER_LENGTH_MM(mmd, pkt); + } + else + { + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L2_HEADER_LENGTH_M(mda, pkt); + } +} + + +/** Return the length of the packet, starting with the L2 (Ethernet) header. + * @ingroup pktfuncs + * + * @param[in] pkt Packet on which to operate. + * @return The length of the packet, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L2_LENGTH(netio_pkt_t* pkt) +{ + if (NETIO_PKT_IS_MINIMAL(pkt)) + { + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); + + return NETIO_PKT_L2_LENGTH_MM(mmd, pkt); + } + else + { + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L2_LENGTH_M(mda, pkt); + } +} + + +/** Return a pointer to the packet's L2 (Ethernet) header. + * @ingroup pktfuncs + * + * @param[in] pkt Packet on which to operate. + * @return A pointer to start of the packet. + */ +static __inline unsigned char* +NETIO_PKT_L2_DATA(netio_pkt_t* pkt) +{ + if (NETIO_PKT_IS_MINIMAL(pkt)) + { + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); + + return NETIO_PKT_L2_DATA_MM(mmd, pkt); + } + else + { + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L2_DATA_M(mda, pkt); + } +} + + +/** Retrieve the length of the packet, starting with the L3 (generally, the IP) + * header. + * @ingroup pktfuncs + * + * @param[in] pkt Packet on which to operate. + * @return Length of the packet's L3 header and data, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L3_LENGTH(netio_pkt_t* pkt) +{ + if (NETIO_PKT_IS_MINIMAL(pkt)) + { + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); + + return NETIO_PKT_L3_LENGTH_MM(mmd, pkt); + } + else + { + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L3_LENGTH_M(mda, pkt); + } +} + + +/** Return a pointer to the packet's L3 (generally, the IP) header. + * @ingroup pktfuncs + * + * Note that we guarantee word alignment of the L3 header. + * + * @param[in] pkt Packet on which to operate. + * @return A pointer to the packet's L3 header. + */ +static __inline unsigned char* +NETIO_PKT_L3_DATA(netio_pkt_t* pkt) +{ + if (NETIO_PKT_IS_MINIMAL(pkt)) + { + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); + + return NETIO_PKT_L3_DATA_MM(mmd, pkt); + } + else + { + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L3_DATA_M(mda, pkt); + } +} + + +/** Return the ordinal of the packet. + * @ingroup ingress + * + * Each packet is given an ordinal number when it is delivered by the IPP. + * In the medium term, the ordinal is unique and monotonically increasing, + * being incremented by 1 for each packet; the ordinal of the first packet + * delivered after the IPP starts is zero. (Since the ordinal is of finite + * size, given enough input packets, it will eventually wrap around to zero; + * in the long term, therefore, ordinals are not unique.) The ordinals + * handed out by different IPPs are not disjoint, so two packets from + * different IPPs may have identical ordinals. Packets dropped by the + * IPP or by the I/O shim are not assigned ordinals. + * + * + * @param[in] pkt Packet on which to operate. + * @return The packet's per-IPP packet ordinal. + */ +static __inline unsigned int +NETIO_PKT_ORDINAL(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_ORDINAL_M(mda, pkt); +} + + +/** Return the per-group ordinal of the packet. + * @ingroup ingress + * + * Each packet is given a per-group ordinal number when it is + * delivered by the IPP. By default, the group is the packet's VLAN, + * although IPP can be recompiled to use different values. In + * the medium term, the ordinal is unique and monotonically + * increasing, being incremented by 1 for each packet; the ordinal of + * the first packet distributed to a particular group is zero. + * (Since the ordinal is of finite size, given enough input packets, + * it will eventually wrap around to zero; in the long term, + * therefore, ordinals are not unique.) The ordinals handed out by + * different IPPs are not disjoint, so two packets from different IPPs + * may have identical ordinals; similarly, packets distributed to + * different groups may have identical ordinals. Packets dropped by + * the IPP or by the I/O shim are not assigned ordinals. + * + * @param[in] pkt Packet on which to operate. + * @return The packet's per-IPP, per-group ordinal. + */ +static __inline unsigned int +NETIO_PKT_GROUP_ORDINAL(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_GROUP_ORDINAL_M(mda, pkt); +} + + +/** Return the VLAN ID assigned to the packet. + * @ingroup ingress + * + * This is usually also contained within the packet header. If the packet + * does not have a VLAN tag, the VLAN ID returned by this function is zero. + * + * @param[in] pkt Packet on which to operate. + * @return The packet's VLAN ID. + */ +static __inline unsigned short +NETIO_PKT_VLAN_ID(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_VLAN_ID_M(mda, pkt); +} + + +/** Return the ethertype of the packet. + * @ingroup ingress + * + * This value is reliable if @ref NETIO_PKT_ETHERTYPE_RECOGNIZED() + * returns true, and otherwise, may not be well defined. + * + * @param[in] pkt Packet on which to operate. + * @return The packet's ethertype. + */ +static __inline unsigned short +NETIO_PKT_ETHERTYPE(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_ETHERTYPE_M(mda, pkt); +} + + +/** Return the flow hash computed on the packet. + * @ingroup ingress + * + * For TCP and UDP packets, this hash is calculated by hashing together + * the "5-tuple" values, specifically the source IP address, destination + * IP address, protocol type, source port and destination port. + * The hash value is intended to be helpful for millions of distinct + * flows. + * + * For IPv4 or IPv6 packets which are neither TCP nor UDP, the flow hash is + * derived by hashing together the source and destination IP addresses. + * + * For MPLS-encapsulated packets, the flow hash is derived by hashing + * the first MPLS label. + * + * For all other packets the flow hash is computed from the source + * and destination Ethernet addresses. + * + * The hash is symmetric, meaning it produces the same value if the + * source and destination are swapped. The only exceptions are + * tunneling protocols 0x04 (IP in IP Encapsulation), 0x29 (Simple + * Internet Protocol), 0x2F (General Routing Encapsulation) and 0x32 + * (Encap Security Payload), which use only the destination address + * since the source address is not meaningful. + * + * @param[in] pkt Packet on which to operate. + * @return The packet's 32-bit flow hash. + */ +static __inline unsigned int +NETIO_PKT_FLOW_HASH(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_FLOW_HASH_M(mda, pkt); +} + + +/** Return the first word of "user data" for the packet. + * + * The contents of the user data words depend on the IPP. + * + * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the first + * word of user data contains the least significant bits of the 64-bit + * arrival cycle count (see @c get_cycle_count_low()). + * + * See the System Programmer's Guide for details. + * + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + * @return The packet's first word of "user data". + */ +static __inline unsigned int +NETIO_PKT_USER_DATA_0(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_USER_DATA_0_M(mda, pkt); +} + + +/** Return the second word of "user data" for the packet. + * + * The contents of the user data words depend on the IPP. + * + * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the second + * word of user data contains the most significant bits of the 64-bit + * arrival cycle count (see @c get_cycle_count_high()). + * + * See the System Programmer's Guide for details. + * + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + * @return The packet's second word of "user data". + */ +static __inline unsigned int +NETIO_PKT_USER_DATA_1(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_USER_DATA_1_M(mda, pkt); +} + + +/** Determine whether the L4 (TCP/UDP) checksum was calculated. + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the L4 checksum was calculated. + */ +static __inline unsigned int +NETIO_PKT_L4_CSUM_CALCULATED(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L4_CSUM_CALCULATED_M(mda, pkt); +} + + +/** Determine whether the L4 (TCP/UDP) checksum was calculated and found to + * be correct. + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the checksum was calculated and is correct. + */ +static __inline unsigned int +NETIO_PKT_L4_CSUM_CORRECT(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L4_CSUM_CORRECT_M(mda, pkt); +} + + +/** Determine whether the L3 (IP) checksum was calculated. + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the L3 (IP) checksum was calculated. +*/ +static __inline unsigned int +NETIO_PKT_L3_CSUM_CALCULATED(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L3_CSUM_CALCULATED_M(mda, pkt); +} + + +/** Determine whether the L3 (IP) checksum was calculated and found to be + * correct. + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the checksum was calculated and is correct. + */ +static __inline unsigned int +NETIO_PKT_L3_CSUM_CORRECT(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L3_CSUM_CORRECT_M(mda, pkt); +} + + +/** Determine whether the Ethertype was recognized and L3 packet data was + * processed. + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the Ethertype was recognized and L3 packet data was + * processed. + */ +static __inline unsigned int +NETIO_PKT_ETHERTYPE_RECOGNIZED(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_ETHERTYPE_RECOGNIZED_M(mda, pkt); +} + + +/** Set an egress packet's L2 length, using a metadata pointer to speed the + * computation. + * @ingroup egress + * + * @param[in,out] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + * @param[in] len Packet L2 length, in bytes. + */ +static __inline void +NETIO_PKT_SET_L2_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt, + int len) +{ + mmd->l2_length = len; +} + + +/** Set an egress packet's L2 length. + * @ingroup egress + * + * @param[in,out] pkt Packet on which to operate. + * @param[in] len Packet L2 length, in bytes. + */ +static __inline void +NETIO_PKT_SET_L2_LENGTH(netio_pkt_t* pkt, int len) +{ + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); + + NETIO_PKT_SET_L2_LENGTH_MM(mmd, pkt, len); +} + + +/** Set an egress packet's L2 header length, using a metadata pointer to + * speed the computation. + * @ingroup egress + * + * It is not normally necessary to call this routine; only the L2 length, + * not the header length, is needed to transmit a packet. It may be useful if + * the egress packet will later be processed by code which expects to use + * functions like @ref NETIO_PKT_L3_DATA() to get a pointer to the L3 payload. + * + * @param[in,out] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + * @param[in] len Packet L2 header length, in bytes. + */ +static __inline void +NETIO_PKT_SET_L2_HEADER_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, + netio_pkt_t* pkt, int len) +{ + mmd->l3_offset = mmd->l2_offset + len; +} + + +/** Set an egress packet's L2 header length. + * @ingroup egress + * + * It is not normally necessary to call this routine; only the L2 length, + * not the header length, is needed to transmit a packet. It may be useful if + * the egress packet will later be processed by code which expects to use + * functions like @ref NETIO_PKT_L3_DATA() to get a pointer to the L3 payload. + * + * @param[in,out] pkt Packet on which to operate. + * @param[in] len Packet L2 header length, in bytes. + */ +static __inline void +NETIO_PKT_SET_L2_HEADER_LENGTH(netio_pkt_t* pkt, int len) +{ + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); + + NETIO_PKT_SET_L2_HEADER_LENGTH_MM(mmd, pkt, len); +} + + +/** Set up an egress packet for hardware checksum computation, using a + * metadata pointer to speed the operation. + * @ingroup egress + * + * NetIO provides the ability to automatically calculate a standard + * 16-bit Internet checksum on transmitted packets. The application + * may specify the point in the packet where the checksum starts, the + * number of bytes to be checksummed, and the two bytes in the packet + * which will be replaced with the completed checksum. (If the range + * of bytes to be checksummed includes the bytes to be replaced, the + * initial values of those bytes will be included in the checksum.) + * + * For some protocols, the packet checksum covers data which is not present + * in the packet, or is at least not contiguous to the main data payload. + * For instance, the TCP checksum includes a "pseudo-header" which includes + * the source and destination IP addresses of the packet. To accommodate + * this, the checksum engine may be "seeded" with an initial value, which + * the application would need to compute based on the specific protocol's + * requirements. Note that the seed is given in host byte order (little- + * endian), not network byte order (big-endian); code written to compute a + * pseudo-header checksum in network byte order will need to byte-swap it + * before use as the seed. + * + * Note that the checksum is computed as part of the transmission process, + * so it will not be present in the packet upon completion of this routine. + * + * @param[in,out] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + * @param[in] start Offset within L2 packet of the first byte to include in + * the checksum. + * @param[in] length Number of bytes to include in the checksum. + * the checksum. + * @param[in] location Offset within L2 packet of the first of the two bytes + * to be replaced with the calculated checksum. + * @param[in] seed Initial value of the running checksum before any of the + * packet data is added. + */ +static __inline void +NETIO_PKT_DO_EGRESS_CSUM_MM(netio_pkt_minimal_metadata_t* mmd, + netio_pkt_t* pkt, int start, int length, + int location, uint16_t seed) +{ + mmd->csum_start = start; + mmd->csum_length = length; + mmd->csum_location = location; + mmd->csum_seed = seed; + mmd->flags |= _NETIO_PKT_NEED_EDMA_CSUM_MASK; +} + + +/** Set up an egress packet for hardware checksum computation. + * @ingroup egress + * + * NetIO provides the ability to automatically calculate a standard + * 16-bit Internet checksum on transmitted packets. The application + * may specify the point in the packet where the checksum starts, the + * number of bytes to be checksummed, and the two bytes in the packet + * which will be replaced with the completed checksum. (If the range + * of bytes to be checksummed includes the bytes to be replaced, the + * initial values of those bytes will be included in the checksum.) + * + * For some protocols, the packet checksum covers data which is not present + * in the packet, or is at least not contiguous to the main data payload. + * For instance, the TCP checksum includes a "pseudo-header" which includes + * the source and destination IP addresses of the packet. To accommodate + * this, the checksum engine may be "seeded" with an initial value, which + * the application would need to compute based on the specific protocol's + * requirements. Note that the seed is given in host byte order (little- + * endian), not network byte order (big-endian); code written to compute a + * pseudo-header checksum in network byte order will need to byte-swap it + * before use as the seed. + * + * Note that the checksum is computed as part of the transmission process, + * so it will not be present in the packet upon completion of this routine. + * + * @param[in,out] pkt Packet on which to operate. + * @param[in] start Offset within L2 packet of the first byte to include in + * the checksum. + * @param[in] length Number of bytes to include in the checksum. + * the checksum. + * @param[in] location Offset within L2 packet of the first of the two bytes + * to be replaced with the calculated checksum. + * @param[in] seed Initial value of the running checksum before any of the + * packet data is added. + */ +static __inline void +NETIO_PKT_DO_EGRESS_CSUM(netio_pkt_t* pkt, int start, int length, + int location, uint16_t seed) +{ + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); + + NETIO_PKT_DO_EGRESS_CSUM_MM(mmd, pkt, start, length, location, seed); +} + + +/** Return the number of bytes which could be prepended to a packet, using a + * metadata pointer to speed the operation. + * See @ref netio_populate_prepend_buffer() to get a full description of + * prepending. + * + * @param[in,out] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + */ +static __inline int +NETIO_PKT_PREPEND_AVAIL_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return (pkt->__packet.bits.__offset << 6) + + NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda, pkt); +} + + +/** Return the number of bytes which could be prepended to a packet, using a + * metadata pointer to speed the operation. + * See @ref netio_populate_prepend_buffer() to get a full description of + * prepending. + * @ingroup egress + * + * @param[in,out] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + */ +static __inline int +NETIO_PKT_PREPEND_AVAIL_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt) +{ + return (pkt->__packet.bits.__offset << 6) + mmd->l2_offset; +} + + +/** Return the number of bytes which could be prepended to a packet. + * See @ref netio_populate_prepend_buffer() to get a full description of + * prepending. + * @ingroup egress + * + * @param[in] pkt Packet on which to operate. + */ +static __inline int +NETIO_PKT_PREPEND_AVAIL(netio_pkt_t* pkt) +{ + if (NETIO_PKT_IS_MINIMAL(pkt)) + { + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); + + return NETIO_PKT_PREPEND_AVAIL_MM(mmd, pkt); + } + else + { + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_PREPEND_AVAIL_M(mda, pkt); + } +} + + +/** Flush a packet's minimal metadata from the cache, using a metadata pointer + * to speed the operation. + * @ingroup egress + * + * @param[in] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_FLUSH_MINIMAL_METADATA_MM(netio_pkt_minimal_metadata_t* mmd, + netio_pkt_t* pkt) +{ +} + + +/** Invalidate a packet's minimal metadata from the cache, using a metadata + * pointer to speed the operation. + * @ingroup egress + * + * @param[in] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_INV_MINIMAL_METADATA_MM(netio_pkt_minimal_metadata_t* mmd, + netio_pkt_t* pkt) +{ +} + + +/** Flush and then invalidate a packet's minimal metadata from the cache, + * using a metadata pointer to speed the operation. + * @ingroup egress + * + * @param[in] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_FLUSH_INV_MINIMAL_METADATA_MM(netio_pkt_minimal_metadata_t* mmd, + netio_pkt_t* pkt) +{ +} + + +/** Flush a packet's metadata from the cache, using a metadata pointer + * to speed the operation. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_FLUSH_METADATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ +} + + +/** Invalidate a packet's metadata from the cache, using a metadata + * pointer to speed the operation. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's metadata. + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_INV_METADATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ +} + + +/** Flush and then invalidate a packet's metadata from the cache, + * using a metadata pointer to speed the operation. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's metadata. + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_FLUSH_INV_METADATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ +} + + +/** Flush a packet's minimal metadata from the cache. + * @ingroup egress + * + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_FLUSH_MINIMAL_METADATA(netio_pkt_t* pkt) +{ +} + + +/** Invalidate a packet's minimal metadata from the cache. + * @ingroup egress + * + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_INV_MINIMAL_METADATA(netio_pkt_t* pkt) +{ +} + + +/** Flush and then invalidate a packet's minimal metadata from the cache. + * @ingroup egress + * + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_FLUSH_INV_MINIMAL_METADATA(netio_pkt_t* pkt) +{ +} + + +/** Flush a packet's metadata from the cache. + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_FLUSH_METADATA(netio_pkt_t* pkt) +{ +} + + +/** Invalidate a packet's metadata from the cache. + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_INV_METADATA(netio_pkt_t* pkt) +{ +} + + +/** Flush and then invalidate a packet's metadata from the cache. + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_FLUSH_INV_METADATA(netio_pkt_t* pkt) +{ +} + +/** Number of NUMA nodes we can distribute buffers to. + * @ingroup setup */ +#define NETIO_NUM_NODE_WEIGHTS 16 + +/** + * @brief An object for specifying the characteristics of NetIO communication + * endpoint. + * + * @ingroup setup + * + * The @ref netio_input_register() function uses this structure to define + * how an application tile will communicate with an IPP. + * + * + * Future updates to NetIO may add new members to this structure, + * which can affect the success of the registration operation. Thus, + * if dynamically initializing the structure, applications are urged to + * zero it out first, for example: + * + * @code + * netio_input_config_t config; + * memset(&config, 0, sizeof (config)); + * config.flags = NETIO_RECV | NETIO_XMIT_CSUM | NETIO_TAG_NONE; + * config.num_receive_packets = NETIO_MAX_RECEIVE_PKTS; + * config.queue_id = 0; + * . + * . + * . + * @endcode + * + * since that guarantees that any unused structure members, including + * members which did not exist when the application was first developed, + * will not have unexpected values. + * + * If statically initializing the structure, we strongly recommend use of + * C99-style named initializers, for example: + * + * @code + * netio_input_config_t config = { + * .flags = NETIO_RECV | NETIO_XMIT_CSUM | NETIO_TAG_NONE, + * .num_receive_packets = NETIO_MAX_RECEIVE_PKTS, + * .queue_id = 0, + * }, + * @endcode + * + * instead of the old-style structure initialization: + * + * @code + * // Bad example! Currently equivalent to the above, but don't do this. + * netio_input_config_t config = { + * NETIO_RECV | NETIO_XMIT_CSUM | NETIO_TAG_NONE, NETIO_MAX_RECEIVE_PKTS, 0 + * }, + * @endcode + * + * since the C99 style requires no changes to the code if elements of the + * config structure are rearranged. (It also makes the initialization much + * easier to understand.) + * + * Except for items which address a particular tile's transmit or receive + * characteristics, such as the ::NETIO_RECV flag, applications are advised + * to specify the same set of configuration data on all registrations. + * This prevents differing results if multiple tiles happen to do their + * registration operations in a different order on different invocations of + * the application. This is particularly important for things like link + * management flags, and buffer size and homing specifications. + * + * Unless the ::NETIO_FIXED_BUFFER_VA flag is specified in flags, the NetIO + * buffer pool is automatically created and mapped into the application's + * virtual address space at an address chosen by the operating system, + * using the common memory (cmem) facility in the Tilera Multicore + * Components library. The cmem facility allows multiple processes to gain + * access to shared memory which is mapped into each process at an + * identical virtual address. In order for this to work, the processes + * must have a common ancestor, which must create the common memory using + * tmc_cmem_init(). + * + * In programs using the iLib process creation API, or in programs which use + * only one process (which include programs using the pthreads library), + * tmc_cmem_init() is called automatically. All other applications + * must call it explicitly, before any child processes which might call + * netio_input_register() are created. + */ +typedef struct +{ + /** Registration characteristics. + + This value determines several characteristics of the registration; + flags for different types of behavior are ORed together to make the + final flag value. Generally applications should specify exactly + one flag from each of the following categories: + + - Whether the application will be receiving packets on this queue + (::NETIO_RECV or ::NETIO_NO_RECV). + + - Whether the application will be transmitting packets on this queue, + and if so, whether it will request egress checksum calculation + (::NETIO_XMIT, ::NETIO_XMIT_CSUM, or ::NETIO_NO_XMIT). It is + legal to call netio_get_buffer() without one of the XMIT flags, + as long as ::NETIO_RECV is specified; in this case, the retrieved + buffers must be passed to another tile for transmission. + + - Whether the application expects any vendor-specific tags in + its packets' L2 headers (::NETIO_TAG_NONE, ::NETIO_TAG_BRCM, + or ::NETIO_TAG_MRVL). This must match the configuration of the + target IPP. + + To accommodate applications written to previous versions of the NetIO + interface, none of the flags above are currently required; if omitted, + NetIO behaves more or less as if ::NETIO_RECV | ::NETIO_XMIT_CSUM | + ::NETIO_TAG_NONE were used. However, explicit specification of + the relevant flags allows NetIO to do a better job of resource + allocation, allows earlier detection of certain configuration errors, + and may enable advanced features or higher performance in the future, + so their use is strongly recommended. + + Note that specifying ::NETIO_NO_RECV along with ::NETIO_NO_XMIT + is a special case, intended primarily for use by programs which + retrieve network statistics or do link management operations. + When these flags are both specified, the resulting queue may not + be used with NetIO routines other than netio_get(), netio_set(), + and netio_input_unregister(). See @ref link for more information + on link management. + + Other flags are optional; their use is described below. + */ + int flags; + + /** Interface name. This is a string which identifies the specific + Ethernet controller hardware to be used. The format of the string + is a device type and a device index, separated by a slash; so, + the first 10 Gigabit Ethernet controller is named "xgbe/0", while + the second 10/100/1000 Megabit Ethernet controller is named "gbe/1". + */ + const char* interface; + + /** Receive packet queue size. This specifies the maximum number + of ingress packets that can be received on this queue without + being retrieved by @ref netio_get_packet(). If the IPP's distribution + algorithm calls for a packet to be sent to this queue, and this + number of packets are already pending there, the new packet + will either be discarded, or sent to another tile registered + for the same queue_id (see @ref drops). This value must + be at least ::NETIO_MIN_RECEIVE_PKTS, can always be at least + ::NETIO_MAX_RECEIVE_PKTS, and may be larger than that on certain + interfaces. + */ + int num_receive_packets; + + /** The queue ID being requested. Legal values for this range from 0 + to ::NETIO_MAX_QUEUE_ID, inclusive. ::NETIO_MAX_QUEUE_ID is always + greater than or equal to the number of tiles; this allows one queue + for each tile, plus at least one additional queue. Some applications + may wish to use the additional queue as a destination for unwanted + packets, since packets delivered to queues for which no tiles have + registered are discarded. + */ + unsigned int queue_id; + + /** Maximum number of small send buffers to be held in the local empty + buffer cache. This specifies the size of the area which holds + empty small egress buffers requested from the IPP but not yet + retrieved via @ref netio_get_buffer(). This value must be greater + than zero if the application will ever use @ref netio_get_buffer() + to allocate empty small egress buffers; it may be no larger than + ::NETIO_MAX_SEND_BUFFERS. See @ref epp for more details on empty + buffer caching. + */ + int num_send_buffers_small_total; + + /** Number of small send buffers to be preallocated at registration. + If this value is nonzero, the specified number of empty small egress + buffers will be requested from the IPP during the netio_input_register + operation; this may speed the execution of @ref netio_get_buffer(). + This may be no larger than @ref num_send_buffers_small_total. See @ref + epp for more details on empty buffer caching. + */ + int num_send_buffers_small_prealloc; + + /** Maximum number of large send buffers to be held in the local empty + buffer cache. This specifies the size of the area which holds empty + large egress buffers requested from the IPP but not yet retrieved via + @ref netio_get_buffer(). This value must be greater than zero if the + application will ever use @ref netio_get_buffer() to allocate empty + large egress buffers; it may be no larger than ::NETIO_MAX_SEND_BUFFERS. + See @ref epp for more details on empty buffer caching. + */ + int num_send_buffers_large_total; + + /** Number of large send buffers to be preallocated at registration. + If this value is nonzero, the specified number of empty large egress + buffers will be requested from the IPP during the netio_input_register + operation; this may speed the execution of @ref netio_get_buffer(). + This may be no larger than @ref num_send_buffers_large_total. See @ref + epp for more details on empty buffer caching. + */ + int num_send_buffers_large_prealloc; + + /** Maximum number of jumbo send buffers to be held in the local empty + buffer cache. This specifies the size of the area which holds empty + jumbo egress buffers requested from the IPP but not yet retrieved via + @ref netio_get_buffer(). This value must be greater than zero if the + application will ever use @ref netio_get_buffer() to allocate empty + jumbo egress buffers; it may be no larger than ::NETIO_MAX_SEND_BUFFERS. + See @ref epp for more details on empty buffer caching. + */ + int num_send_buffers_jumbo_total; + + /** Number of jumbo send buffers to be preallocated at registration. + If this value is nonzero, the specified number of empty jumbo egress + buffers will be requested from the IPP during the netio_input_register + operation; this may speed the execution of @ref netio_get_buffer(). + This may be no larger than @ref num_send_buffers_jumbo_total. See @ref + epp for more details on empty buffer caching. + */ + int num_send_buffers_jumbo_prealloc; + + /** Total packet buffer size. This determines the total size, in bytes, + of the NetIO buffer pool. Note that the maximum number of available + buffers of each size is determined during hypervisor configuration + (see the System Programmer's Guide for details); this just + influences how much host memory is allocated for those buffers. + + The buffer pool is allocated from common memory, which will be + automatically initialized if needed. If your buffer pool is larger + than 240 MB, you might need to explicitly call @c tmc_cmem_init(), + as described in the Application Libraries Reference Manual (UG227). + + Packet buffers are currently allocated in chunks of 16 MB; this + value will be rounded up to the next larger multiple of 16 MB. + If this value is zero, a default of 32 MB will be used; this was + the value used by previous versions of NetIO. Note that taking this + default also affects the placement of buffers on Linux NUMA nodes. + See @ref buffer_node_weights for an explanation of buffer placement. + + In order to successfully allocate packet buffers, Linux must have + available huge pages on the relevant Linux NUMA nodes. See the + System Programmer's Guide for information on configuring + huge page support in Linux. + */ + uint64_t total_buffer_size; + + /** Buffer placement weighting factors. + + This array specifies the relative amount of buffering to place + on each of the available Linux NUMA nodes. This array is + indexed by the NUMA node, and the values in the array are + proportional to the amount of buffer space to allocate on that + node. + + If memory striping is enabled in the Hypervisor, then there is + only one logical NUMA node (node 0). In that case, NetIO will by + default ignore the suggested buffer node weights, and buffers + will be striped across the physical memory controllers. See + UG209 System Programmer's Guide for a description of the + hypervisor option that controls memory striping. + + If memory striping is disabled, then there are up to four NUMA + nodes, corresponding to the four DDRAM controllers in the TILE + processor architecture. See UG100 Tile Processor Architecture + Overview for a diagram showing the location of each of the DDRAM + controllers relative to the tile array. + + For instance, if memory striping is disabled, the following + configuration strucure: + + @code + netio_input_config_t config = { + . + . + . + .total_buffer_size = 4 * 16 * 1024 * 1024; + .buffer_node_weights = { 1, 0, 1, 0 }, + }, + @endcode + + would result in 32 MB of buffers being placed on controller 0, and + 32 MB on controller 2. (Since buffers are allocated in units of + 16 MB, some sets of weights will not be able to be matched exactly.) + + For the weights to be effective, @ref total_buffer_size must be + nonzero. If @ref total_buffer_size is zero, causing the default + 32 MB of buffer space to be used, then any specified weights will + be ignored, and buffers will positioned as they were in previous + versions of NetIO: + + - For xgbe/0 and gbe/0, 16 MB of buffers will be placed on controller 1, + and the other 16 MB will be placed on controller 2. + + - For xgbe/1 and gbe/1, 16 MB of buffers will be placed on controller 2, + and the other 16 MB will be placed on controller 3. + + If @ref total_buffer_size is nonzero, but all weights are zero, + then all buffer space will be allocated on Linux NUMA node zero. + + By default, the specified buffer placement is treated as a hint; + if sufficient free memory is not available on the specified + controllers, the buffers will be allocated elsewhere. However, + if the ::NETIO_STRICT_HOMING flag is specified in @ref flags, then a + failure to allocate buffer space exactly as requested will cause the + registration operation to fail with an error of ::NETIO_CANNOT_HOME. + + Note that maximal network performance cannot be achieved with + only one memory controller. + */ + uint8_t buffer_node_weights[NETIO_NUM_NODE_WEIGHTS]; + + /** Fixed virtual address for packet buffers. Only valid when + ::NETIO_FIXED_BUFFER_VA is specified in @ref flags; see the + description of that flag for details. + */ + void* fixed_buffer_va; + + /** + Maximum number of outstanding send packet requests. This value is + only relevant when an EPP is in use; it determines the number of + slots in the EPP's outgoing packet queue which this tile is allowed + to consume, and thus the number of packets which may be sent before + the sending tile must wait for an acknowledgment from the EPP. + Modifying this value is generally only helpful when using @ref + netio_send_packet_vector(), where it can help improve performance by + allowing a single vector send operation to process more packets. + Typically it is not specified, and the default, which divides the + outgoing packet slots evenly between all tiles on the chip, is used. + + If a registration asks for more outgoing packet queue slots than are + available, ::NETIO_TOOMANY_XMIT will be returned. The total number + of packet queue slots which are available for all tiles for each EPP + is subject to change, but is currently ::NETIO_TOTAL_SENDS_OUTSTANDING. + + + This value is ignored if ::NETIO_XMIT is not specified in flags. + If you want to specify a large value here for a specific tile, you are + advised to specify NETIO_NO_XMIT on other, non-transmitting tiles so + that they do not consume a default number of packet slots. Any tile + transmitting is required to have at least ::NETIO_MIN_SENDS_OUTSTANDING + slots allocated to it; values less than that will be silently + increased by the NetIO library. + */ + int num_sends_outstanding; +} +netio_input_config_t; + + +/** Registration flags; used in the @ref netio_input_config_t structure. + * @addtogroup setup + */ +/** @{ */ + +/** Fail a registration request if we can't put packet buffers + on the specified memory controllers. */ +#define NETIO_STRICT_HOMING 0x00000002 + +/** This application expects no tags on its L2 headers. */ +#define NETIO_TAG_NONE 0x00000004 + +/** This application expects Marvell extended tags on its L2 headers. */ +#define NETIO_TAG_MRVL 0x00000008 + +/** This application expects Broadcom tags on its L2 headers. */ +#define NETIO_TAG_BRCM 0x00000010 + +/** This registration may call routines which receive packets. */ +#define NETIO_RECV 0x00000020 + +/** This registration may not call routines which receive packets. */ +#define NETIO_NO_RECV 0x00000040 + +/** This registration may call routines which transmit packets. */ +#define NETIO_XMIT 0x00000080 + +/** This registration may call routines which transmit packets with + checksum acceleration. */ +#define NETIO_XMIT_CSUM 0x00000100 + +/** This registration may not call routines which transmit packets. */ +#define NETIO_NO_XMIT 0x00000200 + +/** This registration wants NetIO buffers mapped at an application-specified + virtual address. + + NetIO buffers are by default created by the TMC common memory facility, + which must be configured by a common ancestor of all processes sharing + a network interface. When this flag is specified, NetIO buffers are + instead mapped at an address chosen by the application (and specified + in @ref netio_input_config_t::fixed_buffer_va). This allows multiple + unrelated but cooperating processes to share a NetIO interface. + All processes sharing the same interface must specify this flag, + and all must specify the same fixed virtual address. + + @ref netio_input_config_t::fixed_buffer_va must be a + multiple of 16 MB, and the packet buffers will occupy @ref + netio_input_config_t::total_buffer_size bytes of virtual address + space, beginning at that address. If any of those virtual addresses + are currently occupied by other memory objects, like application or + shared library code or data, @ref netio_input_register() will return + ::NETIO_FAULT. While it is impossible to provide a fixed_buffer_va + which will work for all applications, a good first guess might be to + use 0xb0000000 minus @ref netio_input_config_t::total_buffer_size. + If that fails, it might be helpful to consult the running application's + virtual address description file (/proc/pid/maps) to see + which regions of virtual address space are available. + */ +#define NETIO_FIXED_BUFFER_VA 0x00000400 + +/** This registration call will not complete unless the network link + is up. The process will wait several seconds for this to happen (the + precise interval is link-dependent), but if the link does not come up, + ::NETIO_LINK_DOWN will be returned. This flag is the default if + ::NETIO_NOREQUIRE_LINK_UP is not specified. Note that this flag by + itself does not request that the link be brought up; that can be done + with the ::NETIO_AUTO_LINK_UPDN or ::NETIO_AUTO_LINK_UP flags (the + latter is the default if no NETIO_AUTO_LINK_xxx flags are specified), + or by explicitly setting the link's desired state via netio_set(). + If the link is not brought up by one of those methods, and this flag + is specified, the registration operation will return ::NETIO_LINK_DOWN. + This flag is ignored if it is specified along with ::NETIO_NO_XMIT and + ::NETIO_NO_RECV. See @ref link for more information on link + management. + */ +#define NETIO_REQUIRE_LINK_UP 0x00000800 + +/** This registration call will complete even if the network link is not up. + Whenever the link is not up, packets will not be sent or received: + netio_get_packet() will return ::NETIO_NOPKT once all queued packets + have been drained, and netio_send_packet() and similar routines will + return NETIO_QUEUE_FULL once the outgoing packet queue in the EPP + or the I/O shim is full. See @ref link for more information on link + management. + */ +#define NETIO_NOREQUIRE_LINK_UP 0x00001000 + +#ifndef __DOXYGEN__ +/* + * These are part of the implementation of the NETIO_AUTO_LINK_xxx flags, + * but should not be used directly by applications, and are thus not + * documented. + */ +#define _NETIO_AUTO_UP 0x00002000 +#define _NETIO_AUTO_DN 0x00004000 +#define _NETIO_AUTO_PRESENT 0x00008000 +#endif + +/** Set the desired state of the link to up, allowing any speeds which are + supported by the link hardware, as part of this registration operation. + Do not take down the link automatically. This is the default if + no other NETIO_AUTO_LINK_xxx flags are specified. This flag is ignored + if it is specified along with ::NETIO_NO_XMIT and ::NETIO_NO_RECV. + See @ref link for more information on link management. + */ +#define NETIO_AUTO_LINK_UP (_NETIO_AUTO_PRESENT | _NETIO_AUTO_UP) + +/** Set the desired state of the link to up, allowing any speeds which are + supported by the link hardware, as part of this registration operation. + Set the desired state of the link to down the next time no tiles are + registered for packet reception or transmission. This flag is ignored + if it is specified along with ::NETIO_NO_XMIT and ::NETIO_NO_RECV. + See @ref link for more information on link management. + */ +#define NETIO_AUTO_LINK_UPDN (_NETIO_AUTO_PRESENT | _NETIO_AUTO_UP | \ + _NETIO_AUTO_DN) + +/** Set the desired state of the link to down the next time no tiles are + registered for packet reception or transmission. This flag is ignored + if it is specified along with ::NETIO_NO_XMIT and ::NETIO_NO_RECV. + See @ref link for more information on link management. + */ +#define NETIO_AUTO_LINK_DN (_NETIO_AUTO_PRESENT | _NETIO_AUTO_DN) + +/** Do not bring up the link automatically as part of this registration + operation. Do not take down the link automatically. This flag + is ignored if it is specified along with ::NETIO_NO_XMIT and + ::NETIO_NO_RECV. See @ref link for more information on link management. + */ +#define NETIO_AUTO_LINK_NONE _NETIO_AUTO_PRESENT + + +/** Minimum number of receive packets. */ +#define NETIO_MIN_RECEIVE_PKTS 16 + +/** Lower bound on the maximum number of receive packets; may be higher + than this on some interfaces. */ +#define NETIO_MAX_RECEIVE_PKTS 128 + +/** Maximum number of send buffers, per packet size. */ +#define NETIO_MAX_SEND_BUFFERS 16 + +/** Number of EPP queue slots, and thus outstanding sends, per EPP. */ +#define NETIO_TOTAL_SENDS_OUTSTANDING 2015 + +/** Minimum number of EPP queue slots, and thus outstanding sends, per + * transmitting tile. */ +#define NETIO_MIN_SENDS_OUTSTANDING 16 + + +/**@}*/ + +#ifndef __DOXYGEN__ + +/** + * An object for providing Ethernet packets to a process. + */ +struct __netio_queue_impl_t; + +/** + * An object for managing the user end of a NetIO queue. + */ +struct __netio_queue_user_impl_t; + +#endif /* !__DOXYGEN__ */ + + +/** A netio_queue_t describes a NetIO communications endpoint. + * @ingroup setup + */ +typedef struct +{ +#ifdef __DOXYGEN__ + uint8_t opaque[8]; /**< This is an opaque structure. */ +#else + struct __netio_queue_impl_t* __system_part; /**< The system part. */ + struct __netio_queue_user_impl_t* __user_part; /**< The user part. */ +#ifdef _NETIO_PTHREAD + _netio_percpu_mutex_t lock; /**< Queue lock. */ +#endif +#endif +} +netio_queue_t; + + +/** + * @brief Packet send context. + * + * @ingroup egress + * + * Packet send context for use with netio_send_packet_prepare and _commit. + */ +typedef struct +{ +#ifdef __DOXYGEN__ + uint8_t opaque[44]; /**< This is an opaque structure. */ +#else + uint8_t flags; /**< Defined below */ + uint8_t datalen; /**< Number of valid words pointed to by data. */ + uint32_t request[9]; /**< Request to be sent to the EPP or shim. Note + that this is smaller than the 11-word maximum + request size, since some constant values are + not saved in the context. */ + uint32_t *data; /**< Data to be sent to the EPP or shim via IDN. */ +#endif +} +netio_send_pkt_context_t; + + +#ifndef __DOXYGEN__ +#define SEND_PKT_CTX_USE_EPP 1 /**< We're sending to an EPP. */ +#define SEND_PKT_CTX_SEND_CSUM 2 /**< Request includes a checksum. */ +#endif + +/** + * @brief Packet vector entry. + * + * @ingroup egress + * + * This data structure is used with netio_send_packet_vector() to send multiple + * packets with one NetIO call. The structure should be initialized by + * calling netio_pkt_vector_set(), rather than by setting the fields + * directly. + * + * This structure is guaranteed to be a power of two in size, no + * bigger than one L2 cache line, and to be aligned modulo its size. + */ +typedef struct +#ifndef __DOXYGEN__ +__attribute__((aligned(8))) +#endif +{ + /** Reserved for use by the user application. When initialized with + * the netio_set_pkt_vector_entry() function, this field is guaranteed + * to be visible to readers only after all other fields are already + * visible. This way it can be used as a valid flag or generation + * counter. */ + uint8_t user_data; + + /* Structure members below this point should not be accessed directly by + * applications, as they may change in the future. */ + + /** Low 8 bits of the packet address to send. The high bits are + * acquired from the 'handle' field. */ + uint8_t buffer_address_low; + + /** Number of bytes to transmit. */ + uint16_t size; + + /** The raw handle from a netio_pkt_t. If this is NETIO_PKT_HANDLE_NONE, + * this vector entry will be skipped and no packet will be transmitted. */ + netio_pkt_handle_t handle; +} +netio_pkt_vector_entry_t; + + +/** + * @brief Initialize fields in a packet vector entry. + * + * @ingroup egress + * + * @param[out] v Pointer to the vector entry to be initialized. + * @param[in] pkt Packet to be transmitted when the vector entry is passed to + * netio_send_packet_vector(). Note that the packet's attributes + * (e.g., its L2 offset and length) are captured at the time this + * routine is called; subsequent changes in those attributes will not + * be reflected in the packet which is actually transmitted. + * Changes in the packet's contents, however, will be so reflected. + * If this is NULL, no packet will be transmitted. + * @param[in] user_data User data to be set in the vector entry. + * This function guarantees that the "user_data" field will become + * visible to a reader only after all other fields have become visible. + * This allows a structure in a ring buffer to be written and read + * by a polling reader without any locks or other synchronization. + */ +static __inline void +netio_pkt_vector_set(volatile netio_pkt_vector_entry_t* v, netio_pkt_t* pkt, + uint8_t user_data) +{ + if (pkt) + { + if (NETIO_PKT_IS_MINIMAL(pkt)) + { + netio_pkt_minimal_metadata_t* mmd = + (netio_pkt_minimal_metadata_t*) &pkt->__metadata; + v->buffer_address_low = (uintptr_t) NETIO_PKT_L2_DATA_MM(mmd, pkt) & 0xFF; + v->size = NETIO_PKT_L2_LENGTH_MM(mmd, pkt); + } + else + { + netio_pkt_metadata_t* mda = &pkt->__metadata; + v->buffer_address_low = (uintptr_t) NETIO_PKT_L2_DATA_M(mda, pkt) & 0xFF; + v->size = NETIO_PKT_L2_LENGTH_M(mda, pkt); + } + v->handle.word = pkt->__packet.word; + } + else + { + v->handle.word = 0; /* Set handle to NETIO_PKT_HANDLE_NONE. */ + } + + __asm__("" : : : "memory"); + + v->user_data = user_data; +} + + +/** + * Flags and structures for @ref netio_get() and @ref netio_set(). + * @ingroup config + */ + +/** @{ */ +/** Parameter class; addr is a NETIO_PARAM_xxx value. */ +#define NETIO_PARAM 0 +/** Interface MAC address. This address is only valid with @ref netio_get(). + * The value is a 6-byte MAC address. Depending upon the overall system + * design, a MAC address may or may not be available for each interface. */ +#define NETIO_PARAM_MAC 0 + +/** Determine whether to suspend output on the receipt of pause frames. + * If the value is nonzero, the I/O shim will suspend output when a pause + * frame is received. If the value is zero, pause frames will be ignored. */ +#define NETIO_PARAM_PAUSE_IN 1 + +/** Determine whether to send pause frames if the I/O shim packet FIFOs are + * nearly full. If the value is zero, pause frames are not sent. If + * the value is nonzero, it is the delay value which will be sent in any + * pause frames which are output, in units of 512 bit times. */ +#define NETIO_PARAM_PAUSE_OUT 2 + +/** Jumbo frame support. The value is a 4-byte integer. If the value is + * nonzero, the MAC will accept frames of up to 10240 bytes. If the value + * is zero, the MAC will only accept frames of up to 1544 bytes. */ +#define NETIO_PARAM_JUMBO 3 + +/** I/O shim's overflow statistics register. The value is two 16-bit integers. + * The first 16-bit value (or the low 16 bits, if the value is treated as a + * 32-bit number) is the count of packets which were completely dropped and + * not delivered by the shim. The second 16-bit value (or the high 16 bits, + * if the value is treated as a 32-bit number) is the count of packets + * which were truncated and thus only partially delivered by the shim. This + * register is automatically reset to zero after it has been read. + */ +#define NETIO_PARAM_OVERFLOW 4 + +/** IPP statistics. This address is only valid with @ref netio_get(). The + * value is a netio_stat_t structure. Unlike the I/O shim statistics, the + * IPP statistics are not all reset to zero on read; see the description + * of the netio_stat_t for details. */ +#define NETIO_PARAM_STAT 5 + +/** Possible link state. The value is a combination of "NETIO_LINK_xxx" + * flags. With @ref netio_get(), this will indicate which flags are + * actually supported by the hardware. + * + * For historical reasons, specifying this value to netio_set() will have + * the same behavior as using ::NETIO_PARAM_LINK_CONFIG, but this usage is + * discouraged. + */ +#define NETIO_PARAM_LINK_POSSIBLE_STATE 6 + +/** Link configuration. The value is a combination of "NETIO_LINK_xxx" flags. + * With @ref netio_set(), this will attempt to immediately bring up the + * link using whichever of the requested flags are supported by the + * hardware, or take down the link if the flags are zero; if this is + * not possible, an error will be returned. Many programs will want + * to use ::NETIO_PARAM_LINK_DESIRED_STATE instead. + * + * For historical reasons, specifying this value to netio_get() will + * have the same behavior as using ::NETIO_PARAM_LINK_POSSIBLE_STATE, + * but this usage is discouraged. + */ +#define NETIO_PARAM_LINK_CONFIG NETIO_PARAM_LINK_POSSIBLE_STATE + +/** Current link state. This address is only valid with @ref netio_get(). + * The value is zero or more of the "NETIO_LINK_xxx" flags, ORed together. + * If the link is down, the value ANDed with NETIO_LINK_SPEED will be + * zero; if the link is up, the value ANDed with NETIO_LINK_SPEED will + * result in exactly one of the NETIO_LINK_xxx values, indicating the + * current speed. */ +#define NETIO_PARAM_LINK_CURRENT_STATE 7 + +/** Variant symbol for current state, retained for compatibility with + * pre-MDE-2.1 programs. */ +#define NETIO_PARAM_LINK_STATUS NETIO_PARAM_LINK_CURRENT_STATE + +/** Packet Coherence protocol. This address is only valid with @ref netio_get(). + * The value is nonzero if the interface is configured for cache-coherent DMA. + */ +#define NETIO_PARAM_COHERENT 8 + +/** Desired link state. The value is a conbination of "NETIO_LINK_xxx" + * flags, which specify the desired state for the link. With @ref + * netio_set(), this will, in the background, attempt to bring up the link + * using whichever of the requested flags are reasonable, or take down the + * link if the flags are zero. The actual link up or down operation may + * happen after this call completes. If the link state changes in the + * future, the system will continue to try to get back to the desired link + * state; for instance, if the link is brought up successfully, and then + * the network cable is disconnected, the link will go down. However, the + * desired state of the link is still up, so if the cable is reconnected, + * the link will be brought up again. + * + * With @ref netio_get(), this will indicate the desired state for the + * link, as set with a previous netio_set() call, or implicitly by a + * netio_input_register() or netio_input_unregister() operation. This may + * not reflect the current state of the link; to get that, use + * ::NETIO_PARAM_LINK_CURRENT_STATE. */ +#define NETIO_PARAM_LINK_DESIRED_STATE 9 + +/** NetIO statistics structure. Retrieved using the ::NETIO_PARAM_STAT + * address passed to @ref netio_get(). */ +typedef struct +{ + /** Number of packets which have been received by the IPP and forwarded + * to a tile's receive queue for processing. This value wraps at its + * maximum, and is not cleared upon read. */ + uint32_t packets_received; + + /** Number of packets which have been dropped by the IPP, because they could + * not be received, or could not be forwarded to a tile. The former happens + * when the IPP does not have a free packet buffer of suitable size for an + * incoming frame. The latter happens when all potential destination tiles + * for a packet, as defined by the group, bucket, and queue configuration, + * have full receive queues. This value wraps at its maximum, and is not + * cleared upon read. */ + uint32_t packets_dropped; + + /* + * Note: the #defines after each of the following four one-byte values + * denote their location within the third word of the netio_stat_t. They + * are intended for use only by the IPP implementation and are thus omitted + * from the Doxygen output. + */ + + /** Number of packets dropped because no worker was able to accept a new + * packet. This value saturates at its maximum, and is cleared upon + * read. */ + uint8_t drops_no_worker; +#ifndef __DOXYGEN__ +#define NETIO_STAT_DROPS_NO_WORKER 0 +#endif + + /** Number of packets dropped because no small buffers were available. + * This value saturates at its maximum, and is cleared upon read. */ + uint8_t drops_no_smallbuf; +#ifndef __DOXYGEN__ +#define NETIO_STAT_DROPS_NO_SMALLBUF 1 +#endif + + /** Number of packets dropped because no large buffers were available. + * This value saturates at its maximum, and is cleared upon read. */ + uint8_t drops_no_largebuf; +#ifndef __DOXYGEN__ +#define NETIO_STAT_DROPS_NO_LARGEBUF 2 +#endif + + /** Number of packets dropped because no jumbo buffers were available. + * This value saturates at its maximum, and is cleared upon read. */ + uint8_t drops_no_jumbobuf; +#ifndef __DOXYGEN__ +#define NETIO_STAT_DROPS_NO_JUMBOBUF 3 +#endif +} +netio_stat_t; + + +/** Link can run, should run, or is running at 10 Mbps. */ +#define NETIO_LINK_10M 0x01 + +/** Link can run, should run, or is running at 100 Mbps. */ +#define NETIO_LINK_100M 0x02 + +/** Link can run, should run, or is running at 1 Gbps. */ +#define NETIO_LINK_1G 0x04 + +/** Link can run, should run, or is running at 10 Gbps. */ +#define NETIO_LINK_10G 0x08 + +/** Link should run at the highest speed supported by the link and by + * the device connected to the link. Only usable as a value for + * the link's desired state; never returned as a value for the current + * or possible states. */ +#define NETIO_LINK_ANYSPEED 0x10 + +/** All legal link speeds. */ +#define NETIO_LINK_SPEED (NETIO_LINK_10M | \ + NETIO_LINK_100M | \ + NETIO_LINK_1G | \ + NETIO_LINK_10G | \ + NETIO_LINK_ANYSPEED) + + +/** MAC register class. Addr is a register offset within the MAC. + * Registers within the XGbE and GbE MACs are documented in the Tile + * Processor I/O Device Guide (UG104). MAC registers start at address + * 0x4000, and do not include the MAC_INTERFACE registers. */ +#define NETIO_MAC 1 + +/** MDIO register class (IEEE 802.3 clause 22 format). Addr is the "addr" + * member of a netio_mdio_addr_t structure. */ +#define NETIO_MDIO 2 + +/** MDIO register class (IEEE 802.3 clause 45 format). Addr is the "addr" + * member of a netio_mdio_addr_t structure. */ +#define NETIO_MDIO_CLAUSE45 3 + +/** NetIO MDIO address type. Retrieved or provided using the ::NETIO_MDIO + * address passed to @ref netio_get() or @ref netio_set(). */ +typedef union +{ + struct + { + unsigned int reg:16; /**< MDIO register offset. For clause 22 access, + must be less than 32. */ + unsigned int phy:5; /**< Which MDIO PHY to access. */ + unsigned int dev:5; /**< Which MDIO device to access within that PHY. + Applicable for clause 45 access only; ignored + for clause 22 access. */ + } + bits; /**< Container for bitfields. */ + uint64_t addr; /**< Value to pass to @ref netio_get() or + * @ref netio_set(). */ +} +netio_mdio_addr_t; + +/** @} */ + +#endif /* __NETIO_INTF_H__ */ diff --git a/arch/tile/include/hv/syscall_public.h b/arch/tile/include/hv/syscall_public.h new file mode 100644 index 000000000..9cc0837e6 --- /dev/null +++ b/arch/tile/include/hv/syscall_public.h @@ -0,0 +1,42 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * @file syscall.h + * Indices for the hypervisor system calls that are intended to be called + * directly, rather than only through hypervisor-generated "glue" code. + */ + +#ifndef _SYS_HV_INCLUDE_SYSCALL_PUBLIC_H +#define _SYS_HV_INCLUDE_SYSCALL_PUBLIC_H + +/** Fast syscall flag bit location. When this bit is set, the hypervisor + * handles the syscall specially. + */ +#define HV_SYS_FAST_SHIFT 14 + +/** Fast syscall flag bit mask. */ +#define HV_SYS_FAST_MASK (1 << HV_SYS_FAST_SHIFT) + +/** Bit location for flagging fast syscalls that can be called from PL0. */ +#define HV_SYS_FAST_PLO_SHIFT 13 + +/** Fast syscall allowing PL0 bit mask. */ +#define HV_SYS_FAST_PL0_MASK (1 << HV_SYS_FAST_PLO_SHIFT) + +/** Perform an MF that waits for all victims to reach DRAM. */ +#define HV_SYS_fence_incoherent (51 | HV_SYS_FAST_MASK \ + | HV_SYS_FAST_PL0_MASK) + +#endif /* !_SYS_HV_INCLUDE_SYSCALL_PUBLIC_H */ -- cgit v1.2.3-54-g00ecf