DMA Calypte

This core provides simple DMA functionality for both the RX and TX directions. The design is primarily focused on achieving the lowest possible latency for a transaction passing from the input of the DMA core to its output. The block scheme, as well as its connection to the NDK design, is shown in the following figure:

../../../_images/tx_calypte_block-dma_whole_block.svg
ENTITY DMA_CALYPTE IS
Generics

Generic

Type

Default

Description

=====

Global settings

=====

Settings affecting both RX and TX or the top level entity itself

DEVICE

string

“ULTRASCALE”

Name of the target device. Supported values are: “ULTRASCALE”

USR_MFB_REGIONS

natural

1

User MFB data bus configuration. Defines the total width of the User data stream.

USR_MFB_REGION_SIZE

natural

8

USR_MFB_BLOCK_SIZE

natural

8

USR_MFB_ITEM_WIDTH

natural

8

=====

PCIe-side bus settings

=====

=====

PCIE_RQ_MFB_REGIONS

natural

2

Requester Request MFB interface configuration. Allowed configurations are: (1,1,8,32)

PCIE_RQ_MFB_REGION_SIZE

natural

1

PCIE_RQ_MFB_BLOCK_SIZE

natural

8

PCIE_RQ_MFB_ITEM_WIDTH

natural

32

PCIE_CQ_MFB_REGIONS

natural

2

Completer Request MFB interface configuration. Allowed configurations are: (1,1,8,32)

PCIE_CQ_MFB_REGION_SIZE

natural

1

PCIE_CQ_MFB_BLOCK_SIZE

natural

8

PCIE_CQ_MFB_ITEM_WIDTH

natural

32

HDR_META_WIDTH

natural

24

Width of the User Header Metadata information. On RX: added to the header sent to the header buffer in RAM. On TX: extracted from the descriptor and propagated to the output.

=====

RX DMA settings

=====

Settings for RX direction of DMA Module

RX_CHANNELS

natural

8

Total number of RX DMA channels (preferably a multiple of 2). Minimum: 4

RX_PTR_WIDTH

natural

16

Width of the Software and Hardware Descriptor Pointers. Defines the width of the signals used for these values in the DMA Module. Affects logic complexity. Maximum value: 32 (restricted by the size of the SDP and HDP MI registers).

USR_RX_PKT_SIZE_MAX

natural

2**12

Maximum size of a User packet (in bytes). Defines the width of the Packet length signals. The maximum is 2**16 - 1.

TRBUF_REG_EN

boolean

false

Enables an additional register of the transaction buffer that improves throughput

=====

TX DMA settings

=====

Settings for TX direction of DMA Module

TX_CHANNELS

natural

8

Total number of TX DMA channels. Minimum value: TX_SEL_CHANNELS*DMA_ENDPOINTS

TX_PTR_WIDTH

natural

14

Width of the Software and Hardware Descriptor Pointers. Defines the width of the signals used for these values in the DMA Module. Affects logic complexity. Maximum value: 32 (restricted by the size of the SDP and HDP MI registers).

USR_TX_PKT_SIZE_MAX

natural

2**12

Maximum size of a User packet (in bytes). Defines the width of the Packet length signals. The maximum is 2**16 - 1.

=====

Optional settings

=====

Settings for testing and debugging. These are usually left unchanged and treated as entity-area constants.

DSP_CNT_WIDTH

natural

64

Width of DSP packet and byte statistics counters

RX_GEN_EN

boolean

TRUE

Enable generation of RX/TX side of DMA Module

TX_GEN_EN

boolean

TRUE

ST_SP_DBG_SIGNAL_W

natural

2

MI_WIDTH

natural

32

Width of MI bus

Ports

Port

Type

Mode

Description

CLK

std_logic

in

RESET

std_logic

in

=====

RX DMA User-side MFB

=====

=====

USR_RX_MFB_META_CHAN

std_logic_vector(log2(RX_CHANNELS) -1 downto 0)

in

USR_RX_MFB_META_HDR_META

std_logic_vector(HDR_META_WIDTH -1 downto 0)

in

USR_RX_MFB_DATA

std_logic_vector(USR_MFB_REGIONS*USR_MFB_REGION_SIZE*USR_MFB_BLOCK_SIZE*USR_MFB_ITEM_WIDTH-1 downto 0)

in

USR_RX_MFB_SOF

std_logic_vector(USR_MFB_REGIONS -1 downto 0)

in

USR_RX_MFB_EOF

std_logic_vector(USR_MFB_REGIONS -1 downto 0)

in

USR_RX_MFB_SOF_POS

std_logic_vector(USR_MFB_REGIONS*max(1, log2(USR_MFB_REGION_SIZE)) -1 downto 0)

in

USR_RX_MFB_EOF_POS

std_logic_vector(USR_MFB_REGIONS*max(1, log2(USR_MFB_REGION_SIZE*USR_MFB_BLOCK_SIZE)) -1 downto 0)

in

USR_RX_MFB_SRC_RDY

std_logic

in

USR_RX_MFB_DST_RDY

std_logic

out

=====

TX DMA User-side MFB

=====

=====

USR_TX_MFB_META_PKT_SIZE

std_logic_vector(log2(USR_TX_PKT_SIZE_MAX + 1) -1 downto 0)

out

USR_TX_MFB_META_CHAN

std_logic_vector(log2(TX_CHANNELS) -1 downto 0)

out

USR_TX_MFB_META_HDR_META

std_logic_vector(HDR_META_WIDTH -1 downto 0)

out

USR_TX_MFB_DATA

std_logic_vector(USR_MFB_REGIONS*USR_MFB_REGION_SIZE*USR_MFB_BLOCK_SIZE*USR_MFB_ITEM_WIDTH-1 downto 0)

out

USR_TX_MFB_SOF

std_logic_vector(USR_MFB_REGIONS -1 downto 0)

out

USR_TX_MFB_EOF

std_logic_vector(USR_MFB_REGIONS -1 downto 0)

out

USR_TX_MFB_SOF_POS

std_logic_vector(USR_MFB_REGIONS*max(1, log2(USR_MFB_REGION_SIZE)) -1 downto 0)

out

USR_TX_MFB_EOF_POS

std_logic_vector(USR_MFB_REGIONS*max(1, log2(USR_MFB_REGION_SIZE*USR_MFB_BLOCK_SIZE)) -1 downto 0)

out

USR_TX_MFB_SRC_RDY

std_logic

out

USR_TX_MFB_DST_RDY

std_logic

in

ST_SP_DBG_CHAN

std_logic_vector(log2(TX_CHANNELS) -1 downto 0)

out

ST_SP_DBG_META

std_logic_vector(ST_SP_DBG_SIGNAL_W -1 downto 0)

out

=====

PCIe-side interfaces

=====

=====

PCIE_RQ_MFB_DATA

std_logic_vector(PCIE_RQ_MFB_REGIONS*PCIE_RQ_MFB_REGION_SIZE*PCIE_RQ_MFB_BLOCK_SIZE*PCIE_RQ_MFB_ITEM_WIDTH-1 downto 0)

out

Upstream MFB interface (for sending data to PCIe Endpoints)

PCIE_RQ_MFB_META

std_logic_vector(PCIE_RQ_MFB_REGIONS*PCIE_RQ_META_WIDTH -1 downto 0)

out

PCIE_RQ_MFB_SOF

std_logic_vector(PCIE_RQ_MFB_REGIONS -1 downto 0)

out

PCIE_RQ_MFB_EOF

std_logic_vector(PCIE_RQ_MFB_REGIONS -1 downto 0)

out

PCIE_RQ_MFB_SOF_POS

std_logic_vector(PCIE_RQ_MFB_REGIONS*max(1, log2(PCIE_RQ_MFB_REGION_SIZE)) -1 downto 0)

out

PCIE_RQ_MFB_EOF_POS

std_logic_vector(PCIE_RQ_MFB_REGIONS*max(1, log2(PCIE_RQ_MFB_REGION_SIZE*PCIE_RQ_MFB_BLOCK_SIZE)) -1 downto 0)

out

PCIE_RQ_MFB_SRC_RDY

std_logic

out

PCIE_RQ_MFB_DST_RDY

std_logic

in

PCIE_CQ_MFB_DATA

std_logic_vector(PCIE_CQ_MFB_REGIONS*PCIE_CQ_MFB_REGION_SIZE*PCIE_CQ_MFB_BLOCK_SIZE*PCIE_CQ_MFB_ITEM_WIDTH-1 downto 0)

in

Downstream MFB interface (for sending data from PCIe Endpoints)

PCIE_CQ_MFB_META

std_logic_vector(PCIE_CQ_MFB_REGIONS*PCIE_CQ_META_WIDTH -1 downto 0)

in

PCIE_CQ_MFB_SOF

std_logic_vector(PCIE_CQ_MFB_REGIONS -1 downto 0)

in

PCIE_CQ_MFB_EOF

std_logic_vector(PCIE_CQ_MFB_REGIONS -1 downto 0)

in

PCIE_CQ_MFB_SOF_POS

std_logic_vector(PCIE_CQ_MFB_REGIONS*max(1, log2(PCIE_CQ_MFB_REGION_SIZE)) -1 downto 0)

in

PCIE_CQ_MFB_EOF_POS

std_logic_vector(PCIE_CQ_MFB_REGIONS*max(1, log2(PCIE_CQ_MFB_REGION_SIZE*PCIE_CQ_MFB_BLOCK_SIZE)) -1 downto 0)

in

PCIE_CQ_MFB_SRC_RDY

std_logic

in

PCIE_CQ_MFB_DST_RDY

std_logic

out

=====

MI interface for SW access

=====

=====

MI_ADDR

std_logic_vector (MI_WIDTH -1 downto 0)

in

MI_DWR

std_logic_vector (MI_WIDTH -1 downto 0)

in

MI_BE

std_logic_vector (MI_WIDTH/8-1 downto 0)

in

MI_RD

std_logic

in

MI_WR

std_logic

in

MI_DRD

std_logic_vector (MI_WIDTH -1 downto 0)

out

MI_ARDY

std_logic

out

MI_DRDY

std_logic

out

Supported PCIe Configurations

The design can be configured for various bus widths and PCIe IP core configurations.

  1. Device: AMD Kintex UltraScale+

    PCI Express configuration: Gen3 x8

    Internal bus width: 256 bits

    Frequency: 250 MHz

    Input MFB configuration: 1,4,8,8

    Output MFB configuration: 1,1,8,32

  2. Device: AMD Kintex UltraScale+, Intel Agilex F

    PCI Express configuration: Gen3 x16 (AMD), Gen4 x16 (Intel)

    Internal bus width: 512 bits

    Frequency: 250 MHz (AMD), 400 MHz (Intel)

    Input MFB configuration: 1,8,8,8

    Output MFB configuration: 2,1,8,32

Local Subcomponents