Barrel Shifter

A high-performance parameterized barrel shifter component for digital logic designs, implementing fast multi-bit shift and rotate operations in constant time. Supports configurable data widths and shift directions (left/right).

We present the results of several tests of different barrel shifter implementations. The tests were done with Vivado 2025.1 (AMD/Xilinx).

Note

These implementations (V0, V1, V2, V3, V4) have not been fully tested; they have been used only for resource elaboration. The barrel_shifter implementation in the git repository is tested.

V0 - Implements barrel shifting by computing block indices with modulo arithmetic for each output block via a multiplexer process.
 -- V0: block-wise rotation. Every output block gets its own multiplexer whose
 -- source block index is computed with modulo arithmetic.
 architecture BARREL_SHIFTER_ARCH of BARREL_SHIFTER is
     -- Width of one block in bits.
     -- NOTE(review): assumes DATA_WIDTH is a multiple of BLOCKS - confirm against the entity.
     constant BLOCK_WIDTH : natural := DATA_WIDTH / BLOCKS;
 begin

     multiplexors: for i in 0 to BLOCKS-1 generate
         process (DATA_IN, SEL)
             variable sel_aux : integer; -- rotation amount in blocks
             variable sel_blk : integer; -- index of the input block routed to output block i
         begin
             -- The leading '0' keeps the conversion non-negative.
             sel_aux := conv_integer('0'&SEL);

             -- The direction is selected in the index arithmetic instead of negating
             -- SEL: a two's-complement negation wraps modulo 2**SEL'length, which
             -- matches "modulo BLOCKS" only when BLOCKS is a power of two.
             if (SHIFT_LEFT) then
                 -- Left rotation: output block i takes input block (i - sel) mod BLOCKS.
                 sel_blk := ((BLOCKS-sel_aux+i) mod BLOCKS);
             else
                 -- Right rotation: output block i takes input block (i + sel) mod BLOCKS.
                 sel_blk := ((sel_aux+i) mod BLOCKS);
             end if;

             DATA_OUT((i+1)*BLOCK_WIDTH-1 downto i*BLOCK_WIDTH) <= DATA_IN((sel_blk+1)*BLOCK_WIDTH-1 downto sel_blk*BLOCK_WIDTH);
         end process;
     end generate;

 end architecture;
V1 - Uses conditional generation and arithmetic to precompute source block indices per output block, avoiding repeated modulo operations.
 -- V1: block-wise rotation. The source block index of every output block is
 -- computed with a compare and an add/subtract instead of a modulo operation.
 architecture BARREL_SHIFTER_ARCH of BARREL_SHIFTER is
     -- Width of one block in bits.
     constant BLOCK_WIDTH : natural := DATA_WIDTH / BLOCKS;
     -- Rotation amount in blocks; declared at architecture level because it is
     -- driven once below and read by every output multiplexer.
     signal sel_int : integer range 0 to BLOCKS-1;
 begin

     -- With a single block there is nothing to rotate, so SEL is not read.
     sel_int_gen : if (BLOCKS > 1) generate
         sel_int <= to_integer(unsigned(SEL));
     else generate
         sel_int <= 0;
     end generate;

     multiplexors: for i in 0 to BLOCKS-1 generate
         -- Source block index for output block i. It is local to the generate
         -- iteration so that each copy has exactly one driver.
         signal sel_blk : integer range 0 to BLOCKS-1;
     begin

         shift_sel_gen : if (SHIFT_LEFT) generate
             -- Left rotation: (i - sel_int) mod BLOCKS, with the wrap-around done by hand.
             sel_blk <= i - sel_int when sel_int <= i else (BLOCKS + i) - sel_int;
         else generate
             -- Right rotation: (i + sel_int) mod BLOCKS, with the wrap-around done by hand.
             sel_blk <= i + sel_int when sel_int < (BLOCKS - i) else sel_int - (BLOCKS - i);
         end generate;

         DATA_OUT((i+1)*BLOCK_WIDTH-1 downto i*BLOCK_WIDTH) <= DATA_IN((sel_blk+1)*BLOCK_WIDTH-1 downto sel_blk*BLOCK_WIDTH);
     end generate;

 end architecture;
V2 - Duplicates input data and selects output via direct slicing on the extended vector, avoiding block-wise indexing.
-- V2: rotation by slicing a doubled copy of the input. A DATA_WIDTH-wide window
-- is moved over DATA_IN & DATA_IN, so bits leaving one end re-enter at the other.
architecture BARREL_SHIFTER_ARCH of BARREL_SHIFTER is
    -- Width of one block in bits.
    constant BLOCK_WIDTH : natural := DATA_WIDTH / BLOCKS;
    -- Two concatenated copies of DATA_IN: data_in_tmp(k) = DATA_IN(k mod DATA_WIDTH).
    signal data_in_tmp  : std_logic_vector(2*DATA_WIDTH-1 downto 0);
    -- Rotation amount in blocks.
    signal sel_int : integer range 0 to BLOCKS-1;
begin

    data_in_tmp <= DATA_IN & DATA_IN;

    -- With a single block there is nothing to rotate, so SEL is not read.
    sel_int_gen : if (BLOCKS > 1) generate
        sel_int <= to_integer(unsigned(SEL));
    else generate
        sel_int <= 0;
    end generate;

    shift_sel_gen : if (SHIFT_LEFT) generate
        -- Left rotation: DATA_OUT(j) = DATA_IN((j - shift) mod DATA_WIDTH), i.e. the
        -- window starts 'shift' bits below the upper copy.
        DATA_OUT <= data_in_tmp(DATA_WIDTH*2 -1 - (sel_int*BLOCK_WIDTH) downto DATA_WIDTH - (sel_int*BLOCK_WIDTH));
    else generate
        -- Right rotation: DATA_OUT(j) = DATA_IN((j + shift) mod DATA_WIDTH), i.e. the
        -- window starts 'shift' bits above bit 0.
        DATA_OUT <= data_in_tmp(DATA_WIDTH   -1 + (sel_int*BLOCK_WIDTH) downto          0 + (sel_int*BLOCK_WIDTH));
    end generate;
end architecture;
V3 - Leverages VHDL’s built-in rol/ror shift operators for concise rotation implementation.
 -- V3: rotation delegated to the rol/ror operators.
 architecture BARREL_SHIFTER_ARCH of BARREL_SHIFTER is
     -- Width of one block in bits.
     constant BLOCK_WIDTH : natural := DATA_WIDTH / BLOCKS;
     -- Rotation amount expressed in whole blocks.
     signal rot_blocks : integer range 0 to BLOCKS-1;
 begin

     -- SEL is decoded only when there is more than one block; otherwise the
     -- rotation amount is tied to zero.
     sel_int_gen : if (BLOCKS <= 1) generate
         rot_blocks <= 0;
     else generate
         rot_blocks <= to_integer(unsigned(SEL));
     end generate;

     -- The rotation distance in bits is the block count times the block width.
     shift_sel_gen : if (not SHIFT_LEFT) generate
         DATA_OUT <= DATA_IN ror (BLOCK_WIDTH*rot_blocks);
     else generate
         DATA_OUT <= DATA_IN rol (BLOCK_WIDTH*rot_blocks);
     end generate;
 end architecture;
V4 - Uses shift_left/shift_right from numeric_std on a doubled input vector (DATA_IN & DATA_IN), so that a logical shift of the extended vector yields a rotation of the original data.
 -- V4: rotation built from numeric_std logical shifts applied to a doubled
 -- copy of the input; one half of the shifted result is the rotated word.
 architecture BARREL_SHIFTER_ARCH of BARREL_SHIFTER is
     -- Width of one block in bits.
     constant BLOCK_WIDTH : natural := DATA_WIDTH / BLOCKS;
     -- DATA_IN concatenated with itself.
     signal doubled_in  : unsigned(2*DATA_WIDTH-1 downto 0);
     -- doubled_in after the logical shift.
     signal shifted     : unsigned(2*DATA_WIDTH-1 downto 0);
     -- Rotation amount expressed in whole blocks.
     signal rot_blocks  : integer range 0 to BLOCKS-1;
 begin

     doubled_in <= unsigned(DATA_IN & DATA_IN);

     -- SEL is decoded only when there is more than one block; otherwise the
     -- rotation amount is tied to zero.
     sel_int_gen : if (BLOCKS <= 1) generate
         rot_blocks <= 0;
     else generate
         rot_blocks <= to_integer(unsigned(SEL));
     end generate;

     -- The shift functions are referenced by their full selected name.
     -- NOTE(review): presumably because the SHIFT_LEFT generic hides
     -- numeric_std.shift_left (VHDL identifiers are case-insensitive) - keep it qualified.
     shift_sel_gen : if (not SHIFT_LEFT) generate
         -- Shifting right moves bits of the upper copy into the lower half.
         shifted  <= IEEE.numeric_std.shift_right(doubled_in, BLOCK_WIDTH*rot_blocks);
         DATA_OUT <= std_logic_vector(shifted(DATA_WIDTH-1 downto 0));
     else generate
         -- Shifting left moves bits of the lower copy into the upper half.
         shifted  <= IEEE.numeric_std.shift_left(doubled_in, BLOCK_WIDTH*rot_blocks);
         DATA_OUT <= std_logic_vector(shifted(2*DATA_WIDTH-1 downto DATA_WIDTH));
     end generate;
 end architecture;
ENTITY BARREL_SHIFTER_GEN IS

Generically adjustable barrel shifter supporting shifts of single bits and whole blocks. Supports bit-level rotation when BLOCK_SIZE == 1. Configurable shift direction.

Generics

Generic

Type

Default

Description

BLOCKS

integer

256

input/output data width in BLOCKs

BLOCK_SIZE

integer

64

size of one block in bits

SHIFT_LEFT

boolean

false

set true to shift left, false to shift right

Ports

Port

Type

Mode

Description

DATA_IN

std_logic_vector(BLOCKS*BLOCK_SIZE-1 downto 0)

in

DATA_OUT

std_logic_vector(BLOCKS*BLOCK_SIZE-1 downto 0)

out

SEL

std_logic_vector(log2(BLOCKS)-1 downto 0)

in

ENTITY BARREL_SHIFTER_GEN_PIPED IS
Generics

Generic

Type

Default

Description

BLOCKS

integer

256

input/output data width in BLOCKs

BLOCK_WIDTH

integer

64

width of one block in bits

BAR_SHIFT_LATENCY

integer

0

barrel shifting latency

INPUT_REG

boolean

false

input register enable (adds additional 1 CLK latency)

OUTPUT_REG

boolean

false

output register enable (adds additional 1 CLK latency)

SHIFT_LEFT

boolean

false

set true to shift left, false to shift right

METADATA_WIDTH

integer

0

Metadata can be useful when you want to send additional info to the TX side along with the rotated value. (for example the value of the RX_SEL signal)

Ports

Port

Type

Mode

Description

CLK

std_logic

in

unused when BAR_SHIFT_LATENCY==0 and OUTPUT_REG==INPUT_REG==false

RESET

std_logic

in

unused when BAR_SHIFT_LATENCY==0 and OUTPUT_REG==INPUT_REG==false

RX_DATA

std_logic_vector(BLOCK_WIDTH*BLOCKS-1 downto 0)

in

RX_SEL

std_logic_vector(log2(BLOCKS)-1 downto 0)

in

RX_METADATA

std_logic_vector(METADATA_WIDTH-1 downto 0)

in

RX_SRC_RDY

std_logic

in

RX_DST_RDY

std_logic

out

TX_DATA

std_logic_vector(BLOCK_WIDTH*BLOCKS-1 downto 0)

out

TX_METADATA

std_logic_vector(METADATA_WIDTH-1 downto 0)

out

TX_SRC_RDY

std_logic

out

TX_DST_RDY

std_logic

in