Barrel Shifter

A high-performance, parameterized barrel shifter component for digital logic designs, implementing fast multi-bit shift and rotate operations in constant time. It supports configurable data widths and shift directions (left/right).

We present the results of several tests for different implementations of Barrel Shifter. Tests have been done with Vivado 2025.1 (AMD/Xilinx).

Note

These implementations (V0, V1, V2, V3, V4) are not fully tested; they have been used only for resource elaboration. Only the barrel_shifter implementation committed in git is tested.

V0 - Implements barrel shifting by computing block indices with modulo arithmetic for each output block via a multiplexer process.
 -- V0: one multiplexer process per output block. Each process derives the
 -- index of the input block it must forward from SEL using modulo arithmetic.
 architecture BARREL_SHIFTER_ARCH of BARREL_SHIFTER is
     -- Width of a single block in bits.
     constant BLOCK_WIDTH : natural := DATA_WIDTH / BLOCKS;
 begin

     multiplexors: for i in 0 to BLOCKS-1 generate
         process (DATA_IN, SEL)
             -- Rotation distance in blocks, always expressed as a left rotation.
             variable rot_blocks : integer;
             -- Bit index of the least significant bit of the source block.
             variable src_lsb    : integer;
         begin
             -- The leading '0' keeps the converted value non-negative.
             -- NOTE(review): (0-SEL) relies on vector arithmetic from a package
             -- not visible here (presumably ieee.std_logic_unsigned) - confirm.
             if (not SHIFT_LEFT) then
                 -- A right rotation is handled as a left rotation by the negated SEL.
                 rot_blocks := conv_integer('0'&(0-SEL));
             else
                 rot_blocks := conv_integer('0'&SEL);
             end if;

             -- Output block i is fed by input block (i - rot_blocks) mod BLOCKS;
             -- BLOCKS is added to keep the left operand of mod from going
             -- negative for rot_blocks up to BLOCKS.
             src_lsb := ((BLOCKS-rot_blocks+i) mod BLOCKS) * BLOCK_WIDTH;

             DATA_OUT((i+1)*BLOCK_WIDTH-1 downto i*BLOCK_WIDTH) <= DATA_IN(src_lsb+BLOCK_WIDTH-1 downto src_lsb);
         end process;
     end generate;

 end architecture;
V1 - Uses conditional generation and arithmetic to precompute source block indices per output block, avoiding repeated modulo operations.
 -- V1: one multiplexer per output block. The source block index is obtained
 -- with a compare and an add/subtract instead of a modulo operation.
 architecture BARREL_SHIFTER_ARCH of BARREL_SHIFTER is
     -- Width of a single block in bits.
     constant BLOCK_WIDTH : natural := DATA_WIDTH / BLOCKS;
     -- Rotation distance in blocks. Declared at architecture level because it
     -- is driven once here and read by every multiplexer below.
     signal sel_int : integer range 0 to BLOCKS-1;
 begin

     -- With a single block there is nothing to rotate: SEL is not read and the
     -- distance is tied to 0.
     sel_int_gen : if (BLOCKS > 1) generate
         sel_int <= to_integer(unsigned(SEL));
     else generate
         sel_int <= 0;
     end generate;

     multiplexors: for i in 0 to BLOCKS-1 generate
         -- Index of the input block forwarded to output block i. Declared per
         -- iteration so that every multiplexer has its own, singly driven signal.
         signal sel_blk : integer range 0 to BLOCKS-1;
     begin

         shift_sel_gen : if (SHIFT_LEFT) generate
             -- Rotate left: sel_blk = (i - sel_int) mod BLOCKS.
             -- Wrap around by adding BLOCKS when the difference would be negative.
             sel_blk <= i - sel_int when sel_int <= i else (BLOCKS + i) - sel_int;
         else generate
             -- Rotate right: sel_blk = (i + sel_int) mod BLOCKS.
             -- The comparison is strict: i + sel_int = BLOCKS must already wrap to 0.
             sel_blk <= i + sel_int when sel_int < (BLOCKS - i) else sel_int - (BLOCKS - i);
         end generate;

         DATA_OUT((i+1)*BLOCK_WIDTH-1 downto i*BLOCK_WIDTH) <= DATA_IN((sel_blk+1)*BLOCK_WIDTH-1 downto sel_blk*BLOCK_WIDTH);
     end generate;

 end architecture;
V2 - Duplicates input data and selects output via direct slicing on the extended vector, avoiding block-wise indexing.
-- V2: rotation by taking a DATA_WIDTH-wide window out of a doubled copy of
-- the input, instead of indexing block by block.
architecture BARREL_SHIFTER_ARCH of BARREL_SHIFTER is
    -- Width of a single block in bits.
    constant BLOCK_WIDTH : natural := DATA_WIDTH / BLOCKS;
    -- Two back-to-back copies of DATA_IN: bit k holds DATA_IN(k mod DATA_WIDTH),
    -- so every DATA_WIDTH-wide window of it is a rotation of DATA_IN.
    signal data_in_tmp  : std_logic_vector(2*DATA_WIDTH-1 downto 0);
    -- Rotation distance in blocks.
    signal sel_int : integer range 0 to BLOCKS-1;
begin

    data_in_tmp <= DATA_IN & DATA_IN;

    -- With a single block there is nothing to rotate: SEL is not read and the
    -- distance is tied to 0.
    sel_int_gen : if (BLOCKS > 1) generate
        sel_int <= to_integer(unsigned(SEL));
    else generate
        sel_int <= 0;
    end generate;

    shift_sel_gen : if (SHIFT_LEFT) generate
        -- Rotate left (same direction as the rol operator): the window slides
        -- down from the upper copy, so DATA_OUT(j) = DATA_IN((j - s) mod DATA_WIDTH)
        -- with s = sel_int*BLOCK_WIDTH.
        DATA_OUT <= data_in_tmp(DATA_WIDTH*2 -1 - (sel_int*BLOCK_WIDTH) downto DATA_WIDTH - (sel_int*BLOCK_WIDTH));
    else generate
        -- Rotate right (same direction as the ror operator): the window slides
        -- up from the lower copy, so DATA_OUT(j) = DATA_IN((j + s) mod DATA_WIDTH).
        DATA_OUT <= data_in_tmp(DATA_WIDTH   -1 + (sel_int*BLOCK_WIDTH) downto          0 + (sel_int*BLOCK_WIDTH));
    end generate;
end architecture;
V3 - Leverages VHDL’s built-in rol/ror shift operators for concise rotation implementation.
 -- V3: rotation expressed directly with the rol/ror operators.
 architecture BARREL_SHIFTER_ARCH of BARREL_SHIFTER is
     -- Width of a single block in bits.
     constant BLOCK_WIDTH : natural := DATA_WIDTH / BLOCKS;
     -- Rotation distance in blocks.
     signal rot_blocks : integer range 0 to BLOCKS-1;
 begin

     -- The direction is fixed at elaboration time by the SHIFT_LEFT generic;
     -- the distance in bits is the block count times the block width.
     shift_sel_gen : if (not SHIFT_LEFT) generate
         DATA_OUT <= DATA_IN ror (rot_blocks*BLOCK_WIDTH);
     else generate
         DATA_OUT <= DATA_IN rol (rot_blocks*BLOCK_WIDTH);
     end generate;

     -- With at most one block SEL is not read and the distance is tied to 0;
     -- otherwise SEL is interpreted as an unsigned block count.
     sel_int_gen : if (BLOCKS <= 1) generate
         rot_blocks <= 0;
     else generate
         rot_blocks <= to_integer(unsigned(SEL));
     end generate;
 end architecture;
V4 - Uses shift_left/shift_right from numeric_std on a doubled copy of the input vector (DATA_IN & DATA_IN) and keeps one half of the result, so that the logical shift yields a rotation.
 -- V4: rotation built from numeric_std logical shifts applied to a doubled
 -- copy of the input; one half of the shifted vector is the rotated result.
 architecture BARREL_SHIFTER_ARCH of BARREL_SHIFTER is
     -- Width of a single block in bits.
     constant BLOCK_WIDTH : natural := DATA_WIDTH / BLOCKS;
     -- DATA_IN concatenated with itself.
     signal doubled_in   : unsigned(2*DATA_WIDTH-1 downto 0);
     -- doubled_in after the logical shift.
     signal shifted      : unsigned(2*DATA_WIDTH-1 downto 0);
     -- Shift distance in blocks.
     signal shift_blocks : integer range 0 to BLOCKS-1;
 begin

     -- With at most one block SEL is not read and the distance is tied to 0;
     -- otherwise SEL is interpreted as an unsigned block count.
     sel_int_gen : if (BLOCKS <= 1) generate
         shift_blocks <= 0;
     else generate
         shift_blocks <= to_integer(unsigned(SEL));
     end generate;

     doubled_in <= unsigned(DATA_IN & DATA_IN);

     shift_sel_gen : if (not SHIFT_LEFT) generate
         -- Right shift: bits of the upper copy move down into the lower half,
         -- which is taken as the output.
         DATA_OUT <= std_logic_vector(shifted(DATA_WIDTH-1 downto 0));
         shifted <= IEEE.numeric_std.shift_right(doubled_in, shift_blocks*BLOCK_WIDTH);
     else generate
         -- Left shift: bits of the lower copy move up into the upper half,
         -- which is taken as the output.
         DATA_OUT <= std_logic_vector(shifted(DATA_WIDTH*2-1 downto DATA_WIDTH));
         shifted <= IEEE.numeric_std.shift_left(doubled_in, shift_blocks*BLOCK_WIDTH);
     end generate;
 end architecture;
ENTITY BARREL_SHIFTER_GEN IS

Generically adjustable barrel shifter where single bits as well as whole blocks can be shifted. The direction can also be set.

Generics

Generic

Type

Default

Description

BLOCKS

integer

256

input/output data width in BLOCKs

BLOCK_SIZE

integer

64

size of one block in bits

SHIFT_LEFT

boolean

false

set true to shift left, false to shift right

Ports

Port

Type

Mode

Description

DATA_IN

std_logic_vector(BLOCKS*BLOCK_SIZE-1 downto 0)

in

DATA_OUT

std_logic_vector(BLOCKS*BLOCK_SIZE-1 downto 0)

out

SEL

std_logic_vector(log2(BLOCKS)-1 downto 0)

in

ENTITY BARREL_SHIFTER_GEN_PIPED IS
Generics

Generic

Type

Default

Description

BLOCKS

integer

256

input/output data width in BLOCKs

BLOCK_WIDTH

integer

64

width of one block in bits

BAR_SHIFT_LATENCY

integer

0

barrel shifting latency

INPUT_REG

boolean

false

input register enable (adds additional 1 CLK latency)

OUTPUT_REG

boolean

false

output register enable (adds additional 1 CLK latency)

SHIFT_LEFT

boolean

false

set true to shift left, false to shift right

METADATA_WIDTH

integer

0

Metadata can be useful when you want to send additional information to the TX side along with the rotated value (for example, the value of the RX_SEL signal).

Ports

Port

Type

Mode

Description

CLK

std_logic

in

unused when BAR_SHIFT_LATENCY==0 and OUTPUT_REG==INPUT_REG==false

RESET

std_logic

in

unused when BAR_SHIFT_LATENCY==0 and OUTPUT_REG==INPUT_REG==false

RX_DATA

std_logic_vector(BLOCK_WIDTH*BLOCKS-1 downto 0)

in

RX_SEL

std_logic_vector(log2(BLOCKS)-1 downto 0)

in

RX_METADATA

std_logic_vector(METADATA_WIDTH-1 downto 0)

in

RX_SRC_RDY

std_logic

in

RX_DST_RDY

std_logic

out

TX_DATA

std_logic_vector(BLOCK_WIDTH*BLOCKS-1 downto 0)

out

TX_METADATA

std_logic_vector(METADATA_WIDTH-1 downto 0)

out

TX_SRC_RDY

std_logic

out

TX_DST_RDY

std_logic

in