Efinix Trion Block Memory 節約術
作成者:mou-mou
TrionのBlock Memoryは5Kbitということで、通常よく使用する8bit, 16bit, 32bit幅のRAMを構成すると必然的に4/5しか使用されず、20%は無駄な資源となってしまいます。
まずは、Efinity® Synthesis User GuideのTrue Dual-Port Memory Examplesのアドレス幅を12bitに拡張してみました。
// ram4Kx8_tdp_mix
// Behavioural true dual-port RAM (DEPTH x DWIDTH) with two independent clocks.
//   Port A (clkA): read-first  - rdataA gets the value stored before any write
//                                performed on the same clock edge.
//   Port B (clkB): write-first - rdataB gets the value just written when weB
//                                is asserted on the same clock edge.
// Parameters:
//   AWIDTH - address width; DEPTH = 2**AWIDTH words.
//   DWIDTH - data width in bits.
// Ports:
//   wdataA/wdataB - write data, addrA/addrB - word address,
//   clkA/clkB     - port clocks (rising edge), weA/weB - write enables (active high),
//   rdataA/rdataB - registered read data.
module ram4Kx8_tdp_mix (wdataA, addrA, clkA, weA, rdataA, wdataB, addrB, clkB, weB, rdataB);
parameter AWIDTH = 12; // extended to 4K words
parameter DWIDTH = 8;
localparam DEPTH = 1 << AWIDTH;
// Largest value representable in DWIDTH bits (all ones).
localparam MAX_DATA = (1<<DWIDTH)-1;
input [DWIDTH-1:0] wdataA, wdataB;
input [AWIDTH-1:0] addrA, addrB;
input clkA, weA;
input clkB, weB;
output reg [DWIDTH-1:0] rdataA, rdataB;
// Storage array shared by both ports.
reg [DWIDTH-1:0] mem [DEPTH-1:0];
integer i;
initial begin
// The memory is initialized with decreasing values starting from MAX_DATA.
// The result is truncated to DWIDTH bits on assignment, so the pattern
// repeats every 2**DWIDTH entries.
for (i=0;i<DEPTH;i=i+1)
mem[i] = MAX_DATA - i;
end
always@(posedge clkA) begin
// Use blocking assignments to force read-first:
// the read is evaluated before the (optional) write below.
rdataA = mem[addrA];
if (weA) begin
mem[addrA] = wdataA;
end
end
always@(posedge clkB) begin
// Use blocking assignments to force write-first:
// the (optional) write is performed before the read below.
if (weB) begin
mem[addrB] = wdataB;
end
rdataB = mem[addrB];
end
endmodule
合成した結果、EFX_DPRAM_5Kは8Block使用されていました。すなわち、32768bitのRAMを構成するために、40960bitの資源を消費したことになります。Xilinx社やIntel社のFPGAではメモリ幅として9bit、18bit、36bitが使用できるので、それと比較してもロスが大きいと思います。
そこで、メモリ構成を少し工夫してみることを提案いたします。
この構成(1024×5bit構成のBlockを4個、4096×1bit構成のBlockを3個、合計7個)ならば、32768bitのRAMを構成するために、35840bitの資源の消費でおさえることができます。無駄な資源は20%から約9%に低減します。
一般的なRTL記述では、任意のメモリ構成にはできませんので、EFX_DPRAM_5K primitiveを使用して記述してみます。初期値については、サンプルと同じ結果を得るように記述するのは少し面倒なので、ここではすべて0にします。
// ram4Kx8_user
// 4096 x 8 true dual-port RAM built directly from seven EFX_DPRAM_5K primitives:
//   * data bits [4:0] : four banks configured 1024 x 5, selected by addr[11:10]
//   * data bits [7:5] : three blocks configured 4096 x 1, one per bit
// Port A is READ_FIRST, port B is WRITE_FIRST. All memory contents are
// initialised to zero.
//
// Ports:
//   wdataA/wdataB - write data          addrA/addrB   - 12-bit word address
//   clkA/clkB     - port clocks         weA/weB       - write enables (active high)
//   rdataA/rdataB - read data
module ram4Kx8_user (
    input  [7:0]  wdataA,
    input  [7:0]  wdataB,
    input  [11:0] addrA,
    input  [11:0] addrB,
    input         clkA,
    input         weA,
    input         clkB,
    input         weB,
    output [7:0]  rdataA,
    output [7:0]  rdataB
);

    // One 256-bit all-zero initialisation row, shared by every INIT_x parameter.
    localparam [255:0] INIT_ZERO = 256'h0;

    // Bank-select bits captured on each port clock; they steer the read mux
    // for the lower five data bits.
    reg [1:0] selA_q;
    reg [1:0] selB_q;

    always @(posedge clkA) selA_q <= addrA[11:10];
    always @(posedge clkB) selB_q <= addrB[11:10];

    // Per-bank read data for bits [4:0].
    wire [4:0] lowA [3:0];
    wire [4:0] lowB [3:0];

    assign rdataA[4:0] = lowA[selA_q];
    assign rdataB[4:0] = lowB[selB_q];

    genvar bank;
    genvar bitn;

    generate
        // ---- bits [4:0]: four 1024 x 5 banks ------------------------------
        for (bank = 0; bank < 4; bank = bank + 1) begin : gen_dpram_5bit
            // A bank is clock-enabled only while the upper address bits match
            // its index.
            wire hitA = (addrA[11:10] == bank[1:0]);
            wire hitB = (addrB[11:10] == bank[1:0]);

            EFX_DPRAM_5K #(
                // Port A
                .READ_WIDTH_A   (5),             // 1024 x 5
                .WRITE_WIDTH_A  (5),             // 1024 x 5
                .OUTPUT_REG_A   (1'b0),          // 1 would add a pipeline read register
                .CLKA_POLARITY  (1'b1),          // 0 falling edge, 1 rising edge
                .WEA_POLARITY   (1'b1),          // 0 active low, 1 active high
                .CLKEA_POLARITY (1'b1),          // polarity flag for CLKEA, set to 1
                .WRITE_MODE_A   ("READ_FIRST"),  // output "old" data
                // Port B
                .READ_WIDTH_B   (5),             // 1024 x 5
                .WRITE_WIDTH_B  (5),             // 1024 x 5
                .OUTPUT_REG_B   (1'b0),          // 1 would add a pipeline read register
                .CLKB_POLARITY  (1'b1),          // 0 falling edge, 1 rising edge
                .WEB_POLARITY   (1'b1),          // 0 active low, 1 active high
                .CLKEB_POLARITY (1'b1),          // polarity flag for CLKEB, set to 1
                .WRITE_MODE_B   ("WRITE_FIRST"), // output "new" data
                // Initial contents: all zero
                .INIT_0  (INIT_ZERO), .INIT_1  (INIT_ZERO),
                .INIT_2  (INIT_ZERO), .INIT_3  (INIT_ZERO),
                .INIT_4  (INIT_ZERO), .INIT_5  (INIT_ZERO),
                .INIT_6  (INIT_ZERO), .INIT_7  (INIT_ZERO),
                .INIT_8  (INIT_ZERO), .INIT_9  (INIT_ZERO),
                .INIT_A  (INIT_ZERO), .INIT_B  (INIT_ZERO),
                .INIT_C  (INIT_ZERO), .INIT_D  (INIT_ZERO),
                .INIT_E  (INIT_ZERO), .INIT_F  (INIT_ZERO),
                .INIT_10 (INIT_ZERO), .INIT_11 (INIT_ZERO),
                .INIT_12 (INIT_ZERO), .INIT_13 (INIT_ZERO)
            ) EFX_DPRAM_5K_inst (
                // Port A
                .CLKA   (clkA),
                .CLKEA  (hitA),
                .WEA    (weA),
                .ADDRA  (addrA[9:0]),
                .WDATAA (wdataA[4:0]),
                .RDATAA (lowA[bank]),
                // Port B
                .CLKB   (clkB),
                .CLKEB  (hitB),
                .WEB    (weB),
                .ADDRB  (addrB[9:0]),
                .WDATAB (wdataB[4:0]),
                .RDATAB (lowB[bank])
            );
        end

        // ---- bits [7:5]: three 4096 x 1 blocks, always enabled ------------
        for (bitn = 0; bitn < 3; bitn = bitn + 1) begin : gen_dpram_1bit
            EFX_DPRAM_5K #(
                // Port A
                .READ_WIDTH_A   (1),             // 4096 x 1
                .WRITE_WIDTH_A  (1),             // 4096 x 1
                .OUTPUT_REG_A   (1'b0),          // 1 would add a pipeline read register
                .CLKA_POLARITY  (1'b1),          // 0 falling edge, 1 rising edge
                .WEA_POLARITY   (1'b1),          // 0 active low, 1 active high
                .CLKEA_POLARITY (1'b1),          // polarity flag for CLKEA, set to 1
                .WRITE_MODE_A   ("READ_FIRST"),  // output "old" data
                // Port B
                .READ_WIDTH_B   (1),             // 4096 x 1
                .WRITE_WIDTH_B  (1),             // 4096 x 1
                .OUTPUT_REG_B   (1'b0),          // 1 would add a pipeline read register
                .CLKB_POLARITY  (1'b1),          // 0 falling edge, 1 rising edge
                .WEB_POLARITY   (1'b1),          // 0 active low, 1 active high
                .CLKEB_POLARITY (1'b1),          // polarity flag for CLKEB, set to 1
                .WRITE_MODE_B   ("WRITE_FIRST"), // output "new" data
                // Initial contents: all zero
                .INIT_0  (INIT_ZERO), .INIT_1  (INIT_ZERO),
                .INIT_2  (INIT_ZERO), .INIT_3  (INIT_ZERO),
                .INIT_4  (INIT_ZERO), .INIT_5  (INIT_ZERO),
                .INIT_6  (INIT_ZERO), .INIT_7  (INIT_ZERO),
                .INIT_8  (INIT_ZERO), .INIT_9  (INIT_ZERO),
                .INIT_A  (INIT_ZERO), .INIT_B  (INIT_ZERO),
                .INIT_C  (INIT_ZERO), .INIT_D  (INIT_ZERO),
                .INIT_E  (INIT_ZERO), .INIT_F  (INIT_ZERO),
                .INIT_10 (INIT_ZERO), .INIT_11 (INIT_ZERO),
                .INIT_12 (INIT_ZERO), .INIT_13 (INIT_ZERO)
            ) EFX_DPRAM_5K_inst (
                // Port A
                .CLKA   (clkA),
                .CLKEA  (1'b1),
                .WEA    (weA),
                .ADDRA  (addrA),
                .WDATAA (wdataA[5 + bitn]),
                .RDATAA (rdataA[5 + bitn]),
                // Port B
                .CLKB   (clkB),
                .CLKEB  (1'b1),
                .WEB    (weB),
                .ADDRB  (addrB),
                .WDATAB (wdataB[5 + bitn]),
                .RDATAB (rdataB[5 + bitn])
            );
        end
    endgenerate

endmodule
16bit幅の場合は、さらに効果が大きくなります。8K x 16bitの構成で考えてみます。
通常は32個の5K Block Memoryを消費しますが、この構成(1024×5bit構成のBlockを24個、4096×1bit構成のBlockを2個)ならば、26個のBlock Memoryの消費で対応できます。131072bitのRAMを構成するために、133120bitの資源の消費でおさえることができます。無駄な資源は20%から約1.5%に低減します。
この例についても、実際にRTLで記述してみます。
// ram8Kx16_user
// 8192 x 16 true dual-port RAM built directly from 26 EFX_DPRAM_5K primitives:
//   * data bits [14:0] : eight banks (selected by addr[12:10]), each made of
//                        three blocks configured 1024 x 5 (one per 5-bit lane)
//   * data bit  [15]   : two blocks configured 4096 x 1, selected by addr[12]
// Port A is READ_FIRST, port B is WRITE_FIRST. All memory contents are
// initialised to zero.
//
// Ports:
//   wdataA/wdataB - write data          addrA/addrB   - 13-bit word address
//   clkA/clkB     - port clocks         weA/weB       - write enables (active high)
//   rdataA/rdataB - read data
module ram8Kx16_user (
    input  [15:0] wdataA,
    input  [15:0] wdataB,
    input  [12:0] addrA,
    input  [12:0] addrB,
    input         clkA,
    input         weA,
    input         clkB,
    input         weB,
    output [15:0] rdataA,
    output [15:0] rdataB
);

    // One 256-bit all-zero initialisation row, shared by every INIT_x parameter.
    localparam [255:0] INIT_ZERO = 256'h0;

    // Bank-select bits captured on each port clock; they steer the read muxes.
    reg [2:0] selA_q;
    reg [2:0] selB_q;

    always @(posedge clkA) selA_q <= addrA[12:10];
    always @(posedge clkB) selB_q <= addrB[12:10];

    // Per-bank read data for bits [14:0] (eight banks).
    wire [14:0] lowA [7:0];
    wire [14:0] lowB [7:0];
    // Read data for bit 15, one entry per 4096 x 1 block.
    wire [1:0]  topA;
    wire [1:0]  topB;

    assign rdataA[14:0] = lowA[selA_q];
    assign rdataA[15]   = topA[selA_q[2]];
    assign rdataB[14:0] = lowB[selB_q];
    assign rdataB[15]   = topB[selB_q[2]];

    genvar bank;
    genvar lane;
    genvar half;

    // ---- bits [14:0]: 8 banks x 3 lanes of 1024 x 5 -----------------------
    generate
        for (bank = 0; bank < 8; bank = bank + 1) begin : gen_dpram_5bit_d
            // All three lanes of a bank share the same clock-enable, which is
            // high only while the upper address bits match the bank index.
            wire hitA = (addrA[12:10] == bank[2:0]);
            wire hitB = (addrB[12:10] == bank[2:0]);

            for (lane = 0; lane < 3; lane = lane + 1) begin : gen_dpram_5bit_w
                EFX_DPRAM_5K #(
                    // Port A
                    .READ_WIDTH_A   (5),             // 1024 x 5
                    .WRITE_WIDTH_A  (5),             // 1024 x 5
                    .OUTPUT_REG_A   (1'b0),          // 1 would add a pipeline read register
                    .CLKA_POLARITY  (1'b1),          // 0 falling edge, 1 rising edge
                    .WEA_POLARITY   (1'b1),          // 0 active low, 1 active high
                    .CLKEA_POLARITY (1'b1),          // polarity flag for CLKEA, set to 1
                    .WRITE_MODE_A   ("READ_FIRST"),  // output "old" data
                    // Port B
                    .READ_WIDTH_B   (5),             // 1024 x 5
                    .WRITE_WIDTH_B  (5),             // 1024 x 5
                    .OUTPUT_REG_B   (1'b0),          // 1 would add a pipeline read register
                    .CLKB_POLARITY  (1'b1),          // 0 falling edge, 1 rising edge
                    .WEB_POLARITY   (1'b1),          // 0 active low, 1 active high
                    .CLKEB_POLARITY (1'b1),          // polarity flag for CLKEB, set to 1
                    .WRITE_MODE_B   ("WRITE_FIRST"), // output "new" data
                    // Initial contents: all zero
                    .INIT_0  (INIT_ZERO), .INIT_1  (INIT_ZERO),
                    .INIT_2  (INIT_ZERO), .INIT_3  (INIT_ZERO),
                    .INIT_4  (INIT_ZERO), .INIT_5  (INIT_ZERO),
                    .INIT_6  (INIT_ZERO), .INIT_7  (INIT_ZERO),
                    .INIT_8  (INIT_ZERO), .INIT_9  (INIT_ZERO),
                    .INIT_A  (INIT_ZERO), .INIT_B  (INIT_ZERO),
                    .INIT_C  (INIT_ZERO), .INIT_D  (INIT_ZERO),
                    .INIT_E  (INIT_ZERO), .INIT_F  (INIT_ZERO),
                    .INIT_10 (INIT_ZERO), .INIT_11 (INIT_ZERO),
                    .INIT_12 (INIT_ZERO), .INIT_13 (INIT_ZERO)
                ) EFX_DPRAM_5K_inst (
                    // Port A
                    .CLKA   (clkA),
                    .CLKEA  (hitA),
                    .WEA    (weA),
                    .ADDRA  (addrA[9:0]),
                    .WDATAA (wdataA[lane*5 +: 5]),
                    .RDATAA (lowA[bank][lane*5 +: 5]),
                    // Port B
                    .CLKB   (clkB),
                    .CLKEB  (hitB),
                    .WEB    (weB),
                    .ADDRB  (addrB[9:0]),
                    .WDATAB (wdataB[lane*5 +: 5]),
                    .RDATAB (lowB[bank][lane*5 +: 5])
                );
            end
        end
    endgenerate

    // ---- bit [15]: two 4096 x 1 blocks, selected by addr[12] --------------
    generate
        for (half = 0; half < 2; half = half + 1) begin : gen_dpram_1bit
            wire hitA = (addrA[12] == half[0]);
            wire hitB = (addrB[12] == half[0]);

            EFX_DPRAM_5K #(
                // Port A
                .READ_WIDTH_A   (1),             // 4096 x 1
                .WRITE_WIDTH_A  (1),             // 4096 x 1
                .OUTPUT_REG_A   (1'b0),          // 1 would add a pipeline read register
                .CLKA_POLARITY  (1'b1),          // 0 falling edge, 1 rising edge
                .WEA_POLARITY   (1'b1),          // 0 active low, 1 active high
                .CLKEA_POLARITY (1'b1),          // polarity flag for CLKEA, set to 1
                .WRITE_MODE_A   ("READ_FIRST"),  // output "old" data
                // Port B
                .READ_WIDTH_B   (1),             // 4096 x 1
                .WRITE_WIDTH_B  (1),             // 4096 x 1
                .OUTPUT_REG_B   (1'b0),          // 1 would add a pipeline read register
                .CLKB_POLARITY  (1'b1),          // 0 falling edge, 1 rising edge
                .WEB_POLARITY   (1'b1),          // 0 active low, 1 active high
                .CLKEB_POLARITY (1'b1),          // polarity flag for CLKEB, set to 1
                .WRITE_MODE_B   ("WRITE_FIRST"), // output "new" data
                // Initial contents: all zero
                .INIT_0  (INIT_ZERO), .INIT_1  (INIT_ZERO),
                .INIT_2  (INIT_ZERO), .INIT_3  (INIT_ZERO),
                .INIT_4  (INIT_ZERO), .INIT_5  (INIT_ZERO),
                .INIT_6  (INIT_ZERO), .INIT_7  (INIT_ZERO),
                .INIT_8  (INIT_ZERO), .INIT_9  (INIT_ZERO),
                .INIT_A  (INIT_ZERO), .INIT_B  (INIT_ZERO),
                .INIT_C  (INIT_ZERO), .INIT_D  (INIT_ZERO),
                .INIT_E  (INIT_ZERO), .INIT_F  (INIT_ZERO),
                .INIT_10 (INIT_ZERO), .INIT_11 (INIT_ZERO),
                .INIT_12 (INIT_ZERO), .INIT_13 (INIT_ZERO)
            ) EFX_DPRAM_5K_inst (
                // Port A
                .CLKA   (clkA),
                .CLKEA  (hitA),
                .WEA    (weA),
                .ADDRA  (addrA[11:0]),
                .WDATAA (wdataA[15]),
                .RDATAA (topA[half]),
                // Port B
                .CLKB   (clkB),
                .CLKEB  (hitB),
                .WEB    (weB),
                .ADDRB  (addrB[11:0]),
                .WDATAB (wdataB[15]),
                .RDATAB (topB[half])
            );
        end
    endgenerate

endmodule
Byte Enableの対応やビット幅の違うDPRAMを構築するには制約はあるのですが、シンプルなメモリ構成ならば実装も簡単ですので、ブロックメモリが不足しそうな場合にはこのような実装を検討してみてはいかがでしょうか。