串/并行乘法器的速度比较慢,本节在上节的基础上对电路进行改进以提高速度:引入流水线,先将相邻的两个部分积两两相加,再以加法树的形式逐级求和,并在各级之间插入寄存器,形成结构上的流水化处理,从而缩短关键路径。
代码:8位×8位乘法器(乘积为16位)
// Pipelined 8-bit x 8-bit unsigned multiplier producing a 16-bit product.
//
// Three register stages:
//   stage 1 - the eight zero-extended, shifted partial products (one per bit
//             of datab) are registered in stored0..stored7;
//   stage 2 - adjacent partial products are summed pairwise (add01..add67);
//   stage 3 - the four pair sums are added together into mul_out.
// Operands captured on one rising clock edge are therefore visible on dout
// after the second following rising edge.
//
// Ports:
//   clk   - clock; every register updates on its rising edge
//   rst   - asynchronous reset, active low; clears all pipeline registers
//   dataa - 8-bit multiplicand
//   datab - 8-bit multiplier
//   dout  - low 16 bits of mul_out
module Multiply( clk, rst, dataa, datab, dout );
    input         clk;
    input         rst;
    input  [7:0]  dataa;
    input  [7:0]  datab;
    output [15:0] dout;

    // Stage 1: one 16-bit partial product per multiplier bit.
    reg [15:0] stored0;
    reg [15:0] stored1;
    reg [15:0] stored2;
    reg [15:0] stored3;
    reg [15:0] stored4;
    reg [15:0] stored5;
    reg [15:0] stored6;
    reg [15:0] stored7;

    // Stage 2: pairwise sums, one extra bit wide to hold the carry.
    reg [16:0] add01;
    reg [16:0] add23;
    reg [16:0] add45;
    reg [16:0] add67;

    // Stage 3: sum of the four pair sums.
    reg [17:0] mul_out;

    always @(posedge clk or negedge rst)
    begin
        if (!rst)
        begin
            // Clear every pipeline register.
            stored0 <= 16'd0;
            stored1 <= 16'd0;
            stored2 <= 16'd0;
            stored3 <= 16'd0;
            stored4 <= 16'd0;
            stored5 <= 16'd0;
            stored6 <= 16'd0;
            stored7 <= 16'd0;
            add01   <= 17'd0;
            add23   <= 17'd0;
            add45   <= 17'd0;
            add67   <= 17'd0;
            mul_out <= 18'd0;
        end
        else
        begin
            // Shift-and-add: bit i of datab selects dataa shifted left by i.
            // The zero alternative is written as 16'd0 so both ternary
            // operands have the same width as the destination register.
            stored7 <= datab[7] ? {1'b0, dataa, 7'b0} : 16'd0;
            stored6 <= datab[6] ? {2'b0, dataa, 6'b0} : 16'd0;
            stored5 <= datab[5] ? {3'b0, dataa, 5'b0} : 16'd0;
            stored4 <= datab[4] ? {4'b0, dataa, 4'b0} : 16'd0;
            stored3 <= datab[3] ? {5'b0, dataa, 3'b0} : 16'd0;
            stored2 <= datab[2] ? {6'b0, dataa, 2'b0} : 16'd0;
            stored1 <= datab[1] ? {7'b0, dataa, 1'b0} : 16'd0;
            stored0 <= datab[0] ? {8'b0, dataa}       : 16'd0;

            // Adder tree, first level.
            add01 <= stored1 + stored0;
            add23 <= stored3 + stored2;
            add45 <= stored5 + stored4;
            add67 <= stored7 + stored6;

            // Adder tree, final level.
            mul_out <= (add01 + add23) + (add45 + add67);
        end
    end

    // Only the low 16 bits of the accumulated sum are exported.
    assign dout = mul_out[15:0];
endmodule

testbench代码为:

`timescale 1 ns/ 1 ps

// Self-checking testbench for Multiply.
//
// Drives random operands on every rising clock edge and compares dout with a
// reference product delayed through the same number of register stages as
// the DUT. Reports the mismatch count and ends the simulation after a fixed
// run time.
module mult_test;
    reg         clk;
    reg         rst;
    reg  [7:0]  dataa;
    reg  [7:0]  datab;
    wire [15:0] dout;

    // Reference pipeline mirroring the three register stages of the DUT.
    reg [15:0] gold_s1;
    reg [15:0] gold_s2;
    reg [15:0] gold_s3;

    integer errors;

    Multiply u1(
        .clk  (clk),
        .rst  (rst),
        .dataa(dataa),
        .datab(datab),
        .dout (dout)
    );

    initial
    begin
        errors = 0;
        rst    = 0;
        clk    = 0;
        #10 rst = 1;

        // Bounded run; the end time falls between clock edges.
        #1000;
        if (errors == 0)
            $display("PASS: no mismatches");
        else
            $display("FAIL: %0d mismatches", errors);
        $finish;
    end

    always #5 clk = ~clk;

    // Nonblocking assignments so the DUT, which samples on the same rising
    // edge, always sees the previous operand values (no simulation race).
    always @(posedge clk)
    begin
        dataa <= {$random}%15;
        datab <= {$random}%13;
    end

    // Reference model and checker; reset the same way as the DUT so the two
    // pipelines stay aligned.
    always @(posedge clk or negedge rst)
    begin
        if (!rst)
        begin
            gold_s1 <= 16'd0;
            gold_s2 <= 16'd0;
            gold_s3 <= 16'd0;
        end
        else
        begin
            gold_s1 <= dataa * datab;
            gold_s2 <= gold_s1;
            gold_s3 <= gold_s2;

            // dout and gold_s3 are both read before this edge's updates.
            if (dout !== gold_s3)
            begin
                $display("ERROR at %0t: dout=%0d expected=%0d", $time, dout, gold_s3);
                errors = errors + 1;
            end
        end
    end
endmodule