LoongArch five-stage pipeline implementation

Starting from the single-cycle design, the CPU is split into a five-stage pipeline: instruction fetch, decode, execute, memory access, and write-back.

mycpu_top.v
`include "mycpu.h"

//------------------------------------------------------------------------------
// id_stage -- instruction decode (ID) stage.
//
// Latches {inst, pc} from the fetch stage, decodes the LoongArch instructions
// listed in the "instruction decode" section below, reads the register file,
// resolves branches/jumps and hands a control/operand bundle to the execute
// stage.
//
// Ports
//   clk, reset      : clock and synchronous reset (reset clears ds_valid).
//   es_allowin      : execute stage can accept a new instruction.
//   ds_allowin      : this stage can accept a new instruction.
//   fs_to_ds_valid  : fetch stage is presenting a valid instruction.
//   fs_to_ds_bus    : {inst[31:0], pc[31:0]} from the fetch stage.
//   ds_to_es_valid  : this stage is presenting a valid instruction.
//   ds_to_es_bus    : decoded controls and operands (layout at the assign).
//   br_bus          : {br_taken, br_target[31:0]} back to the fetch stage.
//   ws_to_rf_bus    : {rf_we, rf_waddr[4:0], rf_wdata[31:0]} from write-back.
//
// NOTE(review): this listing appears under the "mycpu_top.v" heading but it
// defines id_stage, not a top-level wrapper.
// NOTE(review): operands are taken straight from the register file; this
// module contains no stall or forwarding logic (the lab4/lab5 hazard hooks
// were only ever present as commented-out code). Confirm how data hazards
// are meant to be handled before relying on back-to-back dependent code.
//------------------------------------------------------------------------------
module id_stage(
    input                           clk           ,
    input                           reset         ,
    // allowin
    input                           es_allowin    ,
    output                          ds_allowin    ,
    // from fs
    input                           fs_to_ds_valid,
    input  [`FS_TO_DS_BUS_WD -1:0]  fs_to_ds_bus  ,
    // to es
    output                          ds_to_es_valid,
    output [`DS_TO_ES_BUS_WD -1:0]  ds_to_es_bus  ,
    // to fs
    output [`BR_BUS_WD       -1:0]  br_bus        ,
    // to rf: for write back
    input  [`WS_TO_RF_BUS_WD -1:0]  ws_to_rf_bus
);

// ---------------------------------------------------------------------------
// Pipeline register and handshake signals
// ---------------------------------------------------------------------------
reg                          ds_valid;
wire                         ds_ready_go;
reg  [`FS_TO_DS_BUS_WD -1:0] fs_to_ds_bus_r;

wire [31:0] ds_inst;
wire [31:0] ds_pc;
assign {ds_inst,
        ds_pc  } = fs_to_ds_bus_r;

// Write-back port of the register file, unpacked from the WB stage bus.
wire        rf_we;
wire [ 4:0] rf_waddr;
wire [31:0] rf_wdata;
assign {rf_we   ,  //37:37
        rf_waddr,  //36:32
        rf_wdata   //31:0
       } = ws_to_rf_bus;

// ---------------------------------------------------------------------------
// Decoded control signals and operands
// ---------------------------------------------------------------------------
wire        br_taken;
wire [31:0] br_target;

wire [11:0] alu_op;
wire        load_op;
wire        src1_is_pc;
wire        src2_is_imm;
wire        gr_we;
wire        mem_we;
wire        src_reg_is_rd;
wire        dst_is_r1;
wire [ 4:0] dest;
wire [31:0] imm;
wire [31:0] rj_value;
wire [31:0] rkd_value;
wire [31:0] br_offs;
wire [31:0] jirl_offs;
wire        rj_eq_rd;

// Instruction fields.
wire [ 5:0] op_31_26;
wire [ 3:0] op_25_22;
wire [ 1:0] op_21_20;
wire [ 4:0] op_19_15;
wire [ 4:0] rd;
wire [ 4:0] rj;
wire [ 4:0] rk;
wire [11:0] i12;
wire [19:0] i20;
wire [15:0] i16;
wire [25:0] i26;

// One-hot decodes of the opcode fields.
wire [63:0] op_31_26_d;
wire [15:0] op_25_22_d;
wire [ 3:0] op_21_20_d;
wire [31:0] op_19_15_d;

wire        inst_add_w;
wire        inst_sub_w;
wire        inst_slt;
wire        inst_sltu;
wire        inst_nor;
wire        inst_and;
wire        inst_or;
wire        inst_xor;
wire        inst_slli_w;
wire        inst_srli_w;
wire        inst_srai_w;
wire        inst_addi_w;
wire        inst_ld_w;
wire        inst_st_w;
wire        inst_jirl;
wire        inst_b;
wire        inst_bl;
wire        inst_beq;
wire        inst_bne;
wire        inst_lu12i_w;

wire        need_si20;
wire        need_si26;
wire        src2_is_4;

wire [ 4:0] rf_raddr1;
wire [31:0] rf_rdata1;
wire [ 4:0] rf_raddr2;
wire [31:0] rf_rdata2;

// ---------------------------------------------------------------------------
// Output buses
// ---------------------------------------------------------------------------
assign br_bus = {br_taken, br_target};

assign ds_to_es_bus = {alu_op      ,  //149:138
                       load_op     ,  //137:137
                       src1_is_pc  ,  //136:136
                       src2_is_imm ,  //135:135
                       gr_we       ,  //134:134
                       mem_we      ,  //133:133
                       dest        ,  //132:128
                       imm         ,  //127:96
                       rj_value    ,  //95 :64
                       rkd_value   ,  //63 :32
                       ds_pc          //31 :0
                      };

// ---------------------------------------------------------------------------
// Handshake: a valid instruction is always ready to leave this stage.
// ---------------------------------------------------------------------------
assign ds_ready_go    = ds_valid;
assign ds_allowin     = !ds_valid || (ds_ready_go && es_allowin);
assign ds_to_es_valid = ds_valid && ds_ready_go;

always @(posedge clk) begin
    if (reset) begin
        ds_valid <= 1'b0;
    end
    else if (ds_allowin) begin
        ds_valid <= fs_to_ds_valid;
    end
end

// The payload register is only updated when a valid instruction is accepted.
always @(posedge clk) begin
    if (fs_to_ds_valid && ds_allowin) begin
        fs_to_ds_bus_r <= fs_to_ds_bus;
    end
end

// ---------------------------------------------------------------------------
// Field extraction
// ---------------------------------------------------------------------------
assign op_31_26 = ds_inst[31:26];
assign op_25_22 = ds_inst[25:22];
assign op_21_20 = ds_inst[21:20];
assign op_19_15 = ds_inst[19:15];

assign rd = ds_inst[ 4: 0];
assign rj = ds_inst[ 9: 5];
assign rk = ds_inst[14:10];

assign i12 = ds_inst[21:10];
assign i20 = ds_inst[24: 5];
assign i16 = ds_inst[25:10];
assign i26 = {ds_inst[ 9: 0], ds_inst[25:10]};

decoder_6_64 u_dec0(.in(op_31_26), .out(op_31_26_d));
decoder_4_16 u_dec1(.in(op_25_22), .out(op_25_22_d));
decoder_2_4  u_dec2(.in(op_21_20), .out(op_21_20_d));
decoder_5_32 u_dec3(.in(op_19_15), .out(op_19_15_d));

// ---------------------------------------------------------------------------
// Instruction decode
// ---------------------------------------------------------------------------
assign inst_add_w   = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h00];
assign inst_sub_w   = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h02];
assign inst_slt     = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h04];
assign inst_sltu    = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h05];
assign inst_nor     = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h08];
assign inst_and     = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h09];
assign inst_or      = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h0a];
assign inst_xor     = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h0b];
assign inst_slli_w  = op_31_26_d[6'h00] & op_25_22_d[4'h1] & op_21_20_d[2'h0] & op_19_15_d[5'h01];
assign inst_srli_w  = op_31_26_d[6'h00] & op_25_22_d[4'h1] & op_21_20_d[2'h0] & op_19_15_d[5'h09];
assign inst_srai_w  = op_31_26_d[6'h00] & op_25_22_d[4'h1] & op_21_20_d[2'h0] & op_19_15_d[5'h11];
assign inst_addi_w  = op_31_26_d[6'h00] & op_25_22_d[4'ha];
assign inst_ld_w    = op_31_26_d[6'h0a] & op_25_22_d[4'h2];
assign inst_st_w    = op_31_26_d[6'h0a] & op_25_22_d[4'h6];
assign inst_jirl    = op_31_26_d[6'h13];
assign inst_b       = op_31_26_d[6'h14];
assign inst_bl      = op_31_26_d[6'h15];
assign inst_beq     = op_31_26_d[6'h16];
assign inst_bne     = op_31_26_d[6'h17];
assign inst_lu12i_w = op_31_26_d[6'h05] & ~ds_inst[25];

// One alu_op bit per operation class; bit 0 is shared by every instruction
// that needs an addition (address generation and the pc+4 link value).
assign alu_op[ 0] = inst_add_w | inst_addi_w | inst_ld_w | inst_st_w
                  | inst_jirl  | inst_bl;
assign alu_op[ 1] = inst_sub_w;
assign alu_op[ 2] = inst_slt;
assign alu_op[ 3] = inst_sltu;
assign alu_op[ 4] = inst_and;
assign alu_op[ 5] = inst_nor;
assign alu_op[ 6] = inst_or;
assign alu_op[ 7] = inst_xor;
assign alu_op[ 8] = inst_slli_w;
assign alu_op[ 9] = inst_srli_w;
assign alu_op[10] = inst_srai_w;
assign alu_op[11] = inst_lu12i_w;

// ---------------------------------------------------------------------------
// Immediate and branch-offset generation
// ---------------------------------------------------------------------------
assign need_si20 = inst_lu12i_w;
assign need_si26 = inst_b | inst_bl;
assign src2_is_4 = inst_jirl | inst_bl;   // link instructions add 4 to the pc

// Default arm: sign-extended i12, used by the si12 instructions
// (addi.w/ld.w/st.w) and also fed to the shift-immediate instructions.
assign imm = src2_is_4 ? 32'h4                 :
             need_si20 ? {i20[19:0], 12'b0}    :
                         {{20{i12[11]}}, i12[11:0]};

assign br_offs   = need_si26 ? {{ 4{i26[25]}}, i26[25:0], 2'b0} :
                               {{14{i16[15]}}, i16[15:0], 2'b0};
assign jirl_offs = {{14{i16[15]}}, i16[15:0], 2'b0};

// ---------------------------------------------------------------------------
// Control signals for later stages
// ---------------------------------------------------------------------------
assign load_op       = inst_ld_w;
assign src_reg_is_rd = inst_beq | inst_bne | inst_st_w;
assign src1_is_pc    = inst_jirl | inst_bl;

assign src2_is_imm   = inst_slli_w  |
                       inst_srli_w  |
                       inst_srai_w  |
                       inst_addi_w  |
                       inst_ld_w    |
                       inst_st_w    |
                       inst_lu12i_w |
                       inst_jirl    |
                       inst_bl;

assign dst_is_r1 = inst_bl;
// Stores and the non-linking branches do not write a general register.
assign gr_we     = ~inst_st_w & ~inst_beq & ~inst_bne & ~inst_b;
assign mem_we    = inst_st_w;
assign dest      = dst_is_r1 ? 5'd01 : rd;

// ---------------------------------------------------------------------------
// Register file
// ---------------------------------------------------------------------------
assign rf_raddr1 = rj;
assign rf_raddr2 = src_reg_is_rd ? rd : rk;
regfile u_regfile(
    .clk    (clk      ),
    .raddr1 (rf_raddr1),
    .rdata1 (rf_rdata1),
    .raddr2 (rf_raddr2),
    .rdata2 (rf_rdata2),
    .we     (rf_we    ),
    .waddr  (rf_waddr ),
    .wdata  (rf_wdata )
    );

assign rj_value  = rf_rdata1;
assign rkd_value = rf_rdata2;

// ---------------------------------------------------------------------------
// Branch resolution (only meaningful while ds_valid is set)
// ---------------------------------------------------------------------------
assign rj_eq_rd  = (rj_value == rkd_value);
assign br_taken  = (   (inst_beq &&  rj_eq_rd)
                    || (inst_bne && !rj_eq_rd)
                    || inst_jirl
                    || inst_bl
                    || inst_b
                   ) && ds_valid;
assign br_target = (inst_beq || inst_bne || inst_bl || inst_b) ? (ds_pc + br_offs) :
                                                   /*inst_jirl*/ (rj_value + jirl_offs);

endmodule

2. IF_stage.v
`include "mycpu.h"

//------------------------------------------------------------------------------
// if_stage -- instruction fetch (pre-IF + IF) stage.
//
// Drives the instruction SRAM with the next pc, holds the pc of the
// instruction currently being fetched, and forwards {inst, pc} to the decode
// stage.
//
// Ports
//   clk, reset      : clock and synchronous reset.
//   ds_allowin      : decode stage can accept a new instruction.
//   br_bus          : {br_taken, br_target[31:0]} from the decode stage.
//   fs_to_ds_valid  : this stage is presenting a valid instruction.
//   fs_to_ds_bus    : {inst[31:0], pc[31:0]}.
//   inst_sram_*     : instruction SRAM interface; never written (we = 0).
//
// Fix relative to the original listing: when the decode stage reports a taken
// branch, the instruction currently in this stage was fetched from the
// sequential path and must not execute (LoongArch has no branch delay slot).
// fs_to_ds_valid is therefore gated with !br_taken so that instruction is
// dropped and a bubble enters the decode stage instead.
//------------------------------------------------------------------------------
module if_stage(
    input                           clk            ,
    input                           reset          ,
    // allowin
    input                           ds_allowin     ,
    // br bus
    input  [`BR_BUS_WD       -1:0]  br_bus         ,
    // to ds
    output                          fs_to_ds_valid ,
    output [`FS_TO_DS_BUS_WD -1:0]  fs_to_ds_bus   ,
    // inst sram interface
    output                          inst_sram_en   ,
    output [ 3:0]                   inst_sram_we   ,
    output [31:0]                   inst_sram_addr ,
    output [31:0]                   inst_sram_wdata,
    input  [31:0]                   inst_sram_rdata
);

reg         fs_valid;      // this stage holds a valid instruction
wire        fs_ready_go;   // the held instruction may leave this stage
wire        fs_allowin;    // this stage can accept the next fetch
wire        to_fs_valid;   // pre-IF has a fetch request to issue

wire [31:0] seq_pc;
wire [31:0] nextpc;

wire        br_taken;
wire [31:0] br_target;
assign {br_taken, br_target} = br_bus;

wire [31:0] fs_inst;
reg  [31:0] fs_pc;
assign fs_to_ds_bus = {fs_inst,
                       fs_pc  };

// ---------------------------------------------------------------------------
// pre-IF stage: choose the address of the next fetch
// ---------------------------------------------------------------------------
assign to_fs_valid = ~reset;
assign seq_pc      = fs_pc + 32'h4;
assign nextpc      = br_taken ? br_target : seq_pc;

// ---------------------------------------------------------------------------
// IF stage
// ---------------------------------------------------------------------------
assign fs_ready_go    = 1'b1;
assign fs_allowin     = !fs_valid || (fs_ready_go && ds_allowin);
// Squash the sequentially fetched instruction behind a taken branch.
assign fs_to_ds_valid = fs_valid && fs_ready_go && !br_taken;

always @(posedge clk) begin
    if (reset) begin
        fs_valid <= 1'b0;
    end
    else if (fs_allowin) begin
        fs_valid <= to_fs_valid;
    end
end

always @(posedge clk) begin
    if (reset) begin
        // trick: makes nextpc (= fs_pc + 4) equal 0x1c000000 during reset
        fs_pc <= 32'h1bfffffc;
    end
    else if (to_fs_valid && fs_allowin) begin
        fs_pc <= nextpc;
    end
end

// ---------------------------------------------------------------------------
// Instruction SRAM: read-only, addressed with the next pc
// ---------------------------------------------------------------------------
assign inst_sram_en    = to_fs_valid && fs_allowin;
assign inst_sram_we    = 4'h0;
assign inst_sram_addr  = nextpc;
assign inst_sram_wdata = 32'b0;

assign fs_inst = inst_sram_rdata;

endmodule

3. ID_stage
`include "mycpu.h"

//------------------------------------------------------------------------------
// id_stage -- instruction decode (ID) stage.
//
// Latches {inst, pc} from the fetch stage, decodes the LoongArch instructions
// listed in the "instruction decode" section below, reads the register file,
// resolves branches/jumps and hands a control/operand bundle to the execute
// stage.
//
// Ports
//   clk, reset      : clock and synchronous reset (reset clears ds_valid).
//   es_allowin      : execute stage can accept a new instruction.
//   ds_allowin      : this stage can accept a new instruction.
//   fs_to_ds_valid  : fetch stage is presenting a valid instruction.
//   fs_to_ds_bus    : {inst[31:0], pc[31:0]} from the fetch stage.
//   ds_to_es_valid  : this stage is presenting a valid instruction.
//   ds_to_es_bus    : decoded controls and operands (layout at the assign).
//   br_bus          : {br_taken, br_target[31:0]} back to the fetch stage.
//   ws_to_rf_bus    : {rf_we, rf_waddr[4:0], rf_wdata[31:0]} from write-back.
//
// NOTE(review): operands are taken straight from the register file; this
// module contains no stall or forwarding logic (the lab4/lab5 hazard hooks
// were only ever present as commented-out code). Confirm how data hazards
// are meant to be handled before relying on back-to-back dependent code.
//------------------------------------------------------------------------------
module id_stage(
    input                           clk           ,
    input                           reset         ,
    // allowin
    input                           es_allowin    ,
    output                          ds_allowin    ,
    // from fs
    input                           fs_to_ds_valid,
    input  [`FS_TO_DS_BUS_WD -1:0]  fs_to_ds_bus  ,
    // to es
    output                          ds_to_es_valid,
    output [`DS_TO_ES_BUS_WD -1:0]  ds_to_es_bus  ,
    // to fs
    output [`BR_BUS_WD       -1:0]  br_bus        ,
    // to rf: for write back
    input  [`WS_TO_RF_BUS_WD -1:0]  ws_to_rf_bus
);

// ---------------------------------------------------------------------------
// Pipeline register and handshake signals
// ---------------------------------------------------------------------------
reg                          ds_valid;
wire                         ds_ready_go;
reg  [`FS_TO_DS_BUS_WD -1:0] fs_to_ds_bus_r;

wire [31:0] ds_inst;
wire [31:0] ds_pc;
assign {ds_inst,
        ds_pc  } = fs_to_ds_bus_r;

// Write-back port of the register file, unpacked from the WB stage bus.
wire        rf_we;
wire [ 4:0] rf_waddr;
wire [31:0] rf_wdata;
assign {rf_we   ,  //37:37
        rf_waddr,  //36:32
        rf_wdata   //31:0
       } = ws_to_rf_bus;

// ---------------------------------------------------------------------------
// Decoded control signals and operands
// ---------------------------------------------------------------------------
wire        br_taken;
wire [31:0] br_target;

wire [11:0] alu_op;
wire        load_op;
wire        src1_is_pc;
wire        src2_is_imm;
wire        gr_we;
wire        mem_we;
wire        src_reg_is_rd;
wire        dst_is_r1;
wire [ 4:0] dest;
wire [31:0] imm;
wire [31:0] rj_value;
wire [31:0] rkd_value;
wire [31:0] br_offs;
wire [31:0] jirl_offs;
wire        rj_eq_rd;

// Instruction fields.
wire [ 5:0] op_31_26;
wire [ 3:0] op_25_22;
wire [ 1:0] op_21_20;
wire [ 4:0] op_19_15;
wire [ 4:0] rd;
wire [ 4:0] rj;
wire [ 4:0] rk;
wire [11:0] i12;
wire [19:0] i20;
wire [15:0] i16;
wire [25:0] i26;

// One-hot decodes of the opcode fields.
wire [63:0] op_31_26_d;
wire [15:0] op_25_22_d;
wire [ 3:0] op_21_20_d;
wire [31:0] op_19_15_d;

wire        inst_add_w;
wire        inst_sub_w;
wire        inst_slt;
wire        inst_sltu;
wire        inst_nor;
wire        inst_and;
wire        inst_or;
wire        inst_xor;
wire        inst_slli_w;
wire        inst_srli_w;
wire        inst_srai_w;
wire        inst_addi_w;
wire        inst_ld_w;
wire        inst_st_w;
wire        inst_jirl;
wire        inst_b;
wire        inst_bl;
wire        inst_beq;
wire        inst_bne;
wire        inst_lu12i_w;

wire        need_si20;
wire        need_si26;
wire        src2_is_4;

wire [ 4:0] rf_raddr1;
wire [31:0] rf_rdata1;
wire [ 4:0] rf_raddr2;
wire [31:0] rf_rdata2;

// ---------------------------------------------------------------------------
// Output buses
// ---------------------------------------------------------------------------
assign br_bus = {br_taken, br_target};

assign ds_to_es_bus = {alu_op      ,  //149:138
                       load_op     ,  //137:137
                       src1_is_pc  ,  //136:136
                       src2_is_imm ,  //135:135
                       gr_we       ,  //134:134
                       mem_we      ,  //133:133
                       dest        ,  //132:128
                       imm         ,  //127:96
                       rj_value    ,  //95 :64
                       rkd_value   ,  //63 :32
                       ds_pc          //31 :0
                      };

// ---------------------------------------------------------------------------
// Handshake: a valid instruction is always ready to leave this stage.
// ---------------------------------------------------------------------------
assign ds_ready_go    = ds_valid;
assign ds_allowin     = !ds_valid || (ds_ready_go && es_allowin);
assign ds_to_es_valid = ds_valid && ds_ready_go;

always @(posedge clk) begin
    if (reset) begin
        ds_valid <= 1'b0;
    end
    else if (ds_allowin) begin
        ds_valid <= fs_to_ds_valid;
    end
end

// The payload register is only updated when a valid instruction is accepted.
always @(posedge clk) begin
    if (fs_to_ds_valid && ds_allowin) begin
        fs_to_ds_bus_r <= fs_to_ds_bus;
    end
end

// ---------------------------------------------------------------------------
// Field extraction
// ---------------------------------------------------------------------------
assign op_31_26 = ds_inst[31:26];
assign op_25_22 = ds_inst[25:22];
assign op_21_20 = ds_inst[21:20];
assign op_19_15 = ds_inst[19:15];

assign rd = ds_inst[ 4: 0];
assign rj = ds_inst[ 9: 5];
assign rk = ds_inst[14:10];

assign i12 = ds_inst[21:10];
assign i20 = ds_inst[24: 5];
assign i16 = ds_inst[25:10];
assign i26 = {ds_inst[ 9: 0], ds_inst[25:10]};

decoder_6_64 u_dec0(.in(op_31_26), .out(op_31_26_d));
decoder_4_16 u_dec1(.in(op_25_22), .out(op_25_22_d));
decoder_2_4  u_dec2(.in(op_21_20), .out(op_21_20_d));
decoder_5_32 u_dec3(.in(op_19_15), .out(op_19_15_d));

// ---------------------------------------------------------------------------
// Instruction decode
// ---------------------------------------------------------------------------
assign inst_add_w   = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h00];
assign inst_sub_w   = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h02];
assign inst_slt     = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h04];
assign inst_sltu    = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h05];
assign inst_nor     = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h08];
assign inst_and     = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h09];
assign inst_or      = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h0a];
assign inst_xor     = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h0b];
assign inst_slli_w  = op_31_26_d[6'h00] & op_25_22_d[4'h1] & op_21_20_d[2'h0] & op_19_15_d[5'h01];
assign inst_srli_w  = op_31_26_d[6'h00] & op_25_22_d[4'h1] & op_21_20_d[2'h0] & op_19_15_d[5'h09];
assign inst_srai_w  = op_31_26_d[6'h00] & op_25_22_d[4'h1] & op_21_20_d[2'h0] & op_19_15_d[5'h11];
assign inst_addi_w  = op_31_26_d[6'h00] & op_25_22_d[4'ha];
assign inst_ld_w    = op_31_26_d[6'h0a] & op_25_22_d[4'h2];
assign inst_st_w    = op_31_26_d[6'h0a] & op_25_22_d[4'h6];
assign inst_jirl    = op_31_26_d[6'h13];
assign inst_b       = op_31_26_d[6'h14];
assign inst_bl      = op_31_26_d[6'h15];
assign inst_beq     = op_31_26_d[6'h16];
assign inst_bne     = op_31_26_d[6'h17];
assign inst_lu12i_w = op_31_26_d[6'h05] & ~ds_inst[25];

// One alu_op bit per operation class; bit 0 is shared by every instruction
// that needs an addition (address generation and the pc+4 link value).
assign alu_op[ 0] = inst_add_w | inst_addi_w | inst_ld_w | inst_st_w
                  | inst_jirl  | inst_bl;
assign alu_op[ 1] = inst_sub_w;
assign alu_op[ 2] = inst_slt;
assign alu_op[ 3] = inst_sltu;
assign alu_op[ 4] = inst_and;
assign alu_op[ 5] = inst_nor;
assign alu_op[ 6] = inst_or;
assign alu_op[ 7] = inst_xor;
assign alu_op[ 8] = inst_slli_w;
assign alu_op[ 9] = inst_srli_w;
assign alu_op[10] = inst_srai_w;
assign alu_op[11] = inst_lu12i_w;

// ---------------------------------------------------------------------------
// Immediate and branch-offset generation
// ---------------------------------------------------------------------------
assign need_si20 = inst_lu12i_w;
assign need_si26 = inst_b | inst_bl;
assign src2_is_4 = inst_jirl | inst_bl;   // link instructions add 4 to the pc

// Default arm: sign-extended i12, used by the si12 instructions
// (addi.w/ld.w/st.w) and also fed to the shift-immediate instructions.
assign imm = src2_is_4 ? 32'h4                 :
             need_si20 ? {i20[19:0], 12'b0}    :
                         {{20{i12[11]}}, i12[11:0]};

assign br_offs   = need_si26 ? {{ 4{i26[25]}}, i26[25:0], 2'b0} :
                               {{14{i16[15]}}, i16[15:0], 2'b0};
assign jirl_offs = {{14{i16[15]}}, i16[15:0], 2'b0};

// ---------------------------------------------------------------------------
// Control signals for later stages
// ---------------------------------------------------------------------------
assign load_op       = inst_ld_w;
assign src_reg_is_rd = inst_beq | inst_bne | inst_st_w;
assign src1_is_pc    = inst_jirl | inst_bl;

assign src2_is_imm   = inst_slli_w  |
                       inst_srli_w  |
                       inst_srai_w  |
                       inst_addi_w  |
                       inst_ld_w    |
                       inst_st_w    |
                       inst_lu12i_w |
                       inst_jirl    |
                       inst_bl;

assign dst_is_r1 = inst_bl;
// Stores and the non-linking branches do not write a general register.
assign gr_we     = ~inst_st_w & ~inst_beq & ~inst_bne & ~inst_b;
assign mem_we    = inst_st_w;
assign dest      = dst_is_r1 ? 5'd01 : rd;

// ---------------------------------------------------------------------------
// Register file
// ---------------------------------------------------------------------------
assign rf_raddr1 = rj;
assign rf_raddr2 = src_reg_is_rd ? rd : rk;
regfile u_regfile(
    .clk    (clk      ),
    .raddr1 (rf_raddr1),
    .rdata1 (rf_rdata1),
    .raddr2 (rf_raddr2),
    .rdata2 (rf_rdata2),
    .we     (rf_we    ),
    .waddr  (rf_waddr ),
    .wdata  (rf_wdata )
    );

assign rj_value  = rf_rdata1;
assign rkd_value = rf_rdata2;

// ---------------------------------------------------------------------------
// Branch resolution (only meaningful while ds_valid is set)
// ---------------------------------------------------------------------------
assign rj_eq_rd  = (rj_value == rkd_value);
assign br_taken  = (   (inst_beq &&  rj_eq_rd)
                    || (inst_bne && !rj_eq_rd)
                    || inst_jirl
                    || inst_bl
                    || inst_b
                   ) && ds_valid;
assign br_target = (inst_beq || inst_bne || inst_bl || inst_b) ? (ds_pc + br_offs) :
                                                   /*inst_jirl*/ (rj_value + jirl_offs);

endmodule

4. EXE_stage
`include "mycpu.h"

//------------------------------------------------------------------------------
// exe_stage -- execute (EXE) stage.
//
// Latches the decoded bundle from the decode stage, selects the two ALU
// operands, runs the ALU, issues the data-SRAM request and forwards the result
// bundle to the memory stage.
//
// Ports
//   clk, reset      : clock and synchronous reset (reset clears es_valid).
//   ms_allowin      : memory stage can accept a new instruction.
//   es_allowin      : this stage can accept a new instruction.
//   ds_to_es_valid  : decode stage is presenting a valid instruction.
//   ds_to_es_bus    : decoded controls and operands (layout at the assign).
//   es_to_ms_valid  : this stage is presenting a valid instruction.
//   es_to_ms_bus    : {res_from_mem, gr_we, dest, alu_result, pc}.
//   data_sram_*     : data SRAM request; address is the ALU result and the
//                     write data is the rk/rd register value.
//------------------------------------------------------------------------------
module exe_stage(
    input                           clk            ,
    input                           reset          ,
    // allowin
    input                           ms_allowin     ,
    output                          es_allowin     ,
    // from ds
    input                           ds_to_es_valid ,
    input  [`DS_TO_ES_BUS_WD -1:0]  ds_to_es_bus   ,
    // to ms
    output                          es_to_ms_valid ,
    output [`ES_TO_MS_BUS_WD -1:0]  es_to_ms_bus   ,
    // data sram interface
    output                          data_sram_en   ,
    output [ 3:0]                   data_sram_we   ,
    output [31:0]                   data_sram_addr ,
    output [31:0]                   data_sram_wdata
);

reg         es_valid;
wire        es_ready_go;

// Pipeline register and its unpacked fields.
reg  [`DS_TO_ES_BUS_WD -1:0] ds_to_es_bus_r;
wire [11:0] es_alu_op;
wire        es_load_op;
wire        es_src1_is_pc;
wire        es_src2_is_imm;
wire        es_gr_we;
wire        es_mem_we;
wire [ 4:0] es_dest;
wire [31:0] es_imm;
wire [31:0] es_rj_value;
wire [31:0] es_rkd_value;
wire [31:0] es_pc;
assign {es_alu_op      ,  //149:138
        es_load_op     ,  //137:137
        es_src1_is_pc  ,  //136:136
        es_src2_is_imm ,  //135:135
        es_gr_we       ,  //134:134
        es_mem_we      ,  //133:133
        es_dest        ,  //132:128
        es_imm         ,  //127:96
        es_rj_value    ,  //95 :64
        es_rkd_value   ,  //63 :32
        es_pc             //31 :0
       } = ds_to_es_bus_r;

wire [31:0] es_alu_src1;
wire [31:0] es_alu_src2;
wire [31:0] es_alu_result;

wire        es_res_from_mem;

assign es_res_from_mem = es_load_op;
assign es_to_ms_bus = {es_res_from_mem,  //70:70
                       es_gr_we       ,  //69:69
                       es_dest        ,  //68:64
                       es_alu_result  ,  //63:32
                       es_pc             //31:0
                      };

// ---------------------------------------------------------------------------
// Handshake: this stage always completes in one cycle.
// ---------------------------------------------------------------------------
assign es_ready_go    = 1'b1;
assign es_allowin     = !es_valid || (es_ready_go && ms_allowin);
assign es_to_ms_valid = es_valid && es_ready_go;

always @(posedge clk) begin
    if (reset) begin
        es_valid <= 1'b0;
    end
    else if (es_allowin) begin
        es_valid <= ds_to_es_valid;
    end
end

// The payload register is only updated when a valid instruction is accepted.
always @(posedge clk) begin
    if (ds_to_es_valid && es_allowin) begin
        ds_to_es_bus_r <= ds_to_es_bus;
    end
end

// ---------------------------------------------------------------------------
// ALU operand selection and ALU
// ---------------------------------------------------------------------------
assign es_alu_src1 = es_src1_is_pc  ? es_pc[31:0] : es_rj_value;
assign es_alu_src2 = es_src2_is_imm ? es_imm      : es_rkd_value;

alu u_alu(
    .alu_op     (es_alu_op    ),
    .alu_src1   (es_alu_src1  ),
    .alu_src2   (es_alu_src2  ),
    .alu_result (es_alu_result)
    );

// ---------------------------------------------------------------------------
// Data SRAM request: enabled every cycle; all four byte lanes are written
// only for a valid store.
// ---------------------------------------------------------------------------
assign data_sram_en    = 1'b1;
assign data_sram_we    = (es_mem_we && es_valid) ? 4'hf : 4'h0;
assign data_sram_addr  = es_alu_result;
assign data_sram_wdata = es_rkd_value;

endmodule


5. MEM_stage
`include "mycpu.h"

//------------------------------------------------------------------------------
// exe_stage -- execute (EXE) stage.
//
// NOTE(review): this listing appears under the "5. MEM_stage" heading but it
// defines exe_stage again; no mem_stage module is shown. Replace it with the
// real memory-stage source before building, otherwise exe_stage is defined
// twice and the memory stage is missing.
//
// Latches the decoded bundle from the decode stage, selects the two ALU
// operands, runs the ALU, issues the data-SRAM request and forwards the result
// bundle to the memory stage.
//
// Ports
//   clk, reset      : clock and synchronous reset (reset clears es_valid).
//   ms_allowin      : memory stage can accept a new instruction.
//   es_allowin      : this stage can accept a new instruction.
//   ds_to_es_valid  : decode stage is presenting a valid instruction.
//   ds_to_es_bus    : decoded controls and operands (layout at the assign).
//   es_to_ms_valid  : this stage is presenting a valid instruction.
//   es_to_ms_bus    : {res_from_mem, gr_we, dest, alu_result, pc}.
//   data_sram_*     : data SRAM request; address is the ALU result and the
//                     write data is the rk/rd register value.
//------------------------------------------------------------------------------
module exe_stage(
    input                           clk            ,
    input                           reset          ,
    // allowin
    input                           ms_allowin     ,
    output                          es_allowin     ,
    // from ds
    input                           ds_to_es_valid ,
    input  [`DS_TO_ES_BUS_WD -1:0]  ds_to_es_bus   ,
    // to ms
    output                          es_to_ms_valid ,
    output [`ES_TO_MS_BUS_WD -1:0]  es_to_ms_bus   ,
    // data sram interface
    output                          data_sram_en   ,
    output [ 3:0]                   data_sram_we   ,
    output [31:0]                   data_sram_addr ,
    output [31:0]                   data_sram_wdata
);

reg         es_valid;
wire        es_ready_go;

// Pipeline register and its unpacked fields.
reg  [`DS_TO_ES_BUS_WD -1:0] ds_to_es_bus_r;
wire [11:0] es_alu_op;
wire        es_load_op;
wire        es_src1_is_pc;
wire        es_src2_is_imm;
wire        es_gr_we;
wire        es_mem_we;
wire [ 4:0] es_dest;
wire [31:0] es_imm;
wire [31:0] es_rj_value;
wire [31:0] es_rkd_value;
wire [31:0] es_pc;
assign {es_alu_op      ,  //149:138
        es_load_op     ,  //137:137
        es_src1_is_pc  ,  //136:136
        es_src2_is_imm ,  //135:135
        es_gr_we       ,  //134:134
        es_mem_we      ,  //133:133
        es_dest        ,  //132:128
        es_imm         ,  //127:96
        es_rj_value    ,  //95 :64
        es_rkd_value   ,  //63 :32
        es_pc             //31 :0
       } = ds_to_es_bus_r;

wire [31:0] es_alu_src1;
wire [31:0] es_alu_src2;
wire [31:0] es_alu_result;

wire        es_res_from_mem;

assign es_res_from_mem = es_load_op;
assign es_to_ms_bus = {es_res_from_mem,  //70:70
                       es_gr_we       ,  //69:69
                       es_dest        ,  //68:64
                       es_alu_result  ,  //63:32
                       es_pc             //31:0
                      };

// ---------------------------------------------------------------------------
// Handshake: this stage always completes in one cycle.
// ---------------------------------------------------------------------------
assign es_ready_go    = 1'b1;
assign es_allowin     = !es_valid || (es_ready_go && ms_allowin);
assign es_to_ms_valid = es_valid && es_ready_go;

always @(posedge clk) begin
    if (reset) begin
        es_valid <= 1'b0;
    end
    else if (es_allowin) begin
        es_valid <= ds_to_es_valid;
    end
end

// The payload register is only updated when a valid instruction is accepted.
always @(posedge clk) begin
    if (ds_to_es_valid && es_allowin) begin
        ds_to_es_bus_r <= ds_to_es_bus;
    end
end

// ---------------------------------------------------------------------------
// ALU operand selection and ALU
// ---------------------------------------------------------------------------
assign es_alu_src1 = es_src1_is_pc  ? es_pc[31:0] : es_rj_value;
assign es_alu_src2 = es_src2_is_imm ? es_imm      : es_rkd_value;

alu u_alu(
    .alu_op     (es_alu_op    ),
    .alu_src1   (es_alu_src1  ),
    .alu_src2   (es_alu_src2  ),
    .alu_result (es_alu_result)
    );

// ---------------------------------------------------------------------------
// Data SRAM request: enabled every cycle; all four byte lanes are written
// only for a valid store.
// ---------------------------------------------------------------------------
assign data_sram_en    = 1'b1;
assign data_sram_we    = (es_mem_we && es_valid) ? 4'hf : 4'h0;
assign data_sram_addr  = es_alu_result;
assign data_sram_wdata = es_rkd_value;

endmodule

6. WB_stage
`include "mycpu.h"

//------------------------------------------------------------------------------
// wb_stage -- write-back (WB) stage.
//
// Latches the result bundle from the memory stage, drives the register-file
// write port (through ws_to_rf_bus) and exposes the trace/debug signals.
//
// Ports
//   clk, reset        : clock and synchronous reset (reset clears ws_valid).
//   ws_allowin        : this stage can accept a new instruction.
//   ms_to_ws_valid    : memory stage is presenting a valid instruction.
//   ms_to_ws_bus      : {gr_we, dest[4:0], final_result[31:0], pc[31:0]}.
//   ws_to_rf_bus      : {rf_we, rf_waddr[4:0], rf_wdata[31:0]}.
//   debug_wb_pc       : pc of the instruction in this stage.
//   debug_wb_rf_we    : rf_we replicated on all four bits.
//   debug_wb_rf_wnum  : destination register number.
//   debug_wb_rf_wdata : value written to the register file.
//------------------------------------------------------------------------------
module wb_stage(
    input                           clk              ,
    input                           reset            ,
    // allowin
    output                          ws_allowin       ,
    // from ms
    input                           ms_to_ws_valid   ,
    input  [`MS_TO_WS_BUS_WD -1:0]  ms_to_ws_bus     ,
    // to rf: for write back
    output [`WS_TO_RF_BUS_WD -1:0]  ws_to_rf_bus     ,
    // trace debug interface
    output [31:0]                   debug_wb_pc      ,
    output [ 3:0]                   debug_wb_rf_we   ,
    output [ 4:0]                   debug_wb_rf_wnum ,
    output [31:0]                   debug_wb_rf_wdata
);

reg         ws_valid;
wire        ws_ready_go;

// Pipeline register and its unpacked fields.
reg  [`MS_TO_WS_BUS_WD -1:0] ms_to_ws_bus_r;
wire        ws_gr_we;
wire [ 4:0] ws_dest;
wire [31:0] ws_final_result;
wire [31:0] ws_pc;
assign {ws_gr_we       ,  //69:69
        ws_dest        ,  //68:64
        ws_final_result,  //63:32
        ws_pc             //31:0
       } = ms_to_ws_bus_r;

wire        rf_we;
wire [ 4:0] rf_waddr;
wire [31:0] rf_wdata;
assign ws_to_rf_bus = {rf_we   ,  //37:37
                       rf_waddr,  //36:32
                       rf_wdata   //31:0
                      };

// ---------------------------------------------------------------------------
// Handshake: the last stage never back-pressures.
// ---------------------------------------------------------------------------
assign ws_ready_go = 1'b1;
assign ws_allowin  = !ws_valid || ws_ready_go;

always @(posedge clk) begin
    if (reset) begin
        ws_valid <= 1'b0;
    end
    else if (ws_allowin) begin
        ws_valid <= ms_to_ws_valid;
    end
end

// The payload register is only updated when a valid instruction is accepted.
always @(posedge clk) begin
    if (ms_to_ws_valid && ws_allowin) begin
        ms_to_ws_bus_r <= ms_to_ws_bus;
    end
end

// Only a valid instruction that requests a register write may write.
assign rf_we    = ws_gr_we && ws_valid;
assign rf_waddr = ws_dest;
assign rf_wdata = ws_final_result;

// ---------------------------------------------------------------------------
// Debug/trace outputs
// ---------------------------------------------------------------------------
assign debug_wb_pc       = ws_pc;
assign debug_wb_rf_we    = {4{rf_we}};
assign debug_wb_rf_wnum  = ws_dest;
assign debug_wb_rf_wdata = ws_final_result;

endmodule