Implementing 3x3 convolution on an FPGA

Because pixel data enters the FPGA as a pipelined stream, it cannot be processed in parallel the way the C model does. A 3x3 matrix (window) acquisition module is therefore needed so that the convolution with a given operator (kernel) can be performed on the stream.

The implementation uses two FIFOs to buffer the two previous rows, one row per FIFO. As the data stream arrives, FIFO0 and FIFO1 are read first; then the incoming data is written to FIFO0, and the data read from FIFO0 is written to FIFO1.

Each column of data read out of the FIFOs is shifted into the window registers, replacing the oldest stored column, so that a 3x3 sliding window of data is obtained.

The FIFO IP core is replaced with the pure-Verilog FIFO implemented previously, which makes later porting to other platforms easier.

`timescale 1ns/1ps
// 3x3 sliding-window generator for a streaming pixel interface.
//
// Three row taps produced by one_line_buffer are shifted, one column per
// enabled clock, through a 3x3 bank of registers. Within each row the "x1"
// register holds the oldest column and the "x3" register the newest one.
// Row 1 is fed by tap_1, row 2 by tap_2 and row 3 by tap_3.
//
// Parameters
//   DATA_WIDTH : bit width of one pixel sample
//   DATA_DEPTH : depth forwarded to the line buffer
//                (presumably the number of active pixels per line - confirm)
//
// Ports
//   i_clk, i_rstn        : clock and active-low asynchronous reset
//   i_de, i_hs, i_vs     : input sync / data-enable signals
//   i_y_data             : input pixel (gray value)
//   o_*_filter           : i_de / i_hs / i_vs delayed by two clocks
//   filter_11..filter_33 : the 3x3 window registers
module filter_3X3 #(
    parameter DATA_WIDTH = 8,
    parameter DATA_DEPTH = 1920
)
(
    input wire i_clk,
    input wire i_rstn,

    input wire i_de,
    input wire i_hs,
    input wire i_vs,
    input wire [DATA_WIDTH - 1 : 0] i_y_data, // gray value

    output wire o_de_filter,
    output wire o_hs_filter,
    output wire o_vs_filter,
    output reg [DATA_WIDTH - 1 : 0] filter_11,
    output reg [DATA_WIDTH - 1 : 0] filter_12,
    output reg [DATA_WIDTH - 1 : 0] filter_13,
    output reg [DATA_WIDTH - 1 : 0] filter_21,
    output reg [DATA_WIDTH - 1 : 0] filter_22,
    output reg [DATA_WIDTH - 1 : 0] filter_23,
    output reg [DATA_WIDTH - 1 : 0] filter_31,
    output reg [DATA_WIDTH - 1 : 0] filter_32,
    output reg [DATA_WIDTH - 1 : 0] filter_33
);

// Two-stage delay lines for the sync / enable signals.
reg i_de_d1;
reg i_de_d2;
reg i_hs_d1;
reg i_hs_d2;
reg i_vs_d1;
reg i_vs_d2;

// Row taps coming out of the line buffer.
wire [DATA_WIDTH - 1 : 0] tap_1;
wire [DATA_WIDTH - 1 : 0] tap_2;
wire [DATA_WIDTH - 1 : 0] tap_3;

always @(posedge i_clk or negedge i_rstn) begin
    if(i_rstn == 1'b0) begin
        i_de_d1 <= 1'b0;
        i_de_d2 <= 1'b0;
        i_hs_d1 <= 1'b0;
        i_hs_d2 <= 1'b0;
        i_vs_d1 <= 1'b0;
        i_vs_d2 <= 1'b0;
    end
    else begin
        i_de_d1 <= i_de;
        i_de_d2 <= i_de_d1;
        i_hs_d1 <= i_hs;
        i_hs_d2 <= i_hs_d1;
        i_vs_d1 <= i_vs;
        i_vs_d2 <= i_vs_d1;
    end
end

// Forward this module's parameters so that overriding DATA_WIDTH/DATA_DEPTH
// here also resizes the line buffer (they were hard-coded to 8/1920 before).
one_line_buffer #(
    .DATA_WIDTH(DATA_WIDTH),
    .DATA_DEPTH(DATA_DEPTH)
)
inst1_one_line_buffer(
    .i_clk (i_clk),
    .i_rstn (i_rstn),

    .i_de (i_de),
    .i_y_data (i_y_data),

    .tap_1 (tap_1),
    .tap_2 (tap_2),
    .tap_3 (tap_3)
);

// Window shift register: each row shifts left by one column and takes its
// tap as the new right-most column. When i_de_d1 is low the registers hold.
// NOTE(review): sampling the taps with i_de_d1 presumes the taps are valid one
// clock after i_de - confirm against the line buffer's actual read latency.
always @(posedge i_clk or negedge i_rstn) begin
    if(i_rstn == 1'b0) begin
        {filter_11,filter_12,filter_13} <= 0;
        {filter_21,filter_22,filter_23} <= 0;
        {filter_31,filter_32,filter_33} <= 0;
    end
    else if(i_de_d1) begin
        {filter_11,filter_12,filter_13} <= {filter_12,filter_13,tap_1};
        {filter_21,filter_22,filter_23} <= {filter_22,filter_23,tap_2};
        {filter_31,filter_32,filter_33} <= {filter_32,filter_33,tap_3};
    end
end

assign o_de_filter = i_de_d2;
assign o_hs_filter = i_hs_d2;
assign o_vs_filter = i_vs_d2;

endmodule
`timescale 1ns/1ps
// Line buffer that exposes three row taps for a 3x3 window.
//
// Two fifo instances are chained: inst1_fifo stores the delayed input stream,
// and inst2_fifo stores a one-clock-delayed copy of what is read back from
// inst1_fifo. Both are read with i_de and written with i_de delayed by two
// clocks.
//
// Parameters
//   DATA_WIDTH : bit width of one pixel sample
//   DATA_DEPTH : depth of each FIFO
//                (presumably the number of active pixels per line - confirm)
//
// Ports
//   i_clk, i_rstn : clock and active-low asynchronous reset
//   i_de          : data enable for i_y_data
//   i_y_data      : input pixel
//   tap_1         : the undelayed input pixel
//   tap_2         : read data of inst1_fifo
//   tap_3         : read data of inst2_fifo
module one_line_buffer #(
    parameter DATA_WIDTH = 8,
    parameter DATA_DEPTH = 1920
)
(
    input wire i_clk,
    input wire i_rstn,

    input wire i_de,
    input wire [DATA_WIDTH - 1 : 0] i_y_data,

    output wire [DATA_WIDTH - 1 : 0] tap_1,
    output wire [DATA_WIDTH - 1 : 0] tap_2,
    output wire [DATA_WIDTH - 1 : 0] tap_3
);

reg i_de_d1;
reg i_de_d2;
reg [DATA_WIDTH - 1 : 0] i_y_data_d1;
reg [DATA_WIDTH - 1 : 0] i_y_data_d2;

// FIFO read data is driven by instance output ports, so it must be a net.
wire [DATA_WIDTH - 1 : 0] fifo_rd_data_0;
reg  [DATA_WIDTH - 1 : 0] fifo_rd_data_0_d1;
wire [DATA_WIDTH - 1 : 0] fifo_rd_data_1;


// Delay chain for the enable, the input pixel and the first FIFO's output.
always @(posedge i_clk or negedge i_rstn) begin
    if(i_rstn == 1'b0) begin
        i_de_d1 <= 1'b0;
        i_de_d2 <= 1'b0;

        i_y_data_d1 <= 0;
        i_y_data_d2 <= 0;
        fifo_rd_data_0_d1 <= 0;
    end
    else begin
        i_de_d1 <= i_de;
        // Second stage must load from the first stage; loading from itself
        // left i_de_d2 stuck at 0, so the FIFOs were never written.
        i_de_d2 <= i_de_d1;

        i_y_data_d1 <= i_y_data;
        i_y_data_d2 <= i_y_data_d1;
        fifo_rd_data_0_d1 <= fifo_rd_data_0;
    end
end

// Row buffer 0: stores the input stream delayed by two clocks.
fifo #(
    .DATA_WIDTH(DATA_WIDTH),
    .DATA_DEPTH(DATA_DEPTH)
)
inst1_fifo(
    .i_clk (i_clk),
    .i_rstn (i_rstn),

    .wr_en (i_de_d2),
    .wr_data (i_y_data_d2),
    .wr_full(),

    .rd_en (i_de),

    .rd_empty (),
    .rd_data (fifo_rd_data_0)
);

// Row buffer 1: stores the (one-clock-delayed) read data of row buffer 0.
// NOTE(review): whether fifo_rd_data_0_d1 lines up with i_de_d2 depends on the
// read latency of the fifo module - confirm, otherwise rows drift by a pixel.
fifo #(
    .DATA_WIDTH(DATA_WIDTH),
    .DATA_DEPTH(DATA_DEPTH)
)
inst2_fifo(
    .i_clk (i_clk),
    .i_rstn (i_rstn),

    .wr_en (i_de_d2),
    .wr_data (fifo_rd_data_0_d1),
    .wr_full(),

    .rd_en (i_de),

    .rd_empty (),
    .rd_data (fifo_rd_data_1)
);


// NOTE(review): tap_1 is the undelayed input while tap_2/tap_3 come through
// the FIFO read path - confirm that the three taps are column-aligned.
assign tap_1 = i_y_data;
assign tap_2 = fifo_rd_data_0;
assign tap_3 = fifo_rd_data_1;

endmodule
`timescale 1ns/1ps
// Single-clock line-buffer style FIFO built from an inferred RAM.
//
// Behaviour
//   * While wr_en is high, wr_data is stored at wr_pointer and the pointer
//     advances, wrapping from DATA_DEPTH-1 to 0.
//   * While rd_en is high, the word at rd_pointer is fetched and the pointer
//     advances with the same wrap-around.
//   * Either pointer returns to 0 on any clock where its enable is low, so
//     every burst of writes / reads starts again at address 0.
//   * Read data passes through two register stages (rd_data_out, rd_data).
//     When rd_en is low the first stage is cleared to 0.
//
// Parameters
//   DATA_WIDTH : bit width of one stored word
//   DATA_DEPTH : number of words in the buffer
//
// Ports
//   i_clk, i_rstn : clock and active-low asynchronous reset
//   wr_en/wr_data : write enable and write data
//   rd_en/rd_data : read enable and registered read data
//   wr_full, rd_empty : occupancy is not tracked by this buffer; both flags
//                       are tied low so the ports are not left undriven
module fifo#(
    parameter DATA_WIDTH = 8,
    parameter DATA_DEPTH = 1920
)
(
    input wire i_clk,
    input wire i_rstn,

    input wire wr_en,
    input wire [DATA_WIDTH - 1 : 0] wr_data,
    output wire wr_full,

    input wire rd_en,

    output wire rd_empty,
    output reg [DATA_WIDTH - 1 : 0] rd_data
);

// Storage array; the attribute asks the synthesis tool for block RAM.
(*ram_style = "block" *) reg [DATA_WIDTH - 1 : 0] fifo_buffer [DATA_DEPTH - 1 : 0];

integer i;

// Zero the whole array at start-up. The loop must run over the depth of the
// memory (number of words), not over the width of one word.
initial begin
    for(i = 0; i < DATA_DEPTH; i = i + 1) begin
        fifo_buffer[i] = 0;
    end
end

reg [$clog2(DATA_DEPTH) - 1 : 0] wr_pointer = 0; // write address
reg [$clog2(DATA_DEPTH) - 1 : 0] rd_pointer = 0; // read address
reg [DATA_WIDTH - 1 : 0] rd_data_out;            // first read register stage

// No occupancy bookkeeping exists; drive the flags to a defined level.
assign wr_full  = 1'b0;
assign rd_empty = 1'b0;

// Write address: counts while wr_en is high, restarts at 0 otherwise.
always @(posedge i_clk or negedge i_rstn) begin
    if(!i_rstn) begin
        wr_pointer <= 0;
    end
    else if(!wr_en) begin
        wr_pointer <= 0;
    end
    else if(wr_pointer == DATA_DEPTH - 1) begin
        wr_pointer <= 0;
    end
    else begin
        wr_pointer <= wr_pointer + 1'd1;
    end
end

// Read address: counts while rd_en is high, restarts at 0 otherwise.
always @(posedge i_clk or negedge i_rstn) begin
    if(!i_rstn) begin
        rd_pointer <= 0;
    end
    else if(!rd_en) begin
        rd_pointer <= 0;
    end
    else if(rd_pointer == DATA_DEPTH - 1) begin
        rd_pointer <= 0;
    end
    else begin
        rd_pointer <= rd_pointer + 1'd1;
    end
end

// Memory write port. Kept free of an asynchronous reset so the array can map
// onto block RAM; the reset only gates the write enable.
always @(posedge i_clk) begin
    if(i_rstn && wr_en) begin
        fifo_buffer[wr_pointer] <= wr_data;
    end
end

// Memory read port: fetch from the READ pointer (the write pointer was used
// here before, which left rd_pointer unused and returned the wrong word).
always @(posedge i_clk or negedge i_rstn) begin
    if(!i_rstn) begin
        rd_data_out <= 0;
    end
    else if(rd_en) begin
        rd_data_out <= fifo_buffer[rd_pointer];
    end
    else begin
        rd_data_out <= 0;
    end
end

// Output register stage.
always @(posedge i_clk or negedge i_rstn) begin
    if(!i_rstn) begin
        rd_data <= 0;
    end
    else begin
        rd_data <= rd_data_out;
    end
end
endmodule