Because pixel data enters the FPGA as a pipelined stream, the pixels of a neighborhood are not all available at the same time, as they are in a C model. A 3x3 window-acquisition module is therefore needed before a convolution with a 3x3 operator can be performed.
The implementation uses two FIFOs, each of which caches one of the two previous rows. When pixel data arrives, FIFO0 and FIFO1 are read first; the incoming pixel is then written to FIFO0, and the data read from FIFO0 is written to FIFO1.
The column made up of the incoming pixel and the two values read from the FIFOs is shifted into the window registers, replacing the oldest column, so that a 3x3 sliding window of data is obtained.
The FIFO IP core is replaced with the pure-Verilog FIFO implemented earlier, which makes the design easier to port to other platforms.
`timescale 1ns/1ps

// -----------------------------------------------------------------------------
// filter_3X3
//
// Builds a 3x3 sliding window from a streaming pixel input.
//
// Three taps are obtained from one_line_buffer and, while the one-cycle
// delayed data-enable (i_de_d1) is high, shifted column by column into three
// rows of three registers:
//
//     filter_11 filter_12 filter_13   <- fed by tap_1
//     filter_21 filter_22 filter_23   <- fed by tap_2
//     filter_31 filter_32 filter_33   <- fed by tap_3
//
// In every row the *_3 register receives the newest sample and the *_1
// register holds the oldest one.
//
// Parameters
//   DATA_WIDTH : width of one pixel sample in bits.
//   DATA_DEPTH : depth forwarded to the line buffer (default 1920).
//
// Ports
//   i_clk, i_rstn             : clock and active-low asynchronous reset.
//   i_de, i_hs, i_vs          : input data-enable / hsync / vsync.
//   i_y_data                  : input gray value.
//   o_de_filter, o_hs_filter,
//   o_vs_filter               : i_de / i_hs / i_vs delayed by two clock cycles.
//   filter_11 .. filter_33    : the 3x3 window registers described above.
// -----------------------------------------------------------------------------
module filter_3X3 #(
    parameter DATA_WIDTH = 8,
    parameter DATA_DEPTH = 1920
)
(
    input  wire                      i_clk,
    input  wire                      i_rstn,
    input  wire                      i_de,
    input  wire                      i_hs,
    input  wire                      i_vs,
    input  wire [DATA_WIDTH - 1 : 0] i_y_data,   // Gray value

    output wire                      o_de_filter,
    output wire                      o_hs_filter,
    output wire                      o_vs_filter,

    output reg  [DATA_WIDTH - 1 : 0] filter_11,
    output reg  [DATA_WIDTH - 1 : 0] filter_12,
    output reg  [DATA_WIDTH - 1 : 0] filter_13,
    output reg  [DATA_WIDTH - 1 : 0] filter_21,
    output reg  [DATA_WIDTH - 1 : 0] filter_22,
    output reg  [DATA_WIDTH - 1 : 0] filter_23,
    output reg  [DATA_WIDTH - 1 : 0] filter_31,
    output reg  [DATA_WIDTH - 1 : 0] filter_32,
    output reg  [DATA_WIDTH - 1 : 0] filter_33
);

    // One- and two-cycle delayed copies of the sync / enable inputs.
    reg i_de_d1;
    reg i_de_d2;
    reg i_hs_d1;
    reg i_hs_d2;
    reg i_vs_d1;
    reg i_vs_d2;

    // Column of three samples delivered by the line buffer.
    wire [DATA_WIDTH - 1 : 0] tap_1;
    wire [DATA_WIDTH - 1 : 0] tap_2;
    wire [DATA_WIDTH - 1 : 0] tap_3;

    // Two-stage delay line for de / hs / vs.
    always @(posedge i_clk or negedge i_rstn) begin
        if (i_rstn == 1'b0) begin
            i_de_d1 <= 1'b0;
            i_de_d2 <= 1'b0;
            i_hs_d1 <= 1'b0;
            i_hs_d2 <= 1'b0;
            i_vs_d1 <= 1'b0;
            i_vs_d2 <= 1'b0;
        end
        else begin
            i_de_d1 <= i_de;
            i_de_d2 <= i_de_d1;
            i_hs_d1 <= i_hs;
            i_hs_d2 <= i_hs_d1;
            i_vs_d1 <= i_vs;
            i_vs_d2 <= i_vs_d1;
        end
    end

    // Line buffer. The module parameters are forwarded so that overriding
    // DATA_WIDTH / DATA_DEPTH on filter_3X3 actually takes effect.
    one_line_buffer #(
        .DATA_WIDTH (DATA_WIDTH),
        .DATA_DEPTH (DATA_DEPTH)
    ) inst1_one_line_buffer (
        .i_clk    (i_clk),
        .i_rstn   (i_rstn),
        .i_de     (i_de),
        .i_y_data (i_y_data),
        .tap_1    (tap_1),
        .tap_2    (tap_2),
        .tap_3    (tap_3)
    );

    // Window registers: shift one column in while i_de_d1 is high; otherwise
    // the registers keep their value (no explicit hold branch is required).
    always @(posedge i_clk or negedge i_rstn) begin
        if (i_rstn == 1'b0) begin
            {filter_11, filter_12, filter_13} <= 0;
            {filter_21, filter_22, filter_23} <= 0;
            {filter_31, filter_32, filter_33} <= 0;
        end
        else if (i_de_d1) begin
            {filter_11, filter_12, filter_13} <= {filter_12, filter_13, tap_1};
            {filter_21, filter_22, filter_23} <= {filter_22, filter_23, tap_2};
            {filter_31, filter_32, filter_33} <= {filter_32, filter_33, tap_3};
        end
    end

    assign o_de_filter = i_de_d2;
    assign o_hs_filter = i_hs_d2;
    assign o_vs_filter = i_vs_d2;

endmodule
`timescale 1ns/1ps

// -----------------------------------------------------------------------------
// one_line_buffer
//
// Chains two `fifo` instances and exposes three taps:
//
//   tap_1 : the input sample i_y_data, passed straight through.
//   tap_2 : read data of the first FIFO  (inst1_fifo).
//   tap_3 : read data of the second FIFO (inst2_fifo).
//
// The first FIFO is written with the input sample delayed by two clock cycles;
// the second FIFO is written with the first FIFO's read data delayed by one
// clock cycle. Both FIFOs are read while i_de is high and written while the
// two-cycle delayed enable i_de_d2 is high.
//
// Parameters
//   DATA_WIDTH : width of one sample in bits.
//   DATA_DEPTH : depth of each FIFO.
//
// Ports
//   i_clk, i_rstn : clock and active-low asynchronous reset.
//   i_de          : input data-enable.
//   i_y_data      : input sample.
//   tap_1..tap_3  : see above.
// -----------------------------------------------------------------------------
module one_line_buffer #(
    parameter DATA_WIDTH = 8,
    parameter DATA_DEPTH = 1920
)
(
    input  wire                      i_clk,
    input  wire                      i_rstn,
    input  wire                      i_de,
    input  wire [DATA_WIDTH - 1 : 0] i_y_data,

    output wire [DATA_WIDTH - 1 : 0] tap_1,
    output wire [DATA_WIDTH - 1 : 0] tap_2,
    output wire [DATA_WIDTH - 1 : 0] tap_3
);

    // Delayed copies of the enable and of the input sample.
    reg                       i_de_d1;
    reg                       i_de_d2;
    reg  [DATA_WIDTH - 1 : 0] i_y_data_d1;
    reg  [DATA_WIDTH - 1 : 0] i_y_data_d2;

    // FIFO read data is driven by the instance output ports, so these must be
    // nets rather than variables.
    wire [DATA_WIDTH - 1 : 0] fifo_rd_data_0;
    reg  [DATA_WIDTH - 1 : 0] fifo_rd_data_0_d1;
    wire [DATA_WIDTH - 1 : 0] fifo_rd_data_1;

    always @(posedge i_clk or negedge i_rstn) begin
        if (i_rstn == 1'b0) begin
            i_de_d1           <= 1'b0;
            i_de_d2           <= 1'b0;
            i_y_data_d1       <= 0;
            i_y_data_d2       <= 0;
            fifo_rd_data_0_d1 <= 0;
        end
        else begin
            i_de_d1           <= i_de;
            // Second delay stage must be fed from the first one; assigning
            // i_de_d2 to itself would keep the FIFO write enable at 0 forever.
            i_de_d2           <= i_de_d1;
            i_y_data_d1       <= i_y_data;
            i_y_data_d2       <= i_y_data_d1;
            fifo_rd_data_0_d1 <= fifo_rd_data_0;
        end
    end

    // First FIFO: stores the (two-cycle delayed) input samples.
    fifo #(
        .DATA_WIDTH (DATA_WIDTH),
        .DATA_DEPTH (DATA_DEPTH)
    ) inst1_fifo (
        .i_clk    (i_clk),
        .i_rstn   (i_rstn),
        .wr_en    (i_de_d2),
        .wr_data  (i_y_data_d2),
        .wr_full  (),
        .rd_en    (i_de),
        .rd_empty (),
        .rd_data  (fifo_rd_data_0)
    );

    // Second FIFO: stores what was read from the first FIFO.
    // NOTE(review): wr_data is fifo_rd_data_0 delayed by one more cycle while
    // wr_en is i_de_d2 - confirm this alignment against the FIFO read latency.
    fifo #(
        .DATA_WIDTH (DATA_WIDTH),
        .DATA_DEPTH (DATA_DEPTH)
    ) inst2_fifo (
        .i_clk    (i_clk),
        .i_rstn   (i_rstn),
        .wr_en    (i_de_d2),
        .wr_data  (fifo_rd_data_0_d1),
        .wr_full  (),
        .rd_en    (i_de),
        .rd_empty (),
        .rd_data  (fifo_rd_data_1)
    );

    // NOTE(review): tap_1 is the undelayed input while tap_2 / tap_3 come out
    // of registered FIFO reads - verify that the three taps are column-aligned
    // as the consumer expects.
    assign tap_1 = i_y_data;
    assign tap_2 = fifo_rd_data_0;
    assign tap_3 = fifo_rd_data_1;

endmodule
`timescale 1ns/1ps

// -----------------------------------------------------------------------------
// fifo
//
// Simple single-clock buffer built on an inferred RAM array.
//
// Behaviour
//   * While wr_en is high, wr_data is stored at wr_pointer and wr_pointer
//     advances by one, wrapping from DATA_DEPTH-1 to 0. When wr_en is low,
//     wr_pointer returns to 0.
//   * While rd_en is high, the word at rd_pointer is fetched and rd_pointer
//     advances by one, wrapping from DATA_DEPTH-1 to 0. When rd_en is low,
//     rd_pointer returns to 0 and the fetch register is cleared.
//   * Read data passes through two registers (rd_data_out, then rd_data), so
//     rd_data shows a word two clock edges after the edge that sampled rd_en.
//
// Parameters
//   DATA_WIDTH : width of one stored word in bits.
//   DATA_DEPTH : number of words in the buffer.
//
// Ports
//   i_clk, i_rstn : clock and active-low asynchronous reset.
//   wr_en/wr_data : write enable and write data.
//   rd_en/rd_data : read enable and registered read data.
//   wr_full,
//   rd_empty      : declared for interface compatibility; this implementation
//                   does not drive them.
// -----------------------------------------------------------------------------
module fifo #(
    parameter DATA_WIDTH = 8,
    parameter DATA_DEPTH = 1920
)
(
    input  wire                      i_clk,
    input  wire                      i_rstn,

    input  wire                      wr_en,
    input  wire [DATA_WIDTH - 1 : 0] wr_data,
    output wire                      wr_full,

    input  wire                      rd_en,
    output wire                      rd_empty,
    output reg  [DATA_WIDTH - 1 : 0] rd_data
);

    // Pointer width; guarded so that DATA_DEPTH == 1 does not yield a
    // zero-width vector.
    localparam ADDR_WIDTH = (DATA_DEPTH > 1) ? $clog2(DATA_DEPTH) : 1;

    // Storage array, requested to be mapped onto block RAM.
    (* ram_style = "block" *)
    reg [DATA_WIDTH - 1 : 0] fifo_buffer [DATA_DEPTH - 1 : 0];

    // Clear every word of the array (the loop bound is the depth, not the
    // word width).
    integer i;
    initial begin
        for (i = 0; i < DATA_DEPTH; i = i + 1) begin
            fifo_buffer[i] = 0;
        end
    end

    reg [ADDR_WIDTH - 1 : 0] wr_pointer = 0;   // next address to write
    reg [ADDR_WIDTH - 1 : 0] rd_pointer = 0;   // next address to read
    reg [DATA_WIDTH - 1 : 0] rd_data_out;      // first read register

    // Write pointer: counts while wr_en is high, restarts at 0 otherwise.
    always @(posedge i_clk or negedge i_rstn) begin
        if (!i_rstn) begin
            wr_pointer <= 0;
        end
        else if (wr_en) begin
            if (wr_pointer == DATA_DEPTH - 1) begin
                wr_pointer <= 0;
            end
            else begin
                wr_pointer <= wr_pointer + 1'd1;
            end
        end
        else begin
            wr_pointer <= 0;
        end
    end

    // Read pointer: counts while rd_en is high, restarts at 0 otherwise.
    always @(posedge i_clk or negedge i_rstn) begin
        if (!i_rstn) begin
            rd_pointer <= 0;
        end
        else if (rd_en) begin
            if (rd_pointer == DATA_DEPTH - 1) begin
                rd_pointer <= 0;
            end
            else begin
                rd_pointer <= rd_pointer + 1'd1;
            end
        end
        else begin
            rd_pointer <= 0;
        end
    end

    // RAM write port. Kept free of any reset so the array can be inferred as
    // a RAM; the initial block above provides the power-up contents.
    always @(posedge i_clk) begin
        if (wr_en) begin
            fifo_buffer[wr_pointer] <= wr_data;
        end
    end

    // RAM read port: fetch from the READ pointer (not the write pointer).
    always @(posedge i_clk or negedge i_rstn) begin
        if (!i_rstn) begin
            rd_data_out <= 0;
        end
        else if (rd_en) begin
            rd_data_out <= fifo_buffer[rd_pointer];
        end
        else begin
            rd_data_out <= 0;
        end
    end

    // Output register stage.
    always @(posedge i_clk or negedge i_rstn) begin
        if (!i_rstn) begin
            rd_data <= 0;
        end
        else begin
            rd_data <= rd_data_out;
        end
    end

endmodule