• verilog版插值


     
    1. 开发环境:
      IDE:LIBERO 9.0(ACTEL公司的)
      芯片:AFS600 (BGA256),是混合系列的FPGA
      仿真软件:ModelSim Actel 6.5d
      综合软件:synplify pro D-2009.12A
    2. 周末无聊,复习一下VERILOG
      参考大学写的一段图像插值的C程序写的,先看看这个简单的插值C函数,水平很烂,别见笑:

      // Bilinear interpolation of the gray image at the fractional position (x, y).
      //
      // x, y : sample coordinates; the integer parts select gray[row = int(y)][col = int(x)],
      //        the fractional parts are the interpolation weights.
      // lei  : not used by this function (kept so existing callers still compile).
      //
      // Returns gray[i][j] unchanged when both fractional parts are below 0.05 or when
      // the integer position lies on one of the tested edges; otherwise returns the
      // bilinear blend of the four neighbouring samples, clamped to [0, 255].
      //
      // NOTE(review): i is derived from y but is compared against w-1, and j (from x)
      // against h-1. If w/h are the image width/height these two tests look swapped --
      // confirm against how `gray` is dimensioned before changing them.
      YBYTE DealPictrue::ChaZhi_Gray(float x, float y,bool lei)
      {
          const int i = static_cast<int>(y);
          const int j = static_cast<int>(x);
          const float fy = y - i;   // vertical weight
          const float fx = x - j;   // horizontal weight

          if ((fy < 5e-2 && fx < 5e-2) || i == 0 || j == 0 || i == w - 1 || j == h - 1)
              return gray[i][j];

          // Interpolate horizontally on row i and row i+1, then vertically between them.
          const float e = fx * (gray[i][j + 1] - gray[i][j]) + gray[i][j];
          const float f = fx * (gray[i + 1][j + 1] - gray[i + 1][j]) + gray[i + 1][j];
          float v = fy * (f - e) + e;

          // Clamp BEFORE narrowing: once the value is an unsigned char the range
          // checks can never fire, which made the original clamp dead code.
          if (v < 0.0f)
              v = 0.0f;
          else if (v > 255.0f)
              v = 255.0f;
          return static_cast<unsigned char>(v);
      }

      其实if(d<0)d=0;else if(d>255)d=255;放在那里根本没什么意义(d是unsigned char,取值范围本来就是0~255,这两个判断永远不会成立),呵呵,遵循原版,让大家看看我大学时候的编程水平。该程序用在我的一套图像处理系统中,最终效果还好(别考虑效率)。还有,本人英语向来不是很好(这是本人一直在努力改变的),所以大学的很多程序变量和函数名用的是英语+拼音
    3. 看一下这个函数的verilog版本:interpolation.v

      // interpolation.v

      `include "config.v"
      `define STATE_WIDTH 4

      //插值运算

      //Interpolation unit.
      //After reset it fetches the pixel at the integer part of (ix,iy) and, unless the
      //position is classified as a boundary case, its three neighbours (x+1,y),
      //(x+1,y+1) and (x,y+1). The four samples are summed and divided by four.
      //The state machine runs once per reset: after producing a result it stays in
      //st_idle until irst is asserted again.
      module interpolation(iclk, //system clock
                          irst, //asynchronous reset, active low
                          ix, //pixel X coordinate, fixed point:
                              //bits `DECIMAL_X_POSTION_WIDTH-1:`DECIMAL_PRECISION_POSTION are the integer part,
                              //bits `DECIMAL_PRECISION_POSTION-1:0 are the fractional part
                          iy, //pixel Y coordinate, fixed point:
                              //bits `DECIMAL_Y_POSTION_WIDTH-1:`DECIMAL_PRECISION_POSTION are the integer part,
                              //bits `DECIMAL_PRECISION_POSTION-1:0 are the fractional part
                          isrc_first_addr, //start address of the source image buffer
                          isrc_dat, //data bus of the source image buffer
                          osrc_addr, //address bus of the source image buffer
                          osrc_rd, //read strobe to the source image buffer
                          isrc_busy, //source buffer busy flag; read data is taken once it is 0
                          odir_dat, //interpolated result
                          obusy //1 after reset, cleared to 0 when odir_dat holds the result
                          );

      input iclk,irst;
      input [`DECIMAL_X_POSTION_WIDTH-1:0] ix;
      input [`DECIMAL_Y_POSTION_WIDTH-1:0] iy;
      input [`RAM_ADDR_WIDTH-1:0] isrc_first_addr;
      input [`IMG_COLOR_WIDTH-1:0] isrc_dat;
      input isrc_busy;
      output [`RAM_ADDR_WIDTH-1:0] osrc_addr;
      output osrc_rd;
      output [`IMG_COLOR_WIDTH-1:0] odir_dat;
      output obusy;

      reg [`RAM_ADDR_WIDTH-1:0] osrc_addr;
      reg [`IMG_COLOR_WIDTH-1:0] odir_dat;
      //two extra MSBs so that {dir_dat_ex,odir_dat} can hold the sum of four samples
      reg [1:0] dir_dat_ex;
      //cur_state : registered state
      //nest_state: combinational next state
      //willdo    : state to continue with once the pending read has completed
      reg [`STATE_WIDTH-1:0] cur_state,nest_state,willdo;
      reg osrc_rd,is_bound,obusy;

      //integer and fractional parts of the fixed-point coordinates
      wire [`IMG_WIDTH_BIT-1:0] x_int;
      wire [`IMG_HEIGHT_BIT-1:0] y_int;
      wire [`DECIMAL_PRECISION_POSTION-1:0] x_frac,y_frac;
      assign x_int=ix[`DECIMAL_X_POSTION_WIDTH-1:`DECIMAL_PRECISION_POSTION];
      assign y_int=iy[`DECIMAL_Y_POSTION_WIDTH-1:`DECIMAL_PRECISION_POSTION];
      assign x_frac=ix[`DECIMAL_PRECISION_POSTION-1:0];
      assign y_frac=iy[`DECIMAL_PRECISION_POSTION-1:0];

      parameter st_begin=`STATE_WIDTH'd0,st_check_bound=`STATE_WIDTH'd1,st_loading_data=`STATE_WIDTH'd2,
                  st_loaded_data=`STATE_WIDTH'd3,st_idle=`STATE_WIDTH'd4,st_output_result=`STATE_WIDTH'd5,
                  st_load_x0_y0=`STATE_WIDTH'd6,st_load_x1_y0=`STATE_WIDTH'd7,st_load_x1_y1=`STATE_WIDTH'd8,
                  st_load_x0_y1=`STATE_WIDTH'd9,st_output_result2=`STATE_WIDTH'd10;

      //state register
      always @(posedge iclk or negedge irst)
          begin
              if(!irst)
                  cur_state<=st_begin;
              else
                  cur_state<=nest_state;
          end

      //next-state logic (purely combinational)
      always @(cur_state or isrc_busy or is_bound or willdo)
          begin
              //default: hold the current state. This covers st_idle, a busy
              //st_loading_data and unused encodings without inferring a latch.
              nest_state=cur_state;
              case(cur_state)
              st_begin:
                  nest_state=st_load_x0_y0;
              //every load state issues a read and then waits for the data
              st_load_x0_y0,
              st_load_x1_y0,
              st_load_x1_y1,
              st_load_x0_y1:
                  nest_state=st_loading_data;
              st_check_bound:
                  begin
                      if(is_bound)
                          nest_state=st_output_result;
                      else
                          nest_state=st_load_x1_y0;
                  end
              st_loading_data:
                  if(isrc_busy==1'b0)
                      nest_state=st_loaded_data;
              st_loaded_data:
                  nest_state=willdo;
              st_output_result,
              st_output_result2:
                  nest_state=st_idle;
              default:;
              endcase
          end

      //datapath and outputs. The case is keyed on nest_state, so each action is
      //performed on the clock edge that ENTERS the named state.
      always @(posedge iclk or negedge irst)
          begin
              if(!irst)
                  begin
                      obusy<=1'b1;
                      is_bound<=1'b0;
                      dir_dat_ex<=2'd0;
                      odir_dat<=0;
                      osrc_addr<=0;
                      osrc_rd<=1'b0;
                      willdo<=st_idle;
                  end
              else
                  case(nest_state)
                  st_load_x0_y0:
                      begin
                          //address = base + (y*IMG_WIDTH + x)*4. The inner parentheses
                          //are required: '+' binds tighter than '<<', so without them
                          //the base address would be shifted as well.
                          osrc_addr<=isrc_first_addr+((y_int*`IMG_WIDTH+x_int)<<2);
                          willdo<=st_check_bound;
                      end
                  st_load_x1_y0:
                      begin
                          //one pixel to the right
                          osrc_addr<=osrc_addr+`IMG_DATA_WASTE_BYTES;
                          willdo<=st_load_x1_y1;
                      end
                  st_load_x1_y1:
                      begin
                          //one row down
                          osrc_addr<=osrc_addr+`IMG_WIDTH*`IMG_DATA_WASTE_BYTES;
                          willdo<=st_load_x0_y1;
                      end
                  st_load_x0_y1:
                      begin
                          //one pixel back to the left
                          osrc_addr<=osrc_addr-`IMG_DATA_WASTE_BYTES;
                          willdo<=st_output_result2;
                      end
                  st_check_bound:
                      begin
                          //Output the (x0,y0) sample directly when
                          // - the position falls exactly on an edge pixel, or
                          // - the integer position is in the last column/row, because
                          //   the x+1 / y+1 neighbours would lie outside the image.
                          is_bound<=(x_frac==0 && y_frac==0 && (x_int==0 || y_int==0))
                                    || x_int==(`IMG_WIDTH-1)
                                    || y_int==(`IMG_HEIGHT-1);
                      end
                  st_loading_data:
                      osrc_rd<=1'b1;
                  st_loaded_data:
                      begin
                          //accumulate the sample just read
                          //NOTE(review): each sample is added as one IMG_COLOR_WIDTH-bit
                          //integer; if isrc_dat packs several colour channels, carries
                          //cross channel boundaries -- confirm the pixel format.
                          {dir_dat_ex,odir_dat}<=isrc_dat+{dir_dat_ex,odir_dat};
                          osrc_rd<=1'b0;
                      end
                  st_output_result:
                      begin
                          //boundary case: odir_dat already holds the single sample
                          obusy<=1'b0;
                      end
                  st_output_result2:
                      begin
                          obusy<=1'b0;
                          //sum of four samples divided by 4
                          odir_dat<={dir_dat_ex,odir_dat[`IMG_COLOR_WIDTH-1:2]};
                      end
                  default:;//st_idle,do nothing
                  endcase
          end

      endmodule

      `undef STATE_WIDTH

      其实也没有完全按照C版本的(噢,有点说的不准确,其实是C++版本,嘻嘻),因为考虑到通用乘法太耗资源,所以这个插值只是取了这个点周围的4个点的平均值而已(是不是有点上当的感觉),其实这样对于一般的图像插值已经够了,效果和那个C版本的相差不多(这个可不是忽悠你们)
    4. 用到的配置文件:config.v

      // config.v
      // Shared compile-time configuration, included by interpolation.v and testbench.v.

      // Include guard.
      `ifndef Y_IMAGE_PROCESSING
      `define Y_IMAGE_PROCESSING

      // Bit widths used for the sized IMG_WIDTH / IMG_HEIGHT constants below and
      // for the integer part of the fixed-point X / Y coordinates.
      `define IMG_WIDTH_BIT 11
      `define IMG_HEIGHT_BIT 10

      // Image dimensions in pixels, written as sized decimal literals.
      `define IMG_WIDTH `IMG_WIDTH_BIT'd1440
      `define IMG_HEIGHT `IMG_HEIGHT_BIT'd900

      // Width in bits of one pixel value on the data buses.
      `define IMG_COLOR_WIDTH 5'd24

      // Address step between consecutive pixels: 32 bits = 4 * 8, i.e. 4 bytes.
      `define IMG_DATA_WASTE_BYTES 3'd4 
      // Number of fractional bits in the fixed-point coordinates.
      `define DECIMAL_PRECISION_POSTION 4
      // Total width of the fixed-point X / Y coordinates (fraction + integer bits).
      `define DECIMAL_X_POSTION_WIDTH (`DECIMAL_PRECISION_POSTION+`IMG_WIDTH_BIT)
      `define DECIMAL_Y_POSTION_WIDTH (`DECIMAL_PRECISION_POSTION+`IMG_HEIGHT_BIT)
      // Width of the image-buffer address bus.
      `define RAM_ADDR_WIDTH 25


      `endif

    5. 仿真测试文件:testbench.v

      // testbench.v

      `include "config.v"
      `timescale 1ns/1ns

      //Testbench for the interpolation module.
      //Drives the fixed-point position (5.0, 6.0), pulses reset once and feeds the
      //values 10, 20, 30, 40 as successive read data (isrc_dat grows by 10 on every
      //rising edge of osrc_rd). The result is printed when obusy falls.
      module testbench;
      parameter clk_period=100;//10MHz

      reg iclk,irst;
      reg [`DECIMAL_X_POSTION_WIDTH-1:0] ix;
      reg [`DECIMAL_Y_POSTION_WIDTH-1:0] iy;
      reg [`RAM_ADDR_WIDTH-1:0] isrc_first_addr;

      reg [`IMG_COLOR_WIDTH-1:0] isrc_dat;
      reg isrc_busy;
      wire [`RAM_ADDR_WIDTH-1:0] osrc_addr;

      wire osrc_rd,obusy;
      wire [`IMG_COLOR_WIDTH-1:0] odir_dat;

      //Free-running clock. Initialising in a separate initial block and toggling
      //unconditionally avoids depending on the time-0 ordering between an
      //"always @(iclk)" block and the first assignment to iclk.
      initial iclk=0;
      always #(clk_period/2) iclk=~iclk;

      initial
          begin
            irst=1;
            //{integer part, fractional part}: x = 5.0, y = 6.0, sized to match
            //the declared widths of ix / iy
            ix={`IMG_WIDTH_BIT'd5,`DECIMAL_PRECISION_POSTION'd0};
            iy={`IMG_HEIGHT_BIT'd6,`DECIMAL_PRECISION_POSTION'd0};
            isrc_first_addr=0;
            isrc_dat=24'h0;
            isrc_busy=0;
            //one active-low reset pulse
            #clk_period irst=0;
            #clk_period irst=1;
            #(20*clk_period) $finish;
          end

      //Simple memory model: every new read request returns the previous value + 10.
      always @(posedge osrc_rd) isrc_dat=isrc_dat+24'd10;

      //Report the result once the module signals completion. The #1 delay lets the
      //odir_dat update made on the same clock edge settle before it is sampled.
      always @(negedge obusy)
          #1 $display("interpolation result: odir_dat=%0d",odir_dat);

      interpolation ip(.iclk(iclk),
                          .irst(irst),
                          .ix(ix),
                          .iy(iy),
                          .isrc_first_addr(isrc_first_addr),
                          .isrc_dat(isrc_dat),
                          .osrc_addr(osrc_addr),
                          .osrc_rd(osrc_rd),
                          .isrc_busy(isrc_busy),
                          .odir_dat(odir_dat),
                          .obusy(obusy)
                          );
      endmodule

    6. 仿真波形图
      (10+20+30+40)/4=25,满意。看起来是不是很幼稚呢?其实一个大的系统都是由这些“幼稚”的部分组合起来的
    7. RTL VIEW,不好截大图,看了也白看,不过呢,不看也白不看:
    8. 一些值得关注的综合参数:
      我选择综合时钟是100MHz,可是经过综合得到的评估时钟只是76MHz
      我的芯片有13824个触发器资源可用,该插值模块用720个触发器资源(5%),IO资源有172个,该设计用了130个(76%),RAM有24块,一块也没有用到(唉,被浪费了),当然,用资源多的做开发是好的,最终产品用的时候选合适的就行(减少成本)
    9. 最坏路径:
      看不清吧,又白看了吧。我讲解一下吧,它是由osrc_addr链接起来,就是这家伙闹得我的设计暂时只能综合到76MHz,要优化代码,使系统能在更高的时钟上运行,那么就得把这些最坏路径一条一条优化好(不是叫你删掉啊,出了事别来找我麻烦),关于系统的优化,自己慢慢琢磨去吧,呵呵,经验会造就你的
    10. 总结:
      C语言那么一小段搞定的事,Verilog HDL(还是说个全称吧)用了那么多,精度还不如C函数的(声明:是我这里没有把它写成这样的,不代表它不能),难道这真是在做破事?你要这样认为也没辙。其实经过优化后的这些模块放在系统中,这些系统就可变成并行的,就算是系统频率比某些CPU的频率低,但是它整体的性能却很容易做到比CPU做某种特定的工作效率高,这就是并行+硬件的魅力,自己慢慢去体会吧
     
  • 相关阅读:
    PAT (Basic Level) Practise:1001. 害死人不偿命的(3n+1)猜想
    流加密法
    The NMEA 0183 Protocol
    USB 描述符
    网摘
    What are the 10 algorithms one must know in order to solve most algorithm challenges/puzzles?
    Why did Jimmy Wales invest in Quora? Is he afraid that it will take over Wikipedia?
    Add Binary
    Cocos2d-x 网络资源
    Cache
  • 原文地址:https://www.cnblogs.com/lsjjob/p/6036032.html
Copyright © 2020-2023  润新知