源码来自官方 2014.4 版 TRD 工程。整个工程基于 ZC702 开发板,而我手里只有一块小 ZYBO >_<,不过其中的硬件设计很有参考价值。最近想用 FPGA 加速 SURF 算法,所以先在这里分析一下 TRD 工程中 sobel edge detection 的例程。
wiki
Top Function
与 xapp1167 中直接调用 hls::cv 库函数的做法不同,这里的 sobel 边缘提取算法是重新实现的,因此更便于了解 HLS 算法实现的特点。
// Top-level HLS function of the edge-detection core.
//
// Reads a video frame from `video_in`, runs it through sobel_filter_core(),
// and writes the result to `video_out`.
//
// Parameters:
//   video_in / video_out  - input / output video streams (mapped to axis ports).
//   rows, cols            - size of the image to process.
//   C_XR*C* / C_YR*C*     - coefficients of the X- and Y-direction filter
//                           templates, forwarded unchanged to sobel_filter_core().
//                           NOTE(review): presumably R<r>C<c> means row r,
//                           column c of a 3x3 kernel - confirm against
//                           sobel_filter_core().
//   c_high_thresh,
//   c_low_thresh,
//   c_invert              - forwarded unchanged to sobel_filter_core().
//                           NOTE(review): the names suggest high/low threshold
//                           values and an output-invert flag; their exact use
//                           is not visible in this function.
void image_filter(AXI_STREAM& video_in, AXI_STREAM& video_out, int rows, int cols,
int C_XR0C0, int C_XR0C1, int C_XR0C2, int C_XR1C0, int C_XR1C1, int C_XR1C2, int C_XR2C0, int C_XR2C1, int C_XR2C2,
int C_YR0C0, int C_YR0C1, int C_YR0C2, int C_YR1C0, int C_YR1C1, int C_YR1C2, int C_YR2C0, int C_YR2C1, int C_YR2C2,
int c_high_thresh, int c_low_thresh, int c_invert)
{
//Create AXI streaming interfaces for the core
// The two axis ports below carry the image data in and out as AXI streams.
#pragma HLS INTERFACE axis port=video_in bundle=INPUT_STREAM
#pragma HLS INTERFACE axis port=video_out bundle=OUTPUT_STREAM
// Expose rows and cols as registers on the AXI-Lite bus (CONTROL_BUS) at fixed
// offsets, so the size of the processed image can be changed at run time.
// (Original author's note: the maximum image size is 1920*1080.)
#pragma HLS INTERFACE s_axilite port=rows bundle=CONTROL_BUS offset=0x14
#pragma HLS INTERFACE s_axilite port=cols bundle=CONTROL_BUS offset=0x1C
// port=return places the function's block-level control on the same AXI-Lite bus.
#pragma HLS INTERFACE s_axilite port=return bundle=CONTROL_BUS
// Disabled in the original source (kept for reference):
//#pragma HLS INTERFACE ap_stable port=rows
//#pragma HLS INTERFACE ap_stable port=cols
// X- and Y-direction filter templates of the Sobel operator. They are AXI-Lite
// registers so the PS side can change the template (for example to the
// Prewitt operator).
#pragma HLS INTERFACE s_axilite port= C_XR0C0 bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port= C_XR0C1 bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port= C_XR0C2 bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port= C_XR1C0 bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port= C_XR1C1 bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port= C_XR1C2 bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port= C_XR2C0 bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port= C_XR2C1 bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port= C_XR2C2 bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port= C_YR0C0 bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port= C_YR0C1 bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port= C_YR0C2 bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port= C_YR1C0 bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port= C_YR1C1 bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port= C_YR1C2 bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port= C_YR2C0 bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port= C_YR2C1 bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port= C_YR2C2 bundle=CONTROL_BUS
// Threshold and invert parameters, also exposed on the AXI-Lite bus.
// NOTE(review): the original comment called these "x, y thresholds", but the
// parameter names are high/low threshold and invert - confirm the intended meaning.
#pragma HLS INTERFACE s_axilite port= c_high_thresh bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port= c_low_thresh bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port= c_invert bundle=CONTROL_BUS
// Intermediate images: img_0 is filled from the input stream and consumed by
// the filter; img_1 is produced by the filter and drained to the output stream.
YUV_IMAGE img_0(rows, cols);
YUV_IMAGE img_1(rows, cols);
// DATAFLOW directive applied to the three stages below (stream-in, filter, stream-out).
#pragma HLS dataflow
// Convert the AXI stream into an hls::Mat (per the original author, hls::Mat is stream-based).
hls::AXIvideo2Mat(video_in, img_0);
//sobel edge detection implement
sobel_filter_core(img_0, img_1, rows, cols,
C_XR0C0, C_XR0C1, C_XR0C2, C_XR1C0, C_XR1C1, C_XR1C2, C_XR2C0, C_XR2C1, C_XR2C2,
C_YR0C0, C_YR0C1, C_YR0C2, C_YR1C0, C_YR1C1, C_YR1C2, C_YR2C0, C_YR2C1, C_YR2C2,
c_high_thresh, c_low_thresh, c_invert);
// Convert the resulting hls::Mat back into an AXI stream for output.
hls::Mat2AXIvideo(img_1, video_out);
}
Top function 采用的是标准的 HLS 图像处理结构(AXI-Stream 输入 → hls::Mat → 图像处理 → hls::Mat → AXI-Stream 输出),具体内容请参看 xapp1167 文档。
sobel_filter_core
void sobel_filter_core(YUV_IMAGE& src, YUV_IMAGE& dst, int rows, int cols,
int C_XR0C0, int C_XR0C1, int C_XR0C2, int C_XR1C0, int C_XR1C1, int C_XR1C2, int C_XR2C0, int C_XR2C1, int C_XR2C2,
int C_YR0C0, int C_YR0C1, int C_YR0C2, int C_YR1C0, int C_YR1C1, int C_YR1C2, int C_YR2C0, int C_YR2C1, int C_YR2C2,
int c_high_thresh, int c_low_thresh, int c_invert)
{
Y_BUFFER buff_A;
Y_WINDOW buff_C;
//Y_BUFFER Y_WINDOW 定义如下
//typedef hls::Window<3, 3, unsigned char> Y_WINDOW;
//typedef hls::LineBuffer<3, MAX_WIDTH, unsigned char> Y_BUFFER;
//hls特有的memory结构 具体特征说明见下方
for(int row = 0; row < rows+1; row++){
for(int col = 0; col < cols+1; col++){
#pragma HLS loop_flatten off
// loop_flatten 选项说明
//Allows nested loops to be collapsed into a single loop with improved latency.
//
#pragma HLS dependence variable=&buff_A false
// dependence 选项说明
//Used to provide additional information that can overcome loop-carry dependencies and allow loops to be pipelined (or pipelined with lower intervals).
#pragma HLS PIPELINE II = 1
// PIPELINE 选项说明
//Reduces the initiation interval by allowing the concurrent execution of operations within a loop or function.
//流水线的启动间隔(II, initiation interval)为1,即每个时钟周期启动一次新的循环迭代
// Temp values are used to reduce the number of memory reads
unsigned char temp;
YUV_PIXEL tempx;
评论
查看更多