1
完善资料让更多小伙伴认识你,还能领取20积分哦, 立即完善>
扫一扫,分享给好友
1.模型转换 1.1 yolov5-face的pt模型转为onnx模型 从github上下载yolov5-face工程,然后利用里面的export.py将pytorch模型转换为onnx模型,转换之前需要修改yolo.py代码, 1. def forward(self, x): 2. # x = x.copy() # for profiling 3. z = [] # inference output 4. # self.training |= self.export 5. if self.export: 6. for i in range(self.nl): 7. x[i] = self.m[i](x[i]) 8. bs, _, ny, nx = x[i].shape # x(bs,48,20,20) to x(bs,3,20,20,16) 9. x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() 需要将代码中的 .permute(0, 1, 3, 4, 2)去掉,修改为如下代码 1. def forward(self, x): 2. # x = x.copy() # for profiling 3. z = [] # inference output 4. # self.training |= self.export 5. if self.export: 6. for i in range(self.nl): 7. x[i] = self.m[i](x[i]) 8. bs, _, ny, nx = x[i].shape # x(bs,48,20,20) to x(bs,3,20,20,16) 9. x[i] = x[i].view(bs, self.na, self.no, ny, nx).contiguous() 这样修改的原因是后面我们的C++代码解析的是3*16*20*20类型的,而不是3*20*20*16类型的。 1.2 yolov5-face的onnx模型转换为rknn模型 由于rknntoolkit1.6.0在转换yolov5-face的模型时报错,因此首先需要安装toolkit1.7.1,然后再用https://github.com/airockchip/yolov5/tree/master/rknn中的onnx2rknn.py进行模型转换,转换脚本如下。 1. import os 2. import sys 3. import numpy as np 4. from rknn.api import RKNN 5. 6. ONNX_MODEL = 'yolov5s-face.onnx' 7. RKNN_MODEL = 'yolov5s-face.rknn' 8. 9. if __name__ == '__main__': 10. 11. # Create RKNN object 12. rknn = RKNN(verbose=True) 13. 14. # pre-process config 15. print('--> config model') 16. rknn.config(mean_values=[[0, 0, 0]], std_values=[[255, 255, 255]], reorder_channel='0 1 2', target_platform='rv1126', 17. quantized_dtype='asymmetric_affine-u8', optimization_level=3, output_optimize=1) 18. print('done') 19. 20. print('--> Loading model') 21. ret = rknn.load_onnx(model=ONNX_MODEL) 22. if ret != 0: 23. print('Load model failed!') 24. exit(ret) 25. print('done') 26. 27. # Build model 28. print('--> Building model') 29. ret = rknn.build(do_quantization=True, dataset='./dataset.txt') 30. if ret != 0: 31. print('Build yolov5s failed!') 32. exit(ret) 33. 
print('done') 34. 35. # Export rknn model 36. print('--> Export RKNN model') 37. ret = rknn.export_rknn(RKNN_MODEL) 38. if ret != 0: 39. print('Export yolov5s.rknn failed!') 40. exit(ret) 41. print('done') 42. 43. rknn.release() 2.C++代码 这里以瑞芯微的代码为例,只不过在前面和后面加上了有关padding resize的处理,瑞芯微的具体代码见rknpu/rknn/rknn_api/examples/rknn_yolov5_demo at master · rockchip-linux/rknpu · GitHub 2.1 padding resize代码 瑞芯微的官方demo里面是用opencl读取图片,这里我改成了用opencv读取图片,关于opencv的交叉编译见:ubuntu交叉编译RV1126的opencv库/ubuntu交叉编译opencv void padding_resize(cv::InputArray src, cv::OutputArray dst, cv::Size size) { float padd_w = 0; float padd_h = 0; float r = std::min(float(size.width) / src.cols(), float(size.height) / src.rows()); int inside_w = round(src.cols() * r); int inside_h = round(src.rows() * r); padd_w = size.width - inside_w; padd_h = size.height - inside_h; cv::resize(src, dst, cv::Size(inside_w, inside_h)); padd_w = padd_w / 2; padd_h = padd_h / 2; //外层边框填充灰色 printf("gain:%f, padd_w:%f,padd_h:%f. in padding_resize============\n", r, padd_w,padd_h); int top = int(round(padd_h - 0.1)); int bottom = int(round(padd_h + 0.1)); int left = int(round(padd_w - 0.1)); int right = int(round(padd_w + 0.1)); cv::copyMakeBorder(dst, dst, top, bottom, left, right, cv::BORDER_CONSTANT, cv::Scalar(114, 114, 114)); } cv::Mat read_image(const char *image_path, int w, int h, int & img_width, int & img_height, cv::Mat & img) { img = cv::imread(image_path); img_width = img.cols; img_height = img.rows; cv::Mat sample_resize, sample; padding_resize(img, sample_resize, cv::Size(w, h)); //cv::resize(img, sample_resize, cv::Size(w, h)); cv::cvtColor(sample_resize, sample, cv::COLOR_BGR2RGB); return sample; } 2.2 yolov5-face后处理(在瑞芯微yolov5后处理代码基础上修改) 这是瑞芯微的yolov5 demo里的代码,这里面我把坐标框后处理时映射回原图的四行代码注释掉了。然后其他代码也针对yolov5-face进行了修改,其中process这里增加了关键点的解析。 static int process(uint8_t *input, int *anchor, int grid_h, int grid_w, int height, int width, int stride, std::vector<float> &boxes, std::vector<float> &objProbs, std::vector<int> &classId, std::vector<float> &landMarks, float threshold, uint32_t zp, float scale) { int validCount = 
0; int grid_len = grid_h * grid_w;//80*80, 40*40, 20*20. float thres = unsigmoid(threshold); uint8_t thres_u8 = qnt_f32_to_affine(thres, zp, scale);//量化 for (int a = 0; a < 3; a++) { for (int i = 0; i < grid_h; i++) { for (int j = 0; j < grid_w; j++) { /********************************************************************************************** 排列顺序是x y w h boxScore x1 y1 x2 y2 x3 y3 x4 y4 x5 y5 idScore,然后保存的时候是先存 grid_h * grid_w个x,再存grid_h * grid_w个y,....,这样保存的。 而不是先存第一个x y w h boxScore x1 y1 x2 y2 x3 y3 x4 y4 x5 y5 idScore, 再存第二个x y w h boxScore x1 y1 x2 y2 x3 y3 x4 y4 x5 y5 idScore, **********************************************************************************************/ uint8_t box_confidence = input[(PROP_BOX_SIZE * a + 4) * grid_len + i * grid_w + j]; if (box_confidence >= thres_u8)//thres_u8经过反sigmoid和反量化操作了。 { int offset = (PROP_BOX_SIZE * a) * grid_len + i * grid_w + j;// uint8_t *in_ptr = input + offset; /*这边乘以2,减掉0.5是因为内部运算的时候做了这个处理,所以这里要反操作一下, deqnt_affine_to_f32是反量化,zp是量化时的zero point中心点,scale是量化时的尺度。*/ float box_x = sigmoid(deqnt_affine_to_f32(*in_ptr, zp, scale)) * 2.0 - 0.5; float box_y = sigmoid(deqnt_affine_to_f32(in_ptr[grid_len], zp, scale)) * 2.0 - 0.5; float box_w = sigmoid(deqnt_affine_to_f32(in_ptr[2 * grid_len], zp, scale)) * 2.0; float box_h = sigmoid(deqnt_affine_to_f32(in_ptr[3 * grid_len], zp, scale)) * 2.0; float x1 = (deqnt_affine_to_f32(in_ptr[5 * grid_len], zp, scale)); float y1 = (deqnt_affine_to_f32(in_ptr[6 * grid_len], zp, scale)); float x2 = (deqnt_affine_to_f32(in_ptr[7 * grid_len], zp, scale)); float y2 = (deqnt_affine_to_f32(in_ptr[8 * grid_len], zp, scale)); float x3 = (deqnt_affine_to_f32(in_ptr[9 * grid_len], zp, scale)); float y3 = (deqnt_affine_to_f32(in_ptr[10 * grid_len], zp, scale)); float x4 = (deqnt_affine_to_f32(in_ptr[11 * grid_len], zp, scale)); float y4 = (deqnt_affine_to_f32(in_ptr[12 * grid_len], zp, scale)); float x5 = (deqnt_affine_to_f32(in_ptr[13 * grid_len], zp, scale)); float y5 = 
(deqnt_affine_to_f32(in_ptr[14 * grid_len], zp, scale)); std::cout<<"landmark after deqnt_affine_to_f32:"<<x1<<","<<y1<<std::endl; box_x = (box_x + j) * (float)stride;//这边的box_x是0-1的,相当于是偏移值,所以要加上j。乘以stride是映射回原图640. box_y = (box_y + i) * (float)stride;//这边的box_y是0-1的,相当于是偏移值,所以要加上i。乘以stride是映射回原图640. box_w = box_w * box_w * (float)anchor[a * 2]; box_h = box_h * box_h * (float)anchor[a * 2 + 1]; box_x -= (box_w / 2.0); box_y -= (box_h / 2.0); boxes.push_back(box_x); boxes.push_back(box_y); boxes.push_back(box_w); boxes.push_back(box_h); x1 = x1 * (float)anchor[a * 2] + j*(float)stride; y1 = y1 * (float)anchor[a * 2 + 1] + i*(float)stride; x2 = x2 * (float)anchor[a * 2] + j*(float)stride; y2 = y2 * (float)anchor[a * 2 + 1] + i*(float)stride; x3 = x3 * (float)anchor[a * 2] + j*(float)stride; y3 = y3 * (float)anchor[a * 2 + 1] + i*(float)stride; x4 = x4 * (float)anchor[a * 2] + j*(float)stride; y4 = y4 * (float)anchor[a * 2 + 1] + i*(float)stride; x5 = x5 * (float)anchor[a * 2] + j*(float)stride; y5 = y5 * (float)anchor[a * 2 + 1] + i*(float)stride; landMarks.push_back(x1); landMarks.push_back(y1); landMarks.push_back(x2); landMarks.push_back(y2); landMarks.push_back(x3); landMarks.push_back(y3); landMarks.push_back(x4); landMarks.push_back(y4); landMarks.push_back(x5); landMarks.push_back(y5); // printf("box_x=%.03f, box_y=%.03f, box_w=%.03f, box_h=%.03f\n" // , box_x, box_y, box_w, box_h); uint8_t maxClassProbs = in_ptr[15 * grid_len]; int maxClassId = 0; for (int k = 0; k < OBJ_CLASS_NUM; ++k) { uint8_t prob = in_ptr[(15 + k) * grid_len];//这里是通过比较找到一个最高的得分和他的id。 if (prob > maxClassProbs) { maxClassId = k; maxClassProbs = prob; } } objProbs.push_back(sigmoid(deqnt_affine_to_f32(maxClassProbs, zp, scale))); classId.push_back(maxClassId); validCount++; //boxes objProbs classId这三个变量往外返回的时候应该是每四个box对应一个objProb和一个classId。 //std::cout<<"maxClassProbs in post_process:::"<<(int)maxClassProbs<<std::endl; } } } } return validCount; } 3 RV1126驱动升级 由于前面使用rknntoolkit 1.7.1进行模型转换的,所以这里需要把RV1126板子的驱动由1.6.1升级为1.7.0,具体方法如下:rv1126更新驱动版本库_cumtchw-CSDN博客 
1. 把https://github.com/rockchip-linux/rknpu/tree/master/drivers/linux-armhf-puma/usr push到板子的对应目录。 2. 把https://github.com/rockchip-linux/rknpu/blob/master/drivers/npu_ko/galcore_puma.ko改名为galcore.ko, push到板子/lib/modules/galcore.ko。可以先在板子上find下原来galcore.ko的位置。 3. 重启板子。 升级完成之后需要到/usr/lib目录下用下面三个命令做一下软链接。 1. [root@RV1126_RV1109:/usr/lib]# ln -snf libOpenVX.so.1.2 libOpenVX.so.1 2. [root@RV1126_RV1109:/usr/lib]# ln -snf libOpenVX.so.1 libOpenVX.so 3. [root@RV1126_RV1109:/usr/lib]# ln -snf libOpenCL.so.1.2 libOpenCL.so.1 4.完整的C++代码 4.1 postprocess.h #ifndef _RKNN_ZERO_COPY_DEMO_POSTPROCESS_H_ #define _RKNN_ZERO_COPY_DEMO_POSTPROCESS_H_ #include <stdint.h> #include <vector> #define OBJ_NAME_MAX_SIZE 16 #define OBJ_NUMB_MAX_SIZE 64 //#define OBJ_CLASS_NUM 2 //80 #define OBJ_CLASS_NUM 1 //yolov5-face的类别数是1. #define NMS_THRESH 0.3 #define BOX_THRESH 0.87 //0.5 // #define NMS_THRESH 0.5 // #define BOX_THRESH 0.3 //0.5 #define LAND_MARK_SIZE 10//五个关键点,每个关键点有两个坐标x,y。 #define PROP_BOX_SIZE (5 + OBJ_CLASS_NUM + LAND_MARK_SIZE) //16 //#define PROP_BOX_SIZE (5+OBJ_CLASS_NUM) //10 #define REPVGG_CLASS_NUM 27 //信号灯分类数 extern char *repvgg_labels[REPVGG_CLASS_NUM]; typedef struct _BOX_RECT { int left; int right; int top; int bottom; //std::vector } BOX_RECT; typedef struct __detect_result_t { char name[OBJ_NAME_MAX_SIZE]; BOX_RECT box; std::vector<float> landMark; float prop; } detect_result_t; typedef struct _detect_result_group_t { int id; int count; detect_result_t results[OBJ_NUMB_MAX_SIZE]; } detect_result_group_t; int post_process(uint8_t *input0, uint8_t *input1, uint8_t *input2, int model_in_h, int model_in_w, float conf_threshold, float nms_threshold, float scale_w, float scale_h, std::vector<uint32_t> &qnt_zps, std::vector<float> &qnt_scales, detect_result_group_t *group); int rknn_GetTop( float *pfProb, float *pfMaxProb, uint32_t *pMaxClass, uint32_t outputCount, uint32_t topNum); int loadLabelName(const char *locationFilename, char *label[], int classNum); #endif //_RKNN_ZERO_COPY_DEMO_POSTPROCESS_H_ 4.2 postprocess.cc // Copyright (c) 2021 by Rockchip 
Electronics Co., Ltd. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include <math.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/time.h> #include <set> #include <vector> #include "postprocess.h" #include <iostream> #define LABEL_NALE_TXT_PATH "./model/coco_80_labels_list.txt" static char *labels[OBJ_CLASS_NUM]; char *repvgg_labels[REPVGG_CLASS_NUM]; //这是之前yolov5的anchor // const int anchor0[6] = {10, 13, 16, 30, 33, 23}; // const int anchor1[6] = {30, 61, 62, 45, 59, 119}; // const int anchor2[6] = {116, 90, 156, 198, 373, 326}; //这是yolov5-face的anchor. const int anchor0[6] = {4, 5, 8, 10, 13, 16}; const int anchor1[6] = {23, 29, 43, 55, 73, 105}; const int anchor2[6] = {146, 217, 231, 300, 335, 433}; inline static int clamp(float val, int min, int max) { return val > min ? (val < max ? val : max) : min; } char *readLine(FILE *fp, char *buffer, int *len) { int ch; int i = 0; size_t buff_len = 0; buffer = (char *)malloc(buff_len + 1); if (!buffer) return NULL; // Out of memory while ((ch = fgetc(fp)) != '\n' && ch != EOF) { buff_len++; void *tmp = realloc(buffer, buff_len + 1); if (tmp == NULL) { free(buffer); return NULL; // Out of memory } buffer = (char *)tmp; buffer[i] = (char)ch; i++; } buffer = ' 