4 Tengine部署YOLOv5-Lite
依照顺序调用Tengine核心API如下:
1. init_tengine
初始化Tengine,该函数在程序中只要调用一次即可。
2. create_graph
创建Tengine计算图。
3. prerun_graph
预运行,准备计算图推理所需的资源。大小核、核个数、核亲和性和数据精度都在这一步通过下面的options结构体设置。
/* Runtime options passed to Tengine when the graph is pre-run. */
struct options
{
    int num_thread;    // number of CPU threads (cores) to use
    int cluster;       // big/little cluster selection: TENGINE_CLUSTER_[ALL,BIG,MEDIUM,LITTLE]
    int precision;     // compute precision: TENGINE_MODE_[FP32,FP16,HYBRID_INT8,UINT8,INT8]
    uint64_t affinity; // CPU affinity mask, binds execution to specific cores
};
4. run_graph
启动Tengine计算图推理。
5. postrun_graph
停止运行graph,并释放graph占据的资源。
6. destroy_graph
销毁graph。
1、图像自适应缩放
在训练阶段,网络输入的尺寸是固定的(比如608×608),而数据集中图片的尺寸大小不一。一般的做法是将图片统一缩放到标准尺寸,然后填充黑边,如下图所示:
但如果填充的黑边比较多,则存在信息冗余,影响推理速度。YOLOv5在推理阶段采用缩减黑边的方式来提高推理速度:在代码datasets.py的letterbox函数中,对原始图像自适应地添加最少的黑边。例如,1000×800的图片不是直接缩放到608×608,而是先计算缩放比608/1000=0.608,将图片缩放至608×486;然后计算608-486=122,用np.mod(122,32)取余数得到26,再平均成每侧13,填充到图片高度的两端,最终尺寸为608×512。
def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
    """Resize and pad an image while meeting stride-multiple constraints.

    Args:
        img: image array; only ``img.shape[:2]`` (height, width) is inspected here.
        new_shape: target (height, width), or a single int used for both.
        color: border colour handed to ``cv2.copyMakeBorder``.
        auto: if True, pad only up to the next multiple of ``stride``
            ("minimum rectangle") instead of the full ``new_shape``.
        scaleFill: if True (and ``auto`` is False), stretch to ``new_shape``
            with no padding.
        scaleup: if False, the image is only ever scaled down.
        stride: modulus used when ``auto`` is True.

    Returns:
        ``(img, ratio, (dw, dh))``: the padded image, the (width, height)
        scale ratios, and the per-side padding along width and height.
    """
    shape = img.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:  # only scale down, do not scale up (for better test mAP)
        r = min(r, 1.0)

    # Compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    if auto:  # minimum rectangle
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding
    elif scaleFill:  # stretch
        dw, dh = 0.0, 0.0
        new_unpad = (new_shape[1], new_shape[0])
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
    # The -0.1 / +0.1 offsets send an odd total padding to the two sides as n and n + 1.
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return img, ratio, (dw, dh)
C++版本如下:
void get_input_data_focus(const char* image_file, float* input_data, int img_h, int img_w, const float* mean, const float* scale) { cv::Mat sample = cv::imread(image_file, 1); cv::Mat img; const int target_size = 640; int imge_w = img.cols; int imge_h = img.rows; int w = imge_w; int h = imge_h; float scale_im = 1.f; if (w > h) { scale_im = (float)target_size / w; w = target_size; h = h * scale_im; } else { scale_im = (float)target_size / h; h = target_size; w = w * scale_im; } cv::cvtColor(sample, img, cv::COLOR_BGR2RGB); cv::resize(img, img, cv::Size(w, h)); // pad to target_size rectangle int wpad = (w + 31) / 32 * 32 - w; int hpad = (h + 31) / 32 * 32 - h; cv::Mat in_pad; cv::copy_make_border(img, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, cv::BORDER_CONSTANT, 114.f); img.convertTo(img, CV_32FC3); float* img_data = (float*)img.data; /* nhwc to nchw */ for (int h = 0; h < img_h; h++) { for (int w = 0; w < img_w; w++) { for (int c = 0; c < 3; c++) { int in_index = h * img_w * 3 + w * 3 + c; int out_index = c * img_h * img_w + h * img_w + w; input_data[out_index] = (img_data[in_index] - mean[c]) * scale[c]; } } } }
2、模型加载和推理
/* set runtime options */
struct options opt;
opt.num_thread = num_thread;
opt.cluster = TENGINE_CLUSTER_ALL;
opt.precision = TENGINE_MODE_FP32;
opt.affinity = 0;

/* initialize tengine; this only needs to happen once per program */
if (init_tengine() != 0)
{
    fprintf(stderr, "Initial tengine failed.\n");
    return -1;
}
fprintf(stderr, "tengine-lite library version: %s\n", get_tengine_version());

/* create graph, load tengine model xxx.tmfile */
graph_t graph = create_graph(nullptr, "tengine", model_file);
if (graph == nullptr)
{
    fprintf(stderr, "Create graph failed.\n");
    /* init_tengine() succeeded above, so undo it before bailing out */
    release_tengine();
    return -1;
}
3 获取推理结果
/* yolov5 postprocess */ // 0: 1, 3, 20, 20, 85 // 1: 1, 3, 40, 40, 85 // 2: 1, 3, 80, 80, 85 tensor_t p8_output = get_graph_output_tensor(graph, 0, 0); tensor_t p16_output = get_graph_output_tensor(graph, 1, 0); tensor_t p32_output = get_graph_output_tensor(graph, 2, 0); float* p8_data = (float*)get_tensor_buffer(p8_output); float* p16_data = (float*)get_tensor_buffer(p16_output); float* p32_data = (float*)get_tensor_buffer(p32_output); /* postprocess */ const float prob_threshold = 0.55; const float nms_threshold = 0.5; std::vector<Object> proposals; std::vector<Object> objects8; std::vector<Object> objects16; std::vector<Object> objects32; std::vector<Object> objects; generate_proposals(32, p32_data, prob_threshold, objects32, letterbox_cols, letterbox_rows); proposals.insert(proposals.end(), objects32.begin(), objects32.end()); generate_proposals(16, p16_data, prob_threshold, objects16, letterbox_cols, letterbox_rows); proposals.insert(proposals.end(), objects16.begin(), objects16.end()); generate_proposals(8, p8_data, prob_threshold, objects8, letterbox_cols, letterbox_rows); proposals.insert(proposals.end(), objects8.begin(), objects8.end()); qsort_descent_inplace(proposals); std::vector<int> picked; nms_sorted_bboxes(proposals, picked, nms_threshold);
4 后处理
static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold) { picked.clear(); const int n = faceobjects.size(); std::vector<float> areas(n); for (int i = 0; i < n; i++) { areas[i] = faceobjects[i].rect.area(); } for (int i = 0; i < n; i++) { const Object& a = faceobjects[i]; int keep = 1; for (int j = 0; j < (int)picked.size(); j++) { const Object& b = faceobjects[picked[j]]; // intersection over union float inter_area = intersection_area(a, b); float union_area = areas[i] + areas[picked[j]] - inter_area; // float IoU = inter_area / union_area if (inter_area / union_area > nms_threshold) keep = 0; } if (keep) picked.push_back(i); } } static void generate_proposals(int stride, const float* feat, float prob_threshold, std::vector<Object>& objects, int letterbox_cols, int letterbox_rows) { //static float anchors[18] = {10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326}; static float anchors[18] = {10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326}; int anchor_num = 3; int feat_w = letterbox_cols / stride; int feat_h = letterbox_rows / stride; int cls_num = 80; int anchor_group; if (stride == 8) anchor_group = 1; if (stride == 16) anchor_group = 2; if (stride == 32) anchor_group = 3; for (int h = 0; h <= feat_h - 1; h++) { for (int w = 0; w <= feat_w - 1; w++) { for (int a = 0; a <= anchor_num - 1; a++) { //process cls score int class_index = 0; float class_score = -FLT_MAX; for (int s = 0; s <= cls_num - 1; s++) { float score = feat[a * feat_w * feat_h * (cls_num + 5) + h * feat_w * (cls_num + 5) + w * (cls_num + 5) + s + 5]; if (score > class_score) { class_index = s; class_score = score; } } //process box score float box_score = feat[a * feat_w * feat_h * (cls_num + 5) + (h * feat_w) * (cls_num + 5) + w * (cls_num + 5) + 4]; float final_score = sigmoid(box_score) * sigmoid(class_score); if (final_score >= prob_threshold) { int loc_idx = a * feat_h * feat_w 
* (cls_num + 5) + h * feat_w * (cls_num + 5) + w * (cls_num + 5); float dx = sigmoid(feat[loc_idx + 0]); float dy = sigmoid(feat[loc_idx + 1]); float dw = sigmoid(feat[loc_idx + 2]); float dh = sigmoid(feat[loc_idx + 3]); float pred_cx = (dx * 2.0f - 0.5f + w) * stride; float pred_cy = (dy * 2.0f - 0.5f + h) * stride; float anchor_w = anchors[(anchor_group - 1) * 6 + a * 2 + 0]; float anchor_h = anchors[(anchor_group - 1) * 6 + a * 2 + 1]; float pred_w = dw * dw * 4.0f * anchor_w; float pred_h = dh * dh * 4.0f * anchor_h; float x0 = pred_cx - pred_w * 0.5f; float y0 = pred_cy - pred_h * 0.5f; float x1 = pred_cx + pred_w * 0.5f; float y1 = pred_cy + pred_h * 0.5f; Object obj; obj.rect.x = x0; obj.rect.y = y0; obj.rect.width = x1 - x0; obj.rect.height = y1 - y0; obj.label = class_index; obj.prob = final_score; objects.push_back(obj); } } } } }
5 绘图
/**
 * Print every detection to stderr, draw it on a copy of `bgr`, and write the annotated copy
 * to "yolov5s_out.jpg".
 *
 * Each detection gets a rectangle plus a "<class name> <score>%" caption on a white
 * background placed above the box. The caption is clamped so it does not run past the top
 * or right edge of the image.
 *
 * @param bgr      source image; it is cloned, never modified
 * @param objects  detections; `label` is used directly as an index into the class name table
 */
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
{
    static const char* class_names[] = {
        "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
        "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
        "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
        "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
        "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
        "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
        "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
        "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
        "hair drier", "toothbrush"};

    cv::Mat canvas = bgr.clone();

    for (const Object& det : objects)
    {
        const char* name = class_names[det.label];

        fprintf(stderr, "%2d: %3.0f%%, [%4.0f, %4.0f, %4.0f, %4.0f], %s\n", det.label, det.prob * 100, det.rect.x,
                det.rect.y, det.rect.x + det.rect.width, det.rect.y + det.rect.height, name);

        cv::rectangle(canvas, det.rect, cv::Scalar(255, 0, 0));

        char caption[256];
        sprintf(caption, "%s %.1f%%", name, det.prob * 100);

        int baseline = 0;
        cv::Size caption_size = cv::getTextSize(caption, cv::FONT_HERSHEY_SIMPLEX, 0.5, 2, &baseline);

        // Place the caption just above the box, then pull it back inside the image.
        int caption_x = det.rect.x;
        int caption_y = det.rect.y - caption_size.height - baseline;
        caption_y = (caption_y < 0) ? 0 : caption_y;
        if (caption_x + caption_size.width > canvas.cols)
        {
            caption_x = canvas.cols - caption_size.width;
        }

        const cv::Point origin(caption_x, caption_y);
        const cv::Size background(caption_size.width, caption_size.height + baseline);
        cv::rectangle(canvas, cv::Rect(origin, background), cv::Scalar(255, 255, 255), -1);

        cv::putText(canvas, caption, cv::Point(caption_x, caption_y + caption_size.height), cv::FONT_HERSHEY_SIMPLEX,
                    0.5, cv::Scalar(0, 0, 0));
    }

    cv::imwrite("yolov5s_out.jpg", canvas);
}
/* yolov5 draw the result */ float scale_letterbox; int resize_rows; int resize_cols; if ((letterbox_rows * 1.0 / img.rows) < (letterbox_cols * 1.0 / img.cols)) { scale_letterbox = letterbox_rows * 1.0 / img.rows; } else { scale_letterbox = letterbox_cols * 1.0 / img.cols; } resize_cols = int(scale_letterbox * img.cols); resize_rows = int(scale_letterbox * img.rows); int tmp_h = (letterbox_rows - resize_rows) / 2; int tmp_w = (letterbox_cols - resize_cols) / 2; float ratio_x = (float)img.rows / resize_rows; float ratio_y = (float)img.cols / resize_cols; int count = picked.size(); fprintf(stderr, "detection num: %d\n", count); objects.resize(count); for (int i = 0; i < count; i++) { objects[i] = proposals[picked[i]]; float x0 = (objects[i].rect.x); float y0 = (objects[i].rect.y); float x1 = (objects[i].rect.x + objects[i].rect.width); float y1 = (objects[i].rect.y + objects[i].rect.height); x0 = (x0 - tmp_w) * ratio_x; y0 = (y0 - tmp_h) * ratio_y; x1 = (x1 - tmp_w) * ratio_x; y1 = (y1 - tmp_h) * ratio_y; x0 = std::max(std::min(x0, (float)(img.cols - 1)), 0.f); y0 = std::max(std::min(y0, (float)(img.rows - 1)), 0.f); x1 = std::max(std::min(x1, (float)(img.cols - 1)), 0.f); y1 = std::max(std::min(y1, (float)(img.rows - 1)), 0.f); objects[i].rect.x = x0; objects[i].rect.y = y0; objects[i].rect.width = x1 - x0; objects[i].rect.height = y1 - y0; } draw_objects(img, objects); /* release tengine */ postrun_graph(graph); destroy_graph(graph); release_tengine();
6 可视化推理结果
5 YOLOv5-Lite对比结果
6 参考
[1].https://blog.csdn.net/weixin_45829462/article/details/119767896
[2].https://github.com/OAID/Tengine
[3].https://github.com/ppogg/YOLOv5-Lite