#include "post_process.h"
#include "lyn_plugin.h"
#include "lyn_plugin_dev.h"
#include "string.h"
#include "drawTool.h"

#include <sys/time.h>

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <cwchar>
#include <functional>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <string>
#include <utility>
#include <vector>

/// Parameters handed from the plugin entry point to the YOLOv5 post-processing
/// routines. All fields are filled in by lynPostProcess() below.
typedef struct {
  int height;             ///< model input height
  int width;              ///< model input width
  int ori_height;         ///< original image height
  int ori_width;          ///< original image width
  float score_threshold;  ///< minimum confidence for a detection to be kept
  float nms_threshold;    ///< IoU threshold used by NMS
  int nms_top_k;          ///< forwarded to yolo5Nms (currently not enforced there)
  int is_pad_resize;      ///< non-zero: a single ratio is used for both axes
  void *output_tensor;    ///< raw network output, read as 16-bit half floats
  uint32_t class_nums;    ///< number of class entries per prediction
  uint16_t anchor_size;   ///< number of anchors per grid cell
} lynYolo5PostProcessInfo_t;

/// One detection box in the flat result array returned by Yolo5PostProcess().
typedef struct BboxResult {
  float xmin;
  float ymin;
  float xmax;
  float ymax;
  float score;
  int id;
  wchar_t* class_name;  ///< not owned; points into default_yolo5_config.class_names
} BboxResult;

/// Array of detection boxes. `result` is allocated with new[] by
/// Yolo5PostProcess() and must be released with delete[] by the caller.
typedef struct DetectionResult {
  int boxNum;
  BboxResult* result;
} DetectionResult;

/// Static model configuration: per-layer strides and class labels.
struct Yolo5Config {
  std::vector<int> strides;
  int class_num;
  std::vector<std::wstring> class_names;
};

/// Output layout selector, see tensorpostProcess().
enum class ModelType { Normal, X7 };

// Standard YOLOv5 has 80 classes (the previous value here was 80); when the
// post-processing is done inside the APU the corresponding class count is 2.
#define CLASS_NUM 2

static Yolo5Config default_yolo5_config = {
    {8, 16, 32},
    CLASS_NUM,
    // A 6-entry label set (person, fire, smoke, safety helmet, work clothes,
    // cigarette butt) was previously used here instead of the list below.
    {L"person",        L"bicycle",      L"car",
     L"motorcycle",    L"airplane",     L"bus",
     L"train",         L"truck",        L"boat",
     L"traffic light", L"fire hydrant", L"stop sign",
     L"parking meter", L"bench",        L"bird",
     L"cat",           L"dog",          L"horse",
     L"sheep",         L"cow",          L"elephant",
     L"bear",          L"zebra",        L"giraffe",
     L"backpack",      L"umbrella",     L"handbag",
     L"tie",           L"suitcase",     L"frisbee",
     L"skis",          L"snowboard",    L"sports ball",
     L"kite",          L"baseball bat", L"baseball glove",
     L"skateboard",    L"surfboard",    L"tennis racket",
     L"bottle",        L"wine glass",   L"cup",
     L"fork",          L"knife",        L"spoon",
     L"bowl",          L"banana",       L"apple",
     L"sandwich",      L"orange",       L"broccoli",
     L"carrot",        L"hot dog",      L"pizza",
     L"donut",         L"cake",         L"chair",
     L"couch",         L"potted plant", L"bed",
     L"dining table",  L"toilet",       L"tv",
     L"laptop",        L"mouse",        L"remote",
     L"keyboard",      L"cell phone",   L"microwave",
     L"oven",          L"toaster",      L"sink",
     L"refrigerator",  L"book",         L"clock",
     L"vase",          L"scissors",     L"teddy bear",
     L"hair drier",    L"toothbrush"}};

/// Index of the smallest element in [first, last).
template <class ForwardIterator>
inline size_t argmin(ForwardIterator first, ForwardIterator last) {
  return std::distance(first, std::min_element(first, last));
}

/// Index of the largest element in [first, last).
template <class ForwardIterator>
inline size_t argmax(ForwardIterator first, ForwardIterator last) {
  return std::distance(first, std::max_element(first, last));
}

/// Axis-aligned box given by its two corners.
typedef struct Bbox {
  float xmin = 0.0f;
  float ymin = 0.0f;
  float xmax = 0.0f;
  float ymax = 0.0f;

  Bbox() = default;
  Bbox(float xmin, float ymin, float xmax, float ymax)
      : xmin(xmin), ymin(ymin), xmax(xmax), ymax(ymax) {}
} Bbox;

/// A scored, classified box. Every member has a defined default so that no
/// constructor leaves `class_name` (or anything else) uninitialised.
typedef struct Detection {
  int id = -1;
  float score = 0.0f;
  Bbox bbox;
  const wchar_t* class_name = nullptr;  ///< not owned

  Detection() = default;
  Detection(int id, float score, Bbox bbox) : id(id), score(score), bbox(bbox) {}
  Detection(int id, float score, Bbox bbox, const wchar_t* class_name)
      : id(id), score(score), bbox(bbox), class_name(class_name) {}

  /// Orders detections by score; used to sort in descending score order.
  friend bool operator>(const Detection& lhs, const Detection& rhs) {
    return (lhs.score > rhs.score);
  }
} Detection;

/// Greedy non-maximum suppression.
///
/// @param input          candidates; sorted in place by descending score
/// @param iou_threshold  a lower-scored box is dropped when its IoU with a
///                       kept box is strictly greater than this value
/// @param top_k          currently NOT enforced (the limit was disabled in the
///                       original code); kept for interface compatibility
/// @param result         kept detections are appended here, best score first
/// @param suppress       true: boxes suppress each other regardless of class;
///                       false: only boxes with the same id are compared
void yolo5Nms(std::vector<Detection>& input, float iou_threshold, int top_k,
              std::vector<Detection>& result, bool suppress) {
  (void)top_k;  // intentionally unused, see the parameter documentation

  std::stable_sort(input.begin(), input.end(), std::greater<Detection>());

  const size_t total = input.size();
  std::vector<char> suppressed(total, 0);

  std::vector<float> areas;
  areas.reserve(total);
  for (const Detection& det : input) {
    const float width = det.bbox.xmax - det.bbox.xmin;
    const float height = det.bbox.ymax - det.bbox.ymin;
    areas.push_back(width * height);
  }

  for (size_t i = 0; i < total; ++i) {
    if (suppressed[i]) {
      continue;
    }
    const Bbox& kept = input[i].bbox;

    for (size_t j = i + 1; j < total; ++j) {
      if (suppressed[j]) {
        continue;
      }
      if (!suppress && input[i].id != input[j].id) {
        continue;
      }

      const Bbox& other = input[j].bbox;
      const float xx1 = std::max(kept.xmin, other.xmin);
      const float yy1 = std::max(kept.ymin, other.ymin);
      const float xx2 = std::min(kept.xmax, other.xmax);
      const float yy2 = std::min(kept.ymax, other.ymax);

      // Only boxes with a non-empty intersection can exceed the threshold.
      if (xx2 > xx1 && yy2 > yy1) {
        const float area_intersection = (xx2 - xx1) * (yy2 - yy1);
        const float iou_ratio =
            area_intersection / (areas[j] + areas[i] - area_intersection);
        if (iou_ratio > iou_threshold) {
          suppressed[j] = 1;
        }
      }
    }
    result.push_back(input[i]);
  }
}

// Sign multiplier indexed by the half-float sign bit.
const int sig[2] = {1, -1};
// Value of one unit of a subnormal half-float mantissa (2^-24).
const float result = 5.96046e-08;

/// Converts an IEEE 754 binary16 bit pattern to float.
///
/// A zero exponent field (zero / subnormal) is handled arithmetically; this
/// path was added by puyang.wang@lynxi.com. An all-ones exponent field is
/// mapped to the all-ones float exponent (inf / NaN).
float half2float(int16_t ib) {
  const int sign = (ib >> 15) & 0x1;
  const int exponent = (ib >> 10) & 0x1f;
  const int mantissa = ib & 0x3ff;

  if (0 == exponent) {
    return sig[sign] * mantissa * result;
  }

  // Re-bias the exponent from 15 (half) to 127 (float).
  const uint32_t exponent32 =
      (0x1f == exponent) ? 0xffu : static_cast<uint32_t>(exponent - 15 + 127);
  const uint32_t bits = (static_cast<uint32_t>(sign) << 31) |
                        (exponent32 << 23) |
                        (static_cast<uint32_t>(mantissa) << 13);

  static_assert(sizeof(float) == sizeof(uint32_t), "float must be 32 bits");
  float out;
  std::memcpy(&out, &bits, sizeof(out));  // well-defined replacement for union punning
  return out;
}

/// Converts a float to an IEEE 754 binary16 bit pattern.
///
/// Magnitudes above 65504 are clamped to 65504; magnitudes below half of the
/// smallest subnormal become +0. Modified by puyang.wang@lynxi to fix a bug
/// where values close to 0 were not converted correctly.
int16_t float2half(float value) {
  const int maxExp = 32;     // pow(2, expBitNum) with 5 exponent bits
  const int maxBase = 1024;  // pow(2, baseBitNum) with 10 mantissa bits
  const int biasExp = 15;    // maxExp / 2 - 1

  int16_t s = value < 0;
  int16_t m = 0;
  int e = 0;
  bool inf_flag = false;

  // Smallest positive subnormal half value.
  const double thd2 = 1.0 / maxBase * pow(2, (1 - biasExp));

  double x = s ? -value : value;
  x = (x > 65504) ? 65504 : x;

  if (x < thd2 / 2) {
    // Too small to be represented: flush to +0.
    e = 0;
    m = 0;
    s = 0;
  } else {
    // Inspect log2(x) as a float bit pattern: an all-ones exponent field means
    // the logarithm is inf or NaN (the latter happens for a NaN input).
    const float log2x = static_cast<float>(log2(x));
    uint32_t bits = 0;
    static_assert(sizeof(log2x) == sizeof(bits), "float must be 32 bits");
    std::memcpy(&bits, &log2x, sizeof(bits));

    if (((bits >> 23) & 0xff) == 0xff) {
      e = maxExp - 1;
      if ((bits & 0x7fffff) == 0) {
        inf_flag = true;  // infinity
      } else {
        inf_flag = false;  // NaN: take the sign from the NaN itself
        s = (bits >> 31);
      }
    } else {
      e = biasExp + static_cast<int>(floor(log2x));
    }
    if (e > (maxExp - 1)) printf("[double2uint16]Error: out of e range\n");
  }

  if (e <= 0) {
    // Subnormal: the mantissa counts units of 2^-24.
    e = 0;
    m = round(x * pow(2, (biasExp - 1)) * maxBase);
  } else if (31 == e) {
    m = inf_flag ? 0 : 1;
  } else {
    const double xr = x / pow(2, (e - biasExp)) - 1;
    m = round(xr * maxBase);
  }

  // '+' (not '|') on purpose: a mantissa that rounded up to 1024 carries into
  // the exponent field.
  const int16_t ob = (s & 0x1) << 15 | (((e & 0x1f) << 10) + m);
  return ob;
}

/// Decodes one output layer into detections.
///
/// The tensor is read as 16-bit half values, one group of
/// `anchor_size * (class_nums + 5)` values per grid cell, cells in row-major
/// order. Within a prediction, entries 0..3 are used as centre x, centre y,
/// width and height, entry 4 as objectness. For ModelType::Normal entries
/// 5.. are per-class scores; for ModelType::X7 entry 5 is used as the class id
/// and entry 6 as its score.
///
/// @param tensor     start of this layer's data
/// @param post_info  model / image geometry and thresholds
/// @param layer      index into default_yolo5_config.strides
/// @param dets       decoded detections are appended here
/// @param modelType  selects the per-prediction layout described above
void tensorpostProcess(void* tensor, lynYolo5PostProcessInfo_t* post_info, int layer,
                       std::vector<Detection>& dets, ModelType modelType) {
  const int stride = default_yolo5_config.strides[layer];
  const int num_pred = post_info->class_nums + 4 + 1;
  std::vector<int16_t> class_pred(post_info->class_nums, 0);

  double h_ratio = post_info->height * 1.0 / post_info->ori_height;
  double w_ratio = post_info->width * 1.0 / post_info->ori_width;
  const double resize_ratio = std::min(w_ratio, h_ratio);
  if (post_info->is_pad_resize) {
    w_ratio = resize_ratio;
    h_ratio = resize_ratio;
  }

  // Padding on each side of the resized image; loop-invariant.
  const double w_padding = (post_info->width - w_ratio * post_info->ori_width) / 2.0;
  const double h_padding = (post_info->height - h_ratio * post_info->ori_height) / 2.0;

  const int grid_height = post_info->height / stride;
  const int grid_width = post_info->width / stride;

  // The objectness pre-filter compares raw half bit patterns as signed
  // integers, so the threshold is converted to half once up front.
  const int16_t box_score_threshold = float2half(post_info->score_threshold);

  const int classNameSize = static_cast<int>(default_yolo5_config.class_names.size());

  const int16_t* cell = static_cast<const int16_t*>(tensor);
  const int cell_len = num_pred * post_info->anchor_size;

  for (int h = 0; h < grid_height; h++) {
    for (int w = 0; w < grid_width; w++) {
      for (size_t k = 0; k < post_info->anchor_size; k++) {
        const int16_t* cur_data = cell + k * num_pred;
        const int16_t objness = cur_data[4];
        if (objness < box_score_threshold) {
          continue;
        }

        int32_t id = -1;
        double confidence = 0.0;
        if (modelType == ModelType::X7) {
          id = cur_data[5];
          confidence = half2float(objness) * half2float(cur_data[6]);
        } else if (modelType == ModelType::Normal) {
          for (uint32_t index = 0; index < post_info->class_nums; ++index) {
            class_pred[index] = (cur_data[5 + index]);
          }
          id = argmax(class_pred.begin(), class_pred.end());
          confidence = half2float(objness) * half2float(class_pred[id]);
        }

        if (confidence < post_info->score_threshold) {
          continue;
        }

        const float center_x = half2float(cur_data[0]);
        const float center_y = half2float(cur_data[1]);
        const float scale_x = half2float(cur_data[2]);
        const float scale_y = half2float(cur_data[3]);

        const double xmin = (center_x - scale_x / 2.0);
        const double ymin = (center_y - scale_y / 2.0);
        const double xmax = (center_x + scale_x / 2.0);
        const double ymax = (center_y + scale_y / 2.0);

        // Map from model-input coordinates back to the original image.
        double xmin_org = (xmin - w_padding) / w_ratio;
        double xmax_org = (xmax - w_padding) / w_ratio;
        double ymin_org = (ymin - h_padding) / h_ratio;
        double ymax_org = (ymax - h_padding) / h_ratio;

        if (xmax_org <= 0 || ymax_org <= 0) {
          continue;
        }
        if (xmin_org > xmax_org || ymin_org > ymax_org) {
          continue;
        }

        xmin_org = std::max(xmin_org, 0.0);
        xmax_org = std::min(xmax_org, post_info->ori_width - 1.0);
        ymin_org = std::max(ymin_org, 0.0);
        ymax_org = std::min(ymax_org, post_info->ori_height - 1.0);

        const Bbox bbox(static_cast<float>(xmin_org), static_cast<float>(ymin_org),
                        static_cast<float>(xmax_org), static_cast<float>(ymax_org));

        // '>=': an id equal to the label count is already out of range
        // (the original '>' allowed a one-past-the-end read).
        if (id >= classNameSize || id < 0) {
          LOG_PLUGIN_E("ERROR!!! id: %d\n", id);
          continue;
        }

        dets.push_back(Detection(static_cast<int>(id), static_cast<float>(confidence), bbox,
                                 default_yolo5_config.class_names[id].c_str()));
      }
      cell += cell_len;
    }
  }
}

/// Runs the full post-processing: decodes the three output layers stored back
/// to back in `post_info->output_tensor`, then applies per-class NMS.
///
/// @return the kept boxes. `result` is allocated with new[] and must be
///         released with delete[] by the caller.
DetectionResult Yolo5PostProcess(lynYolo5PostProcessInfo_t* post_info, ModelType modelType) {
  // Number of 16-bit elements occupied by one layer.
  const auto layer_elems = [post_info](int layer) -> size_t {
    const int stride = default_yolo5_config.strides[layer];
    const int grid_width = post_info->width / stride;
    const int grid_height = post_info->height / stride;
    return static_cast<size_t>(post_info->anchor_size) * grid_width * grid_height *
           (post_info->class_nums + 5);
  };

  int16_t* tensor0 = static_cast<int16_t*>(post_info->output_tensor);
  int16_t* tensor1 = tensor0 + layer_elems(0);
  int16_t* tensor2 = tensor1 + layer_elems(1);

  std::vector<Detection> dets;
  tensorpostProcess(tensor0, post_info, 0, dets, modelType);
  tensorpostProcess(tensor1, post_info, 1, dets, modelType);
  tensorpostProcess(tensor2, post_info, 2, dets, modelType);

  std::vector<Detection> det_results;
  yolo5Nms(dets, post_info->nms_threshold, post_info->nms_top_k, det_results, false);

  DetectionResult detResult;
  detResult.boxNum = static_cast<int>(det_results.size());
  detResult.result = new BboxResult[det_results.size()];
  for (size_t i = 0; i < det_results.size(); i++) {
    const Detection& src = det_results[i];
    BboxResult& dst = detResult.result[i];
    dst.xmax = src.bbox.xmax;
    dst.xmin = src.bbox.xmin;
    dst.ymax = src.bbox.ymax;
    dst.ymin = src.bbox.ymin;
    dst.score = src.score;
    dst.id = src.id;
    // BboxResult exposes a non-const pointer; the pointee is never modified here.
    dst.class_name = const_cast<wchar_t*>(src.class_name);
  }
  return detResult;
}

/// Plugin entry point: post-processes the APU output referenced by `para` and
/// writes up to BOX_MAX_NUM boxes into the boxes-info buffer it references.
///
/// @return 0 on success, -1 if `para` is null or a virtual address cannot be
///         obtained.
int lynPostProcess(lynPostProcessPara *para) {
  if (para == nullptr) {
    LOG_PLUGIN_E("post process para is null");
    return -1;
  }

  lynYolo5PostProcessInfo_t postInfo;

  // Standard YOLOv5 has 80 classes and the optimised YOLOv5 model has 2; the
  // class count passed in decides which model was used and therefore which
  // post-processing variant to run.
  const ModelType modelType =
      (para->modelClassNum == 80) ? ModelType::Normal : ModelType::X7;

  postInfo.output_tensor = lynPluginGetVirtAddr(para->apuData);
  if (postInfo.output_tensor == nullptr) {
    LOG_PLUGIN_E("get virtual addr error");
    return -1;
  }

  lynBoxesInfo *boxesInfo = (lynBoxesInfo *)lynPluginGetVirtAddr(para->boxesInfo);
  if (boxesInfo == nullptr) {
    LOG_PLUGIN_E("get virtual addr error");
    return -1;
  }

  postInfo.width = para->modelW;
  postInfo.height = para->modelH;
  postInfo.anchor_size = para->modelAnchorSize;
  postInfo.ori_width = para->imgW;
  postInfo.ori_height = para->imgH;
  postInfo.class_nums = para->modelClassNum;
  postInfo.is_pad_resize = para->isPadResize ? 1 : 0;
  postInfo.score_threshold = para->scoreThreshold;
  postInfo.nms_threshold = para->nmsThreshold;
  postInfo.nms_top_k = para->nmsTopK;

  DetectionResult detections = Yolo5PostProcess(&postInfo, modelType);

  // The output buffer holds at most BOX_MAX_NUM boxes.
  if (detections.boxNum > BOX_MAX_NUM) {
    detections.boxNum = BOX_MAX_NUM;
  }

  for (int i = 0; i < detections.boxNum; i++) {
    const BboxResult& src = detections.result[i];
    boxesInfo->boxes[i].xmin = src.xmin;
    boxesInfo->boxes[i].xmax = src.xmax;
    boxesInfo->boxes[i].ymin = src.ymin;
    boxesInfo->boxes[i].ymax = src.ymax;
    boxesInfo->boxes[i].score = src.score;
    boxesInfo->boxes[i].id = src.id;
    wcsncpy(boxesInfo->boxes[i].lable, src.class_name, LABLE_MAX_LEN - 1);
    // wcsncpy does not terminate when the source fills the whole count.
    // NOTE(review): assumes `lable` holds at least LABLE_MAX_LEN elements.
    boxesInfo->boxes[i].lable[LABLE_MAX_LEN - 1] = L'\0';
  }

  delete[] detections.result;  // allocated by Yolo5PostProcess

  boxesInfo->boxesNum = detections.boxNum;
  return 0;
}