#include #include #include #include "vendor_ai.h" #include #include #include "yolov5s_postprocess.h" #include #define MAX(x, y) (((x) > (y))?(x):(y)) #define MIN(x, y) (((x) < (y))?(x):(y)) FLOAT sigmoid(float x) { FLOAT result; result = (float)(1. / (1. + exp(-x))); return result; } float fast_exp(float x) { union { UINT32 i; float f; } v; v.i = (1 << 23)*(1.4426950409*x + 126.93490512f); return v.f; } FLOAT fast_sigmoid(float x) { FLOAT result; result = (float)(1. / (1. + fast_exp(-x))); return result; } FLOAT clip(FLOAT n, FLOAT lower, FLOAT upper) { return MAX(lower, MIN(n, upper)); } INT32 find_max_idx(INT16 *layer_data_int16) { INT16 max = layer_data_int16[0]; INT32 max_idx = 0; INT32 i; for (i = 0; i < NUM_CLASS; i+=2) { if ((*layer_data_int16) > max) { max = *layer_data_int16; max_idx = i; } layer_data_int16++; if ((*layer_data_int16) > max) { max = *layer_data_int16; max_idx = i+1; } layer_data_int16++; } return max_idx; } void transpose_data(TRANSPOSE_PARM *t_parm) { UINT32 in_addr = t_parm->in_addr; UINT32 out_addr = t_parm->out_addr; UINT32 width = t_parm->width; UINT32 height = t_parm->height; UINT32 channels = t_parm->channels; UINT32 batch = t_parm->batch_num; UINT32 in_line_ofs = t_parm->in_line_ofs; UINT32 in_ch_ofs = t_parm->in_channel_ofs; UINT32 in_batch_ofs = t_parm->in_batch_ofs; UINT32 out_line_ofs = t_parm->out_line_ofs; UINT32 out_ch_ofs = t_parm->out_channel_ofs; UINT32 out_batch_ofs = t_parm->out_batch_ofs; INT8 *order = t_parm->order; UINT32 in_shape[NN_AXIS_NUM], out_shape[NN_AXIS_NUM]; UINT32 in_ofs_old[NN_AXIS_NUM], in_ofs[NN_AXIS_NUM]; UINT32 in_x_ofs, in_y_ofs, in_c_ofs, in_n_ofs; UINT32 nin, cin, yin, xin; UINT32 nout, cout, yout, xout; UINT32 i, n, c, x, y; in_shape[0] = batch; in_shape[1] = channels; in_shape[2] = height; in_shape[3] = width; in_ofs_old[0] = in_batch_ofs; in_ofs_old[1] = in_ch_ofs; in_ofs_old[2] = in_line_ofs; in_ofs_old[3] = sizeof(INT16); for (i = 0; i < NN_AXIS_NUM; i++) { out_shape[i] = in_shape[order[i]]; in_ofs[i] = in_ofs_old[order[i]]; } batch = out_shape[0]; channels = out_shape[1]; height = out_shape[2]; width = out_shape[3]; in_n_ofs = in_ofs[0]; in_c_ofs = in_ofs[1]; in_y_ofs = in_ofs[2]; in_x_ofs = in_ofs[3]; nin = in_addr; nout = out_addr; for (n = 0; n < batch; n++) { cin = nin; cout = nout; for (c = 0; c < channels; c++) { yin = cin; yout = cout; for (y = 0; y < height; y++) { xin = yin; xout = yout; for (x = 0; x < width; x++) { *(INT16 *)xout = *(INT16 *)xin; xin += in_x_ofs; xout += sizeof(INT16); } yin += in_y_ofs; yout += out_line_ofs; } cin += in_c_ofs; cout += out_ch_ofs; } nin += in_n_ofs; nout += out_batch_ofs; } } void post_process_single(YOLOV5S_Bbox *yolov5s_post_bbox,INT32 input_w, INT32 input_h, INT32 height, INT32 width, INT32 idx, FLOAT conf_thre, INT16 *layer_data_int16, INT32 *strides, YOLOV5S_Anchor *anchor, INT32 *bbox_num, INT8 frac_bits, float scale_ratio) { YOLOV5S_Bbox tmp_bbox; YOLOV5S_Bbox *predict_bbox = yolov5s_post_bbox; FLOAT cx, cy, w_b, h_b, score; INT32 cid; INT16 *ptr = layer_data_int16; INT16 *cls_ptr = NULL; INT32 a; FLOAT h = 0.0, w = 0.0; FLOAT h_max, w_max; FLOAT h_min = -0.5, w_min = -0.5; h_max = (FLOAT)(height) - 1.5; w_max = (FLOAT)(width) - 1.5; float frac_scale = (float)(1.0 / (FLOAT)(1 << frac_bits) * scale_ratio); INT32 bbox_ind = bbox_num[0]; for (a = 0; a < 3; ++a) { for (h = h_min; h <= h_max; h = h + 1.0) { for (w = w_min; w <= w_max; w = w + 1.0) { cls_ptr = ptr + 5; cid = find_max_idx(cls_ptr); score = fast_sigmoid(ptr[4] * frac_scale) * fast_sigmoid(cls_ptr[cid] * frac_scale); //printf("xx: %d cid: %d score: %f %f %f %f %f\r\n", xx, cid , score, ptr[4], cls_ptr[cid], fast_sigmoid(ptr[4]), fast_sigmoid(cls_ptr[cid])); if (score >= conf_thre) { cx = (fast_sigmoid(ptr[0] * frac_scale)* 2.f + (float)(w)) * (float)(strides[idx]); cy = (fast_sigmoid(ptr[1] * frac_scale)* 2.f + (float)(h)) * (float)(strides[idx]); w_b = pow(fast_sigmoid(ptr[2] * frac_scale) * 2.f, 2.0) * anchor[idx * 3 + a].width; h_b = pow(fast_sigmoid(ptr[3] * frac_scale) * 2.f, 2.0) * anchor[idx * 3 + a].height; tmp_bbox.xmin = clip(cx - w_b / 2, 0.f, (float)(input_w - 1)); tmp_bbox.ymin = clip(cy - h_b / 2, 0.f, (float)(input_h - 1)); tmp_bbox.xmax = clip(cx + w_b / 2, 0.f, (float)(input_w - 1)); tmp_bbox.ymax = clip(cy + h_b / 2, 0.f, (float)(input_h - 1)); tmp_bbox.score = score; tmp_bbox.cid = cid; //printf("yy:%d cid: %d score: %f [%f %f %f %f]\r\n", yy, tmp_bbox.cid, tmp_bbox.score, tmp_bbox.xmin, tmp_bbox.ymin, tmp_bbox.xmax, tmp_bbox.ymax); predict_bbox[bbox_ind] = tmp_bbox; bbox_ind++; //yy++; } ptr += 5 + NUM_CLASS; //xx++; } } //printf("w = %f, h = %f\r\n", w, h); } bbox_num[0] = bbox_ind; //printf("yy = %d\r\n", yy); } void quick_sort(YOLOV5S_Bbox *yolov5s_post_bbox, INT32 left, INT32 right) { YOLOV5S_Bbox *bbox = yolov5s_post_bbox; if (left >= right) { return; } INT32 l = left; INT32 r = right; FLOAT key_xmin = bbox[left].xmin; FLOAT key_ymin = bbox[left].ymin; FLOAT key_xmax = bbox[left].xmax; FLOAT key_ymax = bbox[left].ymax; FLOAT key_score = bbox[left].score; FLOAT key_cid = bbox[left].cid; while (l < r) { while ((l < r) && (key_score >= bbox[r].score)) { r--; } if (l < r) { bbox[l].xmin = bbox[r].xmin; bbox[l].ymin = bbox[r].ymin; bbox[l].xmax = bbox[r].xmax; bbox[l].ymax = bbox[r].ymax; bbox[l].score = bbox[r].score; bbox[l].cid = bbox[r].cid; } while ((l < r) && (key_score <= bbox[l].score)) { l++; } if (l < r) { bbox[r].xmin = bbox[l].xmin; bbox[r].ymin = bbox[l].ymin; bbox[r].xmax = bbox[l].xmax; bbox[r].ymax = bbox[l].ymax; bbox[r].score = bbox[l].score; bbox[r].cid = bbox[l].cid; r--; } } bbox[l].xmin = key_xmin; bbox[l].ymin = key_ymin; bbox[l].xmax = key_xmax; bbox[l].ymax = key_ymax; bbox[l].score = key_score; bbox[l].cid = key_cid; if (left < (l - 1)) { quick_sort(yolov5s_post_bbox, left, (l - 1)); } if ((l + 1) < right) { quick_sort(yolov5s_post_bbox, (l + 1), right); } } INT32 yolov5s_nms(YOLOV5S_Bbox *yolov5s_final_bbox, YOLOV5S_Bbox *yolov5s_post_bbox, FLOAT nms_thre, INT32 *bbox_num) { YOLOV5S_Bbox *bbx = yolov5s_post_bbox; YOLOV5S_Bbox *final_bbx = yolov5s_final_bbox; INT32 num = bbox_num[0]; INT32 i, j, out_num = 0; FLOAT tmp_w, tmp_h; FLOAT left, right, top, bottom, width, height, u_area, iou; if (num == 0) return 0; if (num > 1) { quick_sort(bbx, 0, num - 1); } //printf("quick_sort done!\r\n"); FLOAT *area = (FLOAT *)malloc(sizeof(FLOAT) * num); for (i = 0; i < num; ++i) { tmp_w = bbx[i].xmax - bbx[i].xmin + 1; tmp_h = bbx[i].ymax - bbx[i].ymin + 1; area[i] = tmp_w * tmp_h; //printf("i:%d area:%.13f\r\n", i, area[i]); } //printf("area done!\r\n"); for (i = 0; i < num; ++i) { if(bbx[i].score == -1.0) continue; for (j = i + 1; j < num; ++j) { if(bbx[j].score == -1.0) continue; left = MAX(bbx[i].xmin, bbx[j].xmin); right = MIN(bbx[i].xmax, bbx[j].xmax); top = MAX(bbx[i].ymin, bbx[j].ymin); bottom = MIN(bbx[i].ymax, bbx[j].ymax); width = MAX(right - left + 1, 0.f); height = MAX(bottom - top + 1, 0.f); u_area = height * width; iou = (u_area) / (area[i] + area[j] - u_area); if (iou >= nms_thre) { bbx[j].score = -1.0; area[j] = -1.0; } } } //printf("bbx done!\r\n"); for (i = 0; i < num; ++i) { if (bbx[i].score == -1.0) continue; final_bbx[out_num].xmin = bbx[i].xmin; final_bbx[out_num].ymin = bbx[i].ymin; final_bbx[out_num].xmax = bbx[i].xmax; final_bbx[out_num].ymax = bbx[i].ymax; final_bbx[out_num].score= bbx[i].score; final_bbx[out_num].cid = bbx[i].cid; out_num++; } //printf("final_bbx done!\r\n"); free(area); return out_num; }