nt9856x/code/application/source/cardv/SrcCode/UIApp/Alg/yolov5s_postprocess.c
2023-03-28 15:07:53 +08:00

306 lines
8.0 KiB
C
Executable File

#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <pthread.h>
#include "vendor_ai.h"
#include <math.h>
#include <stdlib.h>
#include "yolov5s_postprocess.h"
#include <sys/time.h>
/*
 * Function-like maximum / minimum helpers.
 * Each expands to a conditional expression that names the selected argument
 * twice, so an argument may be evaluated more than once: do not pass
 * expressions with side effects (e.g. i++ or a function call that mutates
 * state).
 */
#define MAX(x, y) (((x) > (y))?(x):(y))
#define MIN(x, y) (((x) < (y))?(x):(y))
/*
 * Logistic function 1 / (1 + e^-x).
 * The exponential and the division are carried out in double precision
 * through exp(); the quotient is then narrowed to float before it is
 * returned.
 */
FLOAT sigmoid(float x)
{
    const double denominator = 1. + exp(-x);

    return (float)(1. / denominator);
}
/*
 * Fast approximation of e^x.
 *
 * A linear function of x is scaled by 2^23 and its integer value is
 * reinterpreted as the bit pattern of an IEEE-754 single-precision float:
 * the factor 1.4426950409 is log2(e), so the integer part of the linear term
 * lands in the exponent field and the fractional part in the mantissa.
 *
 * The scaled value is clamped to [0, 0x7F7FFFFF] before the conversion:
 *  - converting a negative or too-large floating value to an unsigned
 *    integer is undefined behaviour, which the unclamped form hit for
 *    x below roughly -88 and for very large x;
 *  - bit patterns above 0x7F7FFFFF would decode as Inf/NaN.
 * Consequently very negative inputs (and NaN) yield 0.0f and very large
 * inputs saturate at the largest finite float.
 *
 * @param x  exponent.
 * @return   approximation of exp(x); always finite and non-negative.
 */
float fast_exp(float x)
{
    /* 0x7F7FFFFF: bit pattern of the largest finite single-precision value. */
    const double max_finite_bits = 2139095039.0;
    union { uint32_t i; float f; } v;
    double bits = (1 << 23)*(1.4426950409*x + 126.93490512f);

    if (!(bits > 0.0)) {
        /* Written as a negated comparison so that NaN also lands here. */
        bits = 0.0;
    } else if (bits > max_finite_bits) {
        bits = max_finite_bits;
    }
    v.i = (uint32_t)bits;
    return v.f;
}
/*
 * Approximate logistic function 1 / (1 + e^-x).
 * Same formula as sigmoid(), but the exponential comes from fast_exp()
 * instead of the C library exp().
 */
FLOAT fast_sigmoid(float x)
{
    const float approx_exp = fast_exp(-x);

    return (float)(1. / (1. + approx_exp));
}
/*
 * Limit n to the range [lower, upper].
 * The upper bound is applied first and the lower bound second, so when
 * lower > upper the result is lower.
 */
FLOAT clip(FLOAT n, FLOAT lower, FLOAT upper)
{
    /* Cap at the upper bound ... */
    if (!(n < upper)) {
        n = upper;
    }
    /* ... then raise to the lower bound. */
    if (lower > n) {
        n = lower;
    }
    return n;
}
/*
 * Return the index of the largest of the NUM_CLASS values starting at
 * layer_data_int16. When several entries share the maximum, the lowest
 * index wins.
 *
 * The scan visits exactly NUM_CLASS elements. (A pairwise-unrolled loop
 * stepping by two reads one element past the end, and can return the
 * out-of-range index NUM_CLASS, whenever NUM_CLASS is odd.)
 *
 * @param layer_data_int16  pointer to at least NUM_CLASS (and at least one)
 *                          INT16 values.
 * @return                  index in [0, NUM_CLASS) of the maximum value.
 */
INT32 find_max_idx(INT16 *layer_data_int16)
{
    INT16 best = layer_data_int16[0];
    INT32 best_idx = 0;
    INT32 i;

    for (i = 1; i < NUM_CLASS; i++) {
        if (layer_data_int16[i] > best) {
            best = layer_data_int16[i];
            best_idx = i;
        }
    }
    return best_idx;
}
/*
 * Copy a 4-axis tensor of INT16 elements from t_parm->in_addr to
 * t_parm->out_addr while permuting its axes.
 *
 * Source axes are numbered 0 = batch, 1 = channel, 2 = line (height),
 * 3 = element within a line (width). t_parm->order[i] selects which source
 * axis becomes output axis i; both the extent and the source byte stride of
 * that axis are taken from the selected source axis.
 *
 * The destination is walked with t_parm->out_batch_ofs,
 * t_parm->out_channel_ofs and t_parm->out_line_ofs as byte strides for
 * output axes 0..2; output axis 3 is written contiguously, one INT16 after
 * another.
 *
 * Addresses and offsets are carried as UINT32 values and converted to
 * pointers only at the point of the element copy.
 */
void transpose_data(TRANSPOSE_PARM *t_parm)
{
    INT8 *order = t_parm->order;
    UINT32 dst_line_step = t_parm->out_line_ofs;
    UINT32 dst_ch_step = t_parm->out_channel_ofs;
    UINT32 dst_batch_step = t_parm->out_batch_ofs;
    UINT32 src_dims[NN_AXIS_NUM], src_steps[NN_AXIS_NUM];
    UINT32 dims[NN_AXIS_NUM], steps[NN_AXIS_NUM];
    UINT32 src_n, src_c, src_y, src_x;
    UINT32 dst_n, dst_c, dst_y, dst_x;
    UINT32 axis, n, c, y, x;

    /* Extents and byte strides of the source, in source axis order. */
    src_dims[0] = t_parm->batch_num;
    src_dims[1] = t_parm->channels;
    src_dims[2] = t_parm->height;
    src_dims[3] = t_parm->width;
    src_steps[0] = t_parm->in_batch_ofs;
    src_steps[1] = t_parm->in_channel_ofs;
    src_steps[2] = t_parm->in_line_ofs;
    src_steps[3] = sizeof(INT16);

    /* Re-express them in output axis order. */
    for (axis = 0; axis < NN_AXIS_NUM; axis++) {
        dims[axis] = src_dims[order[axis]];
        steps[axis] = src_steps[order[axis]];
    }

    /* Walk the output in its natural order, gathering from the source. */
    src_n = t_parm->in_addr;
    dst_n = t_parm->out_addr;
    for (n = 0; n < dims[0]; n++, src_n += steps[0], dst_n += dst_batch_step) {
        src_c = src_n;
        dst_c = dst_n;
        for (c = 0; c < dims[1]; c++, src_c += steps[1], dst_c += dst_ch_step) {
            src_y = src_c;
            dst_y = dst_c;
            for (y = 0; y < dims[2]; y++, src_y += steps[2], dst_y += dst_line_step) {
                src_x = src_y;
                dst_x = dst_y;
                for (x = 0; x < dims[3]; x++, src_x += steps[3], dst_x += sizeof(INT16)) {
                    *(INT16 *)dst_x = *(INT16 *)src_x;
                }
            }
        }
    }
}
/*
 * Decode one output layer into candidate bounding boxes.
 *
 * layer_data_int16 is read as 3 * height * width consecutive records of
 * (5 + NUM_CLASS) INT16 values, ordered anchor-major, then row, then column.
 * Within a record, values 0..3 encode the box centre x/y and width/height,
 * value 4 is the objectness logit, and the remaining NUM_CLASS values are
 * class logits. Every raw value is multiplied by scale_ratio / 2^frac_bits
 * before fast_sigmoid() is applied.
 *
 * A record is kept when sigmoid(objectness) * sigmoid(best class logit) is
 * at least conf_thre. Kept boxes are clipped to [0, input_w - 1] x
 * [0, input_h - 1] and appended to yolov5s_post_bbox starting at index
 * bbox_num[0]; bbox_num[0] is updated to the new count on return.
 *
 * NOTE(review): nothing here bounds the number of boxes appended; the
 * caller's buffer presumably holds the worst case - confirm against caller.
 *
 * @param yolov5s_post_bbox  output array the candidates are appended to.
 * @param input_w, input_h   limits used for clipping box corners.
 * @param height, width      grid size of this layer, in cells.
 * @param idx                layer index; selects strides[idx] and the
 *                           anchors anchor[idx * 3 + 0..2].
 * @param conf_thre          minimum combined score for a box to be kept.
 * @param layer_data_int16   fixed-point layer output described above.
 * @param strides            per-layer cell size multipliers.
 * @param anchor             anchor sizes, three per layer.
 * @param bbox_num           in/out: bbox_num[0] is the running box count.
 * @param frac_bits          number of fractional bits of the INT16 data.
 * @param scale_ratio        extra scale applied to the raw values.
 */
void post_process_single(YOLOV5S_Bbox *yolov5s_post_bbox,INT32 input_w, INT32 input_h, INT32 height, INT32 width, INT32 idx, FLOAT conf_thre, INT16 *layer_data_int16, INT32 *strides, YOLOV5S_Anchor *anchor, INT32 *bbox_num, INT8 frac_bits, float scale_ratio)
{
    const INT32 record_len = 5 + NUM_CLASS;
    /*
     * 2^-frac_bits * scale_ratio. ldexp() gives the same value as dividing
     * by (1 << frac_bits) for 0..30 and stays well defined when frac_bits
     * is negative or >= 31, where the shift would be undefined behaviour.
     */
    const float frac_scale = (float)(ldexp(1.0, -(int)frac_bits) * scale_ratio);
    INT16 *ptr = layer_data_int16;
    INT32 bbox_ind = bbox_num[0];
    INT32 a, row, col;

    for (a = 0; a < 3; ++a) {
        for (row = 0; row < height; ++row) {
            /* Cell origin is offset by -0.5 in both directions. */
            const float grid_y = (float)row - 0.5f;

            for (col = 0; col < width; ++col, ptr += record_len) {
                const float grid_x = (float)col - 0.5f;
                INT16 *cls_ptr = ptr + 5;
                INT32 cid = find_max_idx(cls_ptr);
                FLOAT score = fast_sigmoid(ptr[4] * frac_scale) * fast_sigmoid(cls_ptr[cid] * frac_scale);
                FLOAT cx, cy, w_b, h_b;
                YOLOV5S_Bbox *out;

                /* Negated form so that a NaN score is rejected too. */
                if (!(score >= conf_thre)) {
                    continue;
                }

                cx = (fast_sigmoid(ptr[0] * frac_scale) * 2.f + grid_x) * (float)(strides[idx]);
                cy = (fast_sigmoid(ptr[1] * frac_scale) * 2.f + grid_y) * (float)(strides[idx]);
                w_b = pow(fast_sigmoid(ptr[2] * frac_scale) * 2.f, 2.0) * anchor[idx * 3 + a].width;
                h_b = pow(fast_sigmoid(ptr[3] * frac_scale) * 2.f, 2.0) * anchor[idx * 3 + a].height;

                out = &yolov5s_post_bbox[bbox_ind];
                out->xmin = clip(cx - w_b / 2, 0.f, (float)(input_w - 1));
                out->ymin = clip(cy - h_b / 2, 0.f, (float)(input_h - 1));
                out->xmax = clip(cx + w_b / 2, 0.f, (float)(input_w - 1));
                out->ymax = clip(cy + h_b / 2, 0.f, (float)(input_h - 1));
                out->score = score;
                out->cid = cid;
                bbox_ind++;
            }
        }
    }
    bbox_num[0] = bbox_ind;
}
/*
 * Sort yolov5s_post_bbox[left..right] (both inclusive) in place by
 * descending score.
 *
 * The first element of the range is the pivot. Partitioning moves a "hole"
 * inward from both ends: entries with a score strictly greater than the
 * pivot end up to its left, strictly smaller ones to its right. The sort is
 * not stable.
 *
 * Boxes are moved as whole structures, so every field (including the
 * integer class id) travels unchanged with its box.
 *
 * Only the smaller partition is handled recursively; the larger one is
 * processed by the enclosing loop. This keeps the recursion depth
 * logarithmic in the range length even for already-sorted or all-equal
 * input, which matters on small stacks. The partitions produced, and hence
 * the final order, do not depend on which side is processed first.
 *
 * @param yolov5s_post_bbox  array to sort.
 * @param left               index of the first element of the range.
 * @param right              index of the last element of the range; nothing
 *                           is done when left >= right.
 */
void quick_sort(YOLOV5S_Bbox *yolov5s_post_bbox, INT32 left, INT32 right)
{
    YOLOV5S_Bbox *bbox = yolov5s_post_bbox;

    while (left < right) {
        INT32 l = left;
        INT32 r = right;
        YOLOV5S_Bbox key = bbox[left];

        while (l < r) {
            /* Skip entries on the right that already belong there. */
            while ((l < r) && (key.score >= bbox[r].score)) {
                r--;
            }
            if (l < r) {
                bbox[l] = bbox[r];
            }
            /* Skip entries on the left that already belong there. */
            while ((l < r) && (key.score <= bbox[l].score)) {
                l++;
            }
            if (l < r) {
                bbox[r] = bbox[l];
                r--;
            }
        }
        bbox[l] = key;

        if ((l - left) < (right - l)) {
            quick_sort(bbox, left, l - 1);
            left = l + 1;
        } else {
            quick_sort(bbox, l + 1, right);
            right = l - 1;
        }
    }
}
INT32 yolov5s_nms(YOLOV5S_Bbox *yolov5s_final_bbox, YOLOV5S_Bbox *yolov5s_post_bbox, FLOAT nms_thre, INT32 *bbox_num)
{
YOLOV5S_Bbox *bbx = yolov5s_post_bbox;
YOLOV5S_Bbox *final_bbx = yolov5s_final_bbox;
INT32 num = bbox_num[0];
INT32 i, j, out_num = 0;
FLOAT tmp_w, tmp_h;
FLOAT left, right, top, bottom, width, height, u_area, iou;
if (num == 0)
return 0;
if (num > 1) {
quick_sort(bbx, 0, num - 1);
}
//printf("quick_sort done!\r\n");
FLOAT *area = (FLOAT *)malloc(sizeof(FLOAT) * num);
for (i = 0; i < num; ++i) {
tmp_w = bbx[i].xmax - bbx[i].xmin + 1;
tmp_h = bbx[i].ymax - bbx[i].ymin + 1;
area[i] = tmp_w * tmp_h;
//printf("i:%d area:%.13f\r\n", i, area[i]);
}
//printf("area done!\r\n");
for (i = 0; i < num; ++i) {
if(bbx[i].score == -1.0)
continue;
for (j = i + 1; j < num; ++j) {
if(bbx[j].score == -1.0)
continue;
left = MAX(bbx[i].xmin, bbx[j].xmin);
right = MIN(bbx[i].xmax, bbx[j].xmax);
top = MAX(bbx[i].ymin, bbx[j].ymin);
bottom = MIN(bbx[i].ymax, bbx[j].ymax);
width = MAX(right - left + 1, 0.f);
height = MAX(bottom - top + 1, 0.f);
u_area = height * width;
iou = (u_area) / (area[i] + area[j] - u_area);
if (iou >= nms_thre) {
bbx[j].score = -1.0;
area[j] = -1.0;
}
}
}
//printf("bbx done!\r\n");
for (i = 0; i < num; ++i) {
if (bbx[i].score == -1.0)
continue;
final_bbx[out_num].xmin = bbx[i].xmin;
final_bbx[out_num].ymin = bbx[i].ymin;
final_bbx[out_num].xmax = bbx[i].xmax;
final_bbx[out_num].ymax = bbx[i].ymax;
final_bbx[out_num].score= bbx[i].score;
final_bbx[out_num].cid = bbx[i].cid;
out_num++;
}
//printf("final_bbx done!\r\n");
free(area);
return out_num;
}