2 #ifndef _NNDEPLOY_PREPROCESS_UTIL_H_
3 #define _NNDEPLOY_PREPROCESS_UTIL_H_
23 namespace preprocess {
29 const float *__restrict scale,
30 const float *__restrict mean,
31 const float *__restrict std) {
32 float *dst_tmp = (
float *)malloc(size *
sizeof(
float));
33 const float mul_scale = scale[0] / std[0];
34 const float add_bias = -mean[0] / std[0];
35 for (
size_t i = 0; i < size; ++i) {
36 dst_tmp[i] = src[i] * mul_scale + add_bias;
43 const float *__restrict scale,
44 const float *__restrict mean,
45 const float *__restrict std) {
46 float *dst_tmp = (
float *)malloc(size * 2 *
sizeof(
float));
47 const float mul_scale[2] = {scale[0] / std[0], scale[1] / std[1]};
48 const float add_bias[2] = {-mean[0] / std[0], -mean[1] / std[1]};
49 for (
size_t i = 0; i < size * 2; i += 2) {
50 dst_tmp[i] = src[i] * mul_scale[0] + add_bias[0];
51 dst_tmp[i + 1] = src[i + 1] * mul_scale[1] + add_bias[1];
58 const float *__restrict scale,
59 const float *__restrict mean,
60 const float *__restrict std) {
61 float *dst_tmp = (
float *)malloc(size * 3 *
sizeof(
float));
62 const float mul_scale[3] = {scale[0] / std[0], scale[1] / std[1],
64 const float add_bias[3] = {-mean[0] / std[0], -mean[1] / std[1],
66 for (
size_t i = 0; i < size * 3; i += 3) {
67 dst_tmp[i] = src[i] * mul_scale[0] + add_bias[0];
68 dst_tmp[i + 1] = src[i + 1] * mul_scale[1] + add_bias[1];
69 dst_tmp[i + 2] = src[i + 2] * mul_scale[2] + add_bias[2];
76 const float *__restrict scale,
77 const float *__restrict mean,
78 const float *__restrict std) {
79 float *dst_tmp = (
float *)malloc(size * 4 *
sizeof(
float));
80 const float mul_scale[4] = {scale[0] / std[0], scale[1] / std[1],
81 scale[2] / std[2], scale[3] / std[3]};
82 const float add_bias[4] = {-mean[0] / std[0], -mean[1] / std[1],
83 -mean[2] / std[2], -mean[3] / std[3]};
84 for (
size_t i = 0; i < size * 3; i += 3) {
85 dst_tmp[i] = src[i] * mul_scale[0] + add_bias[0];
86 dst_tmp[i + 1] = src[i + 1] * mul_scale[1] + add_bias[1];
87 dst_tmp[i + 2] = src[i + 2] * mul_scale[2] + add_bias[2];
88 dst_tmp[i + 3] = src[i + 3] * mul_scale[3] + add_bias[3];
95 size_t size,
const float *__restrict scale,
96 const float *__restrict mean,
97 const float *__restrict std) {
98 float *dst_tmp = (
float *)malloc(size * c *
sizeof(
float));
99 float *mul_scale = (
float *)malloc(c *
sizeof(
float));
100 float *add_bias = (
float *)malloc(c *
sizeof(
float));
101 for (
int j = 0; j < c; ++j) {
102 mul_scale[j] = scale[j] / std[j];
103 add_bias[j] = -mean[j] / std[j];
105 for (
size_t i = 0; i < size; i++) {
107 for (
int j = 0; j < c; ++j) {
108 dst_tmp[ii + j] = src[ii + j] * mul_scale[j] + add_bias[j];
117 template <
typename T>
119 const float *__restrict scale,
120 const float *__restrict mean,
121 const float *__restrict std) {
122 float *dst_tmp = (
float *)malloc(size *
sizeof(
float));
123 const float mul_scale = scale[0] / std[0];
124 const float add_bias = -mean[0] / std[0];
125 for (
size_t i = 0; i < size; ++i) {
126 dst_tmp[i] = src[i] * mul_scale + add_bias;
132 template <
typename T>
134 const float *__restrict scale,
135 const float *__restrict mean,
136 const float *__restrict std) {
137 float *dst_tmp = (
float *)malloc(size * 2 *
sizeof(
float));
138 const float mul_scale[2] = {scale[0] / std[0], scale[1] / std[1]};
139 const float add_bias[2] = {-mean[0] / std[0], -mean[1] / std[1]};
140 for (
size_t i = 0; i < size * 2; i += 2) {
141 dst_tmp[i] = src[i] * mul_scale[0] + add_bias[0];
142 dst_tmp[i + 1] = src[i + 1] * mul_scale[1] + add_bias[1];
147 template <
typename T>
149 const float *__restrict scale,
150 const float *__restrict mean,
151 const float *__restrict std) {
152 float *dst_tmp = (
float *)malloc(size * 3 *
sizeof(
float));
153 const float mul_scale[3] = {scale[0] / std[0], scale[1] / std[1],
155 const float add_bias[3] = {-mean[0] / std[0], -mean[1] / std[1],
157 for (
size_t i = 0; i < size * 3; i += 3) {
158 dst_tmp[i] = src[i] * mul_scale[0] + add_bias[0];
159 dst_tmp[i + 1] = src[i + 1] * mul_scale[1] + add_bias[1];
160 dst_tmp[i + 2] = src[i + 2] * mul_scale[2] + add_bias[2];
165 template <
typename T>
167 const float *__restrict scale,
168 const float *__restrict mean,
169 const float *__restrict std) {
170 float *dst_tmp = (
float *)malloc(size * 4 *
sizeof(
float));
171 const float mul_scale[4] = {scale[0] / std[0], scale[1] / std[1],
172 scale[2] / std[2], scale[3] / std[3]};
173 const float add_bias[4] = {-mean[0] / std[0], -mean[1] / std[1],
174 -mean[2] / std[2], -mean[3] / std[3]};
175 for (
size_t i = 0; i < size * 3; i += 3) {
176 dst_tmp[i] = src[i] * mul_scale[0] + add_bias[0];
177 dst_tmp[i + 1] = src[i + 1] * mul_scale[1] + add_bias[1];
178 dst_tmp[i + 2] = src[i + 2] * mul_scale[2] + add_bias[2];
179 dst_tmp[i + 3] = src[i + 3] * mul_scale[3] + add_bias[3];
184 template <
typename T>
186 size_t size,
const float *__restrict scale,
187 const float *__restrict mean,
188 const float *__restrict std) {
189 float *dst_tmp = (
float *)malloc(size * c *
sizeof(
float));
190 float *mul_scale = (
float *)malloc(c *
sizeof(
float));
191 float *add_bias = (
float *)malloc(c *
sizeof(
float));
192 for (
int j = 0; j < c; ++j) {
193 mul_scale[j] = scale[j] / std[j];
194 add_bias[j] = -mean[j] / std[j];
196 for (
size_t i = 0; i < size; i++) {
198 for (
int j = 0; j < c; ++j) {
199 dst_tmp[ii + j] = src[ii + j] * mul_scale[j] + add_bias[j];
/**
 * @brief Normalize a single-channel buffer into float output.
 *
 * Each element is mapped as dst = src * (scale[0] / std[0]) - mean[0] / std[0].
 *
 * @param src   input buffer holding `size` elements
 * @param dst   output buffer holding `size` floats; must not alias `src`
 * @param size  number of elements to process
 * @param scale per-channel scale factor (only index 0 is read)
 * @param mean  per-channel mean (only index 0 is read)
 * @param std   per-channel standard deviation (only index 0 is read)
 */
template <typename T>
void normalizeFp32C1(const T *__restrict src, float *__restrict dst,
                     size_t size, const float *__restrict scale,
                     const float *__restrict mean,
                     const float *__restrict std) {
  // Fold scale/mean/std into one multiply-add so the loop does no division.
  const float mul_scale = scale[0] / std[0];
  const float add_bias = -mean[0] / std[0];
  for (size_t i = 0; i < size; ++i) {
    dst[i] = src[i] * mul_scale + add_bias;
  }
}
/**
 * @brief Normalize an interleaved two-channel buffer into float output.
 *
 * For channel k of every pixel:
 *   dst = src * (scale[k] / std[k]) - mean[k] / std[k].
 *
 * @param src   input buffer holding `size * 2` interleaved elements
 * @param dst   output buffer holding `size * 2` floats; must not alias `src`
 * @param size  number of pixels (elements per channel)
 * @param scale per-channel scale factors (indices 0..1 are read)
 * @param mean  per-channel means (indices 0..1 are read)
 * @param std   per-channel standard deviations (indices 0..1 are read)
 */
template <typename T>
void normalizeFp32C2(const T *__restrict src, float *__restrict dst,
                     size_t size, const float *__restrict scale,
                     const float *__restrict mean,
                     const float *__restrict std) {
  // Fold scale/mean/std into one multiply-add per channel.
  const float mul_scale[2] = {scale[0] / std[0], scale[1] / std[1]};
  const float add_bias[2] = {-mean[0] / std[0], -mean[1] / std[1]};
  const size_t count = size * 2;
  for (size_t i = 0; i < count; i += 2) {
    dst[i] = src[i] * mul_scale[0] + add_bias[0];
    dst[i + 1] = src[i + 1] * mul_scale[1] + add_bias[1];
  }
}
/**
 * @brief Normalize an interleaved three-channel buffer into float output.
 *
 * For channel k of every pixel:
 *   dst = src * (scale[k] / std[k]) - mean[k] / std[k].
 *
 * @param src   input buffer holding `size * 3` interleaved elements
 * @param dst   output buffer holding `size * 3` floats; must not alias `src`
 * @param size  number of pixels (elements per channel)
 * @param scale per-channel scale factors (indices 0..2 are read)
 * @param mean  per-channel means (indices 0..2 are read)
 * @param std   per-channel standard deviations (indices 0..2 are read)
 */
template <typename T>
void normalizeFp32C3(const T *__restrict src, float *__restrict dst,
                     size_t size, const float *__restrict scale,
                     const float *__restrict mean,
                     const float *__restrict std) {
  // Fold scale/mean/std into one multiply-add per channel.
  const float mul_scale[3] = {scale[0] / std[0], scale[1] / std[1],
                              scale[2] / std[2]};
  const float add_bias[3] = {-mean[0] / std[0], -mean[1] / std[1],
                             -mean[2] / std[2]};
  const size_t count = size * 3;
  for (size_t i = 0; i < count; i += 3) {
    dst[i] = src[i] * mul_scale[0] + add_bias[0];
    dst[i + 1] = src[i + 1] * mul_scale[1] + add_bias[1];
    dst[i + 2] = src[i + 2] * mul_scale[2] + add_bias[2];
  }
}
/**
 * @brief Normalize an interleaved four-channel buffer into float output.
 *
 * For channel k of every pixel:
 *   dst = src * (scale[k] / std[k]) - mean[k] / std[k].
 *
 * @param src   input buffer holding `size * 4` interleaved elements
 * @param dst   output buffer holding `size * 4` floats; must not alias `src`
 * @param size  number of pixels (elements per channel)
 * @param scale per-channel scale factors (indices 0..3 are read)
 * @param mean  per-channel means (indices 0..3 are read)
 * @param std   per-channel standard deviations (indices 0..3 are read)
 */
template <typename T>
void normalizeFp32C4(const T *__restrict src, float *__restrict dst,
                     size_t size, const float *__restrict scale,
                     const float *__restrict mean,
                     const float *__restrict std) {
  // Fold scale/mean/std into one multiply-add per channel.
  const float mul_scale[4] = {scale[0] / std[0], scale[1] / std[1],
                              scale[2] / std[2], scale[3] / std[3]};
  const float add_bias[4] = {-mean[0] / std[0], -mean[1] / std[1],
                             -mean[2] / std[2], -mean[3] / std[3]};
  // Each pixel occupies four consecutive elements, so the stride must be 4.
  // A stride of 3 would apply the wrong channel coefficients from the second
  // pixel onward and leave the end of the buffer unprocessed.
  const size_t count = size * 4;
  for (size_t i = 0; i < count; i += 4) {
    dst[i] = src[i] * mul_scale[0] + add_bias[0];
    dst[i + 1] = src[i + 1] * mul_scale[1] + add_bias[1];
    dst[i + 2] = src[i + 2] * mul_scale[2] + add_bias[2];
    dst[i + 3] = src[i + 3] * mul_scale[3] + add_bias[3];
  }
}
/**
 * @brief Normalize an interleaved `c`-channel buffer into float output.
 *
 * For channel j of every pixel:
 *   dst = src * (scale[j] / std[j]) - mean[j] / std[j].
 *
 * Does nothing when `c <= 0` or `size == 0`.
 *
 * @param src   input buffer holding `size * c` interleaved elements
 * @param dst   output buffer holding `size * c` floats; must not alias `src`
 * @param c     number of channels per pixel
 * @param size  number of pixels (elements per channel)
 * @param scale per-channel scale factors (indices 0..c-1 are read)
 * @param mean  per-channel means (indices 0..c-1 are read)
 * @param std   per-channel standard deviations (indices 0..c-1 are read)
 */
template <typename T>
void normalizeFp32CN(const T *__restrict src, float *__restrict dst,
                     const int c, size_t size,
                     const float *__restrict scale,
                     const float *__restrict mean,
                     const float *__restrict std) {
  if (c <= 0 || size == 0) {
    return;  // nothing to do; also avoids a zero or negative allocation size
  }
  const size_t channels = static_cast<size_t>(c);

  // One scratch allocation holds both coefficient tables:
  // [0, channels) multipliers, [channels, 2 * channels) offsets.
  float *coeffs = static_cast<float *>(malloc(2 * channels * sizeof(float)));
  if (coeffs == nullptr) {
    // Allocation failed: fall back to a channel-major pass that needs no
    // scratch memory and produces the same values.
    for (size_t j = 0; j < channels; ++j) {
      const float mul = scale[j] / std[j];
      const float add = -mean[j] / std[j];
      for (size_t i = 0; i < size; ++i) {
        const size_t idx = i * channels + j;
        dst[idx] = src[idx] * mul + add;
      }
    }
    return;
  }

  float *mul_scale = coeffs;
  float *add_bias = coeffs + channels;
  for (size_t j = 0; j < channels; ++j) {
    mul_scale[j] = scale[j] / std[j];
    add_bias[j] = -mean[j] / std[j];
  }
  for (size_t i = 0; i < size; ++i) {
    const size_t ii = i * channels;  // offset of the first channel of pixel i
    for (size_t j = 0; j < channels; ++j) {
      dst[ii + j] = src[ii + j] * mul_scale[j] + add_bias[j];
    }
  }
  free(coeffs);
}
/**
 * @brief Normalize a single-channel buffer, converting each result to T2.
 *
 * Each element is computed as src * (scale[0] / std[0]) - mean[0] / std[0]
 * and then converted to the destination element type.
 *
 * @tparam T1   source element type
 * @tparam T2   destination element type
 * @param src   input buffer holding `size` elements
 * @param dst   output buffer holding `size` elements; must not alias `src`
 * @param size  number of elements to process
 * @param scale per-channel scale factor (only index 0 is read)
 * @param mean  per-channel mean (only index 0 is read)
 * @param std   per-channel standard deviation (only index 0 is read)
 */
template <typename T1, typename T2>
void normalizeC1(const T1 *__restrict src, T2 *__restrict dst, size_t size,
                 const float *__restrict scale, const float *__restrict mean,
                 const float *__restrict std) {
  // Fold scale/mean/std into one multiply-add so the loop does no division.
  const float mul_scale = scale[0] / std[0];
  const float add_bias = -mean[0] / std[0];
  for (size_t i = 0; i < size; ++i) {
    dst[i] = static_cast<T2>(src[i] * mul_scale + add_bias);
  }
}
/**
 * @brief Normalize an interleaved two-channel buffer, converting to T2.
 *
 * For channel k of every pixel the value
 *   src * (scale[k] / std[k]) - mean[k] / std[k]
 * is computed and converted to the destination element type.
 *
 * @tparam T1   source element type
 * @tparam T2   destination element type
 * @param src   input buffer holding `size * 2` interleaved elements
 * @param dst   output buffer holding `size * 2` elements; must not alias `src`
 * @param size  number of pixels (elements per channel)
 * @param scale per-channel scale factors (indices 0..1 are read)
 * @param mean  per-channel means (indices 0..1 are read)
 * @param std   per-channel standard deviations (indices 0..1 are read)
 */
template <typename T1, typename T2>
void normalizeC2(const T1 *__restrict src, T2 *__restrict dst, size_t size,
                 const float *__restrict scale, const float *__restrict mean,
                 const float *__restrict std) {
  // Fold scale/mean/std into one multiply-add per channel.
  const float mul_scale[2] = {scale[0] / std[0], scale[1] / std[1]};
  const float add_bias[2] = {-mean[0] / std[0], -mean[1] / std[1]};
  const size_t count = size * 2;
  for (size_t i = 0; i < count; i += 2) {
    dst[i] = static_cast<T2>(src[i] * mul_scale[0] + add_bias[0]);
    dst[i + 1] = static_cast<T2>(src[i + 1] * mul_scale[1] + add_bias[1]);
  }
}
/**
 * @brief Normalize an interleaved three-channel buffer, converting to T2.
 *
 * For channel k of every pixel the value
 *   src * (scale[k] / std[k]) - mean[k] / std[k]
 * is computed and converted to the destination element type.
 *
 * @tparam T1   source element type
 * @tparam T2   destination element type
 * @param src   input buffer holding `size * 3` interleaved elements
 * @param dst   output buffer holding `size * 3` elements; must not alias `src`
 * @param size  number of pixels (elements per channel)
 * @param scale per-channel scale factors (indices 0..2 are read)
 * @param mean  per-channel means (indices 0..2 are read)
 * @param std   per-channel standard deviations (indices 0..2 are read)
 */
template <typename T1, typename T2>
void normalizeC3(const T1 *__restrict src, T2 *__restrict dst, size_t size,
                 const float *__restrict scale, const float *__restrict mean,
                 const float *__restrict std) {
  // Fold scale/mean/std into one multiply-add per channel.
  const float mul_scale[3] = {scale[0] / std[0], scale[1] / std[1],
                              scale[2] / std[2]};
  const float add_bias[3] = {-mean[0] / std[0], -mean[1] / std[1],
                             -mean[2] / std[2]};
  const size_t count = size * 3;
  for (size_t i = 0; i < count; i += 3) {
    dst[i] = static_cast<T2>(src[i] * mul_scale[0] + add_bias[0]);
    dst[i + 1] = static_cast<T2>(src[i + 1] * mul_scale[1] + add_bias[1]);
    dst[i + 2] = static_cast<T2>(src[i + 2] * mul_scale[2] + add_bias[2]);
  }
}
/**
 * @brief Normalize an interleaved four-channel buffer, converting to T2.
 *
 * For channel k of every pixel the value
 *   src * (scale[k] / std[k]) - mean[k] / std[k]
 * is computed and converted to the destination element type.
 *
 * @tparam T1   source element type
 * @tparam T2   destination element type
 * @param src   input buffer holding `size * 4` interleaved elements
 * @param dst   output buffer holding `size * 4` elements; must not alias `src`
 * @param size  number of pixels (elements per channel)
 * @param scale per-channel scale factors (indices 0..3 are read)
 * @param mean  per-channel means (indices 0..3 are read)
 * @param std   per-channel standard deviations (indices 0..3 are read)
 */
template <typename T1, typename T2>
void normalizeC4(const T1 *__restrict src, T2 *__restrict dst, size_t size,
                 const float *__restrict scale, const float *__restrict mean,
                 const float *__restrict std) {
  // Fold scale/mean/std into one multiply-add per channel.
  const float mul_scale[4] = {scale[0] / std[0], scale[1] / std[1],
                              scale[2] / std[2], scale[3] / std[3]};
  const float add_bias[4] = {-mean[0] / std[0], -mean[1] / std[1],
                             -mean[2] / std[2], -mean[3] / std[3]};
  // Each pixel occupies four consecutive elements, so the stride must be 4.
  // A stride of 3 would apply the wrong channel coefficients from the second
  // pixel onward and leave the end of the buffer unprocessed.
  const size_t count = size * 4;
  for (size_t i = 0; i < count; i += 4) {
    dst[i] = static_cast<T2>(src[i] * mul_scale[0] + add_bias[0]);
    dst[i + 1] = static_cast<T2>(src[i + 1] * mul_scale[1] + add_bias[1]);
    dst[i + 2] = static_cast<T2>(src[i + 2] * mul_scale[2] + add_bias[2]);
    dst[i + 3] = static_cast<T2>(src[i + 3] * mul_scale[3] + add_bias[3]);
  }
}
/**
 * @brief Normalize an interleaved `c`-channel buffer, converting to T2.
 *
 * For channel j of every pixel the value
 *   src * (scale[j] / std[j]) - mean[j] / std[j]
 * is computed and converted to the destination element type.
 *
 * Does nothing when `c <= 0` or `size == 0`.
 *
 * @tparam T1   source element type
 * @tparam T2   destination element type
 * @param src   input buffer holding `size * c` interleaved elements
 * @param dst   output buffer holding `size * c` elements; must not alias `src`
 * @param c     number of channels per pixel
 * @param size  number of pixels (elements per channel)
 * @param scale per-channel scale factors (indices 0..c-1 are read)
 * @param mean  per-channel means (indices 0..c-1 are read)
 * @param std   per-channel standard deviations (indices 0..c-1 are read)
 */
template <typename T1, typename T2>
void normalizeCN(const T1 *__restrict src, T2 *__restrict dst, const int c,
                 size_t size, const float *__restrict scale,
                 const float *__restrict mean,
                 const float *__restrict std) {
  if (c <= 0 || size == 0) {
    return;  // nothing to do; also avoids a zero or negative allocation size
  }
  // Convert once so every loop index below is compared as size_t.
  const size_t channels = static_cast<size_t>(c);

  // One scratch allocation holds both coefficient tables:
  // [0, channels) multipliers, [channels, 2 * channels) offsets.
  float *coeffs = static_cast<float *>(malloc(2 * channels * sizeof(float)));
  if (coeffs == nullptr) {
    // Allocation failed: fall back to a channel-major pass that needs no
    // scratch memory and produces the same values.
    for (size_t j = 0; j < channels; ++j) {
      const float mul = scale[j] / std[j];
      const float add = -mean[j] / std[j];
      for (size_t i = 0; i < size; ++i) {
        const size_t idx = i * channels + j;
        dst[idx] = static_cast<T2>(src[idx] * mul + add);
      }
    }
    return;
  }

  float *mul_scale = coeffs;
  float *add_bias = coeffs + channels;
  for (size_t j = 0; j < channels; ++j) {
    mul_scale[j] = scale[j] / std[j];
    add_bias[j] = -mean[j] / std[j];
  }
  for (size_t i = 0; i < size; ++i) {
    const size_t ii = i * channels;  // offset of the first channel of pixel i
    for (size_t j = 0; j < channels; ++j) {
      dst[ii + j] = static_cast<T2>(src[ii + j] * mul_scale[j] + add_bias[j]);
    }
  }
  free(coeffs);
}
#define NNDEPLOY_CC_API
api
bool convertFromFloatToFp16(float *fp32, void *fp16, int count)
bool convertFromFloatToBfp16(float *fp32, void *bfp16, int count)
void normalizeFp32C2(const T *__restrict src, float *__restrict dst, size_t size, const float *__restrict scale, const float *__restrict mean, const float *__restrict std)
void normalizeBfp16C3(const T *__restrict src, void *dst, size_t size, const float *__restrict scale, const float *__restrict mean, const float *__restrict std)
void normalizeBfp16C2(const T *__restrict src, void *dst, size_t size, const float *__restrict scale, const float *__restrict mean, const float *__restrict std)
void normalizeC4(const T1 *__restrict src, T2 *__restrict dst, size_t size, const float *__restrict scale, const float *__restrict mean, const float *__restrict std)
void normalizeFp32C4(const T *__restrict src, float *__restrict dst, size_t size, const float *__restrict scale, const float *__restrict mean, const float *__restrict std)
void normalizeFp16C4(const T *__restrict src, void *dst, size_t size, const float *__restrict scale, const float *__restrict mean, const float *__restrict std)
void normalizeFp16C3(const T *__restrict src, void *dst, size_t size, const float *__restrict scale, const float *__restrict mean, const float *__restrict std)
void normalizeBfp16C4(const T *__restrict src, void *dst, size_t size, const float *__restrict scale, const float *__restrict mean, const float *__restrict std)
void normalizeC2(const T1 *__restrict src, T2 *__restrict dst, size_t size, const float *__restrict scale, const float *__restrict mean, const float *__restrict std)
void normalizeFp32C1(const T *__restrict src, float *__restrict dst, size_t size, const float *__restrict scale, const float *__restrict mean, const float *__restrict std)
void normalizeBfp16C1(const T *__restrict src, void *dst, size_t size, const float *__restrict scale, const float *__restrict mean, const float *__restrict std)
void normalizeFp16C1(const T *__restrict src, void *dst, size_t size, const float *__restrict scale, const float *__restrict mean, const float *__restrict std)
void normalizeFp32CN(const T *__restrict src, float *__restrict dst, const int c, size_t size, const float *__restrict scale, const float *__restrict mean, const float *__restrict std)
void normalizeBfp16CN(const T *__restrict src, void *dst, const int c, size_t size, const float *__restrict scale, const float *__restrict mean, const float *__restrict std)
void normalizeC3(const T1 *__restrict src, T2 *__restrict dst, size_t size, const float *__restrict scale, const float *__restrict mean, const float *__restrict std)
void normalizeFp16CN(const T *__restrict src, void *dst, const int c, size_t size, const float *__restrict scale, const float *__restrict mean, const float *__restrict std)
int getChannelByPixelType(base::PixelType pixel_type)
void normalizeC1(const T1 *__restrict src, T2 *__restrict dst, size_t size, const float *__restrict scale, const float *__restrict mean, const float *__restrict std)
void normalizeFp32C3(const T *__restrict src, float *__restrict dst, size_t size, const float *__restrict scale, const float *__restrict mean, const float *__restrict std)
void normalizeCN(const T1 *__restrict src, T2 *__restrict dst, const int c, size_t size, const float *__restrict scale, const float *__restrict mean, const float *__restrict std)
void normalizeFp16C2(const T *__restrict src, void *dst, size_t size, const float *__restrict scale, const float *__restrict mean, const float *__restrict std)