|
onnx2versal
|
Vector implementation for int8 BCHW maxpool with a 2x2 kernel. Requires INP_W%16==0, OUT_W%8==0, KH==KW==2, and TT==int8_t or uint8_t. Maxpool2x2Int8BCHW::filter<24,32,16,1,6> total = 324.
#include <pool.h>
Public Member Functions
void filter (input_window< TT > *in_window, output_window< TT > *out_window)
Static Public Member Functions
static void registerKernelClass ()
void Maxpool2x2Int8BCHW< TT, INP_H, INP_W, OUT_H, OUT_W, B, C, KH, KW, STEP_H, STEP_W >::filter (input_window< TT > *in_window, output_window< TT > *out_window)
max32 (v64int16 xbuff, int xstart, unsigned int xoffsets, unsigned int xoffsets_hi, unsigned int xsquare, int ystart, unsigned int yoffsets, unsigned int yoffsets_hi, unsigned int ysquare)
Offsets 0x06...00, 0x0e...08 => 0 1 2 3 ... 12 13 14 15, 16 17 18 19 ... 28 29 30 31. Example: max32(v, 0, 0x06040200, 0x0e0c0a08, 0x3210, 32, 0x06040200, 0x0e0c0a08, 0x3210); // first 32 with next 32. Problem: offsets index <= 32, and each 4-bit offset selects 2 adjacent lanes.
128 int16 max