// FIRST_STRIDE is OUT_W minus OVERLAP. When OUT_W == OVERLAP it is zero, and the
// division in LCNT below can then no longer be evaluated as a constant expression.
27 static constexpr int FIRST_STRIDE = OUT_W - OVERLAP;
// LCNT counts the width-OUT_W spans that fit in INP_W when advancing by
// FIRST_STRIDE; with integer operands the quotient is truncated.
28 static constexpr int LCNT = (INP_W - OUT_W) / FIRST_STRIDE + 1;
// STRIDE is OUT_W minus twice OVERLAP; it goes negative once 2*OVERLAP > OUT_W.
29 static constexpr int STRIDE = OUT_W - OVERLAP - OVERLAP;
34 output_window<TT>* out0,
35 output_window<TT>* out1,
36 output_window<TT>* out2,
37 output_window<TT>* out3,
38 output_window<TT>* out4,
39 output_window<TT>* out5,
40 output_window<TT>* out6,
41 output_window<TT>* out7
45 output_window<TT>* out0,
46 output_window<TT>* out1,
47 output_window<TT>* out2,
48 output_window<TT>* out3,
49 output_window<TT>* out4,
50 output_window<TT>* out5,
51 output_window<TT>* out6
55 output_window<TT>* out0,
56 output_window<TT>* out1,
57 output_window<TT>* out2,
58 output_window<TT>* out3,
59 output_window<TT>* out4,
60 output_window<TT>* out5
64 output_window<TT>* out0,
65 output_window<TT>* out1,
66 output_window<TT>* out2,
67 output_window<TT>* out3,
68 output_window<TT>* out4
72 output_window<TT>* out0,
73 output_window<TT>* out1,
74 output_window<TT>* out2,
75 output_window<TT>* out3
79 output_window<TT>* out0,
80 output_window<TT>* out1,
81 output_window<TT>* out2
85 output_window<TT>* out0,
86 output_window<TT>* out1
90 output_window<TT>* out0
// Validates the template configuration at compile time, then passes the
// SplitScalar::filterN member matching LCNT to REGISTER_FUNCTION.
92 static void registerKernelClass() {
// OVERLAP > 0: (INP_W - OUT_W) must be an exact multiple of FIRST_STRIDE and STRIDE must be >= 0.
93 static_assert((OVERLAP <= 0) || ((INP_W-OUT_W) % FIRST_STRIDE == 0 && STRIDE >= 0));
// OVERLAP <= 0: OUT_W*LCNT - OVERLAP*(LCNT-1) must not exceed INP_W.
94 static_assert((OVERLAP > 0) || (OUT_W*LCNT - OVERLAP*(LCNT-1) <= INP_W));
// TT must be exactly 4 bytes wide.
95 static_assert(
sizeof(TT) == 4);
// NOTE(review): the condition opening this first branch is not visible here;
// by analogy with the branches below it is presumably LCNT == 8 - confirm.
97 REGISTER_FUNCTION(SplitScalar::filter8);
98 }
else if (LCNT == 7) {
99 REGISTER_FUNCTION(SplitScalar::filter7);
100 }
else if (LCNT == 6) {
101 REGISTER_FUNCTION(SplitScalar::filter6);
102 }
else if (LCNT == 5) {
103 REGISTER_FUNCTION(SplitScalar::filter5);
104 }
else if (LCNT == 4) {
105 REGISTER_FUNCTION(SplitScalar::filter4);
106 }
else if (LCNT == 3) {
107 REGISTER_FUNCTION(SplitScalar::filter3);
108 }
else if (LCNT == 2) {
109 REGISTER_FUNCTION(SplitScalar::filter2);
110 }
else if (LCNT == 1) {
111 REGISTER_FUNCTION(SplitScalar::filter1);
// FIRST_STRIDE is OUT_W minus OVERLAP. When OUT_W == OVERLAP it is zero, and the
// division in LCNT below can then no longer be evaluated as a constant expression.
127 static constexpr int FIRST_STRIDE = OUT_W - OVERLAP;
// LCNT counts the width-OUT_W spans that fit in INP_W when advancing by
// FIRST_STRIDE; with integer operands the quotient is truncated.
128 static constexpr int LCNT = (INP_W - OUT_W) / FIRST_STRIDE + 1;
// STRIDE is OUT_W minus twice OVERLAP; it goes negative once 2*OVERLAP > OUT_W.
129 static constexpr int STRIDE = OUT_W - OVERLAP - OVERLAP;
133 input_stream<TT>* in,
134 output_window<TT>* out0,
135 output_window<TT>* out1,
136 output_window<TT>* out2,
137 output_window<TT>* out3,
138 output_window<TT>* out4,
139 output_window<TT>* out5,
140 output_window<TT>* out6,
141 output_window<TT>* out7
144 input_stream<TT>* in,
145 output_window<TT>* out0,
146 output_window<TT>* out1,
147 output_window<TT>* out2,
148 output_window<TT>* out3,
149 output_window<TT>* out4,
150 output_window<TT>* out5,
151 output_window<TT>* out6
154 input_stream<TT>* in,
155 output_window<TT>* out0,
156 output_window<TT>* out1,
157 output_window<TT>* out2,
158 output_window<TT>* out3,
159 output_window<TT>* out4,
160 output_window<TT>* out5
163 input_stream<TT>* in,
164 output_window<TT>* out0,
165 output_window<TT>* out1,
166 output_window<TT>* out2,
167 output_window<TT>* out3,
168 output_window<TT>* out4
171 input_stream<TT>* in,
172 output_window<TT>* out0,
173 output_window<TT>* out1,
174 output_window<TT>* out2,
175 output_window<TT>* out3
178 input_stream<TT>* in,
179 output_window<TT>* out0,
180 output_window<TT>* out1,
181 output_window<TT>* out2
184 input_stream<TT>* in,
185 output_window<TT>* out0,
186 output_window<TT>* out1
189 input_stream<TT>* in,
190 output_window<TT>* out0
// Validates the template configuration at compile time, then passes the
// SplitInt8::filterN member matching LCNT to REGISTER_FUNCTION.
192 static void registerKernelClass() {
// TT must be int8_t or uint8_t.
193 static_assert((std::is_same<TT, int8_t>::value) || (std::is_same<TT, uint8_t>::value));
// OVERLAP must be a multiple of 16 (this also admits zero and negative multiples).
194 static_assert(OVERLAP % 16 == 0);
// OVERLAP > 0: STRIDE must be a non-negative multiple of 16.
195 static_assert((OVERLAP <= 0) || (STRIDE % 16 == 0 && STRIDE >= 0));
// OVERLAP > 0: (INP_W - OUT_W) must be an exact multiple of FIRST_STRIDE.
196 static_assert((OVERLAP <= 0) || ((INP_W-OUT_W) % FIRST_STRIDE == 0));
// OVERLAP <= 0: OUT_W must be a multiple of 16.
197 static_assert((OVERLAP > 0) || (OUT_W % 16 == 0));
// OVERLAP <= 0: OUT_W*LCNT - OVERLAP*(LCNT-1) must not exceed INP_W.
198 static_assert((OVERLAP > 0) || (OUT_W*LCNT - OVERLAP*(LCNT-1) <= INP_W));
// NOTE(review): the condition opening this first branch is not visible here;
// by analogy with the branches below it is presumably LCNT == 8 - confirm.
201 REGISTER_FUNCTION(SplitInt8::filter8);
202 }
else if (LCNT == 7) {
203 REGISTER_FUNCTION(SplitInt8::filter7);
204 }
else if (LCNT == 6) {
205 REGISTER_FUNCTION(SplitInt8::filter6);
206 }
else if (LCNT == 5) {
207 REGISTER_FUNCTION(SplitInt8::filter5);
208 }
else if (LCNT == 4) {
209 REGISTER_FUNCTION(SplitInt8::filter4);
210 }
else if (LCNT == 3) {
211 REGISTER_FUNCTION(SplitInt8::filter3);
212 }
else if (LCNT == 2) {
213 REGISTER_FUNCTION(SplitInt8::filter2);
214 }
else if (LCNT == 1) {
215 REGISTER_FUNCTION(SplitInt8::filter1);
// FIRST_STRIDE is OUT_W minus OVERLAP. When OUT_W == OVERLAP it is zero, and the
// division in LCNT below can then no longer be evaluated as a constant expression.
231 static constexpr int FIRST_STRIDE = OUT_W - OVERLAP;
// LCNT counts the width-OUT_W spans that fit in INP_W when advancing by
// FIRST_STRIDE; with integer operands the quotient is truncated.
232 static constexpr int LCNT = (INP_W - OUT_W) / FIRST_STRIDE + 1;
// STRIDE is OUT_W minus twice OVERLAP; it goes negative once 2*OVERLAP > OUT_W.
233 static constexpr int STRIDE = OUT_W - OVERLAP - OVERLAP;
237 input_stream<TT>* in,
238 output_stream<TT>* restrict out0,
239 output_stream<TT>* restrict out1
// Validates the template configuration at compile time, then passes
// SplitTwo32bitStreams::filter to REGISTER_FUNCTION.
241 static void registerKernelClass() {
// TT must be exactly 4 bytes wide.
242 static_assert(
sizeof(TT) == 4);
// OVERLAP > 0: (INP_W - OUT_W) must be an exact multiple of FIRST_STRIDE and STRIDE must be >= 0.
243 static_assert((OVERLAP <= 0) || ((INP_W-OUT_W) % FIRST_STRIDE == 0 && STRIDE >= 0));
// OVERLAP <= 0: OUT_W*LCNT - OVERLAP*(LCNT-1) must not exceed INP_W.
244 static_assert((OVERLAP > 0) || (OUT_W*LCNT - OVERLAP*(LCNT-1) <= INP_W));
245 REGISTER_FUNCTION(SplitTwo32bitStreams::filter);
// FIRST_STRIDE is OUT_W minus OVERLAP. When OUT_W == OVERLAP it is zero, and the
// division in LCNT below can then no longer be evaluated as a constant expression.
259 static constexpr int FIRST_STRIDE = OUT_W - OVERLAP;
// LCNT counts the width-OUT_W spans that fit in INP_W when advancing by
// FIRST_STRIDE; with integer operands the quotient is truncated.
260 static constexpr int LCNT = (INP_W - OUT_W) / FIRST_STRIDE + 1;
// STRIDE is OUT_W minus twice OVERLAP; it goes negative once 2*OVERLAP > OUT_W.
261 static constexpr int STRIDE = OUT_W - OVERLAP - OVERLAP;
268 ): lane_idx(lane_idx) {};
271 input_stream<TT>* in,
272 output_stream<TT>* out0
// Validates the template configuration at compile time, then passes
// SplitFilterFloatStream::filter to REGISTER_FUNCTION.
275 static void registerKernelClass() {
// TT must be exactly 4 bytes wide.
276 static_assert(
sizeof(TT) == 4);
// OVERLAP > 0: (INP_W - OUT_W) must be an exact multiple of FIRST_STRIDE.
// NOTE(review): no STRIDE >= 0 requirement here, unlike the
// SplitTwo32bitStreams check - confirm this is intentional.
277 static_assert((OVERLAP <= 0) || ((INP_W-OUT_W) % FIRST_STRIDE == 0));
// OVERLAP <= 0: OUT_W*LCNT - OVERLAP*(LCNT-1) must not exceed INP_W.
278 static_assert((OVERLAP > 0) || (OUT_W*LCNT - OVERLAP*(LCNT-1) <= INP_W));
279 REGISTER_FUNCTION(SplitFilterFloatStream::filter);
// FIRST_STRIDE is OUT_W minus OVERLAP. When OUT_W == OVERLAP it is zero, and the
// division in LCNT below can then no longer be evaluated as a constant expression.
293 static constexpr int FIRST_STRIDE = OUT_W - OVERLAP;
// LCNT counts the width-OUT_W spans that fit in INP_W when advancing by
// FIRST_STRIDE; with integer operands the quotient is truncated.
294 static constexpr int LCNT = (INP_W - OUT_W) / FIRST_STRIDE + 1;
// STRIDE is OUT_W minus twice OVERLAP; it goes negative once 2*OVERLAP > OUT_W.
295 static constexpr int STRIDE = OUT_W - OVERLAP - OVERLAP;
302 ): lane_idx(lane_idx) {};
305 input_stream<TT>* in,
306 output_stream<TT>* out0,
307 output_stream<TT>* out1
// Validates the template configuration at compile time, then passes
// SplitFilterFloatStreamTwice::filter to REGISTER_FUNCTION.
310 static void registerKernelClass() {
// TT must be exactly 4 bytes wide.
311 static_assert(
sizeof(TT) == 4);
// OVERLAP > 0: (INP_W - OUT_W) must be an exact multiple of FIRST_STRIDE.
// NOTE(review): no STRIDE >= 0 requirement here, unlike the
// SplitTwo32bitStreams check - confirm this is intentional.
312 static_assert((OVERLAP <= 0) || ((INP_W-OUT_W) % FIRST_STRIDE == 0));
// OVERLAP <= 0: OUT_W*LCNT - OVERLAP*(LCNT-1) must not exceed INP_W.
313 static_assert((OVERLAP > 0) || (OUT_W*LCNT - OVERLAP*(LCNT-1) <= INP_W));
314 REGISTER_FUNCTION(SplitFilterFloatStreamTwice::filter);
// FIRST_STRIDE is OUT_W minus OVERLAP. When OUT_W == OVERLAP it is zero, and the
// division in LCNT below can then no longer be evaluated as a constant expression.
327 static constexpr int FIRST_STRIDE = OUT_W - OVERLAP;
// LCNT counts the width-OUT_W spans that fit in INP_W when advancing by
// FIRST_STRIDE; with integer operands the quotient is truncated.
328 static constexpr int LCNT = (INP_W - OUT_W) / FIRST_STRIDE + 1;
// STRIDE is OUT_W minus twice OVERLAP; it goes negative once 2*OVERLAP > OUT_W.
329 static constexpr int STRIDE = OUT_W - OVERLAP - OVERLAP;
333 input_stream<TT>* in,
334 output_pktstream* out0,
335 output_pktstream* out1
// Validates the template configuration at compile time, then passes
// SplitFilterFloatPktStream::filter to REGISTER_FUNCTION.
338 static void registerKernelClass() {
// TT must be float.
339 static_assert((std::is_same<TT, float>::value));
// OVERLAP > 0: (INP_W - OUT_W) must be an exact multiple of FIRST_STRIDE and STRIDE must be >= 0.
340 static_assert((OVERLAP <= 0) || ((INP_W-OUT_W) % FIRST_STRIDE == 0 && STRIDE >= 0));
// OVERLAP <= 0: OUT_W*LCNT - OVERLAP*(LCNT-1) must not exceed INP_W.
341 static_assert((OVERLAP > 0) || (OUT_W*LCNT - OVERLAP*(LCNT-1) <= INP_W));
342 REGISTER_FUNCTION(SplitFilterFloatPktStream::filter);
// FIRST_STRIDE is OUT_W minus OVERLAP. When OUT_W == OVERLAP it is zero, and the
// division in LCNT below can then no longer be evaluated as a constant expression.
355 static constexpr int FIRST_STRIDE = OUT_W - OVERLAP;
// LCNT counts the width-OUT_W spans that fit in INP_W when advancing by
// FIRST_STRIDE; with integer operands the quotient is truncated.
356 static constexpr int LCNT = (INP_W - OUT_W) / FIRST_STRIDE + 1;
// STRIDE is OUT_W minus twice OVERLAP; it goes negative once 2*OVERLAP > OUT_W.
357 static constexpr int STRIDE = OUT_W - OVERLAP - OVERLAP;
364 ): lane_idx(lane_idx) {};
367 input_stream<TT>* in,
368 output_stream<TT>* out0
// Validates the template configuration at compile time, then passes
// SplitFilterInt8Stream::filter to REGISTER_FUNCTION.
371 static void registerKernelClass() {
// TT must be int8_t or uint8_t.
372 static_assert((std::is_same<TT, int8_t>::value) || (std::is_same<TT, uint8_t>::value));
// OVERLAP must be a multiple of 16 (this also admits zero and negative multiples).
373 static_assert(OVERLAP % 16 == 0);
// OVERLAP > 0: STRIDE must be a multiple of 16.
// NOTE(review): the sign of STRIDE is not checked here, whereas the SplitInt8
// check also requires STRIDE >= 0 - confirm this is intentional.
374 static_assert((OVERLAP <= 0) || (STRIDE % 16 == 0));
// OVERLAP > 0: (INP_W - OUT_W) must be an exact multiple of FIRST_STRIDE.
375 static_assert((OVERLAP <= 0) || ((INP_W-OUT_W) % FIRST_STRIDE == 0));
// OVERLAP <= 0: OUT_W must be a multiple of 16.
376 static_assert((OVERLAP > 0) || (OUT_W % 16 == 0));
// OVERLAP <= 0: OUT_W*LCNT - OVERLAP*(LCNT-1) must not exceed INP_W.
377 static_assert((OVERLAP > 0) || (OUT_W*LCNT - OVERLAP*(LCNT-1) <= INP_W));
378 REGISTER_FUNCTION(SplitFilterInt8Stream::filter);
// FIRST_STRIDE is OUT_W minus OVERLAP. When OUT_W == OVERLAP it is zero, and the
// division in LCNT below can then no longer be evaluated as a constant expression.
391 static constexpr int FIRST_STRIDE = OUT_W - OVERLAP;
// LCNT counts the width-OUT_W spans that fit in INP_W when advancing by
// FIRST_STRIDE; with integer operands the quotient is truncated.
392 static constexpr int LCNT = (INP_W - OUT_W) / FIRST_STRIDE + 1;
// STRIDE is OUT_W minus twice OVERLAP; it goes negative once 2*OVERLAP > OUT_W.
393 static constexpr int STRIDE = OUT_W - OVERLAP - OVERLAP;
400 ): lane_idx(lane_idx) {};
403 input_stream<TT>* in,
404 output_stream<TT>* out0,
405 output_stream<TT>* out1
// Validates the template configuration at compile time, then passes
// SplitFilterInt8StreamTwice::filter to REGISTER_FUNCTION.
408 static void registerKernelClass() {
// TT must be int8_t or uint8_t.
409 static_assert((std::is_same<TT, int8_t>::value) || (std::is_same<TT, uint8_t>::value));
// OVERLAP must be a multiple of 16 (this also admits zero and negative multiples).
410 static_assert(OVERLAP % 16 == 0);
// OVERLAP > 0: STRIDE must be a multiple of 16.
// NOTE(review): the sign of STRIDE is not checked here, whereas the SplitInt8
// check also requires STRIDE >= 0 - confirm this is intentional.
411 static_assert((OVERLAP <= 0) || (STRIDE % 16 == 0));
// OVERLAP > 0: (INP_W - OUT_W) must be an exact multiple of FIRST_STRIDE.
412 static_assert((OVERLAP <= 0) || ((INP_W-OUT_W) % FIRST_STRIDE == 0));
// OVERLAP <= 0: OUT_W must be a multiple of 16.
413 static_assert((OVERLAP > 0) || (OUT_W % 16 == 0));
// OVERLAP <= 0: OUT_W*LCNT - OVERLAP*(LCNT-1) must not exceed INP_W.
414 static_assert((OVERLAP > 0) || (OUT_W*LCNT - OVERLAP*(LCNT-1) <= INP_W));
415 REGISTER_FUNCTION(SplitFilterInt8StreamTwice::filter);
// FIRST_STRIDE is OUT_W minus OVERLAP. When OUT_W == OVERLAP it is zero, and the
// division in LCNT below can then no longer be evaluated as a constant expression.
428 static constexpr int FIRST_STRIDE = OUT_W - OVERLAP;
// LCNT counts the width-OUT_W spans that fit in INP_W when advancing by
// FIRST_STRIDE; with integer operands the quotient is truncated.
429 static constexpr int LCNT = (INP_W - OUT_W) / FIRST_STRIDE + 1;
// STRIDE is OUT_W minus twice OVERLAP; it goes negative once 2*OVERLAP > OUT_W.
430 static constexpr int STRIDE = OUT_W - OVERLAP - OVERLAP;
434 input_stream<TT>* in,
435 output_pktstream* out0,
436 output_pktstream* out1
// Validates the template configuration at compile time, then passes
// SplitFilterInt8PktStream::filter to REGISTER_FUNCTION.
439 static void registerKernelClass() {
// TT must be int8_t or uint8_t.
440 static_assert((std::is_same<TT, int8_t>::value) || (std::is_same<TT, uint8_t>::value));
// OVERLAP must be a multiple of 16 (this also admits zero and negative multiples).
441 static_assert(OVERLAP % 16 == 0);
// OVERLAP > 0: STRIDE must be a non-negative multiple of 16.
442 static_assert((OVERLAP <= 0) || (STRIDE % 16 == 0 && STRIDE >= 0));
// OVERLAP > 0: LCNT*OUT_W - OVERLAP*(LCNT-1) must not exceed INP_W. `<=` binds
// tighter than `||`, so the whole inequality is the right-hand operand.
// NOTE(review): SplitInt8 checks (INP_W-OUT_W) % FIRST_STRIDE == 0 at this point
// instead - confirm the weaker fit check is intended here.
443 static_assert((OVERLAP <= 0) || LCNT*OUT_W - OVERLAP*(LCNT-1) <= INP_W);
// OVERLAP <= 0: OUT_W must be a multiple of 16.
444 static_assert((OVERLAP > 0) || (OUT_W % 16 == 0));
// OVERLAP <= 0: the same fit inequality as above, so together the two
// assertions require it for every OVERLAP value.
445 static_assert((OVERLAP > 0) || (OUT_W*LCNT - OVERLAP*(LCNT-1) <= INP_W));
446 REGISTER_FUNCTION(SplitFilterInt8PktStream::filter);