onnx2versal
Loading...
Searching...
No Matches
split.h
1#ifndef SPLIT_H_
2#define SPLIT_H_
3
4#include <type_traits>
5#include <adf.h>
6#include <assert.h>
7
8
// SplitScalar: kernel class declaration that splits one input stream into up to
// eight output windows. The class name is taken from the REGISTER_FUNCTION calls below.
// NOTE(review): the `class ... {` line (listing line 25) is absent from this extract.
// Template parameters: TT (element type, must be 4 bytes wide per the static_assert),
// H, INP_W, OUT_W, OVERLAP.
// NOTE(review): H is not referenced in this declaration; presumably the row count
// consumed by the filter implementations - confirm against the definitions.
24template <typename TT, int H, int INP_W, int OUT_W, int OVERLAP>
26 private:
// Compile-time constants derived from the template parameters.
// FIRST_STRIDE: OUT_W minus OVERLAP; used as the divisor when computing LCNT.
27 static constexpr int FIRST_STRIDE = OUT_W - OVERLAP;
// LCNT: decides which filterN member is registered (N == LCNT, i.e. N output windows).
28 static constexpr int LCNT = (INP_W - OUT_W) / FIRST_STRIDE + 1;
// STRIDE: OUT_W minus twice OVERLAP; checked to be non-negative when OVERLAP > 0.
29 static constexpr int STRIDE = OUT_W - OVERLAP - OVERLAP;
30
31 public:
// filter1..filter8: one input stream and N output windows each. Only declared
// here; the bodies are defined elsewhere.
32 void filter8(
33 input_stream<TT>* in,
34 output_window<TT>* out0,
35 output_window<TT>* out1,
36 output_window<TT>* out2,
37 output_window<TT>* out3,
38 output_window<TT>* out4,
39 output_window<TT>* out5,
40 output_window<TT>* out6,
41 output_window<TT>* out7
42 );
43 void filter7(
44 input_stream<TT>* in,
45 output_window<TT>* out0,
46 output_window<TT>* out1,
47 output_window<TT>* out2,
48 output_window<TT>* out3,
49 output_window<TT>* out4,
50 output_window<TT>* out5,
51 output_window<TT>* out6
52 );
53 void filter6(
54 input_stream<TT>* in,
55 output_window<TT>* out0,
56 output_window<TT>* out1,
57 output_window<TT>* out2,
58 output_window<TT>* out3,
59 output_window<TT>* out4,
60 output_window<TT>* out5
61 );
62 void filter5(
63 input_stream<TT>* in,
64 output_window<TT>* out0,
65 output_window<TT>* out1,
66 output_window<TT>* out2,
67 output_window<TT>* out3,
68 output_window<TT>* out4
69 );
70 void filter4(
71 input_stream<TT>* in,
72 output_window<TT>* out0,
73 output_window<TT>* out1,
74 output_window<TT>* out2,
75 output_window<TT>* out3
76 );
77 void filter3(
78 input_stream<TT>* in,
79 output_window<TT>* out0,
80 output_window<TT>* out1,
81 output_window<TT>* out2
82 );
83 void filter2(
84 input_stream<TT>* in,
85 output_window<TT>* out0,
86 output_window<TT>* out1
87 );
88 void filter1(
89 input_stream<TT>* in,
90 output_window<TT>* out0
91 );
// Validates the template parameters at compile time, then registers the filterN
// member whose output count equals LCNT.
92 static void registerKernelClass() {
// OVERLAP > 0: (INP_W-OUT_W) must be a multiple of FIRST_STRIDE and 2*OVERLAP <= OUT_W.
93 static_assert((OVERLAP <= 0) || ((INP_W-OUT_W) % FIRST_STRIDE == 0 && STRIDE >= 0));
// OVERLAP <= 0: the LCNT outputs, separated by gaps of -OVERLAP, must fit within INP_W.
94 static_assert((OVERLAP > 0) || (OUT_W*LCNT - OVERLAP*(LCNT-1) <= INP_W));
95 static_assert(sizeof(TT) == 4); // 32-bit width stream
// NOTE(review): there is no branch for LCNT outside 1..8; in that case this
// function registers nothing.
96 if (LCNT == 8) {
97 REGISTER_FUNCTION(SplitScalar::filter8);
98 } else if (LCNT == 7) {
99 REGISTER_FUNCTION(SplitScalar::filter7);
100 } else if (LCNT == 6) {
101 REGISTER_FUNCTION(SplitScalar::filter6);
102 } else if (LCNT == 5) {
103 REGISTER_FUNCTION(SplitScalar::filter5);
104 } else if (LCNT == 4) {
105 REGISTER_FUNCTION(SplitScalar::filter4);
106 } else if (LCNT == 3) {
107 REGISTER_FUNCTION(SplitScalar::filter3);
108 } else if (LCNT == 2) {
109 REGISTER_FUNCTION(SplitScalar::filter2);
110 } else if (LCNT == 1) {
111 REGISTER_FUNCTION(SplitScalar::filter1);
112 }
113 }
114};
115
116
// SplitInt8: kernel class declaration that splits one int8/uint8 input stream into
// up to eight output windows. The class name is taken from the REGISTER_FUNCTION calls below.
// NOTE(review): the `class ... {` line (listing line 125) is absent from this extract.
// Template parameters: TT (must be int8_t or uint8_t per the static_assert),
// H, INP_W, OUT_W, OVERLAP.
// NOTE(review): H is not referenced in this declaration; presumably the row count
// consumed by the filter implementations - confirm against the definitions.
124template <typename TT, int H, int INP_W, int OUT_W, int OVERLAP>
126 private:
// Compile-time constants derived from the template parameters.
// FIRST_STRIDE: OUT_W minus OVERLAP; used as the divisor when computing LCNT.
127 static constexpr int FIRST_STRIDE = OUT_W - OVERLAP;
// LCNT: decides which filterN member is registered (N == LCNT, i.e. N output windows).
128 static constexpr int LCNT = (INP_W - OUT_W) / FIRST_STRIDE + 1;
// STRIDE: OUT_W minus twice OVERLAP; checked for sign and 16-alignment when OVERLAP > 0.
129 static constexpr int STRIDE = OUT_W - OVERLAP - OVERLAP;
130
131 public:
// filter1..filter8: one input stream and N output windows each. Only declared
// here; the bodies are defined elsewhere.
132 void filter8(
133 input_stream<TT>* in,
134 output_window<TT>* out0,
135 output_window<TT>* out1,
136 output_window<TT>* out2,
137 output_window<TT>* out3,
138 output_window<TT>* out4,
139 output_window<TT>* out5,
140 output_window<TT>* out6,
141 output_window<TT>* out7
142 );
143 void filter7(
144 input_stream<TT>* in,
145 output_window<TT>* out0,
146 output_window<TT>* out1,
147 output_window<TT>* out2,
148 output_window<TT>* out3,
149 output_window<TT>* out4,
150 output_window<TT>* out5,
151 output_window<TT>* out6
152 );
153 void filter6(
154 input_stream<TT>* in,
155 output_window<TT>* out0,
156 output_window<TT>* out1,
157 output_window<TT>* out2,
158 output_window<TT>* out3,
159 output_window<TT>* out4,
160 output_window<TT>* out5
161 );
162 void filter5(
163 input_stream<TT>* in,
164 output_window<TT>* out0,
165 output_window<TT>* out1,
166 output_window<TT>* out2,
167 output_window<TT>* out3,
168 output_window<TT>* out4
169 );
170 void filter4(
171 input_stream<TT>* in,
172 output_window<TT>* out0,
173 output_window<TT>* out1,
174 output_window<TT>* out2,
175 output_window<TT>* out3
176 );
177 void filter3(
178 input_stream<TT>* in,
179 output_window<TT>* out0,
180 output_window<TT>* out1,
181 output_window<TT>* out2
182 );
183 void filter2(
184 input_stream<TT>* in,
185 output_window<TT>* out0,
186 output_window<TT>* out1
187 );
188 void filter1(
189 input_stream<TT>* in,
190 output_window<TT>* out0
191 );
// Validates the template parameters at compile time, then registers the filterN
// member whose output count equals LCNT.
192 static void registerKernelClass() {
// Element type is restricted to 8-bit signed/unsigned integers.
193 static_assert((std::is_same<TT, int8_t>::value) || (std::is_same<TT, uint8_t>::value));
// OVERLAP (of either sign) must be a multiple of 16 elements.
194 static_assert(OVERLAP % 16 == 0);
// OVERLAP > 0: STRIDE must be a non-negative multiple of 16 (so 2*OVERLAP <= OUT_W).
195 static_assert((OVERLAP <= 0) || (STRIDE % 16 == 0 && STRIDE >= 0));
// OVERLAP > 0: (INP_W-OUT_W) must be a multiple of FIRST_STRIDE.
196 static_assert((OVERLAP <= 0) || ((INP_W-OUT_W) % FIRST_STRIDE == 0));
// OVERLAP <= 0: OUT_W must be a multiple of 16.
197 static_assert((OVERLAP > 0) || (OUT_W % 16 == 0));
// OVERLAP <= 0: the LCNT outputs, separated by gaps of -OVERLAP, must fit within INP_W.
198 static_assert((OVERLAP > 0) || (OUT_W*LCNT - OVERLAP*(LCNT-1) <= INP_W));
199
// NOTE(review): there is no branch for LCNT outside 1..8; in that case this
// function registers nothing.
200 if (LCNT == 8) {
201 REGISTER_FUNCTION(SplitInt8::filter8);
202 } else if (LCNT == 7) {
203 REGISTER_FUNCTION(SplitInt8::filter7);
204 } else if (LCNT == 6) {
205 REGISTER_FUNCTION(SplitInt8::filter6);
206 } else if (LCNT == 5) {
207 REGISTER_FUNCTION(SplitInt8::filter5);
208 } else if (LCNT == 4) {
209 REGISTER_FUNCTION(SplitInt8::filter4);
210 } else if (LCNT == 3) {
211 REGISTER_FUNCTION(SplitInt8::filter3);
212 } else if (LCNT == 2) {
213 REGISTER_FUNCTION(SplitInt8::filter2);
214 } else if (LCNT == 1) {
215 REGISTER_FUNCTION(SplitInt8::filter1);
216 }
217 }
218};
219
220
// SplitTwo32bitStreams: kernel class declaration with one input stream and two
// output streams. The class name is taken from the REGISTER_FUNCTION call below.
// NOTE(review): the `class ... {` line (listing line 229) is absent from this extract.
// Template parameters: TT (must be 4 bytes wide per the static_assert),
// H, INP_W, OUT_W, OVERLAP. H is not referenced in this declaration.
228template <typename TT, int H, int INP_W, int OUT_W, int OVERLAP>
230 private:
// Compile-time constants derived from the template parameters.
231 static constexpr int FIRST_STRIDE = OUT_W - OVERLAP;
// LCNT is used only by the OVERLAP <= 0 width check in this declaration.
232 static constexpr int LCNT = (INP_W - OUT_W) / FIRST_STRIDE + 1;
233 static constexpr int STRIDE = OUT_W - OVERLAP - OVERLAP;
234
235 public:
// Kernel entry point: one input stream, two restrict-qualified output streams.
// Only declared here; the body is defined elsewhere.
236 void filter(
237 input_stream<TT>* in,
238 output_stream<TT>* restrict out0,
239 output_stream<TT>* restrict out1
240 );
// Validates the template parameters at compile time and registers filter.
241 static void registerKernelClass() {
// Element type must be 4 bytes wide.
242 static_assert(sizeof(TT) == 4);
// OVERLAP > 0: (INP_W-OUT_W) must be a multiple of FIRST_STRIDE and 2*OVERLAP <= OUT_W.
243 static_assert((OVERLAP <= 0) || ((INP_W-OUT_W) % FIRST_STRIDE == 0 && STRIDE >= 0));
// OVERLAP <= 0: the LCNT outputs, separated by gaps of -OVERLAP, must fit within INP_W.
244 static_assert((OVERLAP > 0) || (OUT_W*LCNT - OVERLAP*(LCNT-1) <= INP_W));
245 REGISTER_FUNCTION(SplitTwo32bitStreams::filter);
246 }
247};
248
249
// SplitFilterFloatStream: kernel class declaration with one input stream and one
// output stream, parameterised at construction by a lane index. The class name is
// taken from the REGISTER_FUNCTION call below.
// NOTE(review): the `class ... {` line (listing line 257) and the constructor's
// name line (listing line 266) are absent from this extract.
// Template parameters: TT (must be 4 bytes wide per the static_assert),
// H, INP_W, OUT_W, OVERLAP. H is not referenced in this declaration.
256template <typename TT, int H, int INP_W, int OUT_W, int OVERLAP>
258 private:
// Compile-time constants derived from the template parameters.
259 static constexpr int FIRST_STRIDE = OUT_W - OVERLAP;
260 static constexpr int LCNT = (INP_W - OUT_W) / FIRST_STRIDE + 1;
// STRIDE is declared but not checked by this class's static_asserts.
261 static constexpr int STRIDE = OUT_W - OVERLAP - OVERLAP;
262
// Lane index supplied at construction.
// NOTE(review): presumably selects which of the LCNT slices this kernel emits -
// confirm against the filter definition.
263 int lane_idx;
264
265 public:
// Constructor remainder: stores the lane_idx argument in the member of the same name.
267 int lane_idx
268 ): lane_idx(lane_idx) {};
269
// Kernel entry point: one input stream, one output stream. Only declared here;
// the body is defined elsewhere.
270 void filter(
271 input_stream<TT>* in,
272 output_stream<TT>* out0
273 );
274
// Validates the template parameters at compile time and registers filter.
275 static void registerKernelClass() {
// Element type must be 4 bytes wide.
276 static_assert(sizeof(TT) == 4);
// OVERLAP > 0: (INP_W-OUT_W) must be a multiple of FIRST_STRIDE.
277 static_assert((OVERLAP <= 0) || ((INP_W-OUT_W) % FIRST_STRIDE == 0));
// OVERLAP <= 0: the LCNT outputs, separated by gaps of -OVERLAP, must fit within INP_W.
278 static_assert((OVERLAP > 0) || (OUT_W*LCNT - OVERLAP*(LCNT-1) <= INP_W));
279 REGISTER_FUNCTION(SplitFilterFloatStream::filter);
280 }
281};
282
283
// SplitFilterFloatStreamTwice: kernel class declaration with one input stream and
// two output streams, parameterised at construction by a lane index. The class name
// is taken from the REGISTER_FUNCTION call below.
// NOTE(review): the `class ... {` line (listing line 291) and the constructor's
// name line (listing line 300) are absent from this extract.
// Template parameters: TT (must be 4 bytes wide per the static_assert),
// H, INP_W, OUT_W, OVERLAP. H is not referenced in this declaration.
290template <typename TT, int H, int INP_W, int OUT_W, int OVERLAP>
292 private:
// Compile-time constants derived from the template parameters.
293 static constexpr int FIRST_STRIDE = OUT_W - OVERLAP;
294 static constexpr int LCNT = (INP_W - OUT_W) / FIRST_STRIDE + 1;
// STRIDE is declared but not checked by this class's static_asserts.
295 static constexpr int STRIDE = OUT_W - OVERLAP - OVERLAP;
296
// Lane index supplied at construction.
// NOTE(review): presumably selects which slices go to out0/out1 - confirm against
// the filter definition.
297 int lane_idx;
298
299 public:
// Constructor remainder: stores the lane_idx argument in the member of the same name.
301 int lane_idx
302 ): lane_idx(lane_idx) {};
303
// Kernel entry point: one input stream, two output streams. Only declared here;
// the body is defined elsewhere.
304 void filter(
305 input_stream<TT>* in,
306 output_stream<TT>* out0,
307 output_stream<TT>* out1
308 );
309
// Validates the template parameters at compile time and registers filter.
310 static void registerKernelClass() {
// Element type must be 4 bytes wide.
311 static_assert(sizeof(TT) == 4);
// OVERLAP > 0: (INP_W-OUT_W) must be a multiple of FIRST_STRIDE.
312 static_assert((OVERLAP <= 0) || ((INP_W-OUT_W) % FIRST_STRIDE == 0));
// OVERLAP <= 0: the LCNT outputs, separated by gaps of -OVERLAP, must fit within INP_W.
313 static_assert((OVERLAP > 0) || (OUT_W*LCNT - OVERLAP*(LCNT-1) <= INP_W));
314 REGISTER_FUNCTION(SplitFilterFloatStreamTwice::filter);
315 }
316};
317
318
// SplitFilterFloatPktStream: kernel class declaration with one input stream and two
// output packet streams. The class name is taken from the REGISTER_FUNCTION call below.
// NOTE(review): the `class ... {` line (listing line 325) is absent from this extract.
// Template parameters: TT (must be float per the static_assert),
// H, INP_W, OUT_W, OVERLAP. H is not referenced in this declaration.
324template <typename TT, int H, int INP_W, int OUT_W, int OVERLAP>
326 private:
// Compile-time constants derived from the template parameters.
327 static constexpr int FIRST_STRIDE = OUT_W - OVERLAP;
328 static constexpr int LCNT = (INP_W - OUT_W) / FIRST_STRIDE + 1;
329 static constexpr int STRIDE = OUT_W - OVERLAP - OVERLAP;
330
331 public:
// Kernel entry point: one typed input stream, two output_pktstream outputs.
// Only declared here; the body is defined elsewhere.
332 void filter(
333 input_stream<TT>* in,
334 output_pktstream* out0,
335 output_pktstream* out1
336 );
337
// Validates the template parameters at compile time and registers filter.
338 static void registerKernelClass() {
// Element type is restricted to float.
339 static_assert((std::is_same<TT, float>::value));
// OVERLAP > 0: (INP_W-OUT_W) must be a multiple of FIRST_STRIDE and 2*OVERLAP <= OUT_W.
340 static_assert((OVERLAP <= 0) || ((INP_W-OUT_W) % FIRST_STRIDE == 0 && STRIDE >= 0));
// OVERLAP <= 0: the LCNT outputs, separated by gaps of -OVERLAP, must fit within INP_W.
341 static_assert((OVERLAP > 0) || (OUT_W*LCNT - OVERLAP*(LCNT-1) <= INP_W));
342 REGISTER_FUNCTION(SplitFilterFloatPktStream::filter);
343 }
344};
345
346
// SplitFilterInt8Stream: kernel class declaration with one int8/uint8 input stream
// and one output stream, parameterised at construction by a lane index. The class
// name is taken from the REGISTER_FUNCTION call below.
// NOTE(review): the `class ... {` line (listing line 353) and the constructor's
// name line (listing line 362) are absent from this extract.
// Template parameters: TT (must be int8_t or uint8_t per the static_assert),
// H, INP_W, OUT_W, OVERLAP. H is not referenced in this declaration.
352template <typename TT, int H, int INP_W, int OUT_W, int OVERLAP>
354 private:
// Compile-time constants derived from the template parameters.
355 static constexpr int FIRST_STRIDE = OUT_W - OVERLAP;
356 static constexpr int LCNT = (INP_W - OUT_W) / FIRST_STRIDE + 1;
357 static constexpr int STRIDE = OUT_W - OVERLAP - OVERLAP;
358
// Lane index supplied at construction.
// NOTE(review): presumably selects which of the LCNT slices this kernel emits -
// confirm against the filter definition.
359 int lane_idx;
360
361 public:
// Constructor remainder: stores the lane_idx argument in the member of the same name.
363 int lane_idx
364 ): lane_idx(lane_idx) {};
365
// Kernel entry point: one input stream, one output stream. Only declared here;
// the body is defined elsewhere.
366 void filter(
367 input_stream<TT>* in,
368 output_stream<TT>* out0
369 );
370
// Validates the template parameters at compile time and registers filter.
371 static void registerKernelClass() {
// Element type is restricted to 8-bit signed/unsigned integers.
372 static_assert((std::is_same<TT, int8_t>::value) || (std::is_same<TT, uint8_t>::value));
// OVERLAP (of either sign) must be a multiple of 16 elements.
373 static_assert(OVERLAP % 16 == 0);
// OVERLAP > 0: STRIDE must be a multiple of 16 (its sign is not checked here).
374 static_assert((OVERLAP <= 0) || (STRIDE % 16 == 0));
// OVERLAP > 0: (INP_W-OUT_W) must be a multiple of FIRST_STRIDE.
375 static_assert((OVERLAP <= 0) || ((INP_W-OUT_W) % FIRST_STRIDE == 0));
// OVERLAP <= 0: OUT_W must be a multiple of 16.
376 static_assert((OVERLAP > 0) || (OUT_W % 16 == 0));
// OVERLAP <= 0: the LCNT outputs, separated by gaps of -OVERLAP, must fit within INP_W.
377 static_assert((OVERLAP > 0) || (OUT_W*LCNT - OVERLAP*(LCNT-1) <= INP_W));
378 REGISTER_FUNCTION(SplitFilterInt8Stream::filter);
379 }
380};
381
382
// SplitFilterInt8StreamTwice: kernel class declaration with one int8/uint8 input
// stream and two output streams, parameterised at construction by a lane index.
// The class name is taken from the REGISTER_FUNCTION call below.
// NOTE(review): the `class ... {` line (listing line 389) and the constructor's
// name line (listing line 398) are absent from this extract.
// Template parameters: TT (must be int8_t or uint8_t per the static_assert),
// H, INP_W, OUT_W, OVERLAP. H is not referenced in this declaration.
388template <typename TT, int H, int INP_W, int OUT_W, int OVERLAP>
390 private:
// Compile-time constants derived from the template parameters.
391 static constexpr int FIRST_STRIDE = OUT_W - OVERLAP;
392 static constexpr int LCNT = (INP_W - OUT_W) / FIRST_STRIDE + 1;
393 static constexpr int STRIDE = OUT_W - OVERLAP - OVERLAP;
394
// Lane index supplied at construction.
// NOTE(review): presumably selects which slices go to out0/out1 - confirm against
// the filter definition.
395 int lane_idx;
396
397 public:
// Constructor remainder: stores the lane_idx argument in the member of the same name.
399 int lane_idx
400 ): lane_idx(lane_idx) {};
401
// Kernel entry point: one input stream, two output streams. Only declared here;
// the body is defined elsewhere.
402 void filter(
403 input_stream<TT>* in,
404 output_stream<TT>* out0,
405 output_stream<TT>* out1
406 );
407
// Validates the template parameters at compile time and registers filter.
408 static void registerKernelClass() {
// Element type is restricted to 8-bit signed/unsigned integers.
409 static_assert((std::is_same<TT, int8_t>::value) || (std::is_same<TT, uint8_t>::value));
// OVERLAP (of either sign) must be a multiple of 16 elements.
410 static_assert(OVERLAP % 16 == 0);
// OVERLAP > 0: STRIDE must be a multiple of 16 (its sign is not checked here).
411 static_assert((OVERLAP <= 0) || (STRIDE % 16 == 0));
// OVERLAP > 0: (INP_W-OUT_W) must be a multiple of FIRST_STRIDE.
412 static_assert((OVERLAP <= 0) || ((INP_W-OUT_W) % FIRST_STRIDE == 0));
// OVERLAP <= 0: OUT_W must be a multiple of 16.
413 static_assert((OVERLAP > 0) || (OUT_W % 16 == 0));
// OVERLAP <= 0: the LCNT outputs, separated by gaps of -OVERLAP, must fit within INP_W.
414 static_assert((OVERLAP > 0) || (OUT_W*LCNT - OVERLAP*(LCNT-1) <= INP_W));
415 REGISTER_FUNCTION(SplitFilterInt8StreamTwice::filter);
416 }
417};
418
419
// SplitFilterInt8PktStream: kernel class declaration with one int8/uint8 input
// stream and two output packet streams. The class name is taken from the
// REGISTER_FUNCTION call below.
// NOTE(review): the `class ... {` line (listing line 426) is absent from this extract.
// Template parameters: TT (must be int8_t or uint8_t per the static_assert),
// H, INP_W, OUT_W, OVERLAP. H is not referenced in this declaration.
425template <typename TT, int H, int INP_W, int OUT_W, int OVERLAP>
427 private:
// Compile-time constants derived from the template parameters.
428 static constexpr int FIRST_STRIDE = OUT_W - OVERLAP;
429 static constexpr int LCNT = (INP_W - OUT_W) / FIRST_STRIDE + 1;
430 static constexpr int STRIDE = OUT_W - OVERLAP - OVERLAP;
431
432 public:
// Kernel entry point: one typed input stream, two output_pktstream outputs.
// Only declared here; the body is defined elsewhere.
433 void filter(
434 input_stream<TT>* in,
435 output_pktstream* out0,
436 output_pktstream* out1
437 );
438
// Validates the template parameters at compile time and registers filter.
439 static void registerKernelClass() {
// Element type is restricted to 8-bit signed/unsigned integers.
440 static_assert((std::is_same<TT, int8_t>::value) || (std::is_same<TT, uint8_t>::value));
// OVERLAP (of either sign) must be a multiple of 16 elements.
441 static_assert(OVERLAP % 16 == 0);
// OVERLAP > 0: STRIDE must be a non-negative multiple of 16 (so 2*OVERLAP <= OUT_W).
442 static_assert((OVERLAP <= 0) || (STRIDE % 16 == 0 && STRIDE >= 0));
// OVERLAP > 0: total covered width must fit within INP_W.
// NOTE(review): the other int8 kernels in this file check
// (INP_W-OUT_W) % FIRST_STRIDE == 0 here instead - confirm the looser bound is intended.
443 static_assert((OVERLAP <= 0) || LCNT*OUT_W - OVERLAP*(LCNT-1) <= INP_W);
// OVERLAP <= 0: OUT_W must be a multiple of 16.
444 static_assert((OVERLAP > 0) || (OUT_W % 16 == 0));
// OVERLAP <= 0: the same total-width bound, so together with the check above it
// holds for every OVERLAP.
445 static_assert((OVERLAP > 0) || (OUT_W*LCNT - OVERLAP*(LCNT-1) <= INP_W));
446 REGISTER_FUNCTION(SplitFilterInt8PktStream::filter);
447 }
448};
452#endif // SPLIT_H_
Scalar implementation for slicing out portions of 32-bit stream input into pktstream,...
Definition split.h:325
Scalar implementation for slicing out portion of 32-bit stream input, requires 2*OVERLAP <= OUT_W,...
Definition split.h:291
Scalar implementation for slicing out portion of 32-bit stream input, requires 2*OVERLAP <= OUT_W,...
Definition split.h:257
Scalar implementation for slicing out portions of int8 stream input into pktstream,...
Definition split.h:426
Scalar implementation for slicing out portion of int8 stream input, requires 2*OVERLAP <= OUT_W,...
Definition split.h:389
Scalar implementation for slicing out portion of int8 stream input, requires 2*OVERLAP <= OUT_W,...
Definition split.h:353
Scalar implementation for int8 stream, requires 2*OVERLAP <= OUT_W, (INP_W-OUT_W) % FIRST_STRIDE == 0...
Definition split.h:125
Scalar implementation for 32-bit stream, requires 2*OVERLAP <= OUT_W, (INP_W-OUT_W) % FIRST_STRIDE ==...
Definition split.h:25
Scalar implementation for chunking 32-bit stream input into two separate streams, requires 2*OVERLAP ...
Definition split.h:229