onnx2versal
Loading...
Searching...
No Matches
concat.h
1#ifndef CONCAT_H_
2#define CONCAT_H_
3
4#include <type_traits>
5#include <adf.h>
6#include <assert.h>
7
8
21template <typename TT, int LCNT, int H, int INP_W, int OUT_W>
23 public:
24 void filter8(
25 input_window<TT>* in0,
26 input_window<TT>* in1,
27 input_window<TT>* in2,
28 input_window<TT>* in3,
29 input_window<TT>* in4,
30 input_window<TT>* in5,
31 input_window<TT>* in6,
32 input_window<TT>* in7,
33 output_stream<TT>* out
34 );
35 void filter7(
36 input_window<TT>* in0,
37 input_window<TT>* in1,
38 input_window<TT>* in2,
39 input_window<TT>* in3,
40 input_window<TT>* in4,
41 input_window<TT>* in5,
42 input_window<TT>* in6,
43 output_stream<TT>* out
44 );
45 void filter6(
46 input_window<TT>* in0,
47 input_window<TT>* in1,
48 input_window<TT>* in2,
49 input_window<TT>* in3,
50 input_window<TT>* in4,
51 input_window<TT>* in5,
52 output_stream<TT>* out
53 );
54 void filter5(
55 input_window<TT>* in0,
56 input_window<TT>* in1,
57 input_window<TT>* in2,
58 input_window<TT>* in3,
59 input_window<TT>* in4,
60 output_stream<TT>* out
61 );
62 void filter4(
63 input_window<TT>* in0,
64 input_window<TT>* in1,
65 input_window<TT>* in2,
66 input_window<TT>* in3,
67 output_stream<TT>* out
68 );
69 void filter3(
70 input_window<TT>* in0,
71 input_window<TT>* in1,
72 input_window<TT>* in2,
73 output_stream<TT>* out
74 );
75 void filter2(
76 input_window<TT>* in0,
77 input_window<TT>* in1,
78 output_stream<TT>* out
79 );
80 void filter1(
81 input_window<TT>* in0,
82 output_stream<TT>* out
83 );
84 static void registerKernelClass() {
85 static_assert(sizeof(TT) == 4); // 32-bit stream
86 if (LCNT == 8) {
87 REGISTER_FUNCTION(ConcatScalar::filter8);
88 } else if (LCNT == 7) {
89 REGISTER_FUNCTION(ConcatScalar::filter7);
90 } else if (LCNT == 6) {
91 REGISTER_FUNCTION(ConcatScalar::filter6);
92 } else if (LCNT == 5) {
93 REGISTER_FUNCTION(ConcatScalar::filter5);
94 } else if (LCNT == 4) {
95 REGISTER_FUNCTION(ConcatScalar::filter4);
96 } else if (LCNT == 3) {
97 REGISTER_FUNCTION(ConcatScalar::filter3);
98 } else if (LCNT == 2) {
99 REGISTER_FUNCTION(ConcatScalar::filter2);
100 } else if (LCNT == 1) {
101 REGISTER_FUNCTION(ConcatScalar::filter1);
102 }
103 }
104};
105
106
112template <typename TT, int LCNT, int H, int INP_W, int OUT_W>
114 public:
115 void filter8(
116 input_window<TT>* in0,
117 input_window<TT>* in1,
118 input_window<TT>* in2,
119 input_window<TT>* in3,
120 input_window<TT>* in4,
121 input_window<TT>* in5,
122 input_window<TT>* in6,
123 input_window<TT>* in7,
124 output_stream<TT>* out
125 );
126 void filter7(
127 input_window<TT>* in0,
128 input_window<TT>* in1,
129 input_window<TT>* in2,
130 input_window<TT>* in3,
131 input_window<TT>* in4,
132 input_window<TT>* in5,
133 input_window<TT>* in6,
134 output_stream<TT>* out
135 );
136 void filter6(
137 input_window<TT>* in0,
138 input_window<TT>* in1,
139 input_window<TT>* in2,
140 input_window<TT>* in3,
141 input_window<TT>* in4,
142 input_window<TT>* in5,
143 output_stream<TT>* out
144 );
145 void filter5(
146 input_window<TT>* in0,
147 input_window<TT>* in1,
148 input_window<TT>* in2,
149 input_window<TT>* in3,
150 input_window<TT>* in4,
151 output_stream<TT>* out
152 );
153 void filter4(
154 input_window<TT>* in0,
155 input_window<TT>* in1,
156 input_window<TT>* in2,
157 input_window<TT>* in3,
158 output_stream<TT>* out
159 );
160 void filter3(
161 input_window<TT>* in0,
162 input_window<TT>* in1,
163 input_window<TT>* in2,
164 output_stream<TT>* out
165 );
166 void filter2(
167 input_window<TT>* in0,
168 input_window<TT>* in1,
169 output_stream<TT>* out
170 );
171 void filter1(
172 input_window<TT>* in0,
173 output_stream<TT>* out
174 );
175 static void registerKernelClass() {
176 static_assert(INP_W%4==0 && OUT_W%4==0 && (std::is_same<TT, float>::value));
177 if (LCNT == 8) {
178 REGISTER_FUNCTION(ConcatFloat::filter8);
179 } else if (LCNT == 7) {
180 REGISTER_FUNCTION(ConcatFloat::filter7);
181 } else if (LCNT == 6) {
182 REGISTER_FUNCTION(ConcatFloat::filter6);
183 } else if (LCNT == 5) {
184 REGISTER_FUNCTION(ConcatFloat::filter5);
185 } else if (LCNT == 4) {
186 REGISTER_FUNCTION(ConcatFloat::filter4);
187 } else if (LCNT == 3) {
188 REGISTER_FUNCTION(ConcatFloat::filter3);
189 } else if (LCNT == 2) {
190 REGISTER_FUNCTION(ConcatFloat::filter2);
191 } else if (LCNT == 1) {
192 REGISTER_FUNCTION(ConcatFloat::filter1);
193 }
194 }
195};
196
197
203template <typename TT, int LCNT, int H, int INP_W, int OUT_W>
205 public:
206 void filter8(
207 input_window<TT>* in0,
208 input_window<TT>* in1,
209 input_window<TT>* in2,
210 input_window<TT>* in3,
211 input_window<TT>* in4,
212 input_window<TT>* in5,
213 input_window<TT>* in6,
214 input_window<TT>* in7,
215 output_stream<TT>* out
216 );
217 void filter7(
218 input_window<TT>* in0,
219 input_window<TT>* in1,
220 input_window<TT>* in2,
221 input_window<TT>* in3,
222 input_window<TT>* in4,
223 input_window<TT>* in5,
224 input_window<TT>* in6,
225 output_stream<TT>* out
226 );
227 void filter6(
228 input_window<TT>* in0,
229 input_window<TT>* in1,
230 input_window<TT>* in2,
231 input_window<TT>* in3,
232 input_window<TT>* in4,
233 input_window<TT>* in5,
234 output_stream<TT>* out
235 );
236 void filter5(
237 input_window<TT>* in0,
238 input_window<TT>* in1,
239 input_window<TT>* in2,
240 input_window<TT>* in3,
241 input_window<TT>* in4,
242 output_stream<TT>* out
243 );
244 void filter4(
245 input_window<TT>* in0,
246 input_window<TT>* in1,
247 input_window<TT>* in2,
248 input_window<TT>* in3,
249 output_stream<TT>* out
250 );
251 void filter3(
252 input_window<TT>* in0,
253 input_window<TT>* in1,
254 input_window<TT>* in2,
255 output_stream<TT>* out
256 );
257 void filter2(
258 input_window<TT>* in0,
259 input_window<TT>* in1,
260 output_stream<TT>* out
261 );
262 void filter1(
263 input_window<TT>* in0,
264 output_stream<TT>* out
265 );
266 static void registerKernelClass() {
267 static_assert(INP_W%16==0);
268 static_assert(OUT_W%16==0);
269 static_assert(std::is_same<TT, int8_t>::value || std::is_same<TT, uint8_t>::value);
270 if (LCNT == 8) {
271 REGISTER_FUNCTION(ConcatInt8::filter8);
272 } else if (LCNT == 7) {
273 REGISTER_FUNCTION(ConcatInt8::filter7);
274 } else if (LCNT == 6) {
275 REGISTER_FUNCTION(ConcatInt8::filter6);
276 } else if (LCNT == 5) {
277 REGISTER_FUNCTION(ConcatInt8::filter5);
278 } else if (LCNT == 4) {
279 REGISTER_FUNCTION(ConcatInt8::filter4);
280 } else if (LCNT == 3) {
281 REGISTER_FUNCTION(ConcatInt8::filter3);
282 } else if (LCNT == 2) {
283 REGISTER_FUNCTION(ConcatInt8::filter2);
284 } else if (LCNT == 1) {
285 REGISTER_FUNCTION(ConcatInt8::filter1);
286 }
287 }
288};
289
290
295template <typename TT, int H, int INP_W1, int INP_W2, int OUT_W>
297 public:
298 void filter(
299 input_stream<TT>* in0,
300 input_stream<TT>* in1,
301 output_stream<TT>* out
302 );
303 static void registerKernelClass() {
304 static_assert(sizeof(TT) == 4); // 32-bit stream
305 // also expects INP_W1 < OUT_W, not included due to conditional instances in graph
306 REGISTER_FUNCTION(ConcatFloatStream::filter);
307 }
308};
309
310
314template <typename TT, int H, int INP_W1, int INP_W2, int OUT_W>
316 public:
317 void filter(
318 input_stream<TT>* in0,
319 input_stream<TT>* in1,
320 output_stream<TT>* out
321 );
322 static void registerKernelClass() {
323 static_assert(sizeof(TT) == 4); // 32-bit stream
324 // also expects INP_W1 < OUT_W, not included due to conditional instances in graph
325 REGISTER_FUNCTION(ConcatFloatStreamWithStall::filter);
326 }
327};
328
329
334template <typename TT>
336 private:
337 int H;
338 int INP_W1;
339 int INP_W2;
340 int OUT_W;
341
342 public:
344 int H,
345 int INP_W1,
346 int INP_W2,
347 int OUT_W
348 ): H(H), INP_W1(INP_W1), INP_W2(INP_W2), OUT_W(OUT_W) {};
349
350 void filter(
351 input_stream<TT>* in0,
352 input_stream<TT>* in1,
353 output_stream<TT>* out
354 );
355 static void registerKernelClass() {
356 static_assert(sizeof(TT) == 4); // 32-bit stream
357 // also expects INP_W1 < OUT_W, not included due to conditional instances in graph
358 REGISTER_FUNCTION(ConcatFloatStreamSequentially::filter);
359 REGISTER_PARAMETER(H);
360 REGISTER_PARAMETER(INP_W1);
361 REGISTER_PARAMETER(INP_W2);
362 REGISTER_PARAMETER(OUT_W);
363 }
364};
365
366
371template <typename TT, int LCNT, int H, int INP_W, int OUT_W>
373 public:
374 void filter(
375 input_pktstream* in,
376 output_stream<TT>* out
377 );
378 static void registerKernelClass() {
379 static_assert(sizeof(TT) == 4); // 32-bit stream
380 // also expects INP_W1 < OUT_W, not included due to conditional instances in graph
381 REGISTER_FUNCTION(ConcatFloatPktStream::filter);
382 }
383};
384
385
390template <typename TT, int H, int INP_W1, int INP_W2, int OUT_W>
392 public:
393 void filter(
394 input_stream<TT>* in0,
395 input_stream<TT>* in1,
396 output_stream<TT>* out
397 );
398 static void registerKernelClass() {
399 static_assert(INP_W1 % 16 == 0 && INP_W2 % 16 == 0 && OUT_W % 16 == 0);
400 static_assert((std::is_same<TT, int8_t>::value) || (std::is_same<TT, uint8_t>::value));
401 // also expects INP_W1 < OUT_W, not included due to conditional instances in graph
402 REGISTER_FUNCTION(ConcatInt8Stream::filter);
403 }
404};
405
410template <typename TT, int LCNT, int H, int INP_W, int OUT_W>
412 public:
413 void filter(
414 input_stream<TT>* in0,
415 input_stream<TT>* in1,
416 output_stream<TT>* out
417 );
418 static void registerKernelClass() {
419 static_assert(sizeof(TT) == 4); // 32-bit stream
420 static_assert(OUT_W % INP_W == 0);
421 REGISTER_FUNCTION(ConcatTwo32bitStreams::filter);
422 }
423};
427#endif // CONCAT_H_
Scalar implementation for stream concat, ConcatFloatPktStream<f,4,32,32,64> takes cycles.
Definition concat.h:372
Scalar implementation for stream concat, ConcatFloatStreamSequentially<f,4,32,32,64> takes ~1000 cycl...
Definition concat.h:335
Scalar implementation for stream concat.
Definition concat.h:315
Scalar implementation for stream concat, ConcatFloatStream<f,4,32,32,64> takes ~1000 cycles.
Definition concat.h:296
Vector implementation, requires INP_W%4=0, OUT_W%4=0. ConcatFloat<f,5,4,32,144> takes 715 cycles (~30...
Definition concat.h:113
Vector implementation for stream concat with int8, ConcatInt8Stream<f,4,32,32,64> takes cycles.
Definition concat.h:391
Vector implementation for int8_t, requires INP_W%16=0, OUT_W%16=0, ConcatInt8<f,5,...
Definition concat.h:204
Scalar implementation, ConcatScalar<f,5,4,32,144> takes 5858 cycles (~850 for output window)
Definition concat.h:22
Scalar implementation for concatenating 2 chunked streams, ConcatTwo32bitStreams<f,...
Definition concat.h:411