onnx2versal
Loading...
Searching...
No Matches
graph_transpose.h
1#ifndef __TRANSPOSE_GRAPH_H__
2#define __TRANSPOSE_GRAPH_H__
3
4#include <adf.h>
5#include "transpose.h"
6#include "split.h"
7#include "graph_conv.h"
8#include "graph_utils.h"
9
10
33template <template<typename, int, int, int, int, int> class TRANSPOSE,
34 typename TT, int B, int H, int W, int C, int PAD_W>
35class TransposeGraph : public adf::graph {
36
37 private:
38 adf::kernel k[1];
39
40 public:
41 adf::port<input> pin[1];
42 adf::port<output> pout[1];
43
45 k[0] = adf::kernel::create_object<TRANSPOSE<TT, B, H, W, C, PAD_W>>();
46 adf::source(k[0]) = "transpose.cc";
47 adf::headers(k[0]) = {"transpose.h"};
48 adf::runtime<ratio>(k[0]) = 0.6;
49
50 adf::connect<adf::window<B*H*PAD_W*C*sizeof(TT)>> (pin[0], k[0].in[0]);
51 adf::connect<adf::window<B*H*W*C*sizeof(TT)>> (k[0].out[0], pout[0]);
52 }
53
54};
55
56
65template <
66 template<typename, int, int, int, int, int> class TRANSPOSE,
67 int HCHUNK,
68 typename TT, int B, int H, int W, int C, int PAD_W>
69class TransposeHChunkGraph : public adf::graph {
70
71 private:
72 static constexpr int LCNT = H / HCHUNK;
73 adf::kernel split[(LCNT+1)/2];
74 adf::kernel k[LCNT];
75 ConcatStreamGraph<ConcatFloatStreamWithStall, float_t, LCNT, B*C, HCHUNK*W, H*W> concat_graph;
76
77 public:
78 adf::port<input> pin[1];
79 adf::port<output> pout[1];
80
82 static_assert(H % HCHUNK == 0);
83 static_assert(LCNT <= 8);
84 static_assert(B*HCHUNK*W*C*4 <= TILE_BYTES);
85
86 for (int i = 0; i < LCNT/2; i++) {
87 split[i] = adf::kernel::create_object<SplitFilterFloatStreamTwice<float_t, B, H*W*C, HCHUNK*W*C, 0>>(i*2);
88 adf::source(split[i]) = "split.cc";
89 adf::headers(split[i]) = {"split.h"};
90 adf::runtime<ratio>(split[i]) = 0.6;
91
92 adf::connect<adf::stream> (pin[0], split[i].in[0]);
93
94 adf::samples_per_iteration(split[i].in[0]) = B*H*W*C;
95 adf::samples_per_iteration(split[i].out[0]) = B*HCHUNK*W*C;
96 adf::samples_per_iteration(split[i].out[1]) = B*HCHUNK*W*C;
97 }
98 if ((LCNT & 0x1) == 1) {
99 int i = (LCNT+1)/2 - 1;
100 split[i] = adf::kernel::create_object<SplitFilterFloatStream<float_t, B, H*W*C, HCHUNK*W*C, 0>>(LCNT-1);
101 adf::source(split[i]) = "split.cc";
102 adf::headers(split[i]) = {"split.h"};
103 adf::runtime<ratio>(split[i]) = 0.6;
104
105 adf::connect<adf::stream> (pin[0], split[i].in[0]);
106
107 adf::samples_per_iteration(split[i].in[0]) = B*H*W*C;
108 adf::samples_per_iteration(split[i].out[0]) = B*HCHUNK*W*C;
109 }
110
111 for (int i = 0; i < LCNT; i++) {
112 k[i] = adf::kernel::create_object<TRANSPOSE<TT, B, HCHUNK, W, C, PAD_W>>();
113 adf::source(k[i]) = "transpose.cc";
114 adf::headers(k[i]) = {"transpose.h"};
115 adf::runtime<ratio>(k[i]) = 0.6;
116 if (B*C*HCHUNK*W*4 > MAX_PARAM_BYTES)
117 adf::single_buffer(k[i].in[0]);
118
119 adf::connect<adf::window<B*HCHUNK*PAD_W*C*sizeof(TT)>> (split[i/2].out[i&0x1], k[i].in[0]);
120 adf::connect<adf::stream> (k[i].out[0], concat_graph.pin[i]);
121 adf::samples_per_iteration(k[i].out[0]) = B*C*HCHUNK*W;
122 }
123 adf::connect<adf::stream> (concat_graph.pout[0], pout[0]);
124 }
125
126};
127
128
137template <
138 template<typename, int, int, int, int> class SPLIT,
139 template<typename, int, int, int, int, int> class TRANSPOSE,
140 template<typename, int, int, int, int> class CONCAT,
141 int HCHUNK,
142 typename TT, int B, int H, int W, int C, int PAD_W>
143class TransposeChunkHPktStreamGraph : public adf::graph {
144
145 private:
146 static constexpr int LCNT = H / HCHUNK;
148 mSplitGraph split_graph;
149
150 adf::kernel k[LCNT];
151 ConcatStreamGraph<CONCAT, TT, LCNT, B*C, HCHUNK*W, H*W> concat_graph;
152
153 public:
154 adf::port<input> pin[1];
155 adf::port<output> pout[1];
156
158 static_assert(H % HCHUNK == 0);
159 static_assert(LCNT <= 8);
160 static_assert(B*HCHUNK*W*C*4 <= TILE_BYTES);
161
162 adf::connect<adf::stream> (pin[0], split_graph.pin[0]);
163
164 for (int i = 0; i < LCNT; i++) {
165 k[i] = adf::kernel::create_object<TRANSPOSE<TT, B, HCHUNK, W, C, PAD_W>>();
166 adf::source(k[i]) = "transpose.cc";
167 adf::headers(k[i]) = {"transpose.h"};
168 adf::runtime<ratio>(k[i]) = 0.6;
169 if (B*C*HCHUNK*W*4 > MAX_PARAM_BYTES)
170 adf::single_buffer(k[i].out[0]);
171
172 adf::connect<adf::pktstream> (split_graph.pout[i], k[i].in[0]);
173 adf::connect<adf::window<B*C*HCHUNK*W*sizeof(TT)>> (k[i].out[0], concat_graph.pin[i]);
174
175 if ((i&0x1) == 1) {
176 adf::location<adf::kernel>(k[i]) = adf::location<adf::kernel>(k[i-1]) + adf::relative_offset({.col_offset=0, .row_offset=1});
177 }
178 }
179
180 adf::connect<adf::stream> (concat_graph.pout[0], pout[0]);
181
182 }
183
184};
188#endif // __TRANSPOSE_GRAPH_H__
Graph wrapper for two stream split.
Definition graph_split.h:185
Multi-instance graph using packet streams; this pktstream graph deadlocks for large H.
Definition graph_transpose.h:143
Single instance graph.
Definition graph_transpose.h:35
Multi instance graph.
Definition graph_transpose.h:69