onnx2versal
Loading...
Searching...
No Matches
mac.h
1#ifndef MAC_KERNEL_H
2#define MAC_KERNEL_H
3
4#include <type_traits>
5#include <assert.h>
6#include <adf.h>
7
8
26template <typename TT, int B, int W, int IS_RELU>
27class MacScalar {
28
29 private:
30 alignas(32) TT (&weights)[W];
31 alignas(32) TT (&bias)[W];
32
33 public:
34 MacScalar (
35 TT (&w)[W],
36 TT (&b)[W]
37 ): weights(w), bias(b) {};
38
39 void filter(
40 input_window<TT>* in,
41 output_window<TT>* out
42 );
43
44 static void registerKernelClass() {
45 REGISTER_FUNCTION(MacScalar::filter);
46 REGISTER_PARAMETER(weights);
47 REGISTER_PARAMETER(bias);
48 }
49};
50
51
56template <typename TT, int B, int W, int IS_RELU>
57class MacFloat {
58
59 private:
60 alignas(32) float (&weights)[W];
61 alignas(32) float (&bias)[W];
62
63 public:
64 MacFloat (
65 float (&w)[W],
66 float (&b)[W]
67 ): weights(w), bias(b) {};
68
69 void filter(
70 input_window<float>* in,
71 output_window<float>* out
72 );
73
74 static void registerKernelClass() {
75 static_assert(W % 8 == 0 && (std::is_same<TT, float>::value));
76 REGISTER_FUNCTION(MacFloat::filter);
77 REGISTER_PARAMETER(weights);
78 REGISTER_PARAMETER(bias);
79 }
80};
84#endif // MAC_KERNEL_H
Scalar implementation, MacFloat<196,128> takes 1881 cycles.
Definition mac.h:57
Scalar implementation; the cycle count for MacScalar<196,128> is not recorded.
Definition mac.h:27