onnx2versal
Loading...
Searching...
No Matches
add.h
1#ifndef ADD_H_
2#define ADD_H_
3
4#include <assert.h>
5#include <type_traits>
6#include <adf.h>
7
8
23template <typename TT, int W, int IS_RELU>
24class AddScalar {
25 public:
26 void filter(
27 input_stream<TT>* restrict inA,
28 input_stream<TT>* restrict inB,
29 output_stream<TT>* restrict out
30 );
31
32 static void registerKernelClass() {
33 static_assert(sizeof(TT) == 4);
34 REGISTER_FUNCTION(AddScalar::filter);
35 }
36};
37
38
43template <typename TT, int W, int IS_RELU>
44class AddFloat {
45 public:
46 void filter(
47 input_stream<TT>* restrict inA,
48 input_stream<TT>* restrict inB,
49 output_stream<TT>* restrict out
50 );
51
52 static void registerKernelClass() {
53 static_assert(W%4 == 0 && (std::is_same<TT, float>::value));
54 REGISTER_FUNCTION(AddFloat::filter);
55 }
56};
60#endif // ADD_H_
Vector implementation for float, AddFloat<float_t, 16384, 1> takes 24595 cycles (window 5668*8=45344...
Definition add.h:44
Scalar implementation, AddScalar<float_t, 16384, 1> takes 131102 cycles, (window 45097*8=360776 cycle...
Definition add.h:24