27 input_stream<TT>* restrict inA,
28 input_stream<TT>* restrict inB,
29 output_stream<TT>* restrict out
32 static void registerKernelClass() {
33 static_assert(
sizeof(TT) == 4);
34 REGISTER_FUNCTION(AddScalar::filter);
47 input_stream<TT>* restrict inA,
48 input_stream<TT>* restrict inB,
49 output_stream<TT>* restrict out
52 static void registerKernelClass() {
53 static_assert(W%4 == 0 && (std::is_same<TT, float>::value));
54 REGISTER_FUNCTION(AddFloat::filter);
Vector implementation for float, AddScalar<float_t, 16384, 1> takes 24595 cycles (window 5668*8=45344...
Definition add.h:44
Scalar implementation, AddScalar<float_t, 16384, 1> takes 131102 cycles (window 45097*8=360776 cycle...
Definition add.h:24