34 alignas(32) TTPARAM (&weights)[N*K];
35 alignas(32) int32_t (&bias)[N];
44 alignas(32) TT in_row[K];
56 ): weights(w), bias(b), x_scale(x_scale), w_scale(w_scale), y_scale(y_scale), x_zero(x_zero), w_zero(w_zero), y_zero(y_zero) {
57 scale = x_scale*w_scale/y_scale;
62 output_stream<TT>* out
65 static void registerKernelClass() {
66 static_assert((std::is_same<TT, int8_t>::value) || (std::is_same<TT, uint8_t>::value));
67 REGISTER_FUNCTION(QgemmScalar::filter);
68 REGISTER_PARAMETER(weights);
69 REGISTER_PARAMETER(bias);
82 alignas(32) TTPARAM (&weights)[N*K];
83 alignas(32) int32_t (&bias)[N];
96 alignas(32) TT in_row[K];
111 input_stream<TT>* in,
112 output_stream<TT>* out
115 static void registerKernelClass() {
116 static_assert((std::is_same<TT, int8_t>::value) || (std::is_same<TT, uint8_t>::value));
117 static_assert(K % 16 == 0);
118 static_assert(N % 16 == 0);
120 REGISTER_PARAMETER(weights);
121 REGISTER_PARAMETER(bias);
134 alignas(32) int32_t (&bias)[N];
147 alignas(32) TT in_row[K];
161 input_stream<TT>* in,
162 input_stream<TTPARAM>* weight,
163 output_stream<TT>* out
166 static void registerKernelClass() {
167 static_assert((std::is_same<TT, int8_t>::value) || (std::is_same<TT, uint8_t>::value));
168 static_assert((std::is_same<TTPARAM, int8_t>::value) || (std::is_same<TTPARAM, uint8_t>::value));
169 static_assert(K % 16 == 0);
170 static_assert(N % 16 == 0);
171 REGISTER_FUNCTION(QgemmStream::filter);
172 REGISTER_PARAMETER(bias);
Scalar implementation for MK*KN, stores weights and biases, QgemmScalar<a,a,1,80,32> takes 17340 cycl...
Definition qgemm.h:31
Vector implementation for MK*KN, stores weights and biases, requires N%16=0 Qgemm<a,...
Definition qgemm.h:79