38template <
int INP_H,
int INP_W,
int OUT_W,
int OUT_W_PAD,
int STEP_H,
int STEP_W,
39 int B,
int C,
int M,
int KH,
int KW,
int GROUP,
int IS_RELU>
43 static constexpr int OUT_H = (INP_H - KH) / STEP_H + 1;
44 static constexpr int C_PER_M = C / GROUP;
45 alignas(32)
float (&weights)[M*KH*KW*C];
46 alignas(32)
float (&bias)[M];
50 float (&w)[M*KH*KW*C],
52 ): weights(w), bias(b) {};
55 input_window<float>* in,
56 output_window<float>* out
59 static void registerKernelClass() {
60 static_assert(C % GROUP == 0);
61 REGISTER_FUNCTION(ConvReluScalar::filter);
62 REGISTER_PARAMETER(weights);
63 REGISTER_PARAMETER(bias);
75template <
int INP_H,
int INP_W,
int OUT_W,
int OUT_W_PAD,
int STEP_H,
int STEP_W,
76 int B,
int C,
int M,
int KH,
int KW,
int GROUP,
int IS_RELU>
80 static constexpr int OUT_H = (INP_H - KH) / STEP_H + 1;
81 alignas(32)
float (&weights)[M*C*KH*8];
82 alignas(32)
float (&bias)[M];
88 ): weights(w), bias(b) {};
91 input_window<float>* in,
92 output_window<float>* out
95 static void registerKernelClass() {
96 static_assert(GROUP == 1);
99 static_assert(INP_W%4==0);
100 static_assert(OUT_W_PAD%8==0);
101 static_assert(STEP_H == 1 && STEP_W == 1);
102 REGISTER_FUNCTION(Conv5x5on8Relu::filter);
103 REGISTER_PARAMETER(weights);
104 REGISTER_PARAMETER(bias);
116template <
int INP_H,
int INP_W,
int OUT_W,
int OUT_W_PAD,
int STEP_H,
int STEP_W,
117 int B,
int C,
int M,
int KH,
int KW,
int GROUP,
int IS_RELU>
121 static constexpr int OUT_H = (INP_H - KH) / STEP_H + 1;
122 static constexpr int CKK_ROW_SIZE = C*((KH*KW+3)/4*4);
124 alignas(32)
float (&weights)[M*CKK_ROW_SIZE];
125 alignas(32)
float (&bias)[M];
129 float (&w)[M*CKK_ROW_SIZE],
131 ): weights(w), bias(b) {};
134 input_window<float>* in,
135 output_window<float>* out
138 static void registerKernelClass() {
139 static_assert(GROUP == 1);
140 static_assert(KW<=4);
141 static_assert(INP_W%4==0);
142 static_assert(OUT_W_PAD%8==0);
143 static_assert(STEP_H == 1 && STEP_W == 1);
144 REGISTER_FUNCTION(ConvHx4Relu::filter);
145 REGISTER_PARAMETER(weights);
146 REGISTER_PARAMETER(bias);
157template <
int INP_H,
int INP_W,
int OUT_W,
int OUT_W_PAD,
int STEP_H,
int STEP_W,
158 int B,
int C,
int M,
int KH,
int KW,
int GROUP,
int IS_RELU>
162 static constexpr int OUT_H = (INP_H - KH) / STEP_H + 1;
164 alignas(32)
float (&weights)[M*C];
165 alignas(32)
float (&bias)[M];
171 ): weights(w), bias(b) {};
174 input_window<float>* in,
175 output_window<float>* out
178 static void registerKernelClass() {
179 static_assert(GROUP == 1);
180 static_assert(KH==1);
181 static_assert(KW==1);
182 static_assert(INP_W%4==0);
183 static_assert(OUT_W_PAD%8==0 && STEP_W==1 || OUT_W_PAD%4==0 && STEP_W==2);
184 static_assert(STEP_H == 1 || STEP_H == 2);
185 static_assert(STEP_W == 1 || STEP_W == 2);
186 REGISTER_FUNCTION(Conv1x1Relu::filter);
187 REGISTER_PARAMETER(weights);
188 REGISTER_PARAMETER(bias);
200template <
int INP_H,
int INP_W,
int OUT_W,
int OUT_W_PAD,
int STEP_H,
int STEP_W,
201 int B,
int C,
int M,
int KH,
int KW,
int GROUP,
int IS_RELU>
205 static constexpr int OUT_H = (INP_H - KH) / STEP_H + 1;
206 static constexpr int C_PER_M = C / GROUP;
207 static constexpr int CKK_ROW_SIZE = C_PER_M*KH*KW;
208 alignas(32)
float (&bias)[M];
209 alignas(32)
float ckk_row[CKK_ROW_SIZE];
217 input_window<float>* in,
218 input_stream<float>* weights,
219 output_stream<float>* out
222 static void registerKernelClass() {
223 static_assert(C % GROUP == 0);
224 REGISTER_FUNCTION(ConvReluScalarStream::filter);
225 REGISTER_PARAMETER(bias);
236template <
int INP_H,
int INP_W,
int OUT_W,
int OUT_W_PAD,
int STEP_H,
int STEP_W,
237 int B,
int C,
int M,
int KH,
int KW,
int GROUP,
int IS_RELU>
241 static constexpr int OUT_H = (INP_H - KH) / STEP_H + 1;
242 static constexpr int C_PER_M = C / GROUP;
243 static constexpr int CKK_ROW_SIZE = C_PER_M*KH*8;
244 alignas(32)
float (&bias)[M];
245 alignas(32)
float ckk_row[CKK_ROW_SIZE];
246 alignas(32)
float width_row[OUT_W_PAD];
254 input_window<float>* in,
255 input_stream<float>* weights,
256 output_stream<float>* out
259 static void registerKernelClass() {
260 static_assert(C % GROUP == 0);
261 static_assert(KW<=8);
262 static_assert(INP_W%4==0);
263 static_assert(OUT_W_PAD%8==0);
264 static_assert(STEP_H == 1 && STEP_W == 1);
265 REGISTER_FUNCTION(ConvHx8ReluStream::filter);
266 REGISTER_PARAMETER(bias);
279template <
int INP_H,
int INP_W,
int OUT_W,
int OUT_W_PAD,
int STEP_H,
int STEP_W,
280 int B,
int C,
int M,
int KH,
int KW,
int GROUP,
int IS_RELU>
284 static constexpr int OUT_H = (INP_H - KH) / STEP_H + 1;
285 static constexpr int C_PER_M = C / GROUP;
286 static constexpr int CKK_ROW_SIZE = C_PER_M*((KH*KW+3)/4*4);
287 static constexpr unsigned int X_OFFSET = (STEP_W == 1) ? 0x76543210 : ((STEP_W == 2) ? 0x00006420 : 0x0000c840);
288 static constexpr int W_LOOP_STEP = (STEP_W == 1) ? 8 : 4;
289 static constexpr int W_LOOP_IN_STEP = (STEP_W != 4) ? 8 : 16;
291 alignas(32)
float (&bias)[M];
292 alignas(32)
float ckk_row[CKK_ROW_SIZE];
300 input_window<float>* in,
301 input_stream<float>* weights,
302 output_stream<float>* out
305 static void registerKernelClass() {
306 static_assert(KW<=4);
307 static_assert(INP_W%4==0);
308 static_assert(OUT_W_PAD%8==0 && STEP_W==1 || OUT_W_PAD%4==0 && STEP_W==2 || OUT_W_PAD%4==0 && STEP_W == 4);
309 static_assert(STEP_H == 1 || STEP_H == 2 || STEP_H == 4);
310 static_assert(STEP_W == 1 || STEP_W == 2 || STEP_W == 4);
311 REGISTER_FUNCTION(ConvHx4ReluStream::filter);
312 REGISTER_PARAMETER(bias);
323template <
int INP_H,
int INP_W,
int OUT_W,
int OUT_W_PAD,
int STEP_H,
int STEP_W,
324 int B,
int C,
int M,
int KH,
int KW,
int GROUP,
int IS_RELU>
328 static constexpr int OUT_H = (INP_H - KH) / STEP_H + 1;
329 static constexpr int CKK_ROW_SIZE = C*((KH*KW+3)/4*4);
330 static constexpr unsigned int X_OFFSET = 0x76543210;
332 alignas(32)
float (&bias)[M];
333 alignas(32)
float ckk_row[CKK_ROW_SIZE];
334 alignas(32)
float out_row[OUT_W_PAD];
342 input_window<float>* in,
343 input_stream<float>* weights,
344 output_stream<float>* out
347 static void registerKernelClass() {
348 static_assert(GROUP == 1);
349 static_assert(KH==3);
350 static_assert(KW==3);
351 static_assert(INP_W%4==0);
352 static_assert(OUT_W_PAD%8==0);
353 static_assert(STEP_H == 1);
354 static_assert(STEP_W == 1);
355 REGISTER_FUNCTION(ConvHx4ReluStreamMultiRow::filter);
356 REGISTER_PARAMETER(bias);
366template <
int INP_H,
int INP_W,
int OUT_W,
int OUT_W_PAD,
int STEP_H,
int STEP_W,
367 int B,
int C,
int M,
int KH,
int KW,
int GROUP,
int IS_RELU>
371 static constexpr int OUT_H = (INP_H - KH) / STEP_H + 1;
372 static constexpr int C_PER_M = C / GROUP;
373 static constexpr int CKK_ROW_SIZE = C_PER_M*((KH*KW+3)/4*4);
375 alignas(32)
float (&bias)[M];
376 alignas(32)
float ckk_row[CKK_ROW_SIZE];
384 input_window<float>* in,
385 input_stream<float>* weights,
386 output_stream<float>* out
389 static void registerKernelClass() {
390 static_assert(KW<=4);
391 static_assert(INP_W%4==0);
392 static_assert(OUT_W_PAD == 4);
393 static_assert(STEP_H == 1);
394 static_assert(STEP_W == 1);
395 REGISTER_FUNCTION(ConvHx4Out4ReluStream::filter);
396 REGISTER_PARAMETER(bias);
407template <
int INP_H,
int INP_W,
int OUT_W,
int OUT_W_PAD,
int STEP_H,
int STEP_W,
408 int B,
int C,
int M,
int KH,
int KW,
int GROUP,
int IS_RELU>
412 static constexpr int OUT_H = (INP_H - KH) / STEP_H + 1;
413 static constexpr int CKK_ROW_SIZE = (C+3)/4*4;
415 alignas(32)
float (&bias)[M];
416 alignas(32)
float ckk_row[CKK_ROW_SIZE];
424 input_window<float>* in,
425 input_stream<float>* weights,
426 output_stream<float>* out
429 static void registerKernelClass() {
430 static_assert(GROUP == 1);
431 static_assert(KH==1);
432 static_assert(KW==1);
433 static_assert(INP_W%4==0);
434 static_assert(OUT_W_PAD%8==0 && STEP_W==1 || OUT_W_PAD%4==0 && STEP_W==2);
435 static_assert(STEP_H == 1 || STEP_H == 2);
436 static_assert(STEP_W == 1 || STEP_W == 2);
437 REGISTER_FUNCTION(Conv1x1ReluStream::filter);
438 REGISTER_PARAMETER(bias);
448template <
int INP_H,
int INP_W,
int OUT_W,
int OUT_W_PAD,
int STEP_H,
int STEP_W,
449 int B,
int C,
int M,
int KH,
int KW,
int GROUP,
int IS_RELU>
453 static constexpr int OUT_H = (INP_H - KH) / STEP_H + 1;
454 static constexpr int C_PER_M = C / GROUP;
455 static constexpr int CKK_ROW_SIZE = (C_PER_M+3)/4*4;
457 alignas(32)
float (&bias)[M];
458 alignas(32)
float ckk_row[CKK_ROW_SIZE];
466 input_window<float>* in,
467 input_stream<float>* weights,
468 output_stream<float>* out
471 static void registerKernelClass() {
472 static_assert(KW<=4);
473 static_assert(INP_W%4==0);
474 static_assert(OUT_W_PAD == 4);
475 static_assert(STEP_H == 1);
476 static_assert(STEP_W == 1);
477 REGISTER_FUNCTION(Conv1x1Out4ReluStream::filter);
478 REGISTER_PARAMETER(bias);
488template <
int INP_H,
int INP_W,
int OUT_W,
int OUT_W_PAD,
int STEP_H,
int STEP_W,
489 int B,
int C,
int M,
int KH,
int KW,
int GROUP,
int IS_RELU>
493 static constexpr int OUT_H = (INP_H - KH) / STEP_H + 1;
494 static constexpr int C_PER_M = C / GROUP;
495 static constexpr int CKK_ROW_SIZE = C_PER_M*((KH*KW+3)/4*4);
496 static constexpr int INP_SIZE = B*C*INP_H*INP_W;
498 static constexpr unsigned int X_OFFSET = (STEP_W == 1) ? 0x76543210 : ((STEP_W == 2) ? 0x00006420 : 0x0000c840);
499 static constexpr int W_LOOP_STEP = (STEP_W == 1) ? 8 : 4;
500 static constexpr int W_LOOP_IN_STEP = (STEP_W != 4) ? 8 : 16;
502 alignas(32)
float (&bias)[M];
503 alignas(32)
float ckk_row[CKK_ROW_SIZE];
504 alignas(32)
float in[INP_SIZE];
512 input_pktstream* in_s,
513 input_stream<float>* weights,
514 output_stream<float>* out
517 static void registerKernelClass() {
518 static_assert(KW<=4);
519 static_assert(INP_W%4==0);
520 static_assert(OUT_W_PAD%8==0 && STEP_W==1 || OUT_W_PAD%4==0 && STEP_W==2 || OUT_W_PAD%4==0 && STEP_W == 4);
521 static_assert(STEP_H == 1 || STEP_H == 2 || STEP_H == 4);
522 static_assert(STEP_W == 1 || STEP_W == 2 || STEP_W == 4);
523 REGISTER_FUNCTION(ConvHx4ReluPktStream::filter);
524 REGISTER_PARAMETER(bias);
534template <
int INP_H,
int INP_W,
int OUT_W,
int OUT_W_PAD,
int STEP_H,
int STEP_W,
535 int B,
int C,
int M,
int KH,
int KW,
int GROUP,
int IS_RELU>
539 static constexpr int OUT_H = (INP_H - KH) / STEP_H + 1;
540 static constexpr int CKK_ROW_SIZE = (C+3)/4*4;
541 static constexpr int INP_SIZE = B*C*INP_H*INP_W;
543 alignas(32)
float (&bias)[M];
544 alignas(32)
float ckk_row[CKK_ROW_SIZE];
545 alignas(32)
float in[INP_SIZE];
553 input_pktstream* in_s,
554 input_stream<float>* weights,
555 output_stream<float>* out
558 static void registerKernelClass() {
559 static_assert(GROUP == 1);
560 static_assert(KH==1);
561 static_assert(KW==1);
562 static_assert(INP_W%4==0);
563 static_assert(OUT_W_PAD%8==0 && STEP_W==1 || OUT_W_PAD%4==0 && STEP_W==2);
564 static_assert(STEP_H == 1 || STEP_H == 2);
565 static_assert(STEP_W == 1 || STEP_W == 2);
566 REGISTER_FUNCTION(Conv1x1ReluPktStream::filter);
567 REGISTER_PARAMETER(bias);
Vector stream implementation for OUT_W == 4 < 8, stores biases, requires KH==KW==1,...
Definition conv.h:450
Vector stream implementation for BCHW, stores biases, requires KH==KW==1, INP_W%4==0,...
Definition conv.h:536
Vector stream implementation for BCHW, stores biases, requires KH==KW==1, INP_W%4==0,...
Definition conv.h:409
Vector stream implementation for BCHW, stores weights and biases, requires KH==KW==1,...
Definition conv.h:159
Vector implementation for 5x5 BCHW, stores weights and biases, requires KH==KW==5,...
Definition conv.h:77
Vector stream implementation for OUT_W == 4 < 8, stores biases, requires KW<=4, INP_W%4==0,...
Definition conv.h:368
Vector stream implementation for BCHW, stores biases, requires KW<=4, INP_W%4==0, OUT_W_PAD%(8|4)==0,...
Definition conv.h:490
Vector stream implementation for BCHW, stores biases, requires KH==KW==3, INP_W%4==0,...
Definition conv.h:325
Vector stream implementation for BCHW, stores biases, requires KW<=4, INP_W%4==0, OUT_W_PAD%(8|4)==0,...
Definition conv.h:281
Vector implementation for 3x3 BCHW, stores weights and biases, requires KW<=4, INP_W%4==0,...
Definition conv.h:118
Scalar stream implementation for BCHW, stores biases, requires C%GROUP==0, ConvHx8ReluStream<28,...
Definition conv.h:238
Scalar stream implementation for BCHW, stores biases, requires C%GROUP==0, ConvReluScalarStream<26,...
Definition conv.h:202
Scalar implementation for BCHW, stores weights and biases, requires C%GROUP==0, ConvReluScalar<28,...
Definition conv.h:40