пре 4 година · a41d1750ff
--- a/tf-addOp/build.sh
+++ b/tf-addOp/build.sh
@@ -1,15 +0,0 @@
 
				-#!/bin/bash
			
 
				-
			
 
				-if [ "$TF_CFLAGS" == "" ]; then
			
 
				-  export TF_CFLAGS=( $(python3 -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))') )
			
 
				-fi
			
 
				-if [ "$TF_LFLAGS" == "" ]; then
			
 
				-  export TF_LFLAGS=( $(python3 -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))') )
			
 
				-fi
			
 
				-
			
 
				-g++ -std=c++11 -shared zero_out.cc -o zero_out.so -fPIC ${TF_CFLAGS[@]} ${TF_LFLAGS[@]} -O2 -v
			
 
				-
			
 
				-#g++ -g -std=c++11 -shared zero_out.cc -o zero_out.so -fPIC -I/usr/local/lib/python3.6/dist-packages/tensorflow_core/include -D_GLIBCXX_USE_CXX11_ABI=0 -L/usr/local/lib/python3.6/dist-packages/tensorflow_core -l:libtensorflow_framework.so.2 -O2
			
 
				-
			
 
				-#g++ -g -std=c++11 -shared zero_out.cc -o zero_out.so -fPIC ${TF_CFLAGS[@]} ${TF_LFLAGS[@]} -O2 -Wall -Wl,-z,defs
			
 
				-
			
--- a/tf-addOp/model.png
+++ b/tf-addOp/model.png
--- a/tf-addOp/test.py
+++ b/tf-addOp/test.py
@@ -1,88 +0,0 @@
 
				-import tensorflow as tf

			
 
				-import tensorflow.keras as keras

			
 
				-from keras import layers

			
 
				-from keras.layers import Input, Embedding, LSTM, Dense, Dropout, Flatten, MaxPooling2D, Conv2D

			
 
				-from keras.models import Model, Sequential

			
 
				-from keras.datasets import mnist

			
 
				-from keras.utils import plot_model, to_categorical

			
 
				-

			
 
				-import numpy as np

			
 
				-from IPython import embed

			
 
				-

			
 
				-

			
 
				-batch_size = 128

			
 
				-num_classes = 10

			
 
				-epochs = 1 # 12

			
 
				-

			
 
				-# input image dimensions

			
 
				-img_rows, img_cols = 28, 28

			
 
				-

			
 
				-# the data, split between train and test sets

			
 
				-(x_train, y_train), (x_test, y_test) = mnist.load_data()

			
 
				-

			
 
				-x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)

			
 
				-x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)

			
 
				-input_shape = (img_rows, img_cols, 1)

			
 
				-

			
 
				-x_train = x_train.astype('float32')

			
 
				-x_test = x_test.astype('float32')

			
 
				-x_train /= 255

			
 
				-x_test /= 255

			
 
				-print('x_train shape:', x_train.shape)

			
 
				-print(x_train.shape[0], 'train samples')

			
 
				-print(x_test.shape[0], 'test samples')

			
 
				-

			
 
				-# convert class vectors to binary class matrices

			
 
				-y_train = to_categorical(y_train, num_classes)

			
 
				-y_test = to_categorical(y_test, num_classes)

			
 
				-

			
 
				-

			
 
				-class Linear(layers.Layer):

			
 
				-

			
 
				-  def __init__(self, units=32, input_dim=32):

			
 
				-    super(Linear, self).__init__()

			
 
				-    w_init = tf.random_normal_initializer()

			
 
				-    self.w = tf.Variable(initial_value=w_init(shape=(input_dim, units),

			
 
				-                                              dtype='float32'),

			
 
				-                         trainable=True)

			
 
				-    b_init = tf.zeros_initializer()

			
 
				-    self.b = tf.Variable(initial_value=b_init(shape=(units,),

			
 
				-                                              dtype='float32'),

			
 
				-                         trainable=True)

			
 
				-

			
 
				-  def call(self, inputs):

			
 
				-    print(inputs)

			
 
				-    embed()

			
 
				-    return tf.matmul(inputs, self.w) + self.b

			
 
				-

			
 
				-

			
 
				-model = Sequential()

			
 
				-model.add(Conv2D(32, kernel_size=(3, 3),

			
 
				-                 activation='relu',

			
 
				-                 input_shape=input_shape))

			
 
				-model.add(Conv2D(64, (3, 3), activation='relu'))

			
 
				-

			
 
				-model.add(MaxPooling2D(pool_size=(2, 2)))

			
 
				-model.add(Dropout(0.25))

			
 
				-model.add(Flatten())

			
 
				-model.add(Dense(128, activation='relu'))

			
 
				-model.add(Dropout(0.5))

			
 
				-model.add(Dense(num_classes, activation='softmax'))

			
 
				-

			
 
				-model.add(Linear(10,10))

			
 
				-

			
 
				-

			
 
				-model.compile(loss=keras.losses.categorical_crossentropy,

			
 
				-              optimizer=keras.optimizers.Adadelta(),

			
 
				-              metrics=['accuracy'])

			
 
				-

			
 
				-model.fit(x_train, y_train,

			
 
				-          batch_size=batch_size,

			
 
				-          epochs=epochs,

			
 
				-          verbose=1,

			
 
				-          validation_data=(x_test, y_test))

			
 
				-score = model.evaluate(x_test, y_test, verbose=0)

			
 
				-print('Test loss:', score[0])

			
 
				-print('Test accuracy:', score[1])

			
 
				-

			
 
				-plot_model(model, to_file='model.png', expand_nested=True, show_shapes=True)

			
--- a/tf-addOp/train.py
+++ b/tf-addOp/train.py
@@ -1,72 +0,0 @@
 
				-import tensorflow as tf
			
 
				-import tensorflow.keras as keras
			
 
				-from tensorflow.keras import layers
			
 
				-from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Dropout, Flatten, MaxPooling2D, Conv2D
			
 
				-from tensorflow.keras.models import Model, Sequential
			
 
				-from tensorflow.keras.datasets import mnist
			
 
				-from tensorflow.keras.utils import plot_model, to_categorical
			
 
				-
			
 
				-import numpy as np
			
 
				-from IPython import embed
			
 
				-
			
 
				-zero_out_module = tf.load_op_library('./zero_out.so')
			
 
				-
			
 
				-batch_size = 128
			
 
				-num_classes = 10
			
 
				-epochs = 1 # 12
			
 
				-
			
 
				-# input image dimensions
			
 
				-img_rows, img_cols = 28, 28
			
 
				-
			
 
				-# the data, split between train and test sets
			
 
				-(x_train, y_train), (x_test, y_test) = mnist.load_data()
			
 
				-
			
 
				-x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
			
 
				-x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
			
 
				-input_shape = (img_rows, img_cols, 1)
			
 
				-
			
 
				-x_train = x_train.astype('float32')
			
 
				-x_test = x_test.astype('float32')
			
 
				-x_train /= 255
			
 
				-x_test /= 255
			
 
				-print('x_train shape:', x_train.shape)
			
 
				-print(x_train.shape[0], 'train samples')
			
 
				-print(x_test.shape[0], 'test samples')
			
 
				-
			
 
				-# convert class vectors to binary class matrices
			
 
				-y_train = to_categorical(y_train, num_classes)
			
 
				-y_test = to_categorical(y_test, num_classes)
			
 
				-
			
 
				-class Linear(layers.Layer):
			
 
				-
			
 
				-  def __init__(self, units=32, input_dim=32):
			
 
				-    super(Linear, self).__init__()
			
 
				-  def call(self, inputs):
			
 
				-    ints = tf.dtypes.cast(inputs, dtype=tf.int32)
			
 
				-    print(ints)
			
 
				-    outs = zero_out_module.zero_out(ints)
			
 
				-    return tf.dtypes.cast(outs, dtype=tf.float32)
			
 
				-
			
 
				-model = Sequential()
			
 
				-model.add(Flatten())
			
 
				-model.add(Dense(128, activation='relu'))
			
 
				-model.add(Dropout(0.5))
			
 
				-model.add(Dense(num_classes, activation='softmax'))
			
 
				-
			
 
				-model.add(Linear())
			
 
				-
			
 
				-model.compile(loss=keras.losses.categorical_crossentropy,
			
 
				-              optimizer=keras.optimizers.Adadelta(),
			
 
				-              metrics=['accuracy'])
			
 
				-              
			
 
				-model.fit(x_train, y_train,
			
 
				-          batch_size=batch_size,
			
 
				-          epochs=epochs,
			
 
				-          verbose=1,
			
 
				-          validation_data=(x_test, y_test))
			
 
				-
			
 
				-score = model.evaluate(x_test, y_test, verbose=0)
			
 
				-print('Test loss:', score[0])
			
 
				-print('Test accuracy:', score[1])
			
 
				-
			
 
				-plot_model(model, to_file='model.png', expand_nested=True, show_shapes=True)
			
--- a/tf-addOp/zero_out.cc
+++ b/tf-addOp/zero_out.cc
@@ -1,49 +0,0 @@
 
				-#include "tensorflow/core/framework/op.h"
			
 
				-#include "tensorflow/core/framework/shape_inference.h"
			
 
				-#include "tensorflow/core/framework/function.h"
			
 
				-
			
 
				-using namespace tensorflow;
			
 
				-
			
 
				-REGISTER_OP("ZeroOut")
			
 
				-    .Input("to_zero: int32")
			
 
				-    .Output("zeroed: int32")
			
 
				-    .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
			
 
				-      c->set_output(0, c->input(0));
			
 
				-      return Status::OK();
			
 
				-    });
			
 
				-
			
 
				-#include "tensorflow/core/framework/op_kernel.h"
			
 
				-
			
 
				-using namespace tensorflow;
			
 
				-
			
 
				-class ZeroOutOp : public OpKernel {
			
 
				- public:
			
 
				-  explicit ZeroOutOp(OpKernelConstruction* context) : OpKernel(context) {}
			
 
				-
			
 
				-  void Compute(OpKernelContext* context) override {
			
 
				-    // Grab the input tensor
			
 
				-    const Tensor& input_tensor = context->input(0);
			
 
				-    auto input = input_tensor.flat<int32>();
			
 
				-
			
 
				-    printf("call n: %d\n", n++);
			
 
				-
			
 
				-    // Create an output tensor
			
 
				-    Tensor* output_tensor = NULL;
			
 
				-    OP_REQUIRES_OK(context, context->allocate_output(0, input_tensor.shape(),
			
 
				-                                                     &output_tensor));
			
 
				-    auto output_flat = output_tensor->flat<int32>();
			
 
				-
			
 
				-    // Set all but the first element of the output tensor to 0.
			
 
				-    const int N = input.size();
			
 
				-    
			
 
				-    for (int i = 1; i < N; i++) {
			
 
				-      output_flat(i) = 0;
			
 
				-    }
			
 
				-    // Preserve the first input value if possible.
			
 
				-    if (N > 0) output_flat(0) = input(0);
			
 
				-  }
			
 
				-
			
 
				-  int n = 0;
			
 
				-};
			
 
				-
			
 
				-REGISTER_KERNEL_BUILDER(Name("ZeroOut").Device(DEVICE_CPU), ZeroOutOp);
			
--- a/tf-addOp/zero_out.py
+++ b/tf-addOp/zero_out.py
@@ -1,7 +0,0 @@
 
				-import tensorflow as tf
			
 
				-zero_out_module = tf.load_op_library('./zero_out.so')
			
 
				-
			
 
				-print(zero_out_module.zero_out([[1, 2], [3, 4]]))
			
 
				-
			
 
				-# Prints
			
 
				-# array([[1, 0], [0, 0]], dtype=int32)
			
--- a/tf-addOp/zero_out.so
+++ b/tf-addOp/zero_out.so
--- a/tf-matMulOp/build.sh
+++ b/tf-matMulOp/build.sh
@@ -1,10 +0,0 @@
 
				-#!/bin/bash
			
 
				-
			
 
				-if [ "$TF_CFLAGS" == "" ]; then
			
 
				-  export TF_CFLAGS=( $(python3 -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))' 2>/dev/null) )
			
 
				-fi
			
 
				-if [ "$TF_LFLAGS" == "" ]; then
			
 
				-  export TF_LFLAGS=( $(python3 -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))' 2>/dev/null) )
			
 
				-fi
			
 
				-
			
 
				-g++ -g -std=c++11 -shared matMul.cc -o matMul.so -fPIC ${TF_CFLAGS[@]} ${TF_LFLAGS[@]} -O2 -Wall
			
--- a/tf-matMulOp/makefile
+++ b/tf-matMulOp/makefile
@@ -1,29 +0,0 @@
 
				-CXX=/usr/bin/g++
			
 
				-
			
 
				-FLAGS = -g -Wall -pthread -std=c++11
			
 
				-
			
 
				-TF_CFLAGS=$(shell python3 -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))' 2>/dev/null)
			
 
				-TF_LFLAGS=$(shell python3 -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))' 2>/dev/null)
			
 
				-
			
 
				-SRC_DIR=.
			
 
				-INC_DIR=.
			
 
				-BUILD_DIR=.
			
 
				-
			
 
				-SRCS=$(wildcard $(SRC_DIR)/*.cpp)
			
 
				-OBJS=$(patsubst $(SRC_DIR)/%.cpp,$(BUILD_DIR)/%.o,$(SRCS))
			
 
				-
			
 
				-EXECUTABLE=matMul.so
			
 
				-
			
 
				-all: dir $(BUILD_DIR)/$(EXECUTABLE)
			
 
				-
			
 
				-dir:
			
 
				-	mkdir -p $(BUILD_DIR)
			
 
				-
			
 
				-$(BUILD_DIR)/$(EXECUTABLE): $(OBJS)
			
 
				-	$(CXX) -shared $(TF_LFLAGS) -Wall -o $@ $^
			
 
				-
			
 
				-$(OBJS): $(BUILD_DIR)/%.o : $(SRC_DIR)/%.cpp $(INC_DIR)/%.hpp
			
 
				-	$(CXX) $(FLAGS) -fPIC -c $(TF_CFLAGS) -I$(INC_DIR) -o $@ $< -O2
			
 
				-
			
 
				-clean:
			
 
				-	rm -f $(BUILD_DIR)/*.o $(BUILD_DIR)/$(EXECUTABLE)
			
--- a/tf-matMulOp/matMul.cc
+++ b/tf-matMulOp/matMul.cc
@@ -1,165 +0,0 @@
 
				-#include "tensorflow/core/framework/op.h"
			
 
				-#include "tensorflow/core/framework/shape_inference.h"
			
 
				-#include "tensorflow/core/framework/function.h"
			
 
				-
			
 
				-#include "tensorflow/core/lib/math/math_util.h"
			
 
				-
			
 
				-using namespace tensorflow;
			
 
				-typedef FunctionDefHelper FDH;
			
 
				-
			
 
				-REGISTER_OP("MyMatMul")
			
 
				-    .Input("to_zero: int32")
			
 
				-    .Output("zeroed: int32")
			
 
				-    .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
			
 
				-      c->set_output(0, c->input(0));
			
 
				-      return Status::OK();
			
 
				-    });
			
 
				-
			
 
				-REGISTER_OP("MyConv2D")
			
 
				-    .Input("input: int32")
			
 
				-    .Input("filter: int32")
			
 
				-    .Output("output: int32")
			
 
				-    .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
			
 
				-      c->set_output(0, c->input(0));
			
 
				-      return Status::OK();
			
 
				-    });
			
 
				-    
			
 
				-#include "tensorflow/core/framework/op_kernel.h"
			
 
				-
			
 
				-using namespace tensorflow;
			
 
				-/*
			
 
				-class Conv2DOp : public OpKernel {
			
 
				- public:
			
 
				-  explicit Conv2DOp(OpKernelConstruction* context) : OpKernel(context) {}
			
 
				-
			
 
				-  void Compute(OpKernelContext* context) override {
			
 
				-    // Grab the input tensor
			
 
				-    const Tensor& input_tensor = context->input(0);
			
 
				-    auto input = input_tensor.flat<int32>();
			
 
				-
			
 
				-    printf("call n: %d\n", n++);
			
 
				-
			
 
				-    // Create an output tensor
			
 
				-    Tensor* output_tensor = NULL;
			
 
				-    OP_REQUIRES_OK(context, context->allocate_output(0, input_tensor.shape(),
			
 
				-                                                     &output_tensor));
			
 
				-    auto output_flat = output_tensor->flat<int32>();
			
 
				-
			
 
				-    // Set all but the first element of the output tensor to 0.
			
 
				-    const int N = input.size();
			
 
				-    
			
 
				-    for (int i = 1; i < N; i++) {
			
 
				-      output_flat(i) = 0;
			
 
				-    }
			
 
				-    // Preserve the first input value if possible.
			
 
				-    if (N > 0) output_flat(0) = input(0);
			
 
				-  }
			
 
				-
			
 
				-  int n = 0;
			
 
				-};
			
 
				-*/
			
 
				-
			
 
				-
			
 
				-class Conv2DOp : public OpKernel {
			
 
				- public:
			
 
				-  explicit Conv2DOp(OpKernelConstruction* context) : OpKernel(context) {
			
 
				-  }
			
 
				-
			
 
				-  void Compute(OpKernelContext* context) override {
			
 
				-    // Input tensor is of the following dimensions:
			
 
				-    // [ batch, in_rows, in_cols, in_depth ]
			
 
				-    const Tensor& input = context->input(0);
			
 
				-
			
 
				-    // Input filter is of the following dimensions:
			
 
				-    // [ filter_rows, filter_cols, in_depth, out_depth]
			
 
				-    const Tensor& filter = context->input(1);
			
 
				-
			
 
				-    TensorShape out_shape = input.shape();
			
 
				-
			
 
				-    // Output tensor is of the following dimensions:
			
 
				-    // [ in_batch, out_rows, out_cols, out_depth ]
			
 
				-    Tensor* output = nullptr;
			
 
				-    OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output));
			
 
				-
			
 
				-    std::cout << "Conv2D" << std::endl;
			
 
				-
			
 
				-    // If there is nothing to compute, return.
			
 
				-    if (out_shape.num_elements() == 0) {
			
 
				-      return;
			
 
				-    }
			
 
				-
			
 
				-    
			
 
				-  }
			
 
				-
			
 
				- private:
			
 
				-  //LaunchConv2DOp<Device, T> launcher_;
			
 
				-
			
 
				-  TF_DISALLOW_COPY_AND_ASSIGN(Conv2DOp);
			
 
				-};
			
 
				-
			
 
				-
			
 
				-REGISTER_KERNEL_BUILDER(Name("MyConv2D").Device(DEVICE_CPU), Conv2DOp);
			
 
				-
			
 
				-static Status MatMulGradHelper(FunctionDef* g, const string& opname,
			
 
				-                               const string& attr_adj_x,
			
 
				-                               const string& attr_adj_y, const string& x0,
			
 
				-                               bool ax0, const string& x1, bool ax1,
			
 
				-                               const string& y0, bool ay0, const string& y1,
			
 
				-                               bool ay1) {
			
 
				-  // The final outputs are "dx" and "dy". If we're broadcasting compute
			
 
				-  // intermediate nodes for now.
			
 
				-  std::vector<FDH::Node> nodes = {
			
 
				-      {{("dx")},
			
 
				-       opname,
			
 
				-       {x0, x1},
			
 
				-       {{"T", "$T"}, {attr_adj_x, ax0}, {attr_adj_y, ax1}}},
			
 
				-      {{("dy")},
			
 
				-       opname,
			
 
				-       {y0, y1},
			
 
				-       {{"T", "$T"}, {attr_adj_x, ay0}, {attr_adj_y, ay1}}},
			
 
				-  };
			
 
				-
			
 
				-  *g = FDH::Define(
			
 
				-      // Arg defs
			
 
				-      {"x: T", "y: T", "dz: T"},
			
 
				-      // Ret val defs
			
 
				-      {"dx: T", "dy: T"},
			
 
				-      // Attr defs
			
 
				-      {{"T: {half, float, double}"}},
			
 
				-      // Nodes
			
 
				-      nodes);
			
 
				-  return Status::OK();
			
 
				-}
			
 
				-Status MatMulGrad(const AttrSlice& attrs, FunctionDef* g) {
			
 
				-  const string opname = "MyMatMul";
			
 
				-  const string attr_adj_x = "transpose_a";
			
 
				-  const string attr_adj_y = "transpose_b";
			
 
				-  DataType T;
			
 
				-  TF_RETURN_IF_ERROR(GetNodeAttr(attrs, "T", &T));
			
 
				-  if (T == DT_COMPLEX64 || T == DT_COMPLEX128) {
			
 
				-    return errors::Unimplemented(
			
 
				-        "MatMul gradient for complex is not supported yet.");
			
 
				-  }
			
 
				-  bool ta;
			
 
				-  bool tb;
			
 
				-  TF_RETURN_IF_ERROR(GetNodeAttr(attrs, attr_adj_x, &ta));
			
 
				-  TF_RETURN_IF_ERROR(GetNodeAttr(attrs, attr_adj_y, &tb));
			
 
				-
			
 
				-  if (!ta && !tb) {
			
 
				-    return MatMulGradHelper(g, opname, attr_adj_x, attr_adj_y, "dz", false, "y",
			
 
				-                            true, "x", true, "dz", false);
			
 
				-  }
			
 
				-  if (!ta && tb) {
			
 
				-    return MatMulGradHelper(g, opname, attr_adj_x, attr_adj_y, "dz", false, "y",
			
 
				-                            false, "dz", true, "x", false);
			
 
				-  }
			
 
				-  if (ta && !tb) {
			
 
				-    return MatMulGradHelper(g, opname, attr_adj_x, attr_adj_y, "y", false, "dz",
			
 
				-                            true, "x", false, "dz", false);
			
 
				-  }
			
 
				-  CHECK(ta && tb);
			
 
				-  return MatMulGradHelper(g, opname, attr_adj_x, attr_adj_y, "y", true, "dz",
			
 
				-                          true, "dz", true, "x", true);
			
 
				-}
			
 
				-
			
 
				-REGISTER_OP_GRADIENT("MyConv2D", MatMulGrad);
			
--- a/tf-matMulOp/matMul.so
+++ b/tf-matMulOp/matMul.so
--- a/tf-matMulOp/train.py
+++ b/tf-matMulOp/train.py
@@ -1,70 +0,0 @@
 
				-import tensorflow as tf
			
 
				-import tensorflow.keras as keras
			
 
				-from tensorflow.keras import layers
			
 
				-from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Dropout, Flatten, MaxPooling2D, Conv2D
			
 
				-from tensorflow.keras.models import Model, Sequential
			
 
				-from tensorflow.keras.datasets import mnist
			
 
				-from tensorflow.keras.utils import plot_model, to_categorical
			
 
				-
			
 
				-import numpy as np
			
 
				-from IPython import embed
			
 
				-
			
 
				-my_matmul_module = tf.load_op_library('./matMul.so')
			
 
				-
			
 
				-batch_size = 128
			
 
				-num_classes = 10
			
 
				-epochs = 1 # 12
			
 
				-
			
 
				-# input image dimensions
			
 
				-img_rows, img_cols = 28, 28
			
 
				-
			
 
				-# the data, split between train and test sets
			
 
				-(x_train, y_train), (x_test, y_test) = mnist.load_data()
			
 
				-
			
 
				-x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
			
 
				-x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
			
 
				-input_shape = (img_rows, img_cols, 1)
			
 
				-
			
 
				-x_train = x_train.astype('float32')
			
 
				-x_test = x_test.astype('float32')
			
 
				-x_train /= 255
			
 
				-x_test /= 255
			
 
				-print('x_train shape:', x_train.shape)
			
 
				-print(x_train.shape[0], 'train samples')
			
 
				-print(x_test.shape[0], 'test samples')
			
 
				-
			
 
				-# convert class vectors to binary class matrices
			
 
				-y_train = to_categorical(y_train, num_classes)
			
 
				-y_test = to_categorical(y_test, num_classes)
			
 
				-
			
 
				-class Conv2DFPGA(layers.Layer):
			
 
				-  def __init__(self, kernel):
			
 
				-    super(Conv2DFPGA, self).__init__()
			
 
				-    self.kernel = kernel
			
 
				-  def call(self, inputs):
			
 
				-    ints = tf.dtypes.cast(inputs, dtype=tf.int32)
			
 
				-    outs = my_matmul_module.MyConv2D(input=ints, filter=ints)
			
 
				-    return tf.dtypes.cast(outs, dtype=tf.float32)
			
 
				-
			
 
				-model = Sequential()
			
 
				-model.add(Conv2DFPGA([0,0]))
			
 
				-model.add(Flatten())
			
 
				-model.add(Dense(128, activation='relu'))
			
 
				-model.add(Dropout(0.5))
			
 
				-model.add(Dense(num_classes, activation='softmax'))
			
 
				-
			
 
				-model.compile(loss=keras.losses.categorical_crossentropy,
			
 
				-              optimizer=keras.optimizers.Adadelta(),
			
 
				-              metrics=['accuracy'])
			
 
				-              
			
 
				-model.fit(x_train, y_train,
			
 
				-          batch_size=batch_size,
			
 
				-          epochs=epochs,
			
 
				-          verbose=1,
			
 
				-          validation_data=(x_test, y_test))
			
 
				-
			
 
				-score = model.evaluate(x_test, y_test, verbose=0)
			
 
				-print('Test loss:', score[0])
			
 
				-print('Test accuracy:', score[1])
			
 
				-
			
 
				-plot_model(model, to_file='model.png', expand_nested=True, show_shapes=True)