4 gadi atpakaļ · dd6d8a9524
--- a/examples/screengrab.py
+++ b/examples/screengrab.py
@@ -22,14 +22,14 @@ width, height = 228, 228
 
				 sct = mss()
			
 
				 stop = 0
			
 
				 
			
 
				-a = layers.Input(dtype=tf.int32, shape=(width, height, 3))
			
 
				+a = layers.Input(dtype=tf.float32, shape=(width, height, 3))
			
 
				 z = Conv2DFPGA(1)(a)
			
 
				 model = models.Model(inputs=a, outputs=z)
			
 
				 
			
 
				 
			
 
				-#model.compile(loss=tf.keras.losses.categorical_crossentropy,
			
 
				-#              optimizer=tf.keras.optimizers.Adadelta(),
			
 
				-#              metrics=['accuracy'])
			
 
				+model.compile(loss=tf.keras.losses.categorical_crossentropy,
			
 
				+              optimizer=tf.keras.optimizers.Adadelta(),
			
 
				+              metrics=['accuracy'])
			
 
				 
			
 
				 sct_img = sct.grab(bounding_box)
			
 
				 np_img = np.array(sct_img)
			
@@ -46,14 +46,15 @@ while True:
 
				     [randint(0,256), randint(0,256), randint(0,256)],
			
 
				     10
			
 
				   )
			
 
				-  img32 = tf.cast(resized_image, tf.int32)
			
 
				+  img32 = tf.cast(resized_image, tf.float32)
			
 
				   #img32 = np.expand_dims(img32, axis=2)
			
 
				 
			
 
				   cv2.imshow('screen', resized_image)
			
 
				   x,y,w,h = cv2.getWindowImageRect('screen')
			
 
				   batch = np.expand_dims(img32, axis=0)
			
 
				-  batch = tf.tile(batch, [2,1,1,1])
			
 
				+  batch = tf.tile(batch, [1,1,1,1])
			
 
				 
			
 
				+  print(batch.shape)
			
 
				 
			
 
				   predictions = model.predict(batch)
			
 
				 
			
--- a/examples/train.py
+++ b/examples/train.py
@@ -39,7 +39,7 @@ print(x_test.shape[0], 'test samples')
 
				 y_train = to_categorical(y_train, num_classes)
			
 
				 y_test = to_categorical(y_test, num_classes)
			
 
				 
			
 
				-a = layers.Input(dtype=tf.int32, shape=(28, 28, 1))
			
 
				+a = layers.Input(shape=(28, 28, 1))
			
 
				 b = Conv2DFPGA(2)(a)
			
 
				 c = Conv2DFPGA(1)(a)
			
 
				 d = Conv2DFPGA(1)(b)
			
--- a/include/conv2D.hpp
+++ b/include/conv2D.hpp
@@ -33,8 +33,6 @@ namespace tf_lib {
 
				     private:
			
 
				 
			
 
				       int instance = -1;
			
 
				-      int delay = 1000;
			
 
				-
			
 
				       
			
 
				       int tagCounter = 0;
			
 
				 
			
--- a/layers/conv2D.py
+++ b/layers/conv2D.py
@@ -35,6 +35,4 @@ class Conv2D(layers.Layer):
 
				 
			
 
				   def call(self, inputs):
			
 
				 
			
 
				-    #out = tf.Tensor(tf.int32, shape=inputs.shape)
			
 
				-    intKernel = tf.cast(self.kernel, dtype=tf.int32)
			
 
				-    return load_op.op_lib.MyConv2D(input=inputs, filter=intKernel, delay=0*self.filters)
			
 
				+    return load_op.op_lib.MyConv2D(input=inputs, filter=self.kernel)
			
--- a/lib/mlfpga/include/modules.hpp
+++ b/lib/mlfpga/include/modules.hpp
@@ -9,7 +9,7 @@
 
				   MOD_DEF( conv2D_2x11_Module, 0x9323eb24, "2D Konvolution 2x11", 224*224, 224*224 ),   \
			
 
				   MOD_DEF( neuronModule, 0x03b30000, "Neuron", 21, 1 ), \
			
 
				   MOD_DEF( dummyBigModule, 0x2cb31e7c, "Dummy 1024", 1024, 1024), \
			
 
				-  MOD_DEF( conv2D_5x5_Module, 0x4cd2e19c, "2D Konvolution 5x5", 228*228, 228*228)
			
 
				+  MOD_DEF( conv2D_5x5_Module, 0x4cd2e19c, "2D Konvolution 5x5", 5*5+228*228, 228*228)
			
 
				 
			
 
				 #define MOD_DEF( identifier, id, name, sendLen, recvLen )  identifier
			
 
				 enum Module { MODS_DEF };
			
--- a/src/conv2D.cpp
+++ b/src/conv2D.cpp
@@ -9,8 +9,6 @@ namespace tf_lib {
 
				 
			
 
				   Conv2DOp::Conv2DOp(OpKernelConstruction* context) : AsyncOpKernel(context) {
			
 
				     instance = instances++;
			
 
				-    OP_REQUIRES_OK(context, context->GetAttr("delay", &delay));
			
 
				-
			
 
				   };
			
 
				 
			
 
				   void Conv2DOp::ComputeAsync(OpKernelContext* context, DoneCallback done) {
			
@@ -52,8 +50,9 @@ namespace tf_lib {
 
				     OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output));
			
 
				 
			
 
				 
			
 
				-    auto input_tensor = input.tensor<int32, 4>();
			
 
				-    auto output_tensor = output->tensor<int32, 4>();
			
 
				+    auto input_tensor = input.tensor<float, 4>();
			
 
				+    auto kernel_tensor = kernel.tensor<float, 4>();
			
 
				+    auto output_tensor = output->tensor<float, 4>();
			
 
				 
			
 
				     auto worker = connectionManager.createWorker(Module::conv2D_5x5_Module, batchSize * channels * filters);
			
 
				     {
			
@@ -65,9 +64,15 @@ namespace tf_lib {
 
				         for(int channel=0; channel<channels; channel++) {
			
 
				           for(int filter=0; filter<filters; filter++) {
			
 
				             auto job = jobs->getJob(sample * channels * filters + channel * filters + filter);
			
 
				+            
			
 
				+            for(int x=0; x<5; x++) {
			
 
				+              for(int y=0; y<5; y++) {
			
 
				+                job->setPayload(5*5 + x*outputSize + y, *((uint32_t*)&kernel_tensor(filter, y, x, channel)));
			
 
				+              }
			
 
				+            }
			
 
				             for(int x=0; x<outputSize; x++) {
			
 
				               for(int y=0; y<outputSize; y++) {
			
 
				-                job->setPayload(x*outputSize + y, input_tensor(sample, x, y, channel));
			
 
				+                job->setPayload(5*5 + x*outputSize + y, *((uint32_t*)&input_tensor(sample, y, x, channel)));
			
 
				               }
			
 
				             }
			
 
				             job->setReady();
			
@@ -83,7 +88,7 @@ namespace tf_lib {
 
				             auto job = jobs->getJob(sample * channels * filters + channel * filters + filter);
			
 
				             for(int x=0; x<outputSize; x++) {
			
 
				               for(int y=0; y<outputSize; y++) {
			
 
				-                output_tensor(sample, x, y, channel) = job->getResponsePayload(x*outputSize + y);
			
 
				+                output_tensor(sample, y, x, channel) = job->getResponsePayload(x*outputSize + y);
			
 
				               }
			
 
				             }
			
 
				           }
			
--- a/src/entrypoint.cpp
+++ b/src/entrypoint.cpp
@@ -64,10 +64,9 @@ namespace tf_lib {
 
				   }
			
 
				 
			
 
				   REGISTER_OP("MyConv2D")
			
 
				-      .Input("input: int32")
			
 
				-      .Input("filter: int32")
			
 
				-      .Attr("delay: int")
			
 
				-      .Output("output: int32")
			
 
				+      .Input("input: float")
			
 
				+      .Input("filter: float")
			
 
				+      .Output("output: float")
			
 
				       .SetShapeFn([](InferenceContext* c) {
			
 
				         //INPUT: NHWC
			
 
				         //KERNEL: HWIO