
fixed conv2d result image size

subDesTagesMitExtraKaese 4 years ago
parent
commit
8381df6aad
5 changed files with 30 additions and 24 deletions
  1. c++/include/conv2D.hpp (+3 -3)
  2. c++/lib/mlfpga/src/worker.cpp (+1 -0)
  3. c++/src/conv2D.cpp (+14 -9)
  4. doku/layer/conv2d.md (+6 -6)
  5. examples/screengrab.py (+6 -6)

+ 3 - 3
c++/include/conv2D.hpp

@@ -39,11 +39,11 @@ namespace tf_lib {
       int tagCounter = 0;
 
       int width = 224;
-      int kernel = 5;
-      int border = kernel/2;
+      int kernelSize = 5;
+      int border = kernelSize/2;
       int sizeWithBorder = width + 2*border;
       int pixels = sizeWithBorder * sizeWithBorder;
-      int outputSize = sizeWithBorder;
+      int outputSize = width;
 
 
     //TF_DISALLOW_COPY_AND_ASSIGN(Conv2DOp);
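
The rename also pins down the derived sizes: with kernelSize = 5 the border is 2 pixels per side, the padded image sent to the FPGA is 228x228, and outputSize now equals width, i.e. the 224x224 valid result (sizeWithBorder - (kernelSize - 1)). A standalone sanity check of that arithmetic (a sketch, not code from the repository):

  #include <cstdio>

  int main() {
    const int width = 224;                              // valid output width/height
    const int kernelSize = 5;
    const int border = kernelSize / 2;                  // 2 pixels of padding per side
    const int sizeWithBorder = width + 2 * border;      // 228, image size sent to the FPGA
    const int pixels = sizeWithBorder * sizeWithBorder; // 51984 payload words per padded image
    const int outputSize = width;                       // result image is 224x224 again
    printf("border=%d sizeWithBorder=%d pixels=%d outputSize=%d\n",
           border, sizeWithBorder, pixels, outputSize);
    return 0;
  }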

+ 1 - 0
c++/lib/mlfpga/src/worker.cpp

@@ -91,6 +91,7 @@ int Worker::threadMain() {
                   }
                 } else {
                   job->setState(JobState::failed);
+                  printf("job %08X: \x1b[31mfailed\x1b[0m no.: %3lu\n", job->getJobId(), currentI);
                   job->setReceived(false);
                 }
               }

+ 14 - 9
c++/src/conv2D.cpp

@@ -41,8 +41,8 @@ namespace tf_lib {
       filter_shape, GetFilterDimIndex<num_spatial_dims>(filter_format, 'I'));
 
     DimensionHandle output_rows, output_cols, output_channels;
-    c->Add(input_spatial_dims[0], 0, &output_rows);
-    c->Add(input_spatial_dims[1], 0, &output_cols);
+    c->Subtract(input_spatial_dims[0], 4, &output_rows);
+    c->Subtract(input_spatial_dims[1], 4, &output_cols);
 
     c->Multiply(filter_input_depth_dim, output_depth_dim, &output_channels);
 
@@ -76,6 +76,10 @@ namespace tf_lib {
     TensorShape kernel_shape = kernel.shape();
     TensorShape input_shape = input.shape();
 
+    OP_REQUIRES_ASYNC(context, input_shape.dim_size(1) == 228, errors::InvalidArgument("Unsupported input height: ", input_shape.dim_size(1)), done);
+    OP_REQUIRES_ASYNC(context, input_shape.dim_size(2) == 228, errors::InvalidArgument("Unsupported input width: ", input_shape.dim_size(2)), done);
+    OP_REQUIRES_ASYNC(context, kernel_shape.dim_size(0) == 5, errors::InvalidArgument("Unsupported kernel height: ", kernel_shape.dim_size(0)), done);
+    OP_REQUIRES_ASYNC(context, kernel_shape.dim_size(1) == 5, errors::InvalidArgument("Unsupported kernel width: ", kernel_shape.dim_size(1)), done);
 
     int batchSize = input_shape.dim_size(0);
     int channels = input_shape.dim_size(3);
@@ -115,14 +119,14 @@ namespace tf_lib {
           for(int filter=0; filter<filters; filter++) {
             auto job = jobs->getJob(sample * channels * filters + channel * filters + filter);
             
-            for(int x=0; x<5; x++) {
-              for(int y=0; y<5; y++) {
-                job->setPayload(5*5 + x*outputSize + y, *((uint32_t*)&kernel_tensor(filter, y, x, channel)));
+            for(int x=0; x<kernelSize; x++) {
+              for(int y=0; y<kernelSize; y++) {
+                job->setPayload(y*kernelSize + x, *((uint32_t*)&kernel_tensor(y, x, channel, filter)));
               }
             }
-            for(int x=0; x<outputSize; x++) {
-              for(int y=0; y<outputSize; y++) {
-                job->setPayload(5*5 + x*outputSize + y, *((uint32_t*)&input_tensor(sample, y, x, channel)));
+            for(int x=0; x<sizeWithBorder; x++) {
+              for(int y=0; y<sizeWithBorder; y++) {
+                job->setPayload(kernelSize*kernelSize + y*sizeWithBorder + x, *((uint32_t*)&input_tensor(sample, y, x, channel)));
               }
             }
             job->setReady();
@@ -138,7 +142,8 @@ namespace tf_lib {
             auto job = jobs->getJob(sample * channels * filters + channel * filters + filter);
             for(int x=0; x<outputSize; x++) {
               for(int y=0; y<outputSize; y++) {
-                output_tensor(sample, y, x, channel) = job->getResponsePayload(x*outputSize + y);
+                uint32_t val = job->getResponsePayload((y+border*2)*sizeWithBorder + (x+border*2) + 1);
+                output_tensor(sample, y, x, channel) = *((float*)&val);
               }
             }
           }
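
Read together, the new indexing defines the job payload layout: the 5x5 kernel occupies the first kernelSize*kernelSize = 25 words in row-major (y, x) order, the 228x228 padded image follows, and each result pixel is read back from the response shifted by 2*border rows and columns plus one extra word. A small sketch of those index calculations (the helper names are invented for illustration and are not part of the repository):

  #include <cstddef>
  #include <cstdint>
  #include <cstring>

  // Geometry as defined in conv2D.hpp after this commit.
  const int kernelSize = 5;
  const int width = 224;
  const int border = kernelSize / 2;             // 2
  const int sizeWithBorder = width + 2 * border; // 228

  // Request payload: kernel words first, then the padded input image.
  inline std::size_t kernelWordIndex(int x, int y) {
    return y * kernelSize + x;                   // indices 0..24
  }
  inline std::size_t imageWordIndex(int x, int y) {
    return kernelSize * kernelSize + y * sizeWithBorder + x;
  }

  // Response payload: output pixel (x, y) sits 2*border rows/columns plus one
  // word further in, matching the offset used when filling output_tensor above.
  inline std::size_t responseWordIndex(int x, int y) {
    return (y + border * 2) * sizeWithBorder + (x + border * 2) + 1;
  }

  // float values are carried bit-for-bit inside the 32-bit payload words.
  inline uint32_t packFloat(float f)   { uint32_t u; std::memcpy(&u, &f, sizeof u); return u; }
  inline float unpackFloat(uint32_t u) { float f;    std::memcpy(&f, &u, sizeof f); return f; }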

+ 6 - 6
doku/layer/conv2d.md

@@ -3,16 +3,16 @@
 ## Tensor sizes
 
 Input:
-- in TF: `[batchSize, imageX, imageY, channels]`
-- to the FPGA: `[imageX, imageY]`
+- in TF: `[batchSize, imageY, imageX, channels]`
+- to the FPGA: `[imageY, imageX]`
 
 Kernel:
-- in TF: `[kernelX, kernelY, channels, filters]`
-- to the FPGA: `[kernelX, kernelY]`
+- in TF: `[kernelY, kernelX, channels, filters]`
+- to the FPGA: `[kernelY, kernelX]`
 
 Output:
-- from the FPGA: `[imageX2, imageY2]`
-- to TF: `[batchSize, imageX2, imageY2, channels * filters]`
+- from the FPGA: `[imageY2, imageX2]`
+- to TF: `[batchSize, imageY2, imageX2, channels * filters]`
 
 ## Parallelization
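
As a concrete reading of the corrected layout, for the sizes the op currently accepts: a TF input of `[1, 228, 228, channels]` and a kernel of `[5, 5, channels, filters]` are split into one `[228, 228]` image plus one `[5, 5]` kernel per (sample, channel, filter) job, the FPGA returns a `[224, 224]` result, and the results are reassembled into `[1, 224, 224, channels * filters]` on the TF side.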
 

+ 6 - 6
examples/screengrab.py

@@ -10,7 +10,7 @@ import time
 from random import randint
 
 import tensorflow as tf
-from tensorflow.keras import layers, models
+from tensorflow.keras import layers, models, initializers
 
 import sys
 sys.path.append('../hostLib/')
@@ -23,13 +23,13 @@ sct = mss()
 stop = 0
 
 a = layers.Input(dtype=tf.float32, shape=(width, height, 3))
-z = Conv2DFPGA(1)(a)
+z = Conv2DFPGA(1, kernel_initializer=initializers.Constant(1/25))(a)
 model = models.Model(inputs=a, outputs=z)
 
 
-model.compile(loss=tf.keras.losses.categorical_crossentropy,
-              optimizer=tf.keras.optimizers.Adadelta(),
-              metrics=['accuracy'])
+#model.compile(loss=tf.keras.losses.categorical_crossentropy,
+#              optimizer=tf.keras.optimizers.Adadelta(),
+#              metrics=['accuracy'])
 
 sct_img = sct.grab(bounding_box)
 np_img = np.array(sct_img)
@@ -59,7 +59,7 @@ while True:
   predictions = model.predict(batch)
 
 
-  pred8 = tf.cast(predictions / 256, tf.uint8)
+  pred8 = tf.cast(predictions, tf.uint8)
   for i in range(pred8.shape[0]):
     name = 'conv_{}'.format(i)
     cv2.imshow(name, pred8.numpy()[i])
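
Presumably the constant 1/25 initializer turns each 5x5 kernel into a box blur that averages 25 pixels, so the convolution output stays in the original 0-255 range and can be cast to uint8 directly, which is why the earlier division by 256 is dropped; model.compile() is commented out because the model is only used for predict() and needs neither a loss nor an optimizer.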