
fixed conv2d result image size

subDesTagesMitExtraKaese 4 years ago
parent
commit
8381df6aad
5 changed files with 30 additions and 24 deletions
  1. c++/include/conv2D.hpp (+3 -3)
  2. c++/lib/mlfpga/src/worker.cpp (+1 -0)
  3. c++/src/conv2D.cpp (+14 -9)
  4. doku/layer/conv2d.md (+6 -6)
  5. examples/screengrab.py (+6 -6)

+ 3 - 3
c++/include/conv2D.hpp

@@ -39,11 +39,11 @@ namespace tf_lib {
       int tagCounter = 0;
 
       int width = 224;
-      int kernel = 5;
-      int border = kernel/2;
+      int kernelSize = 5;
+      int border = kernelSize/2;
       int sizeWithBorder = width + 2*border;
       int pixels = sizeWithBorder * sizeWithBorder;
-      int outputSize = sizeWithBorder;
+      int outputSize = width;
 
 
     //TF_DISALLOW_COPY_AND_ASSIGN(Conv2DOp);
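
The rename also pins down the derived sizes: with kernelSize = 5 the border is 2 pixels per side, the padded image sent to the FPGA is 228x228, and outputSize now equals width, i.e. the 224x224 valid result (sizeWithBorder - (kernelSize - 1)). A standalone sanity check of that arithmetic (a sketch, not code from the repository):

  #include <cstdio>

  int main() {
    const int width = 224;                              // valid output width/height
    const int kernelSize = 5;
    const int border = kernelSize / 2;                  // 2 pixels of padding per side
    const int sizeWithBorder = width + 2 * border;      // 228, image size sent to the FPGA
    const int pixels = sizeWithBorder * sizeWithBorder; // 51984 payload words per padded image
    const int outputSize = width;                       // result image is 224x224 again
    printf("border=%d sizeWithBorder=%d pixels=%d outputSize=%d\n",
           border, sizeWithBorder, pixels, outputSize);
    return 0;
  }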

+ 1 - 0
c++/lib/mlfpga/src/worker.cpp

@@ -91,6 +91,7 @@ int Worker::threadMain() {
                   }
                 } else {
                   job->setState(JobState::failed);
+                  printf("job %08X: \x1b[31mfailed\x1b[0m no.: %3lu\n", job->getJobId(), currentI);
                   job->setReceived(false);
                 }
               }

+ 14 - 9
c++/src/conv2D.cpp

@@ -41,8 +41,8 @@ namespace tf_lib {
       filter_shape, GetFilterDimIndex<num_spatial_dims>(filter_format, 'I'));
 
     DimensionHandle output_rows, output_cols, output_channels;
-    c->Add(input_spatial_dims[0], 0, &output_rows);
-    c->Add(input_spatial_dims[1], 0, &output_cols);
+    c->Subtract(input_spatial_dims[0], 4, &output_rows);
+    c->Subtract(input_spatial_dims[1], 4, &output_cols);
 
     c->Multiply(filter_input_depth_dim, output_depth_dim, &output_channels);
 
@@ -76,6 +76,10 @@ namespace tf_lib {
     TensorShape kernel_shape = kernel.shape();
     TensorShape input_shape = input.shape();
 
+    OP_REQUIRES_ASYNC(context, input_shape.dim_size(1) == 228, errors::InvalidArgument("Unsupported input height: ", input_shape.dim_size(1)), done);
+    OP_REQUIRES_ASYNC(context, input_shape.dim_size(2) == 228, errors::InvalidArgument("Unsupported input width: ", input_shape.dim_size(2)), done);
+    OP_REQUIRES_ASYNC(context, kernel_shape.dim_size(0) == 5, errors::InvalidArgument("Unsupported kernel height: ", kernel_shape.dim_size(0)), done);
+    OP_REQUIRES_ASYNC(context, kernel_shape.dim_size(1) == 5, errors::InvalidArgument("Unsupported kernel width: ", kernel_shape.dim_size(1)), done);
 
     int batchSize = input_shape.dim_size(0);
     int channels = input_shape.dim_size(3);
@@ -115,14 +119,14 @@ namespace tf_lib {
           for(int filter=0; filter<filters; filter++) {
             auto job = jobs->getJob(sample * channels * filters + channel * filters + filter);
             
-            for(int x=0; x<5; x++) {
-              for(int y=0; y<5; y++) {
-                job->setPayload(5*5 + x*outputSize + y, *((uint32_t*)&kernel_tensor(filter, y, x, channel)));
+            for(int x=0; x<kernelSize; x++) {
+              for(int y=0; y<kernelSize; y++) {
+                job->setPayload(y*kernelSize + x, *((uint32_t*)&kernel_tensor(y, x, channel, filter)));
               }
             }
-            for(int x=0; x<outputSize; x++) {
-              for(int y=0; y<outputSize; y++) {
-                job->setPayload(5*5 + x*outputSize + y, *((uint32_t*)&input_tensor(sample, y, x, channel)));
+            for(int x=0; x<sizeWithBorder; x++) {
+              for(int y=0; y<sizeWithBorder; y++) {
+                job->setPayload(kernelSize*kernelSize + y*sizeWithBorder + x, *((uint32_t*)&input_tensor(sample, y, x, channel)));
               }
             }
             job->setReady();
@@ -138,7 +142,8 @@ namespace tf_lib {
             auto job = jobs->getJob(sample * channels * filters + channel * filters + filter);
             for(int x=0; x<outputSize; x++) {
               for(int y=0; y<outputSize; y++) {
-                output_tensor(sample, y, x, channel) = job->getResponsePayload(x*outputSize + y);
+                uint32_t val = job->getResponsePayload((y+border*2)*sizeWithBorder + (x+border*2) + 1);
+                output_tensor(sample, y, x, channel) = *((float*)&val);
               }
             }
           }
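
Read together, the new indexing defines the job payload layout: the 5x5 kernel occupies the first kernelSize*kernelSize = 25 words in row-major (y, x) order, the 228x228 padded image follows, and each result pixel is read back from the response shifted by 2*border rows and columns plus one extra word. A small sketch of those index calculations (the helper names are invented for illustration and are not part of the repository):

  #include <cstddef>
  #include <cstdint>
  #include <cstring>

  // Geometry as defined in conv2D.hpp after this commit.
  const int kernelSize = 5;
  const int width = 224;
  const int border = kernelSize / 2;             // 2
  const int sizeWithBorder = width + 2 * border; // 228

  // Request payload: kernel words first, then the padded input image.
  inline std::size_t kernelWordIndex(int x, int y) {
    return y * kernelSize + x;                   // indices 0..24
  }
  inline std::size_t imageWordIndex(int x, int y) {
    return kernelSize * kernelSize + y * sizeWithBorder + x;
  }

  // Response payload: output pixel (x, y) sits 2*border rows/columns plus one
  // word further in, matching the offset used when filling output_tensor above.
  inline std::size_t responseWordIndex(int x, int y) {
    return (y + border * 2) * sizeWithBorder + (x + border * 2) + 1;
  }

  // float values are carried bit-for-bit inside the 32-bit payload words.
  inline uint32_t packFloat(float f)   { uint32_t u; std::memcpy(&u, &f, sizeof u); return u; }
  inline float unpackFloat(uint32_t u) { float f;    std::memcpy(&f, &u, sizeof f); return f; }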

+ 6 - 6
doku/layer/conv2d.md

@@ -3,16 +3,16 @@
 ## Tensor sizes
 
 Input:
-- in TF: `[batchSize, imageX, imageY, channels]`
-- to the FPGA: `[imageX, imageY]`
+- in TF: `[batchSize, imageY, imageX, channels]`
+- to the FPGA: `[imageY, imageX]`
 
 Kernel:
-- in TF: `[kernelX, kernelY, channels, filters]`
-- to the FPGA: `[kernelX, kernelY]`
+- in TF: `[kernelY, kernelX, channels, filters]`
+- to the FPGA: `[kernelY, kernelX]`
 
 Output:
-- from the FPGA: `[imageX2, imageY2]`
-- to TF: `[batchSize, imageX2, imageY2, channels * filters]`
+- from the FPGA: `[imageY2, imageX2]`
+- to TF: `[batchSize, imageY2, imageX2, channels * filters]`
 
 ## Parallelization
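
As a concrete reading of the corrected layout, for the sizes the op currently accepts: a TF input of `[1, 228, 228, channels]` and a kernel of `[5, 5, channels, filters]` are split into one `[228, 228]` image plus one `[5, 5]` kernel per (sample, channel, filter) job, the FPGA returns a `[224, 224]` result, and the results are reassembled into `[1, 224, 224, channels * filters]` on the TF side.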
 

+ 6 - 6
examples/screengrab.py

@@ -10,7 +10,7 @@ import time
 from random import randint
 
 import tensorflow as tf
-from tensorflow.keras import layers, models
+from tensorflow.keras import layers, models, initializers
 
 import sys
 sys.path.append('../hostLib/')
@@ -23,13 +23,13 @@ sct = mss()
 stop = 0
 
 a = layers.Input(dtype=tf.float32, shape=(width, height, 3))
-z = Conv2DFPGA(1)(a)
+z = Conv2DFPGA(1, kernel_initializer=initializers.Constant(1/25))(a)
 model = models.Model(inputs=a, outputs=z)
 
 
-model.compile(loss=tf.keras.losses.categorical_crossentropy,
-              optimizer=tf.keras.optimizers.Adadelta(),
-              metrics=['accuracy'])
+#model.compile(loss=tf.keras.losses.categorical_crossentropy,
+#              optimizer=tf.keras.optimizers.Adadelta(),
+#              metrics=['accuracy'])
 
 sct_img = sct.grab(bounding_box)
 np_img = np.array(sct_img)
@@ -59,7 +59,7 @@ while True:
   predictions = model.predict(batch)
 
 
-  pred8 = tf.cast(predictions / 256, tf.uint8)
+  pred8 = tf.cast(predictions, tf.uint8)
   for i in range(pred8.shape[0]):
     name = 'conv_{}'.format(i)
     cv2.imshow(name, pred8.numpy()[i])
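
Presumably the constant 1/25 initializer turns each 5x5 kernel into a box blur that averages 25 pixels, so the convolution output stays in the original 0-255 range and can be cast to uint8 directly, which is why the earlier division by 256 is dropped; model.compile() is commented out because the model is only used for predict() and needs neither a loss nor an optimizer.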