
added more layers

subDesTagesMitExtraKaese · 3 years ago
commit 71217544b7

+ 2 - 2
c++/include/conv2D.hpp

@@ -1,5 +1,5 @@
-#ifndef CONV2D_FPGA_H
-#define CONV2D_FPGA_H
+#ifndef CONV2D_1_FPGA_H
+#define CONV2D_1_FPGA_H
 
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/function.h"

+ 0 - 0
c++/include/conv2D_2.hpp


+ 0 - 0
c++/include/conv2D_3.hpp


+ 0 - 0
c++/include/conv2D_maxpool.hpp


+ 0 - 0
c++/include/conv2D_maxpool_multi.hpp


+ 5 - 1
c++/include/entrypoint.hpp

@@ -11,7 +11,11 @@
 
 #include "tensorflow/core/lib/math/math_util.h"
 
-#include "conv2D.hpp"
+#include "conv2D_1.hpp"
+#include "conv2D_2.hpp"
+#include "conv2D_3.hpp"
+#include "conv2D_maxpool.hpp"
+#include "conv2D_maxpool_multi.hpp"
 #include "dummyOp.hpp"
 #include "dummyBigOp.hpp"
 #include "../lib/mlfpga/include/connectionManager.hpp"

+ 5 - 6
c++/src/conv2D.cpp

@@ -1,5 +1,5 @@
 
-#include "conv2D.hpp"
+#include "conv2D_1.hpp"
 
 namespace tf_lib {
 
@@ -149,8 +149,7 @@ namespace tf_lib {
             auto job = jobs->getJob(sample * outputChannels * channels + outputChannel * channels + channel);
             for(int x=0; x<outputSize; x++) {
               for(int y=0; y<outputSize; y++) {
-                uint32_t val = job->getResponsePayload((y+border*2)*sizeWithBorder + (x+border*2));
-                output_tensor(sample, y, x, outputChannel) += *((float*)&val);
+                memcpy(&output_tensor(sample, y, x, outputChannel), &job->getResponseAddr()[(y+border*2)*sizeWithBorder + (x+border*2)], 4);
               }
             }
           }
@@ -227,13 +226,13 @@ namespace tf_lib {
                             true, "dz", true, "x", true);
   }
 
-  REGISTER_OP("MyConv2D")
+  REGISTER_OP("MyConv2D_1")
       .Input("input: float")
       .Input("filter: float")
       .Output("output: float")
       .SetShapeFn(conv2d_shape_fn);
 
-  REGISTER_KERNEL_BUILDER(Name("MyConv2D").Device(DEVICE_CPU), Conv2DOp);
-  REGISTER_OP_GRADIENT("MyConv2D", MatMulGrad);
+  REGISTER_KERNEL_BUILDER(Name("MyConv2D_1").Device(DEVICE_CPU), Conv2DOp);
+  REGISTER_OP_GRADIENT("MyConv2D_1", MatMulGrad);
 
 }
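
The hunk above now copies each 32-bit response word byte-for-byte into the float output instead of dereferencing a casted pointer. A minimal Python sketch of the same reinterpretation, assuming the response payload arrives as little-endian 32-bit words (word order and endianness are assumptions, not taken from this commit):

```python
import struct

def response_words_to_floats(words):
    """Reinterpret raw 32-bit words as IEEE-754 float32 values.

    Mirrors the memcpy in conv2D.cpp: the bit pattern is copied unchanged,
    only its interpretation changes from integer to float.
    """
    return [struct.unpack("<f", struct.pack("<I", w))[0] for w in words]

# The bit pattern 0x3f800000 is the float 1.0.
assert response_words_to_floats([0x3F800000]) == [1.0]
```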

+ 0 - 0
c++/src/conv2D_2.cpp


+ 0 - 0
c++/src/conv2D_3.cpp


+ 0 - 0
c++/src/conv2D_maxpool.cpp


+ 0 - 0
c++/src/conv2D_maxpool_multi.cpp


+ 0 - 31
doku/layer/conv2d.md

@@ -1,31 +0,0 @@
-# Two-dimensional convolution
-
-## Tensor sizes
-
-Input:
-- in TF: `[batchSize, imageY, imageX, channels]`
-- to FPGA: `[imageY, imageX]`
-
-Kernel:
-- in TF: `[kernelY, kernelX, channels, outputChannels]`
-- to FPGA: `[kernelY, kernelX]`
-
-Output:
-- from FPGA: `[imageY2, imageX2]`
-- to TF: `[batchSize, imageY2, imageX2, outputChannels]`
-
-## Parallelization
-
-1.  **Without FPGA-side memory**
-
-    The FPGA compute units are used `(batchSize * channels * outputChannels)` times, with the work distributed across them.
-
-    ```python
-    for sample in range(batchSize):
-      for outputChannel in range(outputChannels):
-        for channel in range(channels):
-          output[sample][outputChannel] += f(
-            input[sample][channel],
-            kernel[channel][outputChannel]
-          )
-    ```

+ 65 - 0
doku/layer/conv2d_1.md

@@ -0,0 +1,65 @@
+# 2D convolution of one channel
+
+## TF equivalent
+
+```python
+layers.Conv2D(
+  filters,
+  kernel_size=5,
+  strides=(1, 1),
+  padding='valid',
+  data_format='channels_last',
+  dilation_rate=(1, 1),
+  activation=None,
+  use_bias=False,
+  trainable=True
+)
+```
+
+## Tensor sizes
+
+Input:
+- from TF: `[batchSize, imageY, imageX, channels]`
+- to FPGA: `[imageY, imageX]`
+
+Filter:
+- from TF: `[kernelY, kernelX, channels, outputChannels]`
+- to FPGA: `[kernelY, kernelX]`
+
+Output:
+- from FPGA: `[imageY2, imageX2]`
+- to TF: `[batchSize, imageY2, imageX2, outputChannels]`
+
+## Parallelization
+
+- `(batchSize * channels * outputChannels)` jobs will be created.
+- layer can be called in parallel (functional model)
+
+```python
+for sample in range(batchSize):
+  for outputChannel in range(outputChannels):
+    for channel in range(channels):
+      output[sample][outputChannel] += job(
+        input[sample][channel], 
+        kernel[channel][outputChannel]
+      )
+```
+
+## Constraints
+
+```python
+  imageX  = imageY  = 228
+  kernelX = kernelY = 5
+  imageX2 = imageY2 = 224
+```
+
+## Job lengths
+
+- to FPGA:
+  `5 * 5 + 228 * 228 = 52009`
+- from FPGA:
+  `224 * 224 = 50176`
+
+## Gradient
+  Same as `layers.Conv2D`: `conv2d_backprop` in
+  `tensorflow/tensorflow/python/ops/nn_ops.py:2290`
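
The parallelization scheme in `conv2d_1.md` above splits a multi-channel convolution into single-channel jobs whose results are summed per output channel. A small NumPy sketch of that decomposition, mirroring the pseudocode in the document (the `valid_conv2d` helper stands in for one FPGA job; the reduced sizes are illustrative):

```python
import numpy as np

def valid_conv2d(image, kernel):
    """'valid' 2D cross-correlation of a single channel, i.e. one job."""
    ky, kx = kernel.shape
    oy, ox = image.shape[0] - ky + 1, image.shape[1] - kx + 1
    out = np.zeros((oy, ox), dtype=np.float32)
    for y in range(oy):
        for x in range(ox):
            out[y, x] = np.sum(image[y:y + ky, x:x + kx] * kernel)
    return out

batchSize, channels, outputChannels = 2, 3, 4
imageY = imageX = 12      # stands in for 228
kernelY = kernelX = 5

inp  = np.random.rand(batchSize, imageY, imageX, channels).astype(np.float32)
kern = np.random.rand(kernelY, kernelX, channels, outputChannels).astype(np.float32)

# One job per (sample, outputChannel, channel); partial results accumulate host-side.
out = np.zeros((batchSize, imageY - kernelY + 1, imageX - kernelX + 1, outputChannels),
               dtype=np.float32)
for sample in range(batchSize):
    for outputChannel in range(outputChannels):
        for channel in range(channels):
            out[sample, :, :, outputChannel] += valid_conv2d(
                inp[sample, :, :, channel], kern[:, :, channel, outputChannel])

# batchSize * channels * outputChannels = 24 jobs in this toy example.
```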

+ 65 - 0
doku/layer/conv2d_2.md

@@ -0,0 +1,65 @@
+# 2D convolution with activation
+
+## TF equivalent
+
+```python
+layers.Conv2D(
+  filters,
+  kernel_size=5,
+  strides=(1, 1),
+  padding='valid',
+  data_format='channels_last',
+  dilation_rate=(1, 1),
+  activation='relu',
+  use_bias=False,
+  trainable=True
+)
+```
+
+## Tensor sizes
+
+Input:
+- from TF: `[batchSize, imageY, imageX, channels]`
+- to FPGA: `[imageY, imageX, channels]`
+
+Filter:
+- from TF: `[kernelY, kernelX, channels, outputChannels]`
+- to FPGA: `[kernelY, kernelX, channels]`
+
+Output:
+- from FPGA: `[imageY2, imageX2]`
+- to TF: `[batchSize, imageY2, imageX2, outputChannels]`
+
+## Parallelization
+
+- `(batchSize * outputChannels)` jobs will be created.
+- layer can be called in parallel (functional model)
+
+```python
+for sample in range(batchSize):
+  for outputChannel in range(outputChannels):
+    output[sample][outputChannel] = job(
+      input[sample], 
+      kernel[outputChannel]
+    )
+```
+
+## Constraints
+
+```python
+  imageX  = imageY  = 228
+  kernelX = kernelY = 5
+  imageX2 = imageY2 = 224
+  channels = 3
+```
+
+## Job lengths
+
+- to FPGA:
+  `5 * 5 * 3 + 228 * 228 * 3 = 156027`
+- from FPGA:
+  `224 * 224 = 50176`
+
+## Gradient
+  Same as `layers.Conv2D`: `conv2d_backprop` in
+  `tensorflow/tensorflow/python/ops/nn_ops.py:2290`
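
`conv2d_2` above moves all input channels into a single job, so only `(batchSize * outputChannels)` jobs remain and each payload carries one output channel's multi-channel kernel slice plus the multi-channel image. A short sketch of the length arithmetic, assuming the kernel words precede the image words (the actual wire ordering is not specified by this commit):

```python
import numpy as np

imageY = imageX = 228
kernelY = kernelX = 5
channels = 3

kernel = np.zeros((kernelY, kernelX, channels), dtype=np.float32)  # one output channel's slice
image  = np.zeros((imageY, imageX, channels), dtype=np.float32)

# Hypothetical payload layout: kernel first, then image, both flattened.
payload = np.concatenate([kernel.ravel(), image.ravel()])
assert payload.size == 5 * 5 * 3 + 228 * 228 * 3 == 156027   # job length to the FPGA
assert 224 * 224 == 50176                                     # job length from the FPGA
```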

+ 65 - 0
doku/layer/conv2d_3.md

@@ -0,0 +1,65 @@
+# 2D convolution with activation and fixed output channels
+
+## TF equivalent
+
+```python
+layers.Conv2D(
+  filters=32,
+  kernel_size=5,
+  strides=(1, 1),
+  padding='valid',
+  data_format='channels_last',
+  dilation_rate=(1, 1),
+  activation='relu',
+  use_bias=False,
+  trainable=True
+)
+```
+
+## Tensor sizes
+
+Input:
+- from TF: `[batchSize, imageY, imageX, channels]`
+- to FPGA: `[imageY, imageX, channels]`
+
+Filter:
+- from TF: `[kernelY, kernelX, channels, outputChannels]`
+- to FPGA: `[kernelY, kernelX, channels, outputChannels]`
+
+Output:
+- from FPGA: `[imageY2, imageX2, outputChannels]`
+- to TF: `[batchSize, imageY2, imageX2, outputChannels]`
+
+## Parallelization
+
+- `(batchSize)` jobs will be created.
+- layer can be called in parallel (functional model)
+
+```python
+for sample in range(batchSize):
+    output[sample] = job(
+      input[sample], 
+      kernel
+    )
+```
+
+## Constraints
+
+```python
+  imageX  = imageY  = 228
+  kernelX = kernelY = 5
+  imageX2 = imageY2 = 224
+  channels = 3
+  outputChannels = 32
+```
+
+## Job lengths
+
+- to FPGA:
+  `5 * 5 * 3 * 32 + 228 * 228 * 3 = 158352`
+- from FPGA:
+  `224 * 224 * 32 = 1605632`
+
+## Gradient
+  Same as `layers.Conv2D`: `conv2d_backprop` in
+  `tensorflow/tensorflow/python/ops/nn_ops.py:2290`
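
`conv2d_3` fixes the number of output channels at 32, so a single job per sample covers the whole layer. The constraints pin the shapes end to end: a 228×228×3 input through a 5×5 'valid' convolution with 32 filters yields 224×224×32. A short Keras check of the stated TF equivalent (shapes only, no FPGA op involved):

```python
import tensorflow as tf

layer = tf.keras.layers.Conv2D(
    filters=32, kernel_size=5, strides=(1, 1), padding='valid',
    data_format='channels_last', activation='relu', use_bias=False)

y = layer(tf.zeros([1, 228, 228, 3]))
assert y.shape == (1, 224, 224, 32)       # imageX2 = imageY2 = 224
assert 224 * 224 * 32 == 1605632          # words per job returned by the FPGA
```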

+ 71 - 0
doku/layer/conv2d_maxpool.md

@@ -0,0 +1,71 @@
+# 2D convolution and MaxPooling
+
+## TF equivalent
+
+```python
+layers.Conv2D(
+  filters=32,
+  kernel_size=5,
+  strides=(1, 1),
+  padding='valid',
+  data_format='channels_last',
+  dilation_rate=(1, 1),
+  activation='relu',
+  use_bias=False,
+  trainable=True
+)
+
+layers.MaxPooling2D(
+  pool_size=(2, 2),
+  strides=(2, 2),
+  padding='valid',
+  data_format='channels_last'
+)
+```
+
+## Tensor sizes
+
+Input:
+- from TF: `[batchSize, imageY, imageX, channels]`
+- to FPGA: `[imageY, imageX, channels]`
+
+Filter:
+- from TF: `[kernelY, kernelX, channels, outputChannels]`
+- to FPGA: `[kernelY, kernelX, channels, outputChannels]`
+
+Output:
+- from FPGA: `[imageY2, imageX2, outputChannels]`
+- to TF: `[batchSize, imageY2, imageX2, outputChannels]`
+
+## Parallelization
+
+- `(batchSize)` jobs will be created.
+- layer can be called in parallel (functional model)
+
+```python
+for sample in range(batchSize):
+    output[sample] = job(
+      input[sample], 
+      kernel
+    )
+```
+
+## Constraints
+
+```python
+  imageX  = imageY  = 228
+  kernelX = kernelY = 5
+  imageX2 = imageY2 = 112
+  channels = 3
+  outputChannels = 32
+```
+
+## Job lengths
+
+- to FPGA:
+  `5 * 5 * 3 * 32 + 228 * 228 * 3 = 158352`
+- from FPGA:
+  `112 * 112 * 32 = 401408`
+
+## Gradient
+  tbd
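
For the fused conv + max-pool variant, the 224×224 convolution result is halved by a stride-2 pooling step, which is where the 112×112×32 output above comes from. A Keras sketch of that shape chain, using `pool_size=(2, 2)` to match the stated constraints (shapes only):

```python
import tensorflow as tf

conv = tf.keras.layers.Conv2D(32, 5, padding='valid', activation='relu', use_bias=False)
pool = tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid')

y = pool(conv(tf.zeros([1, 228, 228, 3])))
assert y.shape == (1, 112, 112, 32)
assert 112 * 112 * 32 == 401408   # words per job returned by the FPGA
```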

+ 109 - 0
doku/layer/conv2d_maxpool_multi.md

@@ -0,0 +1,109 @@
+# Multiple 2D convolutions with MaxPooling
+
+## TF equivalent
+
+```python
+layers.Conv2D(
+  filters=32,
+  kernel_size=5,
+  strides=(1, 1),
+  padding='valid',
+  data_format='channels_last',
+  dilation_rate=(1, 1),
+  activation='relu',
+  use_bias=False,
+  trainable=True
+)
+
+layers.MaxPooling2D(
+  pool_size=(2, 2),
+  strides=(2, 2),
+  padding='valid',
+  data_format='channels_last'
+)
+
+layers.Conv2D(
+  filters=32,
+  kernel_size=5,
+  strides=(1, 1),
+  padding='valid',
+  data_format='channels_last',
+  dilation_rate=(1, 1),
+  activation='relu',
+  use_bias=False,
+  trainable=True
+)
+
+layers.MaxPooling2D(
+  pool_size=(2, 2),
+  strides=(2, 2),
+  padding='valid',
+  data_format='channels_last'
+)
+
+layers.Conv2D(
+  filters=32,
+  kernel_size=5,
+  strides=(1, 1),
+  padding='valid',
+  data_format='channels_last',
+  dilation_rate=(1, 1),
+  activation='relu',
+  use_bias=False,
+  trainable=True
+)
+
+layers.MaxPooling2D(
+  pool_size=(2, 2),
+  strides=(2, 2),
+  padding='valid',
+  data_format='channels_last'
+)
+```
+
+## Tensor sizes
+
+Input:
+- from TF: `[batchSize, imageY, imageX, channels]`
+- to FPGA: `[imageY, imageX, channels]`
+
+Filter:
+- from TF: `[kernelY, kernelX, channels, outputChannels]`
+- to FPGA: `[kernelY, kernelX, channels, outputChannels]`
+
+Output:
+- from FPGA: `[imageY2, imageX2, outputChannels]`
+- to TF: `[batchSize, imageY2, imageX2, outputChannels]`
+
+## Parallelization
+
+- `(batchSize)` jobs will be created.
+- layer can be called in parallel (functional model)
+
+```python
+for sample in range(batchSize):
+    output[sample] = job(
+      input[sample], 
+      kernel
+    )
+```
+
+## Constraints
+
+```python
+  imageX  = imageY  = 228
+  kernelX = kernelY = 5
+  imageX2 = imageY2 = 25
+  channels = 3
+  outputChannels = 32
+```
+
+## Job lengths
+
+- to FPGA:
+  `5 * 5 * (3 * 32 + 32 * 32 * 2) + 228 * 228 * 3 = 209552`
+- from FPGA:
+  `25 * 25 * 32 = 20000`
+
+## Gradient
+  tbd
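
The multi-stage variant chains three conv + pool blocks inside one job, so the to-FPGA length counts all three kernel tensors plus the input image. A sketch that replays the shape chain and the length arithmetic, again with `pool_size=(2, 2)` per the constraints (shapes only):

```python
import tensorflow as tf

def block(filters):
    return [tf.keras.layers.Conv2D(filters, 5, padding='valid', activation='relu', use_bias=False),
            tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid')]

model = tf.keras.Sequential([tf.keras.Input(shape=(228, 228, 3))]
                            + block(32) + block(32) + block(32))
# 228 -> 224 -> 112 -> 108 -> 54 -> 50 -> 25
assert model.output_shape == (None, 25, 25, 32)

kernel_words = 5 * 5 * (3 * 32 + 32 * 32 * 2)        # three kernel tensors
assert kernel_words + 228 * 228 * 3 == 209552         # words per job to the FPGA
assert 25 * 25 * 32 == 20000                          # words per job from the FPGA
```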

+ 1 - 1
hostLib/layers/__init__.py

@@ -1,2 +1,2 @@
 
-__all__ = ["conv2d"]
+__all__ = ["conv2d", "conv2d_maxpool", "conv2d_maxpool_multi"]

+ 13 - 7
hostLib/layers/conv2d.py

@@ -9,18 +9,18 @@ from .. import load_op
 
 class Conv2D(layers.Layer):
   def __init__(self,
-    filters = 1,
-    kernel_initializer = 'glorot_uniform',
+               filters = 1,
+               kernel_initializer = 'glorot_uniform',
                kernel_regularizer=None,
                kernel_constraint=None,
-    ):
+               implementation = 1):
     super(Conv2D, self).__init__()
     #int, dim of output space
     self.filters = filters
     self.kernel_initializer = initializers.get(kernel_initializer)
     self.kernel_regularizer = regularizers.get(kernel_regularizer)
     self.kernel_constraint = constraints.get(kernel_constraint)
-
+    self.implementation = implementation
 
   def build(self, input_shape):
     input_shape = tf.TensorShape(input_shape)
@@ -37,10 +37,16 @@ class Conv2D(layers.Layer):
         dtype=self.dtype)
 
   def call(self, inputs):
+    if self.implementation == 1:
+      return load_op.op_lib.MyConv2D_1(input=inputs, filter=self.kernel)
+    if self.implementation == 2:
+      return load_op.op_lib.MyConv2D_2(input=inputs, filter=self.kernel)
+    if self.implementation == 3:
+      return load_op.op_lib.MyConv2D_3(input=inputs, filter=self.kernel)
 
-    return load_op.op_lib.MyConv2D(input=inputs, filter=self.kernel)
-
-@ops.RegisterGradient("MyConv2D")
+@ops.RegisterGradient("MyConv2D_1")
+@ops.RegisterGradient("MyConv2D_2")
+@ops.RegisterGradient("MyConv2D_3")
 def _my_conv_2d_grad(op, grad):
   shape_0, shape_1 = array_ops.shape_n([op.inputs[0], op.inputs[1]])
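
With the new `implementation` argument, the host-side `Conv2D` layer chooses which registered FPGA op to dispatch to. A hypothetical usage sketch, assuming the package layout of this repository (it only runs with the op library and FPGA backend available):

```python
import tensorflow as tf
from hostLib.layers.conv2d import Conv2D   # the layer changed in this commit

# implementation=1: one single-channel convolution per job   (conv2d_1)
# implementation=2: all input channels in one job            (conv2d_2)
# implementation=3: input and output channels in one job     (conv2d_3)
layer = Conv2D(filters=32, implementation=3)

features = layer(tf.zeros([1, 228, 228, 3]))   # dispatches to the MyConv2D_3 op
```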
 

+ 0 - 0
hostLib/layers/conv2d_maxpool.py


+ 0 - 0
hostLib/layers/conv2d_maxpool_multi.py