Pārlūkot izejas kodu

Changed conv2D from int32 to float32, removed the `delay` attribute, and added the 5x5 kernel to each job's payload

subDesTagesMitExtraKaese 4 gadi atpakaļ
vecāks
revīzija
dd6d8a9524

+ 7 - 6
examples/screengrab.py

@@ -22,14 +22,14 @@ width, height = 228, 228
 sct = mss()
 stop = 0
 
-a = layers.Input(dtype=tf.int32, shape=(width, height, 3))
+a = layers.Input(dtype=tf.float32, shape=(width, height, 3))
 z = Conv2DFPGA(1)(a)
 model = models.Model(inputs=a, outputs=z)
 
 
-#model.compile(loss=tf.keras.losses.categorical_crossentropy,
-#              optimizer=tf.keras.optimizers.Adadelta(),
-#              metrics=['accuracy'])
+model.compile(loss=tf.keras.losses.categorical_crossentropy,
+              optimizer=tf.keras.optimizers.Adadelta(),
+              metrics=['accuracy'])
 
 sct_img = sct.grab(bounding_box)
 np_img = np.array(sct_img)
@@ -46,14 +46,15 @@ while True:
     [randint(0,256), randint(0,256), randint(0,256)],
     10
   )
-  img32 = tf.cast(resized_image, tf.int32)
+  img32 = tf.cast(resized_image, tf.float32)
   #img32 = np.expand_dims(img32, axis=2)
 
   cv2.imshow('screen', resized_image)
   x,y,w,h = cv2.getWindowImageRect('screen')
   batch = np.expand_dims(img32, axis=0)
-  batch = tf.tile(batch, [2,1,1,1])
+  batch = tf.tile(batch, [1,1,1,1])
 
+  print(batch.shape)
 
   predictions = model.predict(batch)
 

+ 1 - 1
examples/train.py

@@ -39,7 +39,7 @@ print(x_test.shape[0], 'test samples')
 y_train = to_categorical(y_train, num_classes)
 y_test = to_categorical(y_test, num_classes)
 
-a = layers.Input(dtype=tf.int32, shape=(28, 28, 1))
+a = layers.Input(shape=(28, 28, 1))
 b = Conv2DFPGA(2)(a)
 c = Conv2DFPGA(1)(a)
 d = Conv2DFPGA(1)(b)

+ 0 - 2
include/conv2D.hpp

@@ -33,8 +33,6 @@ namespace tf_lib {
     private:
 
       int instance = -1;
-      int delay = 1000;
-
       
       int tagCounter = 0;
 

+ 1 - 3
layers/conv2D.py

@@ -35,6 +35,4 @@ class Conv2D(layers.Layer):
 
   def call(self, inputs):
 
-    #out = tf.Tensor(tf.int32, shape=inputs.shape)
-    intKernel = tf.cast(self.kernel, dtype=tf.int32)
-    return load_op.op_lib.MyConv2D(input=inputs, filter=intKernel, delay=0*self.filters)
+    return load_op.op_lib.MyConv2D(input=inputs, filter=self.kernel)

+ 1 - 1
lib/mlfpga/include/modules.hpp

@@ -9,7 +9,7 @@
   MOD_DEF( conv2D_2x11_Module, 0x9323eb24, "2D Konvolution 2x11", 224*224, 224*224 ),   \
   MOD_DEF( neuronModule, 0x03b30000, "Neuron", 21, 1 ), \
   MOD_DEF( dummyBigModule, 0x2cb31e7c, "Dummy 1024", 1024, 1024), \
-  MOD_DEF( conv2D_5x5_Module, 0x4cd2e19c, "2D Konvolution 5x5", 228*228, 228*228)
+  MOD_DEF( conv2D_5x5_Module, 0x4cd2e19c, "2D Konvolution 5x5", 5*5+228*228, 228*228)
 
 #define MOD_DEF( identifier, id, name, sendLen, recvLen )  identifier
 enum Module { MODS_DEF };

+ 11 - 6
src/conv2D.cpp

@@ -9,8 +9,6 @@ namespace tf_lib {
 
   Conv2DOp::Conv2DOp(OpKernelConstruction* context) : AsyncOpKernel(context) {
     instance = instances++;
-    OP_REQUIRES_OK(context, context->GetAttr("delay", &delay));
-
   };
 
   void Conv2DOp::ComputeAsync(OpKernelContext* context, DoneCallback done) {
@@ -52,8 +50,9 @@ namespace tf_lib {
     OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output));
 
 
-    auto input_tensor = input.tensor<int32, 4>();
-    auto output_tensor = output->tensor<int32, 4>();
+    auto input_tensor = input.tensor<float, 4>();
+    auto kernel_tensor = kernel.tensor<float, 4>();
+    auto output_tensor = output->tensor<float, 4>();
 
     auto worker = connectionManager.createWorker(Module::conv2D_5x5_Module, batchSize * channels * filters);
     {
@@ -65,9 +64,15 @@ namespace tf_lib {
         for(int channel=0; channel<channels; channel++) {
           for(int filter=0; filter<filters; filter++) {
             auto job = jobs->getJob(sample * channels * filters + channel * filters + filter);
+            
+            for(int x=0; x<5; x++) {
+              for(int y=0; y<5; y++) {
+                job->setPayload(5*5 + x*outputSize + y, *((uint32_t*)&kernel_tensor(filter, y, x, channel)));
+              }
+            }
             for(int x=0; x<outputSize; x++) {
               for(int y=0; y<outputSize; y++) {
-                job->setPayload(x*outputSize + y, input_tensor(sample, x, y, channel));
+                job->setPayload(5*5 + x*outputSize + y, *((uint32_t*)&input_tensor(sample, y, x, channel)));
               }
             }
             job->setReady();
@@ -83,7 +88,7 @@ namespace tf_lib {
             auto job = jobs->getJob(sample * channels * filters + channel * filters + filter);
             for(int x=0; x<outputSize; x++) {
               for(int y=0; y<outputSize; y++) {
-                output_tensor(sample, x, y, channel) = job->getResponsePayload(x*outputSize + y);
+                output_tensor(sample, y, x, channel) = job->getResponsePayload(x*outputSize + y);
               }
             }
           }

+ 3 - 4
src/entrypoint.cpp

@@ -64,10 +64,9 @@ namespace tf_lib {
   }
 
   REGISTER_OP("MyConv2D")
-      .Input("input: int32")
-      .Input("filter: int32")
-      .Attr("delay: int")
-      .Output("output: int32")
+      .Input("input: float")
+      .Input("filter: float")
+      .Output("output: float")
       .SetShapeFn([](InferenceContext* c) {
         //INPUT: NHWC
         //KERNEL: HWIO