From c84c427fa75d091efa730773c3342ad4589d0a9b Mon Sep 17 00:00:00 2001
From: Michael Pivato
Date: Thu, 24 Jan 2019 16:19:20 +1030
Subject: [PATCH] Fix going outside of array bounds, and attempt to make CNN
 work.

---
 GestureRecognition/SimpleHandRecogniser.py | 197 +++++++++++++++------
 1 file changed, 138 insertions(+), 59 deletions(-)

diff --git a/GestureRecognition/SimpleHandRecogniser.py b/GestureRecognition/SimpleHandRecogniser.py
index 3548a87..1bc77a7 100644
--- a/GestureRecognition/SimpleHandRecogniser.py
+++ b/GestureRecognition/SimpleHandRecogniser.py
@@ -1,10 +1,13 @@
 from GestureRecognition.handrecogniser import HandRecogniser
 import numpy as np
 import cv2
+import tensorflow as tf
 
 class SimpleHandRecogniser(HandRecogniser):
     def __init__(self, frame):
         self.img = frame
+        self.graph = None
+        self.sess = None
 
     def __calc_pos_y(self, x, radius, centre):
         """
@@ -89,9 +92,16 @@ class SimpleHandRecogniser(HandRecogniser):
         Calculates the actual gesture, returning the number of fingers seen in the
         image.
         """
+        print('Getting Gesture')
         if self.img is None:
-            return 0
-
+            print('There is no image')
+            return -1
+        # First cut out the frame using the neural network.
+        self.load_inference_graph()
+        print("loaded inference graph")
+        detections, scores = self.detect_hand_tensorflow(self.graph, self.sess)
+
+        print('Attempting to use pure hand recognition')
         self.img_hsv = cv2.cvtColor(self.img, cv2.COLOR_BGR2HSV)
 
         # Need to shift red pixels so they can be 0-20 rather than 250-~20
@@ -100,7 +110,9 @@ class SimpleHandRecogniser(HandRecogniser):
         self.img_hsv = self.__denoise(self.img_hsv)
         self.__segment_image()
 
+        print('calculating circle')
         radius, centre = self.__calc_circle(self.mask)
+        print('Got circle')
 
         # Now go around the circle to calculate num of times going 0->255 or vice-versa.
         # First just do it the naive way with loops.
@@ -109,9 +121,32 @@ class SimpleHandRecogniser(HandRecogniser):
         prev_x = centre[0] - radius
         prev_y = [self.__calc_pos_y(centre[0] - radius, radius, centre), self.__calc_pos_y(centre[0] - radius, radius, centre)]
         num_change = 0
-        for x in range(centre[0] - radius + 1, centre[0] + radius):
+
+        # Make sure x is also within bounds.
+        x_start = centre[0] - radius + 1
+        if x_start < 0:
+            x_start = 0
+
+        x_end = centre[0] + radius
+        if x_end >= self.mask.shape[1]:
+            x_end = self.mask.shape[1] - 1
+        print(x_start)
+        print(x_end)
+        print(self.mask.shape)
+        for x in range(x_start, x_end):
+            # Need to check circle is inside the bounds.
             ypos = self.__calc_pos_y(x, radius, centre)
+            # y above the centre (ypos) and y mirrored below the centre.
             y = [ypos, centre[1] - (ypos-centre[1])]
+
+            if y[0] < 0:
+                y[0] = 0
+            if y[0] >= self.mask.shape[0]:
+                y[0] = self.mask.shape[0] - 1
+            if y[1] < 0:
+                y[1] = 0
+            if y[1] >= self.mask.shape[0]:
+                y[1] = self.mask.shape[0] - 1
             if(self.mask[y[0], x] != self.mask[prev_y[0], prev_x]):
                 num_change += 1
             if self.mask[y[1], x] != self.mask[prev_y[1], prev_x] and y[0] != y[1]:
@@ -119,81 +154,125 @@ class SimpleHandRecogniser(HandRecogniser):
             prev_x = x
             prev_y = y
 
+        print('Finished calculating, returning')
+
         return num_change / 2 - 1
 
-    def detect_hand(self, weights_path, config_path, conf_thresh = 0.5, nms_thresh = 0.4):
-        '''
-        Detects if there is a hand in the image. If there is (above a significant confidence threshold)
-        then the function will set the img property to the location of the hand according to its bounding box.
-        '''
-        # Most of this code is from here: www.arunponnusamy.com/yolo-object-detection-opencv-python.html
-        # Also https://github.com/opencv/opencv/blob/3.4/samples/dnn/object_detection.py
+    def setFrame(self, frame):
+        self.img = frame
+
+    # Source: Victor Dibia
+    # Link: https://github.com/victordibia/handtracking
+    # Code taken straight from his example, as it works perfectly. This is specifically
+    # from the load_inference_graph method that he wrote, and will load the graph into
+    # memory if one has not already been loaded for this object.
+    def load_inference_graph(self):
+        """Loads a tensorflow model checkpoint into memory"""
+
+        if self.graph != None and self.sess != None:
+            # Don't load more than once.
+            return
+
+        PATH_TO_CKPT = '/Users/piv/Documents/Projects/car/GestureRecognition/frozen_inference_graph.pb'
+        # load frozen tensorflow model into memory
+        detection_graph = tf.Graph()
+        with detection_graph.as_default():
+            od_graph_def = tf.GraphDef()
+            with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
+                serialized_graph = fid.read()
+                od_graph_def.ParseFromString(serialized_graph)
+                tf.import_graph_def(od_graph_def, name='')
+            sess = tf.Session(graph=detection_graph)
+        self.graph = detection_graph
+        self.sess = sess
+
+
+    # Source: Victor Dibia
+    # Link: https://github.com/victordibia/handtracking
+    # Code taken straight from his example, as it works perfectly. This is specifically
+    # from the detect_hand method that he wrote, as other processing is required for the
+    # hand recognition to work correctly.
+    def detect_hand_tensorflow(self, detection_graph, sess):
+        """ Detects hands in a frame using a CNN
+
+        detection_graph -- The CNN to use to detect the hand.
+        sess -- The tensorflow session for the given graph
+        """
+
+        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
+
+        detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
+
+        detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
+
+        detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
+
+        num_detections = detection_graph.get_tensor_by_name('num_detections:0')
+
+        img_expanded = np.expand_dims(self.img, axis=0)
+
+        (boxes, scores, classes, num) = sess.run(
+            [detection_boxes, detection_scores, detection_classes, num_detections],
+            feed_dict={image_tensor: img_expanded})
+        print('finished detection')
+        return np.squeeze(boxes), np.squeeze(scores)
+
+    def detect_hand_opencv(self, detection_graph, sess):
+        """Performs hand detection using a CNN from tensorflow using opencv.
+
+        detection_graph -- The CNN to use to detect the hand.
+        sess -- The tensorflow session for the given graph
+        """
         if self.img is None:
-            return 0
+            return
 
         height = self.img.shape[0]
         width = self.img.shape[1]
+
         scale = 0.5
-        classes = None # Stores classes used for classification
+        classes = None
 
-        net = cv2.dnn.readNet(weights_path, config_path)
+        net = cv2.dnn.readNetFromTensorflow(detection_graph, sess)
 
-        net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
+        # width is scaled weirdly to ensure we keep the same ratio as the original image.
+        net.setInput(cv2.dnn.blobFromImage(self.img, scale, size=(300, int(300 * (width/height))), swapRB=True, crop=False))
+        netOut = net.forward()
 
-        outNames = net.getUnconnectedOutLayersNames()
-
-        blob = cv2.dnn.blobFromImage(self.img, scale, (416,416), (0,0,0), True, False)
-
-        net.setInput(blob)
-
-        outs = net.forward(outNames)
-
-        # Getting the output layer.
-        layerNames = net.getLayerNames()
-        lastLayerId = net.getLayerId(layerNames[-1])
-        lastLayer = net.getLayer(lastLayerId)
-
-        classIds = []
-        confidences = []
+        # Format output to look same as tensorflow output.
+        scores = []
         boxes = []
-        if lastLayer.type == 'DetectionOutput':
-            # Check we are using an actual detection module.
-            # Will return a 1x1xnx7 blob, where n is number of detections.
-            # Tuple for each detection: [batchId, classId, confidence, left, top, right, bottom]
-            for out in outs:
-                for detection in out[0,0]:
-                    confidence = detection[2]
-                    if confidence > conf_thresh:
-                        # WIll need to verify this first, but given code said this is needed.
-                        left = int(detection[3] * width)
-                        top = int(detection[4] * height)
-                        right = int(detection[5] * width)
-                        bottom = int(detection[6] * height)
-                        classIds.append(int(detection[1]) - 1)
-                        confidences.append(float(confidence))
-                        boxes.append((left, top, right, bottom))
+        for out in netOut:
+            for detection in out[0,0]:
+                scores.append(detection[2])
+                boxes.append((detection[3], detection[4], detection[5], detection[6]))
+            # Only doing first class as only trying to find the hand.
+            break
+        return np.array(boxes), np.array(scores)
+
+    def get_best_hand(self, boxes, scores, conf_thresh, nms_thresh):
+        """
+        Gets the best hand bounding box by inspecting confidence scores and overlapping
+        boxes, as well as the overall size of each box to determine which hand (if multiple present)
+        should be tested to recognise.
+        """
+        # First remove any boxes below confidence threshold
+        confident_bs = boxes[scores > conf_thresh]
 
-        # Remove duplicate/overlapping boxes -> makes sure only detect one hand in an area.
-        indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_thresh, nms_thresh)
+        # Then use NMS to get rid of heavily overlapping boxes.
+        # This wasn't used in the tensorflow example that was found, however probably a
+        # good idea to use it just in case.
+        indices = cv2.dnn.NMSBoxes(boxes, scores, conf_thresh, nms_thresh)
 
-        for i in indices:
-            i = i[0]
-            box = boxes[i]
-            left = box[0]
-            top = box[1]
-            right = box[2]
-            bottom = box[3]
-            # Now draw the box if we want to.
-
-        # OR can just get the box that is a hand with the maximum confidence/maximum box area -> this implies closest hand...
+        # Finally calculate area of each box to determine which hand is clearest (biggest in image).
+        # Just does the most confident for now.
         max_conf = 0
         max_index = 0
-        for conf in confidences:
+        for i, conf in enumerate(scores):
             if conf > max_conf:
                 max_conf = conf
                 max_index = i
+
+        return boxes[max_index]
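
Usage note (not part of the patch itself): a minimal sketch of how the new TensorFlow detection path might be exercised after applying this change. It assumes TensorFlow 1.x (tf.GraphDef / tf.Session, as used above), an OpenCV BGR frame, and that frozen_inference_graph.pb sits at the hard-coded PATH_TO_CKPT; the camera index is illustrative, and the 0.5 / 0.4 thresholds simply mirror the defaults of the removed detect_hand method.

    import cv2
    from GestureRecognition.SimpleHandRecogniser import SimpleHandRecogniser

    cap = cv2.VideoCapture(0)  # assumed camera index
    ok, frame = cap.read()
    cap.release()

    if ok:
        recogniser = SimpleHandRecogniser(frame)
        # Load the frozen graph once, then run the CNN detector added in this patch.
        recogniser.load_inference_graph()
        boxes, scores = recogniser.detect_hand_tensorflow(recogniser.graph, recogniser.sess)
        # Pick a single candidate box using the confidence-based helper.
        best_box = recogniser.get_best_hand(boxes, scores, 0.5, 0.4)
        print('Best hand box:', best_box)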