Fix going outside of array bounds, and attempt to make CNN work.

2019-01-24 16:19:20 +10:30
parent 35d0f962e6
commit c84c427fa7
1 changed files with 138 additions and 59 deletions
--- a/GestureRecognition/SimpleHandRecogniser.py
+++ b/GestureRecognition/SimpleHandRecogniser.py
@@ -1,10 +1,13 @@
 from GestureRecognition.handrecogniser import HandRecogniser
 import numpy as np
 import cv2
 import tensorflow as tf
 class SimpleHandRecogniser(HandRecogniser):
    def __init__(self, frame):
        self.img = frame
        self.graph = None
        self.sess = None
    def __calc_pos_y(self, x, radius, centre):
        """
@@ -89,9 +92,16 @@ class SimpleHandRecogniser(HandRecogniser):
        Calculates the actual gesture, returning the number of fingers 
        seen in the image.
        """
        print('Getting Gesture')
        if self.img is None:
-            return 0
+            print('There is no image')
            return -1
        # First cut out the frame using the neural network.
        self.load_inference_graph()
        print("loaded inference graph")
        detections, scores = self.detect_hand_tensorflow(self.graph, self.sess)
        print('Attempting to use pure hand recognition')
        self.img_hsv = cv2.cvtColor(self.img, cv2.COLOR_BGR2HSV)
        # Need to shift red pixels so they can be 0-20 rather than 250-~20
@@ -100,7 +110,9 @@ class SimpleHandRecogniser(HandRecogniser):
        self.img_hsv = self.__denoise(self.img_hsv)
        self.__segment_image()
        print('calculating circle')
        radius, centre = self.__calc_circle(self.mask)
        print('Got circle')
        # Now go around the circle to calculate num of times going 0->255 or vice-versa.
        # First just do it the naive way with loops.
@@ -109,9 +121,32 @@ class SimpleHandRecogniser(HandRecogniser):
        prev_x = centre[0] - radius
        prev_y = [self.__calc_pos_y(centre[0] - radius, radius, centre), self.__calc_pos_y(centre[0] - radius, radius, centre)]
        num_change = 0
-        for x in range(centre[0] - radius + 1, centre[0] + radius):
+
        # Make sure x is also within bounds.
        x_start = centre[0] - radius + 1
        if x_start < 0:
            x_start = 0
        x_end = centre[0] + radius
        if x_end >= self.mask.shape[1]:
            x_end = self.mask.shape[1] - 1
        print(x_start)
        print(x_end)
        print(self.mask.shape)
        for x in range(x_start, x_end):
            # Need to check circle is inside the bounds.
            ypos = self.__calc_pos_y(x, radius, centre)
            # y above centre (ypos) and y below radius)
            y = [ypos, centre[1] - (ypos-centre[1])]
            if y[0] < 0:
                y[0] = 0
            if y[0] >= self.mask.shape[0]:
                y[0] = self.mask.shape[0] - 1
            if y[1] < 0:
                y[1] = 0
            if y[1] >= self.mask.shape[0]:
                y[1] = self.mask.shape[0] - 1
            if(self.mask[y[0], x] != self.mask[prev_y[0], prev_x]):
                num_change += 1
            if self.mask[y[1], x] != self.mask[prev_y[1], prev_x] and y[0] != y[1]:
@@ -119,81 +154,125 @@ class SimpleHandRecogniser(HandRecogniser):
            prev_x = x
            prev_y = y
        print('Finished calculating, returning')
        return num_change / 2 - 1
-    def detect_hand(self, weights_path, config_path, conf_thresh = 0.5, nms_thresh = 0.4):
+    def setFrame(self, frame):
-        '''
+        self.img = frame
-            Detects if there is a hand in the image. If there is (above a significant confidence threshold)
+
-            then the function will set the img property to the location of the hand according to its bounding box. 
+    # Source: Victor Dibia
-        '''
+    # Link: https://github.com/victordibia/handtracking
-        # Most of this code is from here: www.arunponnusamy.com/yolo-object-detection-opencv-python.html
+    # Taken the code straight from his example, as it works perfectly. This is specifically
-        # Also https://github.com/opencv/opencv/blob/3.4/samples/dnn/object_detection.py
+    # from the load_inference_graph method that he wrote, and will load the graph into
    # memory if one has not already been loaded for this object.  
    def load_inference_graph(self, ckpt_path=None):
        """Load the frozen TensorFlow detection graph into memory, at most once.

        If both ``self.graph`` and ``self.sess`` are already set, this is a
        no-op. Otherwise the serialized graph is read from disk, imported into
        a fresh ``tf.Graph`` and a ``tf.Session`` bound to that graph is
        created; both are stored on the instance.

        ckpt_path -- Optional path to the frozen ``.pb`` graph file. When
                     omitted, the original hard-coded location is used.
        """
        if self.graph is not None and self.sess is not None:
            # Don't load more than once.
            return
        if ckpt_path is None:
            ckpt_path = '/Users/piv/Documents/Projects/car/GestureRecognition/frozen_inference_graph.pb'
        # Load the frozen tensorflow model into memory.
        detection_graph = tf.Graph()
        with detection_graph.as_default():
            od_graph_def = tf.GraphDef()
            with tf.gfile.GFile(ckpt_path, 'rb') as fid:
                od_graph_def.ParseFromString(fid.read())
            # The file is no longer needed once the GraphDef is parsed.
            tf.import_graph_def(od_graph_def, name='')
            sess = tf.Session(graph=detection_graph)
        self.graph = detection_graph
        self.sess = sess
    # Source: Victor Dibia
    # Link: https://github.com/victordibia/handtracking
    # Taken the code straight from his example, as it works perfectly. This is specifically
    # from the detect_hand method that he wrote, as other processing is required for the
    # hand recognition to work correctly.
    def detect_hand_tensorflow(self, detection_graph, sess):
        """Detect hands in ``self.img`` using a CNN.

        detection_graph -- The CNN to use to detect the hand.
        sess -- The tensorflow session for the given graph.

        Returns a ``(boxes, scores)`` tuple: the ``detection_boxes`` and
        ``detection_scores`` outputs of the graph with singleton dimensions
        squeezed out.
        """
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
        detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
        # Add a leading axis of size 1 so the single frame is fed as a batch.
        img_expanded = np.expand_dims(self.img, axis=0)
        # feed_dict must be a mapping of placeholder -> value. The previous
        # ``{image_tensor, img_expanded}`` was a set literal, which raises
        # TypeError because numpy arrays are unhashable.
        boxes, scores = sess.run(
            [detection_boxes, detection_scores],
            feed_dict={image_tensor: img_expanded})
        print('finished detection')
        return np.squeeze(boxes), np.squeeze(scores)
    def detect_hand_opencv(self, detection_graph, sess):
        """Performs hand detection using a CNN from tensorflow using opencv. 
        detection_graph -- The CNN to use to detect the hand.
        sess -- THe tensorflow session for the given graph
        """
        if self.img is None:
-            return 0
+            return
        height = self.img.shape[0]
        width = self.img.shape[1]
        scale = 0.5
-        classes = None # Stores classes used for classification
+        classes = None
-        net = cv2.dnn.readNet(weights_path, config_path)
+        net = cv2.dnn.readNetFromTensorflow(detection_graph, sess)
-        net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
+        # width is scaled weirdly to ensure we keep the same ratio as the original image.
        net.setInput(cv2.dnn.blobFromImage(self.img, scale, size=(300, 300 * (width/height)), swapRB=True, crop=False))
        netOut = net.forward()
-        outNames = net.getUnconnectedOutLayersNames()
+        # Format output to look same as tensorflow output.
-
+        scores = []
        blob = cv2.dnn.blobFromImage(self.img, scale, (416,416), (0,0,0), True, False)
        net.setInput(blob)
        outs = net.forward(outNames)
        # Getting the output layer.
        layerNames = net.getLayerNames()
        lastLayerId = net.getLayerId(layerNames[-1])
        lastLayer = net.getLayer(lastLayerId)
        classIds = []
        confidences = []
        boxes = []
        if lastLayer.type == 'DetectionOutput':
            # Check we are using an actual detection module. 
            # Will return a 1x1xnx7 blob, where n is number of detections.
            # Tuple for each detection: [batchId, classId, confidence, left, top, right, bottom]
-            for out in outs:
+        for out in netOut:
            for detection in out[0,0]:
-                    confidence = detection[2]
+                scores.append(detection[2])
-                    if confidence > conf_thresh:
+                boxes.append(detection[3], detection[4], detection[5], detection[6])
-                        # WIll need to verify this first, but given code said this is needed. 
+            # Only doing first class as only trying to find the hand.
-                        left = int(detection[3] * width)
+            break
-                        top = int(detection[4] * height)
+        return np.array(boxes), np.array(scores)
                        right = int(detection[5] * width)
                        bottom = int(detection[6] * height)
                        classIds.append(int(detection[1]) - 1)
                        confidences.append(float(confidence))
                        boxes.append((left, top, right, bottom))
    def get_best_hand(self, boxes, scores, conf_thresh, nms_thresh):
        """
        Gets the best hand bounding box by inspecting confidence scores and overlapping
        boxes, as well as the overall size of each box to determine which hand (if multiple present)
        should be tested to recognise. 
        """
        # First remove any boxes below confidence threshold
        confident_bs = boxes[scores > conf_thresh]
-        # Remove duplicate/overlapping boxes -> makes sure only detect one hand in an area.
+        # Then use NMS to get rid of heavily overlapping boxes.
-        indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_thresh, nms_thresh)
+        # This wasn't used in the tensorflow example that was found, however probably a
        # good idea to use it just in case. 
        indices = cv2.dnn.NMSBoxes(boxes, scores, conf_thresh, nms_thresh)
-        for i in indices:
+        # Finally calculate area of each box to determine which hand is clearest (biggest in image)
-            i = i[0]
+        # Just does the most confident for now.
            box = boxes[i]
            left = box[0]
            top = box[1]
            right = box[2]
            bottom = box[3]
            # Now draw the box if we want to.
        # OR can just get the box that is a hand with the maximum confidence/maximum box area -> this implies closest hand...
        max_conf = 0
        max_index = 0
-        for conf in confidences:
+        for conf in scores:
            if conf > max_conf:
                max_conf = conf
-                max_index = i
+                max_index = conf
        return boxes[max_index]