From c84c427fa75d091efa730773c3342ad4589d0a9b Mon Sep 17 00:00:00 2001
From: Michael Pivato
Date: Thu, 24 Jan 2019 16:19:20 +1030
Subject: [PATCH] Fix going outside of array bounds, and attempt to make CNN
 work.

---
 GestureRecognition/SimpleHandRecogniser.py | 197 +++++++++++++++------
 1 file changed, 138 insertions(+), 59 deletions(-)

diff --git a/GestureRecognition/SimpleHandRecogniser.py b/GestureRecognition/SimpleHandRecogniser.py
index 3548a87..1bc77a7 100644
--- a/GestureRecognition/SimpleHandRecogniser.py
+++ b/GestureRecognition/SimpleHandRecogniser.py
@@ -1,10 +1,13 @@
 from GestureRecognition.handrecogniser import HandRecogniser
 import numpy as np
 import cv2
+import tensorflow as tf
 
 class SimpleHandRecogniser(HandRecogniser):
     def __init__(self, frame):
         self.img = frame
+        self.graph = None
+        self.sess = None
 
     def __calc_pos_y(self, x, radius, centre):
         """
@@ -89,9 +92,16 @@ class SimpleHandRecogniser(HandRecogniser):
         Calculates the actual gesture, returning the number of fingers seen in the
         image.
         """
+        print('Getting Gesture')
         if self.img is None:
-            return 0
-
+            print('There is no image')
+            return -1
+        # First cut out the frame using the neural network.
+        self.load_inference_graph()
+        print("loaded inference graph")
+        detections, scores = self.detect_hand_tensorflow(self.graph, self.sess)
+
+        print('Attempting to use pure hand recognition')
         self.img_hsv = cv2.cvtColor(self.img, cv2.COLOR_BGR2HSV)
 
         # Need to shift red pixels so they can be 0-20 rather than 250-~20
@@ -100,7 +110,9 @@ class SimpleHandRecogniser(HandRecogniser):
         self.img_hsv = self.__denoise(self.img_hsv)
         self.__segment_image()
 
+        print('calculating circle')
         radius, centre = self.__calc_circle(self.mask)
+        print('Got circle')
 
         # Now go around the circle to calculate num of times going 0->255 or vice-versa.
         # First just do it the naive way with loops.
@@ -109,9 +121,32 @@ class SimpleHandRecogniser(HandRecogniser):
         prev_x = centre[0] - radius
         prev_y = [self.__calc_pos_y(centre[0] - radius, radius, centre), self.__calc_pos_y(centre[0] - radius, radius, centre)]
         num_change = 0
-        for x in range(centre[0] - radius + 1, centre[0] + radius):
+
+        # Make sure x is also within bounds.
+        x_start = centre[0] - radius + 1
+        if x_start < 0:
+            x_start = 0
+
+        x_end = centre[0] + radius
+        if x_end >= self.mask.shape[1]:
+            x_end = self.mask.shape[1] - 1
+        print(x_start)
+        print(x_end)
+        print(self.mask.shape)
+        for x in range(x_start, x_end):
+            # Need to check circle is inside the bounds.
             ypos = self.__calc_pos_y(x, radius, centre)
+            # y above the centre (ypos) and y mirrored below the centre.
             y = [ypos, centre[1] - (ypos-centre[1])]
+
+            if y[0] < 0:
+                y[0] = 0
+            if y[0] >= self.mask.shape[0]:
+                y[0] = self.mask.shape[0] - 1
+            if y[1] < 0:
+                y[1] = 0
+            if y[1] >= self.mask.shape[0]:
+                y[1] = self.mask.shape[0] - 1
             if(self.mask[y[0], x] != self.mask[prev_y[0], prev_x]):
                 num_change += 1
             if self.mask[y[1], x] != self.mask[prev_y[1], prev_x] and y[0] != y[1]:
@@ -119,81 +154,125 @@ class SimpleHandRecogniser(HandRecogniser):
             prev_x = x
             prev_y = y
 
+        print('Finished calculating, returning')
+
         return num_change / 2 - 1
 
-    def detect_hand(self, weights_path, config_path, conf_thresh = 0.5, nms_thresh = 0.4):
-        '''
-        Detects if there is a hand in the image. If there is (above a significant confidence threshold)
-        then the function will set the img property to the location of the hand according to its bounding box.
-        '''
-        # Most of this code is from here: www.arunponnusamy.com/yolo-object-detection-opencv-python.html
-        # Also https://github.com/opencv/opencv/blob/3.4/samples/dnn/object_detection.py
+    def setFrame(self, frame):
+        self.img = frame
+
+    # Source: Victor Dibia
+    # Link: https://github.com/victordibia/handtracking
+    # Code taken straight from his example, as it works perfectly. This is specifically
+    # from the load_inference_graph method that he wrote, and will load the graph into
+    # memory if one has not already been loaded for this object.
+    def load_inference_graph(self):
+        """Loads a tensorflow model checkpoint into memory"""
+
+        if self.graph != None and self.sess != None:
+            # Don't load more than once.
+            return
+
+        PATH_TO_CKPT = '/Users/piv/Documents/Projects/car/GestureRecognition/frozen_inference_graph.pb'
+        # load frozen tensorflow model into memory
+        detection_graph = tf.Graph()
+        with detection_graph.as_default():
+            od_graph_def = tf.GraphDef()
+            with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
+                serialized_graph = fid.read()
+                od_graph_def.ParseFromString(serialized_graph)
+                tf.import_graph_def(od_graph_def, name='')
+            sess = tf.Session(graph=detection_graph)
+        self.graph = detection_graph
+        self.sess = sess
+
+
+    # Source: Victor Dibia
+    # Link: https://github.com/victordibia/handtracking
+    # Code taken straight from his example, as it works perfectly. This is specifically
+    # from the detect_hand method that he wrote, as other processing is required for the
+    # hand recognition to work correctly.
+    def detect_hand_tensorflow(self, detection_graph, sess):
+        """ Detects hands in a frame using a CNN
+
+        detection_graph -- The CNN to use to detect the hand.
+        sess -- The tensorflow session for the given graph
+        """
+
+        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
+
+        detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
+
+        detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
+
+        detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
+
+        num_detections = detection_graph.get_tensor_by_name('num_detections:0')
+
+        img_expanded = np.expand_dims(self.img, axis=0)
+
+        (boxes, scores, classes, num) = sess.run(
+            [detection_boxes, detection_scores, detection_classes, num_detections],
+            feed_dict={image_tensor: img_expanded})
+        print('finished detection')
+        return np.squeeze(boxes), np.squeeze(scores)
+
+    def detect_hand_opencv(self, detection_graph, sess):
+        """Performs hand detection using a CNN from tensorflow using opencv.
+
+        detection_graph -- The CNN to use to detect the hand.
+        sess -- The tensorflow session for the given graph
+        """
         if self.img is None:
-            return 0
+            return
 
         height = self.img.shape[0]
         width = self.img.shape[1]
+
         scale = 0.5
-        classes = None # Stores classes used for classification
+        classes = None
 
-        net = cv2.dnn.readNet(weights_path, config_path)
+        net = cv2.dnn.readNetFromTensorflow(detection_graph, sess)
 
-        net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
+        # width is scaled weirdly to ensure we keep the same ratio as the original image.
+        net.setInput(cv2.dnn.blobFromImage(self.img, scale, size=(300, int(300 * (width/height))), swapRB=True, crop=False))
+        netOut = net.forward()
 
-        outNames = net.getUnconnectedOutLayersNames()
-
-        blob = cv2.dnn.blobFromImage(self.img, scale, (416,416), (0,0,0), True, False)
-
-        net.setInput(blob)
-
-        outs = net.forward(outNames)
-
-        # Getting the output layer.
-        layerNames = net.getLayerNames()
-        lastLayerId = net.getLayerId(layerNames[-1])
-        lastLayer = net.getLayer(lastLayerId)
-
-        classIds = []
-        confidences = []
+        # Format output to look same as tensorflow output.
+        scores = []
         boxes = []
-        if lastLayer.type == 'DetectionOutput':
-            # Check we are using an actual detection module.
-            # Will return a 1x1xnx7 blob, where n is number of detections.
-            # Tuple for each detection: [batchId, classId, confidence, left, top, right, bottom]
-            for out in outs:
-                for detection in out[0,0]:
-                    confidence = detection[2]
-                    if confidence > conf_thresh:
-                        # WIll need to verify this first, but given code said this is needed.
-                        left = int(detection[3] * width)
-                        top = int(detection[4] * height)
-                        right = int(detection[5] * width)
-                        bottom = int(detection[6] * height)
-                        classIds.append(int(detection[1]) - 1)
-                        confidences.append(float(confidence))
-                        boxes.append((left, top, right, bottom))
+        for out in netOut:
+            for detection in out[0,0]:
+                scores.append(detection[2])
+                boxes.append((detection[3], detection[4], detection[5], detection[6]))
+            # Only doing first class as only trying to find the hand.
+            break
+        return np.array(boxes), np.array(scores)
+
+    def get_best_hand(self, boxes, scores, conf_thresh, nms_thresh):
+        """
+        Gets the best hand bounding box by inspecting confidence scores and overlapping
+        boxes, as well as the overall size of each box to determine which hand (if multiple present)
+        should be tested to recognise.
+        """
+        # First remove any boxes below confidence threshold
+        confident_bs = boxes[scores > conf_thresh]
 
-        # Remove duplicate/overlapping boxes -> makes sure only detect one hand in an area.
-        indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_thresh, nms_thresh)
+        # Then use NMS to get rid of heavily overlapping boxes.
+        # This wasn't used in the tensorflow example that was found, however probably a
+        # good idea to use it just in case.
+        indices = cv2.dnn.NMSBoxes(boxes, scores, conf_thresh, nms_thresh)
 
-        for i in indices:
-            i = i[0]
-            box = boxes[i]
-            left = box[0]
-            top = box[1]
-            right = box[2]
-            bottom = box[3]
-            # Now draw the box if we want to.
-
-        # OR can just get the box that is a hand with the maximum confidence/maximum box area -> this implies closest hand...
+        # Finally calculate area of each box to determine which hand is clearest (biggest in image).
+        # Just does the most confident for now.
         max_conf = 0
         max_index = 0
-        for conf in confidences:
+        for i, conf in enumerate(scores):
             if conf > max_conf:
                 max_conf = conf
                 max_index = i
+
+        return boxes[max_index]
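
Usage note (not part of the patch itself): a minimal sketch of how the new TensorFlow detection path might be exercised after applying this change. It assumes TensorFlow 1.x (tf.GraphDef / tf.Session, as used above), an OpenCV BGR frame, and that frozen_inference_graph.pb sits at the hard-coded PATH_TO_CKPT; the camera index is illustrative, and the 0.5 / 0.4 thresholds simply mirror the defaults of the removed detect_hand method.

    import cv2
    from GestureRecognition.SimpleHandRecogniser import SimpleHandRecogniser

    cap = cv2.VideoCapture(0)  # assumed camera index
    ok, frame = cap.read()
    cap.release()

    if ok:
        recogniser = SimpleHandRecogniser(frame)
        # Load the frozen graph once, then run the CNN detector added in this patch.
        recogniser.load_inference_graph()
        boxes, scores = recogniser.detect_hand_tensorflow(recogniser.graph, recogniser.sess)
        # Pick a single candidate box using the confidence-based helper.
        best_box = recogniser.get_best_hand(boxes, scores, 0.5, 0.4)
        print('Best hand box:', best_box)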