From 983b5034631a37c4a1a44768f7bc93e5596d06df Mon Sep 17 00:00:00 2001 From: Michael Pivato Date: Fri, 1 Mar 2019 15:48:11 +1030 Subject: [PATCH] So many changes to hand recogniser i don't even know if it still works. Was trying to get it to have a probability by using multiple fingers. Also trying to get it to get the best box from opencv. --- GestureRecognition/SimpleHandRecogniser.py | 220 +++++++++++++-------- 1 file changed, 140 insertions(+), 80 deletions(-) diff --git a/GestureRecognition/SimpleHandRecogniser.py b/GestureRecognition/SimpleHandRecogniser.py index 35039f4..b9a7b4a 100644 --- a/GestureRecognition/SimpleHandRecogniser.py +++ b/GestureRecognition/SimpleHandRecogniser.py @@ -1,15 +1,15 @@ -from GestureRecognition.handrecogniser import HandRecogniser import numpy as np import cv2 -# import tensorflow as tf -import multiprocessing as mp + +from GestureRecognition.handrecogniser import HandRecogniser class SimpleHandRecogniser(HandRecogniser): def __init__(self, frame): self.img = frame self.graph = None self.sess = None - + self.img_cut = None + def __calc_pos_y(self, x, radius, centre): """ Calculates the position of y on a given circle radius and centre, given coordinate x. @@ -20,54 +20,55 @@ class SimpleHandRecogniser(HandRecogniser): """ Segments the hand from the rest of the image to get a threshold. """ - self.img_hsv = cv2.GaussianBlur(self.img_hsv,(5,5),0) + self.img_cut = cv2.GaussianBlur(self.img_cut, (5, 5), 0) lower_skin = (0, 0, 153) upper_skin = (45, 153, 255) # Only need mask, as we can just use this to do the hand segmentation. - self.mask = cv2.inRange(self.img_hsv, lower_skin, upper_skin) + self.img_cut = cv2.inRange(self.img_cut, lower_skin, upper_skin) # Apply another blur to rmeove any small holes/noise - self.mask = self.__denoise(self.mask) - ret, self.mask = cv2.threshold(self.mask, 50, 255, cv2.THRESH_BINARY) + self.img_cut = self.__denoise(self.img_cut) + ret, self.img_cut = cv2.threshold(self.img_cut, 50, 255, cv2.THRESH_BINARY) def __denoise(self, image): """ Applies a 5x5 gaussian blur to remove noise from the image. """ - return cv2.GaussianBlur(image,(5,5),0) - - def __calc_circle(self, image, radius_percent = 0.6): + return cv2.GaussianBlur(image, (5, 5), 0) + + def __calc_circle(self, image, radius_percent=0.6): """ Calculates the equation of the circle (radius, centre) from a given threshold image, so that the circle is the center of gravity of the - given threshold pixels, and the radius is by default 55% of the total + given threshold pixels, and the radius is by default 55% of the total size. """ - k = np.sum(self.mask) / 255 + k = np.sum(self.img_cut) / 255 # Taking indices for num of rows. - x_ind = np.arange(0,self.mask.shape[1]) - y_ind = np.arange(0,self.mask.shape[0]) - coords_x = np.zeros((self.mask.shape[0], self.mask.shape[1])) - coords_y = np.zeros((self.mask.shape[0], self.mask.shape[1])) - coords_x[:,:] = x_ind + x_ind = np.arange(0, self.img_cut.shape[1]) + y_ind = np.arange(0, self.img_cut.shape[0]) + coords_x = np.zeros((self.img_cut.shape[0], self.img_cut.shape[1])) + coords_y = np.zeros((self.img_cut.shape[0], self.img_cut.shape[1])) + coords_x[:, :] = x_ind - # Even this is extremely quick as it goes through rows in the numpy array, which in python is much faster than columns + # Even this is extremely quick as it goes through rows in the numpy array, + # which in python is much faster than columns for element in y_ind: - coords_y[element,:] = element - + coords_y[element, :] = element + # Now need to get the average x value and y value for centre of gravity - centre = (int(np.sum(coords_x[self.mask == 255])/k), int(np.sum(coords_y[self.mask == 255])/k)) + centre = (int(np.sum(coords_x[self.img_cut == 255])/k), int(np.sum(coords_y[self.img_cut == 255])/k)) # Calculate radius of circle: # May need to calculate diameter as well. # Just take min/max x values and y values - x_min = np.min(coords_x[self.mask == 255]) - x_max = np.max(coords_x[self.mask == 255]) - y_min = np.min(coords_y[self.mask == 255]) - y_max = np.max(coords_y[self.mask == 255]) + x_min = np.min(coords_x[self.img_cut == 255]) + x_max = np.max(coords_x[self.img_cut == 255]) + y_min = np.min(coords_y[self.img_cut == 255]) + y_max = np.max(coords_y[self.img_cut == 255]) candidate_pts = [(x_min, y_min), (x_min, y_max), (x_max, y_min), (x_max, y_max)] radius = 0 @@ -83,19 +84,66 @@ class SimpleHandRecogniser(HandRecogniser): return radius, centre + def __calc_circles(self, image, radius_percent_range=[0.6, 0.8], step = 0.1): + """ + Calculates the equation of the circle (radius, centre), but with + several radii so that we can get a more accurate estimate of from a given + threshold image, so that the circle is the center of gravity of the + given threshold pixels. + """ + k = np.sum(self.img_cut) / 255 + + # Taking indices for num of rows. + x_ind = np.arange(0,self.img_cut.shape[1]) + y_ind = np.arange(0,self.img_cut.shape[0]) + coords_x = np.zeros((self.img_cut.shape[0], self.img_cut.shape[1])) + coords_y = np.zeros((self.img_cut.shape[0], self.img_cut.shape[1])) + coords_x[:,:] = x_ind + + # Even this is extremely quick as it goes through rows in the numpy array, which in python is much faster than columns + for element in y_ind: + coords_y[element,:] = element + + # Now need to get the average x value and y value for centre of gravity + centre = (int(np.sum(coords_x[self.img_cut == 255])/k), int(np.sum(coords_y[self.img_cut == 255])/k)) + + # Calculate radius of circle: + # May need to calculate diameter as well. + # Just take min/max x values and y values + x_min = np.min(coords_x[self.img_cut == 255]) + x_max = np.max(coords_x[self.img_cut == 255]) + y_min = np.min(coords_y[self.img_cut == 255]) + y_max = np.max(coords_y[self.img_cut == 255]) + + candidate_pts = [(x_min, y_min), (x_min, y_max), (x_max, y_min), (x_max, y_max)] + radius = 0 + + # Check with each point to see which is furthest from the centre. + for pt in candidate_pts: + # Calculate Euclydian Distance + new_distance = ((pt[0] - centre[0])**2 + (pt[1] - centre[1])**2)**(1/2) + if new_distance > radius: + radius = new_distance + + radii = [] + for i in range(radius_percent_range[0], radius_percent_range[1], step): + radii += int(radius * i) + + return radii, centre + def __shift_pixels(self, image, shift_radius): - image[:,:,0] = image[:,:,0] + shift_radius - image[:,:,0] = np.where(image[:,:,0] > 179, image[:,:,0] - 179, image[:,:,0]) + image[:, :, 0] = image[:, :, 0] + shift_radius + image[:, :, 0] = np.where(image[:, :, 0] > 179, image[:, :, 0] - 179, image[:, :, 0]) return image - def setFrame(self, frame): + def set_frame(self, frame): self.img = frame # Source: Victor Dibia # Link: https://github.com/victordibia/handtracking # Taken the code straight from his example, as it works perfectly. This is specifically # from the load_inference_graph method that he wrote, and will load the graph into - # memory if one has not already been loaded for this object. + # memory if one has not already been loaded for this object. # def load_inference_graph(self): # """Loads a tensorflow model checkpoint into memory""" @@ -116,7 +164,7 @@ class SimpleHandRecogniser(HandRecogniser): # self.graph = detection_graph # self.sess = sess - + # Source: Victor Dibia # Link: https://github.com/victordibia/handtracking # Taken the code straight from his example, as it works perfectly. This is specifically @@ -156,52 +204,52 @@ class SimpleHandRecogniser(HandRecogniser): """ self.net = cv2.dnn.readNetFromTensorflow(graph_path, names_path) - def detect_hand_opencv(self, detection_graph, sess): - """Performs hand detection using a CNN from tensorflow using opencv. - + def detect_hand_opencv(self): + """Performs hand detection using a CNN from tensorflow using opencv. + detection_graph -- The CNN to use to detect the hand. sess -- THe tensorflow session for the given graph """ if self.img is None: return - height = self.img.shape[0] - width = self.img.shape[1] + rows = self.img.shape[0] + cols = self.img.shape[1] - scale = 0.5 + self.net.setInput(cv2.dnn.blobFromImage(self.img, size=(300, 300), swapRB=True, crop=False)) + cv_out = self.net.forward() - classes = None - - net = cv2.dnn.readNetFromTensorflow(detection_graph, sess) - - # width is scaled weirdly to ensure we keep tbe same ratio as the original image. - net.setInput(cv2.dnn.blobFromImage(self.img, scale, size=(300, 300 * (width/height)), swapRB=True, crop=False)) - netOut = net.forward() - - # Format output to look same as tensorflow output. - scores = [] boxes = [] + scores = [] - for out in netOut: - for detection in out[0,0]: - scores.append(detection[2]) - boxes.append(detection[3], detection[4], detection[5], detection[6]) - # Only doing first class as only trying to find the hand. - break - return np.array(boxes), np.array(scores) + for detection in cv_out[0, 0, :, :]: + score = float(detection[2]) + # TODO: Need to make this the confidence threshold... + if score > 0.6: + left = detection[3] * cols + top = detection[4] * rows + right = detection[5] * cols + bottom = detection[6] * rows + boxes.append((left, top, right, bottom)) + scores.append(score) + else: + # Scores are in descending order... + break + + return boxes, scores def get_best_hand(self, boxes, scores, conf_thresh, nms_thresh): """ Gets the best hand bounding box by inspecting confidence scores and overlapping boxes, as well as the overall size of each box to determine which hand (if multiple present) - should be tested to recognise. + should be tested to recognise. """ print(scores) boxes = boxes[scores > conf_thresh] scores = scores[scores > conf_thresh] # Use NMS to get rid of heavily overlapping boxes. # This wasn't used in the tensorflow example that was found, however probably a - # good idea to use it just in case. + # good idea to use it just in case. print(boxes.shape) if boxes.shape[0] == 0: print("No good boxes found") @@ -238,7 +286,7 @@ class SimpleHandRecogniser(HandRecogniser): def get_gesture(self): """ - Calculates the actual gesture, returning the number of fingers + Calculates the actual gesture, returning the number of fingers seen in the image. """ print('Getting Gesture') @@ -250,22 +298,37 @@ class SimpleHandRecogniser(HandRecogniser): # print("loaded inference graph") # detections, scores = self.detect_hand_tensorflow(self.graph, self.sess) + print('Loading openCV net') + self.load_cv_net('/Users/piv/Documents/Projects/car/GestureRecognition/frozen_inference_graph.pb', + '/Users/piv/Documents/Projects/car/GestureRecognition/graph.pbtxt') + + detections, scores = self.detect_hand_opencv() + # print("Getting best hand") # best_hand = self.get_best_hand(detections, scores, 0.7, 0.5) # if best_hand is not None: # self.img = self.img[best_hand[0] - 30:best_hand[2] + 30, best_hand[1] - 30:best_hand[3] + 30] + if len(detections) > 0: + print("Cutting out the hand!") + self.img_cut = self.img[detections[0] - 30:detections[2] + 30, detections[1] - 30:detections[3] + 30] + else: + self.img_cut = self.img + print('Attempting to use pure hand recognition') - self.img_hsv = cv2.cvtColor(self.img, cv2.COLOR_BGR2HSV) + self.img_cut = cv2.cvtColor(self.img_cut, cv2.COLOR_BGR2HSV) # Need to shift red pixels so they can be 0-20 rather than 250-~20 - self.img_hsv = self.__shift_pixels(self.img_hsv, 30) + self.img_cut = self.__shift_pixels(self.img_cut, 30) - self.img_hsv = self.__denoise(self.img_hsv) + self.img_cut = self.__denoise(self.img_cut) self.__segment_image() print('calculating circle') - radius, centre = self.__calc_circle(self.mask) + # Could calculate multiple circles to get probability + # for each gesture (i.e. calc num of each gesture recongised and take percentage + # as the probability). + radius, centre = self.__calc_circle(self.img_cut) print('Got circle') # Now go around the circle to calculate num of times going 0->255 or vice-versa. @@ -273,7 +336,8 @@ class SimpleHandRecogniser(HandRecogniser): # Equation of the circle: # y = sqrt(r2 - (x-c)2) + c prev_x = centre[0] - radius - prev_y = [self.__calc_pos_y(centre[0] - radius, radius, centre), self.__calc_pos_y(centre[0] - radius, radius, centre)] + prev_y = [self.__calc_pos_y(centre[0] - radius, radius, centre), + self.__calc_pos_y(centre[0] - radius, radius, centre)] num_change = 0 # Make sure x is also within bounds. @@ -282,40 +346,36 @@ class SimpleHandRecogniser(HandRecogniser): x_start = 0 x_end = centre[0] + radius - if x_end >= self.mask.shape[1]: - x_end = self.mask.shape[1] - 1 - # Could batch this function to execute on multiple cores? - # Calc num CPUS. - # num_cores = mp.cpu_count() - # # Calc batch size: - # batch_size = x_end // num_cores - # for b in range(0, num_cores - 1): - # pass + if x_end >= self.img_cut.shape[1]: + x_end = self.img_cut.shape[1] - 1 for x in range(x_start, x_end): # Need to check circle is inside the bounds. ypos = self.__calc_pos_y(x, radius, centre) # y above centre (ypos) and y below radius) y = [ypos, centre[1] - (ypos-centre[1])] - + if y[0] < 0: y[0] = 0 - if y[0] >= self.mask.shape[0]: - y[0] = self.mask.shape[0] - 1 + if y[0] >= self.img_cut.shape[0]: + y[0] = self.img_cut.shape[0] - 1 if y[1] < 0: y[1] = 0 - if y[1] >= self.mask.shape[0]: - y[1] = self.mask.shape[0] - 1 - if(self.mask[y[0], x] != self.mask[prev_y[0], prev_x]): + if y[1] >= self.img_cut.shape[0]: + y[1] = self.img_cut.shape[0] - 1 + if(self.img_cut[y[0], x] != self.img_cut[prev_y[0], prev_x]): num_change += 1 - if self.mask[y[1], x] != self.mask[prev_y[1], prev_x] and y[0] != y[1]: + if self.img_cut[y[1], x] != self.img_cut[prev_y[1], prev_x] and y[0] != y[1]: num_change += 1 prev_x = x prev_y = y print('Finished calculating, returning') print(num_change) - return int(num_change / 2 - 1) + return int(num_change / 2 - 1), self.img + + def get_gesture_multiple_radii(self): + pass def calc_hand_batch(self, batch): - pass \ No newline at end of file + pass