# Was trying to get it to have a probability by using multiple fingers.
# Also trying to get it to get the best box from OpenCV.
import numpy as np
import cv2

from GestureRecognition.handrecogniser import HandRecogniser


class SimpleHandRecogniser(HandRecogniser):

    def __init__(self, frame):
        self.img = frame
        self.graph = None
        self.sess = None
        self.img_cut = None

    def __calc_pos_y(self, x, radius, centre):
        """
        Calculates the y coordinate on the circle with the given radius and
        centre at a given x coordinate (the positive square root branch).
        """
        return int((radius**2 - (x - centre[0])**2)**(1/2) + centre[1])

    def __segment_image(self):
        """
        Segments the hand from the rest of the image, producing a binary
        threshold image in self.img_cut.
        """
        self.img_cut = cv2.GaussianBlur(self.img_cut, (5, 5), 0)

        # HSV bounds for skin tones (after the hue shift applied in get_gesture).
        lower_skin = (0, 0, 153)
        upper_skin = (45, 153, 255)

        # Only the mask is needed, as it is enough to do the hand segmentation.
        self.img_cut = cv2.inRange(self.img_cut, lower_skin, upper_skin)

        # Apply another blur to remove any small holes/noise, then re-threshold.
        self.img_cut = self.__denoise(self.img_cut)
        _, self.img_cut = cv2.threshold(self.img_cut, 50, 255, cv2.THRESH_BINARY)

    def __denoise(self, image):
        """
        Applies a 5x5 Gaussian blur to remove noise from the image.
        """
        return cv2.GaussianBlur(image, (5, 5), 0)

    def __calc_circle(self, image, radius_percent=0.6):
        """
        Calculates the equation of the circle (radius, centre) from a given
        threshold image, so that the centre is the centre of gravity of the
        thresholded pixels, and the radius is by default 60% of the distance
        from the centre to the furthest corner of the bounding box.
        """
        # Number of white (hand) pixels; assumes the mask is non-empty.
        k = np.sum(image) / 255

        # Build per-pixel x and y coordinate grids.
        x_ind = np.arange(0, image.shape[1])
        y_ind = np.arange(0, image.shape[0])
        coords_x = np.zeros((image.shape[0], image.shape[1]))
        coords_y = np.zeros((image.shape[0], image.shape[1]))
        coords_x[:, :] = x_ind

        # Even this is quick, as it iterates over rows of the numpy array,
        # which is much faster than iterating over columns.
        for element in y_ind:
            coords_y[element, :] = element

        # Average x and y values of the white pixels give the centre of gravity.
        centre = (int(np.sum(coords_x[image == 255]) / k),
                  int(np.sum(coords_y[image == 255]) / k))

        # Calculate the radius of the circle from the bounding box of the
        # white pixels: just take the min/max x and y values.
        x_min = np.min(coords_x[image == 255])
        x_max = np.max(coords_x[image == 255])
        y_min = np.min(coords_y[image == 255])
        y_max = np.max(coords_y[image == 255])

        candidate_pts = [(x_min, y_min), (x_min, y_max), (x_max, y_min), (x_max, y_max)]
        radius = 0

        # Check each corner to see which is furthest from the centre.
        for pt in candidate_pts:
            # Euclidean distance.
            new_distance = ((pt[0] - centre[0])**2 + (pt[1] - centre[1])**2)**(1/2)
            if new_distance > radius:
                radius = new_distance

        radius = int(radius * radius_percent)

        return radius, centre
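    # The coordinate grids above could also be built without the Python loop.
    # A minimal vectorised sketch (an alternative, not the method used above),
    # assuming the same binary mask convention (white pixels == 255):
    #
    # def __calc_centre_vectorised(self, image):
    #     ys, xs = np.nonzero(image == 255)
    #     return (int(xs.mean()), int(ys.mean()))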

    def __calc_circles(self, image, radius_percent_range=(0.6, 0.8), step=0.1):
        """
        Calculates the equation of the circle (radii, centre) as in
        __calc_circle, but with several radii, so that a more accurate
        estimate can be made from a given threshold image. The centre is the
        centre of gravity of the thresholded pixels.
        """
        # Reuse __calc_circle to get the full (unscaled) radius and the centre.
        radius, centre = self.__calc_circle(image, radius_percent=1.0)

        # range() does not accept floats, so use np.arange for the percentages.
        radii = []
        for i in np.arange(radius_percent_range[0], radius_percent_range[1], step):
            radii.append(int(radius * i))

        return radii, centre

    def __shift_pixels(self, image, shift_radius):
        """
        Shifts the hue channel of an HSV image by shift_radius, wrapping
        values back into OpenCV's 0-179 hue range.
        """
        image[:, :, 0] = image[:, :, 0] + shift_radius
        image[:, :, 0] = np.where(image[:, :, 0] > 179, image[:, :, 0] - 179, image[:, :, 0])
        return image

    def set_frame(self, frame):
        self.img = frame

    # Source: Victor Dibia
    # Link: https://github.com/victordibia/handtracking
    # Taken the code straight from his example, as it works perfectly. This is specifically
    # from the load_inference_graph method that he wrote, and will load the graph into
    # memory if one has not already been loaded for this object.
    # def load_inference_graph(self):
    #     """Loads a tensorflow model checkpoint into memory"""

    #     if self.graph is not None and self.sess is not None:
    #         # Don't load more than once, to save time...
    #         return

    #     PATH_TO_CKPT = '/Users/piv/Documents/Projects/car/GestureRecognition/frozen_inference_graph.pb'
    #     # Load the frozen tensorflow model into memory.
    #     detection_graph = tf.Graph()
    #     with detection_graph.as_default():
    #         od_graph_def = tf.GraphDef()
    #         with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
    #             serialized_graph = fid.read()
    #             od_graph_def.ParseFromString(serialized_graph)
    #             tf.import_graph_def(od_graph_def, name='')
    #         sess = tf.Session(graph=detection_graph)
    #     self.graph = detection_graph
    #     self.sess = sess

    # Source: Victor Dibia
    # Link: https://github.com/victordibia/handtracking
    # Taken the code straight from his example, as it works perfectly. This is specifically
    # from the detect_hand method that he wrote, as other processing is required for the
    # hand recognition to work correctly.
    # def detect_hand_tensorflow(self, detection_graph, sess):
    #     """Detects hands in a frame using a CNN.

    #     detection_graph -- The CNN to use to detect the hand.
    #     sess -- The tensorflow session for the given graph.
    #     """
    #     image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    #     detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
    #     detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
    #     detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
    #     num_detections = detection_graph.get_tensor_by_name('num_detections:0')

    #     img_expanded = np.expand_dims(self.img, axis=0)

    #     (boxes, scores, classes, num) = sess.run(
    #         [detection_boxes, detection_scores, detection_classes, num_detections],
    #         feed_dict={image_tensor: img_expanded})
    #     print('finished detection')
    #     return np.squeeze(boxes), np.squeeze(scores)

    def load_cv_net(self, graph_path, names_path):
        """Loads a tensorflow object detection network using OpenCV.

        Arguments
        graph_path: Path to the tensorflow frozen inference graph (something.pb)
        names_path: Path to the tensorflow graph description (something.pbtxt) file.
        """
        self.net = cv2.dnn.readNetFromTensorflow(graph_path, names_path)

    def detect_hand_opencv(self, conf_thresh=0.6):
        """Performs hand detection using a tensorflow CNN loaded through OpenCV.

        conf_thresh -- Minimum confidence score for a detection to be kept.
        """
        if self.img is None:
            return np.array([]), np.array([])

        rows = self.img.shape[0]
        cols = self.img.shape[1]

        self.net.setInput(cv2.dnn.blobFromImage(self.img, size=(300, 300), swapRB=True, crop=False))
        cv_out = self.net.forward()

        boxes = []
        scores = []

        for detection in cv_out[0, 0, :, :]:
            score = float(detection[2])
            if score > conf_thresh:
                left = detection[3] * cols
                top = detection[4] * rows
                right = detection[5] * cols
                bottom = detection[6] * rows
                boxes.append((left, top, right, bottom))
                scores.append(score)
            else:
                # Scores are in descending order, so stop at the first bad one.
                break

        # Return arrays so callers can use boolean indexing on the scores.
        return np.array(boxes), np.array(scores)

    def get_best_hand(self, boxes, scores, conf_thresh, nms_thresh):
        """
        Gets the best hand bounding box by inspecting confidence scores and
        overlapping boxes, as well as the overall size of each box, to decide
        which hand (if multiple are present) should be used for recognition.

        Expects normalised (ymin, xmin, ymax, xmax) boxes, as returned by the
        tensorflow detector; the pixel boxes from detect_hand_opencv would
        need rescaling first.
        """
        print(scores)
        boxes = boxes[scores > conf_thresh]
        scores = scores[scores > conf_thresh]

        # Use NMS to get rid of heavily overlapping boxes.
        # This wasn't used in the tensorflow example that was found, however
        # it is probably a good idea to use it just in case.
        print(boxes.shape)
        if boxes.shape[0] == 0:
            print("No good boxes found")
            return None
        elif boxes.shape[0] == 1:
            print("Only one good box!")
            box = boxes[0]
            box[0] = box[0] * self.img.shape[0]
            box[1] = box[1] * self.img.shape[1]
            box[2] = box[2] * self.img.shape[0]
            box[3] = box[3] * self.img.shape[1]
            return box.astype(int)
        else:
            # Convert the corners to (top, left, height, width) in pixels
            # (widths/heights first, before the corners are overwritten).
            boxes[:, 2] = ((boxes[:, 2] - boxes[:, 0]) * self.img.shape[0]).astype(int)
            boxes[:, 3] = ((boxes[:, 3] - boxes[:, 1]) * self.img.shape[1]).astype(int)
            boxes[:, 0] = (boxes[:, 0] * self.img.shape[0]).astype(int)
            boxes[:, 1] = (boxes[:, 1] * self.img.shape[1]).astype(int)

            # Can't seem to get this to work...
            # indices = cv2.dnn.NMSBoxes(boxes, scores, conf_thresh, nms_thresh)

            print("Num boxes: %s" % boxes.shape[0])
            # Finally, calculate the area of each box to determine which hand
            # is clearest (biggest in the image).
            best_box = boxes[0]
            for box in boxes:
                if box[2] * box[3] > best_box[2] * best_box[3]:
                    best_box = box
            return best_box.astype(int)
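    # A minimal sketch of how cv2.dnn.NMSBoxes could be applied here. An
    # assumption about why the commented call above fails: NMSBoxes expects
    # plain lists of [x, y, w, h] boxes and float scores, not numpy arrays of
    # (y, x, h, w) rows, and it returns the indices of the boxes to keep:
    #
    # bboxes = [[int(b[1]), int(b[0]), int(b[3]), int(b[2])] for b in boxes]
    # indices = cv2.dnn.NMSBoxes(bboxes, [float(s) for s in scores],
    #                            conf_thresh, nms_thresh)
    # kept = [boxes[i] for i in np.array(indices).flatten()]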

    def get_gesture(self):
        """
        Calculates the actual gesture, returning the number of fingers
        seen in the image along with the frame.
        """
        print('Getting Gesture')
        if self.img is None:
            print('There is no image')
            return -1, self.img

        # First cut out the frame using the neural network.
        # self.load_inference_graph()
        # print("loaded inference graph")
        # detections, scores = self.detect_hand_tensorflow(self.graph, self.sess)

        print('Loading openCV net')
        self.load_cv_net('/Users/piv/Documents/Projects/car/GestureRecognition/frozen_inference_graph.pb',
                         '/Users/piv/Documents/Projects/car/GestureRecognition/graph.pbtxt')

        detections, scores = self.detect_hand_opencv()

        # print("Getting best hand")
        # best_hand = self.get_best_hand(detections, scores, 0.7, 0.5)
        # if best_hand is not None:
        #     self.img = self.img[best_hand[0] - 30:best_hand[2] + 30, best_hand[1] - 30:best_hand[3] + 30]

        if len(detections) > 0:
            print("Cutting out the hand!")
            # Boxes are (left, top, right, bottom) in pixels; take the most
            # confident one and add a 30px margin, clamped to the frame.
            box = detections[0].astype(int)
            self.img_cut = self.img[max(box[1] - 30, 0):box[3] + 30,
                                    max(box[0] - 30, 0):box[2] + 30]
        else:
            self.img_cut = self.img

        print('Attempting to use pure hand recognition')
        self.img_cut = cv2.cvtColor(self.img_cut, cv2.COLOR_BGR2HSV)

        # Red hues wrap around the ends of OpenCV's 0-179 hue range, so shift
        # the pixels to put the skin tones in one contiguous band.
        self.img_cut = self.__shift_pixels(self.img_cut, 30)

        self.img_cut = self.__denoise(self.img_cut)
        self.__segment_image()

        print('calculating circle')
        # Could calculate multiple circles to get a probability for each
        # gesture (i.e. count how often each gesture is recognised and take
        # the percentage as the probability).
        radius, centre = self.__calc_circle(self.img_cut)
        print('Got circle')

        # Now go around the circle and count how many times the mask flips
        # 0 -> 255 or vice-versa. First just do it the naive way with loops.
        # Equation of the circle: y = centre[1] +/- sqrt(r^2 - (x - centre[0])^2)
        prev_x = max(centre[0] - radius, 0)
        prev_y = [self.__calc_pos_y(prev_x, radius, centre),
                  self.__calc_pos_y(prev_x, radius, centre)]
        num_change = 0

        # Make sure x is also within bounds.
        x_start = centre[0] - radius + 1
        if x_start < 0:
            x_start = 0

        x_end = centre[0] + radius
        if x_end >= self.img_cut.shape[1]:
            x_end = self.img_cut.shape[1] - 1

        for x in range(x_start, x_end):
            # Need to check the circle is inside the bounds.
            ypos = self.__calc_pos_y(x, radius, centre)
            # ypos is on one side of the centre; mirror it to get the other side.
            y = [ypos, centre[1] - (ypos - centre[1])]

            if y[0] < 0:
                y[0] = 0
            if y[0] >= self.img_cut.shape[0]:
                y[0] = self.img_cut.shape[0] - 1
            if y[1] < 0:
                y[1] = 0
            if y[1] >= self.img_cut.shape[0]:
                y[1] = self.img_cut.shape[0] - 1

            if self.img_cut[y[0], x] != self.img_cut[prev_y[0], prev_x]:
                num_change += 1
            if self.img_cut[y[1], x] != self.img_cut[prev_y[1], prev_x] and y[0] != y[1]:
                num_change += 1
            prev_x = x
            prev_y = y

        print('Finished calculating, returning')
        print(num_change)
        # Each finger crosses the circle twice; subtract one for the wrist.
        return int(num_change / 2 - 1), self.img

    def get_gesture_multiple_radii(self):
        """
        Not implemented yet: intended to run the finger count over several
        circle radii and turn the counts into a probability per gesture.
        """
        pass
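    # A minimal sketch of what get_gesture_multiple_radii could look like,
    # assuming the counting loop in get_gesture were factored out into a
    # hypothetical helper __count_fingers(radius, centre):
    #
    # radii, centre = self.__calc_circles(self.img_cut)
    # counts = [self.__count_fingers(r, centre) for r in radii]
    # # The fraction of radii that agree on a count becomes its probability.
    # probs = {c: counts.count(c) / len(counts) for c in set(counts)}
    # return max(probs, key=probs.get), probs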

    def calc_hand_batch(self, batch):
        pass
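

# A minimal usage sketch, assuming a webcam at index 0 and the model files at
# the hard-coded paths in get_gesture; not part of the recogniser itself.
if __name__ == '__main__':
    cap = cv2.VideoCapture(0)
    ok, frame = cap.read()
    if ok:
        recogniser = SimpleHandRecogniser(frame)
        fingers, img = recogniser.get_gesture()
        print('Fingers detected: %s' % fingers)
    cap.release()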