Fix going outside of array bounds, and attempt to make CNN work.
@@ -1,10 +1,13 @@
from GestureRecognition.handrecogniser import HandRecogniser
import numpy as np
import cv2
import tensorflow as tf


class SimpleHandRecogniser(HandRecogniser):

    def __init__(self, frame):
        self.img = frame
        self.graph = None
        self.sess = None

    def __calc_pos_y(self, x, radius, centre):
        """
@@ -89,9 +92,16 @@ class SimpleHandRecogniser(HandRecogniser):
        Calculates the actual gesture, returning the number of fingers
        seen in the image.
        """
        print('Getting Gesture')
        if self.img is None:
            print('There is no image')
            return -1

        # First cut out the frame using the neural network.
        self.load_inference_graph()
        print('loaded inference graph')
        detections, scores = self.detect_hand_tensorflow(self.graph, self.sess)

        print('Attempting to use pure hand recognition')
        self.img_hsv = cv2.cvtColor(self.img, cv2.COLOR_BGR2HSV)

        # Need to shift the red pixels so they sit in one range (0-20) rather than wrapping from ~250 back to ~20.
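        # NOTE (illustrative, not part of this commit): for 8-bit images OpenCV's
        # COLOR_BGR2HSV puts hue in 0-179, so red skin tones straddle the wrap-around
        # point. One common way to make them contiguous is to rotate the hue channel
        # modulo the range, e.g.:
        #
        #     self.img_hsv[..., 0] = (self.img_hsv[..., 0].astype(np.int32) + 30) % 180
        #
        # The exact shift applied by __denoise/__segment_image is not shown in this diff.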
@@ -100,7 +110,9 @@ class SimpleHandRecogniser(HandRecogniser):
        self.img_hsv = self.__denoise(self.img_hsv)
        self.__segment_image()

        print('calculating circle')
        radius, centre = self.__calc_circle(self.mask)
        print('Got circle')

        # Now go around the circle to calculate the number of times the mask goes 0->255 or vice-versa.
        # First just do it the naive way with loops.
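        # NOTE (illustrative, not part of this commit): __calc_circle is not shown in
        # this diff. A common way to find the palm circle in a binary mask is the
        # distance transform, whose maximum gives the largest inscribed circle:
        #
        #     dist = cv2.distanceTransform(self.mask, cv2.DIST_L2, 5)
        #     _, max_dist, _, max_loc = cv2.minMaxLoc(dist)
        #     radius, centre = int(max_dist), max_loc
        #
        # Whether __calc_circle actually works this way is an assumption.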
@@ -109,9 +121,32 @@ class SimpleHandRecogniser(HandRecogniser):
        prev_x = centre[0] - radius
        prev_y = [self.__calc_pos_y(centre[0] - radius, radius, centre), self.__calc_pos_y(centre[0] - radius, radius, centre)]
        num_change = 0

        # Make sure x is also within bounds.
        x_start = centre[0] - radius + 1
        if x_start < 0:
            x_start = 0

        x_end = centre[0] + radius
        if x_end >= self.mask.shape[1]:
            x_end = self.mask.shape[1] - 1
        print(x_start)
        print(x_end)
        print(self.mask.shape)
        for x in range(x_start, x_end):
            # Need to check the circle is inside the bounds.
            ypos = self.__calc_pos_y(x, radius, centre)
            # y on one side of the centre (ypos) and its mirror on the other side.
            y = [ypos, centre[1] - (ypos - centre[1])]

            if y[0] < 0:
                y[0] = 0
            if y[0] >= self.mask.shape[0]:
                y[0] = self.mask.shape[0] - 1
            if y[1] < 0:
                y[1] = 0
            if y[1] >= self.mask.shape[0]:
                y[1] = self.mask.shape[0] - 1
            if self.mask[y[0], x] != self.mask[prev_y[0], prev_x]:
                num_change += 1
            if self.mask[y[1], x] != self.mask[prev_y[1], prev_x] and y[0] != y[1]:
@@ -119,81 +154,125 @@ class SimpleHandRecogniser(HandRecogniser):
            prev_x = x
            prev_y = y

        print('Finished calculating, returning')

        return num_change / 2 - 1
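    # NOTE (illustrative, not part of this commit): __calc_pos_y is defined above but
    # not shown in this hunk. From how it is called, it is assumed to solve the circle
    # equation (x - cx)^2 + (y - cy)^2 = r^2 for one intersection at column x, roughly:
    #
    #     def __calc_pos_y(self, x, radius, centre):
    #         dx = x - centre[0]
    #         return int(centre[1] + np.sqrt(max(radius * radius - dx * dx, 0)))
    #
    # The loop then mirrors that value (centre[1] - (ypos - centre[1])) to sample the
    # other half of the circle. Each finger crossing the circle flips the mask twice,
    # so num_change / 2 counts crossings and the - 1 presumably discounts the wrist.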
    def setFrame(self, frame):
        self.img = frame

    # Source: Victor Dibia
    # Link: https://github.com/victordibia/handtracking
    # The code is taken straight from his example, as it works perfectly. This is
    # specifically from the load_inference_graph method that he wrote, and will load
    # the graph into memory if one has not already been loaded for this object.
    def load_inference_graph(self):
        """Loads a tensorflow model checkpoint into memory."""

        if self.graph is not None and self.sess is not None:
            # Don't load more than once.
            return

        PATH_TO_CKPT = '/Users/piv/Documents/Projects/car/GestureRecognition/frozen_inference_graph.pb'
        # Load the frozen tensorflow model into memory.
        detection_graph = tf.Graph()
        with detection_graph.as_default():
            od_graph_def = tf.GraphDef()
            with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
                serialized_graph = fid.read()
                od_graph_def.ParseFromString(serialized_graph)
                tf.import_graph_def(od_graph_def, name='')
            sess = tf.Session(graph=detection_graph)
        self.graph = detection_graph
        self.sess = sess
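
    # NOTE (illustrative, not part of this commit): the calls above use the tensorflow
    # 1.x API. On tensorflow 2.x the same code is expected to run via the compat layer:
    #
    #     import tensorflow.compat.v1 as tf
    #     tf.disable_v2_behavior()
    #
    # which keeps tf.GraphDef, tf.gfile.GFile and tf.Session available unchanged.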

    # Source: Victor Dibia
    # Link: https://github.com/victordibia/handtracking
    # The code is taken straight from his example, as it works perfectly. This is
    # specifically from the detect_hand method that he wrote, as other processing is
    # required for the hand recognition to work correctly.
    def detect_hand_tensorflow(self, detection_graph, sess):
        """Detects hands in a frame using a CNN.

        detection_graph -- The CNN to use to detect the hand.
        sess -- The tensorflow session for the given graph.
        """
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
        detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
        detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections = detection_graph.get_tensor_by_name('num_detections:0')

        img_expanded = np.expand_dims(self.img, axis=0)

        (boxes, scores, classes, num) = sess.run(
            [detection_boxes, detection_scores, detection_classes, num_detections],
            feed_dict={image_tensor: img_expanded})
        print('finished detection')
        return np.squeeze(boxes), np.squeeze(scores)
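    # NOTE (illustrative, not part of this commit): the tensorflow object detection API
    # returns boxes as normalised [ymin, xmin, ymax, xmax]. Cropping the strongest
    # detection out of the frame (the "cut out the frame" step mentioned in the gesture
    # method above) would look roughly like:
    #
    #     h, w = self.img.shape[:2]
    #     ymin, xmin, ymax, xmax = boxes[np.argmax(scores)]
    #     hand = self.img[int(ymin * h):int(ymax * h), int(xmin * w):int(xmax * w)]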
    # Most of this code is from here: www.arunponnusamy.com/yolo-object-detection-opencv-python.html
    # Also https://github.com/opencv/opencv/blob/3.4/samples/dnn/object_detection.py
    def detect_hand_opencv(self, weights_path, config_path=None, conf_thresh=0.5, nms_thresh=0.4):
        """Performs hand detection using a tensorflow CNN loaded through the opencv dnn module.

        If a hand is present above a significant confidence threshold, the returned
        boxes describe its location in the image.

        weights_path -- Path to the frozen tensorflow graph (.pb) to use to detect the hand.
        config_path -- Optional path to the matching .pbtxt graph config.
        """
        if self.img is None:
            return

        height = self.img.shape[0]
        width = self.img.shape[1]

        scale = 0.5

        classes = None  # Stores the classes used for classification.

        # readNetFromTensorflow loads the frozen graph from its file path.
        if config_path is not None:
            net = cv2.dnn.readNetFromTensorflow(weights_path, config_path)
        else:
            net = cv2.dnn.readNetFromTensorflow(weights_path)
        net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)

        # The width is scaled to keep the same aspect ratio as the original image.
        net.setInput(cv2.dnn.blobFromImage(self.img, scale, size=(300, int(300 * (width / height))), swapRB=True, crop=False))
        netOut = net.forward()

        # Getting the output layer.
        layerNames = net.getLayerNames()
        lastLayerId = net.getLayerId(layerNames[-1])
        lastLayer = net.getLayer(lastLayerId)

        # Format the output to look the same as the tensorflow output.
        scores = []
        boxes = []
        if lastLayer.type == 'DetectionOutput':
            # Check we are using an actual detection model.
            # forward() returns a 1x1xNx7 blob, where N is the number of detections.
            # Tuple for each detection: [batchId, classId, confidence, left, top, right, bottom]
            for detection in netOut[0, 0]:
                confidence = detection[2]
                if confidence > conf_thresh:
                    # Boxes stay in the network's normalised coordinates so the output
                    # matches detect_hand_tensorflow. Only the hand class matters, as
                    # we are only trying to find the hand.
                    scores.append(float(confidence))
                    boxes.append((detection[3], detection[4], detection[5], detection[6]))
        return np.array(boxes), np.array(scores)
    def get_best_hand(self, boxes, scores, conf_thresh, nms_thresh):
        """
        Gets the best hand bounding box by inspecting confidence scores and overlapping
        boxes, as well as the overall size of each box, to determine which hand (if
        multiple are present) should be used for recognition.
        """
        # First remove any boxes below the confidence threshold.
        confident_bs = boxes[scores > conf_thresh]

        # Then use NMS to remove duplicate/overlapping boxes -> makes sure we only
        # detect one hand in an area. This wasn't used in the tensorflow example that
        # was found, however it is probably a good idea to use it just in case.
        indices = cv2.dnn.NMSBoxes(boxes, scores, conf_thresh, nms_thresh)

        for i in indices:
            i = i[0]
            box = boxes[i]
            left = box[0]
            top = box[1]
            right = box[2]
            bottom = box[3]
            # Now draw the box if we want to.

        # OR we can just take the box with the maximum confidence/maximum box area ->
        # this implies the closest hand. Finally, the area of each box could be
        # calculated to determine which hand is clearest (biggest in the image).
        # Just takes the most confident box for now.
        max_conf = 0
        max_index = 0
        for i, conf in enumerate(scores):
            if conf > max_conf:
                max_conf = conf
                max_index = i
        return boxes[max_index]
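    # NOTE (illustrative, not part of this commit): a typical call sequence for this
    # class is assumed to be (the gesture method is defined above this hunk, so its
    # name here is an assumption):
    #
    #     recogniser = SimpleHandRecogniser(frame)
    #     fingers = recogniser.get_gesture()
    #
    # or, driving the detection manually:
    #
    #     recogniser.load_inference_graph()
    #     boxes, scores = recogniser.detect_hand_tensorflow(recogniser.graph, recogniser.sess)
    #     best = recogniser.get_best_hand(boxes, scores, 0.5, 0.4)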