From c74eceb38a4da3758f4a48c050cea569ef0a038a Mon Sep 17 00:00:00 2001 From: "DSTO\\pivatom" Date: Thu, 6 Dec 2018 16:37:33 +1030 Subject: [PATCH] Working Gesture Recognition --- GestureRecognition/HandRecGray.py | 17 ++-- GestureRecognition/HandRecHSV.py | 134 ++++++++++++++++++++++-------- 2 files changed, 110 insertions(+), 41 deletions(-) diff --git a/GestureRecognition/HandRecGray.py b/GestureRecognition/HandRecGray.py index 64b48d6..1bb379c 100644 --- a/GestureRecognition/HandRecGray.py +++ b/GestureRecognition/HandRecGray.py @@ -13,13 +13,20 @@ img = cv2.imread('H:\car\GestureRecognition\IMG_0818.png', 1) # Downscale the image img = cv2.resize(img, None, fx=0.1, fy=0.1, interpolation = cv2.INTER_AREA) -img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) +min_seg_threshold = 1.2 +max_seg_threshold = 1.8 -img_gray[img_gray[:,:] > 90] = 255 -img_gray[img_gray[:,:] < 90] = 0 +# Need to make this get correct skin tones. +# img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) +# img_gray[img_gray[:,:] > 90] = 255 +# img_gray[img_gray[:,:] < 90] = 0 + +img_bin = np.zeros(shape=(img.shape[0], img.shape[1]), dtype=int) +img = np.where(img[:,:,1] == 0, 0, img[:,:,1]) +img[(img[:,:,2]/img[:,:,1] > min_seg_threshold) & (img[:,:,2]/img[:,:,1] < max_seg_threshold)] = [255,255,255] # Threshold to binary. -ret,img_thresh = cv2.threshold(img_gray, 127, 255, cv2.THRESH_BINARY) +ret,img_thresh = cv2.threshold(img_bin, 127, 255, cv2.THRESH_BINARY) # Following method is much faster -> 0.00143s # Still want to speed up further by lowering reliance on memory, which is quite heavy.. @@ -34,7 +41,7 @@ coords_y = np.zeros((img_thresh.shape[0], img_thresh.shape[1])) coords_x[:,:] = x_ind -# Even this is extremely quick as it goes through rows in the numpy array. 
+# Even this is extremely quick as it goes through rows in the numpy array, which in python is much faster than columns for element in y_ind: coords_y[element,:] = element diff --git a/GestureRecognition/HandRecHSV.py b/GestureRecognition/HandRecHSV.py index f0574a6..1aca10a 100644 --- a/GestureRecognition/HandRecHSV.py +++ b/GestureRecognition/HandRecHSV.py @@ -13,49 +13,111 @@ img = cv2.imread('H:\car\GestureRecognition\IMG_0818.png', 1) # Downscale the image img = cv2.resize(img, None, fx=0.1, fy=0.1, interpolation = cv2.INTER_AREA) -img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) +e1 = cv2.getTickCount() -img_gray[img_gray[:,:] > 90] = 255 -img_gray[img_gray[:,:] < 90] = 0 +# Hand Localization... -# Threshold to binary. -ret,img_thresh = cv2.threshold(img_gray, 127, 255, cv2.THRESH_BINARY) -x,y,k,xb,yb = 0,0,0,0,0 +img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) -# this is inherently slow... -for pix in img_thresh: - for j in pix: - if j == 255: - k += 1 - xb += x - yb += y - x += 1 - y += 1 - x = 0 +# Need to shift red pixels so they can be 0-20 rather than 250-~20 +img_hsv[:,:,0] = img_hsv[:,:,0] + 30 +img_hsv[:,:,0] = np.where(img_hsv[:,:,0] > 179, img_hsv[:,:,0] - 179, img_hsv[:,:,0]) + +img_hsv = cv2.GaussianBlur(img_hsv,(5,5),0) + +lower_skin = (0, 0, 153) +upper_skin = (50, 153, 255) + +# Only need mask, as we can just use this to do the hand segmentation. +mask = cv2.inRange(img_hsv, lower_skin, upper_skin) + +# This takes a whole millisecond (approx), and does not seem very worth the cost. +blur = cv2.GaussianBlur(mask,(5,5),0) +ret, img_thresh = cv2.threshold(blur, 50, 255, cv2.THRESH_BINARY) + +img_thresh = mask + +k = np.sum(img_thresh) / 255 + +# Taking indices for num of rows. 
+x_ind = np.arange(0,img_thresh.shape[1]) +y_ind = np.arange(0,img_thresh.shape[0]) +coords_x = np.zeros((img_thresh.shape[0], img_thresh.shape[1])) +coords_y = np.zeros((img_thresh.shape[0], img_thresh.shape[1])) +coords_x[:,:] = x_ind + + +# Even this is extremely quick as it goes through rows in the numpy array, which in python is much faster than columns +for element in y_ind: + coords_y[element,:] = element -centre = (int(xb/k), int(yb/k)) +# Now need to get the average x value and y value for centre of gravity +xb = int(np.sum(coords_x[img_thresh == 255])/k) +yb = int(np.sum(coords_y[img_thresh == 255])/k) + +centre = (int(np.sum(coords_x[img_thresh == 255])/k), int(np.sum(coords_y[img_thresh == 255])/k)) + +# Calculate radius of circle: +# May need to calculate diameter as well. +# Just take min/max x values and y values +x_min = np.min(coords_x[img_thresh == 255]) +x_max = np.max(coords_x[img_thresh == 255]) +y_min = np.min(coords_y[img_thresh == 255]) +y_max = np.max(coords_y[img_thresh == 255]) + +candidate_pts = [(x_min, y_min), (x_min, y_max), (x_max, y_min), (x_max, y_max)] +radius = 0 + +# Check with each point to see which is furthest from the centre. +for pt in candidate_pts: + # Calculate Euclidean Distance + new_distance = ((pt[0] - centre[0])**2 + (pt[1] - centre[1])**2)**(1/2) + if new_distance > radius: + radius = new_distance + +radius = int(radius * 0.55) + +# 140 needs to be replaced with a predicted value. i.e. not be a magic number. +cv2.circle(img_thresh, centre, 140, (120,0,0), 3) + +def calc_pos_y(x): + return int((radius**2 - (x - centre[0])**2)**(1/2) + centre[1]) + +print(img_thresh.shape) print(centre) +print(radius) +# Now go around the circle to calculate num of times going 0->255 or vice-versa. +# First just do it the naive way with loops. +# Equation of the circle: +# y = sqrt(r2 - (x-c)2) + c +# Will just increment x to check, no need to loop y as well. +# This is extremely slow, need to speed it up by removing for loop. 
+# Brings speed down to 20 fps. +# Could try a kernel method? +prev_x = centre[0] - radius +prev_y = [calc_pos_y(centre[0] - radius), calc_pos_y(centre[0] - radius)] +print(prev_y) +num_change = 0 +for x in range(centre[0] - radius + 1, centre[0] + radius): + ypos = calc_pos_y(x) + y = [ypos, centre[1] - (ypos-centre[1])] + print(y) + if(img_thresh[y[0], x] != img_thresh[prev_y[0], prev_x]): + num_change += 1 + if img_thresh[y[1], x] != img_thresh[prev_y[1], prev_x] and y[0] != y[1]: + num_change += 1 + prev_x = x + prev_y = y -cv2.rectangle(img_thresh, centre, (centre[0] + 20, centre[1] + 20), (0,0,255), 3) -cv2.circle(img_thresh, centre, 140, (0,0,0), 3) +fingers = num_change / 2 - 1 -# Now need to trace around the circle to figure out where the fingers are. +print("Num Fingers: " + str(fingers)) -cv2.imshow("Binary-cot-out", img_thresh) +e2 = cv2.getTickCount() +t = (e2 - e1)/cv2.getTickFrequency() +print( t ) + +cv2.imshow("Threshold", img_thresh) cv2.waitKey(0) -cv2.destroyAllWindows() - - - -#img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) - -#lower_skin = np.array([2, 102, 153]) -#upper_skin = np.array([7.5, 153, 255]) -# -## Only need mask, as we can just use this to calculate the -#mask = cv2.inRange(img_hsv, lower_skin, upper_skin) -# -#cv2.imshow("Mask", mask) -#cv2.waitKey(0) -#cv2.destroyAllWindows() \ No newline at end of file +cv2.destroyAllWindows() \ No newline at end of file