picar/GestureRecognition/SimpleHandRecogniser.py

from GestureRecognition.handrecogniser import HandRecogniser
import numpy as np
import cv2

class SimpleHandRecogniser(HandRecogniser):
    def __init__(self, frame):
        """
        Stores the frame that the recogniser will analyse.

        frame is kept as self.img. get_gesture and detect_hand both return 0
        when it is None; get_gesture converts it with cv2.COLOR_BGR2HSV, so a
        BGR image is expected otherwise.
        """
        self.img = frame

    def __calc_pos_y(self, x, radius, centre):
        """
        Calculates the position of y on a given circle radius and centre, given coordinate x.
        """
        return int((radius**2 - (x - centre[0])**2)**(1/2) + centre[1])

    def __segment_image(self):
        """
        Builds the binary hand mask (self.mask) from the HSV image (self.img_hsv).

        self.img_hsv is replaced by a blurred copy, the pixels inside the skin
        colour range are selected, and that selection is blurred and thresholded
        again so that the final mask only holds the values 0 and 255.
        """
        skin_range_low = (0, 0, 153)
        skin_range_high = (45, 153, 255)

        smoothed_hsv = cv2.GaussianBlur(self.img_hsv, (5, 5), 0)
        self.img_hsv = smoothed_hsv

        # The mask on its own is sufficient for the hand segmentation.
        in_range = cv2.inRange(smoothed_hsv, skin_range_low, skin_range_high)

        # Blur the mask and binarise it again to remove small holes and noise.
        softened = self.__denoise(in_range)
        self.mask = cv2.threshold(softened, 50, 255, cv2.THRESH_BINARY)[1]

    def __denoise(self, image):
        """
        Returns a blurred copy of the image, produced by a Gaussian blur with a
        5x5 kernel and a sigma argument of 0.
        """
        kernel_size = (5, 5)
        sigma_x = 0
        return cv2.GaussianBlur(image, kernel_size, sigma_x)

    def __calc_circle(self, image, radius_percent = 0.52):
        """
        Calculates the equation of the circle (radius, centre) from a given
        threshold image, so that the circle is the center of gravity of the
        given threshold pixels, and the radius is by default 55% of the total
        size.
        """
        k = np.sum(self.mask) / 255

        # Taking indices for num of rows.
        x_ind = np.arange(0,self.mask.shape[1])
        y_ind = np.arange(0,self.mask.shape[0])
        coords_x = np.zeros((self.mask.shape[0], self.mask.shape[1]))
        coords_y = np.zeros((self.mask.shape[0], self.mask.shape[1]))
        coords_x[:,:] = x_ind

        # Even this is extremely quick as it goes through rows in the numpy array, which in python is much faster than columns
        for element in y_ind:
            coords_y[element,:] = element

        # Now need to get the average x value and y value for centre of gravity
        centre = (int(np.sum(coords_x[self.mask == 255])/k), int(np.sum(coords_y[self.mask == 255])/k))

        # Calculate radius of circle:
        # May need to calculate diameter as well.
        # Just take min/max x values and y values
        x_min = np.min(coords_x[self.mask == 255])
        x_max = np.max(coords_x[self.mask == 255])
        y_min = np.min(coords_y[self.mask == 255])
        y_max = np.max(coords_y[self.mask == 255])

        candidate_pts = [(x_min, y_min), (x_min, y_max), (x_max, y_min), (x_max, y_max)]
        radius = 0

        # Check with each point to see which is furthest from the centre.
        for pt in candidate_pts:
            # Calculate Euclydian Distance
            new_distance = ((pt[0] - centre[0])**2 + (pt[1] - centre[1])**2)**(1/2)
            if new_distance > radius:
                radius = new_distance

        radius = int(radius * radius_percent)

        return radius, centre

    def __shift_pixels(self, image, shift_radius):
        image[:,:,0] = image[:,:,0] + shift_radius
        image[:,:,0] = np.where(image[:,:,0] > 179, image[:,:,0] - 179, image[:,:,0])
        return image

    def get_gesture(self):
        """
        Calculates the actual gesture, returning the number of fingers
        seen in the image.

        The frame is converted to HSV, hue-shifted, blurred and segmented into
        a binary mask. A circle around the mask's centre of gravity is then
        walked column by column, counting how often the mask value changes
        along its upper and lower arc. Points of the circle that fall outside
        the frame are treated as background.

        Returns:
            0 if no frame is set or the mask contains no foreground pixel,
            otherwise num_change / 2 - 1 (a float). Presumably every finger
            accounts for two changes and the subtracted one is the wrist --
            confirm against the callers.
        """
        if self.img is None:
            return 0

        self.img_hsv = cv2.cvtColor(self.img, cv2.COLOR_BGR2HSV)

        # Need to shift red pixels so they form one contiguous range at the
        # low end of the hue axis instead of straddling its wrap-around point.
        self.img_hsv = self.__shift_pixels(self.img_hsv, 30)

        self.img_hsv = self.__denoise(self.img_hsv)
        self.__segment_image()

        # Without a single foreground pixel there is no centre of gravity to
        # build the circle around, so report "no fingers" instead of failing.
        if not np.any(self.mask):
            return 0

        radius, centre = self.__calc_circle(self.mask)

        height, width = self.mask.shape[:2]

        def sample(x, y):
            # The circle may reach past the frame; indexing there would raise
            # or, for negative indices, silently wrap to the opposite side.
            if 0 <= x < width and 0 <= y < height:
                return self.mask[y, x]
            return 0

        # Walk the circle from its leftmost column to the right, following
        # y = sqrt(r^2 - (x - cx)^2) + cy for the lower arc and its mirror
        # image across the centre row for the upper arc.
        left_x = centre[0] - radius
        prev_x = left_x
        # Both arcs meet at the leftmost point of the circle.
        prev_lower = prev_upper = self.__calc_pos_y(left_x, radius, centre)
        num_change = 0
        for x in range(left_x + 1, centre[0] + radius):
            lower = self.__calc_pos_y(x, radius, centre)
            upper = centre[1] - (lower - centre[1])
            if sample(x, lower) != sample(prev_x, prev_lower):
                num_change += 1
            # Where both arcs coincide the point must not be counted twice.
            if lower != upper and sample(x, upper) != sample(prev_x, prev_upper):
                num_change += 1
            prev_x, prev_lower, prev_upper = x, lower, upper

        return num_change / 2 - 1

    def detect_hand(self, weights_path, config_path, conf_thresh = 0.5, nms_thresh = 0.4):
        '''
            Runs an OpenCV DNN object detector on the current frame to look for a hand.

            NOTE(review): the stated intent is to set self.img to the region of the
            detected hand according to its bounding box, but the code visible here only
            collects candidate boxes; it neither updates self.img nor returns a result
            for a non-empty frame -- confirm whether the method is unfinished.

            Args:
                weights_path: first argument passed to cv2.dnn.readNet.
                config_path: second argument passed to cv2.dnn.readNet.
                conf_thresh: minimum confidence for a detection to be kept; also passed
                    to cv2.dnn.NMSBoxes as its score threshold.
                nms_thresh: non-maximum-suppression threshold passed to cv2.dnn.NMSBoxes.

            Returns:
                0 when no frame is set.
        '''
        # Most of this code is from here: www.arunponnusamy.com/yolo-object-detection-opencv-python.html
        # Also https://github.com/opencv/opencv/blob/3.4/samples/dnn/object_detection.py
        if self.img is None:
            return 0

        height = self.img.shape[0]
        width = self.img.shape[1]
        # Used as the scalefactor of blobFromImage below.
        # NOTE(review): the referenced YOLO samples scale by 1/255 -- confirm 0.5 is intended.
        scale = 0.5

        # NOTE(review): never read in the code visible here.
        classes = None # Stores classes used for classification

        # The network is loaded from disk on every call.
        net = cv2.dnn.readNet(weights_path, config_path)

        net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)

        outNames = net.getUnconnectedOutLayersNames()

        # Arguments: image, scalefactor, size, mean, swapRB=True, crop=False.
        blob = cv2.dnn.blobFromImage(self.img, scale, (416,416), (0,0,0), True, False)

        net.setInput(blob)

        outs = net.forward(outNames)

        # Getting the output layer.
        layerNames = net.getLayerNames()
        lastLayerId = net.getLayerId(layerNames[-1])
        lastLayer = net.getLayer(lastLayerId)

        classIds = []
        confidences = []
        boxes = []
        # NOTE(review): only 'DetectionOutput' networks are handled; for any other
        # output layer type the three lists above stay empty.
        if lastLayer.type == 'DetectionOutput':
            # Check we are using an actual detection module.
            # Will return a 1x1xnx7 blob, where n is number of detections.
            # Tuple for each detection: [batchId, classId, confidence, left, top, right, bottom]

            for out in outs:
                for detection in out[0,0]:
                    confidence = detection[2]
                    if confidence > conf_thresh:
                        # Will need to verify this first, but the referenced code scales
                        # the box coordinates by the frame size like this.
                        left = int(detection[3] * width)
                        top = int(detection[4] * height)
                        right = int(detection[5] * width)
                        bottom = int(detection[6] * height)
                        classIds.append(int(detection[1]) - 1)
                        confidences.append(float(confidence))
                        # NOTE(review): stored as (left, top, right, bottom), while
                        # cv2.dnn.NMSBoxes documents (x, y, width, height) boxes -- confirm.
                        boxes.append((left, top, right, bottom))


        # Remove duplicate/overlapping boxes -> makes sure only detect one hand in an area.
        indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_thresh, nms_thresh)

        for i in indices:
            # NOTE(review): assumes every entry of indices is itself indexable (Nx1
            # layout); newer OpenCV releases reportedly return a flat array -- TODO confirm.
            i = i[0]
            box = boxes[i]
            left = box[0]
            top = box[1]
            right = box[2]
            bottom = box[3]
            # Now draw the box if we want to.

        # OR can just get the box that is a hand with the maximum confidence/maximum box area -> this implies closest hand...
        max_conf = 0
        max_index = 0
        for conf in confidences:
            if conf > max_conf:
                max_conf = conf
                # NOTE(review): i is the leftover variable of the NMS loop above, not the
                # position of conf in confidences (and is undefined if that loop never
                # ran) -- this looks like a bug; enumerate(confidences) was probably intended.
                max_index = i