#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Hand-gesture recognition that drives a serial device with G-code strings.

Frames are read from a V4L camera, hand landmarks are detected with MediaPipe
Hands, the hand sign is classified by ``KeyPointClassifier`` and the sign id is
mapped to an entry of ``GCODE_COMMANDS`` which is written to the serial port.
"""
import argparse
import copy
import csv
import itertools
import time
from collections import Counter
from collections import deque

import cv2 as cv
import numpy as np
import mediapipe as mp
import serial.tools.list_ports  # also binds the top-level ``serial`` package

from utils import CvFpsCalc
from model import KeyPointClassifier
from model import PointHistoryClassifier

# Serial link settings.
SERIAL_PORT = '/dev/ttyUSB0'
SERIAL_BAUDRATE = 115200

# Command sent for each hand-sign id (the list index is the classifier output).
# NOTE(review): "Pointer" is not G-code, yet it is still written to the port
# once per session, as before -- confirm whether it should be skipped instead.
GCODE_COMMANDS = ("M290 X10", "M290 X-10", "Pointer", "G28", "G0 X0")

# Commands that are sent at most once per session; every other command is
# re-sent on each frame in which its gesture is detected.
ONE_SHOT_COMMANDS = frozenset({"G28", "G0 X0", "Pointer"})

# Hand-sign id of the pointing gesture, and the landmark index that is
# recorded into the point history while that gesture is shown.
POINTER_SIGN_ID = 2
INDEX_FINGERTIP = 8

# Landmark index pairs joined by a line, in drawing order.
_HAND_CONNECTIONS = (
    (2, 3), (3, 4),                        # thumb
    (5, 6), (6, 7), (7, 8),                # index finger
    (9, 10), (10, 11), (11, 12),           # middle finger
    (13, 14), (14, 15), (15, 16),          # ring finger
    (17, 18), (18, 19), (19, 20),          # little finger
    (0, 1), (1, 2), (2, 5), (5, 9),        # palm
    (9, 13), (13, 17), (17, 0),
)

# Landmarks drawn with the larger radius (the tip of each finger).
_FINGERTIP_INDICES = frozenset({4, 8, 12, 16, 20})
_LANDMARK_COUNT = 21


def get_args():
    """Parse the command-line options.

    Returns:
        argparse.Namespace with ``device``, ``width``, ``height``,
        ``use_static_image_mode``, ``min_detection_confidence`` and
        ``min_tracking_confidence``.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("--device", type=int, default=0)
    parser.add_argument("--width", help='cap width', type=int, default=1280)
    parser.add_argument("--height", help='cap height', type=int, default=960)

    parser.add_argument('--use_static_image_mode', action='store_true')
    # Default is 0.8 because that is the value the detector was actually
    # configured with; the option is now honoured instead of being ignored.
    parser.add_argument("--min_detection_confidence",
                        help='min_detection_confidence',
                        type=float,
                        default=0.8)
    # Must be float: with type=int any fractional value given on the command
    # line was rejected by argparse.
    parser.add_argument("--min_tracking_confidence",
                        help='min_tracking_confidence',
                        type=float,
                        default=0.5)

    return parser.parse_args()


def _read_labels(csv_path):
    """Return the first column of the CSV file at *csv_path* as a list."""
    with open(csv_path, encoding='utf-8-sig') as f:
        return [row[0] for row in csv.reader(f)]


def _open_serial(port, baudrate):
    """Open *port* at *baudrate* with a 1 s timeout and return the connection.

    Raises:
        Exception: whatever pyserial raises when the port cannot be opened;
            the failure is printed and re-raised.
    """
    connection = serial.Serial()
    print(f"Using port: {port}")
    try:
        connection.baudrate = baudrate
        connection.port = port
        connection.timeout = 1  # timeout for blocking reads
        connection.open()
        print(f"Opened port: {port}")
    except Exception as e:
        print(f"Failed to open port {port}: {e}")
        raise
    return connection


def _serial_handshake(serial_conn):
    """Send a test line and return the stripped reply ("" when there is none).

    Raises:
        Exception: any serial error; it is printed and re-raised.
    """
    try:
        serial_conn.write(b'Hello\n')
        time.sleep(1)  # give the device time to answer
        if serial_conn.in_waiting:
            response = serial_conn.readline().decode(errors='replace').strip()
            print(f"Received: {response}")
            return response
        print("No response received from the serial device.")
        return ""
    except Exception as e:
        print(f"Error during serial communication: {e}")
        raise


def _send_gcode(serial_conn, command):
    """Write *command* plus a newline and return everything already buffered.

    Only bytes that are waiting right after the write are read, so the
    returned text may be empty or belong to an earlier command.
    """
    serial_conn.write((command + '\n').encode('utf-8'))
    lines = []
    while serial_conn.in_waiting:
        # errors='replace': line noise must not abort the main loop.
        lines.append(serial_conn.readline().decode(errors='replace'))
    response = "".join(lines)
    print("Marlin Response:", response)
    return response


def _dispatch_gesture_command(serial_conn, hand_sign_id, sent_commands):
    """Send the command mapped to *hand_sign_id* unless it must be skipped.

    A command is skipped when the id has no entry in ``GCODE_COMMANDS`` or
    when it is a one-shot command that is already in *sent_commands*.

    Returns:
        The device reply text, or None when nothing was sent.
    """
    if not 0 <= hand_sign_id < len(GCODE_COMMANDS):
        return None
    command = GCODE_COMMANDS[hand_sign_id]
    if command in ONE_SHOT_COMMANDS and command in sent_commands:
        return None
    reply = _send_gcode(serial_conn, command)
    sent_commands.add(command)
    return reply


def main():
    """Run the capture / classify / send loop until ESC or a camera failure."""
    args = get_args()
    use_brect = True

    # Camera preparation. With the V4L backend, index N should open
    # /dev/videoN, so the default device 0 is the previously hard-coded node.
    cap = cv.VideoCapture(args.device, cv.CAP_V4L)
    cap.set(cv.CAP_PROP_FRAME_WIDTH, args.width)
    cap.set(cv.CAP_PROP_FRAME_HEIGHT, args.height)

    hands = None
    serial_conn = None
    try:
        # Model load
        hands = mp.solutions.hands.Hands(
            static_image_mode=args.use_static_image_mode,
            max_num_hands=2,
            min_detection_confidence=args.min_detection_confidence,
            min_tracking_confidence=args.min_tracking_confidence,
        )
        keypoint_classifier = KeyPointClassifier()
        point_history_classifier = PointHistoryClassifier()

        # Read labels
        keypoint_classifier_labels = _read_labels(
            'model/keypoint_classifier/keypoint_classifier_label.csv')
        point_history_classifier_labels = _read_labels(
            'model/point_history_classifier/'
            'point_history_classifier_label.csv')

        # FPS measurement
        cvFpsCalc = CvFpsCalc(buffer_len=10)

        # Fingertip coordinate history and finger gesture history
        history_length = 16
        point_history = deque(maxlen=history_length)
        finger_gesture_history = deque(maxlen=history_length)

        mode = 0

        # Serial communication
        serial_conn = _open_serial(SERIAL_PORT, SERIAL_BAUDRATE)
        print("Serial communication setup complete. Starting main loop...")
        # Always bound, so the end-of-iteration check below cannot hit an
        # unassigned local when no command has been sent yet.
        response = _serial_handshake(serial_conn)

        commandrun = set()  # commands that have been sent at least once

        while True:
            print("Entering main loop iteration")
            fps = cvFpsCalc.get()

            # Process key (ESC: end). waitKey also pumps the GUI event loop.
            key = cv.waitKey(10)
            if key == 27:  # ESC
                break
            number, mode = select_mode(key, mode)

            # Camera capture
            ret, image = cap.read()
            if not ret:
                print("Failed to capture image from camera. Exiting...")
                break
            image = cv.flip(image, 1)  # mirror display
            debug_image = copy.deepcopy(image)

            # Detection
            image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
            image.flags.writeable = False
            results = hands.process(image)
            image.flags.writeable = True

            if results.multi_hand_landmarks is not None:
                for hand_landmarks, handedness in zip(
                        results.multi_hand_landmarks,
                        results.multi_handedness):
                    # Bounding box and pixel-space landmarks
                    brect = calc_bounding_rect(debug_image, hand_landmarks)
                    landmark_list = calc_landmark_list(debug_image,
                                                       hand_landmarks)

                    # Conversion to relative / normalized coordinates
                    pre_processed_landmark_list = pre_process_landmark(
                        landmark_list)
                    pre_processed_point_history_list = \
                        pre_process_point_history(debug_image, point_history)

                    # Write to the dataset file
                    logging_csv(number, mode, pre_processed_landmark_list,
                                pre_processed_point_history_list)

                    # Hand sign classification
                    hand_sign_id = keypoint_classifier(
                        pre_processed_landmark_list)
                    if hand_sign_id == POINTER_SIGN_ID:
                        point_history.append(landmark_list[INDEX_FINGERTIP])
                    else:
                        point_history.append([0, 0])

                    # Finger gesture classification (needs a full history)
                    finger_gesture_id = 0
                    if (len(pre_processed_point_history_list)
                            == history_length * 2):
                        finger_gesture_id = point_history_classifier(
                            pre_processed_point_history_list)

                    # Most frequent gesture id over the recent detections
                    finger_gesture_history.append(finger_gesture_id)
                    most_common_fg_id = Counter(
                        finger_gesture_history).most_common()

                    # Drawing
                    debug_image = draw_bounding_rect(use_brect, debug_image,
                                                     brect)
                    debug_image = draw_landmarks(debug_image, landmark_list)
                    debug_image = draw_info_text(
                        debug_image,
                        brect,
                        handedness,
                        keypoint_classifier_labels[hand_sign_id],
                        point_history_classifier_labels[
                            most_common_fg_id[0][0]],
                    )

                    # G-code dispatch
                    reply = _dispatch_gesture_command(serial_conn,
                                                      hand_sign_id,
                                                      commandrun)
                    if reply is not None:
                        response = reply
            else:
                point_history.append([0, 0])

            debug_image = draw_point_history(debug_image, point_history)
            debug_image = draw_info(debug_image, fps, mode, number)

            # Screen reflection
            cv.imshow('Hand Gesture Recognition', debug_image)

            # Stop when the last text received from the device is "exit".
            if response.lower() == 'exit':
                break
            print("Continuing the main loop...")
    finally:
        # Release every resource on all exit paths (ESC, camera failure,
        # device "exit", or an exception).
        if serial_conn is not None:
            serial_conn.close()
        if hands is not None:
            hands.close()
        cap.release()
        cv.destroyAllWindows()


def select_mode(key, mode):
    """Translate a key code into a digit and an updated mode.

    Args:
        key: key code as returned by ``cv.waitKey``.
        mode: current mode.

    Returns:
        ``(number, mode)``: *number* is 0-9 for the digit keys, else -1;
        *mode* becomes 0 for 'n', 1 for 'k', 2 for 'h', otherwise unchanged.
    """
    number = key - 48 if 48 <= key <= 57 else -1  # '0' .. '9'
    if key == 110:  # n
        mode = 0
    if key == 107:  # k
        mode = 1
    if key == 104:  # h
        mode = 2
    return number, mode


def calc_bounding_rect(image, landmarks):
    """Return ``[x1, y1, x2, y2]`` enclosing all landmarks, in pixels."""
    points = np.array(calc_landmark_list(image, landmarks),
                      dtype=np.int32).reshape(-1, 2)
    x, y, w, h = cv.boundingRect(points)
    return [x, y, x + w, y + h]


def calc_landmark_list(image, landmarks):
    """Convert landmarks to a list of ``[x, y]`` pixel coordinates.

    Each landmark's ``x`` / ``y`` is scaled by the image width / height and
    capped at the last valid pixel index.
    """
    image_width, image_height = image.shape[1], image.shape[0]
    return [
        [
            min(int(landmark.x * image_width), image_width - 1),
            min(int(landmark.y * image_height), image_height - 1),
        ]
        for landmark in landmarks.landmark
    ]


def pre_process_landmark(landmark_list):
    """Flatten landmarks relative to the first one, scaled by the max |value|.

    Args:
        landmark_list: sequence of ``[x, y]`` points; it is not modified.

    Returns:
        Flat list ``[x0, y0, x1, y1, ...]`` of floats. Empty input gives
        ``[]``; if every point coincides with the first, all values are 0.0.
    """
    if not landmark_list:
        return []

    # The first landmark is the origin of the relative coordinates.
    base_x, base_y = landmark_list[0][0], landmark_list[0][1]
    flat = list(
        itertools.chain.from_iterable(
            (point[0] - base_x, point[1] - base_y)
            for point in landmark_list))

    max_value = max(map(abs, flat))
    if max_value == 0:
        # Degenerate hand (all points identical): avoid dividing by zero.
        return [0.0] * len(flat)
    return [value / max_value for value in flat]


def pre_process_point_history(image, point_history):
    """Flatten the point history relative to its first point.

    Offsets are divided by the image width (x) and height (y).

    Returns:
        Flat list ``[x0, y0, x1, y1, ...]``; ``[]`` for an empty history.
    """
    if not point_history:
        return []

    image_width, image_height = image.shape[1], image.shape[0]
    base_x, base_y = point_history[0][0], point_history[0][1]

    flattened = []
    for point in point_history:
        flattened.append((point[0] - base_x) / image_width)
        flattened.append((point[1] - base_y) / image_height)
    return flattened


def logging_csv(number, mode, landmark_list, point_history_list):
    """Append one labelled training row, depending on *mode*.

    Mode 1 appends ``[number, *landmark_list]`` to the key-point dataset and
    mode 2 appends ``[number, *point_history_list]`` to the point-history
    dataset. Nothing is written in any other mode or when *number* is not
    in 0-9.
    """
    if not 0 <= number <= 9:
        return
    if mode == 1:
        csv_path = 'model/keypoint_classifier/keypoint.csv'
        with open(csv_path, 'a', newline="") as f:
            csv.writer(f).writerow([number, *landmark_list])
    if mode == 2:
        csv_path = 'model/point_history_classifier/point_history.csv'
        with open(csv_path, 'a', newline="") as f:
            csv.writer(f).writerow([number, *point_history_list])
    return


def draw_landmarks(image, landmark_point):
    """Draw the hand skeleton and key points onto *image* and return it.

    Args:
        image: BGR image, drawn on in place.
        landmark_point: list of ``[x, y]`` pixel points; when non-empty it
            must hold all 21 landmarks.
    """
    if len(landmark_point) > 0:
        for start, end in _HAND_CONNECTIONS:
            cv.line(image, tuple(landmark_point[start]),
                    tuple(landmark_point[end]), (0, 0, 0), 2)

    # Key points: landmark 0 is highlighted, the tip of each finger gets a
    # larger radius. Each point is drawn as an infill plus an outline.
    for index, landmark in enumerate(landmark_point[:_LANDMARK_COUNT]):
        center = (landmark[0], landmark[1])
        radius = 8 if index in _FINGERTIP_INDICES else 5
        color = (0, 128, 255) if index == 0 else (0, 0, 0)
        cv.circle(image, center, radius, color, -1)
        cv.circle(image, center, radius, color, 1)

    return image


def draw_bounding_rect(use_brect, image, brect):
    """Draw the ``[x1, y1, x2, y2]`` rectangle when *use_brect* is true."""
    if use_brect:
        cv.rectangle(image, (brect[0], brect[1]), (brect[2], brect[3]),
                     (0, 0, 0), 1)
    return image


def draw_info_text(image, brect, handedness, hand_sign_text,
                   finger_gesture_text):
    """Draw the handedness / hand-sign caption and the movement label.

    The caption sits in a filled bar above the bounding box; the movement
    label is drawn at a fixed position near the top-left corner.
    """
    cv.rectangle(image, (brect[0], brect[1]), (brect[2], brect[1] - 22),
                 (0, 0, 0), -1)

    info_text = handedness.classification[0].label[0:]
    if hand_sign_text != "":
        info_text = info_text + ':' + hand_sign_text
    cv.putText(image, info_text, (brect[0] + 5, brect[1] - 4),
               cv.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1, cv.LINE_AA)

    if finger_gesture_text != "":
        movement_text = "Movement:" + finger_gesture_text
        # Thick black pass first, then thin white on top, for an outline.
        cv.putText(image, movement_text, (10, 30), cv.FONT_HERSHEY_SIMPLEX,
                   0.8, (0, 0, 0), 4, cv.LINE_AA)
        cv.putText(image, movement_text, (10, 30), cv.FONT_HERSHEY_SIMPLEX,
                   0.8, (255, 255, 255), 2, cv.LINE_AA)

    return image


def draw_point_history(image, point_history):
    """Draw a circle per history point, with radius growing with the index.

    Points with a zero x or y (the ``[0, 0]`` placeholders) are skipped.
    """
    for index, point in enumerate(point_history):
        if point[0] != 0 and point[1] != 0:
            cv.circle(image, (point[0], point[1]), 1 + int(index / 2),
                      (255, 0, 0), 2)
    return image


def draw_info(image, fps, mode, number):
    """Draw the FPS value and, in a logging mode, the mode and digit."""
    cv.putText(image, "FPS:" + str(fps), (500, 20), cv.FONT_HERSHEY_SIMPLEX,
               0.5, (0, 0, 0), 1, cv.LINE_AA)

    mode_string = ['Logging Key Point', 'Logging Point History']
    if 1 <= mode <= 2:
        cv.putText(image, "MODE:" + mode_string[mode - 1], (10, 90),
                   cv.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1,
                   cv.LINE_AA)
        if 0 <= number <= 9:
            cv.putText(image, "NUM:" + str(number), (10, 110),
                       cv.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1,
                       cv.LINE_AA)
    return image


if __name__ == '__main__':
    main()