-
Notifications
You must be signed in to change notification settings - Fork 0
/
gaze.py
189 lines (156 loc) · 7.8 KB
/
gaze.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
import cv2
import mediapipe as mp
import numpy as np
# Initialize MediaPipe Face Mesh.
# NOTE: the FaceMesh instance is created once, at import time, and is the
# shared detector used by gazeDetect(); both the detection and tracking
# confidence thresholds are set to 0.5.
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(min_detection_confidence=0.5, min_tracking_confidence=0.5)
# Drawing helpers used to overlay the detected face mesh on an image.
mp_drawing = mp.solutions.drawing_utils
# One drawing spec (thickness 1, circle radius 1) that is passed for both the
# landmarks and the connections when the mesh is drawn.
drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
# Function to calculate the gaze direction
def calculate_gaze_direction(landmarks, img_width, img_height):
    """Estimate a gaze direction vector from face-mesh landmarks.

    The vector is the cross product of two vectors built from the raw
    landmark coordinates:

    * the vector between the two eye centres (left centre minus right
      centre), where each centre is the mean of six eye landmarks, and
    * the vector from landmark 1 (used as the nose tip) to landmark 10
      (used as the top of the head).

    Args:
        landmarks: Indexable collection whose items expose ``x``, ``y`` and
            ``z`` attributes.
        img_width: Not used by the computation; kept for the interface.
        img_height: Not used by the computation; kept for the interface.

    Returns:
        A length-3 NumPy array. The vector is not normalised.
    """
    def xyz(index):
        # Coordinates of a single landmark as an (x, y, z) tuple.
        point = landmarks[index]
        return (point.x, point.y, point.z)

    def centroid(indices):
        # Mean position of the given landmarks.
        return np.array([xyz(i) for i in indices]).mean(axis=0).astype("float")

    left_center = centroid((33, 160, 158, 133, 153, 144))
    right_center = centroid((362, 385, 387, 263, 373, 380))

    # Vector running across the face, from the right eye to the left eye.
    across_face = left_center - right_center

    # Vector running up the face, from the nose tip to the top of the head.
    up_face = np.array(xyz(10)) - np.array(xyz(1))

    # A vector perpendicular to both is taken as the gaze direction.
    return np.cross(across_face, up_face)
""" # Function to calculate the head tilt
def calculate_head_tilt(landmarks, img_width, img_height):
# Define the indices for the key landmarks
left_eye_inner = landmarks[133] # Left eye inner corner
right_eye_inner = landmarks[362] # Right eye inner corner
nose_tip = landmarks[1] # Nose tip
chin = landmarks[199] # Chin
# Convert landmarks to image coordinates
left_eye_inner = np.array([left_eye_inner.x * img_width, left_eye_inner.y * img_height])
right_eye_inner = np.array([right_eye_inner.x * img_width, right_eye_inner.y * img_height])
nose_tip = np.array([nose_tip.x * img_width, nose_tip.y * img_height])
chin = np.array([chin.x * img_width, chin.y * img_height])
# Calculate vertical angle between the nose tip and chin
vertical_vector = chin - nose_tip
vertical_angle = np.arctan2(vertical_vector[1], vertical_vector[0]) * 180 / np.pi
return vertical_angle """
def getScreenPlaneEquation(landmarks, img_width, img_height):
    """Return the normal vector of the fixed plane that stands in for the screen.

    The plane is defined by three hard-coded points that all lie at
    z = -0.1. None of the arguments influence the result; they are accepted
    only so the call signature stays the same.

    Args:
        landmarks: Not used.
        img_width: Not used.
        img_height: Not used.

    Returns:
        A length-3 NumPy array: the cross product of two edge vectors of the
        plane, i.e. a vector perpendicular to it.
    """
    # Corner labels are the original author's.
    # NOTE(review): x = 1 is labelled "left", so x may be mirrored - confirm.
    corners = np.array([
        [1, 0, -0.1],  # top left
        [0, 0, -0.1],  # top right
        [1, 1, -0.1],  # bottom left
    ])
    top_left, top_right, bottom_left = corners

    top_edge = top_left - top_right
    left_edge = bottom_left - top_left
    return np.cross(top_edge, left_edge)
def checkIntersection(landmarks, direction_vector, plane_equation_constants, plane_equation_d):
    """Report whether the gaze ray hits the accepted region of the screen plane.

    A ray is cast from landmark 168 (used as the top of the nose) along
    ``direction_vector`` and intersected with the plane ``z = plane_equation_d``.
    The gaze counts as "on screen" when the intersection point satisfies
    ``0.4 <= x <= 0.7`` and ``0.2 <= y <= 0.7``.

    Args:
        landmarks: Indexable collection whose items expose ``x``, ``y`` and
            ``z`` attributes.
        direction_vector: Length-3 gaze direction (a NumPy array in the
            callers visible in this file).
        plane_equation_constants: Not used by the computation; kept for the
            interface.
        plane_equation_d: The z value of the screen plane.

    Returns:
        ``True`` if the intersection point lies inside the accepted region,
        ``False`` otherwise, including when the ray is parallel to the plane.
    """
    # Origin of the ray: the landmark used as the top of the nose.
    anchor = landmarks[168]
    origin = np.array([anchor.x, anchor.y, anchor.z])

    # A direction with no z component never reaches the plane. Dividing by it
    # would only produce inf/NaN (and NumPy runtime warnings) that then fail
    # the range check, so answer directly instead.
    dz = direction_vector[2]
    if dz == 0:
        return False

    # Scale factor for which origin + scale * direction has z == plane_equation_d.
    scale = (plane_equation_d - origin[2]) / dz

    # Point of intersection with the screen plane.
    poi = direction_vector * scale + origin
    print("POI: ", poi)

    # Accept only points whose x and y fall inside the fixed window.
    return bool(0.4 <= poi[0] <= 0.7 and 0.2 <= poi[1] <= 0.7)
""" # Capture video from the webcam
cap = cv2.VideoCapture(1)
while cap.isOpened():
success, image = cap.read()
if not success:
print("Ignoring empty camera frame.")
continue
# Convert the BGR image to RGB
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Process the image to find face mesh
results = face_mesh.process(image)
# Convert the image color back for rendering
image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
img_height, img_width, _ = image.shape
if results.multi_face_landmarks:
for face_landmarks in results.multi_face_landmarks:
# Draw face landmarks
mp_drawing.draw_landmarks(
image=image,
landmark_list=face_landmarks,
connections=mp_face_mesh.FACEMESH_CONTOURS,
landmark_drawing_spec=drawing_spec,
connection_drawing_spec=drawing_spec)
# Calculate the gaze direction
direction_vector = calculate_gaze_direction(face_landmarks.landmark, img_width, img_height)
#print(direction_vector)
#if direction vector starting at eye location does not exceed the bounds of the camera screen
#d = -0.1
flag = checkIntersection(face_landmarks.landmark, direction_vector, getScreenPlaneEquation(face_landmarks.landmark, img_width, img_height), -0.1)
if flag == True:
cv2.putText(image, "Looking at Screen", (30, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
else:
cv2.putText(image, "Not Looking at Screen", (30, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
vertical_angle = calculate_head_tilt(face_landmarks.landmark, img_width, img_height)
#print(vertical_angle)
#if vertical_angle < 90: # Adjust this threshold based on your needs
# cv2.putText(image, "Looking Down", (30, 130), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
#else:
# cv2.putText(image, "Not Looking Down", (30, 130), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
else:
cv2.putText(image, "Not Looking at Screen", (30, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)
# Display the image
cv2.imshow('MediaPipe Face Mesh', image)
if cv2.waitKey(5) & 0xFF == 27:
break
cap.release()
cv2.destroyAllWindows() """
def gazeDetect(image):
    """Decide whether a face in ``image`` is looking at the screen.

    The image is run through the module-level ``face_mesh`` detector. When at
    least one face is found, its mesh contours are drawn onto ``image`` (the
    array is modified in place) and the estimated gaze ray is tested against
    the screen plane at z = -0.1.

    Args:
        image: Image array with a three-element ``shape`` (height, width,
            channels). NOTE(review): presumably already in RGB order, since
            no colour conversion happens here - confirm against the caller.

    Returns:
        ``True`` if the gaze ray lands inside the accepted screen region,
        ``False`` if it does not or if no face was detected.
    """
    # Process the image to find the face mesh.
    results = face_mesh.process(image)
    img_height, img_width, _ = image.shape

    detected_faces = results.multi_face_landmarks
    if not detected_faces:
        return False

    # The loop returns on its first pass, so only the first face is evaluated.
    for face in detected_faces:
        # Overlay the face contours on the caller's image.
        mp_drawing.draw_landmarks(
            image=image,
            landmark_list=face,
            connections=mp_face_mesh.FACEMESH_CONTOURS,
            landmark_drawing_spec=drawing_spec,
            connection_drawing_spec=drawing_spec,
        )

        points = face.landmark
        gaze = calculate_gaze_direction(points, img_width, img_height)
        plane_normal = getScreenPlaneEquation(points, img_width, img_height)

        # Does the gaze ray reach the screen plane inside the accepted region?
        return checkIntersection(points, gaze, plane_normal, -0.1)