125 changes: 104 additions & 21 deletions src/app/app.py
@@ -1,6 +1,9 @@
from flask import Flask, render_template, Response, request
import requests
from importlib import import_module
import io
import base64
import queue

import camera_opencv
import webbrowser
@@ -20,6 +23,8 @@
from torch import nn
import transforms as t
import matplotlib.pyplot as plt
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
from matplotlib.figure import Figure
import json
import time
from flask import jsonify
@@ -31,12 +36,14 @@
mp_holistic = mp.solutions.holistic


label_dict = pd.read_csv('jester-v1-labels.csv', header=None)
ges = label_dict[0].tolist()
with open("jester-v1-labels.txt", "r") as fh:
    gesture_labels = fh.read().splitlines()

camera = cv2.VideoCapture(0)
camera.set(cv2.CAP_PROP_FPS, 48)

confidence_queue = queue.Queue(maxsize=10)

app = Flask(__name__)

@app.route('/get_model_selected', methods=['POST'])
@@ -86,9 +93,22 @@ def gen(camera):
               b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n')


import collections

class FPS:
    """Rolling frames-per-second estimate over the last `average_of` timestamps."""
    def __init__(self, average_of=50):
        self.frame_timestamps = collections.deque(maxlen=average_of)

    def __call__(self):
        self.frame_timestamps.append(time.time())
        if len(self.frame_timestamps) > 1:
            return round(len(self.frame_timestamps) / (self.frame_timestamps[-1] - self.frame_timestamps[0]), 2)
        return 0.0

def Demo_Model_1_20BNJester_gen(camera):
"""Video streaming generator function for Demo_Model_1_20BNJester."""
fig, ax = plt.subplots()
# fig, ax = plt.subplots()
# Set up some storage variables
seq_len = 16
value = 0
@@ -115,17 +135,20 @@ def Demo_Model_1_20BNJester_gen(camera):
    hist = []
    mean_hist = []
    setup = True
    # plt.ion()

    cooldown = 0
    eval_samples = 2
    num_classes = 27

    score_energy = torch.zeros((eval_samples, num_classes))

    fps_a = FPS()
    fps_d = FPS()

    while True:
        success, frame = camera.read()
        if not success:
            break
        cv2.flip(frame, 1, frame)
        # print(f"fps_all: {fps_a()}")
@@ -142,14 +165,23 @@ def Demo_Model_1_20BNJester_gen(camera):

        # Get model output prediction
        if len(imgs) == 16:

            # print(f"detection_iter_per_sec: {fps_d()}")

            data = torch.cat(imgs).cuda()
            output = model(data.unsqueeze(0))
            out = (torch.nn.Softmax(dim=1)(output).data).cpu().numpy()[0]
            if len(hist) > 300:
                mean_hist = mean_hist[1:]
                hist = hist[1:]

            # HACK ("straight cheating"): zero the last two class scores so they can never win
            out[-2:] = [0, 0]
            # sanity check: the softmax summed to 1 before zeroing, so this prints the remaining mass
            print(sum(out))

            hist.append(out)

            score_energy = torch.tensor(hist[-eval_samples:])
            curr_mean = torch.mean(score_energy, dim=0)
            mean_hist.append(curr_mean.cpu().numpy())
@@ -160,32 +192,29 @@
            if cooldown > 0:
                cooldown = cooldown - 1
            if value.item() > 0.6 and indices < 25 and cooldown == 0:
                print('Gesture:', ges[indices], '\t\t\t\t\t\t Value: {:.2f}'.format(value.item()))
                print('Gesture:', gesture_labels[indices], '\t\t\t\t\t\t Value: {:.2f}'.format(value.item()))
                cooldown = 16
                pred = indices
            imgs = imgs[1:]

            df = pd.DataFrame(mean_hist, columns=ges)

            # ax.clear()
            # df.plot.line(legend=False, figsize=(16,6), ax=ax, ylim=(0,1))
            # if setup:
            #     plt.show(block=False)
            #     setup = False
            # plt.draw()
            # send predictions to the plotting thread
            try:
                confidence_queue.put_nowait(out)
            except queue.Full:
                print("WARNING: confidence queue is full; dropping gesture scores")

        n += 1
        bg = np.full((480, 640, 3), 15, np.uint8)
        bg[:480, :640] = frame

        font = cv2.FONT_HERSHEY_SIMPLEX
        if value > 0.6:
            cv2.putText(bg, ges[pred], (20, 465), font, 1, (0, 255, 0), 2)
            cv2.rectangle(bg, (128, 48), (640-128, 480-48), (0, 255, 0), 3)
        for i, top in enumerate(top_3):
            cv2.putText(bg, ges[top], (40, 200-70*i), font, 1, (255, 255, 255), 1)
            cv2.rectangle(bg, (400, 225-70*i), (int(400+out[top]*170), 205-70*i), (255, 255, 255), 3)

        # font = cv2.FONT_HERSHEY_SIMPLEX
        # if value > 0.6:
        #     cv2.putText(bg, ges[pred], (20, 465), font, 1, (0, 255, 0), 2)
        #     cv2.rectangle(bg, (128, 48), (640-128, 480-48), (0, 255, 0), 3)
        # for i, top in enumerate(top_3):
        #     cv2.putText(bg, ges[top], (40, 200-70*i), font, 1, (255, 255, 255), 1)
        #     cv2.rectangle(bg, (400, 225-70*i), (int(400+out[top]*170), 205-70*i), (255, 255, 255), 3)

        ret, buffer = cv2.imencode('.jpg', bg)
        frame = buffer.tobytes()
@@ -194,6 +223,59 @@ def Demo_Model_1_20BNJester_gen(camera):
               b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n')


# TODO: handle multiple sets of labels (currently just Jester)
def plot_png():

    confidence_thresh = 0.6

    pos = range(len(gesture_labels))

    # create a bare figure object; we don't use the matplotlib GUI,
    # so the base Figure class is enough
    fig = Figure(figsize=(8, 4))
    ax = fig.add_subplot(1, 1, 1)
    bars = ax.bar(pos, np.zeros(len(gesture_labels)), align="center")
    ax.set_ylim(0, 1)
    ax.set_xticks(pos)
    ax.set_xticklabels(gesture_labels, rotation=60, ha='right')
    ax.set_xlabel("Jester gesture classes")
    ax.set_ylabel("confidence")
    fig.tight_layout()

    while True:

        try:
            # read data from the queue
            result = confidence_queue.get(timeout=0.2)

            # update the height (and color) of each bar
            for rect, y in zip(bars, result):
                if y > confidence_thresh:
                    rect.set_color("g")
                else:
                    rect.set_color("b")
                rect.set_height(y)

        except queue.Empty:  # no data has been returned, detection is off
            pass
            # print("WARNING: no results returned")

        finally:
            # write the figure image to an in-memory buffer
            io_buffer = io.BytesIO()
            FigureCanvas(fig).print_png(io_buffer)
            io_buffer.seek(0)

            # pass the PNG bytes to the webpage as one multipart frame
            yield (b'--frame\r\n'
                   b'Content-Type: image/png\r\n\r\n' + io_buffer.read() + b'\r\n')


@app.route('/accuracy_plot')
def call_plot():
    return Response(plot_png(),
                    mimetype='multipart/x-mixed-replace; boundary=frame')

@app.route('/Demo_Model_1_20BNJester_video_feed')
def Demo_Model_1_20BNJester_video_feed():
"""Video streaming route. Put this in the src attribute of an img tag."""
@@ -204,6 +286,7 @@ def Demo_Model_1_20BNJester_video_feed():
@app.route('/video_feed')
def video_feed():
    """Video streaming route. Put this in the src attribute of an img tag."""
    return Response(gen(camera),
                    mimetype='multipart/x-mixed-replace; boundary=frame')

File renamed without changes.
4 changes: 4 additions & 0 deletions src/app/templates/index.html
@@ -72,6 +72,10 @@ <h1 class="card-text" style="color: red">OFF</h1>
<a href="{{ url_for('index', selected_model_name=selected_model_name) }}?gesture_recognition_state=on"><button type="button" class="btn btn-success btn-block" name="recognition_toggle" value="Begin Gesture Detection">Begin Gesture Detection</button></a>
<a href="{{ url_for('index', selected_model_name=selected_model_name) }}?gesture_recognition_state=off"><button type="button" class="btn btn-danger btn-block" name="recognition_toggle" value="End Gesture Detection">End Gesture Detection</button></a>
</div>

<br>
<img src="{{ url_for('call_plot') }}" alt="gesture classification confidence">

</div>
</div>
<div class="card bg-light mb-3">
122 changes: 122 additions & 0 deletions test_newbackend/DemoModel.py
@@ -0,0 +1,122 @@
# Source https://github.com/fabiopk/RT_GestureRecognition/blob/master/demo.py

import torch
import torch.nn as nn
import math

class FullModel(nn.Module):

    def __init__(self, batch_size, seq_length=8):
        super(FullModel, self).__init__()

        class CNN2D(nn.Module):
            def __init__(self, batch_size=batch_size, image_size=96, seq_length=8, in_channels=3):
                super(CNN2D, self).__init__()
                self.conv1 = self._create_conv_layer(in_channels=in_channels, out_channels=16)
                self.conv2 = self._create_conv_layer(in_channels=16, out_channels=32)
                self.conv3 = self._create_conv_layer_pool(in_channels=32, out_channels=64)
                self.conv4 = self._create_conv_layer_pool(in_channels=64, out_channels=128)
                self.conv5 = self._create_conv_layer_pool(in_channels=128, out_channels=256)
                cnn_output_shape = int(256 * (image_size / (2**4))**2)  # note: computed but unused

            def forward(self, x):
                # fold the time axis into the batch so every frame goes through the 2D CNN
                batch_size, frames, channels, width, height = x.shape
                x = x.view(-1, channels, width, height)
                x = self.conv1(x)
                x = self.conv2(x)
                x = self.conv3(x)
                x = self.conv4(x)
                x = self.conv5(x)
                return x

            def _create_conv_layer(self, in_channels, out_channels, kernel_size=(3, 3), padding=(1, 1)):
                return nn.Sequential(
                    nn.Conv2d(in_channels, out_channels, kernel_size, padding=padding),
                    nn.BatchNorm2d(out_channels),
                    nn.ReLU(),
                )

            def _create_conv_layer_pool(self, in_channels, out_channels, kernel_size=(3, 3), padding=(1, 1), pool=(2, 2)):
                return nn.Sequential(
                    nn.Conv2d(in_channels, out_channels, kernel_size, padding=padding),
                    nn.BatchNorm2d(out_channels),
                    nn.ReLU(),
                    nn.MaxPool2d(pool)
                )

        class CNN3D(nn.Module):
            def __init__(self, batch_size=batch_size, image_size=96, seq_length=8):
                super(CNN3D, self).__init__()
                self.conv1 = self._create_conv_layer_pool(in_channels=256, out_channels=256, pool=(1, 1, 1))
                self.conv2 = self._create_conv_layer_pool(in_channels=256, out_channels=256, pool=(2, 2, 2))
                self.conv3 = self._create_conv_layer_pool(in_channels=256, out_channels=256, pool=(2, 1, 1))
                self.conv4 = self._create_conv_layer_pool(in_channels=256, out_channels=256, pool=(2, 2, 2))

            def forward(self, x):
                batch_size, channels, frames, width, height = x.shape
                x = self.conv1(x)
                x = self.conv2(x)
                x = self.conv3(x)
                x = self.conv4(x)
                return x

            def _create_conv_layer(self, in_channels, out_channels, kernel_size=(3, 3, 3), padding=(1, 1, 1)):
                return nn.Sequential(
                    nn.Conv3d(in_channels, out_channels, kernel_size, padding=padding),
                    nn.BatchNorm3d(out_channels),
                    nn.ReLU(),
                )

            def _create_conv_layer_pool(self, in_channels, out_channels, kernel_size=(3, 3, 3), padding=(1, 1, 1), pool=(1, 2, 2)):
                return nn.Sequential(
                    nn.Conv3d(in_channels, out_channels, kernel_size, padding=padding),
                    nn.BatchNorm3d(out_channels),
                    nn.ReLU(),
                    nn.MaxPool3d(pool)
                )


        class Combiner(nn.Module):

            def __init__(self, in_features):
                super(Combiner, self).__init__()
                self.linear1 = self._create_linear_layer(in_features, in_features // 2)
                self.linear2 = self._create_linear_layer(in_features // 2, 1024)
                self.linear3 = self._create_linear_layer(1024, 27)

            def forward(self, x):
                x = self.linear1(x)
                x = self.linear2(x)
                x = self.linear3(x)
                return x

            def _create_linear_layer(self, in_features, out_features, p=0.6):
                return nn.Sequential(
                    nn.Linear(in_features, out_features),
                    nn.Dropout(p=p)
                )

        self.rgb2d = CNN2D(batch_size)
        self.rgb3d = CNN3D(batch_size)
        self.combiner = Combiner(4608)

        self.batch_size = batch_size
        self.seq_length = seq_length
        self.steps = 0
        self.epochs = 0
        self.best_validation_loss = math.inf

    def forward(self, x):
        self.batch_size = x.shape[0]
        x = self.rgb2d(x)
        # restore the time axis for the 3D CNN: (batch, channels, frames, h, w)
        batch_and_frames, channels, dim1, dim2 = x.shape
        x = x.view(self.batch_size, -1, channels, dim1, dim2).permute(0, 2, 1, 3, 4)
        x = self.rgb3d(x)
        x = x.view(self.batch_size, -1)
        x = self.combiner(x)

        if self.training:
            self.steps += 1

        return x
30 changes: 30 additions & 0 deletions test_newbackend/README.md
@@ -0,0 +1,30 @@
# New Backend
This new backend relies on a shared memory buffer to separate the
capturing and storing of image sequences from the model implementation.
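
Roughly, one process owns the camera and writes each frame into a shared block, while the model process maps the same block and reads from it. The sketch below is only an illustration using stdlib `multiprocessing.shared_memory`, assuming a single-frame buffer and a 480x640 BGR camera; the actual backend keeps a sequence of frames in the modified ring buffer described in the notes below.

```python
# Sketch only: the real backend uses the modified cringbuffer, but the
# capture/model split looks roughly like this with stdlib shared memory.
import numpy as np
from multiprocessing import Process, shared_memory

FRAME_SHAPE = (480, 640, 3)   # assumed camera resolution (BGR)
FRAME_DTYPE = np.uint8

def capture(shm_name: str):
    """Camera process: writes the newest frame into the shared block."""
    import cv2
    shm = shared_memory.SharedMemory(name=shm_name)
    frame_view = np.ndarray(FRAME_SHAPE, dtype=FRAME_DTYPE, buffer=shm.buf)
    cam = cv2.VideoCapture(0)
    while True:
        ok, frame = cam.read()
        if ok:
            frame_view[:] = frame   # in-place write: no pickling, no copies between processes

def model_worker(shm_name: str):
    """Model process: maps the same block and snapshots frames for inference."""
    shm = shared_memory.SharedMemory(name=shm_name)
    frame_view = np.ndarray(FRAME_SHAPE, dtype=FRAME_DTYPE, buffer=shm.buf)
    while True:
        frame = frame_view.copy()   # snapshot the most recent frame
        # ... run the gesture model on `frame` ...

if __name__ == "__main__":
    nbytes = int(np.prod(FRAME_SHAPE)) * np.dtype(FRAME_DTYPE).itemsize
    shm = shared_memory.SharedMemory(create=True, size=nbytes)
    workers = [Process(target=capture, args=(shm.name,)),
               Process(target=model_worker, args=(shm.name,))]
    for w in workers:
        w.start()
```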

### usage
`python test_newbackend/main.py`
> tested on: debian bullseye, python v3.9.2, ROCm stack v4.3.0

At the moment there is no real user interface.
This demo uses opencv windows to display the chart and the most recent image frame.
It runs for 2 minutes and then kills itself; `ctrl+c` should stop it early.

### TODOs
- [ ] integrate with GUI
- [ ] `@ianzur`: expected it to be possible to use this with a flask backend, similar to [celery](), but did not investigate implementing it.
> for a web app it may make more sense to move towards a Java implementation
- [ ] instead of hacking the changes into the ring buffer, subclass it

**Notes:**
- 2 files in this folder are directly copied from `./src/app/`:
  - model structure definition: `DemoModel.py`
  - model weights: `demo.ckp`
- RingBuffer implementation: see https://github.com/ctrl-labs/cringbuffer
  - Changes:
    - the writer is allowed to overwrite entries before they are read by the model class.
    > This allows readers to always have the newest frame (when model execution is slow, the camera fps stays constant).
    - reader pointers are ignored; the buffer does not track where readers are. A reader always reads the `n` most recent frames, using the writer position to locate the newest one (see the sketch below).
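
A minimal sketch of that read/write policy (illustrative names, not the actual cringbuffer API): the writer bumps a monotonically increasing index and overwrites slots freely, and a read derives the `n` most recent slots from the writer position alone.

```python
import threading

class LatestFramesBuffer:
    """Illustrative ring buffer, not the actual cringbuffer API.

    The writer may overwrite slots that were never read. Readers are not
    tracked; a read returns the n most recent frames, located from the
    writer's position alone.
    """

    def __init__(self, capacity: int):
        self.capacity = capacity
        self.slots = [None] * capacity
        self.write_index = 0              # total frames ever written
        self.lock = threading.Lock()

    def write(self, frame) -> None:
        # always succeeds: a slow reader simply misses overwritten frames
        with self.lock:
            self.slots[self.write_index % self.capacity] = frame
            self.write_index += 1

    def read_latest(self, n: int) -> list:
        # return the n most recent frames, oldest first
        with self.lock:
            if n > min(self.write_index, self.capacity):
                raise ValueError("fewer than n frames are available")
            start = self.write_index - n
            return [self.slots[i % self.capacity] for i in range(start, self.write_index)]
```

With this policy the capture loop never blocks on a slow model: `write()` always succeeds, and a model that needs a 16-frame clip calls `read_latest(16)` and always sees the freshest window.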

**contact**
- questions, concerns? raise an issue and tag `@ianzur`, or send me an email: `ian dot zurutuza at gmail dot com`
Binary file added test_newbackend/demo.ckp