125 changes: 104 additions & 21 deletions src/app/app.py
@@ -1,6 +1,9 @@
from flask import Flask, render_template, Response, request
import requests
from importlib import import_module
import io
import base64
import queue

import camera_opencv
import webbrowser
@@ -20,6 +23,8 @@
from torch import nn
import transforms as t
import matplotlib.pyplot as plt
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
from matplotlib.figure import Figure
import json
import time
from flask import jsonify
@@ -31,12 +36,14 @@
mp_holistic = mp.solutions.holistic


label_dict = pd.read_csv('jester-v1-labels.csv', header=None)
ges = label_dict[0].tolist()
with open("jester-v1-labels.txt", "r") as fh:
    gesture_labels = fh.read().splitlines()

camera = cv2.VideoCapture(0)
camera.set(cv2.CAP_PROP_FPS, 48)

confidence_queue = queue.Queue(maxsize=10)

app = Flask(__name__)

@app.route('/get_model_selected', methods=['POST'])
@@ -86,9 +93,22 @@ def gen(camera):
               b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n')


import collections

class FPS:
    """Rolling frames-per-second estimate over the last `average_of` timestamps."""
    def __init__(self, average_of=50):
        self.frame_timestamps = collections.deque(maxlen=average_of)

    def __call__(self):
        self.frame_timestamps.append(time.time())
        if len(self.frame_timestamps) > 1:
            return round(len(self.frame_timestamps) / (self.frame_timestamps[-1] - self.frame_timestamps[0]), 2)
        return 0.0

def Demo_Model_1_20BNJester_gen(camera):
"""Video streaming generator function for Demo_Model_1_20BNJester."""
fig, ax = plt.subplots()
# fig, ax = plt.subplots()
# Set up some storage variables
seq_len = 16
value = 0
@@ -115,17 +135,20 @@ def Demo_Model_1_20BNJester_gen(camera):
    hist = []
    mean_hist = []
    setup = True
    # plt.ion()

    cooldown = 0
    eval_samples = 2
    num_classes = 27

    score_energy = torch.zeros((eval_samples, num_classes))

    fps_a = FPS()
    fps_d = FPS()

    while True:
        success, frame = camera.read()
        if not success:
            break
        cv2.flip(frame, 1, frame)
        # print(f"fps_all: {fps_a()}")
@@ -142,14 +165,23 @@ def Demo_Model_1_20BNJester_gen(camera):

        # Get model output prediction
        if len(imgs) == 16:

            # print(f"detection_iter_per_sec: {fps_d()}")

            data = torch.cat(imgs).cuda()
            output = model(data.unsqueeze(0))
            out = (torch.nn.Softmax(dim=1)(output).data).cpu().numpy()[0]
            if len(hist) > 300:
                mean_hist = mean_hist[1:]
                hist = hist[1:]

            # HACK ("straight cheating"): zero the last two class scores so they can never win
            out[-2:] = [0, 0]
            # sanity check: the softmax summed to 1 before zeroing, so this prints the remaining mass
            print(sum(out))

            hist.append(out)

            score_energy = torch.tensor(hist[-eval_samples:])
            curr_mean = torch.mean(score_energy, dim=0)
            mean_hist.append(curr_mean.cpu().numpy())
@@ -160,32 +192,29 @@
            if cooldown > 0:
                cooldown = cooldown - 1
            if value.item() > 0.6 and indices < 25 and cooldown == 0:
                print('Gesture:', ges[indices], '\t\t\t\t\t\t Value: {:.2f}'.format(value.item()))
                print('Gesture:', gesture_labels[indices], '\t\t\t\t\t\t Value: {:.2f}'.format(value.item()))
                cooldown = 16
                pred = indices
            imgs = imgs[1:]

            df = pd.DataFrame(mean_hist, columns=ges)

            # ax.clear()
            # df.plot.line(legend=False, figsize=(16,6), ax=ax, ylim=(0,1))
            # if setup:
            #     plt.show(block=False)
            #     setup = False
            # plt.draw()
            # send predictions to the plotting thread
            try:
                confidence_queue.put_nowait(out)
            except queue.Full:
                print("WARNING: confidence queue is full; dropping gesture scores")

        n += 1
        bg = np.full((480, 640, 3), 15, np.uint8)
        bg[:480, :640] = frame

        font = cv2.FONT_HERSHEY_SIMPLEX
        if value > 0.6:
            cv2.putText(bg, ges[pred], (20, 465), font, 1, (0, 255, 0), 2)
            cv2.rectangle(bg, (128, 48), (640-128, 480-48), (0, 255, 0), 3)
        for i, top in enumerate(top_3):
            cv2.putText(bg, ges[top], (40, 200-70*i), font, 1, (255, 255, 255), 1)
            cv2.rectangle(bg, (400, 225-70*i), (int(400+out[top]*170), 205-70*i), (255, 255, 255), 3)

        # font = cv2.FONT_HERSHEY_SIMPLEX
        # if value > 0.6:
        #     cv2.putText(bg, ges[pred], (20, 465), font, 1, (0, 255, 0), 2)
        #     cv2.rectangle(bg, (128, 48), (640-128, 480-48), (0, 255, 0), 3)
        # for i, top in enumerate(top_3):
        #     cv2.putText(bg, ges[top], (40, 200-70*i), font, 1, (255, 255, 255), 1)
        #     cv2.rectangle(bg, (400, 225-70*i), (int(400+out[top]*170), 205-70*i), (255, 255, 255), 3)

        ret, buffer = cv2.imencode('.jpg', bg)
        frame = buffer.tobytes()
@@ -194,6 +223,59 @@ def Demo_Model_1_20BNJester_gen(camera):
               b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n')


# TODO: handle multiple sets of labels (currently just Jester)
def plot_png():

    confidence_thresh = 0.6

    pos = range(len(gesture_labels))

    # create a bare figure object; we don't use the matplotlib GUI,
    # so the base Figure class is enough
    fig = Figure(figsize=(8, 4))
    ax = fig.add_subplot(1, 1, 1)
    bars = ax.bar(pos, np.zeros(len(gesture_labels)), align="center")
    ax.set_ylim(0, 1)
    ax.set_xticks(pos)
    ax.set_xticklabels(gesture_labels, rotation=60, ha='right')
    ax.set_xlabel("Jester gesture classes")
    ax.set_ylabel("confidence")
    fig.tight_layout()

    while True:

        try:
            # read data from the queue
            result = confidence_queue.get(timeout=0.2)

            # update the height (and color) of each bar
            for rect, y in zip(bars, result):
                if y > confidence_thresh:
                    rect.set_color("g")
                else:
                    rect.set_color("b")
                rect.set_height(y)

        except queue.Empty:  # no data has been returned, detection is off
            pass
            # print("WARNING: no results returned")

        finally:
            # write the figure image to an in-memory buffer
            io_buffer = io.BytesIO()
            FigureCanvas(fig).print_png(io_buffer)
            io_buffer.seek(0)

            # pass the PNG bytes to the webpage as one multipart frame
            yield (b'--frame\r\n'
                   b'Content-Type: image/png\r\n\r\n' + io_buffer.read() + b'\r\n')


@app.route('/accuracy_plot')
def call_plot():
    return Response(plot_png(),
                    mimetype='multipart/x-mixed-replace; boundary=frame')

@app.route('/Demo_Model_1_20BNJester_video_feed')
def Demo_Model_1_20BNJester_video_feed():
"""Video streaming route. Put this in the src attribute of an img tag."""
@@ -204,6 +286,7 @@ def Demo_Model_1_20BNJester_video_feed():
@app.route('/video_feed')
def video_feed():
    """Video streaming route. Put this in the src attribute of an img tag."""
    return Response(gen(camera),
                    mimetype='multipart/x-mixed-replace; boundary=frame')

File renamed without changes.
4 changes: 4 additions & 0 deletions src/app/templates/index.html
@@ -72,6 +72,10 @@ <h1 class="card-text" style="color: red">OFF</h1>
<a href="{{ url_for('index', selected_model_name=selected_model_name) }}?gesture_recognition_state=on"><button type="button" class="btn btn-success btn-block" name="recognition_toggle" value="Begin Gesture Detection">Begin Gesture Detection</button></a>
<a href="{{ url_for('index', selected_model_name=selected_model_name) }}?gesture_recognition_state=off"><button type="button" class="btn btn-danger btn-block" name="recognition_toggle" value="End Gesture Detection">End Gesture Detection</button></a>
</div>

<br>
<img src="{{ url_for('call_plot') }}" alt="gesture classification confidence">

</div>
</div>
<div class="card bg-light mb-3">
122 changes: 122 additions & 0 deletions test_newbackend/DemoModel.py
@@ -0,0 +1,122 @@
# Source https://github.com/fabiopk/RT_GestureRecognition/blob/master/demo.py

import torch
import torch.nn as nn
import math

class FullModel(nn.Module):

    def __init__(self, batch_size, seq_length=8):
        super(FullModel, self).__init__()

        class CNN2D(nn.Module):
            def __init__(self, batch_size=batch_size, image_size=96, seq_length=8, in_channels=3):
                super(CNN2D, self).__init__()
                self.conv1 = self._create_conv_layer(in_channels=in_channels, out_channels=16)
                self.conv2 = self._create_conv_layer(in_channels=16, out_channels=32)
                self.conv3 = self._create_conv_layer_pool(in_channels=32, out_channels=64)
                self.conv4 = self._create_conv_layer_pool(in_channels=64, out_channels=128)
                self.conv5 = self._create_conv_layer_pool(in_channels=128, out_channels=256)
                cnn_output_shape = int(256 * (image_size / (2**4))**2)  # note: computed but unused

            def forward(self, x):
                # fold the time axis into the batch so every frame goes through the 2D CNN
                batch_size, frames, channels, width, height = x.shape
                x = x.view(-1, channels, width, height)
                x = self.conv1(x)
                x = self.conv2(x)
                x = self.conv3(x)
                x = self.conv4(x)
                x = self.conv5(x)
                return x

            def _create_conv_layer(self, in_channels, out_channels, kernel_size=(3, 3), padding=(1, 1)):
                return nn.Sequential(
                    nn.Conv2d(in_channels, out_channels, kernel_size, padding=padding),
                    nn.BatchNorm2d(out_channels),
                    nn.ReLU(),
                )

            def _create_conv_layer_pool(self, in_channels, out_channels, kernel_size=(3, 3), padding=(1, 1), pool=(2, 2)):
                return nn.Sequential(
                    nn.Conv2d(in_channels, out_channels, kernel_size, padding=padding),
                    nn.BatchNorm2d(out_channels),
                    nn.ReLU(),
                    nn.MaxPool2d(pool)
                )

        class CNN3D(nn.Module):
            def __init__(self, batch_size=batch_size, image_size=96, seq_length=8):
                super(CNN3D, self).__init__()
                self.conv1 = self._create_conv_layer_pool(in_channels=256, out_channels=256, pool=(1, 1, 1))
                self.conv2 = self._create_conv_layer_pool(in_channels=256, out_channels=256, pool=(2, 2, 2))
                self.conv3 = self._create_conv_layer_pool(in_channels=256, out_channels=256, pool=(2, 1, 1))
                self.conv4 = self._create_conv_layer_pool(in_channels=256, out_channels=256, pool=(2, 2, 2))

            def forward(self, x):
                batch_size, channels, frames, width, height = x.shape
                x = self.conv1(x)
                x = self.conv2(x)
                x = self.conv3(x)
                x = self.conv4(x)
                return x

            def _create_conv_layer(self, in_channels, out_channels, kernel_size=(3, 3, 3), padding=(1, 1, 1)):
                return nn.Sequential(
                    nn.Conv3d(in_channels, out_channels, kernel_size, padding=padding),
                    nn.BatchNorm3d(out_channels),
                    nn.ReLU(),
                )

            def _create_conv_layer_pool(self, in_channels, out_channels, kernel_size=(3, 3, 3), padding=(1, 1, 1), pool=(1, 2, 2)):
                return nn.Sequential(
                    nn.Conv3d(in_channels, out_channels, kernel_size, padding=padding),
                    nn.BatchNorm3d(out_channels),
                    nn.ReLU(),
                    nn.MaxPool3d(pool)
                )


        class Combiner(nn.Module):

            def __init__(self, in_features):
                super(Combiner, self).__init__()
                self.linear1 = self._create_linear_layer(in_features, in_features // 2)
                self.linear2 = self._create_linear_layer(in_features // 2, 1024)
                self.linear3 = self._create_linear_layer(1024, 27)

            def forward(self, x):
                x = self.linear1(x)
                x = self.linear2(x)
                x = self.linear3(x)
                return x

            def _create_linear_layer(self, in_features, out_features, p=0.6):
                return nn.Sequential(
                    nn.Linear(in_features, out_features),
                    nn.Dropout(p=p)
                )

        self.rgb2d = CNN2D(batch_size)
        self.rgb3d = CNN3D(batch_size)
        self.combiner = Combiner(4608)

        self.batch_size = batch_size
        self.seq_length = seq_length
        self.steps = 0
        self.epochs = 0
        self.best_validation_loss = math.inf

    def forward(self, x):
        self.batch_size = x.shape[0]
        x = self.rgb2d(x)
        # restore the time axis for the 3D CNN: (batch, channels, frames, h, w)
        batch_and_frames, channels, dim1, dim2 = x.shape
        x = x.view(self.batch_size, -1, channels, dim1, dim2).permute(0, 2, 1, 3, 4)
        x = self.rgb3d(x)
        x = x.view(self.batch_size, -1)
        x = self.combiner(x)

        if self.training:
            self.steps += 1

        return x
30 changes: 30 additions & 0 deletions test_newbackend/README.md
@@ -0,0 +1,30 @@
# New Backend
This new backend relies on a shared memory buffer to separate the
capturing and storing of image sequences from the model implementation.
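
Roughly, one process owns the camera and writes each frame into a shared block, while the model process maps the same block and reads from it. The sketch below is only an illustration using stdlib `multiprocessing.shared_memory`, assuming a single-frame buffer and a 480x640 BGR camera; the actual backend keeps a sequence of frames in the modified ring buffer described in the notes below.

```python
# Sketch only: the real backend uses the modified cringbuffer, but the
# capture/model split looks roughly like this with stdlib shared memory.
import numpy as np
from multiprocessing import Process, shared_memory

FRAME_SHAPE = (480, 640, 3)   # assumed camera resolution (BGR)
FRAME_DTYPE = np.uint8

def capture(shm_name: str):
    """Camera process: writes the newest frame into the shared block."""
    import cv2
    shm = shared_memory.SharedMemory(name=shm_name)
    frame_view = np.ndarray(FRAME_SHAPE, dtype=FRAME_DTYPE, buffer=shm.buf)
    cam = cv2.VideoCapture(0)
    while True:
        ok, frame = cam.read()
        if ok:
            frame_view[:] = frame   # in-place write: no pickling, no copies between processes

def model_worker(shm_name: str):
    """Model process: maps the same block and snapshots frames for inference."""
    shm = shared_memory.SharedMemory(name=shm_name)
    frame_view = np.ndarray(FRAME_SHAPE, dtype=FRAME_DTYPE, buffer=shm.buf)
    while True:
        frame = frame_view.copy()   # snapshot the most recent frame
        # ... run the gesture model on `frame` ...

if __name__ == "__main__":
    nbytes = int(np.prod(FRAME_SHAPE)) * np.dtype(FRAME_DTYPE).itemsize
    shm = shared_memory.SharedMemory(create=True, size=nbytes)
    workers = [Process(target=capture, args=(shm.name,)),
               Process(target=model_worker, args=(shm.name,))]
    for w in workers:
        w.start()
```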

### usage
`python test_newbackend/main.py`
> tested on: debian bullseye, python v3.9.2, ROCm stack v4.3.0

At the moment there is no real user interface.
This demo uses opencv windows to display the chart and the most recent image frame.
It runs for 2 minutes and then kills itself; `ctrl+c` should stop it early.

### TODOs
- [ ] integrate with GUI
- [ ] `@ianzur`: expected it to be possible to use this with a flask backend, similar to [celery](), but did not investigate implementing it.
> for a web app it may make more sense to move towards a Java implementation
- [ ] instead of hacking the changes into the ring buffer, subclass it

**Notes:**
- 2 files in this folder are directly copied from `./src/app/`:
  - model structure definition: `DemoModel.py`
  - model weights: `demo.ckp`
- RingBuffer implementation: see https://github.com/ctrl-labs/cringbuffer
  - Changes:
    - the writer is allowed to overwrite entries before they are read by the model class.
    > This allows readers to always have the newest frame (when model execution is slow, the camera fps stays constant).
    - reader pointers are ignored; the buffer does not track where readers are. A reader always reads the `n` most recent frames, using the writer position to locate the newest one (see the sketch below).
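
A minimal sketch of that read/write policy (illustrative names, not the actual cringbuffer API): the writer bumps a monotonically increasing index and overwrites slots freely, and a read derives the `n` most recent slots from the writer position alone.

```python
import threading

class LatestFramesBuffer:
    """Illustrative ring buffer, not the actual cringbuffer API.

    The writer may overwrite slots that were never read. Readers are not
    tracked; a read returns the n most recent frames, located from the
    writer's position alone.
    """

    def __init__(self, capacity: int):
        self.capacity = capacity
        self.slots = [None] * capacity
        self.write_index = 0              # total frames ever written
        self.lock = threading.Lock()

    def write(self, frame) -> None:
        # always succeeds: a slow reader simply misses overwritten frames
        with self.lock:
            self.slots[self.write_index % self.capacity] = frame
            self.write_index += 1

    def read_latest(self, n: int) -> list:
        # return the n most recent frames, oldest first
        with self.lock:
            if n > min(self.write_index, self.capacity):
                raise ValueError("fewer than n frames are available")
            start = self.write_index - n
            return [self.slots[i % self.capacity] for i in range(start, self.write_index)]
```

With this policy the capture loop never blocks on a slow model: `write()` always succeeds, and a model that needs a 16-frame clip calls `read_latest(16)` and always sees the freshest window.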

**contact**
- questions, concerns? raise an issue and tag `@ianzur`, or send me an email: `ian dot zurutuza at gmail dot com`
Binary file added test_newbackend/demo.ckp