including speech binaries

evancohen · May 21, 2016 · a19f649 · a19f649
1 parent ce65f3f
commit a19f649
Show file tree

Hide file tree

Showing 15 changed files with 513 additions and 0 deletions.
diff --git a/js/services/speech.js b/js/services/speech.js
@@ -0,0 +1,105 @@
+(function(annyang) {
+    'use strict';
+
+    /**
+     * Angular factory that wires annyang speech recognition to a keyword
+     * spotter running in a child Python process (./speech/kws.py).
+     *
+     * @param {Object} $rootScope Angular root scope; callbacks are run inside
+     *                            $rootScope.$apply so bindings refresh.
+     * @returns {Object} service exposing init, registerCallbacks, addCommand,
+     *                   start, abort and the `commands` map.
+     */
+    function SpeechService($rootScope) {
+        var service = {};
+
+        /**
+         * Set the recognition language and launch the keyword spotter process.
+         * Reads the global `config` object (language, kws.model, kws.sensitivity).
+         */
+        service.init = function() {
+            annyang.setLanguage(config.language);
+            console.log("Initializing keyword spotter");
+
+            // Tolerate a config without a `kws` section instead of throwing
+            var kwsConfig = config.kws || {};
+            var modelFile = kwsConfig.model || "smart_mirror.pmdl";
+            var kwsSensitivity = kwsConfig.sensitivity || 0.5;
+
+            var spawn = require('child_process').spawn;
+            // argv entries are documented as strings, so convert the number explicitly
+            var kwsProcess = spawn('python', ['./speech/kws.py', modelFile, String(kwsSensitivity)], {detached: false});
+            console.log(kwsProcess);
+
+            // A failed spawn (e.g. python not installed) emits 'error'; with no
+            // listener that event would be thrown as an uncaught exception.
+            kwsProcess.on('error', function (err) {
+                console.error("Failed to start keyword spotter", err);
+            });
+
+            // Output starting with 'INFO' on stderr is treated as the hotword
+            // signal (presumably the Python logger's detection line); anything
+            // else on stderr is reported as an error.
+            kwsProcess.stderr.on('data', function (data) {
+                var message = data.toString();
+                if (message.startsWith('INFO')) {
+                    annyang.start();
+                } else {
+                    console.error(message);
+                }
+            });
+            kwsProcess.stdout.on('data', function (data) {
+                console.log(data.toString());
+            });
+        };
+
+        /**
+         * Register callbacks for the controller. Does not utilize CallbackManager().
+         *
+         * @param {Object} cb optional handlers: listening(bool), interimResult(data),
+         *                    result(data), error(data). Non-function entries are ignored.
+         */
+        service.registerCallbacks = function(cb) {
+            // annyang.addCommands(service.commands);
+
+            // Annyang is a bit "chatty", turn this on only for debugging
+            annyang.debug(false);
+
+            // Each handler is invoked *inside* the function passed to $apply so
+            // that it runs within the digest and its return value is not
+            // evaluated by Angular as an expression.
+            if (isCallback(cb.listening)) {
+                annyang.addCallback('start', function() {
+                    $rootScope.$apply(function() {
+                        cb.listening(true);
+                    });
+                });
+                annyang.addCallback('end', function() {
+                    $rootScope.$apply(function() {
+                        cb.listening(false);
+                    });
+                });
+            }
+            if (isCallback(cb.interimResult)) {
+                annyang.addCallback('interimResult', function(data) {
+                    $rootScope.$apply(function() {
+                        cb.interimResult(data);
+                    });
+                });
+            }
+            if (isCallback(cb.result)) {
+                annyang.addCallback('result', function(data) {
+                    $rootScope.$apply(function() {
+                        cb.result(data);
+                    });
+                });
+            }
+            if (isCallback(cb.error)) {
+                annyang.addCallback('error', function(data) {
+                    $rootScope.$apply(function() {
+                        cb.error(data);
+                    });
+                });
+            }
+        };
+
+        // Ensure callback is a valid function
+        function isCallback(callback) {
+            return typeof callback === "function";
+        }
+
+        // COMMANDS
+        service.commands = {};
+
+        /**
+         * Add a voice command.
+         *
+         * @param {string} phrase annyang command phrase
+         * @param {Function} callback invoked with up to two captured arguments
+         */
+        service.addCommand = function(phrase, callback) {
+            var command = {};
+
+            // Wrap annyang command in scope apply
+            command[phrase] = function(arg1, arg2) {
+                $rootScope.$apply(function() {
+                    callback(arg1, arg2);
+                });
+            };
+
+            // Extend our commands list
+            angular.extend(service.commands, command);
+
+            // Add the commands to annyang
+            annyang.addCommands(service.commands);
+            console.debug('added command "' + phrase + '"', service.commands);
+        };
+
+        // Annyang start listening
+        service.start = function() {
+            // Listen for the next utterance and then stop
+            annyang.start({autoRestart: false, continuous: false});
+        };
+
+        // Annyang stop listening
+        service.abort = function() {
+            annyang.abort();
+        };
+
+        service.init();
+
+        return service;
+    }
+
+    angular.module('SmartMirror')
+        .factory('SpeechService', SpeechService);
+
+}(window.annyang));
diff --git a/smart_mirror.pmdl b/smart_mirror.pmdl
diff --git a/speech/_snowboydetect.so b/speech/_snowboydetect.so
diff --git a/speech/demo2.py b/speech/demo2.py
@@ -0,0 +1,41 @@
+import snowboydecoder
+import sys
+import signal
+
+# Demo code for listening for two hotwords at the same time
+
+interrupted = False
+
+
+def signal_handler(signum, frame):
+    """Signal handler: raise the module-level `interrupted` flag."""
+    global interrupted
+    interrupted = True
+
+
+def interrupt_callback():
+    """Return the current value of the module-level `interrupted` flag."""
+    return interrupted
+
+
+def _play_ding():
+    """Callback for the first model: play the ding sound."""
+    snowboydecoder.play_audio_file(snowboydecoder.DETECT_DING)
+
+
+def _play_dong():
+    """Callback for the second model: play the dong sound."""
+    snowboydecoder.play_audio_file(snowboydecoder.DETECT_DONG)
+
+
+# Exactly two model paths are expected after the script name
+if len(sys.argv) != 3:
+    print("Error: need to specify 2 model names")
+    print("Usage: python demo.py 1st.model 2nd.model")
+    sys.exit(-1)
+
+models = sys.argv[1:]
+
+# capture SIGINT signal, e.g., Ctrl+C
+signal.signal(signal.SIGINT, signal_handler)
+
+# one sensitivity value per model
+sensitivity = [0.5 for _ in models]
+detector = snowboydecoder.HotwordDetector(models, sensitivity=sensitivity)
+callbacks = [_play_ding, _play_dong]
+print('Listening... Press Ctrl+C to exit')
+
+# main loop
+# make sure you have the same numbers of callbacks and models
+detector.start(
+    detected_callback=callbacks,
+    interrupt_check=interrupt_callback,
+    sleep_time=0.03,
+)
+
+detector.terminate()
diff --git a/speech/kws.py b/speech/kws.py
@@ -0,0 +1,40 @@
+import snowboydecoder
+import sys
+import signal
+
+interrupted = False
+
+
+def signal_handler(signum, frame):
+    """SIGINT handler: set the flag that `interrupt_callback` reports."""
+    global interrupted
+    interrupted = True
+
+
+def interrupt_callback():
+    """Return True once SIGINT has been received, so the detector loop stops."""
+    return interrupted
+
+
+def hotword_detected_callback():
+    """Announce the detection on stdout and play the ding sound."""
+    print("!Hotword Detected")
+    # stdout may be a pipe (block-buffered); flush so the reader sees the
+    # message immediately
+    sys.stdout.flush()
+    snowboydecoder.play_audio_file(snowboydecoder.DETECT_DING)
+
+
+# Both sys.argv[1] (model) and sys.argv[2] (sensitivity) are read below, so
+# two arguments are required after the script name.
+if len(sys.argv) < 3:
+    print("Error: need to specify model name and sensitivity")
+    print("Usage: python kws.py your.model 0.5")
+    sys.exit(-1)
+
+model = sys.argv[1]
+detectionSensitivity = round(float(sys.argv[2]), 2)
+
+# capture SIGINT signal, e.g., Ctrl+C
+signal.signal(signal.SIGINT, signal_handler)
+
+detector = snowboydecoder.HotwordDetector(model, sensitivity=detectionSensitivity)
+print('Listening... Press Ctrl+C to exit')
+
+# main loop; always release the audio stream, even if detection raises
+try:
+    detector.start(detected_callback=hotword_detected_callback,
+                   interrupt_check=interrupt_callback,
+                   sleep_time=0.03)
+finally:
+    detector.terminate()
diff --git a/speech/resources/common.res b/speech/resources/common.res
diff --git a/speech/resources/ding.wav b/speech/resources/ding.wav
diff --git a/speech/resources/dong.wav b/speech/resources/dong.wav
diff --git a/speech/smart mirror.pmdl b/speech/smart mirror.pmdl
diff --git a/speech/snowboy.pmdl b/speech/snowboy.pmdl
diff --git a/speech/snowboydecoder.py b/speech/snowboydecoder.py
@@ -0,0 +1,183 @@
+#!/usr/bin/env python
+
+import collections
+import pyaudio
+import snowboydetect
+import time
+import wave
+import os
+import logging
+
+logging.basicConfig()
+logger = logging.getLogger("snowboy")
+logger.setLevel(logging.INFO)
+TOP_DIR = os.path.dirname(os.path.abspath(__file__))
+
+RESOURCE_FILE = os.path.join(TOP_DIR, "resources/common.res")
+DETECT_DING = os.path.join(TOP_DIR, "resources/ding.wav")
+DETECT_DONG = os.path.join(TOP_DIR, "resources/dong.wav")
+
+
+class RingBuffer(object):
+    """Ring buffer to hold audio from PortAudio.
+
+    Backed by a bounded deque: once `size` items are stored, appending more
+    discards the oldest ones.
+    """
+    def __init__(self, size = 4096):
+        self._buf = collections.deque(maxlen=size)
+
+    def extend(self, data):
+        """Adds data to the end of buffer"""
+        self._buf.extend(data)
+
+    def get(self):
+        """Retrieves data from the beginning of buffer and clears it"""
+        # bytearray accepts both 1-char strings (Python 2) and ints
+        # (Python 3 bytes iteration), unlike ''.join which only works on str
+        tmp = bytes(bytearray(self._buf))
+        self._buf.clear()
+        return tmp
+
+
+def play_audio_file(fname=DETECT_DING):
+    """Simple callback function to play a wave file. By default it plays
+    a Ding sound.
+
+    The wave file, the output stream and the PyAudio instance are released
+    even if reading or playback raises.
+
+    :param str fname: wave file name
+    :return: None
+    """
+    ding_wav = wave.open(fname, 'rb')
+    try:
+        ding_data = ding_wav.readframes(ding_wav.getnframes())
+        sample_width = ding_wav.getsampwidth()
+        num_channels = ding_wav.getnchannels()
+        frame_rate = ding_wav.getframerate()
+    finally:
+        # the original never closed the file handle
+        ding_wav.close()
+
+    audio = pyaudio.PyAudio()
+    try:
+        stream_out = audio.open(
+            format=audio.get_format_from_width(sample_width),
+            channels=num_channels,
+            rate=frame_rate, input=False, output=True)
+        try:
+            stream_out.start_stream()
+            stream_out.write(ding_data)
+            time.sleep(0.2)
+            stream_out.stop_stream()
+        finally:
+            stream_out.close()
+    finally:
+        audio.terminate()
+
+
+class HotwordDetector(object):
+    """
+    Snowboy decoder to detect whether a keyword specified by `decoder_model`
+    exists in a microphone input stream.
+
+    :param decoder_model: decoder model file path, a string or a list of strings
+    :param resource: resource file path.
+    :param sensitivity: decoder sensitivity, a float or a list of floats.
+                              The bigger the value, the more sensitive the
+                              decoder. If an empty list is provided, then the
+                              default sensitivity in the model will be used.
+    :param audio_gain: multiply input volume by this factor.
+    """
+    def __init__(self, decoder_model,
+                 resource=RESOURCE_FILE,
+                 sensitivity=[],
+                 audio_gain=1):
+        # NOTE: the `[]` default is never mutated below (only rebound), so
+        # sharing it between calls is harmless.
+
+        def audio_callback(in_data, frame_count, time_info, status):
+            # Store the captured frames; the returned buffer is a block of
+            # zero bytes of the same length (the stream is input-only).
+            self.ring_buffer.extend(in_data)
+            play_data = b"\x00" * len(in_data)
+            return play_data, pyaudio.paContinue
+
+        # Normalize scalars to single-element lists
+        if not isinstance(decoder_model, list):
+            decoder_model = [decoder_model]
+        if not isinstance(sensitivity, list):
+            sensitivity = [sensitivity]
+        model_str = ",".join(decoder_model)
+
+        self.detector = snowboydetect.SnowboyDetect(
+            resource_filename=resource, model_str=model_str)
+        self.detector.SetAudioGain(audio_gain)
+        self.num_hotwords = self.detector.NumHotwords()
+
+        # A single sensitivity applies to every hotword when several models
+        # are given
+        if len(decoder_model) > 1 and len(sensitivity) == 1:
+            sensitivity = sensitivity*self.num_hotwords
+        if len(sensitivity) != 0:
+            assert self.num_hotwords == len(sensitivity), \
+                "number of hotwords in decoder_model (%d) and sensitivity " \
+                "(%d) does not match" % (self.num_hotwords, len(sensitivity))
+        sensitivity_str = ",".join([str(t) for t in sensitivity])
+        if len(sensitivity) != 0:
+            self.detector.SetSensitivity(sensitivity_str)
+
+        # Sized as channels * sample rate * 5
+        self.ring_buffer = RingBuffer(
+            self.detector.NumChannels() * self.detector.SampleRate() * 5)
+        self.audio = pyaudio.PyAudio()
+        self.stream_in = self.audio.open(
+            input=True, output=False,
+            # floor division keeps the sample width an int on Python 3 too
+            format=self.audio.get_format_from_width(
+                self.detector.BitsPerSample() // 8),
+            channels=self.detector.NumChannels(),
+            rate=self.detector.SampleRate(),
+            frames_per_buffer=2048,
+            stream_callback=audio_callback)
+
+
+    def start(self, detected_callback=play_audio_file,
+              interrupt_check=lambda: False,
+              sleep_time=0.03):
+        """
+        Start the voice detector. For every `sleep_time` second it checks the
+        audio buffer for triggering keywords. If detected, then call
+        corresponding function in `detected_callback`, which can be a single
+        function (single model) or a list of callback functions (multiple
+        models). Every loop it also calls `interrupt_check` -- if it returns
+        True, then breaks from the loop and return.
+
+        :param detected_callback: a function or list of functions. The number of
+                                  items must match the number of models in
+                                  `decoder_model`.
+        :param interrupt_check: a function that returns True if the main loop
+                                needs to stop.
+        :param float sleep_time: how much time in second every loop waits.
+        :return: None
+        """
+        if interrupt_check():
+            logger.debug("detect voice return")
+            return
+
+        if not isinstance(detected_callback, list):
+            detected_callback = [detected_callback]
+        if len(detected_callback) == 1 and self.num_hotwords > 1:
+            # Build a new list: `*=` would grow the caller's list in place
+            detected_callback = detected_callback * self.num_hotwords
+
+        assert self.num_hotwords == len(detected_callback), \
+            "Error: hotwords in your models (%d) do not match the number of " \
+            "callbacks (%d)" % (self.num_hotwords, len(detected_callback))
+
+        logger.debug("detecting...")
+
+        while True:
+            if interrupt_check():
+                logger.debug("detect voice break")
+                break
+            data = self.ring_buffer.get()
+            if len(data) == 0:
+                # nothing captured yet; wait before polling again
+                time.sleep(sleep_time)
+                continue
+
+            ans = self.detector.RunDetection(data)
+            if ans == -1:
+                logger.warning("Error initializing streams or reading audio data")
+            elif ans > 0:
+                # `ans` is the 1-based index of the detected hotword
+                message = "Keyword " + str(ans) + " detected at time: "
+                message += time.strftime("%Y-%m-%d %H:%M:%S",
+                                         time.localtime(time.time()))
+                logger.info(message)
+                callback = detected_callback[ans-1]
+                if callback is not None:
+                    callback()
+
+        logger.debug("finished.")
+
+    def terminate(self):
+        """
+        Terminate audio stream. Users cannot call start() again to detect.
+        :return: None
+        """
+        self.stream_in.stop_stream()
+        self.stream_in.close()
+        self.audio.terminate()
diff --git a/speech/snowboydecoder.pyc b/speech/snowboydecoder.pyc