-
Notifications
You must be signed in to change notification settings - Fork 71
/
detect_from_microphone.py
96 lines (83 loc) · 2.9 KB
/
detect_from_microphone.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# Copyright 2022 David Scripka. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Imports
import pyaudio
import numpy as np
from openwakeword.model import Model
import argparse
# Parse input arguments
parser = argparse.ArgumentParser()
parser.add_argument(
    "--chunk_size",
    help="How much audio (in number of samples) to predict on at once",
    type=int,
    default=1280,
    required=False
)
parser.add_argument(
    "--model_path",
    help="The path of a specific model to load",
    type=str,
    default="",
    required=False
)
parser.add_argument(
    "--inference_framework",
    # Fixed: help text was missing the closing parenthesis
    help="The inference framework to use (either 'onnx' or 'tflite')",
    type=str,
    default='tflite',
    required=False
)
args = parser.parse_args()
# Get microphone stream
FORMAT = pyaudio.paInt16   # 16-bit signed integer samples
CHANNELS = 1               # mono capture
RATE = 16000               # sample rate in Hz
CHUNK = args.chunk_size    # samples delivered per stream read

# NOTE: the PyAudio handle gets its own name so that the capture loop's
# `audio` ndarray does not shadow it and drop the only reference to the
# PortAudio session object.
audio_interface = pyaudio.PyAudio()
mic_stream = audio_interface.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)
# Load pre-trained openwakeword models
# Build the constructor arguments once: a specific model path, when given,
# restricts loading to that single model; otherwise all defaults are loaded.
model_kwargs = {"inference_framework": args.inference_framework}
if args.model_path != "":
    model_kwargs["wakeword_models"] = [args.model_path]
owwModel = Model(**model_kwargs)

# Number of loaded models, used below to size the terminal output
n_models = len(owwModel.models)
# Run capture loop continuously, checking for wakewords
if __name__ == "__main__":
    # Generate output string header
    print("\n\n")
    print("#" * 100)
    print("Listening for wakewords...")
    print("#" * 100)
    # Reserve blank terminal lines that the in-place table redraw below
    # will repeatedly overwrite
    print("\n" * (n_models * 3))

    while True:
        # Get one chunk of raw 16-bit mono audio from the microphone
        audio = np.frombuffer(mic_stream.read(CHUNK), dtype=np.int16)

        # Feed to openWakeWord model
        prediction = owwModel.predict(audio)

        # Column titles
        n_spaces = 16
        output_string_header = """
Model Name | Score | Wakeword Status
--------------------------------------
"""
        for mdl in owwModel.prediction_buffer.keys():
            # Add scores in formatted table; the minus sign is stripped so
            # tiny negative scores keep the fixed-width column aligned
            scores = list(owwModel.prediction_buffer[mdl])
            curr_score = format(scores[-1], '.20f').replace("-", "")
            status = "--" + " " * 20 if scores[-1] <= 0.5 else "Wakeword Detected!"
            output_string_header += f"""{mdl}{" " * (n_spaces - len(mdl))} | {curr_score[0:5]} | {status}
"""

        # Move the cursor back up (ANSI "cursor up" escape) and redraw the
        # results table in place
        print("\033[F" * (4 * n_models + 1))
        print(output_string_header, " ", end='\r')