complete migration of web-api to local repo

AlmasB · Mar 26, 2024 · 80c164a · 80c164a
1 parent d08facc
commit 80c164a
Show file tree

Hide file tree

Showing 5 changed files with 129 additions and 5 deletions.
diff --git a/fxgl-intelligence/src/main/java/com/almasb/fxgl/intelligence/WebAPI.java b/fxgl-intelligence/src/main/java/com/almasb/fxgl/intelligence/WebAPI.java
@@ -16,7 +16,6 @@
 
 /**
  * Stores constants related to web-api projects.
- * Changes to these values must be synchronized with the web-api project (https://github.com/AlmasB/web-api).
  *
  * @author Almas Baim (https://github.com/AlmasB)
  */
@@ -31,8 +30,8 @@ public final class WebAPI {
  private static final Map<String, URL> URLS = extractURLs();
 
  public static final URL TEXT_TO_SPEECH_API = URLS.get("tts/index.html");
- public static final String SPEECH_RECOGNITION_API = "https://almasb.github.io/web-api/speech-recog-v1/";
- public static final String GESTURE_RECOGNITION_API = "https://almasb.github.io/web-api/gesture-recog-v1/";
+ public static final URL SPEECH_RECOGNITION_API = URLS.get("speechrecog/index.html");
+ public static final URL GESTURE_RECOGNITION_API = URLS.get("gesturerecog/index.html");
 
  public static final int TEXT_TO_SPEECH_PORT = 55550;
  public static final int SPEECH_RECOGNITION_PORT = 55555;
@@ -44,7 +43,10 @@ private static Map<String, URL> extractURLs() {
  List.of(
  "rpc-common.js",
  "tts/index.html",
- "tts/script.js"
+ "tts/script.js",
+ "gesturerecog/index.html",
+ "speechrecog/index.html",
+ "speechrecog/script.js"
  ).forEach(relativeURL -> {
  map.put(relativeURL, extractURL(relativeURL, "intelligence/" + relativeURL));
  });

diff --git a/...lligence/src/main/kotlin/com/almasb/fxgl/intelligence/gesturerecog/HandTrackingService.kt b/...lligence/src/main/kotlin/com/almasb/fxgl/intelligence/gesturerecog/HandTrackingService.kt
@@ -78,7 +78,7 @@ class HandTrackingService : EngineService() {
  options.addArguments("--use-fake-ui-for-media-stream")
 
  webDriver = ChromeDriver(options)
- webDriver!!.get(WebAPI.GESTURE_RECOGNITION_API)
+ webDriver!!.get(WebAPI.GESTURE_RECOGNITION_API.toExternalForm())
 
  // we are ready to use the web api service
  } catch (e: Exception) {

diff --git a/fxgl-intelligence/src/main/resources/com/almasb/fxgl/intelligence/gesturerecog/index.html b/fxgl-intelligence/src/main/resources/com/almasb/fxgl/intelligence/gesturerecog/index.html
@@ -0,0 +1,64 @@
+<!DOCTYPE html>
+<html>
+<head>
+    <meta charset="utf-8">
+    <script src="https://cdn.jsdelivr.net/npm/@mediapipe/camera_utils/camera_utils.js" crossorigin="anonymous"></script>
+    <script src="https://cdn.jsdelivr.net/npm/@mediapipe/control_utils/control_utils.js" crossorigin="anonymous"></script>
+    <script src="https://cdn.jsdelivr.net/npm/@mediapipe/drawing_utils/drawing_utils.js" crossorigin="anonymous"></script>
+    <script src="https://cdn.jsdelivr.net/npm/@mediapipe/hands/hands.js" crossorigin="anonymous"></script>
+</head>
+
+<body>
+    <div class="container">
+        <h1>Processing MediaPipe Camera Input...</h1>
+        <video class="input_video" hidden></video>
+    </div>
+
+    <script type="module">
+        const videoElement = document.getElementsByClassName('input_video')[0];
+
+        // Open a WebSocket to localhost:55560; the URL must use the "ws://" scheme separator.
+        const socket = new WebSocket('ws://localhost:55560');
+
+        // Sends one message per detected hand, formatted as "<id>,x,y,z,x,y,z,...," (trailing comma included).
+        function onResults(results) {
+            if (results.multiHandLandmarks) {
+                var id = 0; // hand id: index of the hand within this result
+                for (const landmarks of results.multiHandLandmarks) {
+                    var data = "" + id + ",";
+                    landmarks.forEach(point => {
+                        data += point.x + "," + point.y + "," + point.z + ",";
+                    });
+                    // send() throws InvalidStateError while the socket is still connecting, so drop frames until it is open
+                    if (socket.readyState === WebSocket.OPEN) {
+                        socket.send(data);
+                    }
+                    id++;
+                }
+            }
+        }
+
+        const hands = new Hands({locateFile: (file) => {
+            return `https://cdn.jsdelivr.net/npm/@mediapipe/hands/${file}`;
+        }});
+        hands.setOptions({
+            maxNumHands: 2,
+            modelComplexity: 1,
+            minDetectionConfidence: 0.5,
+            minTrackingConfidence: 0.5
+        });
+        hands.onResults(onResults);
+
+        // pick a mid-quality resolution
+        const camera = new Camera(videoElement, {
+            onFrame: async () => {
+                await hands.send({image: videoElement});
+            },
+            width: 853,
+            height: 480
+        });
+        camera.start();
+    </script>
+
+</body>
+</html>
diff --git a/fxgl-intelligence/src/main/resources/com/almasb/fxgl/intelligence/speechrecog/index.html b/fxgl-intelligence/src/main/resources/com/almasb/fxgl/intelligence/speechrecog/index.html
@@ -0,0 +1,15 @@
+<!DOCTYPE html>
+<html lang="en">
+ <head>
+ <meta charset="utf-8" />
+ <meta http-equiv="X-UA-Compatible" content="IE=edge" />
+ <meta name="viewport" content="width=device-width" />
+ <title>Speech-to-text</title>
+ </head>
+ <!-- Host page for the speech-to-text script; it shows only a heading and loads two scripts. -->
+ <body>
+ <h2>Speech-to-text service</h2>
+ <script src="../rpc-common.js"></script> <!-- shared script from the parent directory, loaded first; presumably defines rpcRun (confirm) -->
+ <script src="script.js"></script> <!-- page-specific script, loaded after rpc-common.js -->
+ </body>
+</html>
diff --git a/fxgl-intelligence/src/main/resources/com/almasb/fxgl/intelligence/speechrecog/script.js b/fxgl-intelligence/src/main/resources/com/almasb/fxgl/intelligence/speechrecog/script.js
@@ -0,0 +1,43 @@
+var SpeechRecognition = SpeechRecognition || webkitSpeechRecognition
+var SpeechGrammarList = SpeechGrammarList || window.webkitSpeechGrammarList
+var SpeechRecognitionEvent = SpeechRecognitionEvent || webkitSpeechRecognitionEvent
+
+const socket = new WebSocket('ws:https://localhost:55555');
+
+socket.addEventListener('open', function (event) {
+ initService();
+});
+
+// set up speech recog
+const recognition = new SpeechRecognition();
+recognition.continuous = true;
+recognition.lang = 'en-GB';
+recognition.interimResults = false;
+recognition.maxAlternatives = 1;
+
+recognition.onresult = (event) => {
+ // latest result
+ var result = event.results[event.results.length - 1][0];
+ var inputText = result.transcript;
+ var confidence = result.confidence;
+
+ // only call if we have something
+ if (inputText.length > 0) {
+ rpcRun("onSpeechInput", inputText, confidence);
+ }
+}
+
+recognition.onend = (event) => {
+ recognition.start();
+}
+
+recognition.onerror = function(event) {
+ // not much use recording event.error, so just restart
+ recognition.start();
+}
+
+recognition.start();
+
+function initService() {
+ rpcRun("initService");
+}