diff --git a/fxgl-intelligence/src/main/java/com/almasb/fxgl/intelligence/WebAPI.java b/fxgl-intelligence/src/main/java/com/almasb/fxgl/intelligence/WebAPI.java index 84b3644eb..1775a54f9 100644 --- a/fxgl-intelligence/src/main/java/com/almasb/fxgl/intelligence/WebAPI.java +++ b/fxgl-intelligence/src/main/java/com/almasb/fxgl/intelligence/WebAPI.java @@ -16,7 +16,6 @@ /** * Stores constants related to web-api projects. - * Changes to these values must be synchronized with the web-api project (https://github.com/AlmasB/web-api). * * @author Almas Baim (https://github.com/AlmasB) */ @@ -31,8 +30,8 @@ public final class WebAPI { private static final Map URLS = extractURLs(); public static final URL TEXT_TO_SPEECH_API = URLS.get("tts/index.html"); - public static final String SPEECH_RECOGNITION_API = "https://almasb.github.io/web-api/speech-recog-v1/"; - public static final String GESTURE_RECOGNITION_API = "https://almasb.github.io/web-api/gesture-recog-v1/"; + public static final URL SPEECH_RECOGNITION_API = URLS.get("speechrecog/index.html"); + public static final URL GESTURE_RECOGNITION_API = URLS.get("gesturerecog/index.html"); public static final int TEXT_TO_SPEECH_PORT = 55550; public static final int SPEECH_RECOGNITION_PORT = 55555; @@ -44,7 +43,10 @@ private static Map extractURLs() { List.of( "rpc-common.js", "tts/index.html", - "tts/script.js" + "tts/script.js", + "gesturerecog/index.html", + "speechrecog/index.html", + "speechrecog/script.js" ).forEach(relativeURL -> { map.put(relativeURL, extractURL(relativeURL, "intelligence/" + relativeURL)); }); diff --git a/fxgl-intelligence/src/main/kotlin/com/almasb/fxgl/intelligence/gesturerecog/HandTrackingService.kt b/fxgl-intelligence/src/main/kotlin/com/almasb/fxgl/intelligence/gesturerecog/HandTrackingService.kt index 3dc235a36..77799c590 100644 --- a/fxgl-intelligence/src/main/kotlin/com/almasb/fxgl/intelligence/gesturerecog/HandTrackingService.kt +++ 
b/fxgl-intelligence/src/main/kotlin/com/almasb/fxgl/intelligence/gesturerecog/HandTrackingService.kt @@ -78,7 +78,7 @@ class HandTrackingService : EngineService() { options.addArguments("--use-fake-ui-for-media-stream") webDriver = ChromeDriver(options) - webDriver!!.get(WebAPI.GESTURE_RECOGNITION_API) + webDriver!!.get(WebAPI.GESTURE_RECOGNITION_API.toExternalForm()) // we are ready to use the web api service } catch (e: Exception) { diff --git a/fxgl-intelligence/src/main/resources/com/almasb/fxgl/intelligence/gesturerecog/index.html b/fxgl-intelligence/src/main/resources/com/almasb/fxgl/intelligence/gesturerecog/index.html new file mode 100644 index 000000000..2af8183bb --- /dev/null +++ b/fxgl-intelligence/src/main/resources/com/almasb/fxgl/intelligence/gesturerecog/index.html @@ -0,0 +1,64 @@ + + + + + + + + + + + +
+

Processing MediaPipe Camera Input...

+ +
+ + + + + \ No newline at end of file diff --git a/fxgl-intelligence/src/main/resources/com/almasb/fxgl/intelligence/speechrecog/index.html b/fxgl-intelligence/src/main/resources/com/almasb/fxgl/intelligence/speechrecog/index.html new file mode 100644 index 000000000..7cae3ed6b --- /dev/null +++ b/fxgl-intelligence/src/main/resources/com/almasb/fxgl/intelligence/speechrecog/index.html @@ -0,0 +1,15 @@ + + + + + + + Speech-to-text + + + +

Speech-to-text service

+
+
+
+
\ No newline at end of file
diff --git a/fxgl-intelligence/src/main/resources/com/almasb/fxgl/intelligence/speechrecog/script.js b/fxgl-intelligence/src/main/resources/com/almasb/fxgl/intelligence/speechrecog/script.js
new file mode 100644
index 000000000..644fb1ce1
--- /dev/null
+++ b/fxgl-intelligence/src/main/resources/com/almasb/fxgl/intelligence/speechrecog/script.js
@@ -0,0 +1,50 @@
+// Resolve the Web Speech API constructors, falling back to the webkit-prefixed names.
+// The prefixed names are read through `window` so a missing one is undefined, not a ReferenceError.
+var SpeechRecognition = SpeechRecognition || window.webkitSpeechRecognition
+var SpeechGrammarList = SpeechGrammarList || window.webkitSpeechGrammarList
+var SpeechRecognitionEvent = SpeechRecognitionEvent || window.webkitSpeechRecognitionEvent
+
+// The port must stay in sync with WebAPI.SPEECH_RECOGNITION_PORT (55555).
+const socket = new WebSocket('ws://localhost:55555');
+
+socket.addEventListener('open', function (event) {
+    initService();
+});
+
+// set up speech recog
+const recognition = new SpeechRecognition();
+recognition.continuous = true;
+recognition.lang = 'en-GB';
+recognition.interimResults = false;
+recognition.maxAlternatives = 1;
+
+recognition.onresult = (event) => {
+    // latest result
+    var result = event.results[event.results.length - 1][0];
+    var inputText = result.transcript;
+    var confidence = result.confidence;
+
+    // only call if we have something
+    if (inputText.length > 0) {
+        rpcRun("onSpeechInput", inputText, confidence);
+    }
+}
+
+// Single restart point: the end event is fired whenever a session ends, including
+// after an error, so restarting here keeps the service listening.
+recognition.onend = (event) => {
+    recognition.start();
+}
+
+recognition.onerror = function(event) {
+    // Deliberately no start() here: the end handler above already restarts the session,
+    // and calling start() on a session that is already started throws InvalidStateError.
+}
+
+recognition.start();
+
+// Called once the socket is open; forwards "initService" through rpcRun
+// (rpcRun is not defined in this script - presumably provided by rpc-common.js).
+function initService() {
+    rpcRun("initService");
+}
\ No newline at end of file