From c3243acd68785186f0611ece0ba62c2d86e6de82 Mon Sep 17 00:00:00 2001 From: rerender2021 Date: Tue, 21 Mar 2023 22:27:29 +0800 Subject: [PATCH 1/4] use gpu server --- .gitignore | 8 ++- package-lock.json | 27 ++++++++++ package.json | 3 +- src/app.tsx | 89 +++++++++++++++++++++++++++------ src/asr/asr.ts | 88 ++++++++++++++++++++++++--------- src/asr/base.ts | 5 +- src/asr/index.ts | 3 +- src/asr/postasr.ts | 107 ++++++++++++++++++++++++++++++++++++++++ src/common/index.ts | 2 +- src/config/index.ts | 2 +- src/layout/index.ts | 17 ++++--- src/nlp/helsinki-nlp.ts | 34 ++++++++++--- src/shadow/common.ts | 34 +++++++++++-- src/shadow/display.ts | 98 ++++++++++++++++++++++++++++-------- src/shadow/translate.ts | 77 ++++++++++++++++++++++------- 15 files changed, 492 insertions(+), 102 deletions(-) create mode 100644 src/asr/postasr.ts diff --git a/.gitignore b/.gitignore index a58d1c5..699ca83 100644 --- a/.gitignore +++ b/.gitignore @@ -6,5 +6,11 @@ dist /*.traineddata asr-server +asr-server-* nlp-server -/config.json \ No newline at end of file +nlp-gpu-server +/config.json +subtitle +perf +log +*.exe \ No newline at end of file diff --git a/package-lock.json b/package-lock.json index 223021d..dc79e67 100644 --- a/package-lock.json +++ b/package-lock.json @@ -246,6 +246,11 @@ "picomatch": "^2.3.1" } }, + "@textlint/ast-node-types": { + "version": "13.3.1", + "resolved": "https://registry.npmjs.org/@textlint/ast-node-types/-/ast-node-types-13.3.1.tgz", + "integrity": "sha512-/qeEjW3hIFpGwESsCkJRroja7mBOlo9wqyx8G4fwayq4FZRvJMm/9DhIo77jd/4wm/VSJcVVr+fs+rVa4jrY5A==" + }, "@types/debounce": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/@types/debounce/-/debounce-1.2.1.tgz", @@ -485,6 +490,11 @@ } } }, + "boundary": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/boundary/-/boundary-2.0.0.tgz", + "integrity": "sha512-rJKn5ooC9u8q13IMCrW0RSp31pxBCHE3y9V/tp3TdWSLf8Em3p6Di4NBpfzbJge9YjjFEsD0RtFEjtvHL5VyEA==" + }, "brace-expansion": { "version": "1.1.11", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", @@ -2177,6 +2187,15 @@ "lru-cache": "^6.0.0" } }, + "sentence-splitter": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/sentence-splitter/-/sentence-splitter-4.2.0.tgz", + "integrity": "sha512-1Ww0iofAbR56tu6lVJ9Yh8Sj5ukeVjikBQ4sR5sNWM0kc+2AJe3p5F2o2qyuf5dJ4KVs1RbJpNkwEiMBCz7pKg==", + "requires": { + "@textlint/ast-node-types": "^13.2.0", + "structured-source": "^4.0.0" + } + }, "set-blocking": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/set-blocking/-/set-blocking-2.0.0.tgz", @@ -2340,6 +2359,14 @@ "integrity": "sha1-PFMZQukIwml8DsNEhYwobHygpgo=", "dev": true }, + "structured-source": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/structured-source/-/structured-source-4.0.0.tgz", + "integrity": "sha512-qGzRFNJDjFieQkl/sVOI2dUjHKRyL9dAJi2gCPGJLbJHBIkyOHxjuocpIEfbLioX+qSJpvbYdT49/YCdMznKxA==", + "requires": { + "boundary": "^2.0.0" + } + }, "supports-color": { "version": "5.5.0", "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz", diff --git a/package.json b/package.json index add2292..29d5a18 100644 --- a/package.json +++ b/package.json @@ -34,6 +34,7 @@ "dependencies": { "ave-react": "^0.1.4", "axios": "^1.3.2", - "debounce": "^1.2.1" + "debounce": "^1.2.1", + "sentence-splitter": "^4.2.0" } } diff --git a/src/app.tsx b/src/app.tsx index 26697bc..f7ef1b0 100644 --- a/src/app.tsx +++ b/src/app.tsx @@ -1,12 +1,13 @@ -import React, { useCallback, useEffect, useMemo } from "react"; -import { AveRenderer, Grid, Window, getAppContext, IIconResource, IWindowComponentProps, Button, CheckBox, ICheckBoxComponentProps } from "ave-react"; +import React, { useCallback, useEffect, useMemo, useState } from "react"; +import { AveRenderer, Grid, Window, getAppContext, IIconResource, IWindowComponentProps, Button, CheckBox, ICheckBoxComponentProps, ScrollBar, Label, IScrollBarComponentProps } from "ave-react"; import { App, ThemePredefined_Dark, CheckValue } from "ave-ui"; import { VoskAsrEngine } from "./asr"; import { HelsinkiNlpEngine } from "./nlp"; import { containerLayout, controlLayout } from "./layout"; import { iconResource } from "./resource"; -import { onMeasure, onTranslate, shadowRelated } from "./shadow"; +import { onMeasure, onTranslate, safe, shadowRelated } from "./shadow"; import { getAsrConfig, getNlpConfig } from "./config"; +import axios from "axios"; function onInit(app: App) { const context = getAppContext(); @@ -20,6 +21,15 @@ function initTheme() { themeDark.SetStyle(themeImage, 0); } +enum ButtonText { + Measure = "设置字幕区", + Recognize = "语音识别", + BreakLongText = "长句分解", + SetTopMost = "字幕置顶", + SubtitleEn = "英文字幕", + SubtitleZh = "中文字幕", +} + export function Echo() { const asrEngine = useMemo( () => @@ -57,11 +67,14 @@ export function Echo() { }, []); const onSetRecognize = useCallback((sender) => { + shadowRelated.subtitleQueue = []; + let shouldRecognize = false; const checkValue = sender.GetValue(); if (checkValue === CheckValue.Unchecked) { shouldRecognize = false; + shadowRelated.onUpdateTranslationResult({ en: "", zh: "" }); } else if (checkValue === CheckValue.Checked) { shouldRecognize = true; } @@ -69,41 +82,87 @@ export function Echo() { shadowRelated.shouldRecognize = shouldRecognize; }, []); - const onSetPunct = useCallback((sender) => { - let shouldResotrePunct = false; + const onSetBreakLongText = useCallback((sender) => { + let value = false; const checkValue = sender.GetValue(); if (checkValue === CheckValue.Unchecked) { - shouldResotrePunct = false; + value = false; } else if (checkValue === CheckValue.Checked) { - shouldResotrePunct = true; + value = true; } - shadowRelated.shouldResotrePunct = shouldResotrePunct; + shadowRelated.shouldBreakLongText = value; + }, []); + + const onSetDisplaySubtitle = useCallback((sender) => { + const checkValue = sender.GetValue(); + const text = sender.GetText(); + const isChecked = checkValue === CheckValue.Checked; + if (text === ButtonText.SubtitleEn) { + shadowRelated.subtitleConfig.en = isChecked; + } else if (text === ButtonText.SubtitleZh) { + shadowRelated.subtitleConfig.zh = isChecked; + } + shadowRelated.onUpdateTranslationConfig(); }, []); + const [fontSize, setFontSize] = useState(16); + const onSetFontSize = useCallback((sender) => { + const fontSize = sender.GetValue(); + shadowRelated.onUpdateFontSize(fontSize); + setFontSize(fontSize); + }, []); + + const [title, setTitle] = useState("Echo"); + useEffect(() => { initTheme(); asrEngine.init(); - nlpEngine.init(); + nlpEngine.init().then( + safe(async () => { + const response = await axios.get("http://localhost:8100/gpu"); + if (response.data.gpu === "True") { + console.log("great! use gpu"); + setTitle("Echo (GPU)"); + } else { + console.log("gpu is not available"); + } + }) + ); onTranslate(asrEngine, nlpEngine); }, []); return ( - + - + - + - - + + - + + + + + + + + + + + + + + + + diff --git a/src/asr/asr.ts b/src/asr/asr.ts index 4f526de..2c40a09 100644 --- a/src/asr/asr.ts +++ b/src/asr/asr.ts @@ -2,30 +2,54 @@ import axios from "axios"; import path from "path"; import fs from "fs"; import childProcess from "child_process"; -import { IAsrEngine, IAsrEngineOptions, IAsrResult } from "./base"; -import { shadowRelated } from "../shadow"; +import { IAsrEngine, IAsrEngineOptions, ISentence } from "./base"; +import { emptySentence, shadowRelated } from "../shadow"; +import { postasr } from "./postasr"; + +enum AsrVersion { + v100, + v110, +} export class VoskAsrEngine implements IAsrEngine { private options: IAsrEngineOptions; private asr: childProcess.ChildProcessWithoutNullStreams; + private version: AsrVersion; constructor(options: IAsrEngineOptions) { this.options = options; + this.version = AsrVersion.v100; + } + + getAsrPath() { + const v110 = path.resolve(process.cwd(), "asr-server-v1.1.0"); + if (fs.existsSync(v110)) { + this.version = AsrVersion.v110; + console.log("use asr-server-v1.1.0"); + return { asrDir: v110, exePath: path.resolve(v110, "./ASR-API.exe") }; + } + + const v100 = path.resolve(process.cwd(), "asr-server"); + if (fs.existsSync(v100)) { + console.log("use asr-server-v1.0.0"); + return { asrDir: v100, exePath: path.resolve(v100, "./ASR-API.exe") }; + } + + return { asrDir: "", exePath: "" }; } async init() { console.log("try to init vosk asr engine"); - const asrDir = path.resolve(process.cwd(), "asr-server"); - const exePath = path.resolve(asrDir, "./ASR-API.exe"); - if (fs.existsSync(asrDir) && fs.existsSync(exePath)) { + const { asrDir, exePath } = this.getAsrPath(); + if (asrDir && exePath) { return new Promise((resolve, reject) => { console.log("asrDir exists, start asr server", asrDir); - const asr = childProcess.spawn(`./asr-server/ASR-API.exe`, [], { windowsHide: true, detached: false /** hide console */ }); + const asr = childProcess.spawn(exePath, [], { windowsHide: true, detached: false /** hide console */ }); this.asr = asr; asr.stdout.on("data", (data) => { console.log(`stdout: ${data}`); - if (data.includes("ASR-API has been started")) { + if (data.includes("has been started")) { console.log("asr server started"); resolve(true); } @@ -53,29 +77,49 @@ export class VoskAsrEngine implements IAsrEngine { } } - async recognize(): Promise { - // const base64 = buffer.toString("base64"); - let text = ""; + private async asrApi(): Promise { + if (this.version === AsrVersion.v100) { + const response = await axios.post("http://localhost:8200/asr", {}, { timeout: 2000 }); + const result = response?.data?.result; + const data = JSON.parse(result || "{}"); + const asrText = data.partial || ""; + return asrText; + } else { + const response = await axios.post("http://localhost:8200/asr_queue", {}, { timeout: 1000 }); + const result = response?.data?.result; + const data = JSON.parse(result[result.length - 1] || "{}"); + const asrText = data.partial || ""; + return asrText; + } + } + + async getAsrResult(): Promise { + let asrResult = ""; try { - const timeout = this.options?.timeout || 3000; - const response = await axios.post("http://localhost:8200/asr", {}, { timeout }); - const data = JSON.parse(response.data.result); - console.log(data); + asrResult = await this.asrApi(); + } catch (error) { + console.log(`asr failed: ${error.message}`); + } finally { + return asrResult; + } + } - text = data.partial || data.text || ""; + async recognize(): Promise { + let sentence: ISentence = emptySentence; + try { + const asrText = await this.getAsrResult(); - if (text && shadowRelated.shouldResotrePunct) { - const withPunctResponse = await axios.post("http://localhost:8200/punct", { text }, { timeout }); - if (withPunctResponse.data.text) { - text = withPunctResponse.data.text; - console.log({ text }); - } + if (shadowRelated.shouldBreakLongText) { + const result = await postasr(asrText); + sentence = { text: result, asr: asrText }; + } else { + sentence = { text: asrText, asr: asrText }; } } catch (error) { console.log(`asr failed: ${error.message}`); this.options?.onError(error.message); } finally { - return { text }; + return sentence; } } } diff --git a/src/asr/base.ts b/src/asr/base.ts index b660c39..5602808 100644 --- a/src/asr/base.ts +++ b/src/asr/base.ts @@ -1,5 +1,6 @@ -export interface IAsrResult { +export interface ISentence { text: string; + asr: string } export interface IAsrEngineOptions { @@ -13,7 +14,7 @@ export interface IAsrEngineConstructor { } export interface IAsrEngine { - recognize(): Promise; + recognize(): Promise; init(): void; destroy(): void; } diff --git a/src/asr/index.ts b/src/asr/index.ts index 16099b9..c234349 100644 --- a/src/asr/index.ts +++ b/src/asr/index.ts @@ -1 +1,2 @@ -export * from "./asr"; \ No newline at end of file +export * from "./asr"; +export * from "./base"; \ No newline at end of file diff --git a/src/asr/postasr.ts b/src/asr/postasr.ts new file mode 100644 index 0000000..07d3fdd --- /dev/null +++ b/src/asr/postasr.ts @@ -0,0 +1,107 @@ +import axios from "axios"; +import { split } from "sentence-splitter"; + +class SessionManager { + private prevTextLength: number = Number.MAX_SAFE_INTEGER; + isNewSession(asrText: string) { + const result = asrText.length < this.prevTextLength; + return result; + } + + update(asrText: string) { + this.prevTextLength = asrText.length; + } +} + +const sessionManager = new SessionManager(); +const maxTextLength = 100; +const longTextLength = 60; +let tokenIndex = 0; + +async function getTextToPunct(asrText: string) { + if (asrText.length >= maxTextLength) { + const punctResponse = await axios.post("http://localhost:8200/punct", { text: asrText }, { timeout: 1000 }); + const withPunct = punctResponse?.data?.text || ""; + return withPunct; + } else { + return asrText; + } +} + +function getSubarray(array: any[], from: number, to: number) { + return array.slice(from, to + 1); +} + +function getSentences(withPunct: string) { + const raw = split(withPunct) + ?.map((each) => each?.raw?.trim()) + .filter((each) => Boolean(each)); + const sentences = []; + raw.forEach((each) => { + if (each.length >= longTextLength) { + sentences.push(...each.split(",").map((each) => each?.trim())); + } else { + sentences.push(each); + } + }); + return sentences; +} + +async function punctText(text: string) { + const withPunct = await getTextToPunct(text); + const sentences = getSentences(withPunct); + + // prettier-ignore + const toIgnore = sentences.length === 1 ? [] : ( + sentences.length >= 3 ? + getSubarray(sentences, 0, sentences.length - 2) : + [sentences[0]] + ); + + let offset = 0; + toIgnore.forEach((each) => { + offset += each.split(" ").length; + }); + + const allToken = text.split(" "); + const theRest = getSubarray(allToken, offset, allToken.length - 1); + const lastUnstable = theRest.join(" "); + return { lastUnstable, offset, sentences }; +} + +export async function postasr(asrText: string) { + try { + if (asrText) { + const isNewSession = sessionManager.isNewSession(asrText); + sessionManager.update(asrText); + if (isNewSession) { + tokenIndex = 0; + } + + let result = asrText; + + if (asrText.length >= maxTextLength) { + if (tokenIndex === 0) { + const { lastUnstable, offset } = await punctText(asrText); + tokenIndex = offset; + result = lastUnstable; + } else { + const allToken = asrText.split(" "); + const theRest = getSubarray(allToken, tokenIndex, allToken.length - 1); + const currentUnstable = theRest.join(" "); + result = currentUnstable; + + // still long text, punct it again + if (currentUnstable.length >= maxTextLength) { + const { lastUnstable, offset } = await punctText(currentUnstable); + tokenIndex += offset; + result = lastUnstable; + } + } + } + return result; + } + } catch (error) { + console.log(`postasr failed: ${error?.message}`); + } +} diff --git a/src/common/index.ts b/src/common/index.ts index 6fa604d..b5de66f 100644 --- a/src/common/index.ts +++ b/src/common/index.ts @@ -3,4 +3,4 @@ import path from "path"; export function assetsPath(name: string) { const root = path.resolve(__dirname, "../../assets"); return path.resolve(root, `./${name}`); -} \ No newline at end of file +} diff --git a/src/config/index.ts b/src/config/index.ts index 9709e26..9d7399d 100644 --- a/src/config/index.ts +++ b/src/config/index.ts @@ -5,7 +5,7 @@ import { INlpEngineOptions } from "../nlp/base"; const defaultConfig = { /** timeout for asr and translate api call*/ - timeout: 4000, + timeout: 3500, }; export function getConfig() { diff --git a/src/layout/index.ts b/src/layout/index.ts index 63479a5..4ab18d7 100644 --- a/src/layout/index.ts +++ b/src/layout/index.ts @@ -7,12 +7,17 @@ export const containerLayout = { }; export const controlLayout = { - columns: `1`, - rows: `16dpx 32dpx 16dpx 32dpx 16dpx 32dpx 16dpx 32dpx 4dpx 1 32dpx 4dpx 150dpx`, + columns: `1 1 1 1 1`, + rows: `16dpx 32dpx 16dpx 32dpx 16dpx 32dpx 16dpx 32dpx 16dpx 32dpx 16dpx 16dpx 4dpx 1 32dpx 4dpx 150dpx`, areas: { - measure: { row: 1, column: 0 }, - recognize: { row: 3, column: 0 }, - punct: { row: 5, column: 0 }, - topmost: { row: 7, column: 0 }, + measure: { row: 1, column: 0, columnSpan: 5 }, + recognize: { row: 3, column: 0, columnSpan: 2 }, + breakLongText: { row: 5, column: 0, columnSpan: 2 }, + topmost: { row: 7, column: 0, columnSpan: 2 }, + zh: { row: 9, column: 0, columnSpan: 2 }, + en: { row: 9, column: 2, columnSpan: 2 }, + fontSizeLabel: { row: 11, column: 0 }, + fontSize: { row: 11, column: 1, columnSpan: 3 }, + fontSizeValue: { row: 11, column: 4 } }, }; diff --git a/src/nlp/helsinki-nlp.ts b/src/nlp/helsinki-nlp.ts index d6ee09a..f7e3143 100644 --- a/src/nlp/helsinki-nlp.ts +++ b/src/nlp/helsinki-nlp.ts @@ -7,23 +7,39 @@ import { INlpEngine, INlpEngineOptions, ITranslateResult } from "./base"; export class HelsinkiNlpEngine implements INlpEngine { private options: INlpEngineOptions; private nlp: childProcess.ChildProcessWithoutNullStreams; - + private cache: Record; constructor(options: INlpEngineOptions) { this.options = options; + this.cache = {}; + } + + getNlpPath() { + const gpu = path.resolve(process.cwd(), "nlp-gpu-server"); + if (fs.existsSync(gpu)) { + console.log("nlp-gpu-server exists! use it"); + return { nlpDir: gpu, exePath: path.resolve(gpu, "./NLP-GPU-API.exe") }; + } + + const cpu = path.resolve(process.cwd(), "nlp-server"); + if (fs.existsSync(cpu)) { + console.log("use nlp-server"); + return { nlpDir: cpu, exePath: path.resolve(cpu, "./NLP-API.exe") }; + } + + return { nlpDir: "", exePath: "" }; } async init() { console.log("try to init nlp engine"); - const nlpDir = path.resolve(process.cwd(), "nlp-server"); - if (fs.existsSync(nlpDir)) { + const { nlpDir, exePath } = this.getNlpPath(); + if (nlpDir && exePath) { return new Promise((resolve, reject) => { console.log("nlpDir exists, start nlp server", nlpDir); - - const nlp = childProcess.spawn(`./nlp-server/NLP-API.exe`, [`--lang-from=en`, `--lang-to=zh`, `--model-dir=.\\model`], { windowsHide: true, detached: false /** hide console */ }); + const nlp = childProcess.spawn(exePath, [`--lang-from=en`, `--lang-to=zh`, `--model-dir=.\\model`], { windowsHide: true, detached: false /** hide console */ }); this.nlp = nlp; nlp.stdout.on("data", (data) => { console.log(`stdout: ${data}`); - if (data.includes("nlp server has been started")) { + if (data.includes("has been started")) { console.log("nlp server started"); resolve(true); } @@ -53,7 +69,10 @@ export class HelsinkiNlpEngine implements INlpEngine { async translate(text: string): Promise { try { - const timeout = this.options?.timeout || 3000; + if (this.cache[text]) { + return { text: this.cache[text] }; + } + const timeout = this.options?.timeout || 1000; const translated = await axios.post( "http://localhost:8100/translate", { @@ -62,6 +81,7 @@ export class HelsinkiNlpEngine implements INlpEngine { { timeout } ); const result = translated.data.result[0].translation_text; + this.cache[text] = result; return { text: result }; } catch (error) { console.log(`translate failed: ${error.message}`); diff --git a/src/shadow/common.ts b/src/shadow/common.ts index 8aa50ba..51a2a6f 100644 --- a/src/shadow/common.ts +++ b/src/shadow/common.ts @@ -1,12 +1,25 @@ import { getAppContext } from "ave-react"; import { IGridControl, Vec2, Grid as NativeGrid, Window as NativeWindow } from "ave-ui"; +import { ISentence } from "../asr"; + +export interface ISubtitle { + zh: string; + en: string; +} + +export interface ISubtitleConfig { + zh: boolean; + en: boolean; +} export type ShadowRelatedType = { - prevAsrText: string; + prevSentence: ISentence; prevTranslation: string; shouldTranslate: boolean; shouldRecognize: boolean; - shouldResotrePunct: boolean; + shouldBreakLongText: boolean; + subtitleQueue: Array; + subtitleConfig: ISubtitleConfig; measureWindow: NativeWindow; selected: IGridControl; start: Vec2; @@ -19,15 +32,24 @@ export type ShadowRelatedType = { displayWindow: NativeWindow; defaultTopMost: boolean; selectedAreaIsEmpty(): boolean; - onUpdateTranslationResult: (text: string) => void; + onUpdateTranslationResult: (subtitle: ISubtitle) => void; + onUpdateTranslationConfig: () => void; + onUpdateFontSize: (size: number) => void; }; +export const emptySentence: ISentence = { text: "", asr: "" }; + export const shadowRelated: ShadowRelatedType = { - prevAsrText: "", + prevSentence: emptySentence, prevTranslation: "", shouldTranslate: false, shouldRecognize: false, - shouldResotrePunct: false, + shouldBreakLongText: false, + subtitleQueue: [], + subtitleConfig: { + en: true, + zh: true, + }, measureWindow: null, selected: null, start: null, @@ -43,6 +65,8 @@ export const shadowRelated: ShadowRelatedType = { return this.selectedArea.start.x === 0 && this.selectedArea.start.y === 0 && this.selectedArea.end.x === 0 && this.selectedArea.end.x === 0; }, onUpdateTranslationResult: () => {}, + onUpdateTranslationConfig: () => {}, + onUpdateFontSize: () => {}, }; globalThis.shadowRelated = shadowRelated; diff --git a/src/shadow/display.ts b/src/shadow/display.ts index 54db280..d677d94 100644 --- a/src/shadow/display.ts +++ b/src/shadow/display.ts @@ -1,5 +1,5 @@ import { safe, shadowRelated } from "./common"; -import { WindowFramePart, RichLabelColor, RichLabelBackColor, RichLabelTextColor, Rect, Byo2Font, AlignType, RichLabel as NativeRichLabel, IGridControl, DpiSize_2, DpiSize, CursorType, DockMode, Vec2, Vec4, Grid as NativeGrid, Window as NativeWindow, WindowFlag, WindowCreation, ImageContainerType, ImageData, ImageDimension, Byo2Image, AveImage, Picture as NativePicture, App, ThemePredefined_Dark, StretchMode, AveGetClipboard, CodeEditor as NativeEditor, ResourceSource, Byo2ImageCreation, Byo2ImageDataType, PixFormat } from "ave-ui"; +import { WindowFramePart, DpiMargin, RichLabelTextColor, Byo2Font, AlignType, RichLabel as NativeRichLabel, DpiSize, DockMode, Vec2, Vec4, Grid as NativeGrid, Window as NativeWindow, WindowFlag, WindowCreation } from "ave-ui"; export const onDisplay = safe(async function () { if (!shadowRelated.displayWindow) { @@ -44,28 +44,84 @@ export const onDisplay = safe(async function () { content.SetOpacity(0.5); container.ControlAdd(content).SetDock(DockMode.Fill); - const label = new NativeRichLabel(shadowRelated.displayWindow); - - const fd = shadowRelated.displayWindow.GetTheme().GetFont(); - fd.Size = 24; - const fontDef = new Byo2Font(shadowRelated.displayWindow, fd); - - const textColor = new RichLabelTextColor(); - textColor.Text.Color = new Vec4(255, 255, 255, 255); - - label.FmSetDefaultFont(fontDef); - label.FmSetDefaultTextColor(textColor); - - label.SetAlignHorz(AlignType.Center); - label.SetAlignVert(AlignType.Center); - - // TODO: crash when use "" - label.SetText(" "); - shadowRelated.onUpdateTranslationResult = safe((text) => { - label.SetText(text); + function createSubtitle() { + const fd = shadowRelated.displayWindow.GetTheme().GetFont(); + fd.Size = 16; + const fontDef = new Byo2Font(shadowRelated.displayWindow, fd); + + const textColor = new RichLabelTextColor(); + textColor.Text.Color = new Vec4(255, 255, 255, 255); + + const label = new NativeRichLabel(shadowRelated.displayWindow); + label.FmSetDefaultFont(fontDef); + label.FmSetDefaultTextColor(textColor); + + label.SetAlignHorz(AlignType.Near); + label.SetAlignVert(AlignType.Center); + return label; + } + + // TODO: crash when use "" + const en = createSubtitle(); + const zh = createSubtitle(); + en.SetText(" "); + zh.SetText(" "); + + const subtitle = new NativeGrid(shadowRelated.displayWindow); + subtitle.RowAddSlice(...[1]); + subtitle.RowAddDpx(...[2]); + subtitle.RowAddSlice(...[1]); + subtitle.ColAddSlice(...[1]); + + const margin = new DpiMargin( + DpiSize.FromPixelScaled(50), // margin left + DpiSize.FromPixelScaled(5), // margin top + DpiSize.FromPixelScaled(50), // margin right + DpiSize.FromPixelScaled(5) // margin bottom + ); + const enGrid = subtitle.ControlAdd(en).SetGrid(0, 0).SetMargin(margin); + const zhGrid = subtitle.ControlAdd(zh).SetGrid(0, 2).SetMargin(margin); + container.ControlAdd(subtitle).SetGrid(0, 0); + + shadowRelated.onUpdateFontSize = safe((size: number) => { + const fd = shadowRelated.displayWindow.GetTheme().GetFont(); + fd.Size = size; + const fontDef = new Byo2Font(shadowRelated.displayWindow, fd); + + en.FmSetDefaultFont(fontDef); + zh.FmSetDefaultFont(fontDef); + shadowRelated.displayWindow.Redraw(); + }); + shadowRelated.onUpdateTranslationResult = safe((subtitle: { zh: string; en: string }) => { + en.SetText(subtitle.en || " "); + zh.SetText(subtitle.zh || " "); + shadowRelated.displayWindow.Redraw(); }); - container.ControlAdd(label).SetGrid(0, 0); + shadowRelated.onUpdateTranslationConfig = safe(() => { + const config = shadowRelated.subtitleConfig; + if (config.en && !config.zh) { + enGrid.SetGrid(0, 0, 1, 3); + en.SetOpacity(1); + zh.SetOpacity(0); + } else if (!config.en && config.zh) { + zhGrid.SetGrid(0, 0, 1, 3); + en.SetOpacity(0); + zh.SetOpacity(1); + } else if (!config.en && !config.zh) { + en.SetOpacity(0); + enGrid.SetGrid(0, 0); + zh.SetOpacity(0); + zhGrid.SetGrid(0, 2); + } else if (config.en && config.zh) { + en.SetOpacity(1); + enGrid.SetGrid(0, 0); + zh.SetOpacity(1); + zhGrid.SetGrid(0, 2); + } + + shadowRelated.displayWindow.Redraw(); + }); } shadowRelated.displayWindow.SetSize(new Vec2(shadowRelated.selectedArea.end.x - shadowRelated.selectedArea.start.x, shadowRelated.selectedArea.end.y - shadowRelated.selectedArea.start.y)); diff --git a/src/shadow/translate.ts b/src/shadow/translate.ts index 416fd4c..b8cc141 100644 --- a/src/shadow/translate.ts +++ b/src/shadow/translate.ts @@ -2,16 +2,44 @@ import { sleep, shadowRelated } from "./common"; import { IAsrEngine } from "../asr/base"; import { INlpEngine } from "../nlp/base"; -async function updateSubtitle() { - const both = `${shadowRelated.prevAsrText}\n${shadowRelated.prevTranslation}`; - shadowRelated.onUpdateTranslationResult(both); - // at least display it for 100ms? - // await sleep(100); -} - export const onTranslate = async function (asrEngine: IAsrEngine, nlpEngine: INlpEngine) { _onRecognize(asrEngine); _onTranslate(nlpEngine); + _onUpdateSubtitle(); +}; + +let prevLength = Number.MAX_SAFE_INTEGER; +let lastUpdateTime = Date.now(); +const subtitleDelay = 1000; + +const _onUpdateSubtitle = async function () { + try { + if (!shadowRelated.shouldRecognize) { + shadowRelated.subtitleQueue = []; + return; + } + const current = shadowRelated.subtitleQueue.shift(); + if (current) { + const now = Date.now(); + if (current.en.length < prevLength) { + // length change, a new subtitle found! + const dt = now - lastUpdateTime; + if (dt <= subtitleDelay) { + await sleep(Math.abs(subtitleDelay - dt)); + } + } + shadowRelated.onUpdateTranslationResult(current); + prevLength = current.en.length || 0; + lastUpdateTime = Date.now(); + await sleep(500); + } + } catch (error) { + console.error("recognize failed", error); + } finally { + setTimeout(() => { + _onUpdateSubtitle(); + }, 0); + } }; const _onRecognize = async function (asrEngine: IAsrEngine) { @@ -20,19 +48,20 @@ const _onRecognize = async function (asrEngine: IAsrEngine) { return; } const asrStart = Date.now(); - const asrResult = await asrEngine.recognize(); + const sentence = await asrEngine.recognize(); const asrEnd = Date.now(); - console.log(`asr end in ${asrEnd - asrStart}ms`); - if (asrResult.text && asrResult.text !== shadowRelated.prevAsrText) { - shadowRelated.prevAsrText = asrResult.text; + + if (sentence.text && sentence.text !== shadowRelated.prevSentence.text) { + console.log(`asr end in ${asrEnd - asrStart}ms`); + shadowRelated.prevSentence = sentence; shadowRelated.shouldTranslate = true; - await updateSubtitle(); } } catch (error) { console.error("recognize failed", error); } finally { - await sleep(100); - await _onRecognize(asrEngine); + setTimeout(() => { + _onRecognize(asrEngine); + }, 100); } }; const _onTranslate = async function (nlpEngine: INlpEngine) { @@ -40,20 +69,30 @@ const _onTranslate = async function (nlpEngine: INlpEngine) { if (!shadowRelated.shouldRecognize) { return; } + + if (!shadowRelated.subtitleConfig.zh) { + const { text, ...rest } = shadowRelated.prevSentence; + shadowRelated.subtitleQueue.push({ zh: "", en: text, ...rest }); + return; + } + if (shadowRelated.shouldTranslate) { shadowRelated.shouldTranslate = false; console.log("will translate"); const translateStart = Date.now(); - const { text } = await nlpEngine.translate(shadowRelated.prevAsrText); + const { text: enText, ...rest } = shadowRelated.prevSentence; + const input = enText; + const { text } = await nlpEngine.translate(input); shadowRelated.prevTranslation = text; const translateEnd = Date.now(); - console.log(`translate end in ${translateEnd - translateStart}ms`); - await updateSubtitle(); + console.log(`translate end in ${translateEnd - translateStart}ms`, { enText: input, text }); + shadowRelated.subtitleQueue.push({ zh: shadowRelated.prevTranslation, en: input, ...rest }); } } catch (error) { console.error("translate failed", error); } finally { - await sleep(500); - await _onTranslate(nlpEngine); + setTimeout(() => { + _onTranslate(nlpEngine); + }, 500); } }; From df473e6f0b6da735cf56dc56a88dd3fce8e80c5a Mon Sep 17 00:00:00 2001 From: rerender2021 Date: Wed, 22 Mar 2023 09:02:22 +0800 Subject: [PATCH 2/4] wait for init --- src/app.tsx | 63 ++++++++++++++++++++++++++++++++--------------------- 1 file changed, 38 insertions(+), 25 deletions(-) diff --git a/src/app.tsx b/src/app.tsx index f7ef1b0..6f1f078 100644 --- a/src/app.tsx +++ b/src/app.tsx @@ -115,10 +115,15 @@ export function Echo() { }, []); const [title, setTitle] = useState("Echo"); + const [asrReady, setAsrReady] = useState(false); useEffect(() => { initTheme(); - asrEngine.init(); + asrEngine.init().then( + safe(() => { + setAsrReady(true); + }) + ); nlpEngine.init().then( safe(async () => { const response = await axios.get("http://localhost:8100/gpu"); @@ -140,30 +145,38 @@ export function Echo() { - - - - - - - - - - - - - - - - - - - - - - - - + {asrReady ? ( + <> + + + + + + + + + + + + + + + + + + + + + + + + + + ) : ( + + + + )} From 632f4ebafbe6f5600ea1442130b131e0c757d187 Mon Sep 17 00:00:00 2001 From: rerender2021 Date: Mon, 3 Apr 2023 22:10:02 +0800 Subject: [PATCH 3/4] refine timeout and delay --- src/shadow/translate.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/shadow/translate.ts b/src/shadow/translate.ts index b8cc141..6c7719b 100644 --- a/src/shadow/translate.ts +++ b/src/shadow/translate.ts @@ -10,7 +10,7 @@ export const onTranslate = async function (asrEngine: IAsrEngine, nlpEngine: INl let prevLength = Number.MAX_SAFE_INTEGER; let lastUpdateTime = Date.now(); -const subtitleDelay = 1000; +const subtitleDelay = 250; const _onUpdateSubtitle = async function () { try { @@ -31,7 +31,6 @@ const _onUpdateSubtitle = async function () { shadowRelated.onUpdateTranslationResult(current); prevLength = current.en.length || 0; lastUpdateTime = Date.now(); - await sleep(500); } } catch (error) { console.error("recognize failed", error); From 39c9926e6ac440bebdd9bfe4ae1666f075adc099 Mon Sep 17 00:00:00 2001 From: rerender2021 Date: Tue, 4 Apr 2023 08:51:53 +0800 Subject: [PATCH 4/4] restore subtitleDelay --- src/shadow/translate.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shadow/translate.ts b/src/shadow/translate.ts index 6c7719b..9fec6b0 100644 --- a/src/shadow/translate.ts +++ b/src/shadow/translate.ts @@ -10,7 +10,7 @@ export const onTranslate = async function (asrEngine: IAsrEngine, nlpEngine: INl let prevLength = Number.MAX_SAFE_INTEGER; let lastUpdateTime = Date.now(); -const subtitleDelay = 250; +const subtitleDelay = 1000; const _onUpdateSubtitle = async function () { try {