Skip to content

Commit

Permalink
Add support for SSML speech.
Browse files Browse the repository at this point in the history
  • Loading branch information
m1maker committed Sep 7, 2024
1 parent e246aaf commit 1eb7986
Show file tree
Hide file tree
Showing 10 changed files with 67 additions and 12 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ endif()
project ("SRAL")
add_library(${PROJECT_NAME}_obj OBJECT)
target_sources(${PROJECT_NAME}_obj PRIVATE
"SRC/SRAL.cpp" "SRC/Engine.h")
"SRC/SRAL.cpp" "SRC/Engine.h" "SRC/Util.h" "SRC/Util.cpp")
target_sources(${PROJECT_NAME}_obj PUBLIC
FILE_SET HEADERS
BASE_DIRS "${INCLUDES}"
Expand Down
3 changes: 2 additions & 1 deletion Include/SRAL.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@ extern "C" {
SUPPORTS_SPEECH_RATE = 512,
SUPPORTS_SPEECH_VOLUME = 1024,
SUPPORTS_SELECT_VOICE = 2048,
SUPPORTS_PAUSE_SPEECH = 4096
SUPPORTS_PAUSE_SPEECH = 4096,
SUPPORTS_SSML = 8192
};

/**
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ This enumeration defines the features supported by the various speech engines. T
- `SUPPORTS_SPEECH_VOLUME = 1024`: The engine supports setting the speech volume.
- `SUPPORTS_SELECT_VOICE = 2048`: The engine supports selecting a specific voice.
- `SUPPORTS_PAUSE_SPEECH = 4096`: The engine supports pausing and resuming speech.
- `SUPPORTS_SSML = 8192`: The engine supports SSML tags in the text passed to the speak functions.


## Functions
Expand Down
21 changes: 16 additions & 5 deletions SRC/NVDA.cpp
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
#ifdef _WIN32
#include "Encoding.h"
#include "NVDA.h"
#include "Util.h"
#include<Windows.h>


bool NVDA::Initialize() {
lib = LoadLibraryW(L"nvdaControllerClient.dll");
if (lib == nullptr)return false;
nvdaController_speakText = (NVDAController_speakText)GetProcAddress(lib, "nvdaController_speakText");
nvdaController_brailleMessage = (NVDAController_brailleMessage)GetProcAddress(lib, "nvdaController_brailleMessage");
nvdaController_cancelSpeech = (NVDAController_cancelSpeech)GetProcAddress(lib, "nvdaController_cancelSpeech");
nvdaController_testIfRunning = (NVDAController_testIfRunning)GetProcAddress(lib, "nvdaController_testIfRunning");
nvdaController_speakSsml = (NVDAController_speakSsml)GetProcAddress(lib, "nvdaController_speakSsml");
return true;
}
bool NVDA::Uninitialize() {
Expand All @@ -20,6 +20,7 @@ bool NVDA::Uninitialize() {
nvdaController_brailleMessage = nullptr;
nvdaController_cancelSpeech = nullptr;
nvdaController_testIfRunning = nullptr;
nvdaController_speakSsml = nullptr;
return true;
}
bool NVDA::GetActive() {
Expand All @@ -31,9 +32,20 @@ bool NVDA::Speak(const char* text, bool interrupt) {
if (!GetActive())return false;
if (interrupt)
nvdaController_cancelSpeech();
std::string text_str(text);
if (!IsSsml(text_str))AddSsml(text_str);

std::wstring out;
UnicodeConvert(text, out);
return nvdaController_speakText(out.c_str()) == 0;
UnicodeConvert(text_str, out);
error_status_t result = nvdaController_speakSsml(out.c_str(), -1, 0, 0);
if (result == 0)return true;
else if (result == 1717) {
RemoveSsml(text_str);
UnicodeConvert(text_str, out);
result = nvdaController_speakText(out.c_str());
return result == 0;
}
return false;
}
bool NVDA::Braille(const char* text) {
if (!GetActive())return false;
Expand Down Expand Up @@ -64,4 +76,3 @@ bool NVDA::PauseSpeech() {
// Resuming has no dedicated implementation: the request is simply forwarded to
// PauseSpeech() and its result is reported back (original author's note: the
// proper way to resume is not known).
bool NVDA::ResumeSpeech() {
	const bool forwarded_result = PauseSpeech();
	return forwarded_result;
}
#endif
4 changes: 3 additions & 1 deletion SRC/NVDA.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class NVDA : public Engine {
bool Initialize()override;
bool Uninitialize()override;
int GetFeatures()override {
return SUPPORTS_SPEECH | SUPPORTS_BRAILLE | SUPPORTS_PAUSE_SPEECH;
return SUPPORTS_SPEECH | SUPPORTS_BRAILLE | SUPPORTS_PAUSE_SPEECH | SUPPORTS_SSML;
}
void SetVolume(uint64_t)override { return; }
uint64_t GetVolume()override { return 0; }
Expand All @@ -45,11 +45,13 @@ class NVDA : public Engine {
typedef error_status_t(__stdcall* NVDAController_brailleMessage)(const wchar_t*);
typedef error_status_t(__stdcall* NVDAController_cancelSpeech)();
typedef error_status_t(__stdcall* NVDAController_testIfRunning)();
typedef error_status_t(__stdcall* NVDAController_speakSsml)(const wchar_t*, int, int, int);

NVDAController_speakText nvdaController_speakText = nullptr;
NVDAController_brailleMessage nvdaController_brailleMessage = nullptr;
NVDAController_cancelSpeech nvdaController_cancelSpeech = nullptr;
NVDAController_testIfRunning nvdaController_testIfRunning = nullptr;
NVDAController_speakSsml nvdaController_speakSsml = nullptr;
};
#endif
#endif
8 changes: 6 additions & 2 deletions SRC/SAPI.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
#ifdef _WIN32
#include "SAPI.h"
#include "Util.h"
#include <cstdio>
#include<string>
#include<thread>


static char* trim(char* data, unsigned long* size, WAVEFORMATEX* wfx, int threshold) {
int channels = wfx->nChannels;
int bytesPerSample = wfx->wBitsPerSample / 8;
Expand Down Expand Up @@ -128,9 +131,10 @@ bool SAPI::Speak(const char* text, bool interrupt) {
this->Uninitialize();
this->Initialize();
}

std::string text_str(text);
if (!IsSsml(text_str))AddSsml(text_str);
unsigned long bytes;
char* audio_ptr = blastspeak_speak_to_memory(instance, &bytes, text);
char* audio_ptr = blastspeak_speak_to_memory(instance, &bytes, text_str.c_str());
if (audio_ptr == nullptr)
return false;

Expand Down
2 changes: 1 addition & 1 deletion SRC/SAPI.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class SAPI : public Engine {
bool Initialize()override;
bool Uninitialize()override;
int GetFeatures()override {
return SUPPORTS_SPEECH | SUPPORTS_SPEECH_RATE | SUPPORTS_SPEECH_VOLUME | SUPPORTS_SELECT_VOICE | SUPPORTS_PAUSE_SPEECH;
return SUPPORTS_SPEECH | SUPPORTS_SPEECH_RATE | SUPPORTS_SPEECH_VOLUME | SUPPORTS_SELECT_VOICE | SUPPORTS_PAUSE_SPEECH | SUPPORTS_SSML;
}
void SetVolume(uint64_t value)override;
uint64_t GetVolume()override;
Expand Down
5 changes: 4 additions & 1 deletion SRC/SRAL.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#define SRAL_EXPORT
#include "../Include/SRAL.h"
#include "Engine.h"
#include "Util.h"
#if defined(_WIN32)
#define UNICODE
#include "NVDA.h"
Expand Down Expand Up @@ -366,7 +367,9 @@ extern "C" SRAL_API bool SRAL_Output(const char* text, bool interrupt) {
if (g_currentEngine == nullptr)return false;
speech_engine_update();
const bool speech = SRAL_Speak(text, interrupt);
const bool braille = SRAL_Braille(text);
std::string braille_str(text);
if (IsSsml(braille_str))RemoveSsml(braille_str);
const bool braille = SRAL_Braille(braille_str.c_str());
return speech || braille;
}
extern "C" SRAL_API bool SRAL_StopSpeech(void) {
Expand Down
26 changes: 26 additions & 0 deletions SRC/Util.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#include "Util.h"
#include <algorithm>
#include <cctype>
#include <regex>


/**
 * Reports whether `str` is an SSML document, i.e. text wrapped in a <speak> root element.
 *
 * Two forms are accepted:
 *  - after removing every whitespace character, the text starts with "<speak>"
 *    and contains "</speak>";
 *  - the first non-whitespace characters are "<speak" followed by whitespace
 *    (a root tag carrying attributes such as version / xml:lang) and the text
 *    contains "</speak>".
 *
 * @param str Text to inspect; it is not modified.
 * @return true when the text is recognised as SSML, false otherwise.
 */
bool IsSsml(const std::string& str) {
	// The <cctype> classifiers are undefined for negative arguments, which is what a
	// plain `char` holds for non-ASCII UTF-8 bytes where char is signed. Always go
	// through unsigned char.
	const auto is_space = [](char c) {
		return std::isspace(static_cast<unsigned char>(c)) != 0;
	};

	std::string compact = str;
	compact.erase(std::remove_if(compact.begin(), compact.end(), is_space), compact.end());
	if (compact.find("</speak>") == std::string::npos) return false;
	if (compact.compare(0, 7, "<speak>") == 0) return true;

	// Root tag with attributes: "<speak" must be followed by whitespace so that
	// unrelated element names such as "<speaker>" are not mistaken for SSML.
	const size_t start = static_cast<size_t>(
		std::find_if_not(str.begin(), str.end(), is_space) - str.begin());
	if (str.compare(start, 6, "<speak") != 0) return false;
	const size_t after = start + 6;
	return after < str.size() && is_space(str[after]);
}
/**
 * Ensures `str` is wrapped in a <speak> root element.
 *
 * Text that IsSsml() already recognises is left untouched; anything else is
 * surrounded with "<speak>" and "</speak>" in place.
 *
 * NOTE(review): the text is wrapped verbatim, so raw '<' or '&' characters in
 * plain text are not escaped - confirm whether callers are expected to escape.
 *
 * @param str Text to wrap; modified in place.
 * @return true when `str` is recognised as SSML on exit.
 */
bool AddSsml(std::string& str) {
	if (!IsSsml(str)) {
		str.insert(0, "<speak>");
		str.append("</speak>");
		return IsSsml(str);
	}
	return true;
}
namespace {
// Replaces the five predefined XML entities (&amp; &lt; &gt; &quot; &apos;) with the
// characters they stand for. Done in a single left-to-right pass so that the
// output of one replacement is never decoded a second time ("&amp;lt;" -> "&lt;").
std::string DecodeXmlEntities(const std::string& text) {
	static const struct {
		const char* name;
		char value;
	} kEntities[] = {
		{"&amp;", '&'}, {"&lt;", '<'}, {"&gt;", '>'}, {"&quot;", '"'}, {"&apos;", '\''},
	};

	std::string decoded;
	decoded.reserve(text.size());
	size_t i = 0;
	while (i < text.size()) {
		bool replaced = false;
		if (text[i] == '&') {
			for (const auto& entity : kEntities) {
				const size_t len = std::char_traits<char>::length(entity.name);
				if (text.compare(i, len, entity.name) == 0) {
					decoded += entity.value;
					i += len;
					replaced = true;
					break;
				}
			}
		}
		if (!replaced) decoded += text[i++];
	}
	return decoded;
}
} // namespace

/**
 * Converts an SSML document into plain text, in place.
 *
 * Text that IsSsml() does not recognise is left untouched. Otherwise every
 * "<...>" tag is removed and the predefined XML entities in the remaining text
 * are decoded, so consumers that do not understand markup (braille, legacy
 * speakText) receive the literal characters.
 *
 * @param str Text to convert; modified in place.
 * @return Always true.
 */
bool RemoveSsml(std::string& str) {
	if (!IsSsml(str)) return true;
	// Compiling a std::regex is expensive; build the pattern once and reuse it.
	static const std::regex ssml_tags("<[^>]+>");
	// Tags must be stripped before decoding, otherwise a decoded "&lt;b&gt;"
	// would itself be treated as a tag and removed.
	str = DecodeXmlEntities(std::regex_replace(str, ssml_tags, ""));
	return true;
}

7 changes: 7 additions & 0 deletions SRC/Util.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// Helpers for detecting, adding and stripping the SSML <speak> wrapper around text.
#ifndef UTIL_H_
#define UTIL_H_
#include <string>
// Returns true when str is recognised as SSML, i.e. text wrapped in a <speak> root element.
bool IsSsml(const std::string& str);
// Wraps str in <speak>...</speak> in place unless IsSsml(str) already holds;
// returns whether str is recognised as SSML afterwards.
bool AddSsml(std::string& str);
// Strips the markup tags from str in place when IsSsml(str) holds; otherwise leaves
// str unchanged. Always returns true.
bool RemoveSsml(std::string& str);
#endif

0 comments on commit 1eb7986

Please sign in to comment.