Skip to content

Commit

Permalink
Add ElevenLabs V2 model support and Chinese support (#347)
Browse files Browse the repository at this point in the history
* Add ElevenLabs V2 model support and Chinese support

* Refine env var
  • Loading branch information
pycui committed Aug 14, 2023
1 parent 83df08b commit 020f765
Show file tree
Hide file tree
Showing 6 changed files with 14 additions and 3 deletions.
1 change: 1 addition & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ OPEN_AI_WHISPER_API_KEY=YOUR_API_KEY
# "ELEVEN_LABS" or "GOOGLE_TTS" or "UNREAL_SPEECH"
TEXT_TO_SPEECH_USE=ELEVEN_LABS
ELEVEN_LABS_API_KEY=YOUR_API_KEY
ELEVEN_LABS_USE_V2= # Set to true if you have access to the V2 model.
# Add the voice ID of your cloned voice. Leave empty to use the default voices.
ELON_MUSK_VOICE_ID=
LOKI_VOICE_ID=
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ enum LlmOption: RawRepresentable, Hashable, CaseIterable, Identifiable, Codable

enum LanguageOption: RawRepresentable, Hashable, CaseIterable, Identifiable, Codable {

case english, spanish, french, german, italian, portuguese, polish, hindi
case english, spanish, french, german, italian, portuguese, polish, hindi, chinese

init?(rawValue: String) {
for option in LanguageOption.allCases {
Expand Down Expand Up @@ -84,6 +84,8 @@ enum LanguageOption: RawRepresentable, Hashable, CaseIterable, Identifiable, Cod
return "pl-PL"
case .hindi:
return "hi-IN"
case .chinese:
return "zh-CN"
}
}

Expand All @@ -105,6 +107,8 @@ enum LanguageOption: RawRepresentable, Hashable, CaseIterable, Identifiable, Cod
return "Polish"
case .hindi:
return "Hindi"
case .chinese:
return "Chinese"
}
}

Expand Down
1 change: 1 addition & 0 deletions client/web/src/components/Languages/index.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ const Languages = ({ preferredLanguage, setPreferredLanguage }) => {
'Italian',
'Polish',
'Portuguese',
'Chinese',
];

return (
Expand Down
1 change: 1 addition & 0 deletions realtime_ai_character/audio/speech_to_text/whisper.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
"pt-PT": "pt",
"hi-IN": "hi",
"pl-PL": "pl",
'zh-CN': 'zh',
}


Expand Down
7 changes: 5 additions & 2 deletions realtime_ai_character/audio/text_to_speech/elevenlabs.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@

logger = get_logger(__name__)
DEBUG = False
# Multilingual model id, chosen once at import time: the V2 model is used
# only when ELEVEN_LABS_USE_V2 is "true" or "1" (case-insensitive); an unset
# or any other value falls back to the V1 model.
ELEVEN_LABS_MULTILINGUAL_MODEL = (
    'eleven_multilingual_v2'
    if os.environ.get("ELEVEN_LABS_USE_V2", 'false').lower() in {'true', '1'}
    else 'eleven_multilingual_v1'
)

config = types.SimpleNamespace(**{
'chunk_size': 1024,
Expand Down Expand Up @@ -71,7 +74,7 @@ async def generate_audio(self, text, voice_id = "", language='en-US') -> bytes:
voice_id = "21m00Tcm4TlvDq8ikWAM"
headers = config.headers
if language != 'en-US':
config.data["model_id"] = 'eleven_multilingual_v1'
config.data["model_id"] = ELEVEN_LABS_MULTILINGUAL_MODEL
data = {
"text": text,
**config.data,
Expand All @@ -83,4 +86,4 @@ async def generate_audio(self, text, voice_id = "", language='en-US') -> bytes:
if response.status_code != 200:
logger.error(f"ElevenLabs returns response {response.status_code}")
# Get audio/mpeg from the response and return it
return response.content
return response.content
1 change: 1 addition & 0 deletions realtime_ai_character/websocket_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
"pt-PT": "Olá meu amigo, o que te traz aqui hoje?",
"hi-IN": "नमस्ते मेरे दोस्त, आज आपको यहां क्या लाया है?",
"pl-PL": "Cześć mój przyjacielu, co cię tu dziś przynosi?",
"zh-CN": "嗨,我的朋友,今天你为什么来这里?",
}


Expand Down

0 comments on commit 020f765

Please sign in to comment.