Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

community: add hugging face text-to-speech inference API #18880

Merged
merged 9 commits into from
Mar 29, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Previous commit
Next commit
Fix
  • Loading branch information
h0rv committed Mar 27, 2024
commit 19421ca561e9e27082fd0fa133bc21865013610d
11 changes: 11 additions & 0 deletions libs/community/langchain_community/tools/audio/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from langchain_community.tools.audio.base import AudioTool
from langchain_community.tools.audio.huggingface_text_to_speech_inference import (
HuggingFaceSupportedAudioFormat,
HuggingFaceTextToSpeechModelInference,
)

__all__ = [
"AudioTool",
"HuggingFaceTextToSpeechModelInference",
"HuggingFaceSupportedAudioFormat",
]
5 changes: 4 additions & 1 deletion libs/community/langchain_community/tools/audio/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ class AudioTool(BaseTool):

@abstractmethod
def _run(
self, query: str, run_manager: Optional[CallbackManagerForToolRun] = None
self,
query: str,
output_name: Optional[str] = None,
run_manager: Optional[CallbackManagerForToolRun] = None,
) -> str:
pass
libs/community/langchain_community/tools/audio/huggingface_text_to_speech_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@
from typing import Optional

import requests
from langchain_core.callbacks import CallbackManagerForToolRun
from langchain_core.pydantic_v1 import SecretStr

from langchain_community.tools.audio import AudioTool


class HuggingFaceSupportedAudioFormat(Enum):
WAV, WAVE, WAVEFORM = "wav"
WAV = WAVE = WAVEFORM = "wav"


class HuggingFaceTextToSpeechModelInference(AudioTool):
Expand All @@ -19,7 +20,7 @@ class HuggingFaceTextToSpeechModelInference(AudioTool):

Requirements:

- Environment variable ``HUGGINGFACE_API_TOKEN`` must be set,
- Environment variable ``HUGGINGFACE_API_KEY`` must be set,
or passed to the constructor.
"""

Expand All @@ -28,13 +29,13 @@ class HuggingFaceTextToSpeechModelInference(AudioTool):

model_name: str
api_url: str
huggingface_api_token: SecretStr
huggingface_api_key: SecretStr
format: HuggingFaceSupportedAudioFormat
output_dir: str

_DEFAULT_OUTPUT_DIR = "tts_output"
_DEFAULT_OUTPUT_NAME = "output"
_HUGGINGFACE_API_TOKEN_ENV_NAME = "HUGGINGFACE_API_KEY"
_HUGGINGFACE_API_KEY_ENV_NAME = "HUGGINGFACE_API_KEY"
_HUGGINGFACE_API_URL_ROOT = "https://api-inference.huggingface.co/models"

def __init__(
Expand All @@ -45,13 +46,18 @@ def __init__(
output_dir: Optional[str] = None,
) -> None:
if not huggingface_api_key:
SecretStr(os.getenv(self._HUGGINGFACE_API_TOKEN_ENV_NAME))
huggingface_api_key = SecretStr(
os.getenv(self._HUGGINGFACE_API_KEY_ENV_NAME, "")
)

if (
not huggingface_api_key.get_secret_value()
not huggingface_api_key
or not huggingface_api_key.get_secret_value()
or huggingface_api_key.get_secret_value() == ""
):
raise ValueError(f"'{self.HUGGINGFACE_API_KEY}' must be or set or passed")
raise ValueError(
f"'{self._HUGGINGFACE_API_KEY_ENV_NAME}' must be or set or passed"
)

super().__init__(
model_name=model_name,
Expand All @@ -61,7 +67,12 @@ def __init__(
output_dir=output_dir if output_dir else self._DEFAULT_OUTPUT_DIR,
)

def _run(self, query: str, output_name: Optional[str] = None) -> str:
def _run(
self,
query: str,
output_name: Optional[str] = None,
run_manager: Optional[CallbackManagerForToolRun] = None,
) -> str:
response = requests.post(
self.api_url,
headers={
Expand All @@ -87,4 +98,4 @@ def _run(self, query: str, output_name: Optional[str] = None) -> str:

def _default_output_name(self) -> str:
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
return f"{self._DEFAULT_OUTPUT_NAME_PREFIX}_{timestamp}"
return f"{self._DEFAULT_OUTPUT_NAME}_{timestamp}"