Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Extract CC from youtube video #24

Merged
merged 1 commit into from
Feb 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 16 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,22 @@ $ artbox voice text-to-speech \
--lang en-IN
```

Additionally, if you are using edge-tts, you can specify `--rate`, `--volume`,
and `--pitch`, for example:

```bash
$ echo "Do you want some coffee?" > /tmp/artbox/text.md
$ artbox voice text-to-speech \
--title artbox \
--text-path /tmp/artbox/text.md \
--output-path /tmp/artbox/voice.mp3 \
--engine edge-tts \
--lang en \
--rate +10% \
--volume -10% \
--pitch -5Hz
```

### Download a YouTube video

If you want to download videos from YouTube, you can use the following
Expand Down Expand Up @@ -152,10 +168,3 @@ If you want to use Python to play your audio files, you can install `playsound`:
```bash
$ pip wheel --use-pep517 "playsound (==1.3.0)"
```

## Troubleshoot

After installing with `poetry install`:

- Patch `pytube` (ref: https://github.com/pytube/pytube/issues/1773):
`sed -i 's/(r"^$\\w+\\W")/(r"^\\w+\\W")/' $CONDA_PREFIX/lib/python3.*/site-packages/pytube/cipher.py`
16 changes: 16 additions & 0 deletions docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,22 @@ $ artbox voice text-to-speech \
--lang en-IN
```

Additionally, if you are using edge-tts, you can specify `--rate`, `--volume`,
and `--pitch`, for example:

```bash
$ echo "Do you want some coffee?" > /tmp/artbox/text.md
$ artbox voice text-to-speech \
--title artbox \
--text-path /tmp/artbox/text.md \
--output-path /tmp/artbox/voice.mp3 \
--engine edge-tts \
--lang en \
--rate +10% \
--volume -10% \
--pitch -5Hz
```

### Download a YouTube video

If you want to download videos from YouTube, you can use the following
Expand Down
13 changes: 6 additions & 7 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ exclude = [

[tool.poetry.dependencies]
python = ">3.8.1,<3.12"
pytube = ">=15.0.0"
pycairo = ">=1.24.0"
pygobject = ">=3.44.1"
openai = ">=1"
Expand All @@ -32,6 +31,7 @@ gtts = ">=2.3.2"
edge-tts = ">=6.1.8"
numpy = ">=1.20"
typer = ">=0.9.0"
pytubefix = ">=1.13.3"

[tool.poetry.group.dev.dependencies]
pytest = ">=7.3.2"
Expand Down Expand Up @@ -113,6 +113,6 @@ module = [
"noisereduce",
"pydub",
"pydub.generators",
"pytube",
"pytubefix",
]
ignore_missing_imports = true
66 changes: 60 additions & 6 deletions src/artbox/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def main(


@app_voice.command("text-to-speech")
def text_to_speech(
def voice_text_to_speech(
title: Annotated[
str, typer.Option("--title", help="Specify the name of the audio file")
] = "artbox",
Expand All @@ -93,6 +93,18 @@ def text_to_speech(
"--lang", help="Choose the language for audio generation"
),
] = "en",
rate: Annotated[
str,
typer.Option("--rate", help="Decrease/Increase the rate level"),
] = "+0%",
volume: Annotated[
str,
typer.Option("--volume", help="Decrease/Increase the volume level"),
] = "+0%",
pitch: Annotated[
str,
typer.Option("--pitch", help="Decrease/Increase the pitch level"),
] = "+0Hz",
) -> None:
"""Convert text to speech."""
args_dict = {
Expand All @@ -101,14 +113,17 @@ def text_to_speech(
"output-path": output_path,
"engine": engine,
"lang": lang,
"rate": rate,
"volume": volume,
"pitch": pitch,
}

runner = Voice(args_dict)
runner.text_to_speech()


@app_sound.command("notes-to-audio")
def notes_to_audio(
def sound_notes_to_audio(
input_path: Annotated[
str,
typer.Option(
Expand Down Expand Up @@ -138,7 +153,7 @@ def notes_to_audio(


@app_video.command("remove-audio")
def remove_audio(
def video_remove_audio(
input_path: Annotated[
str,
typer.Option(
Expand All @@ -163,7 +178,7 @@ def remove_audio(


@app_video.command("extract-audio")
def extract_audio(
def video_extract_audio(
input_path: Annotated[
str,
typer.Option(
Expand All @@ -189,7 +204,7 @@ def extract_audio(


@app_video.command("combine-video-and-audio")
def combine_audio_and_video(
def video_combine_audio_and_video(
video_path: Annotated[
str,
typer.Option(
Expand Down Expand Up @@ -222,7 +237,7 @@ def combine_audio_and_video(


@app_youtube.command("download")
def download_youtube_video(
def youtube_download(
url: Annotated[
str,
typer.Option(
Expand Down Expand Up @@ -252,3 +267,42 @@ def download_youtube_video(

runner = Youtube(args_dict)
runner.download()


@app_youtube.command("cc")
def youtube_cc(
    url: Annotated[
        str,
        typer.Option(
            "--url",
            help="Specify the URL of the YouTube video to get captions from",
        ),
    ] = "",
    output_path: Annotated[
        str,
        typer.Option(
            "--output-path",
            help=(
                "Specify the path to store the downloaded captions file "
                "(.srt, .txt)"
            ),
        ),
    ] = "/tmp/cc.txt",
    lang: Annotated[
        str,
        typer.Option("--lang", help="Set the CC language to be downloaded"),
    ] = "en",
    format: Annotated[
        str,
        typer.Option("--format", help="Set the CC format (srt, text)"),
    ] = "text",
) -> None:
    """Download youtube video CC."""
    # NOTE: the docstring above is kept short on purpose; it is presumably
    # shown verbatim as the command help by typer.
    # The dictionary keys mirror the CLI option names, which is what the
    # runner reads back through ``self.args.get(...)``.
    args_dict = {
        "url": url,
        "output-path": output_path,
        "lang": lang,
        "format": format,
    }

    runner = Youtube(args_dict)
    runner.download_captions()
44 changes: 43 additions & 1 deletion src/artbox/videos.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from abc import abstractmethod

from moviepy.editor import AudioFileClip, VideoFileClip
from pytube import YouTube as PyYouTube
from pytubefix import YouTube as PyYouTube

from artbox.base import ArtBox

Expand All @@ -20,6 +20,27 @@ def download(self):
...


def _convert_srt_to_plain_text(srt_text: str) -> str:
"""
Convert an SRT file to plain text by removing timestamps and formatting.

Parameters
----------
srt_file_path (str): Path to the SRT file.

Returns
-------
str: The extracted plain text from the SRT file.
"""
plain_text = []
# Skip lines that are part of SRT formatting (timestamps, etc.)
for line in srt_text.split("\n"):
if line.strip() and not line.strip().isdigit() and "-->" not in line:
plain_text.append(line.strip())

return "\n".join(plain_text)


class Youtube(DownloadBase):
"""Set of tools for handing videos."""

Expand Down Expand Up @@ -52,6 +73,27 @@ def download(self):
except Exception as e:
print(f"Failed to download video: {e}")

def download_captions(self):
    """
    Download the closed captions of a YouTube video.

    Reads ``url``, ``lang`` (default ``"en"``) and ``format`` (default
    ``"text"``) from ``self.args`` and writes the captions to
    ``self.output_path``. When ``format`` is ``"text"`` the SRT markup is
    stripped before writing; any other value writes the raw SRT.

    If no caption track is found for the language, a message is printed and
    nothing is written.
    """
    video_url = self.args.get("url", "")
    lang = self.args.get("lang", "en")
    cc_format = self.args.get("format", "text")

    yt = PyYouTube(video_url)
    # Try the "a.<lang>" track first (presumably the auto-generated one),
    # then fall back to the plain "<lang>" track so videos that only have
    # the latter are not reported as having no captions.
    caption = yt.captions.get_by_language_code(
        f"a.{lang}"
    ) or yt.captions.get_by_language_code(lang)

    if not caption:
        print(f"No captions found for language {lang}.")
        return

    cc = caption.generate_srt_captions()
    if cc_format == "text":
        cc = _convert_srt_to_plain_text(cc)

    # Convert before opening so a failure does not leave an empty file, and
    # write as UTF-8 so non-ASCII captions do not depend on the platform's
    # default encoding.
    with open(str(self.output_path), "w", encoding="utf-8") as f:
        f.write(cc)
    print("Captions downloaded successfully.")


class Video(ArtBox):
"""Set of tools for handing videos."""
Expand Down
8 changes: 6 additions & 2 deletions src/artbox/voices.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ async def async_text_to_speech(self) -> None:
title: str = self.args.get("title", "")
text_path: str = self.args.get("text-path", "")
lang: str = self.args.get("lang", "en")
rate = self.args.get("rate", "+0%")
volume = self.args.get("volume", "+0%")
pitch = self.args.get("pitch", "+0Hz")

if not title:
raise Exception("Argument `title` not given")
Expand All @@ -96,8 +99,9 @@ async def async_text_to_speech(self) -> None:
communicate = edge_tts.Communicate(
text=text,
voice=random.choice(voice_options)["Name"],
rate="+5%",
volume="+0%",
rate=rate,
volume=volume,
pitch=pitch,
)
with open(self.output_path, "wb") as file:
async for chunk in communicate.stream():
Expand Down
Loading