Skip to content

Commit

Permalink
Implement videolength filter with ffprobe
Browse files Browse the repository at this point in the history
  • Loading branch information
Skaronator committed Sep 28, 2024
1 parent df4ecef commit 1e7457b
Show file tree
Hide file tree
Showing 3 changed files with 152 additions and 0 deletions.
34 changes: 34 additions & 0 deletions docs/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5018,6 +5018,40 @@ Description
These suffixes are case-insensitive.


downloader.*.videolength-min & .videolength-max
-----------------------------------------------
Type
``string``
Default
``null``
Example
``"1min"``, ``"1m30s"``, ``"1h21min31s"``
Description
Minimum/Maximum allowed video length.
Any video shorter/longer than this limit will not be downloaded.

A file qualifies as a video if it contains at least 10 frames. If a file contains multiple video streams, the shortest one will be used for comparison.

This option requires ``ffprobe`` to be available. Additionally, ``downloader.*.ffprobe-location`` can be configured.

Possible values are valid integer numbers followed by one of the following suffixes:
* Hours: ``hours``, ``hour``, ``h``
* Minutes: ``minutes``, ``minute``, ``min``, ``m``
* Seconds: ``seconds``, ``second``, ``sec``, ``s``

Multiple values can be combined. e.g. ``2hours30min2s``


downloader.*.ffprobe-location
-----------------------------
Type
``string``
Default
``ffprobe``
Description
Path/Location of ``ffprobe``. Used for the ``downloader.*.videolength-min`` and ``.videolength-max`` options.


downloader.*.mtime
------------------
Type
Expand Down
96 changes: 96 additions & 0 deletions gallery_dl/downloader/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@

import time
import mimetypes
import subprocess
import json
from datetime import timedelta
from requests.exceptions import RequestException, ConnectionError, Timeout
from .common import DownloaderBase
from .. import text, util
Expand All @@ -32,6 +35,10 @@ def __init__(self, job):
self.headers = self.config("headers")
self.minsize = self.config("filesize-min")
self.maxsize = self.config("filesize-max")
self.minlength = self.config("videolength-min")
self.maxlength = self.config("videolength-max")
ffprobe = self.config("ffprobe-location")
self.ffprobe = util.expand_path(ffprobe) if ffprobe else "ffprobe"
self.retries = self.config("retries", extractor._retries)
self.retry_codes = self.config("retry-codes", extractor._retry_codes)
self.timeout = self.config("timeout", extractor._timeout)
Expand Down Expand Up @@ -59,6 +66,18 @@ def __init__(self, job):
self.log.warning(
"Invalid maximum file size (%r)", self.maxsize)
self.maxsize = maxsize
if self.minlength:
minlength = text.parse_duration(self.minlength)
if not minlength:
self.log.warning(
"Invalid maximum videolength duration (%r)", self.minlength)
self.minlength = minlength
if self.maxlength:
maxlength = text.parse_duration(self.maxlength)
if not maxlength:
self.log.warning(
"Invalid maximum videolength duration (%r)", self.maxlength)
self.maxlength = maxlength
if isinstance(self.chunk_size, str):
chunk_size = text.parse_bytes(self.chunk_size)
if not chunk_size:
Expand Down Expand Up @@ -219,6 +238,26 @@ def _download_impl(self, url, pathfmt):
kwdict[metadata] = util.extract_headers(response)
build_path = True

# check video length using ffprobe request
if (self.minlength or self.maxlength):
length = self._fetch_videolength(url)

if length and self.minlength and length < self.minlength:
self.release_conn(response)
self.log.warning(
"Video length is shorter than allowed minimum (%s < %s)",
length, self.minlength)
pathfmt.temppath = ""
return True

if length and self.maxlength and length > self.maxlength:
self.release_conn(response)
self.log.warning(
"Video length is longer than allowed maximum (%s > %s)",
length, self.maxlength)
pathfmt.temppath = ""
return True

# build and check file path
if build_path:
pathfmt.build_path()
Expand Down Expand Up @@ -376,6 +415,63 @@ def _adjust_extension(pathfmt, file_header):
return True
return False

def _fetch_videolength(self, url):
minimum_frames = 10
args = [
self.ffprobe,
"-v",
"quiet",
"-print_format",
"json",
"-show_format",
"-show_streams",
url,
]

try:
result = subprocess.run(
args,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
check=True,
)
data = json.loads(result.stdout)

video_streams = [
float(stream["duration"])
for stream in data["streams"]
if stream["codec_type"] == "video"
and "duration" in stream
and "avg_frame_rate" in stream
and self._frame_count(stream) >= minimum_frames
]

if not video_streams:
self.log.info(
"No video streams found or none with a valid duration and minimum frames."
)
return None

duration = timedelta(seconds=min(video_streams))
return duration

except subprocess.CalledProcessError as e:
self.log.error("ffprobe failed: %s", e.stderr)
return None
except json.JSONDecodeError:
self.log.error("Failed to decode ffprobe output as JSON")
return None

def _frame_count(self, stream):
"""Calculates the number of frames in the video stream."""
try:
duration = float(stream["duration"])
avg_frame_rate = eval(stream["avg_frame_rate"])
return int(duration * avg_frame_rate)
except (ValueError, ZeroDivisionError):
return 0


MIME_TYPES = {
"image/jpeg" : "jpg",
Expand Down
22 changes: 22 additions & 0 deletions gallery_dl/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,28 @@ def parse_timestamp(ts, default=None):
return default


def parse_duration(duration_string, default=None):
    """Convert a duration string like '1h21min31s' into a timedelta

    Recognizes an integer followed by an hour, minute, or second suffix
    (case-insensitive); several units can be combined in one string.
    Returns 'default' when 'duration_string' is not a string or does not
    contain any recognizable unit.
    """
    patterns = {
        'hours': r'(\d+)\s*h(our(s)?)?',
        'minutes': r'(\d+)\s*m(in(ute)?(s)?)?',
        'seconds': r'(\d+)\s*s(ec(ond)?(s)?)?'
    }

    try:
        parsed_values = {}
        for unit, pattern in patterns.items():
            match = re.search(pattern, duration_string, re.IGNORECASE)
            if match:
                parsed_values[unit] = int(match.group(1))

        # nothing matched: the input is invalid, not a zero-length duration
        if not parsed_values:
            return default

        return datetime.timedelta(**parsed_values)
    except (TypeError, ValueError, OverflowError):
        # non-string input or a value too large for timedelta
        return default


def parse_datetime(date_string, format="%Y-%m-%dT%H:%M:%S%z", utcoffset=0):
"""Create a datetime object by parsing 'date_string'"""
try:
Expand Down

0 comments on commit 1e7457b

Please sign in to comment.