diff --git a/README.md b/README.md index e7c46b4..f145d29 100644 --- a/README.md +++ b/README.md @@ -55,10 +55,9 @@ The built-in help provides handles and default values for implemented functions ```bash (venv) ~/ffmpeg2obj$ ffmpeg2obj --help -usage: ffmpeg2obj [-h] [-v] [--noop] [--force-cleanup] [--disable-upload] [-s SRC_DIR] [-d DST_DIR] - [-i IGNORED_SUBDIR] [-o OBJ_PREFIX] -b BUCKET_NAME [-e FILE_EXTENSION] [-vc VIDEO_CODEC] - [--pix-fmt PIX_FMT] [-l LANGS] [--width TARGET_WIDTH] [--resize] [--height TARGET_HEIGHT] - (-qp TARGET_QP | -crf TARGET_CRF) +usage: ffmpeg2obj [-h] [-v] [--noop] [--force-cleanup] [-s SRC_DIR] [-d DST_DIR] [-i IGNORED_SUBDIR] [-o OBJ_PREFIX] [-e FILE_EXTENSION] + [-vc VIDEO_CODEC] [--pix-fmt PIX_FMT] [-l LANGS] [--width TARGET_WIDTH] [--resize] [--concat] [--height TARGET_HEIGHT] + (-b BUCKET_NAME | --disable-upload) [-qp TARGET_QP | -crf TARGET_CRF] Simple tool to compress blu ray movie library and store it in obj @@ -67,7 +66,6 @@ options: -v, --verbose show additional information --noop script executes but takes no action --force-cleanup cleans up even on upload failure - --disable-upload disables default upload to object storage and stores files locally -s SRC_DIR, --source-dir SRC_DIR source directory for media to be transcoded -d DST_DIR, --destination-dir DST_DIR @@ -76,8 +74,6 @@ options: ignored subdirectories -o OBJ_PREFIX, --obj-prefix OBJ_PREFIX source directory for media to be transcoded - -b BUCKET_NAME, --bucket-name BUCKET_NAME - source directory for media to be transcoded -e FILE_EXTENSION, --file-extension FILE_EXTENSION extension for the media files to be transcoded -vc VIDEO_CODEC, --video-codec VIDEO_CODEC @@ -87,8 +83,12 @@ options: selected languages transcoding of the media files --width TARGET_WIDTH target width for the media files to be transcoded --resize scale input files to height x width + --concat concatenates files within same directory --height TARGET_HEIGHT target height for the media files to be 
transcoded + -b BUCKET_NAME, --bucket-name BUCKET_NAME + source directory for media to be transcoded + --disable-upload disables default upload to object storage and stores files locally -qp TARGET_QP Quantization Parameter for the media files to be transcoded -crf TARGET_CRF Constant Rate Factor for the media files to be transcoded (venv) ~/ffmpeg2obj$ diff --git a/pyproject.toml b/pyproject.toml index 81d1c2b..293a406 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,7 @@ classifiers = [ dependencies = [ "boto3==1.34.92", "ffmpeg-python@ git+https://github.com/kkroening/ffmpeg-python.git", + "pydantic==2.7.1", ] [project.optional-dependencies] dev = [ @@ -42,3 +43,11 @@ profile = "black" files = [ "src/ffmpeg2obj", ] +plugins = [ + "pydantic.mypy", +] + +[tool.pydantic-mypy] +init_forbid_extra = true +init_typed = true +warn_required_dynamic_aliases = true diff --git a/src/ffmpeg2obj/helper.py b/src/ffmpeg2obj/helper.py index 36fa2f6..39b3be6 100644 --- a/src/ffmpeg2obj/helper.py +++ b/src/ffmpeg2obj/helper.py @@ -5,6 +5,8 @@ import argparse import hashlib import json +import os +import tempfile import time from datetime import timedelta from typing import Any @@ -55,7 +57,7 @@ class ProcessedFile: def __init__( self, object_name: str, - real_path: str, + real_paths: list[str], file_extension: str, dst_dir: str, has_lockfile: bool, @@ -63,7 +65,7 @@ def __init__( processing_params: ProcessingParams, ) -> None: self.object_name = object_name - self.real_path = real_path + self.real_paths = real_paths self.file_extension = file_extension self.dst_dir = dst_dir if dst_dir.endswith("/") else dst_dir + "/" self.has_lockfile = has_lockfile @@ -79,7 +81,7 @@ def __init__( def __str__(self) -> str: out = [] out += ["object_name: " + self.object_name] - out += ["real_path: " + self.real_path] + out += ["real_path: " + ",".join(self.real_paths)] out += ["has_lockfile: " + str(self.has_lockfile)] out += ["is_uploaded: " + str(self.is_uploaded)] out += 
["hashed_name: " + self.hashed_name] @@ -100,25 +102,30 @@ def update(self, obj_config: dict, bucket_name: str) -> None: def get_coded_res(self) -> list[int]: """Returns height and width for the file from real_path""" - probe_result = ffmpeg.probe(self.real_path) + probe_result = ffmpeg.probe(self.real_paths[0]) video_stream = list( filter(lambda x: x["codec_type"] == "video", probe_result["streams"]) )[0] coded_res = [video_stream["coded_width"], video_stream["coded_height"]] return coded_res - def convert(self) -> tuple[str, str, bool, timedelta]: + def convert(self, verbose: bool = False) -> tuple[str, str, bool, timedelta]: """Runs ffmpeg against the file from real_path and stores it in /tmp""" convert_succeded = False + concat_enabled = len(self.real_paths) > 1 # core opts opts_dict: dict[str, Any] = { "c:v": self.processing_params.video_codec, - "pix_fmt": self.processing_params.pix_fmt, "c:a": "copy", "c:s": "copy", - "v": "quiet", + "v": "error", } # conditional opts + if ( + self.processing_params.pix_fmt is not None + and self.processing_params.video_codec != "copy" + ): + opts_dict.update({"pix_fmt": self.processing_params.pix_fmt}) if self.processing_params.target_crf is not None: opts_dict.update({"crf": str(self.processing_params.target_crf)}) elif self.processing_params.target_qp is not None: @@ -137,20 +144,36 @@ def convert(self) -> tuple[str, str, bool, timedelta]: + ":".join(str(x) for x in self.processing_params.target_res) } opts_dict.update(scale_dict) - stream = ffmpeg.input(self.real_path) + if concat_enabled: + temp_file_byte_contents = ( + "\n".join(f"file '{path}'" for path in self.real_paths) + "\n" + ).encode() + with tempfile.NamedTemporaryFile(delete=False) as temp_file: + temp_file.write(temp_file_byte_contents) + input_file = temp_file.name + stream = ffmpeg.input(input_file, f="concat", safe="0") + else: + input_file = self.real_paths[0] + stream = ffmpeg.input(input_file) stream = ffmpeg.output(stream, self.dst_hashed_path, 
**opts_dict) start_time = time.monotonic() + if verbose: + print(" ".join(ffmpeg.compile(stream))) try: - std_out, std_err = ffmpeg.run(stream) + std_out, std_err = ffmpeg.run( + stream, capture_stdout=True, capture_stderr=True + ) except ffmpeg.Error as e: print(f"Error occured: {e}") end_time = time.monotonic() duration = timedelta(seconds=end_time - start_time) - return e.stdout, e.stderr, convert_succeded, duration + return e.stdout.decode(), e.stderr.decode(), convert_succeded, duration convert_succeded = True + if concat_enabled: + os.remove(input_file) end_time = time.monotonic() duration = timedelta(seconds=end_time - start_time) - return std_out, std_err, convert_succeded, duration + return std_out.decode(), std_err.decode(), convert_succeded, duration def create_lock_file(self, obj_config: dict, bucket_name: str) -> bool: """Creates empty lock file on object storage bucket""" diff --git a/src/ffmpeg2obj/script.py b/src/ffmpeg2obj/script.py index 1b9190c..48545da 100644 --- a/src/ffmpeg2obj/script.py +++ b/src/ffmpeg2obj/script.py @@ -110,7 +110,7 @@ def parse_args() -> argparse.Namespace: "--video-codec", dest="video_codec", type=str, - default="libx265", + default="copy", help="video codec for transcoding of the media files", ) @@ -118,7 +118,6 @@ def parse_args() -> argparse.Namespace: "--pix-fmt", dest="pix_fmt", type=str, - default="yuv420p10le", help="pix fmt for transcoding of the media files", ) @@ -147,6 +146,14 @@ def parse_args() -> argparse.Namespace: help="scale input files to height x width", ) + parser.add_argument( + "--concat", + dest="concat", + action="store_true", + default=False, + help="concatenates files within same directory", + ) + parser.add_argument( "--height", dest="target_height", @@ -173,7 +180,7 @@ def parse_args() -> argparse.Namespace: help="disables default upload to object storage and stores files locally", ) - qf_group = parser.add_mutually_exclusive_group(required=True) + qf_group = 
parser.add_mutually_exclusive_group() qf_group.add_argument( "-qp", @@ -193,10 +200,18 @@ def parse_args() -> argparse.Namespace: def get_source_files( - src_dir: str, ignored_subdir: str, obj_prefix: str, file_extension: str -) -> dict[str, str]: - """Looks for source files""" - source_files = {} + src_dir: str, + ignored_subdir: str, + obj_prefix: str, + file_extension: str, + concat: bool, +) -> dict[str, list[str]]: + """Looks for source files, grouping files from the same directory for concatenation if requested""" + + def get_concat_base(object_name): + return "/".join(object_name.split("/")[:-1]) + + found_source_files: dict[str, str] = {} for root, _, files in os.walk(src_dir): for name in files: if ignored_subdir not in root and name.lower().endswith( @@ -207,7 +222,26 @@ "NFC", real_path.replace(src_dir, obj_prefix) ) source_file_dict = {object_name: real_path} - source_files.update(source_file_dict) + found_source_files.update(source_file_dict) + + source_files: dict[str, list[str]] = {} + if concat: + concat_base_mapping: dict[str, str] = {} + concat_object_name_mapping: dict[str, str] = {} + for object_name, real_path in found_source_files.items(): + concat_base = get_concat_base(object_name) + concat_base_mapping.update({real_path: concat_base}) + if concat_object_name_mapping.get(concat_base) is None: + concat_object_name_mapping.update({concat_base: object_name}) + for real_path, concat_base in concat_base_mapping.items(): + object_name = concat_object_name_mapping.get(concat_base) + if source_files.get(object_name) is None: + source_files.update({object_name: [real_path]}) + else: + source_files.get(object_name).append(real_path) + else: + for object_name, real_path in found_source_files.items(): + source_files.update({object_name: [real_path]}) return source_files @@ -246,7 +280,7 @@ def get_processed_files( - source_files: dict, + source_files: dict[str, list[str]], bucket_objects: list, file_extension: str,
dst_dir: str, @@ -254,13 +288,13 @@ def get_processed_files( ) -> list[ProcessedFile]: """Returns list of processed files based on collected data""" processed_files = [] - for object_name, real_path in source_files.items(): + for object_name, real_paths in source_files.items(): is_uploaded = object_name in bucket_objects has_lockfile = object_name + ".lock" in bucket_objects processed_files.append( ProcessedFile( object_name, - real_path, + real_paths, file_extension, dst_dir, has_lockfile, @@ -285,32 +319,34 @@ def convert_and_upload( def convert(processed_file: ProcessedFile) -> bool: """Handles conversion of source file""" + convert_succeded = False with lock: if not noop: - # TODO: improve overall ffmpeg-python error handling and maybe show status + # TODO: improve overall communicating job progress to user print("Starting conversion for " + processed_file.object_name) std_out, std_err, convert_succeded, convert_duration = ( - processed_file.convert() + processed_file.convert(verbose) ) if verbose: print( f"Conversion of file {processed_file.object_name}" f" took: {convert_duration}" ) - if std_out is not None: + if std_out != "": print("\nffmpeg standard output:") print(std_out) - if std_err is not None: + if std_err != "": print("\nffmpeg standard error:") print(std_err) if convert_succeded and upload_enabled: processed_file.create_lock_file(obj_config, bucket_name) else: print("Would have start conversion for " + processed_file.object_name) - return convert_succeded + return convert_succeded def upload(processed_file: ProcessedFile) -> bool: """Handles upload of destination file to object storage""" + upload_succeded = False if not processed_file.is_uploaded and os.path.isfile( processed_file.dst_hashed_path ): @@ -342,6 +378,7 @@ def upload(processed_file: ProcessedFile) -> bool: def store(processed_file: ProcessedFile) -> bool: """Handles local storage of destination file""" + store_succeded = False if os.path.isfile(processed_file.dst_hashed_path): print( 
f"Storing file {processed_file.object_name}" " in destination directory" @@ -369,6 +406,9 @@ def needs_conversion(processed_file: ProcessedFile): ) processed_file: ProcessedFile = queue.get() + convert_succeded = False + upload_succeded = False + store_succeded = False if needs_conversion(processed_file): convert_succeded = convert(processed_file) if upload_enabled: @@ -395,7 +435,11 @@ def main(): sys.exit(3) source_files = get_source_files( - args.src_dir, args.ignored_subdir, args.obj_prefix, args.file_extension + args.src_dir, + args.ignored_subdir, + args.obj_prefix, + args.file_extension, + args.concat, ) obj_resource = get_obj_resource(OBJ_CONFIG)