Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,9 @@ The built-in help provides handles and default values for implemented functions

```bash
(venv) ~/ffmpeg2obj$ ffmpeg2obj --help
usage: ffmpeg2obj [-h] [-v] [--noop] [--force-cleanup] [--disable-upload] [-s SRC_DIR] [-d DST_DIR]
[-i IGNORED_SUBDIR] [-o OBJ_PREFIX] -b BUCKET_NAME [-e FILE_EXTENSION] [-vc VIDEO_CODEC]
[--pix-fmt PIX_FMT] [-l LANGS] [--width TARGET_WIDTH] [--resize] [--height TARGET_HEIGHT]
(-qp TARGET_QP | -crf TARGET_CRF)
usage: ffmpeg2obj [-h] [-v] [--noop] [--force-cleanup] [-s SRC_DIR] [-d DST_DIR] [-i IGNORED_SUBDIR] [-o OBJ_PREFIX] [-e FILE_EXTENSION]
[-vc VIDEO_CODEC] [--pix-fmt PIX_FMT] [-l LANGS] [--width TARGET_WIDTH] [--resize] [--concat] [--height TARGET_HEIGHT]
(-b BUCKET_NAME | --disable-upload) [-qp TARGET_QP | -crf TARGET_CRF]

Simple tool to compress a Blu-ray movie library and store it in object storage

Expand All @@ -67,7 +66,6 @@ options:
-v, --verbose show additional information
--noop script executes but takes no action
--force-cleanup cleans up even on upload failure
--disable-upload disables default upload to object storage and stores files locally
-s SRC_DIR, --source-dir SRC_DIR
source directory for media to be transcoded
-d DST_DIR, --destination-dir DST_DIR
Expand All @@ -76,8 +74,6 @@ options:
ignored subdirectories
-o OBJ_PREFIX, --obj-prefix OBJ_PREFIX
                        object key prefix for the transcoded media in object storage
-b BUCKET_NAME, --bucket-name BUCKET_NAME
source directory for media to be transcoded
-e FILE_EXTENSION, --file-extension FILE_EXTENSION
extension for the media files to be transcoded
-vc VIDEO_CODEC, --video-codec VIDEO_CODEC
Expand All @@ -87,8 +83,12 @@ options:
selected languages transcoding of the media files
--width TARGET_WIDTH target width for the media files to be transcoded
--resize scale input files to height x width
--concat concatenates files within same directory
--height TARGET_HEIGHT
target height for the media files to be transcoded
-b BUCKET_NAME, --bucket-name BUCKET_NAME
                        object storage bucket name for the transcoded media
--disable-upload disables default upload to object storage and stores files locally
-qp TARGET_QP Quantization Parameter for the media files to be transcoded
-crf TARGET_CRF Constant Rate Factor for the media files to be transcoded
(venv) ~/ffmpeg2obj$
Expand Down
9 changes: 9 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ classifiers = [
dependencies = [
"boto3==1.34.92",
"ffmpeg-python@ git+https://github.com/kkroening/ffmpeg-python.git",
"pydantic==2.7.1",
]
[project.optional-dependencies]
dev = [
Expand All @@ -42,3 +43,11 @@ profile = "black"
files = [
"src/ffmpeg2obj",
]
plugins = [
"pydantic.mypy",
]

[tool.pydantic-mypy]
init_forbid_extra = true
init_typed = true
warn_required_dynamic_aliases = true
45 changes: 34 additions & 11 deletions src/ffmpeg2obj/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import argparse
import hashlib
import json
import os
import tempfile
import time
from datetime import timedelta
from typing import Any
Expand Down Expand Up @@ -55,15 +57,15 @@ class ProcessedFile:
def __init__(
self,
object_name: str,
real_path: str,
real_paths: list[str],
file_extension: str,
dst_dir: str,
has_lockfile: bool,
is_uploaded: bool,
processing_params: ProcessingParams,
) -> None:
self.object_name = object_name
self.real_path = real_path
self.real_paths = real_paths
self.file_extension = file_extension
self.dst_dir = dst_dir if dst_dir.endswith("/") else dst_dir + "/"
self.has_lockfile = has_lockfile
Expand All @@ -79,7 +81,7 @@ def __init__(
def __str__(self) -> str:
out = []
out += ["object_name: " + self.object_name]
out += ["real_path: " + self.real_path]
out += ["real_path: " + ",".join(self.real_paths)]
out += ["has_lockfile: " + str(self.has_lockfile)]
out += ["is_uploaded: " + str(self.is_uploaded)]
out += ["hashed_name: " + self.hashed_name]
Expand All @@ -100,25 +102,30 @@ def update(self, obj_config: dict, bucket_name: str) -> None:

def get_coded_res(self) -> list[int]:
"""Returns height and width for the file from real_path"""
probe_result = ffmpeg.probe(self.real_path)
probe_result = ffmpeg.probe(self.real_paths[0])
video_stream = list(
filter(lambda x: x["codec_type"] == "video", probe_result["streams"])
)[0]
coded_res = [video_stream["coded_width"], video_stream["coded_height"]]
return coded_res

def convert(self) -> tuple[str, str, bool, timedelta]:
def convert(self, verbose: bool = False) -> tuple[str, str, bool, timedelta]:
"""Runs ffmpeg against the file from real_path and stores it in /tmp"""
convert_succeded = False
concat_enabled = len(self.real_paths) > 1
# core opts
opts_dict: dict[str, Any] = {
"c:v": self.processing_params.video_codec,
"pix_fmt": self.processing_params.pix_fmt,
"c:a": "copy",
"c:s": "copy",
"v": "quiet",
"v": "error",
}
# conditional opts
if (
self.processing_params.pix_fmt is not None
and self.processing_params.video_codec != "copy"
):
opts_dict.update({"pix_fmt": self.processing_params.pix_fmt})
if self.processing_params.target_crf is not None:
opts_dict.update({"crf": str(self.processing_params.target_crf)})
elif self.processing_params.target_qp is not None:
Expand All @@ -137,20 +144,36 @@ def convert(self) -> tuple[str, str, bool, timedelta]:
+ ":".join(str(x) for x in self.processing_params.target_res)
}
opts_dict.update(scale_dict)
stream = ffmpeg.input(self.real_path)
if concat_enabled:
temp_file_byte_contents = (
"\n".join(f"file '{path}'" for path in self.real_paths) + "\n"
).encode()
with tempfile.NamedTemporaryFile(delete=False) as temp_file:
temp_file.write(temp_file_byte_contents)
input_file = temp_file.name
stream = ffmpeg.input(input_file, f="concat", safe="0")
else:
input_file = self.real_paths[0]
stream = ffmpeg.input(input_file)
stream = ffmpeg.output(stream, self.dst_hashed_path, **opts_dict)
start_time = time.monotonic()
if verbose:
print(" ".join(ffmpeg.compile(stream)))
try:
std_out, std_err = ffmpeg.run(stream)
std_out, std_err = ffmpeg.run(
stream, capture_stdout=True, capture_stderr=True
)
except ffmpeg.Error as e:
print(f"Error occured: {e}")
end_time = time.monotonic()
duration = timedelta(seconds=end_time - start_time)
return e.stdout, e.stderr, convert_succeded, duration
return e.stdout.decode(), e.stderr.decode(), convert_succeded, duration
convert_succeded = True
if concat_enabled:
os.remove(input_file)
end_time = time.monotonic()
duration = timedelta(seconds=end_time - start_time)
return std_out, std_err, convert_succeded, duration
return std_out.decode(), std_err.decode(), convert_succeded, duration

def create_lock_file(self, obj_config: dict, bucket_name: str) -> bool:
"""Creates empty lock file on object storage bucket"""
Expand Down
78 changes: 61 additions & 17 deletions src/ffmpeg2obj/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,15 +110,14 @@ def parse_args() -> argparse.Namespace:
"--video-codec",
dest="video_codec",
type=str,
default="libx265",
default="copy",
help="video codec for transcoding of the media files",
)

parser.add_argument(
"--pix-fmt",
dest="pix_fmt",
type=str,
default="yuv420p10le",
help="pix fmt for transcoding of the media files",
)

Expand Down Expand Up @@ -147,6 +146,14 @@ def parse_args() -> argparse.Namespace:
help="scale input files to height x width",
)

parser.add_argument(
"--concat",
dest="concat",
action="store_true",
default=False,
help="concatenates files within same directory",
)

parser.add_argument(
"--height",
dest="target_height",
Expand All @@ -173,7 +180,7 @@ def parse_args() -> argparse.Namespace:
help="disables default upload to object storage and stores files locally",
)

qf_group = parser.add_mutually_exclusive_group(required=True)
qf_group = parser.add_mutually_exclusive_group()

qf_group.add_argument(
"-qp",
Expand All @@ -193,10 +200,18 @@ def parse_args() -> argparse.Namespace:


def get_source_files(
src_dir: str, ignored_subdir: str, obj_prefix: str, file_extension: str
) -> dict[str, str]:
"""Looks for source files"""
source_files = {}
src_dir: str,
ignored_subdir: str,
obj_prefix: str,
file_extension: str,
concat: bool,
) -> dict[str, list[str]]:
"""Looks for source files, performs concatenation of files in same directories if requested"""

def get_concat_base(object_name):
return "/".join(object_name.split("/")[:-1])

found_source_files: dict[str, str] = {}
for root, _, files in os.walk(src_dir):
for name in files:
if ignored_subdir not in root and name.lower().endswith(
Expand All @@ -207,7 +222,26 @@ def get_source_files(
"NFC", real_path.replace(src_dir, obj_prefix)
)
source_file_dict = {object_name: real_path}
source_files.update(source_file_dict)
found_source_files.update(source_file_dict)

source_files: dict[str, list[str]] = {}
if concat:
concat_base_mapping: dict[str, str] = {}
concat_object_name_mapping: dict[str, str] = {}
for object_name, real_path in found_source_files.items():
concat_base = get_concat_base(object_name)
concat_base_mapping.update({real_path: concat_base})
if concat_object_name_mapping.get(concat_base) is None:
concat_object_name_mapping.update({concat_base: object_name})
for real_path, concat_base in concat_base_mapping.items():
object_name = concat_object_name_mapping.get(concat_base)
if source_files.get(object_name) is None:
source_files.update({object_name: [real_path]})
else:
source_files.get(object_name).append(real_path)
else:
for object_name, real_path in found_source_files.items():
source_files.update({object_name: real_path})
return source_files


Expand Down Expand Up @@ -246,21 +280,21 @@ def get_bucket_files(


def get_processed_files(
source_files: dict,
source_files: dict[str, list[str]],
bucket_objects: list,
file_extension: str,
dst_dir: str,
processing_params: ProcessingParams,
) -> list[ProcessedFile]:
"""Returns list of processed files based on collected data"""
processed_files = []
for object_name, real_path in source_files.items():
for object_name, real_paths in source_files.items():
is_uploaded = object_name in bucket_objects
has_lockfile = object_name + ".lock" in bucket_objects
processed_files.append(
ProcessedFile(
object_name,
real_path,
real_paths,
file_extension,
dst_dir,
has_lockfile,
Expand All @@ -285,32 +319,34 @@ def convert_and_upload(

def convert(processed_file: ProcessedFile) -> bool:
"""Handles conversion of source file"""
convert_succeded = False
with lock:
if not noop:
# TODO: improve overall ffmpeg-python error handling and maybe show status
# TODO: improve overall communicating job progress to user
print("Starting conversion for " + processed_file.object_name)
std_out, std_err, convert_succeded, convert_duration = (
processed_file.convert()
processed_file.convert(verbose)
)
if verbose:
print(
f"Conversion of file {processed_file.object_name}"
f" took: {convert_duration}"
)
if std_out is not None:
if std_out != "":
print("\nffmpeg standard output:")
print(std_out)
if std_err is not None:
if std_err != "":
print("\nffmpeg standard error:")
print(std_err)
if convert_succeded and upload_enabled:
processed_file.create_lock_file(obj_config, bucket_name)
else:
print("Would have start conversion for " + processed_file.object_name)
return convert_succeded
return convert_succeded

def upload(processed_file: ProcessedFile) -> bool:
"""Handles upload of destination file to object storage"""
upload_succeded = False
if not processed_file.is_uploaded and os.path.isfile(
processed_file.dst_hashed_path
):
Expand Down Expand Up @@ -342,6 +378,7 @@ def upload(processed_file: ProcessedFile) -> bool:

def store(processed_file: ProcessedFile) -> bool:
"""Handles local storage of destination file"""
store_succeded = False
if os.path.isfile(processed_file.dst_hashed_path):
print(
f"Storing file {processed_file.object_name}" " in destination directory"
Expand Down Expand Up @@ -369,6 +406,9 @@ def needs_conversion(processed_file: ProcessedFile):
)

processed_file: ProcessedFile = queue.get()
convert_succeded = False
upload_succeded = False
store_succeded = False
if needs_conversion(processed_file):
convert_succeded = convert(processed_file)
if upload_enabled:
Expand All @@ -395,7 +435,11 @@ def main():
sys.exit(3)

source_files = get_source_files(
args.src_dir, args.ignored_subdir, args.obj_prefix, args.file_extension
args.src_dir,
args.ignored_subdir,
args.obj_prefix,
args.file_extension,
args.concat,
)

obj_resource = get_obj_resource(OBJ_CONFIG)
Expand Down