From e6a69681096912ce1acf693d84d0e819110e6a56 Mon Sep 17 00:00:00 2001
From: jenstandstad <jens.tandstad@gmail.com>
Date: Sat, 9 May 2026 12:38:05 +0200
Subject: [PATCH] Tweaks to ensure that

---
 gnommo/cli.py          | 158 ++++++++++++++++++++++++++++++-----------
 gnommo/models.py       |   8 ++-
 gnommo/preprocessor.py |  39 +++++++---
 gnommo/renderer.py     |  11 ++-
 gnommo/transformer.py  |  21 ++++--
 5 files changed, 173 insertions(+), 64 deletions(-)

diff --git a/gnommo/cli.py b/gnommo/cli.py
index 84c18ca..eb5a7c8 100644
--- a/gnommo/cli.py
+++ b/gnommo/cli.py
@@ -418,7 +418,9 @@ def _probe_audio_durations(
             if verbose:
                 print(f"  Audio '{audio_id}': file not found, skipping")
             continue
-        print(f"  Probing audio '{audio_id}' ({audio_path.name})...", end=" ", flush=True)
+        print(
+            f"  Probing audio '{audio_id}' ({audio_path.name})...", end=" ", flush=True
+        )
         try:
             duration = _get_audio_duration(audio_path)
             data[audio_id]["duration"] = round(duration, 3)
@@ -434,7 +436,11 @@ def _probe_audio_durations(
 
 
 def _probe_video_metadata(
-    project_path: Path, config, shared_assets_dir: Optional[Path], force: bool, verbose: bool
+    project_path: Path,
+    config,
+    shared_assets_dir: Optional[Path],
+    force: bool,
+    verbose: bool,
 ) -> None:
     """Probe and cache video file duration and audio presence into videos.json.
 
@@ -459,7 +465,11 @@ def _probe_video_metadata(
 
     # Load shared_assets/videos.json separately — shared probes write there
     shared_json_path = shared_assets_dir / "videos.json" if shared_assets_dir else None
-    shared_data = _read_json(shared_json_path) if shared_json_path and shared_json_path.exists() else {}
+    shared_data = (
+        _read_json(shared_json_path)
+        if shared_json_path and shared_json_path.exists()
+        else {}
+    )
 
     local_updated = False
     shared_updated = False
@@ -478,10 +488,14 @@ def _probe_video_metadata(
 
         if not force and "duration" in canonical and "has_audio" in canonical:
             if verbose:
-                print(f"  Video '{video_id}': cached ({canonical['duration']:.1f}s, audio={canonical['has_audio']})")
+                print(
+                    f"  Video '{video_id}': cached ({canonical['duration']:.1f}s, audio={canonical['has_audio']})"
+                )
             continue
 
-        base_dir = shared_assets_dir if (is_shared and shared_assets_dir) else videos_dir
+        base_dir = (
+            shared_assets_dir if (is_shared and shared_assets_dir) else videos_dir
+        )
 
         # Mirror renderer._resolve_video_path: try output_file first, then source_file
         video_path = None
@@ -507,7 +521,9 @@ def _probe_video_metadata(
                 print(f"  Video '{video_id}': file not found, skipping")
             continue
 
-        print(f"  Probing video '{video_id}' ({video_path.name})...", end=" ", flush=True)
+        print(
+            f"  Probing video '{video_id}' ({video_path.name})...", end=" ", flush=True
+        )
         try:
             duration = get_video_duration(video_path)
             has_audio = _has_audio_stream(video_path)
@@ -569,7 +585,10 @@ def _sync_shared_videos_to_local(
             # Propagate any metadata fields that were probed into shared_assets/videos.json
             changed = False
             for field in _METADATA_FIELDS:
-                if field in shared_entry and local_videos[video_id].get(field) != shared_entry[field]:
+                if (
+                    field in shared_entry
+                    and local_videos[video_id].get(field) != shared_entry[field]
+                ):
                     local_videos[video_id][field] = shared_entry[field]
                     changed = True
             if changed:
@@ -588,9 +607,13 @@ def _sync_shared_videos_to_local(
         with open(local_json_path, "w", encoding="utf-8") as f:
             json.dump(local_videos, f, indent=4)
         if added:
-            print(f"  Synced {len(added)} shared asset(s) to local videos.json: {', '.join(added)}")
+            print(
+                f"  Synced {len(added)} shared asset(s) to local videos.json: {', '.join(added)}"
+            )
         if metadata_updated:
-            print(f"  Updated metadata for {len(metadata_updated)} shared asset(s): {', '.join(metadata_updated)}")
+            print(
+                f"  Updated metadata for {len(metadata_updated)} shared asset(s): {', '.join(metadata_updated)}"
+            )
     elif verbose:
         print("  No new shared assets to sync to local videos.json")
 
@@ -887,8 +910,7 @@ def _import_narration_segments(narration_dir: Path, config, verbose: bool) -> No
 
         # If a raw_mov equivalent exists, skip — step 2 will handle it
         raw_mov_has_file = raw_dir.exists() and any(
-            (raw_dir / f"{segment_id}{ext}").exists()
-            for ext in _raw_video_exts
+            (raw_dir / f"{segment_id}{ext}").exists() for ext in _raw_video_exts
         )
         if raw_mov_has_file:
             continue
@@ -1200,7 +1222,9 @@ def cmd_preprocess(
     # --- Filter pipeline ---
     talkinghead_filter = (config.default_filters or {}).get("talkinghead", [])
     if not talkinghead_filter:
-        print("  ERROR: No 'talkinghead' filter defined in project.json default_filters.")
+        print(
+            "  ERROR: No 'talkinghead' filter defined in project.json default_filters."
+        )
         print("  Add a 'talkinghead' entry under 'default_filters' in project.json.")
         return 1
 
@@ -1211,8 +1235,11 @@ def cmd_preprocess(
         if not d.exists():
             return []
         return sorted(
-            f for f in d.iterdir()
-            if f.is_file() and f.suffix.lower() in _video_exts and not f.name.startswith(".")
+            f
+            for f in d.iterdir()
+            if f.is_file()
+            and f.suffix.lower() in _video_exts
+            and not f.name.startswith(".")
         )
 
     raw_mov_files = _scan_dir(raw_dir)
@@ -1224,7 +1251,9 @@ def cmd_preprocess(
     elif raw_mp4_files:
         source_files = raw_mp4_files
         using_compressed = True
-        print("  WARNING: raw_mov/ is empty — using compressed files from raw_mp4/ instead. Quality may be reduced.")
+        print(
+            "  WARNING: raw_mov/ is empty — using compressed files from raw_mp4/ instead. Quality may be reduced."
+        )
     else:
         print(f"  No source files found in raw_mov/ or raw_mp4/.")
         print(f"  Place .mov recordings in {raw_dir}")
@@ -1259,7 +1288,9 @@ def cmd_preprocess(
         raw_filter = existing_entry.get("filter")
         if raw_filter:
             if isinstance(raw_filter, str):
-                filter_list = (config.default_filters or {}).get(raw_filter, talkinghead_filter)
+                filter_list = (config.default_filters or {}).get(
+                    raw_filter, talkinghead_filter
+                )
             else:
                 filter_list = raw_filter
         else:
@@ -1276,7 +1307,9 @@ def cmd_preprocess(
 
     if not segments_to_process:
         if skipped_count:
-            print(f"\n  All {skipped_count} segment(s) already preprocessed. Use --force to reprocess.")
+            print(
+                f"\n  All {skipped_count} segment(s) already preprocessed. Use --force to reprocess."
+            )
         else:
             print("\n  No segments to preprocess.")
         return 0
@@ -1294,19 +1327,27 @@ def cmd_preprocess(
 
     if workers > 1 and len(segments_to_process) > 1:
         num_workers = min(workers, len(segments_to_process))
-        print(f"\n  Processing {len(segments_to_process)} segments in parallel ({num_workers} workers)")
+        print(
+            f"\n  Processing {len(segments_to_process)} segments in parallel ({num_workers} workers)"
+        )
 
         def process_segment_task(task):
             seg_id, seg_source = task
             preprocess_video(
-                narration_dir, seg_id, seg_source,
-                verbose=False, force=force, custom_gnommo_scratch=gnommo_scratch,
+                narration_dir,
+                seg_id,
+                seg_source,
+                verbose=False,
+                force=force,
+                custom_gnommo_scratch=gnommo_scratch,
             )
             return task
 
         completed = 0
         with ThreadPoolExecutor(max_workers=num_workers) as executor:
-            futures = {executor.submit(process_segment_task, t): t for t in segments_to_process}
+            futures = {
+                executor.submit(process_segment_task, t): t for t in segments_to_process
+            }
             for future in as_completed(futures):
                 seg_id, seg_source = future.result()
                 completed += 1
@@ -1321,8 +1362,12 @@ def cmd_preprocess(
             print(f"    Output: {segment_source.output_file}")
             print(f"    Filters: {len(segment_source.filter)} step(s)")
             preprocess_video(
-                narration_dir, segment_id, segment_source,
-                verbose, force, gnommo_scratch,
+                narration_dir,
+                segment_id,
+                segment_source,
+                verbose,
+                force,
+                gnommo_scratch,
             )
             output_path = narration_dir / segment_source.output_file
             if output_path.exists():
@@ -1330,8 +1375,17 @@ def cmd_preprocess(
 
     # --- Update narration.json ---
     # Write processed segments; preserve any existing per-segment settings (skip/take/etc.)
-    _PRESERVE_KEYS = ("skip", "take", "begin", "end", "cutout", "use_audio_channels",
-                      "defer_loudnorm", "volume", "zoom")
+    _PRESERVE_KEYS = (
+        "skip",
+        "take",
+        "begin",
+        "end",
+        "cutout",
+        "use_audio_channels",
+        "defer_loudnorm",
+        "volume",
+        "zoom",
+    )
     for segment_id, segment_source in successfully_processed:
         existing_entry = existing_narration.get(segment_id, {})
         entry: dict = {}
@@ -1930,9 +1984,7 @@ def cmd_stitch(
 
         # Get cutout from first narration segment
         first_seg = narration[segment_ids[0]]
-        cutout = (
-            first_seg.cutout or "talkinghead"
-        )
+        cutout = first_seg.cutout or "talkinghead"
 
         # Create/update narration_combined entry
         existing_videos["narration_combined"] = {
@@ -2043,12 +2095,32 @@ def _print_render_plan_details(plan, marker_timings, slides: dict) -> None:
                     print(f'  {marker_id:6}  {time_str}{conf_str}  "{context}"')
             elif any(
                 marker_id.startswith(p)
-                for p in ("video:", "vft:", "vfb:", "vst:", "vsb:", "vft:", "vfbp:", "vstp:", "vsbp:")
+                for p in (
+                    "video:",
+                    "vft:",
+                    "vfb:",
+                    "vst:",
+                    "vsb:",
+                    "vft:",
+                    "vfbp:",
+                    "vstp:",
+                    "vsbp:",
+                )
             ):
                 aligned_count += 1
                 pfx_len = next(
                     len(p)
-                    for p in ("video:", "vft:", "vfb:", "vst:", "vsb:", "vft:", "vfbp:", "vstp:", "vsbp:")
+                    for p in (
+                        "video:",
+                        "vft:",
+                        "vfb:",
+                        "vst:",
+                        "vsb:",
+                        "vft:",
+                        "vfbp:",
+                        "vstp:",
+                        "vsbp:",
+                    )
                     if marker_id.startswith(p)
                 )
                 video_id = marker_id[pfx_len:]
@@ -2062,8 +2134,7 @@ def _print_render_plan_details(plan, marker_timings, slides: dict) -> None:
                     cutout_name = "?"
                     end_on = "next_slide"
                     layer_tag = ""
-                
-                
+
                 cache_ind = " 📁" if video_id in plan.cached_files else ""
                 print(
                     f"  {marker_id:20}  {time_str}  in '{cutout_name}' [{end_on}]{layer_tag}{cache_ind}"
@@ -2790,13 +2861,14 @@ def cmd_all(
 
     print("\n>>> Step 2/6: Preprocess\n")
     t0 = time.time()
-    result = cmd_preprocess(project_path, verbose, dry_run, cascade_force, workers=1, res=res)
+    result = cmd_preprocess(
+        project_path, verbose, dry_run, cascade_force, workers=1, res=res
+    )
     if result != 0:
         return result
-    if (
-        _files_modified_since(project_path, t0, "*_processed.mov")
-        or _files_modified_since(project_path, t0, "*_processed.webm")
-    ):
+    if _files_modified_since(
+        project_path, t0, "*_processed.mov"
+    ) or _files_modified_since(project_path, t0, "*_processed.webm"):
         cascade_force = True
 
     print("\n>>> Step 3/6: Trim\n")
@@ -2938,14 +3010,15 @@ def cmd_description(project_path: Path, verbose: bool) -> int:
 # Files and directories excluded from all sync/archive/load operations.
 # Covers intermediate processing artifacts, chunk scratch dirs, venv, and
 # common OS/editor noise.
+
 _RSYNC_EXCLUDES = [
     # Intermediate processing files
     "media/narration/intermediate/",
     "media/narration/intermediate/**",
     "media/videos/intermediate/",
     "media/videos/intermediate/**",
-    "media/videos/processed/",
-    "media/videos/processed/**",
+    "media/narration/processed/",
+    "media/narration/processed/**",
     # Chunk scratch directories
     "**/chunks/",
     "**/chunks/**",
@@ -3145,7 +3218,9 @@ def cmd_sync(project_path: Path, verbose: bool, dry_run: bool, download: bool) -
         else:
             remote_dir = f"{server['path']}/{project_path.name}"
             ssh_cmd = [
-                "ssh", "-p", server["port"],
+                "ssh",
+                "-p",
+                server["port"],
                 f"{server['user']}@{server['host']}",
                 f"mkdir -p {remote_dir}",
             ]
@@ -3160,7 +3235,8 @@ def cmd_sync(project_path: Path, verbose: bool, dry_run: bool, download: bool) -
         "rsync",
         "-av",
         "--progress",
-        "-e", f"ssh -p {server['port']}",
+        "-e",
+        f"ssh -p {server['port']}",
         *[f"--exclude={p}" for p in _RSYNC_EXCLUDES],
         src,
         dest,
diff --git a/gnommo/models.py b/gnommo/models.py
index 712ea77..f3f264a 100644
--- a/gnommo/models.py
+++ b/gnommo/models.py
@@ -298,9 +298,13 @@ class VideoSource:
     )
     volume: float = 1.0  # Volume multiplier (1.0=full, >1.0=boost, <1.0=reduce)
     layer: str = "above"  # "above" = renders on top of slides; "below" = behind slides
-    duration: Optional[float] = None  # Pre-probed file duration in seconds (set by import)
+    duration: Optional[
+        float
+    ] = None  # Pre-probed file duration in seconds (set by import)
     has_audio: Optional[bool] = None  # Pre-detected audio presence (set by import)
-    end_on: Optional[str] = None  # When video event ends: "next_slide" | "end" | "take" (None = marker-type default)
+    end_on: Optional[
+        str
+    ] = None  # When video event ends: "next_slide" | "end" | "take" (None = marker-type default)
 
 
 @dataclass
diff --git a/gnommo/preprocessor.py b/gnommo/preprocessor.py
index f5aab99..92e09bb 100644
--- a/gnommo/preprocessor.py
+++ b/gnommo/preprocessor.py
@@ -302,7 +302,6 @@ def run_ffmpeg_with_progress(cmd, duration, description="Processing"):
     while True:
         # If process ended and no more output, break
         if p.poll() is not None:
-
             # drain any remaining output quickly
             while True:
                 line = p.stdout.readline()
@@ -358,7 +357,9 @@ def run_ffmpeg_with_progress(cmd, duration, description="Processing"):
     else:
         code = p.returncode
         # On macOS/Linux, -9 means SIGKILL (OOM kill by OS), -6 = SIGABRT
-        signal_hint = " (OOM kill)" if code == -9 else (" (abort)" if code == -6 else "")
+        signal_hint = (
+            " (OOM kill)" if code == -9 else (" (abort)" if code == -6 else "")
+        )
         sys.stdout.write(f"\n          FFmpeg exited with code {code}{signal_hint}\n")
         sys.stdout.flush()
 
@@ -371,12 +372,19 @@ def _has_audio_stream(video_path: Path) -> bool:
     """Return True if the file has a real (non-ghost) audio stream."""
     result = subprocess.run(
         [
-            "ffprobe", "-v", "error",
-            "-analyzeduration", "0",
-            "-probesize", "1000000",
-            "-select_streams", "a:0",
-            "-show_entries", "stream=index,nb_frames",
-            "-of", "csv=p=0",
+            "ffprobe",
+            "-v",
+            "error",
+            "-analyzeduration",
+            "0",
+            "-probesize",
+            "1000000",
+            "-select_streams",
+            "a:0",
+            "-show_entries",
+            "stream=index,nb_frames",
+            "-of",
+            "csv=p=0",
             str(video_path),
         ],
         capture_output=True,
@@ -1380,9 +1388,18 @@ def _process_chunk_to_prores4444(
     # FFmpeg can return 0 but write a corrupt/incomplete file (e.g. moov atom
     # missing) when faststart rewrite fails or disk is under pressure.
     probe = subprocess.run(
-        ["ffprobe", "-v", "error", "-show_entries", "format=duration",
-         "-of", "csv=p=0", str(output_path)],
-        capture_output=True, text=True,
+        [
+            "ffprobe",
+            "-v",
+            "error",
+            "-show_entries",
+            "format=duration",
+            "-of",
+            "csv=p=0",
+            str(output_path),
+        ],
+        capture_output=True,
+        text=True,
     )
     if probe.returncode != 0 or not probe.stdout.strip():
         raise PreprocessError(
diff --git a/gnommo/renderer.py b/gnommo/renderer.py
index 6a9cf15..02b0fba 100644
--- a/gnommo/renderer.py
+++ b/gnommo/renderer.py
@@ -410,7 +410,9 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]:
         input_idx += 1
         has_audio = event.video_source.has_audio
         if has_audio is None:
-            print(f"  Warning: no cached metadata for '{event.video_source.source_file}' — run 'gnommo import' to avoid slow probing")
+            print(
+                f"  Warning: no cached metadata for '{event.video_source.source_file}' — run 'gnommo import' to avoid slow probing"
+            )
             has_audio = _has_audio_stream(video_path)
         if has_audio:
             video_events_with_audio.add(i)
@@ -436,7 +438,9 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]:
         input_idx += 1
         has_audio = event.video_source.has_audio
         if has_audio is None:
-            print(f"  Warning: no cached metadata for '{event.video_source.source_file}' — run 'gnommo import' to avoid slow probing")
+            print(
+                f"  Warning: no cached metadata for '{event.video_source.source_file}' — run 'gnommo import' to avoid slow probing"
+            )
             has_audio = _has_audio_stream(video_path)
         if has_audio:
             outro_events_with_audio.add(i)
@@ -468,7 +472,8 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]:
             # Cache duration for crossfade loop filter
             if event.audio_def.loop and event.audio_def.overlap:
                 audio_durations[event.audio_id] = (
-                    file_duration if file_duration is not None
+                    file_duration
+                    if file_duration is not None
                     else _get_audio_duration(audio_path)
                 )
 
diff --git a/gnommo/transformer.py b/gnommo/transformer.py
index 4a812b7..cfad2fe 100644
--- a/gnommo/transformer.py
+++ b/gnommo/transformer.py
@@ -134,7 +134,18 @@ def _is_known_marker(
         return True
 
     # Video/narration triggers (all supported prefixes)
-    _VIDEO_PREFIXES = ("video:", "narration:", "vft:", "vfb:", "vst:", "vsb:", "vftp:", "vfbp:", "vstp:", "vsbp:")
+    _VIDEO_PREFIXES = (
+        "video:",
+        "narration:",
+        "vft:",
+        "vfb:",
+        "vst:",
+        "vsb:",
+        "vftp:",
+        "vfbp:",
+        "vstp:",
+        "vsbp:",
+    )
     if any(marker_id.startswith(p) for p in _VIDEO_PREFIXES):
         return True
 
@@ -923,9 +934,7 @@ def _extract_video_events(
                     f"Marker [video:{video_id}] — cutout '{video_source.cutout}' is not defined in project config. "
                     f"Available: {list(cutouts.keys())}"
                 )
-            video_markers.append(
-                (timing.timestamp, video_id, "video", None, None)
-            )
+            video_markers.append((timing.timestamp, video_id, "video", None, None))
             continue
 
         # --- [narration:xxx] ---
@@ -946,9 +955,7 @@ def _extract_video_events(
                     f"Marker [narration:{video_id}] — cutout '{video_source.cutout}' is not defined in project config. "
                     f"Available: {list(cutouts.keys())}"
                 )
-            video_markers.append(
-                (timing.timestamp, video_id, "narration", None, None)
-            )
+            video_markers.append((timing.timestamp, video_id, "narration", None, None))
 
     events: list[VideoEvent] = []
     for (