Commit prior to change to video tag below / above layering

2026-03-16 16:57:54 +01:00
parent 757d966803
commit e734dbfcac
12 changed files with 416 additions and 154 deletions
@@ -23,26 +23,8 @@
    "talkinghead": [
      {
        "type": "audio_normalize",
-        "highpass": 100,
-        "room_eq": true,
-        "room_eq_freq": 300,
-        "room_eq_gain": -4,
-        "room_eq_width": 1.5,
-        "dereverb_model": "shared_assets/models/std.rnnn",
-        "dereverb_mix": 0.8,
-        "denoise": true,
-        "noise_floor": -25,
-        "gate": true,
-        "gate_threshold": -35,
-        "gate_range": -20,
-        "compress": true,
-        "threshold": -20,
-        "ratio": 4,
-        "attack": 5,
-        "release": 50,
-        "makeup": 2,
        "normalize": true,
-        "target_lufs": -16,
+        "target_lufs": -14,
        "target_lra": 11,
        "target_tp": -1.5
      },
@@ -101,5 +83,5 @@
  },
  "manuscript": "manuscript.txt",
  "shorts": [],
-  "output_video": "out/final.mp4"
+  "output_video": "TRAILER.mp4"
 }
@@ -14,7 +14,7 @@
  "videos": "media/videos/videos.json",
  "slides": "media/slides/Example/slides.json",
  "audio": "media/audio/audio.json",
-   "default_filters": {                                                                                                                                                  
+  "default_filters": {                                                                                                                                                  
      "talkinghead": [
        {
          "type": "audio_normalize",
@@ -2,12 +2,15 @@

 import argparse
 import json
+from logging import config
 import re
 import shutil
 import subprocess
 import sys
 from datetime import datetime
 from pathlib import Path
+
+from gnommo.parser import _read_json
 from . import __version__
 from .errors import GnommoError, ParseError, ValidationError, RenderError
 from .cache import get_cache_info, resolve_with_cache
@@ -35,10 +38,15 @@ Examples:
  gnommo -p video1 import              Generate slides.json from images
  gnommo -p video1 pre                 Preprocess videos (chroma key, etc.)
  gnommo -p video1 stitch --res tiny -f  Fast stitch with new begin/end values
+  gnommo -p video1 trim                Auto-detect silence and set skip/take in narration.json
+  gnommo -p video1 trim --force        Redo trim even for segments that already have skip/take
+  gnommo -p video1 trim --threshold -25  Raise threshold to ignore clothing/room noise
+  gnommo -p video1 trim -v             Show detected silence periods for debugging
  gnommo -p video1 all                 Full pipeline: transcribe → align → render
  gnommo -p video1 render --dry-run    Show FFmpeg command without running
  gnommo -p video1 description         Generate YouTube description file
-  gnommo -p video1 transcribe --final  Transcribe final.mp4 and generate SRT for YouTube
+  gnommo -p video1 transcribe          Narration file for timing of slides
+  gnommo -p video1 transcribe --final  Transcribe outputted file and generate SRT for YouTube
  gnommo -p video1 archive             Sync project to external cache storage
  gnommo -p video1 archive --dry-run   Preview what would be synced
  gnommo -p video1 extract-audio --combined          Extract audio from narration_combined.mov
@@ -71,6 +79,7 @@ Examples:
            "preprocess",
            "pre",
            "stitch",
+            "trim",
            "render",
            "all",
            "transcribe",
@@ -156,6 +165,12 @@ Examples:
        action="store_true",
        help="Target production server (GNOMMOWEB_PROD_URL / GNOMMOWEB_PROD_API_KEY)",
    )
+    parser.add_argument(
+        "--threshold",
+        type=float,
+        default=-40.0,
+        help="For trim: silence threshold in dB (default: -40). Raise (e.g. -25) to ignore clothing/room noise.",
+    )

    args = parser.parse_args()

@@ -181,6 +196,8 @@ Examples:
                args.workers,
                args.res,
            )
+        elif action == "trim":
+            return cmd_trim(project_path, args.verbose, args.force, args.threshold)
        elif action in ("stitch"):
            return cmd_stitch(
                project_path,
@@ -223,7 +240,7 @@ Examples:
            return cmd_pull(project_path, args.verbose, args.force, args.prod)
        elif action == "handoff":
            from .handoff import cmd_handoff
-            return cmd_handoff(project_path, args.verbose, args.file, args.prod)
+            return cmd_handoff(project_path, args.verbose, args.file, args.prod, args.res)

    except GnommoError as e:
        print(f"Error: {e}", file=sys.stderr)
@@ -242,7 +259,7 @@ Examples:

 def cmd_import(project_path: Path, force: bool, verbose: bool) -> int:
    """Import assets and generate metadata JSON files."""
-    from .parser import parse_project_config
+    from .parser import parse_project_config, _read_json

    print(f"Importing assets for: {project_path.name}")

@@ -367,8 +384,7 @@ def _import_shared_assets(shared_assets_dir: Path, verbose: bool) -> None:
    videos_json_path = shared_assets_dir / "videos.json"
    existing_videos: dict = {}
    if videos_json_path.exists():
-        with open(videos_json_path, "r", encoding="utf-8") as f:
-            existing_videos = json.load(f)
+        existing_videos = _read_json(videos_json_path)

    # Add new videos (don't overwrite existing)
    added_count = 0
@@ -474,8 +490,7 @@ def _import_videos(videos_dir: Path, config, verbose: bool) -> None:
    videos_json_path = videos_dir / "videos.json"
    existing_videos: dict = {}
    if videos_json_path.exists():
-        with open(videos_json_path, "r", encoding="utf-8") as f:
-            existing_videos = json.load(f)
+        existing_videos = _read_json(videos_json_path)

    # Get available filter presets from config
    default_filters = config.default_filters if config else {}
@@ -558,8 +573,7 @@ def _import_narration_segments(narration_dir: Path, config, verbose: bool) -> No
    narration_json_path = narration_dir / "narration.json"
    existing_narration: dict = {}
    if narration_json_path.exists():
-        with open(narration_json_path, "r", encoding="utf-8") as f:
-            existing_narration = json.load(f)
+        existing_narration = _read_json(narration_json_path)

    # Get available filter presets from config
    default_filters = config.default_filters if config else {}
@@ -583,9 +597,11 @@ def _import_narration_segments(narration_dir: Path, config, verbose: bool) -> No

        # Apply talkinghead preset if available
        if "talkinghead" in default_filters:
-            narration_entry["filter"] = "talkinghead"
            narration_entry["cutout"] = "talkinghead"

+        if "talkinghead" in default_filters:
+            narration_entry["filter"] = "talkinghead"
+
        # Default audio settings for narration
        narration_entry["use_audio_channels"] = "left"
        narration_entry["defer_loudnorm"] = True
@@ -656,7 +672,7 @@ def _import_presenter_notes(

    # Parse JSON output from JXA script
    try:
-        notes_data = json.loads(proc.stdout)
+        notes_data = json.loads(proc.stdout) if proc.stdout.strip() else []
    except json.JSONDecodeError as e:
        print(f"  Error parsing notes JSON: {e}", file=sys.stderr)
        return
@@ -714,9 +730,11 @@ def cmd_validate(project_path: Path, verbose: bool) -> int:
        print(f"  - Videos defined: {len(videos)}")

    # Validate
-    validate_project(
+    warnings = validate_project(
        project_path, markers, config, slides, videos, videos_dir, malformed
    )
+    for w in warnings:
+        print(f"  Warning: {w}")

    print("Validation passed.")
    return 0
@@ -735,9 +753,9 @@ def cmd_preprocess(
    workers: int = 1,
    res: str = "full",
 ) -> int:
-    """Run preprocessing pipeline on narration segments."""
+    """Run preprocessing pipeline on narration segments and videos."""
    from concurrent.futures import ThreadPoolExecutor, as_completed
-    from .parser import parse_project_config, parse_narration
+    from .parser import parse_project_config, parse_narration, parse_videos
    from .preprocessor import (
        preprocess_video,
        create_downscaled_videos,
@@ -834,10 +852,118 @@ def cmd_preprocess(
            )

    print(f"\n  Run 'gnommo -p <project> stitch' to stitch narration segments into one fulll length narration file.")
+
+    # Also preprocess videos from videos.json (e.g. chroma key, color grade)
+    videos, videos_dir = parse_videos(project_path, config)
+    videos_to_process = [
+        (vid_id, vid_src)
+        for vid_id, vid_src in videos.items()
+        if vid_src.filter and not vid_src.is_shared
+    ]
+
+    if videos_to_process:
+        print(f"\n  Processing {len(videos_to_process)} video(s) from videos.json:")
+        for video_id, video_source in videos_to_process:
+            if video_source.output_file:
+                output_path = videos_dir / video_source.output_file
+                if output_path.exists() and not force:
+                    print(f"    {video_id}: output exists, skipping (use --force to reprocess)")
+                    continue
+            if dry_run:
+                print(f"    Would preprocess: {video_id} ({len(video_source.filter)} filter(s))")
+                continue
+            print(f"    Processing: {video_id}")
+            preprocess_video(videos_dir, video_id, video_source, verbose, force, gnommo_scratch)
+
    print("\nPreprocessing complete.")
    return 0


+# =============================================================================
+# Trim Command — auto-detect silence bounds for narration segments
+# =============================================================================
+
+
+def cmd_trim(
+    project_path: Path,
+    verbose: bool,
+    force: bool = False,
+    threshold_db: float = -40.0,
+) -> int:
+    """
+    Auto-detect silence bounds for all narration segments and write skip/take
+    values into narration.json.
+
+    For each segment:
+      skip = max(0, first_sound_time - 0.5)
+      take = last_sound_time + 3.0 - skip  (capped at file duration)
+
+    Segments that already have explicit skip or take values are left unchanged
+    unless --force is passed.
+
+    Use --threshold to adjust sensitivity, e.g. -25 to ignore clothing/room
+    noise that sits above -40 dB.
+    """
+    from .parser import parse_project_config, parse_narration
+    from .preprocessor import detect_silence_bounds, get_video_duration
+
+    print(f"Auto-trimming narration: {project_path.name}")
+
+    config = parse_project_config(project_path)
+    narration, narration_dir = parse_narration(project_path, config)
+
+    if not narration:
+        print("  No narration segments found in narration.json")
+        print("  Run 'gnommo -p <project> import' first.")
+        return 1
+
+    narration_json_path = narration_dir / "narration.json"
+    raw_data: dict = _read_json(narration_json_path)
+
+    updated = 0
+    for seg_id in sorted(narration.keys()):
+        seg = narration[seg_id]
+
+        existing = raw_data.get(seg_id, {})
+        has_explicit = "skip" in existing or "take" in existing
+        if has_explicit and not force:
+            print(f"  {seg_id}: already trimmed, skipping (use --force to redo)")
+            continue
+
+        # Always analyse the raw source file — it's always present and has the
+        # same audio as any processed version (processing is video-only).
+        source_path = narration_dir / seg.source_file
+        if not source_path.exists():
+            print(f"  {seg_id}: source file not found ({seg.source_file}), skipping")
+            continue
+
+        print(f"  {seg_id}: analysing...", end="", flush=True)
+        first_sound, last_sound = detect_silence_bounds(source_path, noise_threshold_db=threshold_db, verbose=verbose)
+        total_dur = get_video_duration(source_path)
+
+        new_skip = max(0.0, round(first_sound - 0.5, 3))
+        new_take = round(min(total_dur - new_skip, last_sound + 3.0 - new_skip), 3)
+        new_take = max(0.0, new_take)
+
+        print(
+            f" first={first_sound:.2f}s  last={last_sound:.2f}s"
+            f"  →  skip={new_skip:.3f}s  take={new_take:.3f}s"
+        )
+
+        raw_data[seg_id]["skip"] = new_skip
+        raw_data[seg_id]["take"] = new_take
+        updated += 1
+
+    if updated > 0:
+        with open(narration_json_path, "w", encoding="utf-8") as f:
+            json.dump(raw_data, f, indent=2)
+        print(f"\n  Updated {updated} segment(s) in narration.json")
+    else:
+        print(f"\n  No segments updated")
+
+    return 0
+
+
 # =============================================================================
 # Stitch Command (fast iteration on narration segments)
 # =============================================================================
@@ -903,19 +1029,17 @@ def cmd_stitch(
    if stitch_output.exists() and not force:
        print(f"\n  Combined narration exists: {stitch_output.name}")
        print("  (use --force to regenerate)")
-        return 0
-
-    stitch_narration_segments(
-        narration_dir,
-        segment_ids,
-        narration,
-        stitch_output,
-        verbose=verbose,
-        default_end_trim=config.default_end_trim if config else 0.0,
-    )
-    
-    # Run import videos again, because at this point narration_combined might have been created.
-    _import_videos(videos_dir, config, verbose)
+    else:
+        stitch_narration_segments(
+            narration_dir,
+            segment_ids,
+            narration,
+            stitch_output,
+            verbose=verbose,
+            default_end_trim=config.default_end_trim if config else 0.0,
+        )
+        # Run import videos again, because at this point narration_combined might have been created.
+        _import_videos(videos_dir, config, verbose)

    # Always update the MAIN videos.json (parent of subdir when using low/tiny res)
    # Downscaled dirs only affect file paths, not JSON metadata updates
@@ -924,12 +1048,11 @@ def cmd_stitch(
    if True:  # Always update JSON regardless of proxy mode
        existing_videos: dict = {}
        if videos_json_path.exists():
-            with open(videos_json_path, "r", encoding="utf-8") as f:
-                existing_videos = json.load(f)
+            existing_videos = _read_json(videos_json_path)

        # Get cutout from first narration segment
        first_seg = narration[segment_ids[0]]
-        cutout = first_seg.cutout or "talkinghead"
+        cutout = first_seg.cutout or "talkinghead"  # Default to audioonly if no cutout specified

        # Create/update narration_combined entry
        existing_videos["narration_combined"] = {
@@ -1149,7 +1272,10 @@ def cmd_render(

    # Non-full res: use downscaled video directory, create on-the-fly if needed
    if res != "full":
-        videos_dir = ensure_downscaled_files_exist(videos_dir, res, force=False, verbose=verbose)
+        # Skip downscaling sources that have a preprocessed output_file — the
+        # renderer will use the full-res processed version instead, saving disk space.
+        sources_with_output = {v.source_file for v in videos.values() if v.output_file}
+        videos_dir = ensure_downscaled_files_exist(videos_dir, res, force=False, verbose=verbose, skip_sources=sources_with_output)
        if verbose:
            print(f"  Using {res} dir: {videos_dir}")
    audio, audio_dir = parse_audio(project_path, config)
@@ -1246,9 +1372,11 @@ def cmd_render(

    # Stage 2: Validate
    print("\n[2/4] Validating...")
-    validate_project(
+    warnings = validate_project(
        project_path, markers, config, slides, videos, videos_dir, malformed
    )
+    for w in warnings:
+        print(f"  Warning: {w}")
    print("  Passed.")

    # Stage 3: Transform (includes on-the-fly alignment)
@@ -1310,14 +1438,19 @@ def cmd_render(
            print(f"\n  Continuing anyway due to --force flag...")

    # Stage 4: Render
-    # Generate output filename based on slide range and resolution
-    base_name = "preview" if res == "low" else "final"
-    if slide_range:
+    # Determine output filename and directory
+    if config.output_video:
+        out_filename = config.output_video
+    elif slide_range:
        start, end = slide_range
        range_suffix = f"_{start}-{end}" if end else f"_{start}-end"
-        output_path = project_path / "out" / f"{base_name}{range_suffix}.mp4"
+        out_filename = f"final{range_suffix}.mp4"
    else:
-        output_path = project_path / "out" / f"{base_name}.mp4"
+        out_filename = f"{config.co}.mp4"
+
+    out_dir = project_path / "out" / res if res != "full" else project_path / "out"
+    output_path = out_dir / out_filename
+    plan.output_path = output_path

    if dry_run:
        print("\n[4/4] FFmpeg command (dry run):")
@@ -1372,15 +1505,17 @@ def cmd_transcribe(
    from .transcriber import transcribe_video, save_transcript, words_to_srt
    from .parser import parse_project_config, parse_videos
    from .preprocessor import ensure_downscaled_files_exist
+    config = parse_project_config(project_path)
    
    # Handle --final mode: transcribe the rendered output for YouTube captions
    if final:
-        return _transcribe_final(project_path, verbose)
+        path = project_path / "out" / f"{config.output_video}.mp4"
+        return _transcribe_final(path, verbose)

    mode_str = f" ({res.upper()})" if res != "full" else ""
    print(f"Transcribing: {project_path.name}{mode_str}")

-    config = parse_project_config(project_path)
+   
    videos, videos_dir = parse_videos(project_path, config)
    if not videos:
        print("Error: No videos defined in videos.json", file=sys.stderr)
@@ -1433,23 +1568,20 @@ def cmd_transcribe(
    return 0


-def _transcribe_final(project_path: Path, verbose: bool) -> int:
+def _transcribe_final(final_video: Path, verbose: bool) -> int:
    """
    Transcribe the final rendered video and generate SRT captions for YouTube.

-    Looks for out/final.mp4 and creates out/final.srt suitable for upload.
+    Looks and creates out filename.srt suitable for upload.
    """
    from .transcriber import transcribe_video, save_transcript, words_to_srt

-    print(f"Transcribing final output: {project_path.name}")
+    print(f"Transcribing final output: {final_video}")

-    # Look for the final rendered video
-    out_dir = project_path / "out"
-    final_video = out_dir / "final.mp4"

    if not final_video.exists():
        print(f"Error: Final video not found: {final_video}", file=sys.stderr)
-        print(f"Run 'gnommo -p {project_path.name} render' first.", file=sys.stderr)
+        print("Run 'gnommo render' first.", file=sys.stderr)
        return 1

    print(f"  Video: {final_video.name}")
@@ -1462,11 +1594,11 @@ def _transcribe_final(project_path: Path, verbose: bool) -> int:
        return 1

    # Save JSON transcript
-    transcript_path = out_dir / "final.transcript.json"
+    transcript_path = final_video.with_suffix(".transcript.json")
    save_transcript(words, transcript_path)

    # Generate SRT captions
-    srt_path = out_dir / "final.srt"
+    srt_path = final_video.with_suffix(".srt")
    srt_content = words_to_srt(words)
    srt_path.write_text(srt_content, encoding="utf-8")

@@ -1597,33 +1729,33 @@ def cmd_all(
    res: str = "full",
    force: bool = False,
 ) -> int:
-    """Run full pipeline: transcribe → render (alignment is automatic)."""
-    from .parser import parse_project_config, parse_videos
+    """Run full pipeline: preprocess → stitch → render → handoff."""
+    from .handoff import cmd_handoff

    print(f"=== Full Pipeline: {project_path.name} ===\n")

-    # Check if transcription exists
-    config = parse_project_config(project_path)
-    videos, videos_dir = parse_videos(project_path, config)
-    result = _find_narration_video(config, videos)
-    if result:
-        video_id, video_source = result
-        video_path = videos_dir / video_source.source_file
-        transcript_path = video_path.with_suffix(".transcript.json")
+    print(">>> Step 1/5: Import\n")
+    result = cmd_import(project_path, force, verbose)
+    if result != 0:
+        return result

-        # Try cache fallback for transcript
-        resolved_transcript, _ = resolve_with_cache(transcript_path, project_path)
-        if not resolved_transcript.exists():
-            print(">>> Step 1/2: Transcribe\n")
-            result = cmd_transcribe(project_path, verbose)
-            if result != 0:
-                return result
-        else:
-            print(f">>> Step 1/2: Transcribe (cached: {resolved_transcript.name})\n")
+    print("\n>>> Step 2/5: Preprocess\n")
+    result = cmd_preprocess(project_path, verbose, dry_run, force, workers=1, res=res)
+    if result != 0:
+        return result

-    # Render (alignment happens automatically)
-    print("\n>>> Step 2/2: Render\n")
-    return cmd_render(project_path, verbose, dry_run, res=res, force=force)
+    print("\n>>> Step 3/5: Stitch\n")
+    result = cmd_stitch(project_path, verbose, force, res=res)
+    if result != 0:
+        return result
+
+    print("\n>>> Step 4/5: Render\n")
+    result = cmd_render(project_path, verbose, dry_run, res=res, force=force)
+    if result != 0:
+        return result
+
+    print("\n>>> Step 5/5: Handoff\n")
+    return cmd_handoff(project_path, verbose, file_override=None, prod=False, res=res)


 # =============================================================================
@@ -1801,7 +1933,7 @@ def cmd_archive(project_path: Path, verbose: bool, dry_run: bool) -> int:
        project_json_path = project_path / "project.json"
        if project_json_path.exists():
            try:
-                data = json.loads(project_json_path.read_text(encoding="utf-8"))
+                data = _read_json(project_json_path.read_text(encoding="utf-8"))
                data["synced_time"] = datetime.now().isoformat()
                project_json_path.write_text(
                    json.dumps(data, indent=2, ensure_ascii=False) + "\n",
@@ -176,11 +176,8 @@ def generate_chapters(
    for slide_id in slide_ids:
        if slide_id not in timing_lookup:
            continue
-
        timestamp = timing_lookup[slide_id]
        title = _extract_chapter_title(manuscript_text, slide_id, slides)
-
-        # Check if we should merge with previous chapter (too short)
        if chapters and (timestamp - chapters[-1].timestamp) < min_chapter_duration: 
            continue  # Skip this chapter, previous one covers it

@@ -23,12 +23,12 @@ import tempfile
 import zipfile
 from pathlib import Path

+from gnommo.parser import _read_json
+

 def write_manuscript(data: Path, out_path: Path):
-    data = json.loads(
-        data.read_text(encoding="utf-8")
-    )  # list of {"slide_index": int, "notes": str}
    
+    data = _read_json(data.read_text(encoding="utf-8"))
    lines = []
    i = 0
    for item in data:
@@ -69,7 +69,7 @@ def _write_sync(project_path: Path, data: dict, prod: bool = False):
        json.dump(data, f, indent=2)


-def cmd_handoff(project_path: Path, verbose: bool = False, file_override: str | None = None, prod: bool = False) -> int:
+def cmd_handoff(project_path: Path, verbose: bool = False, file_override: str | None = None, prod: bool = False, res: str = "full") -> int:
    _load_env_file()

    if prod:
@@ -104,14 +104,17 @@ def cmd_handoff(project_path: Path, verbose: bool = False, file_override: str |
    if file_override:
        video_path = Path(file_override)
    else:
-        output_video = project.get("output_video")
-        if not output_video:
+        output_filename = project.get("output") or Path(project.get("output_video", "")).name
+        if not output_filename:
            print(
-                "Error: no 'output_video' field in project.json and no --file provided.",
+                "Error: no 'output' field in project.json and no --file provided.",
                file=sys.stderr,
            )
            return 1
-        video_path = project_path / output_video
+        if res != "full":
+            video_path = project_path / "out" / res / output_filename
+        else:
+            video_path = project_path / "out" / output_filename

    if not video_path.exists():
        print(f"Error: video file not found: {video_path}", file=sys.stderr)
@@ -65,6 +65,7 @@ class ProjectConfig:
    # YouTube description fields
    description: str = ""  # Video description text for YouTube
    footer: str = ""  # Footer text (social links, subscribe CTA, etc.)
+    output_video: str = ""  # Output filename (e.g. "DISC_INT3.mp4"); placed in out/ or out/<res>/


@dataclass
@@ -507,6 +508,7 @@ class RenderPlan:
    cached_files: set = field(
        default_factory=set
    )  # Video IDs loaded from external cache (show 📁 indicator)
+    output_path: Optional[Path] = None  # Final output file path (set after plan is built)


 # Slide layout configurations (hardcoded for POC)
@@ -19,6 +19,12 @@ from .models import (
 )


+def _read_json(path: Path) -> Any:
+    """Read and parse a JSON file, treating an empty file as {}."""
+    text = path.read_text(encoding="utf-8").strip()
+    return json.loads(text) if text else {}
+
+
 def parse_manuscript(
    project_path: Path,
 ) -> tuple[str, list[str], list[tuple[int, str]], list[Citation]]:
@@ -132,7 +138,7 @@ def load_citations(path: Path) -> list[Citation]:
    """Load citations from a JSON file."""
    if not path.exists():
        return []
-    data = json.loads(path.read_text(encoding="utf-8"))
+    data = _read_json(path)
    return [
        Citation(
            reference=item["reference"],
@@ -151,7 +157,7 @@ def parse_project_config(project_path: Path) -> ProjectConfig:
        raise ParseError("project.json not found", config_path)

    try:
-        data = json.loads(config_path.read_text(encoding="utf-8"))
+        data = _read_json(config_path)
    except json.JSONDecodeError as e:
        raise ParseError(f"Invalid JSON: {e}", config_path)

@@ -204,6 +210,7 @@ def parse_project_config(project_path: Path) -> ProjectConfig:
        outro=data.get("outro", []),
        description=data.get("description", ""),
        footer=data.get("footer", ""),
+        output_video=data.get("output_video", ""),
    )


@@ -239,7 +246,7 @@ def parse_slides(
        raise ParseError(f"slides file not found: {local_slides_path}", local_slides_path)

    try:
-        data = json.loads(slides_path.read_text(encoding="utf-8"))
+        data = _read_json(slides_path)
    except json.JSONDecodeError as e:
        raise ParseError(f"Invalid JSON: {e}", slides_path)

@@ -283,7 +290,7 @@ def parse_audio(
        return {}, audio_dir

    try:
-        data = json.loads(audio_path.read_text(encoding="utf-8"))
+        data = _read_json(audio_path)
    except json.JSONDecodeError as e:
        raise ParseError(f"Invalid JSON: {e}", audio_path)

@@ -382,7 +389,7 @@ def parse_videos(
        raise ParseError(f"videos.json not found: {local_videos_path}", local_videos_path)

    try:
-        data = json.loads(videos_path.read_text(encoding="utf-8"))
+        data = _read_json(videos_path)
    except json.JSONDecodeError as e:
        raise ParseError(f"Invalid JSON: {e}", videos_path)

@@ -489,7 +496,7 @@ def parse_narration(
        return {}, narration_dir

    try:
-        data = json.loads(narration_path.read_text(encoding="utf-8"))
+        data = _read_json(narration_path)
    except json.JSONDecodeError as e:
        raise ParseError(f"Invalid JSON: {e}", narration_path)

@@ -594,7 +601,7 @@ def parse_video_metadata(metadata_path: Path) -> VideoMetadata:
        raise ParseError(f"Video metadata not found: {metadata_path}", metadata_path)

    try:
-        data = json.loads(metadata_path.read_text(encoding="utf-8"))
+        data = _read_json(metadata_path)
    except json.JSONDecodeError as e:
        raise ParseError(f"Invalid JSON: {e}", metadata_path)

@@ -185,10 +185,14 @@ def ensure_downscaled_files_exist(
    res: str,
    force: bool = False,
    verbose: bool = False,
+    skip_sources: set = None,
 ) -> Path:
    """
    Ensure downscaled copies exist for all videos in source_dir for the given res preset.
    Creates them on-the-fly if missing. Returns the output subdirectory.
+
+    skip_sources: optional set of source filenames to skip (e.g. files that have a
+    preprocessed output_file, where the full-res processed version will be used instead).
    """
    cfg = RES_CONFIGS[res]
    if cfg is None:
@@ -205,6 +209,7 @@ def ensure_downscaled_files_exist(
        and f.suffix.lower() in video_extensions
        and "_processed" not in f.stem
        and not f.name.startswith(".")
+        and (skip_sources is None or f.name not in skip_sources)
    ]

    if not video_files:
@@ -359,6 +364,115 @@ def check_audio_channel_silent(input_path: Path, channel: str, threshold_db: flo
    return False, 0.0


+def _resolve_auto_channel(input_path: Path, threshold_db: float = -60.0) -> str:
+    """
+    Detect which audio channels have signal and return the appropriate channel setting.
+
+    Logic:
+    - One channel silent, the other not → return the active channel ("left" or "right")
+    - Both channels have signal → return "both"
+    """
+    left_silent, _ = check_audio_channel_silent(input_path, "left", threshold_db)
+    right_silent, _ = check_audio_channel_silent(input_path, "right", threshold_db)
+
+    if left_silent and not right_silent:
+        return "right"
+    if right_silent and not left_silent:
+        return "left"
+    return "both"
+
+
+def detect_silence_bounds(
+    input_path: Path,
+    noise_threshold_db: float = -40.0,
+    min_silence_duration: float = 0.3,
+    verbose: bool = False,
+) -> tuple[float, float]:
+    """
+    Detect when audio content starts and ends in a file.
+
+    Uses FFmpeg's silencedetect filter to find the first and last
+    non-silent moments.  Useful for automatically computing skip/take values.
+
+    Two common preamble shapes are handled:
+      - File starts with silence → first_sound = end of that silence.
+      - File starts with noise (e.g. clothing rustle) followed by a brief
+        quiet gap before speech → first_sound = end of that first gap.
+
+    Args:
+        input_path: Video or audio file to analyse.
+        noise_threshold_db: dB level below which audio is considered silent.
+            Raise (e.g. -25) to treat low-level noise like clothing rustle
+            as silence.
+        min_silence_duration: Minimum gap length (seconds) that counts as
+            silence.  Shorter gaps are ignored.
+        verbose: Print detected silence periods for debugging.
+
+    Returns:
+        (first_sound_time, last_sound_time) in seconds.
+        first_sound_time — when the first meaningful sound begins.
+        last_sound_time  — when the last meaningful sound ends.
+    """
+    total_duration = get_video_duration(input_path)
+
+    cmd = [
+        "ffmpeg", "-i", str(input_path),
+        "-af",
+        f"silencedetect=noise={noise_threshold_db}dB:duration={min_silence_duration}",
+        "-f", "null", "/dev/null",
+    ]
+    result = subprocess.run(cmd, capture_output=True, text=True)
+
+    # Parse silence_start / silence_end lines from stderr
+    silence_periods: list[tuple[float, float]] = []
+    pending_start: float | None = None
+
+    for line in result.stderr.splitlines():
+        if "silence_start:" in line:
+            try:
+                pending_start = float(line.split("silence_start:")[1].strip())
+            except ValueError:
+                pass
+        elif "silence_end:" in line and pending_start is not None:
+            try:
+                end_t = float(line.split("silence_end:")[1].split("|")[0].strip())
+                silence_periods.append((pending_start, end_t))
+                pending_start = None
+            except ValueError:
+                pass
+
+    # File ended while still in silence — close the period at total_duration
+    if pending_start is not None:
+        silence_periods.append((pending_start, total_duration))
+
+    if verbose:
+        print(f"\n    silence periods ({len(silence_periods)}):")
+        for s, e in silence_periods:
+            print(f"      {s:.3f}s – {e:.3f}s")
+
+    # --- First sound ---
+    # Take the end of the FIRST silence period found in the preamble window
+    # (first 60 s).  This handles both:
+    #   • file starts with silence  → silence[0].start ≈ 0
+    #   • file starts with noise (crumpling etc.) then has a brief quiet gap
+    #     before speech             → silence[0].start > 0
+    # If no silence is found at all the whole file is assumed to be content.
+    PREAMBLE_LIMIT = 60.0
+    first_sound = 0.0
+    for s_start, s_end in silence_periods:
+        if s_start < PREAMBLE_LIMIT:
+            first_sound = s_end
+            break
+
+    # --- Last sound ---
+    # Where the trailing silence begins (if the file ends with silence).
+    last_sound = total_duration
+    if silence_periods and silence_periods[-1][1] >= total_duration - 0.05:
+        last_sound = silence_periods[-1][0]
+
+    return first_sound, last_sound
+
+
 def preprocess_video(
    videos_dir: Path,
    video_id: str,
@@ -402,9 +516,12 @@ def preprocess_video(
            filter_type=None,
        )

-    # Quick audio sanity check: warn early if selected channel is silent
+    # Resolve channel setting (auto-detect if needed) and sanity check
    channel = video_source.use_audio_channels
-    if channel in ("left", "right"):
+    if channel == "auto":
+        channel = _resolve_auto_channel(current_input)
+        print(f"        Auto channel detection: using '{channel}'")
+    elif channel in ("left", "right"):
        is_silent, max_vol = check_audio_channel_silent(current_input, channel)
        if is_silent:
            raise PreprocessError(
@@ -482,7 +599,7 @@ def preprocess_video(
                batch[0],
                verbose,
                take=None,
-                use_audio_channels=video_source.use_audio_channels,
+                use_audio_channels=channel,
                skip_loudnorm=video_source.defer_loudnorm,
            )
            current_input = step_output
@@ -2022,12 +2139,12 @@ def stitch_narration_segments(
                f"      Skip: {skip}s, Take: {take or 'all'}s, Duration: {effective_duration:.1f}s"
            )

-        # If no trimming needed, use source directly
-        if skip == 0 and take is None:
-            trimmed_segments.append(source_path)
-            continue
+        # Always re-encode every segment to normalize fps and timestamps.
+        # Mixing un-normalized source files (e.g. 60fps camera) with
+        # trimmed-and-re-encoded 30fps segments causes cumulative A/V drift
+        # in the final concat.

-        # Trim the segment
+        # Trim/normalize the segment
        trimmed_path = temp_dir / f"segment_{i:03d}.mov"

        # Check if source has alpha channel (for ProRes 4444, etc.)
@@ -5,6 +5,8 @@ import subprocess
 from pathlib import Path

 from .errors import RenderError
+from .parser import _read_json
+from .preprocessor import _resolve_auto_channel
 from .models import (
    AudioEvent,
    CameraEvent,
@@ -179,22 +181,23 @@ def _resolve_video_path(
        base_dir = videos_dir

    if video_source.output_file:
-        video_path = base_dir / video_source.output_file
-        # Check with cache fallback
-        if project_path:
-            resolved, _ = resolve_with_cache(video_path, project_path)
-            if resolved.exists():
-                return resolved
-        elif video_path.exists():
-            return video_path
-        # Check for WebM variant (preprocessing outputs compressed WebM instead of ProRes)
-        webm_path = video_path.with_suffix(".mov")
-        if project_path:
-            resolved, _ = resolve_with_cache(webm_path, project_path)
-            if resolved.exists():
-                return resolved
-        elif webm_path.exists():
-            return webm_path
+        for candidate_dir in [base_dir, base_dir.parent]:
+            video_path = candidate_dir / video_source.output_file
+            # Check with cache fallback
+            if project_path:
+                resolved, _ = resolve_with_cache(video_path, project_path)
+                if resolved.exists():
+                    return resolved
+            elif video_path.exists():
+                return video_path
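+            # Check for the alternate-container variant of the output file (NOTE(review): comment and variable name say "WebM", but the suffix probed below is ".mov" — confirm which is intended)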
+            webm_path = video_path.with_suffix(".mov")
+            if project_path:
+                resolved, _ = resolve_with_cache(webm_path, project_path)
+                if resolved.exists():
+                    return resolved
+            elif webm_path.exists():
+                return webm_path

    # Fall back to source_file with cache fallback
    source_path = base_dir / video_source.source_file
@@ -272,7 +275,6 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]:
    from .cache import resolve_with_cache

    # Input: background — resolved via handle in shared_assets/videos.json
-    import json as _json
    bg_handle = plan.config.background
    has_background = bool(bg_handle)
    bg_idx = None
@@ -282,7 +284,7 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]:
        videos_json_bg = shared_assets_dir / "videos.json"
        if not videos_json_bg.exists():
            raise RenderError(f"shared_assets/videos.json not found (needed for background handle '{bg_handle}')")
-        bg_videos = _json.loads(videos_json_bg.read_text())
+        bg_videos = _read_json(videos_json_bg)
        if bg_handle not in bg_videos:
            raise RenderError(f"Background handle '{bg_handle}' not found in shared_assets/videos.json")
        bg_path = shared_assets_dir / bg_videos[bg_handle]["source_file"]
@@ -719,7 +721,8 @@ def build_filter_complex(
            )
        else:
            filters.append(
-                f"[{bg_idx}:v]scale={width}:{height}:force_original_aspect_ratio=increase,"
+                f"[{bg_idx}:v]fps={plan.config.fps},"
+                f"scale={width}:{height}:force_original_aspect_ratio=increase,"
                f"crop={width}:{height}[bg]"
            )
    else:
@@ -742,9 +745,12 @@ def build_filter_complex(

        if not plan.narration_pauses:
            # Simple case: no pauses, continuous overlay
+            # fps+setpts normalise the source to a constant frame rate and reset
+            # the timeline to 0 so the video stays locked to the audio track.
            video_label = f"av{i}"
            filters.append(
-                f"[{input_idx}:v]format=yuva444p10le,"
+                f"[{input_idx}:v]fps={plan.config.fps},setpts=PTS-STARTPTS,"
+                f"format=yuva444p10le,"
                f"scale={zoomed_width}:{zoomed_height}:force_original_aspect_ratio=increase,"
                f"crop={cut_width}:{cut_height}:(iw-{cut_width})/2:(ih-{cut_height})/2,"
                f"format=rgba[{video_label}]"
@@ -942,9 +948,11 @@ def build_filter_complex(
        narration_volume = 1.0
        if plan.narration_videos:
            _, first_video_source, _ = plan.narration_videos[0]
-            channel_filter = _build_audio_channel_filter(
-                first_video_source.use_audio_channels
-            )
+            use_channels = first_video_source.use_audio_channels
+            if use_channels == "auto":
+                narration_path = _resolve_video_path(videos_dir, first_video_source, shared_assets_dir, project_path)
+                use_channels = _resolve_auto_channel(narration_path)
+            channel_filter = _build_audio_channel_filter(use_channels)
            narration_volume = first_video_source.volume

        # Build volume filter if not 1.0
@@ -4,6 +4,7 @@ from pathlib import Path

 from .cache import resolve_with_cache
 from .errors import ValidationError, ValidationIssue
+from .parser import _read_json
 from .models import (
    ProjectConfig,
    SlideDefinition,
@@ -21,9 +22,10 @@ def validate_project(
    videos: dict[str, VideoSource],
    videos_dir: Path,
    malformed_markers: list[tuple[int, str]] = None,
-) -> None:
+) -> list[ValidationIssue]:
    """
    Validate all parsed project data. Raises ValidationError if any issues found.
+    Returns a list of warnings (non-fatal issues).

    Checks:
    - All slide markers in manuscript exist in slides.json
@@ -34,6 +36,7 @@ def validate_project(
    - No malformed markers in manuscript
    """
    issues: list[ValidationIssue] = []
+    warnings: list[ValidationIssue] = []

    # Check for malformed markers first (these are likely typos)
    if malformed_markers:
@@ -64,9 +67,9 @@ def validate_project(
                    base_name = video_id.rsplit(".", 1)[0]
                    if base_name in videos:
                        hint = f" (Did you mean [video:{base_name}]? Don't include file extensions in markers)"
-                issues.append(
+                warnings.append(
                    ValidationIssue(
-                        f"Video marker [{marker}] referenced in manuscript but '{video_id}' not defined in videos.json{hint}",
+                        f"Video marker [{marker}] referenced in manuscript but '{video_id}' not defined in videos.json{hint} — using PlaceholderVideo instead",
                        project_path / "manuscript.txt",
                    )
                )
@@ -76,9 +79,9 @@ def validate_project(
        if marker.startswith("narration:"):
            video_id = marker[10:]  # Remove 'narration:' prefix
            if video_id not in videos:
-                issues.append(
+                warnings.append(
                    ValidationIssue(
-                        f"Narration marker [{marker}] referenced in manuscript but '{video_id}' not defined in videos.json",
+                        f"Narration marker [{marker}] referenced in manuscript but '{video_id}' not defined in videos.json — using PlaceholderVideo instead",
                        project_path / "manuscript.txt",
                    )
                )
@@ -88,6 +91,16 @@ def validate_project(
        if marker.startswith("segment:"):
            continue

+        # Unknown namespaced markers (e.g. [background:xxx]) — not supported, ignore with warning
+        if ":" in marker:
+            warnings.append(
+                ValidationIssue(
+                    f"Unknown marker type [{marker}] — ignoring (no support for '{marker.split(':', 1)[0]}:' markers)",
+                    project_path / "manuscript.txt",
+                )
+            )
+            continue
+
        if marker not in slides:
            issues.append(
                ValidationIssue(
@@ -150,9 +163,9 @@ def validate_project(
        video_path = base_dir / video_source.source_file
        video_path, _ = resolve_with_cache(video_path, project_path)
        if not video_path.exists():
-            issues.append(
+            warnings.append(
                ValidationIssue(
-                    f"Video file not found: {video_source.source_file}",
+                    f"Video file not found: {video_source.source_file} — falling back to PlaceholderVideo",
                    videos_json_path,
                )
            )
@@ -183,8 +196,7 @@ def validate_project(
                )
            )
        else:
-            import json as _json
-            bg_videos = _json.loads(videos_json_path_bg.read_text())
+            bg_videos = _read_json(videos_json_path_bg)
            if bg_handle not in bg_videos:
                issues.append(
                    ValidationIssue(
@@ -239,3 +251,5 @@ def validate_project(
    # If any issues, raise ValidationError
    if issues:
        raise ValidationError(issues)
+
+    return warnings
@@ -14,7 +14,7 @@
  "fps": 30,
  "duration_seconds": 60,
  "script": "script.md",
-  "output_video": "export/final.mp4",
+  "output_video": "short_is_universe_pixelated.mp4",
  "keynote_file": "../video1/media/video1.key",
  "background": "../video1/shared_assets/BlackBackground.mp4",
  "slides": "../video1/media/slides/Video1/slides.json",