From b9b5a8e77d9f4e21e17769fef0a1a8c3a698ab69 Mon Sep 17 00:00:00 2001
From: jenstandstad <jens.tandstad@gmail.com>
Date: Sun, 7 Jun 2026 11:19:19 +0200
Subject: [PATCH] Add Pexels downloader and assorted fixes

---
 GlitchTrailer/project.json |   4 +-
 all.sh                     |   2 +-
 example/project.json       |   1 +
 gnommo/cache.py            |  14 +-
 gnommo/cli.py              | 357 +++++++++++++++++++++++++++++++------
 gnommo/models.py           |  14 +-
 gnommo/parser.py           |   7 +-
 gnommo/pexels.py           | 312 ++++++++++++++++++++++++++++++++
 gnommo/preprocessor.py     |  25 +++
 gnommo/renderer.py         | 131 +++++++++++---
 gnommo/transformer.py      | 154 ++++++++++++----
 gnommo/validator.py        |  82 +++++++--
 12 files changed, 957 insertions(+), 146 deletions(-)
 create mode 100644 gnommo/pexels.py

diff --git a/GlitchTrailer/project.json b/GlitchTrailer/project.json
index 7f1a339..230cdd4 100644
--- a/GlitchTrailer/project.json
+++ b/GlitchTrailer/project.json
@@ -7,13 +7,14 @@
   "platform_targets": ["youtube"],
   "status": "scripted",
   "youtube_url": null,
-  "resolution": [1960, 1080],
+  "resolution": [1920, 1080],
   "fps": 30,
   "duration_seconds": null,
     "default_filters": {
     "audioonly": [
       {
         "type": "audio_normalize",
+"enable":false,
         "compress": false,
         "normalize": true,
         "target_lufs": -14,
@@ -24,6 +25,7 @@
     "talkinghead": [
       {
         "type": "audio_normalize",
+"enable":false,
         "normalize": true,
         "target_lufs": -14,
         "target_lra": 11,
diff --git a/all.sh b/all.sh
index 1d49795..38e81d9 100755
--- a/all.sh
+++ b/all.sh
@@ -3,7 +3,7 @@
 ./gnommo.sh -p video1 all --force --prod
 ./gnommo.sh -p video2 all --force --prod
 ./gnommo.sh -p video3 all --force --prod
-#./gnommo.sh -p video4 all --force
+./gnommo.sh -p video4 all --force --prod
 #./gnommo.sh -p video5 all --force
 #./gnommo.sh -p video6 all --force
 
diff --git a/example/project.json b/example/project.json
index 79b79fb..d6ffc84 100644
--- a/example/project.json
+++ b/example/project.json
@@ -18,6 +18,7 @@
       "talkinghead": [
         {
           "type": "audio_normalize",
+"enable":false,
           "eq_bands": [
             {"freq": 47, "gain": -15, "type": "lowshelf"},
             {"freq": 107, "gain": -1.3, "q": 1.2},
diff --git a/gnommo/cache.py b/gnommo/cache.py
index 9999b3a..84e4d9b 100644
--- a/gnommo/cache.py
+++ b/gnommo/cache.py
@@ -129,14 +129,24 @@ def resolve_with_cache(
     if cache_base is None:
         return local_path, False  # No cache configured
 
-    # Build cache path: {cache_base}/{project_name}/{relative_path}
+    # Try 1: path inside the project  →  cache_base / project_name / relative
     try:
         relative = local_path.relative_to(project_path)
         cache_path = cache_base / project_path.name / relative
         if cache_path.exists():
             return cache_path, True
     except ValueError:
-        pass  # local_path is not relative to project_path
+        pass  # local_path is not under project_path
+
+    # Try 2: path relative to gnommo root (sibling dirs like shared_assets)
+    # e.g. shared_assets/pexels/file.mp4  →  cache_base / shared_assets / pexels / file.mp4
+    try:
+        relative = local_path.relative_to(project_path.parent)
+        cache_path = cache_base / relative
+        if cache_path.exists():
+            return cache_path, True
+    except ValueError:
+        pass  # local_path is not under project_path.parent either
 
     return local_path, False
 
diff --git a/gnommo/cli.py b/gnommo/cli.py
index 6fa9a95..5a5f31b 100644
--- a/gnommo/cli.py
+++ b/gnommo/cli.py
@@ -106,6 +106,7 @@ Examples:
             "pull",
             "handoff",
             "transcode",
+            "pexels",
         ],
         help="Action to perform (default: render)",
     )
@@ -310,6 +311,8 @@ Examples:
             return cmd_handoff(
                 project_path, args.verbose, args.file, args.prod, args.res
             )
+        elif action == "pexels":
+            return cmd_pexels(project_path, args.verbose)
 
     except GnommoError as e:
         print(f"Error: {e}", file=sys.stderr)
@@ -362,7 +365,7 @@ def cmd_import(project_path: Path, force: bool, verbose: bool) -> int:
         keynote_file = keynote_files[0]  # Use first .key file found
         if len(keynote_files) > 1:
             print(f"  Warning: Multiple .key files found, using {keynote_file.name}")
-        _import_presenter_notes(project_path, keynote_file, verbose)
+        _import_presenter_notes(project_path, keynote_file, verbose, config)
 
     # Generate slides.json for each slide directory (after Keynote export)
     slides_base = project_path / "media" / "slides"
@@ -391,6 +394,42 @@ def cmd_import(project_path: Path, force: bool, verbose: bool) -> int:
     # Probe and cache video metadata (duration, has_audio) into videos.json
     _probe_video_metadata(project_path, config, shared_assets_dir, force, verbose)
 
+    # ETL: if a manuscript exists, project shorthand marker semantics (cutout/layer)
+    # into videos.json so the render stage is always data-driven from the manuscript.
+    # Run AFTER sync so newly-added shared videos are already present when we write
+    # their cutout/layer. Also warn about any referenced video that is still missing.
+    manuscript_path = project_path / "manuscript.txt"
+    if manuscript_path.exists() and config:
+        from .parser import parse_manuscript
+        from .transformer import _SHORTHAND_PREFIXES
+
+        _, markers, _, _ = parse_manuscript(project_path)
+        if markers:
+            _project_markers_to_videos(
+                markers,
+                project_path / config.videos_path,
+                config,
+                project_path,
+            )
+
+            # Warn about shorthand-referenced videos still absent from videos.json
+            videos_json_path = project_path / config.videos_path
+            local_vids: dict = (
+                _read_json(videos_json_path) if videos_json_path.exists() else {}
+            )
+            seen_missing: set[str] = set()
+            for marker in markers:
+                for prefix in _SHORTHAND_PREFIXES:
+                    if marker.startswith(prefix):
+                        vid_id = marker[len(prefix):]
+                        if vid_id not in local_vids and vid_id not in seen_missing:
+                            print(
+                                f"  ⚠  [{marker}] video '{vid_id}' not found in "
+                                f"videos.json or shared_assets — add it manually"
+                            )
+                            seen_missing.add(vid_id)
+                        break
+
     print("Import complete.")
     return 0
 
@@ -729,33 +768,47 @@ def _import_shared_assets(shared_assets_dir: Path, verbose: bool) -> None:
     """
     video_extensions = {".mov", ".mp4", ".webm", ".avi", ".mkv", ".m4v"}
 
-    # Find all video files in shared_assets (root level and subdirectories)
+    # Find all video files in shared_assets (root level and subdirectories).
+    # Also scan the GnommoDisk cache mirror so files placed there are registered.
+    from .cache import load_cache_config
+
+    scan_roots: list[Path] = [shared_assets_dir]
+    cache_base = load_cache_config()
+    if cache_base:
+        cache_shared = cache_base / "shared_assets"
+        if cache_shared.exists() and cache_shared != shared_assets_dir:
+            scan_roots.append(cache_shared)
+
     video_files: list[tuple[Path, Path]] = []  # (relative_path, absolute_path)
+    seen_rel: set[str] = set()  # deduplicate by relative path
 
-    for item in shared_assets_dir.iterdir():
-        if item.name.startswith("."):
-            continue
+    for scan_root in scan_roots:
+        for item in scan_root.iterdir():
+            if item.name.startswith("."):
+                continue
 
-        if item.is_file():
-            # Video file directly in shared_assets root
-            if (
-                item.suffix.lower() in video_extensions
-                and not item.name.endswith("_processed.mov")
-                and not item.name.endswith("_processed.webm")
-            ):
-                rel_path = item.relative_to(shared_assets_dir)
-                video_files.append((rel_path, item))
-        elif item.is_dir():
-            # Scan subdirectories recursively
-            for video_file in item.rglob("*"):
+            if item.is_file():
                 if (
-                    video_file.is_file()
-                    and video_file.suffix.lower() in video_extensions
-                    and not video_file.name.endswith("_processed.mov")
-                    and not video_file.name.endswith("_processed.webm")
+                    item.suffix.lower() in video_extensions
+                    and not item.name.endswith("_processed.mov")
+                    and not item.name.endswith("_processed.webm")
                 ):
-                    rel_path = video_file.relative_to(shared_assets_dir)
-                    video_files.append((rel_path, video_file))
+                    rel_path = item.relative_to(scan_root)
+                    if str(rel_path) not in seen_rel:
+                        seen_rel.add(str(rel_path))
+                        video_files.append((rel_path, item))
+            elif item.is_dir():
+                for video_file in item.rglob("*"):
+                    if (
+                        video_file.is_file()
+                        and video_file.suffix.lower() in video_extensions
+                        and not video_file.name.endswith("_processed.mov")
+                        and not video_file.name.endswith("_processed.webm")
+                    ):
+                        rel_path = video_file.relative_to(scan_root)
+                        if str(rel_path) not in seen_rel:
+                            seen_rel.add(str(rel_path))
+                            video_files.append((rel_path, video_file))
 
     if not video_files:
         if verbose:
@@ -1049,11 +1102,36 @@ def _import_narration_segments(narration_dir: Path, config, verbose: bool) -> No
             print(f"  No new narration segments to add")
 
 
+def _write_youtube_meta(
+    project_path: Path, config, citations: list[str]
+) -> None:
+    """Emit youtube_meta.txt: a description section, then numbered references.
+
+    Each section is written only when it has content; the file itself is
+    always (re)written, even if both sections turn out to be empty.
+    """
+    sections: list[str] = []
+    if config and config.description:
+        sections.extend(["== Description ==", config.description, ""])
+    if citations:
+        sections.append("== References ==")
+        sections.extend(
+            f"{num}. {ref}" for num, ref in enumerate(citations, start=1)
+        )
+        sections.append("")
+    target = project_path / "youtube_meta.txt"
+    target.write_text("\n".join(sections), encoding="utf-8")
+    print(f"  Wrote {target.name} ({len(citations)} reference(s))")
+
+
 def _import_presenter_notes(
-    project_path: Path, keynote_file: Path, verbose: bool
+    project_path: Path, keynote_file: Path, verbose: bool, config=None
 ) -> None:
     """Extract presenter notes from Keynote and write to manuscript.txt.
 
+    [cite:...] markers are stripped from the manuscript and collected into
+    youtube_meta.txt alongside the project description.
+
     Uses the JXA script (extract_keynote_notes.js) to extract notes via osascript.
     Also exports slides as PNG images to media/slides/{project_name}/.
     Backs up existing manuscript.txt before overwriting.
@@ -1116,21 +1194,44 @@ def _import_presenter_notes(
         print(f"  Error parsing notes JSON: {e}", file=sys.stderr)
         return
 
-    # Convert to manuscript.txt format
+    # Convert to manuscript.txt format, stripping [cite:...] markers
+    _CITE_RE = re.compile(r"\[cite:([^\]]+)\]")
     lines = []
+    citations: list[str] = []
+    seen_citations: set[str] = set()
+
     for item in notes_data:
         idx = item.get("slide_index")
         notes = (item.get("notes") or "").rstrip()
 
         lines.append(f"[S{idx}]")
         if notes:
-            lines.append(notes)
+            clean_note_lines = []
+            for note_line in notes.splitlines():
+                for m in _CITE_RE.finditer(note_line):
+                    cite_text = m.group(1).strip()
+                    if cite_text not in seen_citations:
+                        citations.append(cite_text)
+                        seen_citations.add(cite_text)
+                cleaned = _CITE_RE.sub("", note_line).strip()
+                if cleaned:
+                    clean_note_lines.append(cleaned)
+            if clean_note_lines:
+                lines.append("\n".join(clean_note_lines))
         lines.append("")  # blank line between slides
 
-    # Write manuscript.txt
-    manuscript_path.write_text("\n".join(lines).rstrip() + "\n", encoding="utf-8")
+    # Write manuscript.txt with Unix line endings (Keynote notes may contain \r\n or \r)
+    content = "\n".join(lines).rstrip() + "\n"
+    content = content.replace("\r\n", "\n").replace("\r", "\n")
+    manuscript_path.write_text(content, encoding="utf-8")
     print(f"  Wrote {manuscript_path} ({len(notes_data)} slides)")
 
+    # Write youtube_meta.txt with description + collected citations
+    _write_youtube_meta(project_path, config, citations)
+    if citations and verbose:
+        for i, cite in enumerate(citations, 1):
+            print(f"    {i}. {cite}")
+
     if verbose:
         non_empty = sum(1 for item in notes_data if item.get("notes"))
         print(f"    {non_empty} slides have presenter notes")
@@ -1221,6 +1322,71 @@ def _write_tasks_file(
     )
 
 
+# =============================================================================
+# Pexels Download Command
+# =============================================================================
+
+
+def cmd_pexels(project_path: Path, verbose: bool) -> int:
+    """Download referenced-but-missing Pexels clips, then backfill descriptions."""
+    from .parser import parse_manuscript, parse_project_config, parse_videos
+    from .pexels import (
+        get_pexels_api_key,
+        find_missing_pexels_videos,
+        download_video,
+        update_videos_json,
+        enrich_missing_descriptions,
+    )
+
+    api_key = get_pexels_api_key()
+    if not api_key:
+        print(
+            "Error: Pexels API key not configured.\n"
+            "Add to ~/.gnommo.conf:\n"
+            "  [pexels]\n"
+            "  api_key = YOUR_KEY_HERE\n"
+            "Get a free key at https://www.pexels.com/api/",
+            file=sys.stderr,
+        )
+        return 1
+
+    config = parse_project_config(project_path)
+    _, markers, _, _ = parse_manuscript(project_path)
+    videos, _ = parse_videos(project_path, config)
+
+    shared_assets_dir = _find_shared_assets(project_path)
+    if not shared_assets_dir:
+        print("Error: shared_assets directory not found.", file=sys.stderr)
+        return 1
+
+    # Each successful download is recorded in both the project's videos.json
+    # and the shared library's videos.json.
+    registries = (project_path / config.videos_path, shared_assets_dir / "videos.json")
+
+    pending = find_missing_pexels_videos(markers, videos, shared_assets_dir)
+    failures = 0
+    if not pending:
+        print("No missing Pexels videos.")
+    else:
+        print(f"Downloading {len(pending)} missing Pexels video(s)...")
+        for video_id, source_file in pending:
+            meta = download_video(source_file, shared_assets_dir, api_key)
+            if meta is None:
+                failures += 1
+            else:
+                for registry in registries:
+                    update_videos_json(registry, video_id, meta)
+        if failures:
+            print(f"\n  {failures}/{len(pending)} download(s) failed.")
+        else:
+            print(f"\n  {len(pending)} video(s) downloaded.")
+
+    # Runs regardless of download outcome: fill in absent descriptions.
+    enrich_missing_descriptions(shared_assets_dir, api_key)
+
+    return 1 if failures else 0
+
+
 # =============================================================================
 # Validate Command
 # =============================================================================
@@ -1283,6 +1449,35 @@ def _resolve_process_cache(project_path: Path, config) -> Optional[Path]:
     return p / project_path.name
 
 
+def _narration_combined_hint(project_path: Path, config) -> str:
+    """Build the follow-up hint shown when narration_combined.mov is missing.
+
+    If a configured storage location (the cache base, or the project's
+    process_cache) does not exist, the external disk is assumed to be
+    unplugged and stitching again would not help; say so instead.
+    """
+    from .cache import load_cache_config
+
+    candidates: list[Path] = []
+    cache_base = load_cache_config()
+    if cache_base is not None:
+        candidates.append(cache_base)
+
+    if config and config.process_cache:
+        process_dir = Path(config.process_cache)
+        if not process_dir.is_absolute():
+            process_dir = (project_path / process_dir).resolve()
+        candidates.append(process_dir)
+
+    absent = next((loc for loc in candidates if not loc.exists()), None)
+    if absent is None:
+        return "Run 'gnommo -p <project> stitch' first."
+    return (
+        f"External disk not connected (expected at {absent}).\n"
+        "Connect the disk and try again."
+    )
+
+
 def _resolve_narration_combined(
     project_path: Path, videos_dir: Path, config
 ) -> Optional[Path]:
@@ -2143,8 +2338,8 @@ def cmd_stitch(
         videos_dir_out.mkdir(parents=True, exist_ok=True)
         print(f"  Using {res} dirs: {narration_dir}, {videos_dir_out}")
 
-    # Get segment IDs in sorted order
-    segment_ids = sorted(narration.keys())
+    # Get segment IDs in natural order (Segment2 before Segment10)
+    segment_ids = sorted(narration.keys(), key=lambda s: [int(t) if t.isdigit() else t.lower() for t in re.split(r'(\d+)', s)])
 
     # Show what we're stitching
     print(f"\n  Segments ({len(segment_ids)}):")
@@ -2442,7 +2637,7 @@ def _parse_slide_range(slides_arg: str) -> tuple[str, Optional[str]]:
 
 
 def _project_markers_to_videos(
-    markers: list[str], videos_json_path: Path, config
+    markers: list[str], videos_json_path: Path, config, project_path: Path = None
 ) -> None:
     """ETL: project shorthand marker semantics into videos.json.
 
@@ -2451,6 +2646,9 @@ def _project_markers_to_videos(
     and layer values directly into videos.json.  This runs before parse_videos
     so the render pass reads already-projected data and needs no shorthand logic.
 
+    Videos may live in the project's local videos.json or in shared_assets/videos.json.
+    Both files are updated so the render pass always finds the projected values.
+
     The manuscript is the authoritative source: the LAST shorthand reference to
     a given video_id wins, matching what a human editor would expect when they
     change a marker near the end of the script.
@@ -2460,36 +2658,75 @@ def _project_markers_to_videos(
 
     from .transformer import _SHORTHAND_PREFIXES  # (cutout, layer) lookup table
 
-    # Build projection: video_id → {cutout, layer}
+    _PAUSE_PREFIXES = {
+        "vftp:", "vfbp:", "vfmp:",
+        "vf2tp:", "vf2bp:", "vf2mp:",
+        "vstp:", "vsbp:", "vsmp:",
+    }
+
+    # Build projection: video_id → {cutout, layer, auto_pause_narration}
+    # auto_pause_narration=True means: write pause_narration=duration if not already set.
     projection: dict[str, dict] = {}
     for marker in markers:
         for prefix, implied in _SHORTHAND_PREFIXES.items():
             if marker.startswith(prefix):
-                video_id = marker[len(prefix) :]
+                video_id = marker[len(prefix):]
                 cutout, layer = implied[0], implied[1]
-                projection[video_id] = {"cutout": cutout, "layer": layer}
+                projection[video_id] = {
+                    "cutout": cutout,
+                    "layer": layer,
+                    "_auto_pause": prefix in _PAUSE_PREFIXES,
+                }
                 break
 
     if not projection:
         return
 
-    with open(videos_json_path, "r", encoding="utf-8") as f:
-        raw = json.load(f)
+    def _apply_projection(json_path: Path) -> list[str]:
+        """Apply projection to one videos.json file; return list of updated IDs."""
+        if not json_path.exists():
+            return []
+        with open(json_path, "r", encoding="utf-8") as f:
+            raw = json.load(f)
+        changed = False
+        updated = []
+        for video_id, fields in projection.items():
+            if video_id not in raw:
+                continue
+            entry = raw[video_id]
+            video_changed = False
+            for field, value in fields.items():
+                if field == "_auto_pause":
+                    # Write pause_narration = duration only when:
+                    #   - marker is a pause-prefix (value is True)
+                    #   - pause_narration not already set (preserve manual overrides)
+                    #   - duration is known (probed by import)
+                    if value and not entry.get("pause_narration") and entry.get("duration"):
+                        entry["pause_narration"] = entry["duration"]
+                        changed = True
+                        video_changed = True
+                elif entry.get(field) != value:
+                    entry[field] = value
+                    changed = True
+                    video_changed = True
+            if video_changed:
+                updated.append(video_id)
+        if changed:
+            with open(json_path, "w", encoding="utf-8") as f:
+                json.dump(raw, f, indent=2, ensure_ascii=False)
+        return updated
 
-    changed = False
-    for video_id, fields in projection.items():
-        if video_id not in raw:
-            continue
-        for field, value in fields.items():
-            if raw[video_id].get(field) != value:
-                raw[video_id][field] = value
-                changed = True
+    updated_local = _apply_projection(videos_json_path)
+    if updated_local:
+        print(f"  Projected marker semantics → videos.json: {', '.join(updated_local)}")
 
-    if changed:
-        with open(videos_json_path, "w", encoding="utf-8") as f:
-            json.dump(raw, f, indent=2, ensure_ascii=False)
-        updated = [vid for vid in projection if vid in raw]
-        print(f"  Projected marker semantics → videos.json: {', '.join(updated)}")
+    # Also project into shared_assets/videos.json for pexels/library videos
+    shared_assets_dir = _find_shared_assets(project_path) if project_path else None
+    if shared_assets_dir:
+        shared_videos_json = shared_assets_dir / "videos.json"
+        updated_shared = _apply_projection(shared_videos_json)
+        if updated_shared:
+            print(f"  Projected marker semantics → shared_assets/videos.json: {', '.join(updated_shared)}")
 
 
 def _writeback_video_metadata(plan, project_path, config) -> None:
@@ -2696,7 +2933,7 @@ def cmd_render(
 
     # ETL: project shorthand marker semantics (cutout/layer) into videos.json
     # before parse_videos reads it, so the render pass is purely data-driven.
-    _project_markers_to_videos(markers, project_path / config.videos_path, config)
+    _project_markers_to_videos(markers, project_path / config.videos_path, config, project_path)
 
     # Override resolution for preview modes
     if res != "full":
@@ -2705,6 +2942,7 @@ def cmd_render(
 
     slides = parse_slides(project_path, config)
     videos, videos_dir = parse_videos(project_path, config)
+    source_videos_dir = videos_dir  # keep original for validation (pre-downscale)
 
     # Non-full res: use downscaled video directory, create on-the-fly if needed
     if res != "full":
@@ -2807,6 +3045,12 @@ def cmd_render(
         else:
             transcript_path = project_path / "transcript.json"
 
+    # If project.json specifies a transcript path, prefer it (always local)
+    if config.transcript_path:
+        local_transcript = project_path / config.transcript_path
+        if local_transcript.exists():
+            transcript_path = local_transcript
+
     # Try cache fallback for transcript
     transcript_path, _ = resolve_with_cache(transcript_path, project_path)
     if not transcript_path.exists():
@@ -2825,7 +3069,7 @@ def cmd_render(
     # Stage 2: Validate
     print("\n[2/4] Validating...")
     warnings = validate_project(
-        project_path, markers, config, slides, videos, videos_dir, malformed
+        project_path, markers, config, slides, videos, source_videos_dir, malformed
     )
     for w in warnings:
         print(f"  Warning: {w}")
@@ -3061,7 +3305,12 @@ def cmd_transcribe(
 
     words = transcribe_video(video_path, model="base")
 
-    output_path = video_path.with_suffix(".transcript.json")
+    # Save to project-local path if configured in project.json (keeps transcript off external drives)
+    if config.transcript_path:
+        output_path = project_path / config.transcript_path
+        output_path.parent.mkdir(parents=True, exist_ok=True)
+    else:
+        output_path = video_path.with_suffix(".transcript.json")
     save_transcript(words, output_path)
 
     print(f"  - Transcribed {len(words)} words")
@@ -3819,7 +4068,7 @@ def cmd_extract_audio(
                 f"Error: narration_combined.mov not found at {combined_path}",
                 file=sys.stderr,
             )
-            print("Run 'gnommo -p <project> stitch' first.", file=sys.stderr)
+            print(_narration_combined_hint(project_path, config), file=sys.stderr)
             return 1
 
         # Output to project out/ directory
@@ -3985,7 +4234,7 @@ def cmd_master(
             f"Error: narration_combined.mov not found at {combined_path}",
             file=sys.stderr,
         )
-        print("Run 'gnommo -p <project> stitch' first.", file=sys.stderr)
+        print(_narration_combined_hint(project_path, config), file=sys.stderr)
         return 1
 
     # Output directory
diff --git a/gnommo/models.py b/gnommo/models.py
index e90063d..a7c3495 100644
--- a/gnommo/models.py
+++ b/gnommo/models.py
@@ -49,6 +49,7 @@ class ProjectConfig:
     slides_path: str = "slides.json"  # path to slides.json relative to project
     videos_path: str = "videos.json"  # path to videos.json relative to project
     audio_path: str = "audio.json"  # path to audio.json relative to project
+    transcript_path: Optional[str] = None  # path to transcript.json relative to project (always saved locally)
     audio_source: Optional[str] = None  # defaults to talking head
     main_video: Optional[
         Union[str, list]
@@ -135,6 +136,15 @@ class GnommoKeyConfig:
     # Can help with edge color contamination
     alpha_bias: tuple[int, int, int] = None
 
+    # Luminance protection: pixels with luma above this stay fully opaque (0-255, -1 = off)
+    # Use ~220 to protect white objects (headphones, teeth) from being partially keyed.
+    protect_luma: int = -1
+
+    # Shadow boost: extra key strength for dark pixels (0.0-5.0, 0 = off)
+    # Ramps up key signal proportionally to how dark a pixel is, helping key dark greens
+    # without affecting bright foreground areas. Values 1.0-2.0 are typical.
+    shadow_boost: float = 0.0
+
     # Edge refinement
     edge_erode: int = 0  # Pixels to erode from alpha edge (0-5)
     edge_soften: float = 0.0  # Blur the alpha edge (0-5 pixels)
@@ -195,7 +205,7 @@ class AudioNormalizeConfig:
     Applies noise reduction, compression, and loudness normalization
     to improve audio quality and consistency.
     """
-
+    enabled: bool = True  # Master switch to enable/disable all audio processing
     # Parametric EQ bands (applied before other processing)
     eq_bands: list[EQBand] = field(default_factory=list)
 
@@ -300,7 +310,7 @@ class VideoSource:
         False  # If True, skip loudnorm during preprocessing (apply after concatenation)
     )
     volume: float = 1.0  # Volume multiplier (1.0=full, >1.0=boost, <1.0=reduce)
-    layer: str = "above"  # "above" = renders on top of slides; "below" = behind slides
+    layer: str = "above"  # "above" = on top of slides; "mid" = above narrator/below slides; "below" = behind narrator
     duration: Optional[
         float
     ] = None  # Pre-probed file duration in seconds (set by import)
diff --git a/gnommo/parser.py b/gnommo/parser.py
index d49944a..1951b5a 100644
--- a/gnommo/parser.py
+++ b/gnommo/parser.py
@@ -84,9 +84,9 @@ def parse_manuscript(
     text = re.sub(r"\[marker:[^\]]+\]", "", text)
     text = re.sub(r"\[cue:[^\]]+\]", "", text)
 
-    # Extract all valid markers like [S1], [video:demo], [Zoom2], etc.
-    # Include . in pattern to catch markers with file extensions (so validator can warn about them)
-    markers = re.findall(r"\[([A-Za-z0-9_:.]+)\]", text)
+    # Extract all valid markers like [S1], [video:demo], [vf2m:pexels/clip-name], etc.
+    # Include / and - to capture pexels/library video IDs; . to catch file extensions in markers.
+    markers = re.findall(r"\[([A-Za-z0-9_:./\-]+)\]", text)
 
     # Find malformed markers (missing brackets, extra spaces, etc.)
     malformed: list[tuple[int, str]] = []
@@ -258,6 +258,7 @@ def parse_project_config(project_path: Path) -> ProjectConfig:
         slides_path=data.get("slides", "slides.json"),
         videos_path=data.get("videos", "videos.json"),
         audio_path=data.get("audio", "audio.json"),
+        transcript_path=data.get("transcript"),
         audio_source=data.get("audio_source"),
         main_video=data.get("main_video"),
         process_cache=data.get("process_cache"),
diff --git a/gnommo/pexels.py b/gnommo/pexels.py
new file mode 100644
index 0000000..f4eb02f
--- /dev/null
+++ b/gnommo/pexels.py
@@ -0,0 +1,312 @@
+"""Pexels video downloader for gnommo shared_assets.
+
+Configure API key in ~/.gnommo.conf:
+
+    [pexels]
+    api_key = YOUR_KEY_HERE
+
+Get a free key at https://www.pexels.com/api/
+"""
+
+import configparser
+import json
+import re
+import sys
+import urllib.error
+import urllib.request
+from pathlib import Path
+from typing import Optional
+
+
+def get_pexels_api_key() -> Optional[str]:
+    """Return ``api_key`` from the ``[pexels]`` section of ~/.gnommo.conf, or None."""
+    parser = configparser.ConfigParser()
+    conf_file = Path.home() / ".gnommo.conf"
+    if conf_file.exists():
+        parser.read(conf_file)
+    return parser.get("pexels", "api_key", fallback=None)
+
+
+def extract_pexels_id(source_file: str) -> Optional[str]:
+    """Return the leading run of digits in the file name of *source_file*.
+
+    Examples: 'pexels/11868263-hd_1920_1080_24fps.mp4' -> '11868263' and
+    'pexels/12136677_1080_1920_30fps.mp4' -> '12136677'; None without digits.
+    """
+    stem = Path(source_file).stem.rsplit("/", 1)[-1]
+    leading_digits = re.match(r"^(\d+)", stem)
+    return None if leading_digits is None else leading_digits.group(1)
+
+
+def _fetch_video_info(pexels_id: str, api_key: str) -> Optional[dict]:
+    """GET the Pexels API record for *pexels_id*; print the error and return None on failure."""
+    request = urllib.request.Request(
+        f"https://api.pexels.com/videos/videos/{pexels_id}",
+        headers={"Authorization": api_key, "User-Agent": "Mozilla/5.0 gnommo/1.0"},
+    )
+    try:
+        with urllib.request.urlopen(request, timeout=15) as response:
+            payload = response.read()
+        return json.loads(payload)
+    except urllib.error.HTTPError as err:
+        print(f"  [{pexels_id}] Pexels API error {err.code} — video may have been deleted", flush=True)
+    except Exception as err:
+        print(f"  [{pexels_id}] Pexels API error: {err}", flush=True)
+    return None
+
+
+def description_from_url(video_url: str) -> str:
+    """Turn the slug of a Pexels video page URL into a title-cased description.
+
+    'https://www.pexels.com/video/abstract-television-noise-11868263/'
+    → 'Abstract Television Noise'; returns '' when the URL has no such slug.
+    """
+    slug_match = re.search(r"/video/([a-z0-9][a-z0-9-]+?)-\d+/?$", video_url)
+    if slug_match is None:
+        return ""
+    return slug_match.group(1).replace("-", " ").title()
+
+
+def _pick_best_video_file(video_files: list, source_file: str) -> Optional[dict]:
+    """Select the video_files entry that best matches the hints in source_file.
+
+    Prefers MP4 entries; returns None when video_files is empty.
+    """
+    if not video_files:
+        return None  # max() below would raise ValueError on an empty list
+    stem = Path(source_file).stem.split("/")[-1]
+
+    width_hint = height_hint = fps_hint = None
+    m = re.search(r"[_-](\d{3,4})[_-](\d{3,4})[_-](\d+)fps", stem)
+    if m:
+        width_hint, height_hint, fps_hint = (int(g) for g in m.groups())
+    # "uhd" is tested before "hd" because "hd" is a substring of it
+    quality_hint = next((q for q in ("uhd", "hd", "sd") if q in stem.lower()), None)
+
+    mp4s = [f for f in video_files if f.get("file_type") == "video/mp4"] or video_files
+
+    def score(vf: dict) -> int:
+        s = 0
+        # "or" guards: an explicit null quality/fps must not raise
+        if quality_hint and (vf.get("quality") or "").lower() == quality_hint:
+            s += 10
+        if width_hint and vf.get("width") == width_hint:
+            s += 5
+        if height_hint and vf.get("height") == height_hint:
+            s += 5
+        if fps_hint and round(float(vf.get("fps") or 0)) == fps_hint:
+            s += 3
+        return s
+
+    return max(mp4s, key=score)
+
+
+def download_video(
+    source_file: str,
+    shared_assets_dir: Path,
+    api_key: str,
+) -> Optional[dict]:
+    """Download one Pexels video to shared_assets_dir/<source_file>.
+
+    Streams to a sibling '.part' file that replaces the target only after a
+    complete transfer. Returns a metadata dict {description, duration,
+    has_audio=False} on success, or None on failure.
+    """
+    pexels_id = extract_pexels_id(source_file)
+    if not pexels_id:
+        print(f"  Cannot extract Pexels ID from: {source_file}", file=sys.stderr)
+        return None
+
+    target_path = shared_assets_dir / source_file
+    target_path.parent.mkdir(parents=True, exist_ok=True)
+
+    print(f"  [{pexels_id}] Fetching video info...", flush=True)
+    info = _fetch_video_info(pexels_id, api_key)
+    if not info:
+        return None
+
+    description = description_from_url(info.get("url", ""))
+    duration = float(info.get("duration") or 0) or None
+
+    video_files = info.get("video_files", [])
+    if not video_files:
+        print(f"  [{pexels_id}] No video files in API response", flush=True)
+        return None
+
+    best = _pick_best_video_file(video_files, source_file)
+    if not best or not best.get("link"):
+        print(f"  [{pexels_id}] No downloadable file in API response", flush=True)
+        return None
+
+    w, h, fps = best.get("width", "?"), best.get("height", "?"), best.get("fps", "?")
+    q = best.get("quality", "?")
+    label = f'"{description}" — ' if description else ""
+    print(f"  [{pexels_id}] {label}{q} {w}x{h} @ {fps}fps", flush=True)
+    print(f"    → {target_path}", flush=True)
+
+    part_path = target_path.with_name(target_path.name + ".part")
+    try:
+        req = urllib.request.Request(best["link"], headers={"User-Agent": "Mozilla/5.0 gnommo/1.0"})
+        with urllib.request.urlopen(req, timeout=300) as resp, open(part_path, "wb") as out:
+            total = int(resp.headers.get("Content-Length") or 0)
+            downloaded = 0
+            # 512 KB chunks go straight to disk instead of accumulating in memory
+            while chunk := resp.read(1024 * 512):
+                out.write(chunk)
+                downloaded += len(chunk)
+                if total:
+                    pct = downloaded * 100 // total
+                    mb_done = downloaded / 1024 / 1024
+                    mb_total = total / 1024 / 1024
+                    print(f"    {pct:3d}%  {mb_done:.1f}/{mb_total:.1f} MB\r", end="", flush=True)
+        # read(amt) can hit EOF early without raising, so verify the byte count
+        if total and downloaded != total:
+            raise OSError(f"incomplete download ({downloaded} of {total} bytes)")
+        part_path.replace(target_path)
+        print(f"    Done — {downloaded / 1024 / 1024:.1f} MB          ", flush=True)
+    except Exception as e:
+        part_path.unlink(missing_ok=True)
+        print(f"\n  Download failed: {e}", flush=True)
+        return None
+
+    return {
+        "description": description,
+        "duration": duration,
+        "has_audio": False,  # conservative; renderer probes when needed
+    }
+
+
+def update_videos_json(
+    json_path: Path,
+    video_id: str,
+    metadata: dict,
+) -> None:
+    """Merge truthy *metadata* values into the *video_id* entry of a videos.json file.
+
+    No-op if the file or entry is missing; rewrites only when a value changed.
+    """
+    if not json_path.exists():
+        return
+    with open(json_path, "r", encoding="utf-8") as src:
+        catalog = json.load(src)
+    if video_id not in catalog:
+        return
+    updates = {k: v for k, v in metadata.items() if v and catalog[video_id].get(k) != v}
+    if updates:
+        catalog[video_id].update(updates)
+        with open(json_path, "w", encoding="utf-8") as dst:
+            json.dump(catalog, dst, indent=2, ensure_ascii=False)
+
+
+def fetch_metadata(pexels_id: str, api_key: str) -> Optional[dict]:
+    """Look up description and duration for a Pexels video without downloading it."""
+    record = _fetch_video_info(pexels_id, api_key)
+    if record:
+        description = description_from_url(record.get("url", ""))
+        seconds = float(record.get("duration") or 0)
+        # A zero or missing duration is reported as None
+        return {"description": description, "duration": seconds or None}
+    return None
+
+
+def enrich_missing_descriptions(
+    shared_assets_dir: Path,
+    api_key: str,
+) -> int:
+    """Backfill descriptions of already-downloaded Pexels videos from the Pexels API.
+
+    Considers shared_assets/videos.json entries whose ID starts with 'pexels/',
+    that have no (or an empty) description, and whose source_file resolves to
+    an existing file (directly or through resolve_with_cache).
+
+    Returns the number of entries for which a description was fetched.
+    """
+    from .cache import resolve_with_cache
+
+    catalog_path = shared_assets_dir / "videos.json"
+    if not catalog_path.exists():
+        return 0
+
+    with open(catalog_path, "r", encoding="utf-8") as fh:
+        catalog = json.load(fh)
+
+    undescribed = [
+        (vid_id, entry)
+        for vid_id, entry in catalog.items()
+        if vid_id.startswith("pexels/") and not entry.get("description")
+    ]
+
+    # Keep only entries whose file is present and whose name yields a Pexels ID
+    cache_root = shared_assets_dir.parent
+    pending = []
+    for vid_id, entry in undescribed:
+        rel_file = entry.get("source_file", "")
+        if not rel_file:
+            continue
+        on_disk, _ = resolve_with_cache(shared_assets_dir / rel_file, cache_root)
+        if not on_disk.exists():
+            continue
+        numeric_id = extract_pexels_id(rel_file)
+        if numeric_id:
+            pending.append((vid_id, numeric_id))
+
+    if not pending:
+        return 0
+
+    print(f"  Enriching descriptions for {len(pending)} existing pexels video(s)...", flush=True)
+
+    enriched = 0
+    for vid_id, numeric_id in pending:
+        meta = fetch_metadata(numeric_id, api_key)
+        if not (meta and meta.get("description")):
+            print(f"  [{numeric_id}] not found or no description — skipped", flush=True)
+            continue
+        print(f"  [{numeric_id}] \"{meta['description']}\"", flush=True)
+        update_videos_json(catalog_path, vid_id, meta)
+        enriched += 1
+
+    return enriched
+
+
+def find_missing_pexels_videos(
+    manuscript_markers: list[str],
+    videos: dict,
+    shared_assets_dir: Path,
+) -> list[tuple[str, str]]:
+    """Return [(video_id, source_file)] for pexels videos referenced but not on disk.
+
+    Only markers with a video prefix and a 'pexels/' ID found in *videos* are
+    checked; each ID is examined once, in the order of manuscript_markers.
+    """
+    from .cache import resolve_with_cache
+
+    _VIDEO_PREFIXES = (
+        "video:", "narration:",
+        "vft:", "vfb:", "vfm:",
+        "vf2t:", "vf2b:", "vf2m:",
+        "vst:", "vsb:", "vsm:",
+        "vftp:", "vfbp:", "vfmp:",
+        "vf2tp:", "vf2bp:", "vf2mp:",
+        "vstp:", "vsbp:", "vsmp:",
+    )
+
+    visited: set[str] = set()
+    absent: list[tuple[str, str]] = []
+    for marker in manuscript_markers:
+        matched = [p for p in _VIDEO_PREFIXES if marker.startswith(p)]
+        if not matched:
+            continue
+        video_id = marker[len(matched[0]):]
+        if not video_id.startswith("pexels/") or video_id in visited:
+            continue
+        visited.add(video_id)
+        video = videos.get(video_id)
+        if video is None:
+            continue
+        rel_file = getattr(video, "source_file", video)  # object with .source_file, or a plain path
+        # resolve_with_cache needs a project_path — use shared_assets parent
+        located, _ = resolve_with_cache(shared_assets_dir / rel_file, shared_assets_dir.parent)
+        if not located.exists():
+            absent.append((video_id, rel_file))
+
+    return absent
diff --git a/gnommo/preprocessor.py b/gnommo/preprocessor.py
index 5d6cd3f..2303619 100644
--- a/gnommo/preprocessor.py
+++ b/gnommo/preprocessor.py
@@ -656,6 +656,8 @@ def preprocess_video(
     batch_num = 0
     for batch in filter_batches:
         first_filter_type = batch[0].get("type")
+        
+        
 
         if first_filter_type in VIDEO_FILTER_TYPES:
             # Combined video filter batch - use chunked processing for large files
@@ -1065,6 +1067,14 @@ def build_gnommokey_filter(config: dict) -> str:
     scale_factor = gain * 2.5
     key_expr = f"({key_expr})*{scale_factor:.3f}"
 
+    # Shadow boost: amplify key signal for dark pixels so dark greens key out fully.
+    # shadow_factor = 1 - luma/255 (high for dark pixels, 0 for bright pixels)
+    # extra multiplier = 1 + shadow_boost * shadow_factor
+    if cfg.shadow_boost > 0:
+        luma_expr = f"(0.299*r(X,Y)+0.587*g(X,Y)+0.114*b(X,Y))"
+        shadow_factor = f"(1-{luma_expr}/255)"
+        key_expr = f"({key_expr})*(1+{cfg.shadow_boost:.3f}*{shadow_factor})"
+
     # Apply clip_black and clip_white to compress the matte
     # clip_black: key values below this become 0 (those pixels stay opaque)
     # clip_white: key values above this become 255 (fully transparent)
@@ -1082,6 +1092,13 @@ def build_gnommokey_filter(config: dict) -> str:
     # Invert: high key value (green) = low alpha (transparent)
     alpha_expr = f"255-{key_expr}"
 
+    # Luminance protection: lock bright pixels to fully opaque so white objects
+    # (headphones, teeth) are never accidentally keyed or jitter.
+    # protect_luma=-1 disables this. Use ~220 for typical white protection.
+    if cfg.protect_luma >= 0:
+        luma_expr = f"(0.299*r(X,Y)+0.587*g(X,Y)+0.114*b(X,Y))"
+        alpha_expr = f"if(gt({luma_expr},{cfg.protect_luma}),255,{alpha_expr})"
+
     # Build the geq filter for alpha (in RGBA mode)
     parts.append(f"geq=r='r(X,Y)':g='g(X,Y)':b='b(X,Y)':a='{alpha_expr}'")
 
@@ -1195,6 +1212,8 @@ def parse_gnommokey_config(config: dict) -> GnommoKeyConfig:
         despill_bias=despill_bias,
         despill_strength=float(config.get("despill_strength", 0.5)),
         alpha_bias=alpha_bias,
+        protect_luma=int(config.get("protect_luma", -1)),
+        shadow_boost=float(config.get("shadow_boost", 0.0)),
         edge_erode=int(config.get("edge_erode", 0)),
         edge_soften=float(config.get("edge_soften", 0.0)),
     )
@@ -1959,7 +1978,12 @@ def apply_audio_normalize(
         channel_map -> eq_bands -> highpass -> lowpass -> room_eq -> dereverb -> denoise -> gate -> compress -> normalize
     """
     cfg = parse_audio_normalize_config(config)
+    if not cfg.enabled:
+        # No audio processing, just copy
+        import shutil
 
+        shutil.copy2(input_path, output_path)
+        return
     # Build audio filter chain (order matters!)
     audio_filters: list[str] = []
 
@@ -2109,6 +2133,7 @@ def parse_audio_normalize_config(config: dict[str, Any]) -> AudioNormalizeConfig
         )
 
     return AudioNormalizeConfig(
+        enabled=bool(config.get("enabled", True)),
         # Parametric EQ
         eq_bands=eq_bands,
         # Room treatment
diff --git a/gnommo/renderer.py b/gnommo/renderer.py
index c92c0f1..bc3c047 100644
--- a/gnommo/renderer.py
+++ b/gnommo/renderer.py
@@ -237,8 +237,27 @@ def _resolve_video_path(
     source_path = base_dir / video_source.source_file
     if project_path:
         resolved, _ = resolve_with_cache(source_path, project_path)
-        return resolved
-    return source_path
+    else:
+        resolved = source_path
+
+    if not resolved.exists():
+        # File not found anywhere — substitute PlaceholderVideo so FFmpeg doesn't crash
+        placeholder = None
+        if shared_assets_dir:
+            p = shared_assets_dir / "PlaceholderVideo.mp4"
+            if project_path:
+                p, _ = resolve_with_cache(p, project_path)
+            if p.exists():
+                placeholder = p
+        if placeholder:
+            import sys
+            print(
+                f"  Warning: {video_source.source_file} not found — using PlaceholderVideo",
+                file=sys.stderr,
+            )
+            return placeholder
+
+    return resolved
 
 
 def _has_audio_stream(video_path: Path) -> bool:
@@ -362,6 +381,7 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]:
                 f"Background handle '{bg_handle}' not found in shared_assets/videos.json"
             )
         bg_path = shared_assets_dir / bg_videos[bg_handle]["source_file"]
+        bg_path, _ = resolve_with_cache(bg_path, plan.project_path)
         if not bg_path.exists():
             raise RenderError(
                 f"Background file not found: {bg_path} (from handle '{bg_handle}')"
@@ -404,12 +424,29 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]:
             videos_dir, event.video_source, shared_assets_dir, project_path
         )
         skip = event.video_source.skip or 0.0
+
+        # How long this clip needs to play in the output
+        clip_duration = event.end_time - event.start_time
+        if event.video_source.take is not None:
+            clip_duration = min(clip_duration, event.video_source.take)
+
+        # Loop the clip if the file is shorter than the display window.
+        # Don't loop pause-narration videos — they intentionally play once and stop.
+        needs_loop = False
+        if event.video_source.duration is not None and not event.video_source.pause_narration:
+            remaining = event.video_source.duration - skip
+            needs_loop = remaining < clip_duration - 0.1  # 0.1 s tolerance
+
+        if needs_loop:
+            cmd.extend(["-stream_loop", "-1"])
         if skip > 0:
             cmd.extend(["-ss", f"{skip:.3f}"])
         cmd.extend(["-analyzeduration", "0", "-probesize", "1000"])
-        # Use pre-probed duration to tell FFmpeg exactly how much to read,
-        # preventing scans of ghost audio tracks on empty MP4 audio streams.
-        if event.video_source.duration is not None:
+        # Use pre-probed duration (or loop-limited duration) to tell FFmpeg exactly
+        # how much to read, preventing scans of ghost audio tracks on empty streams.
+        if needs_loop:
+            cmd.extend(["-t", f"{clip_duration:.3f}"])
+        elif event.video_source.duration is not None:
             remaining = event.video_source.duration - skip
             if remaining > 0:
                 cmd.extend(["-t", f"{remaining:.3f}"])
@@ -881,31 +918,12 @@ def build_filter_complex(
         enable_expr = f"between(t\\,{event.start_time:.3f}\\,{effective_end:.3f})"
         filters.append(
             f"[{current_label}][{video_label}]overlay="
-            f"x={cut_x}:y={cut_y}:enable={enable_expr}"
+            f"x={cut_x}:y={cut_y}:enable={enable_expr}:eof_action=pass"
             f"[{next_label}]"
         )
         current_label = next_label
 
-    # Layer 3: Slides (transparent in the talking-head cutout area)
-    for i, event in enumerate(plan.slide_events):
-        slide_idx = slide_inputs[event.slide_id]
-
-        slide_label = f"s{i}"
-        filters.append(
-            f"[{slide_idx}:v]scale={width}:{height}:"
-            f"force_original_aspect_ratio=decrease,pad={width}:{height}:(ow-iw)/2:(oh-ih)/2:color=0x00000000[{slide_label}]"
-        )
-
-        next_label = f"sbase{i}"
-        enable_expr = f"between(t\\,{event.start_time:.3f}\\,{event.end_time:.3f})"
-        filters.append(
-            f"[{current_label}][{slide_label}]overlay="
-            f"x=0:y=0:enable={enable_expr}"
-            f"[{next_label}]"
-        )
-        current_label = next_label
-
-    # Layer 4: Always-visible videos (talking head) — above slides, visible through cutout
+    # Layer 3: Talking head — above below-videos, but under slides so fullscreen slides cover it
     for i, (video_id, video_source, cutout) in enumerate(plan.narration_videos):
         input_idx = always_visible_inputs[i]
         cut_x, cut_y, cut_width, cut_height = _calculate_cutout_position(
@@ -958,7 +976,64 @@ def build_filter_complex(
                 )
                 current_label = next_label
 
-    # Layer 5: "above" triggered videos (vft/vf2t/vst) — topmost, covers slides and talking head
+    # Layer 4: "mid" triggered videos (vfm/vsm) — above talking head, below slides
+    # Use case: content that should show through a slide's transparent "screen hole"
+    for i, event in enumerate(plan.video_events):
+        if event.layer != "mid":
+            continue
+        video_idx = video_inputs[i]
+        cut_x, cut_y, cut_width, cut_height = _calculate_cutout_position(
+            event.cutout, width, height
+        )
+
+        duration = event.end_time - event.start_time
+        if event.video_source.take is not None:
+            duration = min(duration, event.video_source.take)
+        effective_end = event.start_time + duration
+
+        zoom = event.video_source.zoom
+        zoomed_width = int(cut_width * zoom)
+        zoomed_height = int(cut_height * zoom)
+
+        video_label = f"tvm{i}"
+        start_pts = event.start_time
+        filters.append(
+            f"[{video_idx}:v]format=yuva444p10le,"
+            f"setpts=PTS-STARTPTS+{start_pts:.3f}/TB,"
+            f"scale={zoomed_width}:{zoomed_height}:force_original_aspect_ratio=increase,"
+            f"crop={cut_width}:{cut_height}:(iw-{cut_width})/2:(ih-{cut_height})/2,"
+            f"format=rgba[{video_label}]"
+        )
+
+        next_label = f"tvmbase{i}"
+        enable_expr = f"between(t\\,{event.start_time:.3f}\\,{effective_end:.3f})"
+        filters.append(
+            f"[{current_label}][{video_label}]overlay="
+            f"x={cut_x}:y={cut_y}:enable={enable_expr}:eof_action=pass"
+            f"[{next_label}]"
+        )
+        current_label = next_label
+
+    # Layer 5: Slides — on top of talking head so fullscreen slides cover the narrator
+    for i, event in enumerate(plan.slide_events):
+        slide_idx = slide_inputs[event.slide_id]
+
+        slide_label = f"s{i}"
+        filters.append(
+            f"[{slide_idx}:v]scale={width}:{height}:"
+            f"force_original_aspect_ratio=decrease,pad={width}:{height}:(ow-iw)/2:(oh-ih)/2:color=0x00000000[{slide_label}]"
+        )
+
+        next_label = f"sbase{i}"
+        enable_expr = f"between(t\\,{event.start_time:.3f}\\,{event.end_time:.3f})"
+        filters.append(
+            f"[{current_label}][{slide_label}]overlay="
+            f"x=0:y=0:enable={enable_expr}"
+            f"[{next_label}]"
+        )
+        current_label = next_label
+
+    # Layer 6: "above" triggered videos (vft/vf2t/vst) — topmost, covers slides and talking head
     # Use case: fullscreen video that intentionally masks the narrator
     for i, event in enumerate(plan.video_events):
         if event.layer != "above":
@@ -991,7 +1066,7 @@ def build_filter_complex(
         enable_expr = f"between(t\\,{event.start_time:.3f}\\,{effective_end:.3f})"
         filters.append(
             f"[{current_label}][{video_label}]overlay="
-            f"x={cut_x}:y={cut_y}:enable={enable_expr}:format=auto"
+            f"x={cut_x}:y={cut_y}:enable={enable_expr}:format=auto:eof_action=pass"
             f"[{next_label}]"
         )
         current_label = next_label
diff --git a/gnommo/transformer.py b/gnommo/transformer.py
index da1e462..e519684 100644
--- a/gnommo/transformer.py
+++ b/gnommo/transformer.py
@@ -34,18 +34,24 @@ AUDIO_OFFSET_SECONDS = 1.0
 # The pause-variant entries (vftp: etc.) carry a third element "pause_narration"
 # which is a per-event property, not stored in videos.json.
 _SHORTHAND_PREFIXES: dict[str, tuple] = {
-    "vft:": ("fullscreen", "above"),
-    "vfb:": ("fullscreen", "below"),
+    "vft:":  ("fullscreen",  "above"),
+    "vfb:":  ("fullscreen",  "below"),
+    "vfm:":  ("fullscreen",  "mid"),
     "vf2t:": ("fullscreen2", "above"),
     "vf2b:": ("fullscreen2", "below"),
-    "vst:": ("square", "above"),
-    "vsb:": ("square", "below"),
-    "vftp:": ("fullscreen", "above"),
-    "vfbp:": ("fullscreen", "below"),
+    "vf2m:": ("fullscreen2", "mid"),
+    "vst:":  ("square", "above"),
+    "vsb:":  ("square", "below"),
+    "vsm:":  ("square", "mid"),
+    "vftp:":  ("fullscreen",  "above"),
+    "vfbp:":  ("fullscreen",  "below"),
+    "vfmp:":  ("fullscreen",  "mid"),
     "vf2tp:": ("fullscreen2", "above"),
     "vf2bp:": ("fullscreen2", "below"),
-    "vstp:": ("square", "above"),
-    "vsbp:": ("square", "below"),
+    "vf2mp:": ("fullscreen2", "mid"),
+    "vstp:":  ("square", "above"),
+    "vsbp:":  ("square", "below"),
+    "vsmp:":  ("square", "mid"),
 }
 
 
@@ -157,18 +163,12 @@ def _is_known_marker(
     _VIDEO_PREFIXES = (
         "video:",
         "narration:",
-        "vft:",
-        "vfb:",
-        "vf2t:",
-        "vf2b:",
-        "vst:",
-        "vsb:",
-        "vftp:",
-        "vfbp:",
-        "vf2tp:",
-        "vf2bp:",
-        "vstp:",
-        "vsbp:",
+        "vft:", "vfb:", "vfm:",
+        "vf2t:", "vf2b:", "vf2m:",
+        "vst:", "vsb:", "vsm:",
+        "vftp:", "vfbp:", "vfmp:",
+        "vf2tp:", "vf2bp:", "vf2mp:",
+        "vstp:", "vsbp:", "vsmp:",
     )
     if any(marker_id.startswith(p) for p in _VIDEO_PREFIXES):
         return True
@@ -513,6 +513,73 @@ def align_markers_to_transcription(
                 )
             )
 
+    # Repair pass: retry INTERPOLATED markers that the forward scan missed.
+    # Root cause of cascade failures: one bad match advances last_idx past
+    # the true positions of several subsequent markers. Fix: search in a
+    # bounded window [prev_marker_time - 1s, next_marker_time + 2s] so we
+    # avoid false early matches while still recovering from cascade failures.
+    if any(t.timestamp < 0 for t in timings):
+        for i, timing in enumerate(timings):
+            if timing.timestamp >= 0:
+                continue
+
+            marker_id, anchor_text, is_borrowed, anchor_type = contexts[i]
+            if not anchor_text.strip():
+                continue
+
+            # Lower bound: previous matched marker's timestamp → word index.
+            # Repairs processed in order, so already-repaired markers count too.
+            prev_time = 0.0
+            for j in range(i - 1, -1, -1):
+                if timings[j].timestamp >= 0:
+                    prev_time = max(0.0, timings[j].timestamp - 1.0)
+                    break
+            win_start = next(
+                (j for j, w in enumerate(transcription) if w.start >= prev_time),
+                0,
+            )
+
+            # Upper bound: next matched marker in the timings list (+2s padding)
+            next_time = float("inf")
+            for j in range(i + 1, len(timings)):
+                if timings[j].timestamp >= 0:
+                    next_time = timings[j].timestamp + 2.0
+                    break
+
+            win_end = (
+                next(
+                    (j for j, w in enumerate(transcription) if w.start > next_time),
+                    len(transcription),
+                )
+                if next_time < float("inf")
+                else len(transcription)
+            )
+
+            if win_end <= win_start:
+                continue
+
+            # Search in the bounded window with a relaxed threshold
+            sub = transcription[win_start:win_end]
+            idx, timestamp, confidence, match_end_idx = _find_phrase_timestamp(
+                anchor_text,
+                sub,
+                start_from=0,
+                fuzzy_threshold=max(0.4, fuzzy_threshold - 0.1),
+            )
+
+            if idx >= 0:
+                if anchor_type == "after" and match_end_idx > 0:
+                    end_word = sub[min(match_end_idx - 1, len(sub) - 1)]
+                    marker_time = end_word.end
+                else:
+                    marker_time = max(0.0, timestamp - 0.5)
+                timings[i] = MarkerTiming(
+                    marker_id=marker_id,
+                    timestamp=marker_time,
+                    context=f"(repaired: {anchor_text[:40]})",
+                    confidence=confidence,
+                )
+
     # Deduplicate slide markers. The manuscript pattern [SN]\n\n[SN] text... is
     # common: the first blank occurrence is a visual-transition cue and the second
     # carries the narration text used for alignment. We keep the first entry in
@@ -531,10 +598,24 @@ def align_markers_to_transcription(
         else:
             prev_idx = seen[timing.marker_id]
             prev = deduped[prev_idx]
-            if (
+            # Upgrade if: previous was a placeholder/interpolated and the new one is better.
+            # Also upgrade if previous used the backward-looking "after" anchor —
+            # that heuristic gives end-of-preceding-section timing, but a direct
+            # "before" match on the second occurrence (start-of-new-section − 0.5s)
+            # is more accurate for when the slide should appear.
+            should_upgrade = (
                 prev.context == "(after previous)"
                 and timing.context != "(after previous)"
-            ):
+            ) or (
+                prev.timestamp < 0
+                and timing.timestamp >= 0
+            ) or (
+                prev.context.startswith("(end of:")
+                and timing.timestamp >= 0
+                and timing.context != "(after previous)"
+                and not timing.context.startswith("(end of:")
+            )
+            if should_upgrade:
                 deduped[prev_idx] = MarkerTiming(
                     marker_id=prev.marker_id,
                     timestamp=timing.timestamp,
@@ -658,18 +739,12 @@ def build_render_plan(
     _VIDEO_MARKER_PREFIXES = (
         "video:",
         "narration:",
-        "vft:",
-        "vfb:",
-        "vf2t:",
-        "vf2b:",
-        "vst:",
-        "vsb:",
-        "vftp:",
-        "vfbp:",
-        "vf2tp:",
-        "vf2bp:",
-        "vstp:",
-        "vsbp:",
+        "vft:", "vfb:", "vfm:",
+        "vf2t:", "vf2b:", "vf2m:",
+        "vst:", "vsb:", "vsm:",
+        "vftp:", "vfbp:", "vfmp:",
+        "vf2tp:", "vf2bp:", "vf2mp:",
+        "vstp:", "vsbp:", "vsmp:",
     )
     missing_video_ids = [
         timing.marker_id[len(prefix) :]
@@ -764,7 +839,10 @@ def build_render_plan(
                     slide_event.end_time += pause_duration
 
             for vid_event in video_events:
-                if vid_event.start_time > narration_time:
+                if vid_event is event:
+                    # Don't shift the pause event by its own pause
+                    continue
+                if vid_event.start_time >= narration_time:
                     vid_event.start_time += pause_duration
                 if vid_event.end_time > narration_time:
                     vid_event.end_time += pause_duration
@@ -1004,7 +1082,7 @@ def _extract_video_events(
 
     # Pause-variant prefixes — the only thing the render pass still needs from
     # shorthand markers at event-build time (pause_narration is per-event, not stored in videos.json).
-    _PAUSE_PREFIXES = {"vftp:", "vfbp:", "vf2tp:", "vf2bp:", "vstp:", "vsbp:"}
+    _PAUSE_PREFIXES = {"vftp:", "vfbp:", "vfmp:", "vf2tp:", "vf2bp:", "vf2mp:", "vstp:", "vsbp:", "vsmp:"}
 
     # Collect video markers: (time, video_id, event_type, pause_narration)
     # video_markers: (timestamp, video_id, marker_type, pause_narration)
@@ -1088,8 +1166,8 @@ def _extract_video_events(
             end_time = start_time + video_source.take
         elif end_on == "end":
             end_time = total_duration
-        elif end_on == "next_slide" or (end_on is None and marker_type == "video"):
-            # End at next slide marker
+        elif end_on in ("next_slide", "slide") or (end_on is None and marker_type == "video"):
+            # End at next slide marker ("slide" is a recognised alias for "next_slide")
             end_time = total_duration
             for slide_time in slide_times:
                 if slide_time > start_time:
diff --git a/gnommo/validator.py b/gnommo/validator.py
index 242fd67..2d68fca 100644
--- a/gnommo/validator.py
+++ b/gnommo/validator.py
@@ -4,7 +4,7 @@ from pathlib import Path
 
 from .cache import resolve_with_cache
 from .errors import ValidationError, ValidationIssue
-from .parser import _read_json
+from .parser import _read_json, resolve_missing_videos
 from .models import (
     ProjectConfig,
     SlideDefinition,
@@ -38,6 +38,24 @@ def validate_project(
     issues: list[ValidationIssue] = []
     warnings: list[ValidationIssue] = []
 
+    # Collect video IDs actually referenced in the manuscript (for file-existence checks)
+    _VIDEO_PREFIXES = {
+        "video:":  6,
+        "vft:":    4, "vfb:":    4, "vfm:":    4,
+        "vf2t:":   5, "vf2b:":   5, "vf2m:":   5,
+        "vst:":    4, "vsb:":    4, "vsm:":    4,
+        "vftp:":   5, "vfbp:":   5, "vfmp:":   5,
+        "vf2tp:":  6, "vf2bp:":  6, "vf2mp:":  6,
+        "vstp:":   5, "vsbp:":   5, "vsmp:":   5,
+    }
+    referenced_video_ids: set[str] = set()
+    for marker in manuscript_markers:
+        prefix = next((p for p in _VIDEO_PREFIXES if marker.startswith(p)), None)
+        if prefix is not None:
+            referenced_video_ids.add(marker[_VIDEO_PREFIXES[prefix]:])
+        elif marker.startswith("narration:"):
+            referenced_video_ids.add(marker[10:])
+
     # Check for malformed markers first (these are likely typos)
     if malformed_markers:
         for line_num, marker_text in malformed_markers:
@@ -62,21 +80,6 @@ def validate_project(
             continue
         # Validate video trigger markers — both legacy [video:xxx] and
         # shorthand [vft:xxx] / [vfb:xxx] / [vst:xxx] / [vsb:xxx].
-        _VIDEO_PREFIXES = {
-            "video:": 6,
-            "vft:": 4,
-            "vfb:": 4,
-            "vf2t:": 5,
-            "vf2b:": 5,
-            "vst:": 4,
-            "vsb:": 4,
-            "vftp:": 5,
-            "vfbp:": 5,
-            "vf2tp:": 6,
-            "vf2bp:": 6,
-            "vstp:": 5,
-            "vsbp:": 5,
-        }
         matched_prefix = next(
             (p for p in _VIDEO_PREFIXES if marker.startswith(p)), None
         )
@@ -94,6 +97,16 @@ def validate_project(
                         project_path / "manuscript.txt",
                     )
                 )
+            else:
+                vs = videos[video_id]
+                if not vs.cutout or vs.cutout not in config.cutouts:
+                    warnings.append(
+                        ValidationIssue(
+                            f"[{marker}] video '{video_id}' has no valid cutout in videos.json — "
+                            f"run 'gnommo import' to project values, or set cutout manually.",
+                            project_path / "manuscript.txt",
+                        )
+                    )
             continue
 
         # Validate narration trigger markers (narration:xxx) - continuous videos
@@ -106,6 +119,16 @@ def validate_project(
                         project_path / "manuscript.txt",
                     )
                 )
+            else:
+                vs = videos[video_id]
+                if not vs.cutout or vs.cutout not in config.cutouts:
+                    warnings.append(
+                        ValidationIssue(
+                            f"[{marker}] video '{video_id}' has no valid cutout in videos.json — "
+                            f"run 'gnommo import' to project values, or set cutout manually.",
+                            project_path / "manuscript.txt",
+                        )
+                    )
             continue
 
         # Segment markers are structural annotations, not slide references
@@ -168,6 +191,10 @@ def validate_project(
         shared_assets_dir = project_path.parent / "shared_assets"
 
     for video_id, video_source in videos.items():
+        # Only check files for videos actually used in this manuscript
+        if video_id not in referenced_video_ids:
+            continue
+
         # Determine base directory based on is_shared flag
         if video_source.is_shared:
             if shared_assets_dir:
@@ -186,9 +213,15 @@ def validate_project(
         video_path = base_dir / video_source.source_file
         video_path, _ = resolve_with_cache(video_path, project_path)
         if not video_path.exists():
+            sf = video_source.source_file
+            hint = (
+                " — run 'gnommo pexels' to download"
+                if sf.startswith("pexels/")
+                else " — falling back to PlaceholderVideo"
+            )
             warnings.append(
                 ValidationIssue(
-                    f"Video file not found: {video_source.source_file} — falling back to PlaceholderVideo",
+                    f"Video file not found: {sf}{hint}",
                     videos_json_path,
                 )
             )
@@ -229,6 +262,7 @@ def validate_project(
                 )
             else:
                 bg_path = shared_assets_dir / bg_videos[bg_handle]["source_file"]
+                bg_path, _ = resolve_with_cache(bg_path, project_path)
                 if not bg_path.exists():
                     issues.append(
                         ValidationIssue(
@@ -272,6 +306,20 @@ def validate_project(
             )
         )
 
+    # Check outro videos exist in videos.json or shared_assets
+    if config.outro:
+        missing_outro = [vid_id for vid_id in config.outro if vid_id not in videos]
+        if missing_outro:
+            found = resolve_missing_videos(missing_outro, project_path, config)
+            still_missing = [vid_id for vid_id in missing_outro if vid_id not in found]
+            for vid_id in still_missing:
+                warnings.append(
+                    ValidationIssue(
+                        f"Outro video '{vid_id}' not found in videos.json or shared_assets — will be skipped at render",
+                        project_path / "project.json",
+                    )
+                )
+
     # If any issues, raise ValidationError
     if issues:
         raise ValidationError(issues)