Fixing gnommo

2026-03-26 10:46:05 +01:00
parent 0e22fcfbb3
commit 7c75610fce
15 changed files with 2028 additions and 410 deletions
@@ -14,6 +14,7 @@
    "audioonly": [
      {
        "type": "audio_normalize",
        "compress": false,
        "normalize": true,
        "target_lufs": -14,
        "target_lra": 11,
@@ -27,7 +27,6 @@ from gnommo.parser import _read_json
 def write_manuscript(data: Path, out_path: Path):
    data = _read_json(data.read_text(encoding="utf-8"))
    lines = []
    i = 0
@@ -30,11 +30,15 @@ from pathlib import Path
 try:
    import requests
 except ImportError:
-    print("Error: 'requests' package is required. Run: pip install requests", file=sys.stderr)
+    print(
        "Error: 'requests' package is required. Run: pip install requests",
        file=sys.stderr,
    )
    sys.exit(1)
 SYNC_FILE_LOCAL = ".gnommo_sync.json"
-SYNC_FILE_PROD  = ".gnommo_sync.prod.json"
+SYNC_FILE_PROD = ".gnommo_sync.prod.json"
 def _sync_file(prod: bool) -> str:
    """Return the sync-state filename for the selected target.

    Gives SYNC_FILE_PROD when ``prod`` is truthy, otherwise SYNC_FILE_LOCAL.
    """
    if prod:
        return SYNC_FILE_PROD
    return SYNC_FILE_LOCAL
@@ -69,19 +73,33 @@ def _write_sync(project_path: Path, data: dict, prod: bool = False):
        json.dump(data, f, indent=2)
-def cmd_handoff(project_path: Path, verbose: bool = False, file_override: str | None = None, prod: bool = False, res: str = "full") -> int:
+def cmd_handoff(
    project_path: Path,
    verbose: bool = False,
    file_override: str | None = None,
    prod: bool = False,
    res: str = "full",
 ) -> int:
    _load_env_file()
    if prod:
        api_url = os.environ.get("GNOMMOWEB_PROD_URL", "").rstrip("/")
        api_key = os.environ.get("GNOMMOWEB_PROD_API_KEY", "")
-        if not api_url: print("Error: GNOMMOWEB_PROD_URL is not set.", file=sys.stderr); return 1
+        if not api_url:
-        if not api_key: print("Error: GNOMMOWEB_PROD_API_KEY is not set.", file=sys.stderr); return 1
+            print("Error: GNOMMOWEB_PROD_URL is not set.", file=sys.stderr)
            return 1
        if not api_key:
            print("Error: GNOMMOWEB_PROD_API_KEY is not set.", file=sys.stderr)
            return 1
    else:
        api_url = os.environ.get("GNOMMOWEB_URL", "").rstrip("/")
        api_key = os.environ.get("GNOMMOWEB_API_KEY", "")
-        if not api_url: print("Error: GNOMMOWEB_URL is not set.", file=sys.stderr); return 1
+        if not api_url:
-        if not api_key: print("Error: GNOMMOWEB_API_KEY is not set.", file=sys.stderr); return 1
+            print("Error: GNOMMOWEB_URL is not set.", file=sys.stderr)
            return 1
        if not api_key:
            print("Error: GNOMMOWEB_API_KEY is not set.", file=sys.stderr)
            return 1
    if verbose:
        target = "production" if prod else "local"
@@ -104,7 +122,9 @@ def cmd_handoff(project_path: Path, verbose: bool = False, file_override: str |
    if file_override:
        video_path = Path(file_override)
    else:
-        output_filename = project.get("output") or Path(project.get("output_video", "")).name
+        output_filename = (
            project.get("output") or Path(project.get("output_video", "")).name
        )
        if not output_filename:
            print(
                "Error: no 'output' field in project.json and no --file provided.",
@@ -148,17 +168,23 @@ def cmd_handoff(project_path: Path, verbose: bool = False, file_override: str |
    result = r.json()
    video_version = result.get("video_version", "?")
-    video_url     = result.get("video_url", "")
+    video_url = result.get("video_url", "")
    # ── Write sync state ───────────────────────────────────────────────────────
    now_iso = datetime.now(tz=timezone.utc).isoformat(timespec="seconds")
    existing_sync = _read_sync(project_path, prod)
-    _write_sync(project_path, {
+    _write_sync(
-        **existing_sync,
+        project_path,
-        "last_handoff_at":  now_iso,
+        {
-        "video_version":    video_version,
+            **existing_sync,
-        "server_updated_at": result.get("asset", {}).get("updated_at", existing_sync.get("server_updated_at")),
+            "last_handoff_at": now_iso,
-    }, prod)
+            "video_version": video_version,
            "server_updated_at": result.get("asset", {}).get(
                "updated_at", existing_sync.get("server_updated_at")
            ),
        },
        prod,
    )
    print(f"✓ {project_id} → v{video_version} [processed]")
    if video_url:
@@ -170,8 +196,8 @@ def cmd_handoff(project_path: Path, verbose: bool = False, file_override: str |
 def _mime_type(path: Path) -> str:
    ext = path.suffix.lower()
    return {
-        ".mp4":  "video/mp4",
+        ".mp4": "video/mp4",
-        ".mov":  "video/quicktime",
+        ".mov": "video/quicktime",
        ".webm": "video/webm",
-        ".mkv":  "video/x-matroska",
+        ".mkv": "video/x-matroska",
    }.get(ext, "application/octet-stream")
@@ -65,7 +65,9 @@ class ProjectConfig:
    # YouTube description fields
    description: str = ""  # Video description text for YouTube
    footer: str = ""  # Footer text (social links, subscribe CTA, etc.)
-    output_video: str = ""  # Output filename (e.g. "DISC_INT3.mp4"); placed in out/ or out/<res>/
+    output_video: str = (
        ""  # Output filename (e.g. "DISC_INT3.mp4"); placed in out/ or out/<res>/
    )
@dataclass
@@ -295,6 +297,10 @@ class VideoSource:
        False  # If True, skip loudnorm during preprocessing (apply after concatenation)
    )
    volume: float = 1.0  # Volume multiplier (1.0=full, >1.0=boost, <1.0=reduce)
    layer: str = "above"  # "above" = renders on top of slides; "below" = behind slides
    duration: Optional[float] = None  # Pre-probed file duration in seconds (set by import)
    has_audio: Optional[bool] = None  # Pre-detected audio presence (set by import)
    end_on: Optional[str] = None  # When video event ends: "next_slide" | "end" | "take" (None = marker-type default)
@dataclass
@@ -334,6 +340,7 @@ class AudioDefinition:
    ignore_pauses: bool = (
        False  # If True, audio continues playing during narration pauses
    )
    duration: Optional[float] = None  # Pre-probed duration in seconds (set by import)
@dataclass
@@ -364,6 +371,8 @@ class VideoEvent:
    end_time: float
    video_source: "VideoSource"
    cutout: "CutoutDefinition"
    cutout_name: str = ""  # resolved cutout name (e.g. "fullscreen"), for display
    layer: str = "above"  # "above" = on top of slides; "below" = behind slides
@dataclass
@@ -508,7 +517,9 @@ class RenderPlan:
    cached_files: set = field(
        default_factory=set
    )  # Video IDs loaded from external cache (show 📁 indicator)
-    output_path: Optional[Path] = None  # Final output file path (set after plan is built)
+    output_path: Optional[
        Path
    ] = None  # Final output file path (set after plan is built)
 # Slide layout configurations (hardcoded for POC)
@@ -161,8 +161,35 @@ def parse_project_config(project_path: Path) -> ProjectConfig:
    except json.JSONDecodeError as e:
        raise ParseError(f"Invalid JSON: {e}", config_path)
-    # Parse cutouts (named zones for video placement)
+    # Built-in cutouts — used by vft/vfb/vst/vsb marker shorthand.
-    cutouts: dict[str, CutoutDefinition] = {}
+    # Projects can override these by defining cutouts with the same names.
    cutouts: dict[str, CutoutDefinition] = {
        # 100 % × 100 % at origin — for fullscreen video (vf* markers)
        "fullscreen": CutoutDefinition(
            x=-1,
            y=-1,
            height=-1,
            width=-1,
            x_percent=0.0,
            y_percent=0.0,
            height_percent=1.0,
            width_percent=1.0,
        ),
        # 50 % height, square aspect, centred — for square video (vs* markers)
        "square": CutoutDefinition(
            x=-1,
            y=-1,
            height=-1,
            width=-1,
            x_percent=0.25,
            y_percent=0.25,
            height_percent=0.5,
            width_percent=0.0,
        ),
    }
    # Parse cutouts (named zones for video placement) — project definitions
    # override the built-ins above.
    cutouts_data = data.get("cutouts", {})
    for cutout_name, cutout_data in cutouts_data.items():
        x, x_pct = _parse_dimension(cutout_data.get("x", 0))
@@ -243,7 +270,9 @@ def parse_slides(
    # Try cache fallback for reading JSON
    slides_path, _ = resolve_with_cache(local_slides_path, project_path)
    if not slides_path.exists():
-        raise ParseError(f"slides file not found: {local_slides_path}", local_slides_path)
+        raise ParseError(
            f"slides file not found: {local_slides_path}", local_slides_path
        )
    try:
        data = _read_json(slides_path)
@@ -305,12 +334,14 @@ def parse_audio(
        if "overlap" in audio_data and audio_data["overlap"]:
            overlap = parse_timestamp(audio_data["overlap"])
        raw_duration = audio_data.get("duration")
        audio[audio_id] = AudioDefinition(
            file=audio_data["file"],
            volume=float(audio_data.get("volume", 1.0)),
            loop=bool(audio_data.get("loop", False)),
            overlap=overlap,
            ignore_pauses=bool(audio_data.get("ignore_pauses", False)),
            duration=float(raw_duration) if raw_duration is not None else None,
        )
    return audio, audio_dir
@@ -386,7 +417,9 @@ def parse_videos(
    # Try cache fallback for reading JSON
    videos_path, _ = resolve_with_cache(local_videos_path, project_path)
    if not videos_path.exists():
-        raise ParseError(f"videos.json not found: {local_videos_path}", local_videos_path)
+        raise ParseError(
            f"videos.json not found: {local_videos_path}", local_videos_path
        )
    try:
        data = _read_json(videos_path)
@@ -440,6 +473,8 @@ def parse_videos(
            # take = end - begin (duration from begin to end)
            take = end_time - skip
        raw_duration = video_data.get("duration")
        raw_has_audio = video_data.get("has_audio")
        videos[video_id] = VideoSource(
            source_file=video_data["source_file"],
            filter=filter_list,
@@ -455,6 +490,10 @@ def parse_videos(
            use_audio_channels=video_data.get("use_audio_channels", "both"),
            defer_loudnorm=video_data.get("defer_loudnorm", False),
            volume=float(video_data.get("volume", 1.0)),
            layer=video_data.get("layer", "above"),
            duration=float(raw_duration) if raw_duration is not None else None,
            has_audio=bool(raw_has_audio) if raw_has_audio is not None else None,
            end_on=video_data.get("end_on"),
        )
    return videos, videos_dir
@@ -27,9 +27,9 @@ CHUNK_DURATION = 60
 # Resolution presets for preview/proxy workflow
 # Each entry: (width, height, subdir_name)
 RES_CONFIGS: dict[str, tuple[int, int, str] | None] = {
-    "full":  None,              # no downscale, no subdir
+    "full": None,  # no downscale, no subdir
-    "low":   (490, 270, "low"),
+    "low": (490, 270, "low"),
-    "tiny":  (320, 180, "proxy"),  # "proxy" subdir kept for backward compat
+    "tiny": (320, 180, "proxy"),  # "proxy" subdir kept for backward compat
 }
 # Keep legacy constants pointing at "tiny" values
@@ -61,10 +61,14 @@ def _video_has_alpha(video_path: Path) -> bool:
    """Check if a video file has an alpha channel."""
    cmd = [
        "ffprobe",
-        "-v", "error",
+        "-v",
-        "-select_streams", "v:0",
+        "error",
-        "-show_entries", "stream=pix_fmt",
+        "-select_streams",
-        "-of", "default=noprint_wrappers=1:nokey=1",
+        "v:0",
        "-show_entries",
        "stream=pix_fmt",
        "-of",
        "default=noprint_wrappers=1:nokey=1",
        str(video_path),
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
@@ -104,13 +108,20 @@ def create_downscaled_video(
        return out_path
    cmd = [
-        "ffmpeg", "-y",
+        "ffmpeg",
-        "-i", str(source_path),
+        "-y",
-        "-vf", f"scale={width}:{height}",
+        "-i",
-        "-c:v", "libx264",
+        str(source_path),
-        "-preset", "ultrafast",
+        "-vf",
-        "-crf", "28",
+        f"scale={width}:{height}",
-        "-c:a", "copy",
+        "-c:v",
        "libx264",
        "-preset",
        "ultrafast",
        "-crf",
        "28",
        "-c:a",
        "copy",
        str(out_path),
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
@@ -204,7 +215,8 @@ def ensure_downscaled_files_exist(
    out_dir.mkdir(parents=True, exist_ok=True)
    video_files = [
-        f for f in source_dir.iterdir()
+        f
        for f in source_dir.iterdir()
        if f.is_file()
        and f.suffix.lower() in video_extensions
        and "_processed" not in f.stem
@@ -247,6 +259,7 @@ import selectors, time, sys, subprocess
 def run_ffmpeg_with_progress(cmd, duration, description="Processing"):
    cmd = cmd.copy()
    insert_pos = cmd.index("-y") + 1 if "-y" in cmd else 1
    cmd[insert_pos:insert_pos] = [
        "-progress",
@@ -269,9 +282,11 @@ def run_ffmpeg_with_progress(cmd, duration, description="Processing"):
    sel.register(p.stdout, selectors.EVENT_READ)
    bar_width = 30
    start_time = time.time()
    last_update = time.time()
    last_percent = 0
    seen_any_progress = False
    last_log_line = ""
    logs = []
    def draw(percent, suffix=""):
@@ -287,6 +302,7 @@ def run_ffmpeg_with_progress(cmd, duration, description="Processing"):
    while True:
        # If process ended and no more output, break
        if p.poll() is not None:
            # drain any remaining output quickly
            while True:
                line = p.stdout.readline()
@@ -297,8 +313,12 @@ def run_ffmpeg_with_progress(cmd, duration, description="Processing"):
        events = sel.select(timeout=0.2)
        if not events:
-            # No output right now; show finalizing if we're near end
+            if not seen_any_progress:
-            if (
+                # Show elapsed time and last FFmpeg output line during init
                elapsed = time.time() - start_time
                hint = f" | {last_log_line[:50]}" if last_log_line else ""
                draw(0, f"Initializing... ({elapsed:.0f}s){hint}")
            elif (
                seen_any_progress
                and last_percent >= 99
                and (time.time() - last_update) > 1.0
@@ -311,6 +331,10 @@ def run_ffmpeg_with_progress(cmd, duration, description="Processing"):
            if not line:
                continue
            logs.append(line)
            # Track last non-empty, non-progress-key line for init diagnostics
            stripped = line.strip()
            if stripped and "=" not in stripped:
                last_log_line = stripped
            if line.startswith("out_time_ms="):
                val = line.split("=", 1)[1].strip()
@@ -332,7 +356,10 @@ def run_ffmpeg_with_progress(cmd, duration, description="Processing"):
    if p.returncode == 0:
        draw(100, "Done\n")
    else:
-        sys.stdout.write("\n")
+        code = p.returncode
        # On macOS/Linux a negative return code is the number of the signal that
        # killed the process: -9 = SIGKILL (commonly the OS OOM killer), -6 = SIGABRT
        signal_hint = " (OOM kill)" if code == -9 else (" (abort)" if code == -6 else "")
        sys.stdout.write(f"\n          FFmpeg exited with code {code}{signal_hint}\n")
        sys.stdout.flush()
    return subprocess.CompletedProcess(
@@ -340,7 +367,33 @@ def run_ffmpeg_with_progress(cmd, duration, description="Processing"):
    )
-def check_audio_channel_silent(input_path: Path, channel: str, threshold_db: float = -60.0) -> tuple[bool, float]:
+def _has_audio_stream(video_path: Path) -> bool:
    """Return True if the file has a real (non-ghost) audio stream."""
    result = subprocess.run(
        [
            "ffprobe", "-v", "error",
            "-analyzeduration", "0",
            "-probesize", "1000000",
            "-select_streams", "a:0",
            "-show_entries", "stream=index,nb_frames",
            "-of", "csv=p=0",
            str(video_path),
        ],
        capture_output=True,
        text=True,
    )
    output = result.stdout.strip()
    if not output:
        return False
    parts = output.split(",")
    if len(parts) >= 2 and parts[1].strip() == "0":
        return False  # Ghost audio track — header present but no sample data
    return True
 def check_audio_channel_silent(
    input_path: Path, channel: str, threshold_db: float = -60.0
 ) -> tuple[bool, float]:
    """
    Quick check whether the specified audio channel is silent.
    Uses ffmpeg volumedetect (audio-only pass, much faster than full processing).
@@ -349,9 +402,14 @@ def check_audio_channel_silent(input_path: Path, channel: str, threshold_db: flo
    """
    pan = "pan=mono|c0=c0" if channel == "left" else "pan=mono|c0=c1"
    cmd = [
-        "ffmpeg", "-i", str(input_path),
+        "ffmpeg",
-        "-af", f"{pan},volumedetect",
+        "-i",
-        "-f", "null", "/dev/null",
+        str(input_path),
        "-af",
        f"{pan},volumedetect",
        "-f",
        "null",
        "/dev/null",
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    for line in result.stderr.splitlines():
@@ -416,10 +474,14 @@ def detect_silence_bounds(
    total_duration = get_video_duration(input_path)
    cmd = [
-        "ffmpeg", "-i", str(input_path),
+        "ffmpeg",
        "-i",
        str(input_path),
        "-af",
        f"silencedetect=noise={noise_threshold_db}dB:duration={min_silence_duration}",
-        "-f", "null", "/dev/null",
+        "-f",
        "null",
        "/dev/null",
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
@@ -591,6 +653,14 @@ def preprocess_video(
            # Audio normalization: denoise, compress, and normalize loudness
            # Note: skip/take are NOT applied here - they're only used during concatenation
            print("        Filter: audio_normalize")
            if not _has_audio_stream(current_input):
                raise PreprocessError(
                    f"audio_normalize requires an audio stream, but '{current_input.name}' has none.\n"
                    f"    Check that the source file has audio, or remove audio_normalize from the filter list.",
                    filter_type="audio_normalize",
                    command="",
                    stderr="",
                )
            step_output = gnommo_scratch / f"{video_id}_batch{batch_num}_audio.mov"
            intermediate_files.append(step_output)
            apply_audio_normalize(
@@ -1122,9 +1192,7 @@ def apply_combined_video_filters_chunked(
    num_chunks = int(duration / CHUNK_DURATION) + 1
    chunk_files: list[Path] = []
-    chunk_tasks: list[
+    chunk_tasks: list[tuple] = []  # (index, chunk_path, start_time, chunk_duration)
        tuple
    ] = []  # (index, chunk_path, start_time, chunk_duration)
    # Build list of chunk tasks
    for i in range(num_chunks):
@@ -1179,11 +1247,16 @@ def apply_combined_video_filters_chunked(
        print(f"          Concatenating {len(chunk_files)} chunks → {output_path.name}")
    concat_cmd = [
-        "ffmpeg", "-y",
+        "ffmpeg",
-        "-f", "concat",
+        "-y",
-        "-safe", "0",
+        "-f",
-        "-i", str(concat_list),
+        "concat",
-        "-c", "copy",
+        "-safe",
        "0",
        "-i",
        str(concat_list),
        "-c",
        "copy",
        str(output_path),
    ]
    concat_result = run_ffmpeg_with_progress(concat_cmd, duration, "Concatenating")
@@ -1953,12 +2026,14 @@ def parse_audio_normalize_config(config: dict[str, Any]) -> AudioNormalizeConfig
    # Parse EQ bands
    eq_bands = []
    for band in config.get("eq_bands", []):
-        eq_bands.append(EQBand(
+        eq_bands.append(
-            freq=float(band.get("freq", 1000)),
+            EQBand(
-            gain=float(band.get("gain", 0)),
+                freq=float(band.get("freq", 1000)),
-            q=float(band.get("q", 1.0)),
+                gain=float(band.get("gain", 0)),
-            type=str(band.get("type", "peak")),
+                q=float(band.get("q", 1.0)),
-        ))
+                type=str(band.get("type", "peak")),
            )
        )
    return AudioNormalizeConfig(
        # Parametric EQ
@@ -2163,12 +2238,18 @@ def stitch_narration_segments(
            # Preserve alpha with ProRes 4444
            cmd.extend(
                [
-                    "-vf", "fps=30,format=yuva444p10le",
+                    "-vf",
-                    "-c:v", "prores_ks",
+                    "fps=30,format=yuva444p10le",
-                    "-profile:v", "4",
+                    "-c:v",
-                    "-pix_fmt", "yuva444p10le",
+                    "prores_ks",
-                    "-c:a", "pcm_s16le",
+                    "-profile:v",
-                    "-avoid_negative_ts", "make_zero",
+                    "4",
                    "-pix_fmt",
                    "yuva444p10le",
                    "-c:a",
                    "pcm_s16le",
                    "-avoid_negative_ts",
                    "make_zero",
                    str(trimmed_path),
                ]
            )
@@ -2176,14 +2257,22 @@ def stitch_narration_segments(
            # No alpha - use fast h264 encoding
            cmd.extend(
                [
-                    "-vf", "fps=30",
+                    "-vf",
-                    "-c:v", "libx264",
+                    "fps=30",
-                    "-preset", "fast",
+                    "-c:v",
-                    "-crf", "18",
+                    "libx264",
-                    "-c:a", "aac",
+                    "-preset",
-                    "-b:a", "192k",
+                    "fast",
-                    "-avoid_negative_ts", "make_zero",
+                    "-crf",
-                    "-movflags", "+faststart",
+                    "18",
                    "-c:a",
                    "aac",
                    "-b:a",
                    "192k",
                    "-avoid_negative_ts",
                    "make_zero",
                    "-movflags",
                    "+faststart",
                    str(trimmed_path),
                ]
            )
@@ -2211,12 +2300,18 @@ def stitch_narration_segments(
    cmd = [
        "ffmpeg",
        "-y",
-        "-f", "concat",
+        "-f",
-        "-safe", "0",
+        "concat",
-        "-i", str(concat_list),
+        "-safe",
-        "-c:v", "copy",
+        "0",
-        "-c:a", "copy",
+        "-i",
-        "-movflags", "+faststart",
+        str(concat_list),
        "-c:v",
        "copy",
        "-c:a",
        "copy",
        "-movflags",
        "+faststart",
        str(output_path),
    ]
@@ -2235,16 +2330,26 @@ def stitch_narration_segments(
    )
    if needs_loudnorm:
        print("    Applying loudness normalization to stitched output...")
-        normalized_path = output_path.parent / f"{output_path.stem}_normalized{output_path.suffix}"
+        normalized_path = (
            output_path.parent / f"{output_path.stem}_normalized{output_path.suffix}"
        )
        # Use EBU R128 loudnorm targeting YouTube's recommended levels
        loudnorm_cmd = [
-            "ffmpeg", "-y",
+            "ffmpeg",
-            "-i", str(output_path),
+            "-y",
-            "-c:v", "copy",
+            "-i",
-            "-af", "loudnorm=I=-14:LRA=11:TP=-1.5",
+            str(output_path),
-            "-c:a", "aac", "-b:a", "192k",
+            "-c:v",
-            "-movflags", "+faststart",
+            "copy",
            "-af",
            "loudnorm=I=-14:LRA=11:TP=-1.5",
            "-c:a",
            "aac",
            "-b:a",
            "192k",
            "-movflags",
            "+faststart",
            str(normalized_path),
        ]
@@ -29,11 +29,15 @@ from pathlib import Path
 try:
    import requests
 except ImportError:
-    print("Error: 'requests' package is required. Run: pip install requests", file=sys.stderr)
+    print(
        "Error: 'requests' package is required. Run: pip install requests",
        file=sys.stderr,
    )
    sys.exit(1)
 SYNC_FILE_LOCAL = ".gnommo_sync.json"
-SYNC_FILE_PROD  = ".gnommo_sync.prod.json"
+SYNC_FILE_PROD = ".gnommo_sync.prod.json"
 def _sync_file(prod: bool) -> str:
    """Return the sync-state filename for the selected target.

    Gives SYNC_FILE_PROD when ``prod`` is truthy, otherwise SYNC_FILE_LOCAL.
    """
    if prod:
        return SYNC_FILE_PROD
    return SYNC_FILE_LOCAL
@@ -77,19 +81,29 @@ def _parse_ts(ts_str) -> datetime | None:
        return None
-def cmd_pull(project_path: Path, verbose: bool = False, force: bool = False, prod: bool = False) -> int:
+def cmd_pull(
    project_path: Path, verbose: bool = False, force: bool = False, prod: bool = False
 ) -> int:
    _load_env_file()
    if prod:
        api_url = os.environ.get("GNOMMOWEB_PROD_URL", "").rstrip("/")
        api_key = os.environ.get("GNOMMOWEB_PROD_API_KEY", "")
-        if not api_url: print("Error: GNOMMOWEB_PROD_URL is not set.", file=sys.stderr); return 1
+        if not api_url:
-        if not api_key: print("Error: GNOMMOWEB_PROD_API_KEY is not set.", file=sys.stderr); return 1
+            print("Error: GNOMMOWEB_PROD_URL is not set.", file=sys.stderr)
            return 1
        if not api_key:
            print("Error: GNOMMOWEB_PROD_API_KEY is not set.", file=sys.stderr)
            return 1
    else:
        api_url = os.environ.get("GNOMMOWEB_URL", "").rstrip("/")
        api_key = os.environ.get("GNOMMOWEB_API_KEY", "")
-        if not api_url: print("Error: GNOMMOWEB_URL is not set.", file=sys.stderr); return 1
+        if not api_url:
-        if not api_key: print("Error: GNOMMOWEB_API_KEY is not set.", file=sys.stderr); return 1
+            print("Error: GNOMMOWEB_URL is not set.", file=sys.stderr)
            return 1
        if not api_key:
            print("Error: GNOMMOWEB_API_KEY is not set.", file=sys.stderr)
            return 1
    if verbose:
        target = "production" if prod else "local"
@@ -176,19 +190,23 @@ def cmd_pull(project_path: Path, verbose: bool = False, force: bool = False, pro
    now_iso = datetime.now(tz=timezone.utc).isoformat(timespec="seconds")
    existing_sync = _read_sync(project_path, prod)
-    _write_sync(project_path, {
+    _write_sync(
-        **existing_sync,
+        project_path,
-        "last_pulled_at":    now_iso,
+        {
-        "server_updated_at": server_updated_at,
+            **existing_sync,
-        "last_pushed_at":    existing_sync.get("last_pushed_at"),
+            "last_pulled_at": now_iso,
-    }, prod)
+            "server_updated_at": server_updated_at,
            "last_pushed_at": existing_sync.get("last_pushed_at"),
        },
        prod,
    )
    return 0
 def _merge_parent(local: dict, server: dict, verbose: bool):
    """Update parent project.json: name, description, shorts index (slugs)."""
-    local["name"]        = server.get("title", local.get("name"))
+    local["name"] = server.get("title", local.get("name"))
    local["description"] = server.get("description") or local.get("description")
    # shorts is a list of slugs — update from server's shorts list
    server_shorts = server.get("shorts", [])
@@ -42,11 +42,15 @@ from pathlib import Path
 try:
    import requests
 except ImportError:
-    print("Error: 'requests' package is required. Run: pip install requests", file=sys.stderr)
+    print(
        "Error: 'requests' package is required. Run: pip install requests",
        file=sys.stderr,
    )
    sys.exit(1)
 SYNC_FILE_LOCAL = ".gnommo_sync.json"
-SYNC_FILE_PROD  = ".gnommo_sync.prod.json"
+SYNC_FILE_PROD = ".gnommo_sync.prod.json"
 def _sync_file(prod: bool) -> str:
    """Return the sync-state filename for the selected target.

    Gives SYNC_FILE_PROD when ``prod`` is truthy, otherwise SYNC_FILE_LOCAL.
    """
    if prod:
        return SYNC_FILE_PROD
    return SYNC_FILE_LOCAL
@@ -90,19 +94,29 @@ def _parse_ts(ts_str) -> datetime | None:
        return None
-def cmd_push(project_path: Path, verbose: bool = False, force: bool = False, prod: bool = False) -> int:
+def cmd_push(
    project_path: Path, verbose: bool = False, force: bool = False, prod: bool = False
 ) -> int:
    _load_env_file()
    if prod:
        api_url = os.environ.get("GNOMMOWEB_PROD_URL", "").rstrip("/")
        api_key = os.environ.get("GNOMMOWEB_PROD_API_KEY", "")
-        if not api_url: print("Error: GNOMMOWEB_PROD_URL is not set.", file=sys.stderr); return 1
+        if not api_url:
-        if not api_key: print("Error: GNOMMOWEB_PROD_API_KEY is not set.", file=sys.stderr); return 1
+            print("Error: GNOMMOWEB_PROD_URL is not set.", file=sys.stderr)
            return 1
        if not api_key:
            print("Error: GNOMMOWEB_PROD_API_KEY is not set.", file=sys.stderr)
            return 1
    else:
        api_url = os.environ.get("GNOMMOWEB_URL", "").rstrip("/")
        api_key = os.environ.get("GNOMMOWEB_API_KEY", "")
-        if not api_url: print("Error: GNOMMOWEB_URL is not set.", file=sys.stderr); return 1
+        if not api_url:
-        if not api_key: print("Error: GNOMMOWEB_API_KEY is not set.", file=sys.stderr); return 1
+            print("Error: GNOMMOWEB_URL is not set.", file=sys.stderr)
            return 1
        if not api_key:
            print("Error: GNOMMOWEB_API_KEY is not set.", file=sys.stderr)
            return 1
    if verbose:
        target = "production" if prod else "local"
@@ -160,11 +174,15 @@ def cmd_push(project_path: Path, verbose: bool = False, force: bool = False, pro
    # ── Write sync state ──────────────────────────────────────────────────────
    now_iso = datetime.now(tz=timezone.utc).isoformat(timespec="seconds")
    existing_sync = _read_sync(project_path, prod)
-    _write_sync(project_path, {
+    _write_sync(
-        **existing_sync,
+        project_path,
-        "last_pushed_at":    now_iso,
+        {
-        "server_updated_at": server_updated_at,
+            **existing_sync,
-    }, prod)
+            "last_pushed_at": now_iso,
            "server_updated_at": server_updated_at,
        },
        prod,
    )
    # ── Print summary ─────────────────────────────────────────────────────────
    asset = result.get("asset", {})
@@ -176,7 +194,9 @@ def cmd_push(project_path: Path, verbose: bool = False, force: bool = False, pro
        print(f"✓ {project_id} → gn_asset #{asset.get('id')} ({asset.get('name')})")
        if verbose:
            script_len = len(asset.get("script") or "")
-            print(f"  server.script: {script_len} chars | fps={asset.get('fps')} res={asset.get('resolution')}")
+            print(
                f"  server.script: {script_len} chars | fps={asset.get('fps')} res={asset.get('resolution')}"
            )
    return 0
@@ -201,19 +221,19 @@ def _build_parent_payload(project: dict, project_path: Path, verbose: bool) -> d
            print(f"  no manuscript field in project.json")
    return {
-        "project_id":       project["id"],
+        "project_id": project["id"],
-        "name":             project["name"],
+        "name": project["name"],
-        "description":      project.get("description"),
+        "description": project.get("description"),
-        "coursecode":       project.get("coursecode"),
+        "coursecode": project.get("coursecode"),
-        "script_content":   script_content,
+        "script_content": script_content,
-        "resolution":       project.get("resolution"),
+        "resolution": project.get("resolution"),
-        "fps":              project.get("fps"),
+        "fps": project.get("fps"),
        "duration_seconds": project.get("duration_seconds"),
-        "hook":             project.get("hook"),
+        "hook": project.get("hook"),
        "platform_targets": project.get("platform_targets"),
-        "status":           project.get("status"),
+        "status": project.get("status"),
-        "youtube_url":      project.get("youtube_url"),
+        "youtube_url": project.get("youtube_url"),
-        "shorts":           project.get("shorts", []),
+        "shorts": project.get("shorts", []),
    }
@@ -231,14 +251,14 @@ def _build_short_payload(project: dict, project_path: Path, verbose: bool) -> di
            print(f"  Warning: script file not found: {script_path}", file=sys.stderr)
    return {
-        "project_id":       project["id"],
+        "project_id": project["id"],
-        "name":             project["name"],
+        "name": project["name"],
-        "description":      project.get("description"),
+        "description": project.get("description"),
-        "parent_project":   project["parent_project"],
+        "parent_project": project["parent_project"],
-        "hook":             project.get("hook"),
+        "hook": project.get("hook"),
-        "script_content":   script_content,
+        "script_content": script_content,
        "platform_targets": project.get("platform_targets", ["youtube"]),
-        "resolution":       project.get("resolution"),
+        "resolution": project.get("resolution"),
-        "fps":              project.get("fps"),
+        "fps": project.get("fps"),
        "duration_seconds": project.get("duration_seconds"),
    }
@@ -22,12 +22,46 @@ from .preprocessor import run_ffmpeg_with_progress
 def _get_audio_duration(audio_path: Path) -> float:
-    """Get duration of an audio file using ffprobe."""
+    """Get duration of an audio file using ffprobe.
    For MP3 files, counts packets directly to get an accurate duration regardless
    of whether the file has a Xing/VBRI header. Falls back to format duration for
    other formats.
    """
    if audio_path.suffix.lower() == ".mp3":
        # Count actual packets rather than trusting the header estimate.
        # This is slower but accurate for headerless VBR/CBR MP3s.
        cmd = [
            "ffprobe",
            "-v",
            "error",
            "-count_packets",
            "-show_entries",
            "stream=nb_read_packets,duration",
            "-select_streams",
            "a:0",
            "-of",
            "default=noprint_wrappers=1:nokey=1",
            str(audio_path),
        ]
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode == 0:
            # Output: duration\nnb_read_packets — take the first non-N/A line
            for line in result.stdout.strip().splitlines():
                try:
                    val = float(line)
                    if val > 0:
                        return val
                except ValueError:
                    continue
    cmd = [
        "ffprobe",
-        "-v", "error",
+        "-v",
-        "-show_entries", "format=duration",
+        "error",
-        "-of", "default=noprint_wrappers=1:nokey=1",
+        "-show_entries",
        "format=duration",
        "-of",
        "default=noprint_wrappers=1:nokey=1",
        str(audio_path),
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
@@ -208,16 +242,28 @@ def _resolve_video_path(
 def _has_audio_stream(video_path: Path) -> bool:
-    """Check if a video file contains an audio stream using ffprobe."""
+    """Check if a video file contains a non-empty audio stream.
    Uses -analyzeduration 0 to avoid the slow avformat_find_stream_info() scan
    that happens when an MP4 has a declared audio track with no actual frames —
    ffprobe would otherwise scan the entire file looking for audio packets.
    Also checks nb_frames to reject ghost audio tracks (stream header exists in
    the moov atom but no sample data in stsc/stsz).
    """
    result = subprocess.run(
        [
            "ffprobe",
            "-v",
            "error",
            "-analyzeduration",
            "0",
            "-probesize",
            "1000000",
            "-select_streams",
-            "a",
+            "a:0",
            "-show_entries",
-            "stream=index",
+            "stream=index,nb_frames",
            "-of",
            "csv=p=0",
            str(video_path),
@@ -225,7 +271,16 @@ def _has_audio_stream(video_path: Path) -> bool:
        capture_output=True,
        text=True,
    )
-    return bool(result.stdout.strip())
+    output = result.stdout.strip()
    if not output:
        return False
    # output is "index" or "index,nb_frames"
    parts = output.split(",")
    if len(parts) >= 2:
        nb_frames = parts[1].strip()
        if nb_frames == "0":
            return False  # Ghost audio track — declared but no sample data
    return True
 def _build_audio_channel_filter(use_audio_channels: str) -> str:
@@ -263,11 +318,18 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]:
    # Add -ss seek BEFORE -i for skip parameter and/or partial rendering
    always_visible_inputs: list[int] = []
    for video_id, video_source, cutout in plan.narration_videos:
-        video_path = _resolve_video_path(videos_dir, video_source, shared_assets_dir, project_path)
+        video_path = _resolve_video_path(
            videos_dir, video_source, shared_assets_dir, project_path
        )
        # Combine video skip setting with partial render offset
        total_seek = video_source.skip + plan.input_seek_time
        if total_seek > 0:
            cmd.extend(["-ss", f"{total_seek:.3f}"])
        # Skip stream analysis — codec params are in the container header, and
        # duration is already known by gnommo via ffprobe (plan.total_duration).
        # Without this, FFmpeg reads 100MB+ of compressed data per input at 4K
        # bitrates before encoding starts ("Estimating duration from bitrate").
        cmd.extend(["-analyzeduration", "0", "-probesize", "1000"])
        cmd.extend(["-i", str(video_path)])
        always_visible_inputs.append(input_idx)
        input_idx += 1
@@ -283,18 +345,26 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]:
        shared_assets_dir = project_path.parent / "shared_assets"
        videos_json_bg = shared_assets_dir / "videos.json"
        if not videos_json_bg.exists():
-            raise RenderError(f"shared_assets/videos.json not found (needed for background handle '{bg_handle}')")
+            raise RenderError(
                f"shared_assets/videos.json not found (needed for background handle '{bg_handle}')"
            )
        bg_videos = _read_json(videos_json_bg)
        if bg_handle not in bg_videos:
-            raise RenderError(f"Background handle '{bg_handle}' not found in shared_assets/videos.json")
+            raise RenderError(
                f"Background handle '{bg_handle}' not found in shared_assets/videos.json"
            )
        bg_path = shared_assets_dir / bg_videos[bg_handle]["source_file"]
        if not bg_path.exists():
-            raise RenderError(f"Background file not found: {bg_path} (from handle '{bg_handle}')")
+            raise RenderError(
                f"Background file not found: {bg_path} (from handle '{bg_handle}')"
            )
        image_extensions = {".png", ".jpg", ".jpeg", ".gif", ".bmp", ".tiff", ".webp"}
        bg_is_image = bg_path.suffix.lower() in image_extensions
        # Loop background videos infinitely
        if not bg_is_image:
            cmd.extend(["-stream_loop", "-1"])
        # Duration of background video is irrelevant (looped or image) — skip analysis
        cmd.extend(["-analyzeduration", "0", "-probesize", "1000"])
        cmd.extend(["-i", str(bg_path)])
        bg_idx = input_idx
        input_idx += 1
@@ -325,14 +395,24 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]:
        video_path = _resolve_video_path(
            videos_dir, event.video_source, shared_assets_dir, project_path
        )
        # Seek to skip point before loading input
        skip = event.video_source.skip
        if skip > 0:
            cmd.extend(["-ss", f"{skip:.3f}"])
        cmd.extend(["-analyzeduration", "0", "-probesize", "1000"])
        # Use pre-probed duration to tell FFmpeg exactly how much to read,
        # preventing scans of ghost audio tracks on empty MP4 audio streams.
        if event.video_source.duration is not None:
            remaining = event.video_source.duration - skip
            if remaining > 0:
                cmd.extend(["-t", f"{remaining:.3f}"])
        cmd.extend(["-i", str(video_path)])
        video_inputs[i] = input_idx
        input_idx += 1
-        if _has_audio_stream(video_path):
+        has_audio = event.video_source.has_audio
        if has_audio is None:
            print(f"  Warning: no cached metadata for '{event.video_source.source_file}' — run 'gnommo import' to avoid slow probing")
            has_audio = _has_audio_stream(video_path)
        if has_audio:
            video_events_with_audio.add(i)
    # Input: outro videos (play after narration ends)
@@ -343,14 +423,22 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]:
        video_path = _resolve_video_path(
            videos_dir, event.video_source, shared_assets_dir, project_path
        )
        # Seek to skip point before loading input
        skip = event.video_source.skip
        if skip > 0:
            cmd.extend(["-ss", f"{skip:.3f}"])
        cmd.extend(["-analyzeduration", "0", "-probesize", "1000"])
        if event.video_source.duration is not None:
            remaining = event.video_source.duration - skip
            if remaining > 0:
                cmd.extend(["-t", f"{remaining:.3f}"])
        cmd.extend(["-i", str(video_path)])
        outro_inputs[i] = input_idx
        input_idx += 1
-        if _has_audio_stream(video_path):
+        has_audio = event.video_source.has_audio
        if has_audio is None:
            print(f"  Warning: no cached metadata for '{event.video_source.source_file}' — run 'gnommo import' to avoid slow probing")
            has_audio = _has_audio_stream(video_path)
        if has_audio:
            outro_events_with_audio.add(i)
    # Track where audio inputs start
@@ -365,12 +453,24 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]:
        if event.audio_id not in audio_inputs:
            audio_path = audio_dir / event.audio_def.file
            audio_path, _ = resolve_with_cache(audio_path, project_path)
            # Use pre-probed duration from audio.json if available (set by import).
            # For MP3 without Xing/VBRI headers this is critical — FFmpeg otherwise
            # scans the whole file to estimate duration (100s+ for large files).
            # Fall back to live probe only for MP3 when duration wasn't pre-cached.
            file_duration = event.audio_def.duration
            if file_duration is None and audio_path.suffix.lower() == ".mp3":
                file_duration = _get_audio_duration(audio_path)
            if file_duration is not None:
                cmd.extend(["-t", str(file_duration)])
            cmd.extend(["-i", str(audio_path)])
            audio_inputs[event.audio_id] = input_idx
            input_idx += 1
-            # Cache duration if this audio uses crossfade looping
+            # Cache duration for crossfade loop filter
            if event.audio_def.loop and event.audio_def.overlap:
-                audio_durations[event.audio_id] = _get_audio_duration(audio_path)
+                audio_durations[event.audio_id] = (
                    file_duration if file_duration is not None
                    else _get_audio_duration(audio_path)
                )
    # Build filter_complex
    filter_complex = build_filter_complex(
@@ -418,7 +518,7 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]:
            "-preset",
            "fast",
            "-crf",
-            "23",
+            "20",
            "-c:a",
            "aac",
            "-b:a",
@@ -793,6 +893,43 @@ def build_filter_complex(
                )
                current_label = next_label
    # Add "below-slides" triggered video overlays (vfb/vsb or layer="below")
    for i, event in enumerate(plan.video_events):
        if event.layer != "below":
            continue
        video_idx = video_inputs[i]
        cut_x, cut_y, cut_width, cut_height = _calculate_cutout_position(
            event.cutout, width, height
        )
        duration = event.end_time - event.start_time
        if event.video_source.take is not None:
            duration = min(duration, event.video_source.take)
        effective_end = event.start_time + duration
        zoom = event.video_source.zoom
        zoomed_width = int(cut_width * zoom)
        zoomed_height = int(cut_height * zoom)
        video_label = f"tvb{i}"
        start_pts = event.start_time
        filters.append(
            f"[{video_idx}:v]format=yuva444p10le,"
            f"setpts=PTS-STARTPTS+{start_pts:.3f}/TB,"
            f"scale={zoomed_width}:{zoomed_height}:force_original_aspect_ratio=increase,"
            f"crop={cut_width}:{cut_height}:(iw-{cut_width})/2:(ih-{cut_height})/2,"
            f"format=rgba[{video_label}]"
        )
        next_label = f"tvbbase{i}"
        enable_expr = f"between(t\\,{event.start_time:.3f}\\,{effective_end:.3f})"
        filters.append(
            f"[{current_label}][{video_label}]overlay="
            f"x={cut_x}:y={cut_y}:enable={enable_expr}"
            f"[{next_label}]"
        )
        current_label = next_label
    # Add slide overlays with time-based enable
    for i, event in enumerate(plan.slide_events):
        slide_idx = slide_inputs[event.slide_id]
@@ -815,8 +952,10 @@ def build_filter_complex(
        current_label = next_label
-    # Add triggered video overlays with time-based enable
+    # Add "above-slides" triggered video overlays (vft/vst or layer="above")
    for i, event in enumerate(plan.video_events):
        if event.layer != "above":
            continue
        video_idx = video_inputs[i]
        cut_x, cut_y, cut_width, cut_height = _calculate_cutout_position(
            event.cutout, width, height
@@ -836,22 +975,25 @@ def build_filter_complex(
        # Scale to cover the zoomed area (like CSS object-fit: cover)
        # Then crop to cutout dimensions (centered)
        # Use setpts to sync video start with overlay enable time
        # IMPORTANT: convert to rgba FIRST (before scale/crop) so the alpha channel
        # is preserved throughout. scale in yuva444p10le can silently strip alpha.
        video_label = f"tv{i}"
        start_pts = event.start_time
        filters.append(
-            f"[{video_idx}:v]format=yuva444p10le,"
+            f"[{video_idx}:v]format=rgba,"
            f"setpts=PTS-STARTPTS+{start_pts:.3f}/TB,"
            f"scale={zoomed_width}:{zoomed_height}:force_original_aspect_ratio=increase,"
-            f"crop={cut_width}:{cut_height}:(iw-{cut_width})/2:(ih-{cut_height})/2,"
+            f"crop={cut_width}:{cut_height}:(iw-{cut_width})/2:(ih-{cut_height})/2"
-            f"format=rgba[{video_label}]"
+            f"[{video_label}]"
        )
-        # Overlay with time-based enable
+        # Overlay with time-based enable; format=auto lets FFmpeg pick the right
        # compositing format so the RGBA alpha channel is respected.
        next_label = f"tvbase{i}"
        enable_expr = f"between(t\\,{event.start_time:.3f}\\,{effective_end:.3f})"
        filters.append(
            f"[{current_label}][{video_label}]overlay="
-            f"x={cut_x}:y={cut_y}:enable={enable_expr}"
+            f"x={cut_x}:y={cut_y}:enable={enable_expr}:format=auto"
            f"[{next_label}]"
        )
@@ -950,13 +1092,17 @@ def build_filter_complex(
            _, first_video_source, _ = plan.narration_videos[0]
            use_channels = first_video_source.use_audio_channels
            if use_channels == "auto":
-                narration_path = _resolve_video_path(videos_dir, first_video_source, shared_assets_dir, project_path)
+                narration_path = _resolve_video_path(
                    videos_dir, first_video_source, shared_assets_dir, project_path
                )
                use_channels = _resolve_auto_channel(narration_path)
            channel_filter = _build_audio_channel_filter(use_channels)
            narration_volume = first_video_source.volume
        # Build volume filter if not 1.0
-        volume_filter = f"volume={narration_volume:.2f}" if narration_volume != 1.0 else ""
+        volume_filter = (
            f"volume={narration_volume:.2f}" if narration_volume != 1.0 else ""
        )
        # Use narration_end_time to stop audio before outro (if outro exists)
        audio_end_time = (
@@ -980,7 +1126,9 @@ def build_filter_complex(
                )
                audio_labels_to_mix.append("[main_aud]")
            elif filter_parts:
-                filters.append(f"[{main_audio_idx}:a]{','.join(filter_parts)}[main_aud]")
+                filters.append(
                    f"[{main_audio_idx}:a]{','.join(filter_parts)}[main_aud]"
                )
                audio_labels_to_mix.append("[main_aud]")
            else:
                audio_labels_to_mix.append(f"[{main_audio_idx}:a]")
@@ -1066,7 +1214,10 @@ def build_filter_complex(
                        label = f"aud{i}"
                        delay_ms = int(event.start_time * 1000)
-                        if event.audio_def.overlap and event.audio_id in audio_durations:
+                        if (
                            event.audio_def.overlap
                            and event.audio_id in audio_durations
                        ):
                            # Crossfade loop: overlap copies with fade in/out
                            audio_dur = audio_durations[event.audio_id]
                            crossfade_filters = _build_crossfade_loop_filter(
@@ -180,7 +180,9 @@ def words_to_srt(
    srt_lines = []
    for idx, (start, end, text) in enumerate(segments, 1):
        srt_lines.append(str(idx))
-        srt_lines.append(f"{_format_srt_timestamp(start)} --> {_format_srt_timestamp(end)}")
+        srt_lines.append(
            f"{_format_srt_timestamp(start)} --> {_format_srt_timestamp(end)}"
        )
        srt_lines.append(text)
        srt_lines.append("")  # Blank line between entries
@@ -1,6 +1,7 @@
 """Transform stage: resolve timings and build render plan."""
 import re
 import string
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Optional
@@ -99,6 +100,16 @@ def _normalize_text(text: str) -> str:
    return text.strip()
 def _normalize_token(word: str) -> str:
    """Normalize a single word token for comparison.
    Strips leading/trailing punctuation and lowercases. Interior characters
    (e.g. apostrophes in contractions) are preserved so "don't" stays "don't".
    Applied to both transcript tokens and phrase words at comparison time.
    """
    return word.lower().strip(string.punctuation)
 def _is_known_marker(
    marker_id: str, slides: dict = None, videos: dict = None, audio: dict = None
 ) -> bool:
@@ -122,8 +133,9 @@ def _is_known_marker(
    if marker_id in slides:
        return True
-    # Video/narration triggers
+    # Video/narration triggers (all supported prefixes)
-    if marker_id.startswith("video:") or marker_id.startswith("narration:"):
+    _VIDEO_PREFIXES = ("video:", "narration:", "vft:", "vfb:", "vst:", "vsb:", "vftp:", "vfbp:", "vstp:", "vsbp:")
    if any(marker_id.startswith(p) for p in _VIDEO_PREFIXES):
        return True
    # Camera presets
@@ -143,20 +155,11 @@ def _strip_unknown_markers(
    text: str, slides: dict = None, videos: dict = None, audio: dict = None
 ) -> str:
    """
-    Remove unknown markers from text.
+    Remove all [...] markers from context text — none are pronounced aloud.
-    Unknown markers aren't pronounced, so they should be stripped
+    Note: [cite:...] markers are already stripped at parse time by parse_manuscript().
    before fuzzy matching. Note: [cite:...] markers are already
    stripped at parse time by parse_manuscript().
    """
-
+    return re.sub(r"\[([^\]]+)\]", "", text)
    def replace_marker(match):
        marker_id = match.group(1)
        if _is_known_marker(marker_id, slides, videos, audio):
            return match.group(0)  # Keep known markers
        return ""  # Strip unknown markers
    return re.sub(r"\[([A-Za-z0-9_:]+)\]", replace_marker, text)
 def _extract_marker_contexts(
@@ -177,8 +180,9 @@ def _extract_marker_contexts(
    videos = videos or {}
    audio = audio or {}
-    # Split by markers, keeping the markers
+    # Split by markers, keeping the markers — broad pattern handles any content
-    parts = re.split(r"\[([A-Za-z0-9_:]+)\]", manuscript_text)
+    # including paths with / and - (e.g. [vfb:pexels/7670835-uhd_3840_2160_30fps])
    parts = re.split(r"\[([^\]]+)\]", manuscript_text)
    # parts: [text_before, marker1, text_after1, marker2, text_after2, ...]
    raw_contexts = []
@@ -189,16 +193,27 @@ def _extract_marker_contexts(
        if not _is_known_marker(marker_id, slides, videos, audio):
            continue
-        if i + 1 < len(parts):
+        # Collect all following text, looking past unknown markers until the
-            following_text = parts[i + 1].strip()
+        # next known marker. This handles [S1][segment:1] text... where the
-            # Clean up: remove newlines, collapse whitespace
+        # text lives two parts ahead rather than immediately after S1.
-            following_text = " ".join(following_text.split())
+        text_pieces = []
-            # Strip unknown markers from following text (they're not pronounced)
+        j = i + 1
-            following_text = _strip_unknown_markers(
+        while j < len(parts):
-                following_text, slides, videos, audio
+            chunk = parts[j].strip()
-            )
+            if chunk:
-            following_text = " ".join(following_text.split())  # Clean up extra spaces
+                text_pieces.append(chunk)
-            raw_contexts.append((marker_id, following_text))
+            j += 1  # advance to the marker after this text chunk
            if j >= len(parts):
                break
            if _is_known_marker(parts[j], slides, videos, audio):
                break  # stop at the next known marker
            j += 1  # skip the unknown marker; its following text is next
        following_text = " ".join(text_pieces)
        following_text = " ".join(following_text.split())  # collapse whitespace
        following_text = _strip_unknown_markers(following_text, slides, videos, audio)
        following_text = " ".join(following_text.split())
        raw_contexts.append((marker_id, following_text))
    # For markers with no following text (consecutive markers), look ahead
    # Return (marker_id, following_text, is_borrowed) - is_borrowed=True means text came from look-ahead
@@ -209,13 +224,20 @@ def _extract_marker_contexts(
            words = following_text.split()[:10]
            contexts.append((marker_id, " ".join(words), False))
        else:
-            # Look ahead for next marker with text
+            # Look ahead for next marker with text, but never borrow from another
            # slide marker — slides must align independently to avoid two consecutive
            # slides matching the same transcription position simultaneously.
            borrowed = False
            for j in range(i + 1, len(raw_contexts)):
-                if raw_contexts[j][1]:
+                next_marker_id, next_text = raw_contexts[j]
-                    words = raw_contexts[j][1].split()[:10]
+                if next_text:
                    if next_marker_id in (slides or {}):
                        break  # Slide owns this text; give up borrowing
                    words = next_text.split()[:10]
                    contexts.append((marker_id, " ".join(words), True))  # Borrowed
                    borrowed = True
                    break
-            else:
+            if not borrowed:
                contexts.append((marker_id, "", False))
    return contexts
@@ -250,7 +272,8 @@ def _fuzzy_match_ratio(
        return 0.0, 0, 0
    transcript_words = [
-        _normalize_text(transcription[j].word) for j in range(start_idx, transcript_end)
+        _normalize_token(transcription[j].word)
        for j in range(start_idx, transcript_end)
    ]
    # Match phrase words sequentially against transcript window
@@ -261,7 +284,7 @@ def _fuzzy_match_ratio(
    last_match_end_offset = 0
    for phrase_word in phrase_words[:words_to_check]:
-        normalized = _normalize_text(phrase_word)
+        normalized = _normalize_token(phrase_word)
        if len(normalized) < 2:
            continue  # skip very short words (a, I, etc.) - don't count them
        words_checked += 1
@@ -303,8 +326,12 @@ def _find_phrase_timestamp(
    (-1, -1.0, 0.0, -1) if not found. word_index points to the first
    matched word. match_end_idx points past the last matched word.
    """
-    phrase_normalized = _normalize_text(phrase)
+    # Normalize each word individually — same method as transcript tokens.
-    phrase_words = phrase_normalized.split()
+    # This keeps contractions as single tokens ("haven't" stays "haven't") so
    # phrase and transcript word counts stay in sync. Using _normalize_text on
    # the whole phrase would expand "haven't" → "have not" (2 words), creating
    # a phantom "not" that fails to match the transcript and corrupts the window.
    phrase_words = [tok for tok in (_normalize_token(w) for w in phrase.split()) if tok]
    if not phrase_words:
        return -1, -1.0, 0.0, -1
@@ -504,7 +531,9 @@ def build_render_plan(
    cached_files: set[str] = set()
    narration_videos: list[tuple[str, VideoSource, CutoutDefinition]] = []
-    video_path, is_cached = _resolve_video_path(videos_dir, narration_video, shared_assets_dir, project_path)
+    video_path, is_cached = _resolve_video_path(
        videos_dir, narration_video, shared_assets_dir, project_path
    )
    if is_cached:
        cached_files.add(narration_video_id)
    full_duration = get_video_duration(video_path)
@@ -798,40 +827,127 @@ def _extract_video_events(
        ]
    )
-    # Collect video markers
+    # Mapping from shorthand marker prefix → (implied_cutout_name, implied_layer)
-    video_markers: list[tuple[float, str, str]] = []  # (time, video_id, type)
+    # These are the defaults; videos.json values act as a base but the marker wins.
    _SHORTHAND: dict[str, tuple[str, str]] = {
        "vft:": ("fullscreen", "above"),
        "vfb:": ("fullscreen", "below"),
        "vst:": ("square", "above"),
        "vsb:": ("square", "below"),
        "vftp:": ("fullscreen", "above", "pause_narration"),
        "vfbp:": ("fullscreen", "below", "pause_narration"),
        "vstp:": ("square", "above", "pause_narration"),
        "vsbp:": ("square", "below", "pause_narration"),
    }
    # Collect video markers: (time, video_id, event_type, cutout_name_override, layer_override)
    # event_type is "video" (ends at next slide) or "narration" (runs to end)
    video_markers: list[tuple[float, str, str, str | None, str | None]] = []
    for timing in marker_timings:
        if timing.timestamp < 0:
            continue
-        if timing.marker_id.startswith("video:"):
+        mid = timing.marker_id
            video_id = timing.marker_id[6:]
            if video_id in videos:
                video_source = videos[video_id]
                if video_source.cutout and video_source.cutout in cutouts:
                    video_markers.append((timing.timestamp, video_id, "video"))
-        elif timing.marker_id.startswith("narration:"):
+        # --- shorthand markers: vft/vfb/vst/vsb ---
-            video_id = timing.marker_id[10:]
+        shorthand_match = next((p for p in _SHORTHAND if mid.startswith(p)), None)
-            if video_id in videos:
+        if shorthand_match:
-                video_source = videos[video_id]
+            video_id = mid[len(shorthand_match) :]
-                if video_source.cutout and video_source.cutout in cutouts:
+            if video_id not in videos:
-                    video_markers.append((timing.timestamp, video_id, "narration"))
+                raise ValueError(
                    f"Marker [{mid}] references unknown video '{video_id}'. "
                    f"Add it to videos.json or remove the marker."
                )
            implied_cutout, implied_layer = _SHORTHAND[shorthand_match]
            if implied_cutout not in cutouts:
                raise ValueError(
                    f"Marker [{mid}] uses shorthand '{shorthand_match}' which requires "
                    f"cutout '{implied_cutout}' but it is not defined in project config. "
                    f"Available cutouts: {list(cutouts.keys())}"
                )
            video_markers.append(
                (timing.timestamp, video_id, "video", implied_cutout, implied_layer)
            )
            continue
        # --- legacy [video:xxx] ---
        if mid.startswith("video:"):
            video_id = mid[6:]
            if video_id not in videos:
                raise ValueError(
                    f"Marker [video:{video_id}] references unknown video '{video_id}'. "
                    f"Add it to videos.json or remove the marker."
                )
            video_source = videos[video_id]
            if not video_source.cutout:
                raise ValueError(
                    f"Marker [video:{video_id}] — video '{video_id}' has no 'cutout' set in videos.json."
                )
            if video_source.cutout not in cutouts:
                raise ValueError(
                    f"Marker [video:{video_id}] — cutout '{video_source.cutout}' is not defined in project config. "
                    f"Available: {list(cutouts.keys())}"
                )
            video_markers.append(
                (timing.timestamp, video_id, "video", None, None)
            )
            continue
        # --- [narration:xxx] ---
        if mid.startswith("narration:"):
            video_id = mid[10:]
            if video_id not in videos:
                raise ValueError(
                    f"Marker [narration:{video_id}] references unknown video '{video_id}'. "
                    f"Add it to videos.json or remove the marker."
                )
            video_source = videos[video_id]
            if not video_source.cutout:
                raise ValueError(
                    f"Marker [narration:{video_id}] — video '{video_id}' has no 'cutout' set in videos.json."
                )
            if video_source.cutout not in cutouts:
                raise ValueError(
                    f"Marker [narration:{video_id}] — cutout '{video_source.cutout}' is not defined in project config. "
                    f"Available: {list(cutouts.keys())}"
                )
            video_markers.append(
                (timing.timestamp, video_id, "narration", None, None)
            )
    events: list[VideoEvent] = []
-    for start_time, video_id, marker_type in video_markers:
+    for (
        start_time,
        video_id,
        marker_type,
        cutout_override,
        layer_override,
    ) in video_markers:
        video_source = videos[video_id]
        cutout = cutouts[video_source.cutout]
-        if marker_type == "video":
+        # Resolve cutout: marker override > videos.json cutout
-            # End at next slide
+        # (validation already ensured cutout exists — this is a safety assertion)
        cutout_name = cutout_override or video_source.cutout
        cutout = cutouts[cutout_name]
        # Resolve layer: marker override > videos.json layer
        layer = layer_override if layer_override is not None else video_source.layer
        end_on = video_source.end_on
        if end_on == "take" and video_source.take is not None:
            end_time = start_time + video_source.take
        elif end_on == "end":
            end_time = total_duration
        elif end_on == "next_slide" or (end_on is None and marker_type == "video"):
            # End at next slide marker
            end_time = total_duration
            for slide_time in slide_times:
                if slide_time > start_time:
                    end_time = slide_time
                    break
        else:
-            # narration: runs to end
+            # end_on is None and marker_type == "narration": runs to end
            end_time = total_duration
        # Filter by time range
@@ -846,6 +962,8 @@ def _extract_video_events(
                end_time=end_time,
                video_source=video_source,
                cutout=cutout,
                cutout_name=cutout_name,
                layer=layer,
            )
        )
@@ -992,7 +1110,9 @@ def _extract_outro_events(
        video_source = videos[video_id]
        # Get the video duration
-        video_path, is_cached = _resolve_video_path(videos_dir, video_source, shared_assets_dir, project_path)
+        video_path, is_cached = _resolve_video_path(
            videos_dir, video_source, shared_assets_dir, project_path
        )
        if is_cached and cached_files is not None:
            cached_files.add(video_id)
        if video_path.exists():
@@ -57,16 +57,26 @@ def validate_project(
        # Skip audio markers (start with 'A' followed by audio id, e.g., Awoosh)
        if marker.startswith("A") and len(marker) > 1 and marker[1:].isalnum():
            continue
-        # Validate video trigger markers (video:xxx) - slide-like videos
+        # Validate video trigger markers — both legacy [video:xxx] and
-        if marker.startswith("video:"):
+        # shorthand [vft:xxx] / [vfb:xxx] / [vst:xxx] / [vsb:xxx].
-            video_id = marker[6:]  # Remove 'video:' prefix
+        _VIDEO_PREFIXES = {
            "video:": 6,
            "vft:": 4,
            "vfb:": 4,
            "vst:": 4,
            "vsb:": 4,
        }
        matched_prefix = next(
            (p for p in _VIDEO_PREFIXES if marker.startswith(p)), None
        )
        if matched_prefix is not None:
            video_id = marker[_VIDEO_PREFIXES[matched_prefix] :]
            if video_id not in videos:
                # Check if it's a file extension mismatch
                hint = ""
                if "." in video_id:
                    base_name = video_id.rsplit(".", 1)[0]
                    if base_name in videos:
-                        hint = f" (Did you mean [video:{base_name}]? Don't include file extensions in markers)"
+                        hint = f" (Did you mean [{matched_prefix}{base_name}]? Don't include file extensions in markers)"
                warnings.append(
                    ValidationIssue(
                        f"Video marker [{marker}] referenced in manuscript but '{video_id}' not defined in videos.json{hint} — using PlaceholderVideo instead",
@@ -214,11 +224,12 @@ def validate_project(
                        )
                    )
-    # Check we have at least one video source
+    # Check videos.json exists (empty is fine — project may not need triggered videos)
-    if not videos:
+    if not (project_path / config.videos_path).exists():
        issues.append(
            ValidationIssue(
-                "No video sources defined in videos.json", project_path / "videos.json"
+                "videos.json not found — run 'gnommo import' to create it",
                project_path / "videos.json",
            )
        )
@@ -8,13 +8,13 @@
 # Options:
 #   --replace    Delete original files after successful transcoding
 #   --dry-run    Show what would be transcoded without doing it
-#   --crf <N>    Quality level (default: 23, lower=better quality, 18-28 typical)
+#   --crf <N>    Quality level (default: 18, lower=better quality, 18-28 typical)
 #
 set -e
 # Configuration
-DEFAULT_CRF=23
+DEFAULT_CRF=18
 EXTENSIONS=("mov" "mp4" "m4v" "avi" "mkv" "mxf")
 usage() {
@@ -44,7 +44,7 @@ Examples:
  $(basename "$0") ./media/videos                    # Transcode folder (smallest first)
  $(basename "$0") ./media/videos --dry-run          # Preview only
  $(basename "$0") ./media/videos --replace          # Transcode and delete originals
-  $(basename "$0") ./media/videos --crf 20           # Higher quality
+  $(basename "$0") ./media/videos --crf 18           # Higher quality
 EOF
    exit 0