Initial commit: GnommoEditor video pipeline

A code-first, declarative video editing system that compiles text documents into rendered video via FFmpeg. Uses a compiler-style ETL pipeline: Extract (parse inputs) → Validate → Transform (build timeline) → Render (FFmpeg).

Features:
- Text-based project definition (manuscript, transcript, JSON configs)
- Slide markers [S1], [S2] in transcript map to timed overlays
- Strict validation with fail-fast error reporting
- FFmpeg filter_complex generation with time-based enables
- CLI with validate/render/dry-run modes

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-12 11:19:38 +01:00
commit d5a8d38c9c
15 changed files with 967 additions and 0 deletions
@@ -0,0 +1,21 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+venv/
+.venv/
+*.egg-info/
+
+# OS
+.DS_Store
+Thumbs.db
+
+# Output
+**/out/
+*.mp4
+
+# Temp
+*.tmp
+.cache/
@@ -0,0 +1,5 @@
+Welcome to GnommoEditor, a code-first video editing system. [S1]
+
+In this example, we demonstrate how slides appear at specific timestamps based on markers in the transcript. [S2]
+
+And that's the end of our demo.
@@ -0,0 +1,11 @@
+{
+  "resolution": [1920, 1080],
+  "fps": 30,
+  "talkinghead": {
+    "x": 50,
+    "y": 600,
+    "targetheight": 400
+  },
+  "defaultSlideType": "square",
+  "background_video": ""
+}
@@ -0,0 +1,10 @@
+{
+  "S1": {
+    "image": "S1.png",
+    "type": "square"
+  },
+  "S2": {
+    "image": "S2.png",
+    "type": "square"
+  }
+}
@@ -0,0 +1,8 @@
+t,word
+0.00,Hello
+0.30,world
+0.60,[S1]
+1.50,Second
+1.80,slide
+2.00,[S2]
+2.50,End
@@ -0,0 +1,6 @@
+{
+  "talking_head": {
+    "file": "media/talking_head.mp4",
+    "preprocess": []
+  }
+}
@@ -0,0 +1,3 @@
+"""GnommoEditor - A code-first, declarative video editing pipeline."""
+
+__version__ = "0.1.0"
@@ -0,0 +1,6 @@
+"""Allow running gnommo as a module: python -m gnommo"""
+
+from .cli import main
+
+if __name__ == "__main__":
+    exit(main())
@@ -0,0 +1,158 @@
+"""CLI entry point for GnommoEditor."""
+
+import argparse
+import sys
+from pathlib import Path
+
+from . import __version__
+from .errors import GnommoError, ParseError, ValidationError, RenderError
+from .parser import (
+    parse_manuscript,
+    parse_project_config,
+    parse_slides,
+    parse_transcript,
+    parse_videos,
+)
+from .validator import validate_project
+from .transformer import build_render_plan
+from .renderer import render, generate_ffmpeg_command_string
+
+
+def main() -> int:
+    """Main entry point."""
+    parser = argparse.ArgumentParser(
+        prog="gnommo",
+        description="GnommoEditor - A code-first video editing pipeline",
+    )
+    parser.add_argument(
+        "--version",
+        action="version",
+        version=f"%(prog)s {__version__}",
+    )
+
+    subparsers = parser.add_subparsers(dest="command", required=True)
+
+    # validate command
+    validate_parser = subparsers.add_parser(
+        "validate",
+        help="Validate project without rendering",
+    )
+    validate_parser.add_argument(
+        "project",
+        type=Path,
+        help="Path to project directory",
+    )
+
+    # render command
+    render_parser = subparsers.add_parser(
+        "render",
+        help="Render video from project",
+    )
+    render_parser.add_argument(
+        "project",
+        type=Path,
+        help="Path to project directory",
+    )
+    render_parser.add_argument(
+        "-o", "--output",
+        type=Path,
+        help="Output file path (default: project/out/final.mp4)",
+    )
+    render_parser.add_argument(
+        "-v", "--verbose",
+        action="store_true",
+        help="Print FFmpeg command",
+    )
+    render_parser.add_argument(
+        "--dry-run",
+        action="store_true",
+        help="Print FFmpeg command without executing",
+    )
+
+    args = parser.parse_args()
+
+    try:
+        if args.command == "validate":
+            return cmd_validate(args.project)
+        elif args.command == "render":
+            output = args.output or (args.project / "out" / "final.mp4")
+            return cmd_render(args.project, output, args.verbose, args.dry_run)
+    except GnommoError as e:
+        print(f"Error: {e}", file=sys.stderr)
+        return 1
+    except KeyboardInterrupt:
+        print("\nAborted.", file=sys.stderr)
+        return 130
+
+    return 0
+
+
+def cmd_validate(project_path: Path) -> int:
+    """Run validation only."""
+    print(f"Validating project: {project_path}")
+
+    # Parse all files
+    _, markers = parse_manuscript(project_path)
+    config = parse_project_config(project_path)
+    slides = parse_slides(project_path)
+    videos = parse_videos(project_path)
+
+    # Validate
+    validate_project(project_path, markers, config, slides, videos)
+
+    print("Validation passed.")
+    return 0
+
+
+def cmd_render(project_path: Path, output_path: Path, verbose: bool, dry_run: bool) -> int:
+    """Run full render pipeline.
+
+    Runs the four stages in order (parse, validate, build render plan,
+    render) and prints progress for each one.
+
+    Args:
+        project_path: Project directory containing the input files.
+        output_path: Where the rendered video is written.
+        verbose: Forwarded to render(); prints the FFmpeg command.
+        dry_run: When true, print the FFmpeg command instead of running it.
+
+    Returns:
+        0 on success. Errors propagate as exceptions to the caller.
+    """
+    print(f"Rendering project: {project_path}")
+    print(f"Output: {output_path}")
+    print()
+
+    # Stage 1: Extract
+    print("Stage 1/4: Parsing input files...")
+    # The manuscript text itself is unused here; only its markers are kept.
+    _, markers = parse_manuscript(project_path)
+    config = parse_project_config(project_path)
+    slides = parse_slides(project_path)
+    videos = parse_videos(project_path)
+    transcript = parse_transcript(project_path)
+
+    print(f"  - Found {len(markers)} slide markers in manuscript")
+    print(f"  - Found {len(slides)} slide definitions")
+    print(f"  - Found {len(transcript)} transcript entries")
+    print()
+
+    # Stage 2: Validate
+    print("Stage 2/4: Validating...")
+    # Raises on any issue, so the stages below only run for a valid project.
+    validate_project(project_path, markers, config, slides, videos)
+    print("  - Validation passed")
+    print()
+
+    # Stage 3: Transform
+    print("Stage 3/4: Building render plan...")
+    plan = build_render_plan(project_path, config, slides, videos, transcript)
+    print(f"  - Video duration: {plan.total_duration:.2f}s")
+    print(f"  - Slide events: {len(plan.slide_events)}")
+    for event in plan.slide_events:
+        print(f"    - [{event.slide_id}] {event.start_time:.2f}s - {event.end_time:.2f}s")
+    print()
+
+    # Stage 4: Render
+    if dry_run:
+        # Dry run: show the command that would be executed and stop here.
+        print("Stage 4/4: Generating FFmpeg command (dry run)...")
+        print()
+        print(generate_ffmpeg_command_string(plan, output_path))
+        return 0
+
+    print("Stage 4/4: Rendering video...")
+    render(plan, output_path, verbose=verbose)
+    print(f"  - Output written to: {output_path}")
+    print()
+    print("Done.")
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
@@ -0,0 +1,59 @@
+"""Structured error types for GnommoEditor pipeline."""
+
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Optional
+
+
+class GnommoError(Exception):
+    """Base exception for all GnommoEditor errors."""
+    pass
+
+
+@dataclass
+class ValidationIssue:
+    """A single validation issue with location context."""
+    message: str
+    file: Optional[Path] = None
+    line: Optional[int] = None
+
+    def __str__(self) -> str:
+        parts = []
+        if self.file:
+            parts.append(str(self.file))
+        if self.line is not None:
+            parts.append(f"line {self.line}")
+        location = ":".join(parts) if parts else "project"
+        return f"[{location}] {self.message}"
+
+
+class ParseError(GnommoError):
+    """Error during parsing of input files."""
+
+    def __init__(self, message: str, file: Optional[Path] = None, line: Optional[int] = None):
+        self.issue = ValidationIssue(message, file, line)
+        super().__init__(str(self.issue))
+
+
+class ValidationError(GnommoError):
+    """Error during validation stage. Can contain multiple issues."""
+
+    def __init__(self, issues: list[ValidationIssue]):
+        self.issues = issues
+        message = f"Validation failed with {len(issues)} error(s):\n"
+        message += "\n".join(f"  - {issue}" for issue in issues)
+        super().__init__(message)
+
+
+class RenderError(GnommoError):
+    """Error during rendering stage."""
+
+    def __init__(self, message: str, command: Optional[str] = None, stderr: Optional[str] = None):
+        self.command = command
+        self.stderr = stderr
+        full_message = message
+        if command:
+            full_message += f"\nCommand: {command}"
+        if stderr:
+            full_message += f"\nFFmpeg output:\n{stderr}"
+        super().__init__(full_message)
@@ -0,0 +1,94 @@
+"""Data models for GnommoEditor pipeline."""
+
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Optional
+
+
+@dataclass
+class TalkingHeadConfig:
+    """Configuration for talking head video positioning."""
+    # Overlay position of the talking head on the output frame.
+    x: int
+    y: int
+    # Height the talking head is scaled to. The parser stores -1 for
+    # percentage strings; the renderer treats any value <= 0 as "use the
+    # full output height".
+    target_height: int  # in pixels, or -1 for percentage-based
+
+
+@dataclass
+class ProjectConfig:
+    """Global project configuration from project.json."""
+    # Output frame size as (width, height).
+    resolution: tuple[int, int]
+    # Output frame rate.
+    fps: int
+    talking_head: TalkingHeadConfig
+    # Read from the "defaultSlideType" key.
+    # NOTE(review): no visible code consumes this value - confirm intent.
+    default_slide_type: str
+    # Path relative to the project directory; an empty string means no
+    # background video (a black background is generated instead).
+    background_video: str
+    # NOTE(review): parsed from "audio_source", but the visible renderer
+    # always maps audio from the talking head input - confirm intent.
+    audio_source: Optional[str] = None  # defaults to talking head
+
+
+@dataclass
+class SlideDefinition:
+    """Definition of a single slide from slides.json."""
+    # Image file name, resolved under <project>/media/slides.
+    image: str
+    # Layout name; must be a key of SLIDE_LAYOUTS to pass validation.
+    type: str  # "fullscreen" | "square"
+
+
+@dataclass
+class VideoSource:
+    """Video source definition from videos.json."""
+    # Path of the video file, relative to the project directory.
+    file: str
+    # NOTE(review): carried through from videos.json but not applied by any
+    # visible code - presumably reserved for preprocessing steps.
+    preprocess: list[str] = field(default_factory=list)
+
+
+@dataclass
+class TimedWord:
+    """A word or marker with its timestamp from transcript.csv."""
+    time: float
+    word: str
+
+    @property
+    def is_marker(self) -> bool:
+        """Check if this is a slide marker like [S1]."""
+        return self.word.startswith("[") and self.word.endswith("]")
+
+    @property
+    def marker_id(self) -> Optional[str]:
+        """Extract marker ID (e.g., 'S1' from '[S1]')."""
+        if self.is_marker:
+            return self.word[1:-1]
+        return None
+
+
+@dataclass
+class SlideEvent:
+    """A resolved slide event with timing information."""
+    # Marker ID from the transcript; also the key into the slides mapping.
+    slide_id: str
+    # Timestamp of the marker that triggers this slide.
+    start_time: float
+    # Timestamp of the next marker, or the video duration for the last slide.
+    end_time: float
+    slide_def: SlideDefinition
+
+
+@dataclass
+class RenderPlan:
+    """Complete plan for rendering the final video."""
+    # Project directory; media paths are resolved relative to it.
+    project_path: Path
+    config: ProjectConfig
+    # Video used as FFmpeg input 0 (also the audio source).
+    talking_head: VideoSource
+    slide_events: list[SlideEvent]
+    # Duration of the talking head video; the renderer passes it to -t to
+    # limit the output length.
+    total_duration: float
+    # All slide definitions from slides.json, keyed by slide ID.
+    slides: dict[str, SlideDefinition]
+
+
+# Slide layout configurations (hardcoded for POC)
+# Keys are the valid slide "type" values; each entry gives the overlay
+# position (x, y) and the box (width, height) the slide image is scaled into.
+# NOTE(review): the numbers assume a 1920x1080 frame and are used as-is by
+# the renderer, i.e. they are not adjusted to the configured resolution.
+SLIDE_LAYOUTS = {
+    "fullscreen": {
+        "x": 0,
+        "y": 0,
+        "width": 1920,
+        "height": 1080,
+    },
+    "square": {
+        "x": 560,  # centered horizontally: (1920 - 800) / 2
+        "y": 140,  # positioned in upper area
+        "width": 800,
+        "height": 800,
+    },
+}
@@ -0,0 +1,197 @@
+"""Extract stage: parse all input files."""
+
+import csv
+import json
+import re
+from pathlib import Path
+from typing import Any
+
+from .errors import ParseError
+from .models import (
+    ProjectConfig,
+    SlideDefinition,
+    TalkingHeadConfig,
+    TimedWord,
+    VideoSource,
+)
+
+
+def parse_manuscript(project_path: Path) -> tuple[str, list[str]]:
+    """
+    Parse manuscript.txt and extract text content and slide markers.
+
+    Returns:
+        Tuple of (full text, list of marker IDs found)
+    """
+    manuscript_path = project_path / "manuscript.txt"
+
+    if not manuscript_path.exists():
+        raise ParseError("manuscript.txt not found", manuscript_path)
+
+    text = manuscript_path.read_text(encoding="utf-8")
+
+    # Extract all slide markers like [S1], [S2], etc.
+    markers = re.findall(r"\[([A-Za-z0-9_]+)\]", text)
+
+    return text, markers
+
+
+def parse_transcript(project_path: Path) -> list[TimedWord]:
+    """
+    Parse transcript.csv into a list of timed words.
+
+    Expected format:
+        t,word
+        0.00,This
+        0.42,is
+        ...
+    """
+    transcript_path = project_path / "transcript.csv"
+
+    if not transcript_path.exists():
+        raise ParseError("transcript.csv not found", transcript_path)
+
+    timed_words = []
+
+    with open(transcript_path, "r", encoding="utf-8") as f:
+        reader = csv.DictReader(f)
+
+        if reader.fieldnames is None or "t" not in reader.fieldnames or "word" not in reader.fieldnames:
+            raise ParseError(
+                "transcript.csv must have columns: t, word",
+                transcript_path
+            )
+
+        for line_num, row in enumerate(reader, start=2):  # start=2 because line 1 is header
+            try:
+                time = float(row["t"])
+                word = row["word"].strip()
+                timed_words.append(TimedWord(time=time, word=word))
+            except (ValueError, KeyError) as e:
+                raise ParseError(
+                    f"Invalid row: {e}",
+                    transcript_path,
+                    line_num
+                )
+
+    return timed_words
+
+
+def parse_project_config(project_path: Path) -> ProjectConfig:
+    """Parse project.json into ProjectConfig."""
+    config_path = project_path / "project.json"
+
+    if not config_path.exists():
+        raise ParseError("project.json not found", config_path)
+
+    try:
+        data = json.loads(config_path.read_text(encoding="utf-8"))
+    except json.JSONDecodeError as e:
+        raise ParseError(f"Invalid JSON: {e}", config_path)
+
+    # Parse talking head config
+    th_data = data.get("talkinghead", {})
+    talking_head = TalkingHeadConfig(
+        x=th_data.get("x", 100),
+        y=th_data.get("y", 100),
+        target_height=_parse_dimension(th_data.get("targetheight", 200)),
+    )
+
+    # Parse resolution
+    resolution = data.get("resolution", [1920, 1080])
+    if not isinstance(resolution, list) or len(resolution) != 2:
+        raise ParseError("resolution must be [width, height]", config_path)
+
+    return ProjectConfig(
+        resolution=tuple(resolution),
+        fps=data.get("fps", 30),
+        talking_head=talking_head,
+        default_slide_type=data.get("defaultSlideType", "square"),
+        background_video=data.get("background_video", ""),
+        audio_source=data.get("audio_source"),
+    )
+
+
+def _parse_dimension(value: Any) -> int:
+    """Parse a dimension value (can be int or string like '100%')."""
+    if isinstance(value, int):
+        return value
+    if isinstance(value, str):
+        if value.endswith("%"):
+            return -1  # Percentage marker, will be resolved during rendering
+        return int(value)
+    return 200  # default
+
+
+def parse_slides(project_path: Path) -> dict[str, SlideDefinition]:
+    """Parse slides.json into slide definitions."""
+    slides_path = project_path / "slides.json"
+
+    if not slides_path.exists():
+        raise ParseError("slides.json not found", slides_path)
+
+    try:
+        data = json.loads(slides_path.read_text(encoding="utf-8"))
+    except json.JSONDecodeError as e:
+        raise ParseError(f"Invalid JSON: {e}", slides_path)
+
+    slides = {}
+    for slide_id, slide_data in data.items():
+        if "image" not in slide_data:
+            raise ParseError(
+                f"Slide '{slide_id}' missing required field 'image'",
+                slides_path
+            )
+        slides[slide_id] = SlideDefinition(
+            image=slide_data["image"],
+            type=slide_data.get("type", "square"),
+        )
+
+    return slides
+
+
+def parse_videos(project_path: Path) -> dict[str, VideoSource]:
+    """Parse videos.json into video source definitions."""
+    videos_path = project_path / "videos.json"
+
+    if not videos_path.exists():
+        raise ParseError("videos.json not found", videos_path)
+
+    try:
+        data = json.loads(videos_path.read_text(encoding="utf-8"))
+    except json.JSONDecodeError as e:
+        raise ParseError(f"Invalid JSON: {e}", videos_path)
+
+    videos = {}
+    for video_id, video_data in data.items():
+        if "file" not in video_data:
+            raise ParseError(
+                f"Video '{video_id}' missing required field 'file'",
+                videos_path
+            )
+        videos[video_id] = VideoSource(
+            file=video_data["file"],
+            preprocess=video_data.get("preprocess", []),
+        )
+
+    return videos
+
+
+def get_video_duration(video_path: Path) -> float:
+    """Get duration of a video file using ffprobe."""
+    import subprocess
+
+    cmd = [
+        "ffprobe",
+        "-v", "error",
+        "-show_entries", "format=duration",
+        "-of", "default=noprint_wrappers=1:nokey=1",
+        str(video_path)
+    ]
+
+    result = subprocess.run(cmd, capture_output=True, text=True)
+
+    if result.returncode != 0:
+        raise ParseError(f"Failed to get duration: {result.stderr}", video_path)
+
+    return float(result.stdout.strip())
@@ -0,0 +1,197 @@
+"""Load stage: generate and execute FFmpeg commands."""
+
+import subprocess
+from pathlib import Path
+
+from .errors import RenderError
+from .models import RenderPlan, SlideEvent, SLIDE_LAYOUTS
+
+
+def render(plan: RenderPlan, output_path: Path, verbose: bool = False) -> None:
+    """
+    Render the final video using FFmpeg.
+
+    Generates a filter_complex command that:
+    1. Scales background video (if present) or creates solid color
+    2. Overlays talking head at configured position
+    3. Overlays slides at their configured positions with time-based enable
+    """
+    # Ensure output directory exists
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+
+    # Build and execute FFmpeg command
+    cmd = build_ffmpeg_command(plan, output_path)
+
+    if verbose:
+        print("FFmpeg command:")
+        print(" ".join(cmd))
+        print()
+
+    result = subprocess.run(
+        cmd,
+        capture_output=True,
+        text=True,
+    )
+
+    if result.returncode != 0:
+        raise RenderError(
+            "FFmpeg rendering failed",
+            command=" ".join(cmd),
+            stderr=result.stderr,
+        )
+
+
+def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]:
+    """Build the complete FFmpeg command as a list of arguments."""
+    cmd = ["ffmpeg", "-y"]  # -y to overwrite output
+
+    # Resolve paths to absolute
+    project_path = plan.project_path.resolve()
+    output_path = output_path.resolve()
+
+    # Input: talking head video
+    talking_head_path = project_path / plan.talking_head.file
+    cmd.extend(["-i", str(talking_head_path)])
+
+    # Input: background video (if specified)
+    has_background = bool(plan.config.background_video)
+    if has_background:
+        bg_path = project_path / plan.config.background_video
+        cmd.extend(["-i", str(bg_path)])
+
+    # Input: slide images
+    slides_path = project_path / "media" / "slides"
+    slide_inputs: list[str] = []  # Track which slides we've added
+
+    for event in plan.slide_events:
+        if event.slide_id not in slide_inputs:
+            image_path = slides_path / event.slide_def.image
+            cmd.extend(["-i", str(image_path)])
+            slide_inputs.append(event.slide_id)
+
+    # Build filter_complex
+    filter_complex = build_filter_complex(plan, has_background, slide_inputs)
+    cmd.extend(["-filter_complex", filter_complex])
+
+    # Map output video and audio
+    cmd.extend(["-map", "[vout]"])
+    cmd.extend(["-map", "0:a"])  # Audio from talking head
+
+    # Output settings
+    cmd.extend([
+        "-t", str(plan.total_duration),  # Limit output duration
+        "-c:v", "libx264",
+        "-preset", "fast",
+        "-crf", "23",
+        "-c:a", "aac",
+        "-b:a", "192k",
+        "-r", str(plan.config.fps),
+        str(output_path),
+    ])
+
+    return cmd
+
+
+def build_filter_complex(
+    plan: RenderPlan,
+    has_background: bool,
+    slide_inputs: list[str],
+) -> str:
+    """
+    Build the filter_complex string for FFmpeg.
+
+    Layer structure:
+    - Layer 1: Background (solid color or video)
+    - Layer 2: Talking head
+    - Layer 3: Slides (with time-based enable)
+    """
+    width, height = plan.config.resolution
+    filters: list[str] = []
+
+    # Input indices:
+    # 0 = talking head
+    # 1 = background (if present)
+    # 2+ = slides
+    talking_head_idx = 0
+    bg_idx = 1 if has_background else None
+    slide_start_idx = 2 if has_background else 1
+
+    # Create base layer (background)
+    if has_background:
+        filters.append(f"[{bg_idx}:v]scale={width}:{height}:force_original_aspect_ratio=increase,"
+                      f"crop={width}:{height}[bg]")
+        base_label = "bg"
+    else:
+        # Create solid color background
+        filters.append(f"color=c=black:s={width}x{height}:r={plan.config.fps}[bg]")
+        base_label = "bg"
+
+    # Scale and position talking head
+    th_config = plan.config.talking_head
+    th_height = th_config.target_height if th_config.target_height > 0 else height
+
+    filters.append(
+        f"[{talking_head_idx}:v]scale=-1:{th_height}[head]"
+    )
+
+    # Overlay talking head on background
+    filters.append(
+        f"[{base_label}][head]overlay=x={th_config.x}:y={th_config.y}[base]"
+    )
+
+    current_label = "base"
+
+    # Add slide overlays with time-based enable
+    for i, event in enumerate(plan.slide_events):
+        slide_idx = slide_start_idx + slide_inputs.index(event.slide_id)
+        layout = SLIDE_LAYOUTS.get(event.slide_def.type, SLIDE_LAYOUTS["square"])
+
+        # Scale slide to fit layout while preserving aspect ratio
+        slide_label = f"s{i}"
+        filters.append(
+            f"[{slide_idx}:v]scale={layout['width']}:{layout['height']}:"
+            f"force_original_aspect_ratio=decrease[{slide_label}]"
+        )
+
+        # Overlay with time-based enable
+        next_label = f"v{i}" if i < len(plan.slide_events) - 1 else "vout"
+        enable_expr = f"between(t,{event.start_time:.3f},{event.end_time:.3f})"
+
+        filters.append(
+            f"[{current_label}][{slide_label}]overlay="
+            f"x={layout['x']}:y={layout['y']}:"
+            f"enable='{enable_expr}'[{next_label}]"
+        )
+
+        current_label = next_label
+
+    # If no slides, just rename base to vout
+    if not plan.slide_events:
+        filters.append(f"[{current_label}]copy[vout]")
+
+    return ";".join(filters)
+
+
+def generate_ffmpeg_command_string(plan: RenderPlan, output_path: Path) -> str:
+    """Generate a human-readable FFmpeg command string (for debugging)."""
+    cmd = build_ffmpeg_command(plan, output_path)
+
+    # Format nicely with line breaks
+    result = []
+    i = 0
+    while i < len(cmd):
+        if cmd[i] == "-filter_complex":
+            result.append(f"  -filter_complex \"\n    {cmd[i+1].replace(';', ';' + chr(10) + '    ')}\n  \"")
+            i += 2
+        elif cmd[i].startswith("-"):
+            if i + 1 < len(cmd) and not cmd[i + 1].startswith("-"):
+                result.append(f"  {cmd[i]} {cmd[i+1]}")
+                i += 2
+            else:
+                result.append(f"  {cmd[i]}")
+                i += 1
+        else:
+            result.append(f"  {cmd[i]}")
+            i += 1
+
+    return "ffmpeg \\\n" + " \\\n".join(result)
@@ -0,0 +1,88 @@
+"""Transform stage: resolve timings and build render plan."""
+
+from pathlib import Path
+
+from .models import (
+    ProjectConfig,
+    RenderPlan,
+    SlideDefinition,
+    SlideEvent,
+    TimedWord,
+    VideoSource,
+)
+from .parser import get_video_duration
+
+
+def build_render_plan(
+    project_path: Path,
+    config: ProjectConfig,
+    slides: dict[str, SlideDefinition],
+    videos: dict[str, VideoSource],
+    transcript: list[TimedWord],
+) -> RenderPlan:
+    """
+    Build a complete render plan from parsed and validated data.
+
+    This transforms transcript markers into timed slide events and
+    assembles all information needed for the render stage.
+    """
+    # For POC: use the first video as the talking head
+    talking_head_id = next(iter(videos.keys()))
+    talking_head = videos[talking_head_id]
+
+    # Get video duration for end time calculations
+    video_path = project_path / talking_head.file
+    total_duration = get_video_duration(video_path)
+
+    # Build slide events from transcript markers
+    slide_events = _extract_slide_events(transcript, slides, total_duration)
+
+    return RenderPlan(
+        project_path=project_path,
+        config=config,
+        talking_head=talking_head,
+        slide_events=slide_events,
+        total_duration=total_duration,
+        slides=slides,
+    )
+
+
+def _extract_slide_events(
+    transcript: list[TimedWord],
+    slides: dict[str, SlideDefinition],
+    total_duration: float,
+) -> list[SlideEvent]:
+    """
+    Extract slide events from transcript markers.
+
+    Each marker like [S1] in the transcript becomes a SlideEvent with:
+    - start_time: timestamp of the marker
+    - end_time: timestamp of next marker, or end of video
+    """
+    # Find all markers in transcript
+    marker_times: list[tuple[float, str]] = []
+
+    for timed_word in transcript:
+        if timed_word.is_marker:
+            marker_id = timed_word.marker_id
+            if marker_id and marker_id in slides:
+                marker_times.append((timed_word.time, marker_id))
+
+    # Convert markers to slide events
+    events: list[SlideEvent] = []
+
+    for i, (start_time, marker_id) in enumerate(marker_times):
+        # End time is start of next marker, or end of video
+        if i + 1 < len(marker_times):
+            end_time = marker_times[i + 1][0]
+        else:
+            end_time = total_duration
+
+        events.append(SlideEvent(
+            slide_id=marker_id,
+            start_time=start_time,
+            end_time=end_time,
+            slide_def=slides[marker_id],
+        ))
+
+    return events
@@ -0,0 +1,104 @@
+"""Validation stage: fail-fast checks on parsed data."""
+
+from pathlib import Path
+
+from .errors import ValidationError, ValidationIssue
+from .models import ProjectConfig, SlideDefinition, VideoSource, SLIDE_LAYOUTS
+
+
+def validate_project(
+    project_path: Path,
+    manuscript_markers: list[str],
+    config: ProjectConfig,
+    slides: dict[str, SlideDefinition],
+    videos: dict[str, VideoSource],
+) -> None:
+    """
+    Validate all parsed project data. Raises ValidationError if any issues found.
+
+    Checks:
+    - All slide markers in manuscript exist in slides.json
+    - All slide images exist on disk
+    - All video files exist on disk
+    - Background video exists (if specified)
+    - Slide types are valid
+    """
+    issues: list[ValidationIssue] = []
+
+    # Check all manuscript markers have corresponding slides
+    for marker in manuscript_markers:
+        if marker not in slides:
+            issues.append(ValidationIssue(
+                f"Slide marker [{marker}] referenced in manuscript but not defined in slides.json",
+                project_path / "manuscript.txt"
+            ))
+
+    # Check all slide images exist
+    media_path = project_path / "media"
+    slides_path = media_path / "slides"
+
+    for slide_id, slide_def in slides.items():
+        image_path = slides_path / slide_def.image
+        if not image_path.exists():
+            issues.append(ValidationIssue(
+                f"Slide image not found: {slide_def.image}",
+                project_path / "slides.json"
+            ))
+
+        # Check slide type is valid
+        if slide_def.type not in SLIDE_LAYOUTS:
+            issues.append(ValidationIssue(
+                f"Unknown slide type '{slide_def.type}' for slide {slide_id}. "
+                f"Valid types: {list(SLIDE_LAYOUTS.keys())}",
+                project_path / "slides.json"
+            ))
+
+    # Check all video files exist
+    for video_id, video_source in videos.items():
+        video_path = project_path / video_source.file
+        if not video_path.exists():
+            issues.append(ValidationIssue(
+                f"Video file not found: {video_source.file}",
+                project_path / "videos.json"
+            ))
+
+    # Check background video exists (if specified)
+    if config.background_video:
+        bg_path = project_path / config.background_video
+        if not bg_path.exists():
+            issues.append(ValidationIssue(
+                f"Background video not found: {config.background_video}",
+                project_path / "project.json"
+            ))
+
+    # Check we have at least one video source
+    if not videos:
+        issues.append(ValidationIssue(
+            "No video sources defined in videos.json",
+            project_path / "videos.json"
+        ))
+
+    # Check resolution is reasonable
+    width, height = config.resolution
+    if width < 100 or height < 100:
+        issues.append(ValidationIssue(
+            f"Resolution too small: {width}x{height}",
+            project_path / "project.json"
+        ))
+
+    if width > 7680 or height > 4320:
+        issues.append(ValidationIssue(
+            f"Resolution too large: {width}x{height} (max 8K)",
+            project_path / "project.json"
+        ))
+
+    # Check FPS is reasonable
+    if config.fps < 1 or config.fps > 120:
+        issues.append(ValidationIssue(
+            f"Invalid FPS: {config.fps} (must be 1-120)",
+            project_path / "project.json"
+        ))
+
+    # If any issues, raise ValidationError
+    if issues:
+        raise ValidationError(issues)