# gnommo/gnommo/renderer.py

"""Load stage: generate and execute FFmpeg commands."""

import subprocess
from pathlib import Path

from .errors import RenderError
from .models import RenderPlan, SlideEvent, SLIDE_LAYOUTS


def render(plan: RenderPlan, output_path: Path, verbose: bool = False) -> None:
    """
    Produce the final video by running a single FFmpeg process.

    The command comes from build_ffmpeg_command() and carries a
    filter_complex graph that:
    1. Scales the background video (if present) or creates a solid color
    2. Overlays the talking head at its configured position
    3. Overlays slides at their configured positions with time-based enable

    The parent directory of output_path is created when missing. With
    verbose=True the space-joined command is printed before it runs.

    Raises:
        RenderError: FFmpeg exited with a non-zero status; the error carries
            the space-joined command and FFmpeg's captured stderr.
    """
    # FFmpeg does not create missing directories for its output file.
    output_path.parent.mkdir(parents=True, exist_ok=True)

    ffmpeg_args = build_ffmpeg_command(plan, output_path)
    # Display form only (not shell-quoted); the process receives the list.
    command_line = " ".join(ffmpeg_args)

    if verbose:
        print("FFmpeg command:", command_line, "", sep="\n")

    completed = subprocess.run(ffmpeg_args, capture_output=True, text=True)
    if completed.returncode == 0:
        return

    raise RenderError(
        "FFmpeg rendering failed",
        command=command_line,
        stderr=completed.stderr,
    )


def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]:
    """
    Assemble the full FFmpeg invocation as an argument list.

    Input order fixes the stream indices the filter graph refers to:
    the talking head is input 0, the optional background video is input 1,
    and each distinct slide image follows in order of first appearance in
    plan.slide_events.

    Returns:
        The argument list, starting with "ffmpeg" and ending with the
        resolved output path.
    """
    project_root = plan.project_path.resolve()
    destination = output_path.resolve()

    # -y overwrites an existing output file; input 0 is the talking head.
    args = ["ffmpeg", "-y", "-i", str(project_root / plan.talking_head.file)]

    # Optional background video becomes input 1.
    use_background = bool(plan.config.background_video)
    if use_background:
        args += ["-i", str(project_root / plan.config.background_video)]

    # Slide images live in plan.slides_dir when set, else <project>/media/slides.
    if plan.slides_dir:
        image_root = plan.slides_dir.resolve()
    else:
        image_root = project_root / "media" / "slides"

    # One input per distinct slide id, so a slide shown several times is
    # only opened once.
    ordered_slide_ids: list[str] = []
    for event in plan.slide_events:
        if event.slide_id in ordered_slide_ids:
            continue
        args += ["-i", str(image_root / event.slide_def.image)]
        ordered_slide_ids.append(event.slide_id)

    graph = build_filter_complex(plan, use_background, ordered_slide_ids)
    args += ["-filter_complex", graph]

    # Video comes from the graph's final pad, audio from the talking head.
    args += ["-map", "[vout]", "-map", "0:a"]

    # Encoding settings; -t caps the output at the planned duration.
    args += [
        "-t", str(plan.total_duration),
        "-c:v", "libx264",
        "-preset", "fast",
        "-crf", "23",
        "-c:a", "aac",
        "-b:a", "192k",
        "-r", str(plan.config.fps),
        str(destination),
    ]

    return args


def build_filter_complex(
    plan: RenderPlan,
    has_background: bool,
    slide_inputs: list[str],
) -> str:
    """
    Assemble the FFmpeg filter_complex graph as one ';'-joined string.

    Compositing order, bottom to top:
    - Background: input 1 scaled and cropped to the frame size, or a black
      color source when there is no background video
    - Talking head: input 0 scaled to the configured height and overlaid
    - Slides: one full-frame overlay per slide event, enabled only between
      the event's start and end times

    slide_inputs lists slide ids in the order their images were added as
    FFmpeg inputs; an event's input index is derived from its position there.

    Returns:
        The filter graph; its final video pad is always labelled [vout].
    """
    frame_w, frame_h = plan.config.resolution

    # Input numbering: 0 = talking head, 1 = background (if present),
    # slide images occupy the indices after that.
    head_input = 0
    background_input = 1
    first_slide_input = 2 if has_background else 1

    if has_background:
        background = (
            f"[{background_input}:v]scale={frame_w}:{frame_h}"
            f":force_original_aspect_ratio=increase,crop={frame_w}:{frame_h}[bg]"
        )
    else:
        background = f"color=c=black:s={frame_w}x{frame_h}:r={plan.config.fps}[bg]"

    # A positive target_height is used as-is; otherwise the height is
    # target_height_percent multiplied by the frame height.
    head_cfg = plan.config.talking_head
    head_h = (
        head_cfg.target_height
        if head_cfg.target_height > 0
        else int(frame_h * head_cfg.target_height_percent)
    )

    graph: list[str] = [
        background,
        f"[{head_input}:v]scale=-1:{head_h}[head]",
        f"[bg][head]overlay=x={head_cfg.x}:y={head_cfg.y}[base]",
    ]

    # Each slide is fitted into a full-size frame padded with color
    # 0x00000000 and overlaid at the origin on the previous stage.
    upstream = "base"
    for n, event in enumerate(plan.slide_events):
        input_no = first_slide_input + slide_inputs.index(event.slide_id)
        fitted = f"s{n}"
        # The last overlay in the chain writes the final [vout] pad.
        is_last = n + 1 >= len(plan.slide_events)
        downstream = "vout" if is_last else f"v{n}"
        window = f"between(t,{event.start_time:.3f},{event.end_time:.3f})"

        graph.append(
            f"[{input_no}:v]scale={frame_w}:{frame_h}"
            f":force_original_aspect_ratio=decrease"
            f",pad={frame_w}:{frame_h}:(ow-iw)/2:(oh-ih)/2:color=0x00000000[{fitted}]"
        )
        graph.append(
            f"[{upstream}][{fitted}]overlay=x=0:y=0:enable='{window}'[{downstream}]"
        )
        upstream = downstream

    # Without slides nothing has produced [vout] yet; pass [base] through.
    if not plan.slide_events:
        graph.append(f"[{upstream}]copy[vout]")

    return ";".join(graph)


def generate_ffmpeg_command_string(plan: RenderPlan, output_path: Path) -> str:
    """
    Generate a human-readable FFmpeg command string (for debugging).

    The command from build_ffmpeg_command() is laid out one option per line,
    joined with backslash-newline continuations. An option is printed on the
    same line as the value that follows it, and the filter graph is placed
    in a double-quoted block with one filter chain per line.

    Argument values are inserted verbatim; they are not shell-quoted.

    Returns:
        The formatted multi-line command, starting with the executable name.
    """
    # Split off the executable so it appears exactly once, as the first line.
    # Iterating from index 0 would also list it among the arguments and the
    # output would start with "ffmpeg" twice.
    executable, *arguments = build_ffmpeg_command(plan, output_path)

    lines = []
    i = 0
    while i < len(arguments):
        arg = arguments[i]
        if arg == "-filter_complex":
            # Break the graph after every ';' so each chain gets its own line.
            graph = arguments[i + 1].replace(";", ";\n    ")
            lines.append(f'  -filter_complex "\n    {graph}\n  "')
            i += 2
        elif (
            arg.startswith("-")
            and i + 1 < len(arguments)
            and not arguments[i + 1].startswith("-")
        ):
            # Option followed by its value: keep the pair on one line.
            lines.append(f"  {arg} {arguments[i + 1]}")
            i += 2
        else:
            # Flag without a value, or a positional argument (the output path).
            lines.append(f"  {arg}")
            i += 1

    return f"{executable} \\\n" + " \\\n".join(lines)