Initial commit: GnommoEditor video pipeline

A code-first, declarative video editing system that compiles text
documents into rendered video via FFmpeg. Uses a compiler-style
ETL pipeline: Extract (parse inputs) → Validate → Transform
(build timeline) → Load (render via FFmpeg).

Features:
- Text-based project definition (manuscript, transcript, JSON configs)
- Slide markers [S1], [S2] in transcript map to timed overlays
- Strict validation with fail-fast error reporting
- FFmpeg filter_complex generation with time-based enables
- CLI with validate/render/dry-run modes

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-12 11:19:38 +01:00
commit d5a8d38c9c
15 changed files with 967 additions and 0 deletions
+197
View File
@@ -0,0 +1,197 @@
"""Load stage: generate and execute FFmpeg commands."""
import subprocess
from pathlib import Path
from .errors import RenderError
from .models import RenderPlan, SlideEvent, SLIDE_LAYOUTS
def render(plan: RenderPlan, output_path: Path, verbose: bool = False) -> None:
    """
    Render the final video using FFmpeg.

    Generates a filter_complex command that:
    1. Scales background video (if present) or creates solid color
    2. Overlays talking head at configured position
    3. Overlays slides at their configured positions with time-based enable

    Args:
        plan: Render plan passed on to ``build_ffmpeg_command``.
        output_path: Destination file; its parent directory is created
            when it does not exist yet.
        verbose: When true, print the FFmpeg command before running it.

    Raises:
        RenderError: If the FFmpeg process cannot be started (for example
            the executable is not on PATH) or exits with a non-zero
            return code.
    """
    # Ensure output directory exists
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Build and execute FFmpeg command
    cmd = build_ffmpeg_command(plan, output_path)
    command_text = " ".join(cmd)

    if verbose:
        print("FFmpeg command:")
        print(command_text)
        print()

    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            # Captured output may contain bytes that are invalid in the
            # locale encoding; substitute them instead of letting a
            # UnicodeDecodeError hide the real outcome of the render.
            errors="replace",
        )
    except OSError as exc:
        # A missing or non-executable binary surfaces as OSError
        # (FileNotFoundError / PermissionError); report it through the
        # same error type as any other render failure.
        raise RenderError(
            "FFmpeg could not be started",
            command=command_text,
            stderr=str(exc),
        ) from exc

    if result.returncode != 0:
        raise RenderError(
            "FFmpeg rendering failed",
            command=command_text,
            stderr=result.stderr,
        )
def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]:
    """Assemble the full FFmpeg argument list for rendering ``plan``.

    Inputs are added in a fixed order that ``build_filter_complex`` relies
    on: the talking head video, then the background video (only when one
    is configured), then one image per distinct slide id in order of
    first appearance in ``plan.slide_events``.

    Returns:
        The argument list, starting with ``"ffmpeg"`` and ending with the
        absolute output path.
    """
    # All file arguments are passed to FFmpeg as absolute paths.
    project_root = plan.project_path.resolve()
    destination = output_path.resolve()

    # "-y" overwrites an existing output file; input 0 is the talking head.
    args: list[str] = [
        "ffmpeg",
        "-y",
        "-i",
        str(project_root / plan.talking_head.file),
    ]

    # Optional background video becomes input 1.
    has_background = bool(plan.config.background_video)
    if has_background:
        args += ["-i", str(project_root / plan.config.background_video)]

    # Each distinct slide image is added once, however many events use it.
    slides_dir = project_root / "media" / "slides"
    slide_inputs: list[str] = []
    for event in plan.slide_events:
        if event.slide_id in slide_inputs:
            continue
        args += ["-i", str(slides_dir / event.slide_def.image)]
        slide_inputs.append(event.slide_id)

    args += [
        "-filter_complex",
        build_filter_complex(plan, has_background, slide_inputs),
    ]

    # Video comes from the filter graph, audio from the talking head input.
    args += ["-map", "[vout]", "-map", "0:a"]

    # Encoding settings; "-t" caps the output at the planned duration.
    args += [
        "-t", str(plan.total_duration),
        "-c:v", "libx264",
        "-preset", "fast",
        "-crf", "23",
        "-c:a", "aac",
        "-b:a", "192k",
        "-r", str(plan.config.fps),
        str(destination),
    ]
    return args
def build_filter_complex(
    plan: RenderPlan,
    has_background: bool,
    slide_inputs: list[str],
) -> str:
    """
    Build the filter_complex string for FFmpeg.

    The graph stacks three layers, bottom to top:
    - background (scaled and cropped video, or a generated black frame)
    - talking head, scaled to its target height
    - slides, each overlaid only during its event's time window

    Args:
        plan: Supplies resolution, fps, talking head placement and the
            slide events.
        has_background: Whether a background video occupies input 1.
        slide_inputs: Slide ids in the order their images were added as
            FFmpeg inputs; a slide's position here fixes its input index.

    Returns:
        The filter chain joined with ``;``; the final video pad is
        labelled ``vout``.
    """
    config = plan.config
    width, height = config.resolution

    # Input numbering: 0 = talking head, 1 = background (if present),
    # slides follow immediately after.
    first_slide_input = 2 if has_background else 1

    # Bottom layer: fill the frame with the background video, or synthesize
    # a solid black source at the project frame rate.
    if has_background:
        background = (
            f"[1:v]scale={width}:{height}:force_original_aspect_ratio=increase,"
            f"crop={width}:{height}[bg]"
        )
    else:
        background = f"color=c=black:s={width}x{height}:r={config.fps}[bg]"

    # A non-positive target height means "use the full frame height".
    head = config.talking_head
    head_height = head.target_height if head.target_height > 0 else height

    chain: list[str] = [
        background,
        f"[0:v]scale=-1:{head_height}[head]",
        f"[bg][head]overlay=x={head.x}:y={head.y}[base]",
    ]

    events = plan.slide_events
    if not events:
        # Nothing to overlay: pass the base layer through under the
        # output label.
        chain.append("[base]copy[vout]")
        return ";".join(chain)

    final_position = len(events) - 1
    previous = "base"
    for position, event in enumerate(events):
        input_idx = first_slide_input + slide_inputs.index(event.slide_id)
        layout = SLIDE_LAYOUTS.get(event.slide_def.type, SLIDE_LAYOUTS["square"])

        # Fit the slide inside its layout box, keeping its aspect ratio.
        scaled = f"s{position}"
        chain.append(
            f"[{input_idx}:v]scale={layout['width']}:{layout['height']}:"
            f"force_original_aspect_ratio=decrease[{scaled}]"
        )

        # The last overlay writes the output pad; earlier ones chain on.
        output = "vout" if position == final_position else f"v{position}"
        window = f"between(t,{event.start_time:.3f},{event.end_time:.3f})"
        chain.append(
            f"[{previous}][{scaled}]overlay="
            f"x={layout['x']}:y={layout['y']}:"
            f"enable='{window}'[{output}]"
        )
        previous = output

    return ";".join(chain)
def generate_ffmpeg_command_string(plan: RenderPlan, output_path: Path) -> str:
    """Generate a human-readable FFmpeg command string (for debugging).

    The argument list from ``build_ffmpeg_command`` is laid out with one
    option (plus its value, if any) per line, joined by shell line
    continuations. The ``-filter_complex`` graph is wrapped in double
    quotes and broken after every ``;`` so each filter sits on its own line.

    Returns:
        The formatted command, beginning with a single ``ffmpeg`` line.
    """
    cmd = build_ffmpeg_command(plan, output_path)

    # Format nicely with line breaks.
    # The program name is emitted once by the return statement below, so
    # formatting starts at the first real argument; starting at index 0
    # would print "ffmpeg" twice and produce an invalid command.
    result = []
    i = 1
    while i < len(cmd):
        if cmd[i] == "-filter_complex":
            result.append(f" -filter_complex \"\n {cmd[i+1].replace(';', ';' + chr(10) + ' ')}\n \"")
            i += 2
        elif cmd[i].startswith("-"):
            # An option consumes the next token as its value unless that
            # token is itself another option (e.g. the bare "-y" flag).
            if i + 1 < len(cmd) and not cmd[i + 1].startswith("-"):
                result.append(f" {cmd[i]} {cmd[i+1]}")
                i += 2
            else:
                result.append(f" {cmd[i]}")
                i += 1
        else:
            # Positional argument (the output path).
            result.append(f" {cmd[i]}")
            i += 1
    return "ffmpeg \\\n" + " \\\n".join(result)