diff --git a/gnommo.sh b/gnommo.sh
new file mode 100755
index 0000000..fb647b3
--- /dev/null
+++ b/gnommo.sh
@@ -0,0 +1,172 @@
+#!/bin/bash
+#
+# GnommoEditor - Code-first video editing pipeline
+#
+# Usage:
+#   gnommo.sh -p <project>              Render project
+#   gnommo.sh -p <project> validate     Validate only
+#   gnommo.sh -p <project> transcribe   Transcribe video
+#   gnommo.sh -p <project> align        Align markers to transcript
+#   gnommo.sh -p <project> all          Full pipeline: transcribe → align → render
+#
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+VENV_PYTHON="$SCRIPT_DIR/venv/bin/python"
+
+# Check for venv
+if [[ ! -f "$VENV_PYTHON" ]]; then
+    echo "Error: Virtual environment not found at $SCRIPT_DIR/venv" >&2
+    echo "Create it with: python -m venv venv && ./venv/bin/pip install openai-whisper" >&2
+    exit 1
+fi
+
+# Parse arguments
+PROJECT=""
+COMMAND="render"
+VERBOSE=""
+
+# Print help, then exit with the given status (default 0 so that -h
+# succeeds while error paths can signal failure to the caller).
+usage() {
+    echo "Usage: gnommo.sh -p <project> [command] [options]"
+    echo ""
+    echo "Commands:"
+    echo "  render       Render video (default)"
+    echo "  validate     Validate project only"
+    echo "  transcribe   Transcribe video audio"
+    echo "  align        Align manuscript to transcript"
+    echo "  all          Full pipeline: transcribe → align → render"
+    echo ""
+    echo "Options:"
+    echo "  -p <project> Project directory (required)"
+    echo "  -v           Verbose output"
+    echo "  -h           Show this help"
+    echo ""
+    echo "Examples:"
+    echo "  gnommo.sh -p video1              # Render video1 project"
+    echo "  gnommo.sh -p video1 validate     # Validate only"
+    echo "  gnommo.sh -p video1 all          # Full pipeline"
+    exit "${1:-0}"
+}
+
+# Print the first video in the project's media directory, if any. Matching
+# is case-insensitive (cameras write .MOV); sorting makes the pick stable.
+find_video() {
+    find "$PROJECT/media" -type f \( -iname "*.mov" -o -iname "*.mp4" \) \
+        ! -name "*transcript*" 2>/dev/null | sort | head -n 1
+}
+
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        -p|--project)
+            # Without this guard a trailing -p makes "shift 2" fail and
+            # set -e terminates the script without any message.
+            if [[ $# -lt 2 ]]; then
+                echo "Error: $1 requires a project directory" >&2
+                exit 1
+            fi
+            PROJECT="$2"
+            shift 2
+            ;;
+        -v|--verbose)
+            VERBOSE="--verbose"
+            shift
+            ;;
+        -h|--help)
+            usage
+            ;;
+        validate|render|transcribe|align|all)
+            COMMAND="$1"
+            shift
+            ;;
+        *)
+            echo "Unknown option: $1" >&2
+            usage 1
+            ;;
+    esac
+done
+
+# Validate project argument
+if [[ -z "$PROJECT" ]]; then
+    echo "Error: Project directory required (-p <project>)" >&2
+    echo "" >&2
+    usage 1
+fi
+
+if [[ ! -d "$PROJECT" ]]; then
+    echo "Error: Project directory not found: $PROJECT" >&2
+    exit 1
+fi
+
+if [[ ! -f "$PROJECT/project.json" ]]; then
+    echo "Error: project.json not found in $PROJECT" >&2
+    exit 1
+fi
+
+# Run commands
+run_gnommo() {
+    "$VENV_PYTHON" -m gnommo "$@"
+}
+
+case $COMMAND in
+    validate)
+        echo "=== Validating $PROJECT ==="
+        run_gnommo validate "$PROJECT"
+        ;;
+
+    transcribe)
+        echo "=== Transcribing $PROJECT ==="
+        VIDEO=$(find_video)
+        if [[ -z "$VIDEO" ]]; then
+            echo "Error: No video file found in $PROJECT/media/" >&2
+            exit 1
+        fi
+        run_gnommo transcribe "$VIDEO"
+        ;;
+
+    align)
+        echo "=== Aligning $PROJECT ==="
+        run_gnommo align "$PROJECT"
+        ;;
+
+    render)
+        echo "=== Rendering $PROJECT ==="
+        run_gnommo render "$PROJECT" ${VERBOSE:+"$VERBOSE"}
+        ;;
+
+    all)
+        echo "=== Full Pipeline: $PROJECT ==="
+        echo ""
+
+        # Step 1: Transcribe
+        echo ">>> Step 1/3: Transcribe"
+        VIDEO=$(find_video)
+        if [[ -z "$VIDEO" ]]; then
+            echo "Error: No video file found in $PROJECT/media/" >&2
+            exit 1
+        fi
+        TRANSCRIPT="${VIDEO%.*}.transcript.json"
+        if [[ -f "$TRANSCRIPT" ]]; then
+            echo "  Transcript exists, skipping: $TRANSCRIPT"
+        else
+            run_gnommo transcribe "$VIDEO"
+        fi
+        echo ""
+
+        # Step 2: Align
+        echo ">>> Step 2/3: Align"
+        run_gnommo align "$PROJECT"
+        echo ""
+
+        # Step 3: Render
+        echo ">>> Step 3/3: Render"
+        run_gnommo render "$PROJECT" ${VERBOSE:+"$VERBOSE"}
+        ;;
+
+    *)
+        echo "Unknown command: $COMMAND" >&2
+        usage 1
+        ;;
+esac
diff --git a/gnommo/models.py b/gnommo/models.py
index fc7061a..17c876b 100644
--- a/gnommo/models.py
+++ b/gnommo/models.py
@@ -11,6 +11,7 @@ class TalkingHeadConfig:
     x: int
     y: int
     target_height: int  # in pixels, or -1 for percentage-based
+    target_height_percent: float = 0.0  # fraction of frame height (0.0-1.0), used when target_height is -1
 
 
 @dataclass
diff --git a/gnommo/parser.py b/gnommo/parser.py
index 9b9fe71..2fd4eeb 100644
--- a/gnommo/parser.py
+++ b/gnommo/parser.py
@@ -113,10 +113,12 @@ def parse_project_config(project_path: Path) -> ProjectConfig:
 
     # Parse talking head config
     th_data = data.get("talkinghead", {})
+    th_height, th_height_pct = _parse_dimension(th_data.get("targetheight", 200))
     talking_head = TalkingHeadConfig(
         x=th_data.get("x", 100),
         y=th_data.get("y", 100),
-        target_height=_parse_dimension(th_data.get("targetheight", 200)),
+        target_height=th_height,
+        target_height_percent=th_height_pct,
     )
 
     # Parse resolution
@@ -135,15 +137,27 @@ def parse_project_config(project_path: Path) -> ProjectConfig:
     )
 
 
-def _parse_dimension(value: Any) -> int:
-    """Parse a dimension value (can be int or string like '100%')."""
-    if isinstance(value, int):
-        return value
+def _parse_dimension(value: Any) -> tuple[int, float]:
+    """
+    Parse a dimension value (can be a number or a string like '100%').
+
+    Returns:
+        Tuple of (pixels, fraction). If pixels is -1, the size is relative
+        and fraction (0.0-1.0, i.e. 0%-100%) is used instead.
+
+    Raises:
+        ValueError: If a string value is neither an integer nor a percentage.
+    """
+    # JSON numbers may arrive as float (e.g. 150.5); truncate to whole pixels.
+    if isinstance(value, (int, float)):
+        return int(value), 0.0
     if isinstance(value, str):
-        if value.endswith("%"):
-            return -1  # Percentage marker, will be resolved during rendering
-        return int(value)
-    return 200  # default
+        text = value.strip()  # tolerate padded input such as " 50% "
+        if text.endswith("%"):
+            pct = float(text[:-1]) / 100.0
+            return -1, pct
+        return int(text), 0.0
+    return 200, 0.0  # default
 
 
 def parse_slides(project_path: Path, config: ProjectConfig = None) -> dict[str, SlideDefinition]:
diff --git a/gnommo/renderer.py b/gnommo/renderer.py
index a690a0b..daf2a79 100644
--- a/gnommo/renderer.py
+++ b/gnommo/renderer.py
@@ -128,7 +128,15 @@ def build_filter_complex(
 
     # Scale and position talking head
     th_config = plan.config.talking_head
-    th_height = th_config.target_height if th_config.target_height > 0 else height
+    if th_config.target_height > 0:
+        th_height = th_config.target_height
+    elif th_config.target_height_percent > 0:
+        # Percentage-based: calculate from frame height (never below 1px)
+        th_height = max(1, int(height * th_config.target_height_percent))
+    else:
+        # No usable size (e.g. -1 without a percentage): keep the previous
+        # behaviour of filling the full frame height
+        th_height = height
 
     filters.append(
         f"[{talking_head_idx}:v]scale=-1:{th_height}[head]"
@@ -142,22 +150,22 @@ def build_filter_complex(
     current_label = "base"
 
     # Add slide overlays with time-based enable
+    # Slides are scaled to full frame - transparency shows layers below
     for i, event in enumerate(plan.slide_events):
         slide_idx = slide_start_idx + slide_inputs.index(event.slide_id)
-        layout = SLIDE_LAYOUTS.get(event.slide_def.type, SLIDE_LAYOUTS["square"])
 
-        # Scale slide to fit layout while preserving aspect ratio
+        # Scale slide to full frame size (transparent areas show through)
         slide_label = f"s{i}"
         filters.append(
-            f"[{slide_idx}:v]scale={layout['width']}:{layout['height']}:"
-            f"force_original_aspect_ratio=decrease[{slide_label}]"
+            f"[{slide_idx}:v]scale={width}:{height}:"
+            f"force_original_aspect_ratio=decrease,pad={width}:{height}:(ow-iw)/2:(oh-ih)/2:color=0x00000000[{slide_label}]"
         )
 
-        # Overlay with time-based enable
+        # Overlay at 0,0 (full frame) with time-based enable
         next_label = f"v{i}" if i < len(plan.slide_events) - 1 else "vout"
         enable_expr = f"between(t,{event.start_time:.3f},{event.end_time:.3f})"
         filters.append(
             f"[{current_label}][{slide_label}]overlay="
-            f"x={layout['x']}:y={layout['y']}:"
+            f"x=0:y=0:"
             f"enable='{enable_expr}'[{next_label}]"
         )