Add gnommo.sh wrapper and fix slide/scaling issues

gnommo.sh:
- Bash wrapper for easy CLI usage
- Commands: validate, transcribe, align, render, all
- `gnommo.sh -p video1 all` runs full pipeline

Slide scaling:
- Slides now scale to full frame (1920x1080)
- Transparent areas show through to layers below
- Positioned at 0,0 for full overlay

targetheight percentage:
- Supports percentage values like "100%"
- Calculates actual height from frame resolution
- "100%" on 1080p = 1080px height

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-12 15:13:33 +01:00
parent 216131e072
commit df900dfd59
4 changed files with 184 additions and 14 deletions
Executable
+157
View File
@@ -0,0 +1,157 @@
#!/bin/bash
#
# GnommoEditor - Code-first video editing pipeline
#
# Usage:
# gnommo.sh -p <project> Render project
# gnommo.sh -p <project> validate Validate only
# gnommo.sh -p <project> transcribe Transcribe video
# gnommo.sh -p <project> align Align markers to transcript
# gnommo.sh -p <project> all Full pipeline: transcribe → align → render
#

# Abort on the first failing command.
set -e

# Absolute directory of this script, so the venv is found no matter where
# the wrapper is invoked from.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
VENV_PYTHON="$SCRIPT_DIR/venv/bin/python"

# Check for venv. Diagnostics go to stderr, and the hint uses the same
# absolute location that is being checked, so following it from any working
# directory actually resolves the error.
if [[ ! -f "$VENV_PYTHON" ]]; then
    echo "Error: Virtual environment not found at $SCRIPT_DIR/venv" >&2
    echo "Create it with: python -m venv \"$SCRIPT_DIR/venv\" && \"$SCRIPT_DIR/venv/bin/pip\" install openai-whisper" >&2
    exit 1
fi

# Defaults; overridden by the command-line parsing that follows.
PROJECT=""        # project directory (-p)
COMMAND="render"  # sub-command to run
VERBOSE=""        # becomes "--verbose" when -v is given
# Print the help text and terminate the script.
#
# Arguments:
#   $1  exit status to terminate with (optional, default 0).
#
# With status 0 (an explicit help request) the text goes to stdout; with any
# other status it goes to stderr so it does not pollute captured output.
usage() {
    local status="${1:-0}"
    local -a help_lines=(
        "Usage: gnommo.sh -p <project> [command] [options]"
        ""
        "Commands:"
        " render Render video (default)"
        " validate Validate project only"
        " transcribe Transcribe video audio"
        " align Align manuscript to transcript"
        " all Full pipeline: transcribe → align → render"
        ""
        "Options:"
        " -p <dir> Project directory (required)"
        " -v Verbose output"
        " -h Show this help"
        ""
        "Examples:"
        " gnommo.sh -p video1 # Render video1 project"
        " gnommo.sh -p video1 validate # Validate only"
        " gnommo.sh -p video1 all # Full pipeline"
    )

    if [[ "$status" -eq 0 ]]; then
        printf '%s\n' "${help_lines[@]}"
    else
        printf '%s\n' "${help_lines[@]}" >&2
    fi
    exit "$status"
}
# Report a command-line error: print the given message line(s) and the usage
# text on stderr, then exit with status 1.
fail_with_usage() {
    printf '%s\n' "$@" >&2
    # usage() terminates the shell it runs in, so run it in a subshell and
    # choose the (failing) exit status here.
    ( usage ) >&2
    exit 1
}

# Walk the argument list; options and the command word may come in any order.
while [[ $# -gt 0 ]]; do
    case "$1" in
        -p|--project)
            # Without this guard a trailing "-p" makes "shift 2" fail and
            # "set -e" would end the script without any message.
            if [[ $# -lt 2 ]]; then
                fail_with_usage "Error: $1 requires a project directory"
            fi
            PROJECT="$2"
            shift 2
            ;;
        -v|--verbose)
            VERBOSE="--verbose"
            shift
            ;;
        -h|--help)
            usage
            ;;
        validate|render|transcribe|align|all)
            COMMAND="$1"
            shift
            ;;
        *)
            fail_with_usage "Unknown option: $1"
            ;;
    esac
done

# Validate project argument
if [[ -z "$PROJECT" ]]; then
    fail_with_usage "Error: Project directory required (-p <project>)" ""
fi

if [[ ! -d "$PROJECT" ]]; then
    echo "Error: Project directory not found: $PROJECT" >&2
    exit 1
fi

if [[ ! -f "$PROJECT/project.json" ]]; then
    echo "Error: project.json not found in $PROJECT" >&2
    exit 1
fi
# Run commands

# Invoke the gnommo Python module with the venv interpreter.
#
# Arguments:
#   $@  passed through unchanged to "python -m gnommo".
#
# SCRIPT_DIR is prepended to PYTHONPATH so the module can be resolved when the
# wrapper is started from another directory. The working directory is left
# untouched so relative project paths keep working.
# NOTE(review): assumes the gnommo package sits next to this script - confirm.
run_gnommo() {
    PYTHONPATH="$SCRIPT_DIR${PYTHONPATH:+:$PYTHONPATH}" "$VENV_PYTHON" -m gnommo "$@"
}
# Print the first video (.mov or .mp4) found under the project's media
# directory, or nothing when there is none. Files whose name contains
# "transcript" are skipped; the filter applies to the file name only, so a
# project path containing that word does not hide every video. Output is
# sorted so the same file is picked on every run.
find_video() {
    find "$PROJECT/media" \( -name "*.mov" -o -name "*.mp4" \) \
        ! -name "*transcript*" | sort | head -n 1
}

# Store the project's video path in VIDEO, or exit with an error when the
# media directory holds no video.
require_video() {
    VIDEO="$(find_video)"
    if [[ -z "$VIDEO" ]]; then
        echo "Error: No video file found in $PROJECT/media/" >&2
        exit 1
    fi
}

# Dispatch on the selected command.
case "$COMMAND" in
    validate)
        echo "=== Validating $PROJECT ==="
        run_gnommo validate "$PROJECT"
        ;;
    transcribe)
        echo "=== Transcribing $PROJECT ==="
        require_video
        run_gnommo transcribe "$VIDEO"
        ;;
    align)
        echo "=== Aligning $PROJECT ==="
        run_gnommo align "$PROJECT"
        ;;
    render)
        echo "=== Rendering $PROJECT ==="
        # Expands to nothing when VERBOSE is empty, to "--verbose" otherwise.
        run_gnommo render "$PROJECT" ${VERBOSE:+"$VERBOSE"}
        ;;
    all)
        echo "=== Full Pipeline: $PROJECT ==="
        echo ""

        # Step 1: Transcribe (skipped when a transcript already exists)
        echo ">>> Step 1/3: Transcribe"
        require_video
        TRANSCRIPT="${VIDEO%.*}.transcript.json"
        if [[ -f "$TRANSCRIPT" ]]; then
            echo " Transcript exists, skipping: $TRANSCRIPT"
        else
            run_gnommo transcribe "$VIDEO"
        fi
        echo ""

        # Step 2: Align
        echo ">>> Step 2/3: Align"
        run_gnommo align "$PROJECT"
        echo ""

        # Step 3: Render
        echo ">>> Step 3/3: Render"
        run_gnommo render "$PROJECT" ${VERBOSE:+"$VERBOSE"}
        ;;
    *)
        echo "Unknown command: $COMMAND" >&2
        # usage() terminates the shell it runs in; run it in a subshell so
        # this error path can exit with a failing status.
        ( usage ) >&2
        exit 1
        ;;
esac
+1
View File
@@ -11,6 +11,7 @@ class TalkingHeadConfig:
x: int
y: int
target_height: int # in pixels, or -1 for percentage-based
target_height_percent: float = 0.0 # percentage (0.0-1.0) if target_height is -1
@dataclass
+15 -7
View File
@@ -113,10 +113,12 @@ def parse_project_config(project_path: Path) -> ProjectConfig:
# Parse talking head config
th_data = data.get("talkinghead", {})
th_height, th_height_pct = _parse_dimension(th_data.get("targetheight", 200))
talking_head = TalkingHeadConfig(
x=th_data.get("x", 100),
y=th_data.get("y", 100),
target_height=_parse_dimension(th_data.get("targetheight", 200)),
target_height=th_height,
target_height_percent=th_height_pct,
)
# Parse resolution
@@ -135,15 +137,21 @@ def parse_project_config(project_path: Path) -> ProjectConfig:
)
def _parse_dimension(value: Any) -> int:
"""Parse a dimension value (can be int or string like '100%')."""
def _parse_dimension(value: Any) -> tuple[int, float]:
"""
Parse a dimension value (can be int or string like '100%').
Returns:
Tuple of (pixels, percentage). If pixels is -1, use percentage.
"""
if isinstance(value, int):
return value
return value, 0.0
if isinstance(value, str):
if value.endswith("%"):
return -1 # Percentage marker, will be resolved during rendering
return int(value)
return 200 # default
pct = float(value[:-1]) / 100.0
return -1, pct
return int(value), 0.0
return 200, 0.0 # default
def parse_slides(project_path: Path, config: ProjectConfig = None) -> dict[str, SlideDefinition]:
+11 -7
View File
@@ -128,7 +128,11 @@ def build_filter_complex(
# Scale and position talking head
th_config = plan.config.talking_head
th_height = th_config.target_height if th_config.target_height > 0 else height
if th_config.target_height > 0:
th_height = th_config.target_height
else:
# Percentage-based: calculate from frame height
th_height = int(height * th_config.target_height_percent)
filters.append(
f"[{talking_head_idx}:v]scale=-1:{th_height}[head]"
@@ -142,24 +146,24 @@ def build_filter_complex(
current_label = "base"
# Add slide overlays with time-based enable
# Slides are scaled to full frame - transparency shows layers below
for i, event in enumerate(plan.slide_events):
slide_idx = slide_start_idx + slide_inputs.index(event.slide_id)
layout = SLIDE_LAYOUTS.get(event.slide_def.type, SLIDE_LAYOUTS["square"])
# Scale slide to fit layout while preserving aspect ratio
# Scale slide to full frame size (transparent areas show through)
slide_label = f"s{i}"
filters.append(
f"[{slide_idx}:v]scale={layout['width']}:{layout['height']}:"
f"force_original_aspect_ratio=decrease[{slide_label}]"
f"[{slide_idx}:v]scale={width}:{height}:"
f"force_original_aspect_ratio=decrease,pad={width}:{height}:(ow-iw)/2:(oh-ih)/2:color=0x00000000[{slide_label}]"
)
# Overlay with time-based enable
# Overlay at 0,0 (full frame) with time-based enable
next_label = f"v{i}" if i < len(plan.slide_events) - 1 else "vout"
enable_expr = f"between(t,{event.start_time:.3f},{event.end_time:.3f})"
filters.append(
f"[{current_label}][{slide_label}]overlay="
f"x={layout['x']}:y={layout['y']}:"
f"x=0:y=0:"
f"enable='{enable_expr}'[{next_label}]"
)