Refactor CLI and add preprocessing pipeline
- New CLI structure: -p project, -a action (required flags)
- Add -i import, -f force, -v verbose, --dry-run, --no-cache options
- Add preprocessor.py with chroma key filter (ProRes 4444 output)
- Support background images from shared_assets folder
- Support video metadata JSON files (talkinghead.json)
- Add validation for preprocessed output before render
- Update gnommo.sh with import command and new CLI interface
- Fix Python 3.9 compatibility (Optional[] instead of | None)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -4,7 +4,9 @@
|
|||||||
#
|
#
|
||||||
# Usage:
|
# Usage:
|
||||||
# gnommo.sh -p <project> Render project
|
# gnommo.sh -p <project> Render project
|
||||||
|
# gnommo.sh -p <project> import Generate slides.json from image files
|
||||||
# gnommo.sh -p <project> validate Validate only
|
# gnommo.sh -p <project> validate Validate only
|
||||||
|
# gnommo.sh -p <project> preprocess Apply video preprocessing filters
|
||||||
# gnommo.sh -p <project> transcribe Transcribe video
|
# gnommo.sh -p <project> transcribe Transcribe video
|
||||||
# gnommo.sh -p <project> align Align markers to transcript
|
# gnommo.sh -p <project> align Align markers to transcript
|
||||||
# gnommo.sh -p <project> all Full pipeline: transcribe → align → render
|
# gnommo.sh -p <project> all Full pipeline: transcribe → align → render
|
||||||
@@ -26,13 +28,16 @@ fi
|
|||||||
PROJECT=""
|
PROJECT=""
|
||||||
COMMAND="render"
|
COMMAND="render"
|
||||||
VERBOSE=""
|
VERBOSE=""
|
||||||
|
FORCE=""
|
||||||
|
|
||||||
usage() {
|
usage() {
|
||||||
echo "Usage: gnommo.sh -p <project> [command] [options]"
|
echo "Usage: gnommo.sh -p <project> [command] [options]"
|
||||||
echo ""
|
echo ""
|
||||||
echo "Commands:"
|
echo "Commands:"
|
||||||
echo " render Render video (default)"
|
echo " render Render video (default)"
|
||||||
|
echo " import Generate slides.json from image files"
|
||||||
echo " validate Validate project only"
|
echo " validate Validate project only"
|
||||||
|
echo " preprocess Apply video preprocessing filters (chroma key, etc.)"
|
||||||
echo " transcribe Transcribe video audio"
|
echo " transcribe Transcribe video audio"
|
||||||
echo " align Align manuscript to transcript"
|
echo " align Align manuscript to transcript"
|
||||||
echo " all Full pipeline: transcribe → align → render"
|
echo " all Full pipeline: transcribe → align → render"
|
||||||
@@ -40,10 +45,13 @@ usage() {
|
|||||||
echo "Options:"
|
echo "Options:"
|
||||||
echo " -p <dir> Project directory (required)"
|
echo " -p <dir> Project directory (required)"
|
||||||
echo " -v Verbose output"
|
echo " -v Verbose output"
|
||||||
|
echo " -f Force overwrite existing files"
|
||||||
echo " -h Show this help"
|
echo " -h Show this help"
|
||||||
echo ""
|
echo ""
|
||||||
echo "Examples:"
|
echo "Examples:"
|
||||||
echo " gnommo.sh -p video1 # Render video1 project"
|
echo " gnommo.sh -p video1 # Render video1 project"
|
||||||
|
echo " gnommo.sh -p video1 import # Generate slides.json"
|
||||||
|
echo " gnommo.sh -p video1 import -f # Force overwrite slides.json"
|
||||||
echo " gnommo.sh -p video1 validate # Validate only"
|
echo " gnommo.sh -p video1 validate # Validate only"
|
||||||
echo " gnommo.sh -p video1 all # Full pipeline"
|
echo " gnommo.sh -p video1 all # Full pipeline"
|
||||||
exit 0
|
exit 0
|
||||||
@@ -56,13 +64,17 @@ while [[ $# -gt 0 ]]; do
|
|||||||
shift 2
|
shift 2
|
||||||
;;
|
;;
|
||||||
-v|--verbose)
|
-v|--verbose)
|
||||||
VERBOSE="--verbose"
|
VERBOSE="-v"
|
||||||
|
shift
|
||||||
|
;;
|
||||||
|
-f|--force)
|
||||||
|
FORCE="-f"
|
||||||
shift
|
shift
|
||||||
;;
|
;;
|
||||||
-h|--help)
|
-h|--help)
|
||||||
usage
|
usage
|
||||||
;;
|
;;
|
||||||
validate|render|transcribe|align|all)
|
import|validate|render|preprocess|transcribe|align|all)
|
||||||
COMMAND="$1"
|
COMMAND="$1"
|
||||||
shift
|
shift
|
||||||
;;
|
;;
|
||||||
@@ -90,64 +102,49 @@ if [[ ! -f "$PROJECT/project.json" ]]; then
|
|||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Run commands
|
# Run commands using new CLI interface
|
||||||
run_gnommo() {
|
run_gnommo() {
|
||||||
"$VENV_PYTHON" -m gnommo "$@"
|
"$VENV_PYTHON" -m gnommo -p "$PROJECT" -a "$1" $VERBOSE
|
||||||
|
}
|
||||||
|
|
||||||
|
run_gnommo_import() {
|
||||||
|
"$VENV_PYTHON" -m gnommo -p "$PROJECT" -a validate -i $FORCE $VERBOSE
|
||||||
}
|
}
|
||||||
|
|
||||||
case $COMMAND in
|
case $COMMAND in
|
||||||
|
import)
|
||||||
|
echo "=== Importing assets for $PROJECT ==="
|
||||||
|
run_gnommo_import
|
||||||
|
;;
|
||||||
|
|
||||||
validate)
|
validate)
|
||||||
echo "=== Validating $PROJECT ==="
|
echo "=== Validating $PROJECT ==="
|
||||||
run_gnommo validate "$PROJECT"
|
run_gnommo validate
|
||||||
;;
|
;;
|
||||||
|
|
||||||
transcribe)
|
transcribe)
|
||||||
echo "=== Transcribing $PROJECT ==="
|
echo "=== Transcribing $PROJECT ==="
|
||||||
VIDEO=$(find "$PROJECT/media" -name "*.mov" -o -name "*.mp4" | head -1)
|
run_gnommo transcribe
|
||||||
if [[ -z "$VIDEO" ]]; then
|
|
||||||
echo "Error: No video file found in $PROJECT/media/"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
run_gnommo transcribe "$VIDEO"
|
|
||||||
;;
|
;;
|
||||||
|
|
||||||
align)
|
align)
|
||||||
echo "=== Aligning $PROJECT ==="
|
echo "=== Aligning $PROJECT ==="
|
||||||
run_gnommo align "$PROJECT"
|
run_gnommo align
|
||||||
;;
|
;;
|
||||||
|
|
||||||
render)
|
render)
|
||||||
echo "=== Rendering $PROJECT ==="
|
echo "=== Rendering $PROJECT ==="
|
||||||
run_gnommo render "$PROJECT" $VERBOSE
|
run_gnommo render
|
||||||
|
;;
|
||||||
|
|
||||||
|
preprocess)
|
||||||
|
echo "=== Preprocessing $PROJECT ==="
|
||||||
|
run_gnommo preprocess
|
||||||
;;
|
;;
|
||||||
|
|
||||||
all)
|
all)
|
||||||
echo "=== Full Pipeline: $PROJECT ==="
|
echo "=== Full Pipeline: $PROJECT ==="
|
||||||
echo ""
|
run_gnommo all
|
||||||
|
|
||||||
# Step 1: Transcribe
|
|
||||||
echo ">>> Step 1/3: Transcribe"
|
|
||||||
VIDEO=$(find "$PROJECT/media" -name "*.mov" -o -name "*.mp4" | grep -v transcript | head -1)
|
|
||||||
if [[ -z "$VIDEO" ]]; then
|
|
||||||
echo "Error: No video file found in $PROJECT/media/"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
TRANSCRIPT="${VIDEO%.*}.transcript.json"
|
|
||||||
if [[ -f "$TRANSCRIPT" ]]; then
|
|
||||||
echo " Transcript exists, skipping: $TRANSCRIPT"
|
|
||||||
else
|
|
||||||
run_gnommo transcribe "$VIDEO"
|
|
||||||
fi
|
|
||||||
echo ""
|
|
||||||
|
|
||||||
# Step 2: Align
|
|
||||||
echo ">>> Step 2/3: Align"
|
|
||||||
run_gnommo align "$PROJECT"
|
|
||||||
echo ""
|
|
||||||
|
|
||||||
# Step 3: Render
|
|
||||||
echo ">>> Step 3/3: Render"
|
|
||||||
run_gnommo render "$PROJECT" $VERBOSE
|
|
||||||
;;
|
;;
|
||||||
|
|
||||||
*)
|
*)
|
||||||
|
|||||||
+338
-228
@@ -8,18 +8,11 @@ from pathlib import Path
|
|||||||
|
|
||||||
from . import __version__
|
from . import __version__
|
||||||
from .errors import GnommoError, ParseError, ValidationError, RenderError
|
from .errors import GnommoError, ParseError, ValidationError, RenderError
|
||||||
from .parser import (
|
|
||||||
parse_manuscript,
|
|
||||||
parse_project_config,
|
class NotImplementedException(GnommoError):
|
||||||
parse_slides,
|
"""Feature not yet implemented."""
|
||||||
parse_transcript,
|
pass
|
||||||
parse_videos,
|
|
||||||
)
|
|
||||||
from .validator import validate_project
|
|
||||||
from .transformer import build_render_plan
|
|
||||||
from .renderer import render, generate_ffmpeg_command_string
|
|
||||||
from .transcriber import transcribe_video, save_transcript, load_transcript
|
|
||||||
from .aligner import align_markers, save_aligned_transcript
|
|
||||||
|
|
||||||
|
|
||||||
def main() -> int:
|
def main() -> int:
|
||||||
@@ -34,120 +27,79 @@ def main() -> int:
|
|||||||
version=f"%(prog)s {__version__}",
|
version=f"%(prog)s {__version__}",
|
||||||
)
|
)
|
||||||
|
|
||||||
subparsers = parser.add_subparsers(dest="command", required=True)
|
# Required arguments
|
||||||
|
parser.add_argument(
|
||||||
# validate command
|
"-p", "--project",
|
||||||
validate_parser = subparsers.add_parser(
|
type=str,
|
||||||
"validate",
|
required=True,
|
||||||
help="Validate project without rendering",
|
help="Project name (directory in current folder)",
|
||||||
)
|
)
|
||||||
validate_parser.add_argument(
|
parser.add_argument(
|
||||||
"project",
|
"-a", "--action",
|
||||||
type=Path,
|
type=str,
|
||||||
help="Path to project directory",
|
choices=["validate", "preprocess", "render", "all", "transcribe", "align"],
|
||||||
|
required=True,
|
||||||
|
help="Action to perform",
|
||||||
)
|
)
|
||||||
|
|
||||||
# render command
|
# Optional arguments
|
||||||
render_parser = subparsers.add_parser(
|
parser.add_argument(
|
||||||
"render",
|
"-i", "--import",
|
||||||
help="Render video from project",
|
dest="import_assets",
|
||||||
|
action="store_true",
|
||||||
|
help="Import assets and generate metadata JSON files",
|
||||||
)
|
)
|
||||||
render_parser.add_argument(
|
parser.add_argument(
|
||||||
"project",
|
|
||||||
type=Path,
|
|
||||||
help="Path to project directory",
|
|
||||||
)
|
|
||||||
render_parser.add_argument(
|
|
||||||
"-o", "--output",
|
|
||||||
type=Path,
|
|
||||||
help="Output file path (default: project/out/final.mp4)",
|
|
||||||
)
|
|
||||||
render_parser.add_argument(
|
|
||||||
"-v", "--verbose",
|
"-v", "--verbose",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
help="Print FFmpeg command",
|
help="Verbose output",
|
||||||
)
|
)
|
||||||
render_parser.add_argument(
|
parser.add_argument(
|
||||||
|
"-f", "--force",
|
||||||
|
action="store_true",
|
||||||
|
help="Force destructive changes (overwrite existing files)",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--no-cache",
|
||||||
|
action="store_true",
|
||||||
|
help="Force cache break (not implemented)",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
"--dry-run",
|
"--dry-run",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
help="Print FFmpeg command without executing",
|
help="Show what would be done without executing",
|
||||||
)
|
|
||||||
|
|
||||||
# generate-slides command
|
|
||||||
gen_slides_parser = subparsers.add_parser(
|
|
||||||
"generate-slides",
|
|
||||||
help="Generate slides.json from Keynote export folder",
|
|
||||||
)
|
|
||||||
gen_slides_parser.add_argument(
|
|
||||||
"directory",
|
|
||||||
type=Path,
|
|
||||||
help="Path to slides directory (e.g., media/slides/Video1)",
|
|
||||||
)
|
|
||||||
gen_slides_parser.add_argument(
|
|
||||||
"--type",
|
|
||||||
default="square",
|
|
||||||
help="Slide type for all slides (default: square)",
|
|
||||||
)
|
|
||||||
|
|
||||||
# transcribe command
|
|
||||||
transcribe_parser = subparsers.add_parser(
|
|
||||||
"transcribe",
|
|
||||||
help="Transcribe video audio using Whisper",
|
|
||||||
)
|
|
||||||
transcribe_parser.add_argument(
|
|
||||||
"video",
|
|
||||||
type=Path,
|
|
||||||
help="Path to video file",
|
|
||||||
)
|
|
||||||
transcribe_parser.add_argument(
|
|
||||||
"-o", "--output",
|
|
||||||
type=Path,
|
|
||||||
help="Output JSON file (default: <video>.transcript.json)",
|
|
||||||
)
|
|
||||||
transcribe_parser.add_argument(
|
|
||||||
"--model",
|
|
||||||
default="base",
|
|
||||||
choices=["tiny", "base", "small", "medium", "large"],
|
|
||||||
help="Whisper model size (default: base)",
|
|
||||||
)
|
|
||||||
|
|
||||||
# align command
|
|
||||||
align_parser = subparsers.add_parser(
|
|
||||||
"align",
|
|
||||||
help="Align manuscript markers to transcript timestamps",
|
|
||||||
)
|
|
||||||
align_parser.add_argument(
|
|
||||||
"project",
|
|
||||||
type=Path,
|
|
||||||
help="Path to project directory",
|
|
||||||
)
|
|
||||||
align_parser.add_argument(
|
|
||||||
"--transcript",
|
|
||||||
type=Path,
|
|
||||||
help="Path to transcript JSON (default: media/talking_head.transcript.json)",
|
|
||||||
)
|
|
||||||
align_parser.add_argument(
|
|
||||||
"--offset",
|
|
||||||
type=float,
|
|
||||||
default=-1.0,
|
|
||||||
help="Seconds to offset marker times (default: -1.0)",
|
|
||||||
)
|
)
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
# Resolve project path
|
||||||
|
project_path = Path(args.project)
|
||||||
|
if not project_path.is_absolute():
|
||||||
|
project_path = Path.cwd() / project_path
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if args.command == "validate":
|
# Check for --no-cache
|
||||||
return cmd_validate(args.project)
|
if args.no_cache:
|
||||||
elif args.command == "render":
|
raise NotImplementedException("--no-cache is not yet implemented")
|
||||||
output = args.output or (args.project / "out" / "final.mp4")
|
|
||||||
return cmd_render(args.project, output, args.verbose, args.dry_run)
|
# Handle import mode
|
||||||
elif args.command == "generate-slides":
|
if args.import_assets:
|
||||||
return cmd_generate_slides(args.directory, args.type)
|
return cmd_import(project_path, args.force, args.verbose)
|
||||||
elif args.command == "transcribe":
|
|
||||||
output = args.output or args.video.with_suffix(".transcript.json")
|
# Handle actions
|
||||||
return cmd_transcribe(args.video, output, args.model)
|
if args.action == "validate":
|
||||||
elif args.command == "align":
|
return cmd_validate(project_path, args.verbose)
|
||||||
return cmd_align(args.project, args.transcript, args.offset)
|
elif args.action == "preprocess":
|
||||||
|
return cmd_preprocess(project_path, args.verbose, args.dry_run)
|
||||||
|
elif args.action == "render":
|
||||||
|
return cmd_render(project_path, args.verbose, args.dry_run)
|
||||||
|
elif args.action == "transcribe":
|
||||||
|
return cmd_transcribe(project_path, args.verbose)
|
||||||
|
elif args.action == "align":
|
||||||
|
return cmd_align(project_path, args.verbose)
|
||||||
|
elif args.action == "all":
|
||||||
|
return cmd_all(project_path, args.verbose, args.dry_run)
|
||||||
|
|
||||||
except GnommoError as e:
|
except GnommoError as e:
|
||||||
print(f"Error: {e}", file=sys.stderr)
|
print(f"Error: {e}", file=sys.stderr)
|
||||||
return 1
|
return 1
|
||||||
@@ -158,9 +110,109 @@ def main() -> int:
|
|||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
def cmd_validate(project_path: Path) -> int:
|
# =============================================================================
|
||||||
"""Run validation only."""
|
# Import Command
|
||||||
print(f"Validating project: {project_path}")
|
# =============================================================================
|
||||||
|
|
||||||
|
def cmd_import(project_path: Path, force: bool, verbose: bool) -> int:
|
||||||
|
"""Import assets and generate metadata JSON files."""
|
||||||
|
print(f"Importing assets for: {project_path.name}")
|
||||||
|
|
||||||
|
if not project_path.exists():
|
||||||
|
print(f"Error: Project directory not found: {project_path}", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# Check for existing files that would be overwritten
|
||||||
|
slides_base = project_path / "media" / "slides"
|
||||||
|
slides_dirs = [d for d in slides_base.glob("*/") if d.is_dir()] if slides_base.exists() else []
|
||||||
|
videos_json = project_path / "videos.json"
|
||||||
|
|
||||||
|
files_to_create = []
|
||||||
|
|
||||||
|
# Check for slide directories to import
|
||||||
|
for slides_dir in slides_dirs:
|
||||||
|
slides_json = slides_dir / "slides.json"
|
||||||
|
if slides_json.exists() and not force:
|
||||||
|
print(f"Warning: {slides_json} already exists. Use -f to overwrite.")
|
||||||
|
return 1
|
||||||
|
files_to_create.append(("slides", slides_dir))
|
||||||
|
|
||||||
|
if not force and files_to_create:
|
||||||
|
print("\nThe following files will be created/overwritten:")
|
||||||
|
for ftype, fpath in files_to_create:
|
||||||
|
print(f" - {fpath}/slides.json")
|
||||||
|
print("\nUse -f/--force to proceed.")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# Generate slides.json for each directory
|
||||||
|
for ftype, slides_dir in files_to_create:
|
||||||
|
if ftype == "slides":
|
||||||
|
_generate_slides_json(slides_dir, verbose)
|
||||||
|
|
||||||
|
print("Import complete.")
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
def _generate_slides_json(directory: Path, verbose: bool) -> None:
|
||||||
|
"""Generate slides.json from Keynote export folder."""
|
||||||
|
extensions = {".png", ".gif", ".pdf", ".jpg", ".jpeg"}
|
||||||
|
files = [f for f in directory.iterdir() if f.suffix.lower() in extensions]
|
||||||
|
|
||||||
|
if not files:
|
||||||
|
print(f" Warning: No image files in {directory}")
|
||||||
|
return
|
||||||
|
|
||||||
|
# Extract numeric suffix from filenames like "Video1.001.png"
|
||||||
|
pattern = re.compile(r"\.(\d+)\.[^.]+$")
|
||||||
|
|
||||||
|
slides = {}
|
||||||
|
for file in files:
|
||||||
|
match = pattern.search(file.name)
|
||||||
|
if match:
|
||||||
|
num = int(match.group(1))
|
||||||
|
slide_id = f"S{num}"
|
||||||
|
slides[slide_id] = {
|
||||||
|
"image": file.name,
|
||||||
|
"type": "fullscreen",
|
||||||
|
}
|
||||||
|
|
||||||
|
if not slides:
|
||||||
|
print(f" Warning: No valid slide files in {directory}")
|
||||||
|
return
|
||||||
|
|
||||||
|
# Sort by slide number
|
||||||
|
sorted_slides = dict(sorted(slides.items(), key=lambda x: int(x[0][1:])))
|
||||||
|
|
||||||
|
# Write slides.json
|
||||||
|
output_path = directory / "slides.json"
|
||||||
|
with open(output_path, "w", encoding="utf-8") as f:
|
||||||
|
json.dump(sorted_slides, f, indent=2)
|
||||||
|
|
||||||
|
print(f" Generated {output_path} ({len(sorted_slides)} slides)")
|
||||||
|
if verbose:
|
||||||
|
for slide_id in sorted_slides:
|
||||||
|
print(f" [{slide_id}]")
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Validate Command
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
def cmd_validate(project_path: Path, verbose: bool) -> int:
|
||||||
|
"""Validate project configuration."""
|
||||||
|
from .parser import (
|
||||||
|
parse_manuscript,
|
||||||
|
parse_project_config,
|
||||||
|
parse_slides,
|
||||||
|
parse_videos,
|
||||||
|
)
|
||||||
|
from .validator import validate_project
|
||||||
|
|
||||||
|
print(f"Validating: {project_path.name}")
|
||||||
|
|
||||||
|
if not (project_path / "project.json").exists():
|
||||||
|
print(f"Error: project.json not found in {project_path}", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
# Parse all files
|
# Parse all files
|
||||||
_, markers, malformed = parse_manuscript(project_path)
|
_, markers, malformed = parse_manuscript(project_path)
|
||||||
@@ -168,6 +220,11 @@ def cmd_validate(project_path: Path) -> int:
|
|||||||
slides = parse_slides(project_path, config)
|
slides = parse_slides(project_path, config)
|
||||||
videos = parse_videos(project_path)
|
videos = parse_videos(project_path)
|
||||||
|
|
||||||
|
if verbose:
|
||||||
|
print(f" - Markers in manuscript: {len(markers)}")
|
||||||
|
print(f" - Slides defined: {len(slides)}")
|
||||||
|
print(f" - Videos defined: {len(videos)}")
|
||||||
|
|
||||||
# Validate
|
# Validate
|
||||||
validate_project(project_path, markers, config, slides, videos, malformed)
|
validate_project(project_path, markers, config, slides, videos, malformed)
|
||||||
|
|
||||||
@@ -175,140 +232,155 @@ def cmd_validate(project_path: Path) -> int:
|
|||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
def cmd_render(project_path: Path, output_path: Path, verbose: bool, dry_run: bool) -> int:
|
# =============================================================================
|
||||||
"""Run full render pipeline."""
|
# Preprocess Command
|
||||||
print(f"Rendering project: {project_path}")
|
# =============================================================================
|
||||||
print(f"Output: {output_path}")
|
|
||||||
print()
|
|
||||||
|
|
||||||
# Stage 1: Extract
|
def cmd_preprocess(project_path: Path, verbose: bool, dry_run: bool) -> int:
|
||||||
print("Stage 1/4: Parsing input files...")
|
"""Run preprocessing pipeline on video sources."""
|
||||||
|
from .parser import parse_project_config, parse_videos
|
||||||
|
from .preprocessor import preprocess_video
|
||||||
|
|
||||||
|
print(f"Preprocessing: {project_path.name}")
|
||||||
|
|
||||||
|
config = parse_project_config(project_path)
|
||||||
|
videos = parse_videos(project_path)
|
||||||
|
|
||||||
|
for video_id, video_source in videos.items():
|
||||||
|
print(f"\n Processing: {video_id}")
|
||||||
|
|
||||||
|
if not video_source.preprocess:
|
||||||
|
print(" No preprocessing steps defined, skipping.")
|
||||||
|
continue
|
||||||
|
|
||||||
|
if dry_run:
|
||||||
|
print(f" Would preprocess: {video_source.file}")
|
||||||
|
for step in video_source.preprocess:
|
||||||
|
print(f" - {step}")
|
||||||
|
else:
|
||||||
|
preprocess_video(project_path, video_id, video_source, verbose)
|
||||||
|
|
||||||
|
print("\nPreprocessing complete.")
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Render Command
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
def cmd_render(project_path: Path, verbose: bool, dry_run: bool) -> int:
|
||||||
|
"""Render final video."""
|
||||||
|
from .parser import (
|
||||||
|
parse_manuscript,
|
||||||
|
parse_project_config,
|
||||||
|
parse_slides,
|
||||||
|
parse_transcript,
|
||||||
|
parse_videos,
|
||||||
|
)
|
||||||
|
from .validator import validate_project
|
||||||
|
from .transformer import build_render_plan
|
||||||
|
from .renderer import render, generate_ffmpeg_command_string
|
||||||
|
|
||||||
|
print(f"Rendering: {project_path.name}")
|
||||||
|
|
||||||
|
# Stage 1: Parse
|
||||||
|
print("\n[1/4] Parsing...")
|
||||||
_, markers, malformed = parse_manuscript(project_path)
|
_, markers, malformed = parse_manuscript(project_path)
|
||||||
config = parse_project_config(project_path)
|
config = parse_project_config(project_path)
|
||||||
slides = parse_slides(project_path, config)
|
slides = parse_slides(project_path, config)
|
||||||
videos = parse_videos(project_path)
|
videos = parse_videos(project_path)
|
||||||
transcript = parse_transcript(project_path)
|
transcript = parse_transcript(project_path)
|
||||||
|
|
||||||
print(f" - Found {len(markers)} slide markers in manuscript")
|
if verbose:
|
||||||
print(f" - Found {len(slides)} slide definitions")
|
print(f" - Markers: {len(markers)}")
|
||||||
print(f" - Found {len(transcript)} transcript entries")
|
print(f" - Slides: {len(slides)}")
|
||||||
print()
|
print(f" - Transcript entries: {len(transcript)}")
|
||||||
|
|
||||||
# Stage 2: Validate
|
# Stage 2: Validate
|
||||||
print("Stage 2/4: Validating...")
|
print("\n[2/4] Validating...")
|
||||||
validate_project(project_path, markers, config, slides, videos, malformed)
|
validate_project(project_path, markers, config, slides, videos, malformed)
|
||||||
print(" - Validation passed")
|
print(" Passed.")
|
||||||
print()
|
|
||||||
|
|
||||||
# Stage 3: Transform
|
# Stage 3: Transform
|
||||||
print("Stage 3/4: Building render plan...")
|
print("\n[3/4] Building render plan...")
|
||||||
plan = build_render_plan(project_path, config, slides, videos, transcript)
|
plan = build_render_plan(project_path, config, slides, videos, transcript)
|
||||||
print(f" - Video duration: {plan.total_duration:.2f}s")
|
print(f" - Duration: {plan.total_duration:.1f}s")
|
||||||
print(f" - Slide events: {len(plan.slide_events)}")
|
print(f" - Slide events: {len(plan.slide_events)}")
|
||||||
for event in plan.slide_events:
|
|
||||||
print(f" - [{event.slide_id}] {event.start_time:.2f}s - {event.end_time:.2f}s")
|
if verbose:
|
||||||
print()
|
for event in plan.slide_events:
|
||||||
|
print(f" [{event.slide_id}] {event.start_time:.1f}s - {event.end_time:.1f}s")
|
||||||
|
|
||||||
# Stage 4: Render
|
# Stage 4: Render
|
||||||
|
output_path = project_path / "out" / "final.mp4"
|
||||||
|
|
||||||
if dry_run:
|
if dry_run:
|
||||||
print("Stage 4/4: Generating FFmpeg command (dry run)...")
|
print("\n[4/4] FFmpeg command (dry run):")
|
||||||
print()
|
|
||||||
print(generate_ffmpeg_command_string(plan, output_path))
|
print(generate_ffmpeg_command_string(plan, output_path))
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
print("Stage 4/4: Rendering video...")
|
print("\n[4/4] Rendering...")
|
||||||
render(plan, output_path, verbose=verbose)
|
render(plan, output_path, verbose=verbose)
|
||||||
print(f" - Output written to: {output_path}")
|
print(f" Output: {output_path}")
|
||||||
print()
|
|
||||||
print("Done.")
|
|
||||||
|
|
||||||
|
print("\nDone.")
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
def cmd_generate_slides(directory: Path, slide_type: str) -> int:
|
# =============================================================================
|
||||||
"""Generate slides.json from Keynote export folder."""
|
# Transcribe Command
|
||||||
directory = directory.resolve()
|
# =============================================================================
|
||||||
|
|
||||||
if not directory.exists():
|
def cmd_transcribe(project_path: Path, verbose: bool) -> int:
|
||||||
print(f"Error: Directory not found: {directory}", file=sys.stderr)
|
|
||||||
return 1
|
|
||||||
|
|
||||||
if not directory.is_dir():
|
|
||||||
print(f"Error: Not a directory: {directory}", file=sys.stderr)
|
|
||||||
return 1
|
|
||||||
|
|
||||||
# Find all image files (png, gif, pdf)
|
|
||||||
extensions = {".png", ".gif", ".pdf", ".jpg", ".jpeg"}
|
|
||||||
files = [f for f in directory.iterdir() if f.suffix.lower() in extensions]
|
|
||||||
|
|
||||||
if not files:
|
|
||||||
print(f"Error: No image files found in {directory}", file=sys.stderr)
|
|
||||||
return 1
|
|
||||||
|
|
||||||
# Extract numeric suffix from filenames like "Video1.001.png"
|
|
||||||
# Pattern: anything followed by .NNN. followed by extension
|
|
||||||
pattern = re.compile(r"\.(\d+)\.[^.]+$")
|
|
||||||
|
|
||||||
slides = {}
|
|
||||||
for file in files:
|
|
||||||
match = pattern.search(file.name)
|
|
||||||
if match:
|
|
||||||
num = int(match.group(1)) # "001" -> 1
|
|
||||||
slide_id = f"S{num}"
|
|
||||||
slides[slide_id] = {
|
|
||||||
"image": file.name,
|
|
||||||
"type": slide_type,
|
|
||||||
}
|
|
||||||
else:
|
|
||||||
print(f" Warning: Could not parse slide number from: {file.name}")
|
|
||||||
|
|
||||||
if not slides:
|
|
||||||
print("Error: No valid slide files found", file=sys.stderr)
|
|
||||||
return 1
|
|
||||||
|
|
||||||
# Sort by slide number
|
|
||||||
sorted_slides = dict(sorted(slides.items(), key=lambda x: int(x[0][1:])))
|
|
||||||
|
|
||||||
# Write slides.json in the same directory
|
|
||||||
output_path = directory / "slides.json"
|
|
||||||
with open(output_path, "w", encoding="utf-8") as f:
|
|
||||||
json.dump(sorted_slides, f, indent=2)
|
|
||||||
|
|
||||||
print(f"Generated {output_path}")
|
|
||||||
print(f" - Found {len(sorted_slides)} slides")
|
|
||||||
for slide_id, slide_def in sorted_slides.items():
|
|
||||||
print(f" [{slide_id}] {slide_def['image']}")
|
|
||||||
|
|
||||||
return 0
|
|
||||||
|
|
||||||
|
|
||||||
def cmd_transcribe(video_path: Path, output_path: Path, model: str) -> int:
|
|
||||||
"""Transcribe video audio using Whisper."""
|
"""Transcribe video audio using Whisper."""
|
||||||
print(f"Transcribing: {video_path}")
|
from .transcriber import transcribe_video, save_transcript
|
||||||
print(f"Model: {model}")
|
from .parser import parse_videos
|
||||||
print()
|
|
||||||
|
|
||||||
words = transcribe_video(video_path, model=model)
|
print(f"Transcribing: {project_path.name}")
|
||||||
|
|
||||||
|
videos = parse_videos(project_path)
|
||||||
|
if not videos:
|
||||||
|
print("Error: No videos defined in videos.json", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# Use first video
|
||||||
|
video_id = next(iter(videos.keys()))
|
||||||
|
video_source = videos[video_id]
|
||||||
|
video_path = project_path / video_source.file
|
||||||
|
|
||||||
|
if not video_path.exists():
|
||||||
|
print(f"Error: Video not found: {video_path}", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
print(f" Video: {video_path.name}")
|
||||||
|
|
||||||
|
words = transcribe_video(video_path, model="base")
|
||||||
|
|
||||||
|
output_path = video_path.with_suffix(".transcript.json")
|
||||||
|
save_transcript(words, output_path)
|
||||||
|
|
||||||
print(f" - Transcribed {len(words)} words")
|
print(f" - Transcribed {len(words)} words")
|
||||||
print(f" - Duration: {words[-1].end:.1f}s" if words else " - No words found")
|
print(f" - Duration: {words[-1].end:.1f}s" if words else " - No words found")
|
||||||
|
print(f" - Saved: {output_path}")
|
||||||
|
|
||||||
save_transcript(words, output_path)
|
if verbose and words:
|
||||||
print(f" - Saved to: {output_path}")
|
|
||||||
|
|
||||||
# Show first few words as preview
|
|
||||||
if words:
|
|
||||||
preview = " ".join(w.word for w in words[:10])
|
preview = " ".join(w.word for w in words[:10])
|
||||||
print(f" - Preview: {preview}...")
|
print(f" - Preview: {preview}...")
|
||||||
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
def cmd_align(project_path: Path, transcript_path: Path = None, offset: float = -1.0) -> int:
|
# =============================================================================
|
||||||
|
# Align Command
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
def cmd_align(project_path: Path, verbose: bool) -> int:
|
||||||
"""Align manuscript markers to transcript timestamps."""
|
"""Align manuscript markers to transcript timestamps."""
|
||||||
print(f"Aligning: {project_path}")
|
from .transcriber import load_transcript
|
||||||
print(f"Offset: {offset}s")
|
from .aligner import align_markers, save_aligned_transcript
|
||||||
print()
|
from .parser import parse_videos
|
||||||
|
|
||||||
|
print(f"Aligning: {project_path.name}")
|
||||||
|
|
||||||
# Load manuscript
|
# Load manuscript
|
||||||
manuscript_path = project_path / "manuscript.txt"
|
manuscript_path = project_path / "manuscript.txt"
|
||||||
@@ -318,45 +390,83 @@ def cmd_align(project_path: Path, transcript_path: Path = None, offset: float =
|
|||||||
|
|
||||||
manuscript_text = manuscript_path.read_text(encoding="utf-8")
|
manuscript_text = manuscript_path.read_text(encoding="utf-8")
|
||||||
|
|
||||||
# Load transcript
|
# Find transcript
|
||||||
if transcript_path is None:
|
videos = parse_videos(project_path)
|
||||||
# Try to find transcript in media folder
|
video_id = next(iter(videos.keys()))
|
||||||
transcript_path = project_path / "media" / "talking_head.transcript.json"
|
video_source = videos[video_id]
|
||||||
|
video_path = project_path / video_source.file
|
||||||
|
transcript_path = video_path.with_suffix(".transcript.json")
|
||||||
|
|
||||||
if not transcript_path.exists():
|
if not transcript_path.exists():
|
||||||
print(f"Error: Transcript not found: {transcript_path}", file=sys.stderr)
|
print(f"Error: Transcript not found: {transcript_path}", file=sys.stderr)
|
||||||
print("Run 'gnommo transcribe' first to generate the transcript.", file=sys.stderr)
|
print("Run with -a transcribe first.", file=sys.stderr)
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
print(f" - Loading transcript: {transcript_path}")
|
print(f" Loading: {transcript_path.name}")
|
||||||
transcript = load_transcript(transcript_path)
|
transcript = load_transcript(transcript_path)
|
||||||
print(f" - Loaded {len(transcript)} words")
|
print(f" - {len(transcript)} words")
|
||||||
|
|
||||||
# Align markers
|
# Align
|
||||||
print(" - Aligning markers...")
|
print(" Aligning markers...")
|
||||||
alignments = align_markers(manuscript_text, transcript, offset_seconds=offset)
|
alignments = align_markers(manuscript_text, transcript, offset_seconds=-1.0)
|
||||||
|
|
||||||
# Report results
|
# Report
|
||||||
print()
|
|
||||||
print("Alignment results:")
|
|
||||||
unmatched = 0
|
unmatched = 0
|
||||||
for a in alignments:
|
for a in alignments:
|
||||||
if a.timestamp >= 0:
|
if a.timestamp >= 0:
|
||||||
print(f" [{a.marker_id}] @ {a.timestamp:.2f}s - \"{a.matched_phrase}...\"")
|
if verbose:
|
||||||
|
print(f" [{a.marker_id}] @ {a.timestamp:.1f}s")
|
||||||
else:
|
else:
|
||||||
print(f" [{a.marker_id}] NOT FOUND - \"{a.matched_phrase}...\"")
|
print(f" [{a.marker_id}] NOT FOUND")
|
||||||
unmatched += 1
|
unmatched += 1
|
||||||
|
|
||||||
if unmatched > 0:
|
if unmatched > 0:
|
||||||
print(f"\nWarning: {unmatched} markers could not be aligned")
|
print(f"\n Warning: {unmatched} markers not aligned")
|
||||||
|
|
||||||
# Save aligned transcript.csv
|
# Save
|
||||||
output_path = project_path / "transcript.csv"
|
output_path = project_path / "transcript.csv"
|
||||||
save_aligned_transcript(alignments, transcript, output_path)
|
save_aligned_transcript(alignments, transcript, output_path)
|
||||||
print(f"\nSaved: {output_path}")
|
print(f"\n Saved: {output_path}")
|
||||||
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# All Command (Full Pipeline)
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
def cmd_all(project_path: Path, verbose: bool, dry_run: bool) -> int:
    """Run the complete pipeline for a project: transcribe, align, render.

    Transcription is skipped when a ``.transcript.json`` file (the first
    video's path with its suffix replaced) already exists. The first stage
    that returns a non-zero exit code stops the pipeline.

    Args:
        project_path: Root directory of the project.
        verbose: Forwarded to every stage.
        dry_run: Forwarded to the render stage only.

    Returns:
        0 on success, otherwise the exit code of the stage that failed.
    """
    from .parser import parse_videos

    print(f"=== Full Pipeline: {project_path.name} ===\n")

    # Step 1: transcribe, unless a transcript is already cached on disk.
    sources = parse_videos(project_path)
    if sources:
        # Only the first declared video is considered.
        first_source = next(iter(sources.values()))
        cached = (project_path / first_source.file).with_suffix(".transcript.json")
        if cached.exists():
            print(f">>> Step 1/3: Transcribe (cached: {cached.name})\n")
        else:
            print(">>> Step 1/3: Transcribe\n")
            status = cmd_transcribe(project_path, verbose)
            if status != 0:
                return status

    # Step 2: align manuscript markers against the transcript.
    print("\n>>> Step 2/3: Align\n")
    status = cmd_align(project_path, verbose)
    if status != 0:
        return status

    # Step 3: render; its exit code is the pipeline's exit code.
    print("\n>>> Step 3/3: Render\n")
    return cmd_render(project_path, verbose, dry_run)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
sys.exit(main())
|
sys.exit(main())
|
||||||
|
|||||||
@@ -57,3 +57,20 @@ class RenderError(GnommoError):
|
|||||||
if stderr:
|
if stderr:
|
||||||
full_message += f"\nFFmpeg output:\n{stderr}"
|
full_message += f"\nFFmpeg output:\n{stderr}"
|
||||||
super().__init__(full_message)
|
super().__init__(full_message)
|
||||||
|
|
||||||
|
|
||||||
|
class PreprocessError(GnommoError):
    """Raised when a preprocessing step fails.

    The optional details are kept as attributes and are also folded into
    the exception message so they show up wherever the error is printed.
    """

    def __init__(
        self,
        message: str,
        filter_type: Optional[str] = None,
        command: Optional[str] = None,
        stderr: Optional[str] = None,
    ):
        """Store the failure details and build the combined message.

        Args:
            message: Human-readable description of the failure.
            filter_type: Name of the filter that failed; used as a
                ``[filter_type]`` prefix when given.
            command: Command line that was run, appended when given.
            stderr: Captured FFmpeg output, appended when given.
        """
        self.filter_type = filter_type
        self.command = command
        self.stderr = stderr

        text = message if not filter_type else f"[{filter_type}] {message}"
        # Each optional detail gets its own labelled section on a new line.
        for label, detail in (("Command: ", command), ("FFmpeg output:\n", stderr)):
            if detail:
                text += f"\n{label}{detail}"
        super().__init__(text)
|
||||||
|
|||||||
+35
-2
@@ -12,6 +12,7 @@ class TalkingHeadConfig:
|
|||||||
y: int
|
y: int
|
||||||
target_height: int # in pixels, or -1 for percentage-based
|
target_height: int # in pixels, or -1 for percentage-based
|
||||||
target_height_percent: float = 0.0 # percentage (0.0-1.0) if target_height is -1
|
target_height_percent: float = 0.0 # percentage (0.0-1.0) if target_height is -1
|
||||||
|
file: Optional[str] = None # Path to video or metadata JSON file
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -21,7 +22,8 @@ class ProjectConfig:
|
|||||||
fps: int
|
fps: int
|
||||||
talking_head: TalkingHeadConfig
|
talking_head: TalkingHeadConfig
|
||||||
default_slide_type: str
|
default_slide_type: str
|
||||||
background_video: str
|
background: str = "" # Background image or video path (in shared_assets/)
|
||||||
|
background_video: str = "" # Deprecated: use background instead
|
||||||
slides_path: str = "slides.json" # path to slides.json relative to project
|
slides_path: str = "slides.json" # path to slides.json relative to project
|
||||||
audio_source: Optional[str] = None # defaults to talking head
|
audio_source: Optional[str] = None # defaults to talking head
|
||||||
|
|
||||||
@@ -33,11 +35,41 @@ class SlideDefinition:
|
|||||||
type: str # "fullscreen" | "square"
|
type: str # "fullscreen" | "square"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ChromaKeyConfig:
    """Settings for the chroma key (green screen) preprocessing filter.

    Every field has a default, so ``ChromaKeyConfig()`` describes a plain
    green-screen key.
    """

    # RGB colour to key out; defaults to pure green.
    color: tuple[int, int, int] = (0, 255, 0)
    # Colour similarity threshold, 0.0-1.0.
    similarity: float = 0.15
    # Edge blend / feathering amount, 0.0-1.0.
    blend: float = 0.1
    # Spill suppression amount, 0.0-1.0; 0.0 is the default.
    spill: float = 0.0
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class FilterConfig:
    """Common base for the configuration of one preprocessing filter."""

    # Filter identifier.
    type: str
    # Settings specific to a filter type live in subclasses or in a plain dict.
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class VideoSource:
    """One video entry as declared in videos.json."""

    # Path of the video file.
    file: str
    # Filter configuration dicts; each instance gets its own empty list by default.
    preprocess: list[dict] = field(default_factory=list)
    # Path of the preprocessed output, when one is configured.
    output_file: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class VideoMetadata:
    """Description of a video source, typically loaded from a ``.json`` file.

    Keeping this next to the video lets preprocessing be configured per
    video instead of inside videos.json.
    """

    # The original, unprocessed video file.
    source_file: str
    # Preprocessing filter dicts, in order.
    preprocess: list[dict] = field(default_factory=list)
    # Output settings, e.g. {"file": "...", "colorspace": "...", "alpha": "..."}.
    output: Optional[dict] = None
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -78,6 +110,7 @@ class RenderPlan:
|
|||||||
total_duration: float
|
total_duration: float
|
||||||
slides: dict[str, SlideDefinition]
|
slides: dict[str, SlideDefinition]
|
||||||
slides_dir: Path = None # directory containing slide images
|
slides_dir: Path = None # directory containing slide images
|
||||||
|
talking_head_path: Path = None # Resolved path to actual video file
|
||||||
|
|
||||||
|
|
||||||
# Slide layout configurations (hardcoded for POC)
|
# Slide layout configurations (hardcoded for POC)
|
||||||
|
|||||||
+73
-2
@@ -4,7 +4,7 @@ import csv
|
|||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any
|
from typing import Any, Optional
|
||||||
|
|
||||||
from .errors import ParseError
|
from .errors import ParseError
|
||||||
from .models import (
|
from .models import (
|
||||||
@@ -12,6 +12,7 @@ from .models import (
|
|||||||
SlideDefinition,
|
SlideDefinition,
|
||||||
TalkingHeadConfig,
|
TalkingHeadConfig,
|
||||||
TimedWord,
|
TimedWord,
|
||||||
|
VideoMetadata,
|
||||||
VideoSource,
|
VideoSource,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -119,6 +120,7 @@ def parse_project_config(project_path: Path) -> ProjectConfig:
|
|||||||
y=th_data.get("y", 100),
|
y=th_data.get("y", 100),
|
||||||
target_height=th_height,
|
target_height=th_height,
|
||||||
target_height_percent=th_height_pct,
|
target_height_percent=th_height_pct,
|
||||||
|
file=th_data.get("file"),
|
||||||
)
|
)
|
||||||
|
|
||||||
# Parse resolution
|
# Parse resolution
|
||||||
@@ -131,7 +133,8 @@ def parse_project_config(project_path: Path) -> ProjectConfig:
|
|||||||
fps=data.get("fps", 30),
|
fps=data.get("fps", 30),
|
||||||
talking_head=talking_head,
|
talking_head=talking_head,
|
||||||
default_slide_type=data.get("defaultSlideType", "square"),
|
default_slide_type=data.get("defaultSlideType", "square"),
|
||||||
background_video=data.get("background_video", ""),
|
background=data.get("background", ""),
|
||||||
|
background_video=data.get("background_video", ""), # Deprecated
|
||||||
slides_path=data.get("slides", "slides.json"),
|
slides_path=data.get("slides", "slides.json"),
|
||||||
audio_source=data.get("audio_source"),
|
audio_source=data.get("audio_source"),
|
||||||
)
|
)
|
||||||
@@ -206,6 +209,7 @@ def parse_videos(project_path: Path) -> dict[str, VideoSource]:
|
|||||||
videos[video_id] = VideoSource(
|
videos[video_id] = VideoSource(
|
||||||
file=video_data["file"],
|
file=video_data["file"],
|
||||||
preprocess=video_data.get("preprocess", []),
|
preprocess=video_data.get("preprocess", []),
|
||||||
|
output_file=video_data.get("output_file"),
|
||||||
)
|
)
|
||||||
|
|
||||||
return videos
|
return videos
|
||||||
@@ -229,3 +233,70 @@ def get_video_duration(video_path: Path) -> float:
|
|||||||
raise ParseError(f"Failed to get duration: {result.stderr}", video_path)
|
raise ParseError(f"Failed to get duration: {result.stderr}", video_path)
|
||||||
|
|
||||||
return float(result.stdout.strip())
|
return float(result.stdout.strip())
|
||||||
|
|
||||||
|
|
||||||
|
def parse_video_metadata(metadata_path: Path) -> VideoMetadata:
    """Parse a video metadata JSON file.

    Expected format:
        {
            "source_file": "talking_head.mov",
            "preprocess": [
                {"type": "chroma_key", "color": [0, 255, 0], "similarity": 0.15}
            ],
            "output": {
                "file": "intermediate/talking_head_rgba.mov",
                "colorspace": "rgba",
                "alpha": "straight"
            }
        }

    Args:
        metadata_path: Location of the metadata JSON file.

    Returns:
        The parsed metadata. ``preprocess`` defaults to an empty list and
        ``output`` to ``None`` when absent.

    Raises:
        ParseError: If the file is missing, is not valid JSON, is not a JSON
            object, lacks ``source_file``, or has a ``preprocess``/``output``
            value of the wrong JSON type.
    """
    if not metadata_path.exists():
        raise ParseError(f"Video metadata not found: {metadata_path}", metadata_path)

    try:
        data = json.loads(metadata_path.read_text(encoding="utf-8"))
    except json.JSONDecodeError as e:
        raise ParseError(f"Invalid JSON: {e}", metadata_path) from e

    # A list, string or number at the top level would otherwise fail further
    # down with a TypeError that does not mention the offending file.
    if not isinstance(data, dict):
        raise ParseError("Video metadata must be a JSON object", metadata_path)

    if "source_file" not in data:
        raise ParseError("Video metadata missing required field 'source_file'", metadata_path)

    # An explicit null is treated like an omitted field.
    preprocess = data.get("preprocess") or []
    if not isinstance(preprocess, list):
        raise ParseError("Video metadata field 'preprocess' must be a list", metadata_path)

    output = data.get("output")
    if output is not None and not isinstance(output, dict):
        raise ParseError("Video metadata field 'output' must be an object", metadata_path)

    return VideoMetadata(
        source_file=data["source_file"],
        preprocess=preprocess,
        output=output,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_video_file(project_path: Path, file_ref: str) -> tuple[Path, Optional[VideoMetadata]]:
    """Turn a file reference into the video path that should be used.

    ``file_ref`` is either a direct path to a video file or a path to a
    metadata JSON file, both relative to ``project_path``.

    Returns:
        Tuple of (video path to use, metadata). The metadata is ``None``
        unless an existing ``.json`` file was referenced.
    """
    ref_path = project_path / file_ref

    # Anything other than an existing .json file is taken as the video itself.
    if not (file_ref.endswith(".json") and ref_path.exists()):
        return ref_path, None

    metadata = parse_video_metadata(ref_path)

    # Paths inside the metadata are relative to the metadata file's directory.
    base_dir = ref_path.parent

    # Prefer the configured output when it is already on disk.
    rendered_name = metadata.output.get("file") if metadata.output else None
    if rendered_name:
        rendered = base_dir / rendered_name
        if rendered.exists():
            return rendered, metadata

    # Otherwise fall back to the unprocessed source.
    return base_dir / metadata.source_file, metadata
|
||||||
|
|||||||
@@ -0,0 +1,195 @@
|
|||||||
|
"""Preprocessing stage: apply filters to source videos."""
|
||||||
|
|
||||||
|
import subprocess
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from .errors import PreprocessError
|
||||||
|
from .models import VideoSource, ChromaKeyConfig
|
||||||
|
|
||||||
|
|
||||||
|
def preprocess_video(
    project_path: Path,
    video_id: str,
    video_source: VideoSource,
    verbose: bool = False,
) -> Path:
    """Apply the configured preprocessing filters to a video source.

    Filters run one after another; each step writes its own file named
    ``intermediate/<video_id>_step<i>_<type>.mov`` and feeds the next step.

    Args:
        project_path: Root directory of the project.
        video_id: Identifier used to name the intermediate files.
        video_source: Source definition with ``file``, ``preprocess`` and
            ``output_file``.
        verbose: Print the input and output of every step.

    Returns:
        Path to the final preprocessed file: ``output_file`` when it is set,
        otherwise the last intermediate file. With no filters configured
        the original source path is returned untouched.

    Raises:
        PreprocessError: If the source video is missing, a filter entry is
            malformed, or a filter type is unknown.
    """
    import shutil

    source_path = project_path / video_source.file

    if not video_source.preprocess:
        # No preprocessing needed, return original file
        return source_path

    # Check the source before creating anything on disk.
    if not source_path.exists():
        raise PreprocessError(
            f"Source video not found: {source_path}",
            filter_type=None,
        )

    intermediate_dir = project_path / "intermediate"
    intermediate_dir.mkdir(parents=True, exist_ok=True)

    current_input = source_path

    for i, raw_filter in enumerate(video_source.preprocess):
        # A bare filter name is accepted as shorthand for {"type": name}, so
        # list-of-strings preprocess entries do not crash on .get().
        if isinstance(raw_filter, str):
            filter_config = {"type": raw_filter}
        elif isinstance(raw_filter, dict):
            filter_config = raw_filter
        else:
            raise PreprocessError(
                f"Filter {i} must be an object or a filter name, "
                f"got {type(raw_filter).__name__}",
                filter_type=None,
            )

        filter_type = filter_config.get("type")
        if filter_type is None:
            raise PreprocessError(
                f"Filter {i} missing 'type' field",
                filter_type=None,
            )

        step_output = intermediate_dir / f"{video_id}_step{i}_{filter_type}.mov"

        if verbose:
            print(f" Step {i + 1}: {filter_type}")
            print(f" Input: {current_input}")
            print(f" Output: {step_output}")

        if filter_type == "chroma_key":
            apply_chroma_key(current_input, step_output, filter_config, verbose)
        else:
            raise PreprocessError(
                f"Unknown filter type: {filter_type}",
                filter_type=filter_type,
            )

        # The output of this step is the input of the next one.
        current_input = step_output

    if not video_source.output_file:
        return current_input

    final_output = project_path / video_source.output_file
    final_output.parent.mkdir(parents=True, exist_ok=True)

    try:
        shutil.copy2(current_input, final_output)
    except shutil.SameFileError:
        # output_file already names the last intermediate; nothing to copy.
        pass

    if verbose:
        print(f" Final output: {final_output}")

    return final_output
|
||||||
|
|
||||||
|
|
||||||
|
def apply_chroma_key(
    input_path: Path,
    output_path: Path,
    config: dict[str, Any],
    verbose: bool = False,
) -> None:
    """Apply a chroma key (green screen) filter using FFmpeg.

    Config options:
        color: [R, G, B] - Color to key out (default: [0, 255, 0] green)
        similarity: float - Color similarity threshold 0.0-1.0 (default: 0.15)
        blend: float - Edge blend/feathering 0.0-1.0 (default: 0.1)
        spill: float - Spill suppression 0.0-1.0 (default: 0.0)

    The output is encoded with ``prores_ks`` profile 4 (ProRes 4444) in
    ``yuva444p10le`` so the alpha channel is kept; audio is written as
    ``pcm_s16le``. An existing output file is overwritten.

    Args:
        input_path: Video to read.
        output_path: File to write.
        config: Filter options, see above.
        verbose: Print the filter string and the full command.

    Raises:
        PreprocessError: If FFmpeg cannot be started or exits non-zero.
    """
    import shlex

    chroma_config = parse_chroma_key_config(config)

    # FFmpeg takes the key colour as 0xRRGGBB.
    r, g, b = chroma_config.color
    hex_color = f"0x{r:02x}{g:02x}{b:02x}"

    # chromakey=color:similarity:blend
    filter_parts = [
        f"chromakey={hex_color}:{chroma_config.similarity:.3f}:{chroma_config.blend:.3f}"
    ]

    if chroma_config.spill > 0:
        # despill supports green and blue screens only; follow whichever of
        # those two channels dominates the key colour instead of always
        # assuming green.
        despill_type = "blue" if b > g else "green"
        filter_parts.append(f"despill=type={despill_type}:mix={chroma_config.spill:.3f}")

    video_filter = ",".join(filter_parts)

    cmd = [
        "ffmpeg",
        "-y",  # Overwrite output
        "-i", str(input_path),
        "-vf", video_filter,
        "-c:v", "prores_ks",
        "-profile:v", "4",  # ProRes 4444
        "-pix_fmt", "yuva444p10le",  # 10-bit with alpha
        "-c:a", "pcm_s16le",  # Lossless audio
        str(output_path),
    ]

    # Shell-quoted so paths with spaces stay unambiguous and the line can be
    # pasted back into a terminal.
    printable_cmd = shlex.join(cmd)

    if verbose:
        print(f" Filter: {video_filter}")
        print(f" Command: {printable_cmd}")

    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
        )
    except OSError as e:
        # Typically a missing ffmpeg binary; report it like any other
        # preprocessing failure.
        raise PreprocessError(
            f"Could not run ffmpeg: {e}",
            filter_type="chroma_key",
            command=printable_cmd,
        ) from e

    if result.returncode != 0:
        raise PreprocessError(
            "Chroma key filter failed",
            filter_type="chroma_key",
            command=printable_cmd,
            stderr=result.stderr,
        )
|
||||||
|
|
||||||
|
|
||||||
|
def parse_chroma_key_config(config: dict[str, Any]) -> ChromaKeyConfig:
    """Parse a chroma key config dictionary into ChromaKeyConfig.

    Missing keys fall back to the defaults: green ``(0, 255, 0)``,
    similarity 0.15, blend 0.1, spill 0.0.

    Args:
        config: Filter options as read from JSON. ``color`` may be a list
            or a tuple of three channel values.

    Returns:
        The typed configuration.

    Raises:
        PreprocessError: If ``color`` has three entries but one of them is
            not an integer in 0-255.
    """
    default_color = (0, 255, 0)
    raw_color = config.get("color", default_color)

    # Tuples are accepted as well as lists, so a colour taken from an
    # existing ChromaKeyConfig is not silently reset to green.
    if isinstance(raw_color, (list, tuple)) and len(raw_color) == 3:
        for channel in raw_color:
            # Anything else would produce a malformed 0xRRGGBB string or a
            # formatting error later, far from the real cause.
            if not isinstance(channel, int) or not 0 <= channel <= 255:
                raise PreprocessError(
                    f"Invalid chroma key color {list(raw_color)!r}: "
                    "channels must be integers in 0-255",
                    filter_type="chroma_key",
                )
        color = tuple(raw_color)
    else:
        # Wrongly shaped colour values fall back to green.
        color = default_color

    return ChromaKeyConfig(
        color=color,
        similarity=float(config.get("similarity", 0.15)),
        blend=float(config.get("blend", 0.1)),
        spill=float(config.get("spill", 0.0)),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def get_preprocessed_path(project_path: Path, video_source: VideoSource) -> Path:
    """Return where the preprocessed video for ``video_source`` lives.

    That is ``output_file`` when one is set, otherwise the original
    ``file``; either way the path is relative to ``project_path``.
    """
    relative = video_source.output_file or video_source.file
    return project_path / relative
|
||||||
|
|
||||||
|
|
||||||
|
def needs_preprocessing(project_path: Path, video_source: VideoSource) -> bool:
    """Tell whether the preprocessing filters still have to be run.

    Returns ``False`` when no filters are configured, or when an
    ``output_file`` is configured and already exists under
    ``project_path``. Returns ``True`` in every other case, including when
    filters are configured without an ``output_file``.
    """
    if not video_source.preprocess:
        return False

    target = video_source.output_file
    # Without a configured output there is nothing to check against.
    return not (target and (project_path / target).exists())
|
||||||
+29
-8
@@ -50,14 +50,23 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]:
|
|||||||
output_path = output_path.resolve()
|
output_path = output_path.resolve()
|
||||||
|
|
||||||
# Input: talking head video
|
# Input: talking head video
|
||||||
talking_head_path = project_path / plan.talking_head.file
|
# Use resolved path if available, otherwise construct from file
|
||||||
|
talking_head_path = plan.talking_head_path or (project_path / plan.talking_head.file)
|
||||||
cmd.extend(["-i", str(talking_head_path)])
|
cmd.extend(["-i", str(talking_head_path)])
|
||||||
|
|
||||||
# Input: background video (if specified)
|
# Input: background image/video (if specified)
|
||||||
has_background = bool(plan.config.background_video)
|
bg_file = plan.config.background or plan.config.background_video
|
||||||
|
has_background = bool(bg_file)
|
||||||
|
bg_is_image = False
|
||||||
if has_background:
|
if has_background:
|
||||||
bg_path = project_path / plan.config.background_video
|
# Try project folder first, then parent (for shared_assets)
|
||||||
|
bg_path = project_path / bg_file
|
||||||
|
if not bg_path.exists():
|
||||||
|
bg_path = project_path.parent / bg_file
|
||||||
cmd.extend(["-i", str(bg_path)])
|
cmd.extend(["-i", str(bg_path)])
|
||||||
|
# Check if background is an image
|
||||||
|
image_extensions = {".png", ".jpg", ".jpeg", ".gif", ".bmp", ".tiff", ".webp"}
|
||||||
|
bg_is_image = bg_path.suffix.lower() in image_extensions
|
||||||
|
|
||||||
# Input: slide images (from slides_dir, same directory as slides.json)
|
# Input: slide images (from slides_dir, same directory as slides.json)
|
||||||
slides_dir = plan.slides_dir.resolve() if plan.slides_dir else project_path / "media" / "slides"
|
slides_dir = plan.slides_dir.resolve() if plan.slides_dir else project_path / "media" / "slides"
|
||||||
@@ -70,7 +79,7 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]:
|
|||||||
slide_inputs.append(event.slide_id)
|
slide_inputs.append(event.slide_id)
|
||||||
|
|
||||||
# Build filter_complex
|
# Build filter_complex
|
||||||
filter_complex = build_filter_complex(plan, has_background, slide_inputs)
|
filter_complex = build_filter_complex(plan, has_background, slide_inputs, bg_is_image)
|
||||||
cmd.extend(["-filter_complex", filter_complex])
|
cmd.extend(["-filter_complex", filter_complex])
|
||||||
|
|
||||||
# Map output video and audio
|
# Map output video and audio
|
||||||
@@ -96,12 +105,13 @@ def build_filter_complex(
|
|||||||
plan: RenderPlan,
|
plan: RenderPlan,
|
||||||
has_background: bool,
|
has_background: bool,
|
||||||
slide_inputs: list[str],
|
slide_inputs: list[str],
|
||||||
|
bg_is_image: bool = False,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""
|
"""
|
||||||
Build the filter_complex string for FFmpeg.
|
Build the filter_complex string for FFmpeg.
|
||||||
|
|
||||||
Layer structure:
|
Layer structure:
|
||||||
- Layer 1: Background (solid color or video)
|
- Layer 1: Background (solid color, image, or video)
|
||||||
- Layer 2: Talking head
|
- Layer 2: Talking head
|
||||||
- Layer 3: Slides (with time-based enable)
|
- Layer 3: Slides (with time-based enable)
|
||||||
"""
|
"""
|
||||||
@@ -118,8 +128,19 @@ def build_filter_complex(
|
|||||||
|
|
||||||
# Create base layer (background)
|
# Create base layer (background)
|
||||||
if has_background:
|
if has_background:
|
||||||
filters.append(f"[{bg_idx}:v]scale={width}:{height}:force_original_aspect_ratio=increase,"
|
if bg_is_image:
|
||||||
f"crop={width}:{height}[bg]")
|
# For images: loop to create video stream, then scale
|
||||||
|
filters.append(
|
||||||
|
f"[{bg_idx}:v]loop=loop=-1:size=1:start=0,"
|
||||||
|
f"scale={width}:{height}:force_original_aspect_ratio=increase,"
|
||||||
|
f"crop={width}:{height},fps={plan.config.fps}[bg]"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
# For videos: just scale
|
||||||
|
filters.append(
|
||||||
|
f"[{bg_idx}:v]scale={width}:{height}:force_original_aspect_ratio=increase,"
|
||||||
|
f"crop={width}:{height}[bg]"
|
||||||
|
)
|
||||||
base_label = "bg"
|
base_label = "bg"
|
||||||
else:
|
else:
|
||||||
# Create solid color background
|
# Create solid color background
|
||||||
|
|||||||
+21
-5
@@ -10,7 +10,7 @@ from .models import (
|
|||||||
TimedWord,
|
TimedWord,
|
||||||
VideoSource,
|
VideoSource,
|
||||||
)
|
)
|
||||||
from .parser import get_video_duration
|
from .parser import get_video_duration, resolve_video_file
|
||||||
|
|
||||||
|
|
||||||
def build_render_plan(
|
def build_render_plan(
|
||||||
@@ -26,12 +26,27 @@ def build_render_plan(
|
|||||||
This transforms transcript markers into timed slide events and
|
This transforms transcript markers into timed slide events and
|
||||||
assembles all information needed for the render stage.
|
assembles all information needed for the render stage.
|
||||||
"""
|
"""
|
||||||
# For POC: use the first video as the talking head
|
# Determine talking head source:
|
||||||
talking_head_id = next(iter(videos.keys()))
|
# 1. If config.talking_head.file is set, use that (may be JSON metadata)
|
||||||
talking_head = videos[talking_head_id]
|
# 2. Otherwise, use first video from videos.json
|
||||||
|
if config.talking_head.file:
|
||||||
|
video_path, metadata = resolve_video_file(project_path, config.talking_head.file)
|
||||||
|
# Create a VideoSource from the resolved metadata
|
||||||
|
if metadata:
|
||||||
|
talking_head = VideoSource(
|
||||||
|
file=str(video_path.relative_to(project_path)) if video_path.is_relative_to(project_path) else str(video_path),
|
||||||
|
preprocess=metadata.preprocess,
|
||||||
|
output_file=metadata.output.get("file") if metadata.output else None,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
talking_head = VideoSource(file=config.talking_head.file)
|
||||||
|
else:
|
||||||
|
# Fall back to first video in videos.json
|
||||||
|
talking_head_id = next(iter(videos.keys()))
|
||||||
|
talking_head = videos[talking_head_id]
|
||||||
|
video_path = project_path / talking_head.file
|
||||||
|
|
||||||
# Get video duration for end time calculations
|
# Get video duration for end time calculations
|
||||||
video_path = project_path / talking_head.file
|
|
||||||
total_duration = get_video_duration(video_path)
|
total_duration = get_video_duration(video_path)
|
||||||
|
|
||||||
# Build slide events from transcript markers
|
# Build slide events from transcript markers
|
||||||
@@ -49,6 +64,7 @@ def build_render_plan(
|
|||||||
total_duration=total_duration,
|
total_duration=total_duration,
|
||||||
slides=slides,
|
slides=slides,
|
||||||
slides_dir=slides_dir,
|
slides_dir=slides_dir,
|
||||||
|
talking_head_path=video_path,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
+20
-4
@@ -74,12 +74,28 @@ def validate_project(
|
|||||||
project_path / "videos.json"
|
project_path / "videos.json"
|
||||||
))
|
))
|
||||||
|
|
||||||
# Check background video exists (if specified)
|
# Check preprocessed output exists if preprocessing is defined
|
||||||
if config.background_video:
|
if video_source.preprocess and video_source.output_file:
|
||||||
bg_path = project_path / config.background_video
|
output_path = project_path / video_source.output_file
|
||||||
|
if not output_path.exists():
|
||||||
|
issues.append(ValidationIssue(
|
||||||
|
f"Preprocessed output not found: {video_source.output_file}. "
|
||||||
|
f"Run with -a preprocess first.",
|
||||||
|
project_path / "videos.json"
|
||||||
|
))
|
||||||
|
|
||||||
|
# Check background exists (image or video)
|
||||||
|
# Try 'background' first, fall back to deprecated 'background_video'
|
||||||
|
bg_file = config.background or config.background_video
|
||||||
|
if bg_file:
|
||||||
|
# Check in project folder first, then parent (for shared_assets)
|
||||||
|
bg_path = project_path / bg_file
|
||||||
|
if not bg_path.exists():
|
||||||
|
# Try parent directory (shared_assets at repo root)
|
||||||
|
bg_path = project_path.parent / bg_file
|
||||||
if not bg_path.exists():
|
if not bg_path.exists():
|
||||||
issues.append(ValidationIssue(
|
issues.append(ValidationIssue(
|
||||||
f"Background video not found: {config.background_video}",
|
f"Background not found: {bg_file}",
|
||||||
project_path / "project.json"
|
project_path / "project.json"
|
||||||
))
|
))
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user