Commit prior to change to video tag below / above layering

This commit is contained in:
2026-03-16 16:57:54 +01:00
parent 757d966803
commit e734dbfcac
12 changed files with 416 additions and 154 deletions
+2 -20
View File
@@ -23,26 +23,8 @@
"talkinghead": [ "talkinghead": [
{ {
"type": "audio_normalize", "type": "audio_normalize",
"highpass": 100,
"room_eq": true,
"room_eq_freq": 300,
"room_eq_gain": -4,
"room_eq_width": 1.5,
"dereverb_model": "shared_assets/models/std.rnnn",
"dereverb_mix": 0.8,
"denoise": true,
"noise_floor": -25,
"gate": true,
"gate_threshold": -35,
"gate_range": -20,
"compress": true,
"threshold": -20,
"ratio": 4,
"attack": 5,
"release": 50,
"makeup": 2,
"normalize": true, "normalize": true,
"target_lufs": -16, "target_lufs": -14,
"target_lra": 11, "target_lra": 11,
"target_tp": -1.5 "target_tp": -1.5
}, },
@@ -101,5 +83,5 @@
}, },
"manuscript": "manuscript.txt", "manuscript": "manuscript.txt",
"shorts": [], "shorts": [],
"output_video": "out/final.mp4" "output_video": "TRAILER.mp4"
} }
+1 -1
View File
@@ -14,7 +14,7 @@
"videos": "media/videos/videos.json", "videos": "media/videos/videos.json",
"slides": "media/slides/Example/slides.json", "slides": "media/slides/Example/slides.json",
"audio": "media/audio/audio.json", "audio": "media/audio/audio.json",
"default_filters": { "default_filters": {
"talkinghead": [ "talkinghead": [
{ {
"type": "audio_normalize", "type": "audio_normalize",
+204 -72
View File
@@ -2,12 +2,15 @@
import argparse import argparse
import json import json
from logging import config
import re import re
import shutil import shutil
import subprocess import subprocess
import sys import sys
from datetime import datetime from datetime import datetime
from pathlib import Path from pathlib import Path
from gnommo.parser import _read_json
from . import __version__ from . import __version__
from .errors import GnommoError, ParseError, ValidationError, RenderError from .errors import GnommoError, ParseError, ValidationError, RenderError
from .cache import get_cache_info, resolve_with_cache from .cache import get_cache_info, resolve_with_cache
@@ -35,10 +38,15 @@ Examples:
gnommo -p video1 import Generate slides.json from images gnommo -p video1 import Generate slides.json from images
gnommo -p video1 pre Preprocess videos (chroma key, etc.) gnommo -p video1 pre Preprocess videos (chroma key, etc.)
gnommo -p video1 stitch --res tiny -f Fast stitch with new begin/end values gnommo -p video1 stitch --res tiny -f Fast stitch with new begin/end values
gnommo -p video1 trim Auto-detect silence and set skip/take in narration.json
gnommo -p video1 trim --force Redo trim even for segments that already have skip/take
gnommo -p video1 trim --threshold -25 Raise threshold to ignore clothing/room noise
gnommo -p video1 trim -v Show detected silence periods for debugging
gnommo -p video1 all Full pipeline: transcribe → align → render gnommo -p video1 all Full pipeline: transcribe → align → render
gnommo -p video1 render --dry-run Show FFmpeg command without running gnommo -p video1 render --dry-run Show FFmpeg command without running
gnommo -p video1 description Generate YouTube description file gnommo -p video1 description Generate YouTube description file
gnommo -p video1 transcribe --final Transcribe final.mp4 and generate SRT for YouTube gnommo -p video1 transcribe Narration file for timing of slides
gnommo -p video1 transcribe --final Transcribe outputted file and generate SRT for YouTube
gnommo -p video1 archive Sync project to external cache storage gnommo -p video1 archive Sync project to external cache storage
gnommo -p video1 archive --dry-run Preview what would be synced gnommo -p video1 archive --dry-run Preview what would be synced
gnommo -p video1 extract-audio --combined Extract audio from narration_combined.mov gnommo -p video1 extract-audio --combined Extract audio from narration_combined.mov
@@ -71,6 +79,7 @@ Examples:
"preprocess", "preprocess",
"pre", "pre",
"stitch", "stitch",
"trim",
"render", "render",
"all", "all",
"transcribe", "transcribe",
@@ -156,6 +165,12 @@ Examples:
action="store_true", action="store_true",
help="Target production server (GNOMMOWEB_PROD_URL / GNOMMOWEB_PROD_API_KEY)", help="Target production server (GNOMMOWEB_PROD_URL / GNOMMOWEB_PROD_API_KEY)",
) )
parser.add_argument(
"--threshold",
type=float,
default=-40.0,
help="For trim: silence threshold in dB (default: -40). Raise (e.g. -25) to ignore clothing/room noise.",
)
args = parser.parse_args() args = parser.parse_args()
@@ -181,6 +196,8 @@ Examples:
args.workers, args.workers,
args.res, args.res,
) )
elif action == "trim":
return cmd_trim(project_path, args.verbose, args.force, args.threshold)
elif action in ("stitch"): elif action in ("stitch"):
return cmd_stitch( return cmd_stitch(
project_path, project_path,
@@ -223,7 +240,7 @@ Examples:
return cmd_pull(project_path, args.verbose, args.force, args.prod) return cmd_pull(project_path, args.verbose, args.force, args.prod)
elif action == "handoff": elif action == "handoff":
from .handoff import cmd_handoff from .handoff import cmd_handoff
return cmd_handoff(project_path, args.verbose, args.file, args.prod) return cmd_handoff(project_path, args.verbose, args.file, args.prod, args.res)
except GnommoError as e: except GnommoError as e:
print(f"Error: {e}", file=sys.stderr) print(f"Error: {e}", file=sys.stderr)
@@ -242,7 +259,7 @@ Examples:
def cmd_import(project_path: Path, force: bool, verbose: bool) -> int: def cmd_import(project_path: Path, force: bool, verbose: bool) -> int:
"""Import assets and generate metadata JSON files.""" """Import assets and generate metadata JSON files."""
from .parser import parse_project_config from .parser import parse_project_config, _read_json
print(f"Importing assets for: {project_path.name}") print(f"Importing assets for: {project_path.name}")
@@ -367,8 +384,7 @@ def _import_shared_assets(shared_assets_dir: Path, verbose: bool) -> None:
videos_json_path = shared_assets_dir / "videos.json" videos_json_path = shared_assets_dir / "videos.json"
existing_videos: dict = {} existing_videos: dict = {}
if videos_json_path.exists(): if videos_json_path.exists():
with open(videos_json_path, "r", encoding="utf-8") as f: existing_videos = _read_json(videos_json_path)
existing_videos = json.load(f)
# Add new videos (don't overwrite existing) # Add new videos (don't overwrite existing)
added_count = 0 added_count = 0
@@ -474,8 +490,7 @@ def _import_videos(videos_dir: Path, config, verbose: bool) -> None:
videos_json_path = videos_dir / "videos.json" videos_json_path = videos_dir / "videos.json"
existing_videos: dict = {} existing_videos: dict = {}
if videos_json_path.exists(): if videos_json_path.exists():
with open(videos_json_path, "r", encoding="utf-8") as f: existing_videos = _read_json(videos_json_path)
existing_videos = json.load(f)
# Get available filter presets from config # Get available filter presets from config
default_filters = config.default_filters if config else {} default_filters = config.default_filters if config else {}
@@ -558,8 +573,7 @@ def _import_narration_segments(narration_dir: Path, config, verbose: bool) -> No
narration_json_path = narration_dir / "narration.json" narration_json_path = narration_dir / "narration.json"
existing_narration: dict = {} existing_narration: dict = {}
if narration_json_path.exists(): if narration_json_path.exists():
with open(narration_json_path, "r", encoding="utf-8") as f: existing_narration = _read_json(narration_json_path)
existing_narration = json.load(f)
# Get available filter presets from config # Get available filter presets from config
default_filters = config.default_filters if config else {} default_filters = config.default_filters if config else {}
@@ -583,9 +597,11 @@ def _import_narration_segments(narration_dir: Path, config, verbose: bool) -> No
# Apply talkinghead preset if available # Apply talkinghead preset if available
if "talkinghead" in default_filters: if "talkinghead" in default_filters:
narration_entry["filter"] = "talkinghead"
narration_entry["cutout"] = "talkinghead" narration_entry["cutout"] = "talkinghead"
if "talkinghead" in default_filters:
narration_entry["filter"] = "talkinghead"
# Default audio settings for narration # Default audio settings for narration
narration_entry["use_audio_channels"] = "left" narration_entry["use_audio_channels"] = "left"
narration_entry["defer_loudnorm"] = True narration_entry["defer_loudnorm"] = True
@@ -656,7 +672,7 @@ def _import_presenter_notes(
# Parse JSON output from JXA script # Parse JSON output from JXA script
try: try:
notes_data = json.loads(proc.stdout) notes_data = json.loads(proc.stdout) if proc.stdout.strip() else []
except json.JSONDecodeError as e: except json.JSONDecodeError as e:
print(f" Error parsing notes JSON: {e}", file=sys.stderr) print(f" Error parsing notes JSON: {e}", file=sys.stderr)
return return
@@ -714,9 +730,11 @@ def cmd_validate(project_path: Path, verbose: bool) -> int:
print(f" - Videos defined: {len(videos)}") print(f" - Videos defined: {len(videos)}")
# Validate # Validate
validate_project( warnings = validate_project(
project_path, markers, config, slides, videos, videos_dir, malformed project_path, markers, config, slides, videos, videos_dir, malformed
) )
for w in warnings:
print(f" Warning: {w}")
print("Validation passed.") print("Validation passed.")
return 0 return 0
@@ -735,9 +753,9 @@ def cmd_preprocess(
workers: int = 1, workers: int = 1,
res: str = "full", res: str = "full",
) -> int: ) -> int:
"""Run preprocessing pipeline on narration segments.""" """Run preprocessing pipeline on narration segments and videos."""
from concurrent.futures import ThreadPoolExecutor, as_completed from concurrent.futures import ThreadPoolExecutor, as_completed
from .parser import parse_project_config, parse_narration from .parser import parse_project_config, parse_narration, parse_videos
from .preprocessor import ( from .preprocessor import (
preprocess_video, preprocess_video,
create_downscaled_videos, create_downscaled_videos,
@@ -834,10 +852,118 @@ def cmd_preprocess(
) )
print(f"\n Run 'gnommo -p <project> stitch' to stitch narration segments into one fulll length narration file.") print(f"\n Run 'gnommo -p <project> stitch' to stitch narration segments into one fulll length narration file.")
# Also preprocess videos from videos.json (e.g. chroma key, color grade)
videos, videos_dir = parse_videos(project_path, config)
videos_to_process = [
(vid_id, vid_src)
for vid_id, vid_src in videos.items()
if vid_src.filter and not vid_src.is_shared
]
if videos_to_process:
print(f"\n Processing {len(videos_to_process)} video(s) from videos.json:")
for video_id, video_source in videos_to_process:
if video_source.output_file:
output_path = videos_dir / video_source.output_file
if output_path.exists() and not force:
print(f" {video_id}: output exists, skipping (use --force to reprocess)")
continue
if dry_run:
print(f" Would preprocess: {video_id} ({len(video_source.filter)} filter(s))")
continue
print(f" Processing: {video_id}")
preprocess_video(videos_dir, video_id, video_source, verbose, force, gnommo_scratch)
print("\nPreprocessing complete.") print("\nPreprocessing complete.")
return 0 return 0
# =============================================================================
# Trim Command — auto-detect silence bounds for narration segments
# =============================================================================
def cmd_trim(
    project_path: Path,
    verbose: bool,
    force: bool = False,
    threshold_db: float = -40.0,
) -> int:
    """
    Auto-detect silence bounds for all narration segments and write skip/take
    values into narration.json.

    For each segment:
        skip = max(0, first_sound_time - 0.5)
        take = last_sound_time + 3.0 - skip   (capped at file duration)

    Segments that already have explicit skip or take values are left unchanged
    unless --force is passed.

    Use --threshold to adjust sensitivity, e.g. -25 to ignore clothing/room
    noise that sits above -40 dB.

    Args:
        project_path: Root directory of the project to trim.
        verbose: Forwarded to detect_silence_bounds for extra diagnostics.
        force: Recompute skip/take even for segments that already have them.
        threshold_db: Silence threshold in dB handed to detect_silence_bounds.

    Returns:
        1 when no narration segments are defined, otherwise 0.
    """
    from .parser import parse_project_config, parse_narration
    from .preprocessor import detect_silence_bounds, get_video_duration

    # Padding (seconds) kept before the first and after the last detected sound.
    lead_in = 0.5
    tail_out = 3.0

    print(f"Auto-trimming narration: {project_path.name}")

    config = parse_project_config(project_path)
    narration, narration_dir = parse_narration(project_path, config)

    if not narration:
        print(" No narration segments found in narration.json")
        print(" Run 'gnommo -p <project> import' first.")
        return 1

    # Edit the raw JSON (not the parsed objects) so that every other field in
    # narration.json is written back unchanged.
    narration_json_path = narration_dir / "narration.json"
    raw_data: dict = _read_json(narration_json_path)

    updated = 0
    for seg_id in sorted(narration.keys()):
        seg = narration[seg_id]

        # Validate the raw entry up front: indexing raw_data[seg_id] later
        # would raise KeyError after the (slow) analysis and discard every
        # update computed so far for the other segments.
        entry = raw_data.get(seg_id)
        if not isinstance(entry, dict):
            print(f" {seg_id}: no entry in narration.json, skipping")
            continue

        has_explicit = "skip" in entry or "take" in entry
        if has_explicit and not force:
            print(f" {seg_id}: already trimmed, skipping (use --force to redo)")
            continue

        # Always analyse the raw source file — it's always present and has the
        # same audio as any processed version (processing is video-only).
        source_path = narration_dir / seg.source_file
        if not source_path.exists():
            print(f" {seg_id}: source file not found ({seg.source_file}), skipping")
            continue

        print(f" {seg_id}: analysing...", end="", flush=True)
        first_sound, last_sound = detect_silence_bounds(
            source_path, noise_threshold_db=threshold_db, verbose=verbose
        )
        total_dur = get_video_duration(source_path)

        new_skip = max(0.0, round(first_sound - lead_in, 3))
        # Never take more than what remains of the file after the skip, and
        # never go negative if detection returned degenerate bounds.
        new_take = round(
            min(total_dur - new_skip, last_sound + tail_out - new_skip), 3
        )
        new_take = max(0.0, new_take)

        print(
            f" first={first_sound:.2f}s last={last_sound:.2f}s"
            f" → skip={new_skip:.3f}s take={new_take:.3f}s"
        )

        entry["skip"] = new_skip
        entry["take"] = new_take
        updated += 1

    if updated > 0:
        with open(narration_json_path, "w", encoding="utf-8") as f:
            json.dump(raw_data, f, indent=2)
        print(f"\n Updated {updated} segment(s) in narration.json")
    else:
        print("\n No segments updated")

    return 0
# ============================================================================= # =============================================================================
# Stitch Command (fast iteration on narration segments) # Stitch Command (fast iteration on narration segments)
# ============================================================================= # =============================================================================
@@ -903,19 +1029,17 @@ def cmd_stitch(
if stitch_output.exists() and not force: if stitch_output.exists() and not force:
print(f"\n Combined narration exists: {stitch_output.name}") print(f"\n Combined narration exists: {stitch_output.name}")
print(" (use --force to regenerate)") print(" (use --force to regenerate)")
return 0 else:
stitch_narration_segments(
stitch_narration_segments( narration_dir,
narration_dir, segment_ids,
segment_ids, narration,
narration, stitch_output,
stitch_output, verbose=verbose,
verbose=verbose, default_end_trim=config.default_end_trim if config else 0.0,
default_end_trim=config.default_end_trim if config else 0.0, )
) # Run import videos again, because at this point narration_combined might have been created.
_import_videos(videos_dir, config, verbose)
# Run import videos again, because at this point narration_combined might have been created.
_import_videos(videos_dir, config, verbose)
# Always update the MAIN videos.json (parent of subdir when using low/tiny res) # Always update the MAIN videos.json (parent of subdir when using low/tiny res)
# Downscaled dirs only affect file paths, not JSON metadata updates # Downscaled dirs only affect file paths, not JSON metadata updates
@@ -924,12 +1048,11 @@ def cmd_stitch(
if True: # Always update JSON regardless of proxy mode if True: # Always update JSON regardless of proxy mode
existing_videos: dict = {} existing_videos: dict = {}
if videos_json_path.exists(): if videos_json_path.exists():
with open(videos_json_path, "r", encoding="utf-8") as f: existing_videos = _read_json(videos_json_path)
existing_videos = json.load(f)
# Get cutout from first narration segment # Get cutout from first narration segment
first_seg = narration[segment_ids[0]] first_seg = narration[segment_ids[0]]
cutout = first_seg.cutout or "talkinghead" cutout = first_seg.cutout or "talkinghead" # Default to audioonly if no cutout specified
# Create/update narration_combined entry # Create/update narration_combined entry
existing_videos["narration_combined"] = { existing_videos["narration_combined"] = {
@@ -1149,7 +1272,10 @@ def cmd_render(
# Non-full res: use downscaled video directory, create on-the-fly if needed # Non-full res: use downscaled video directory, create on-the-fly if needed
if res != "full": if res != "full":
videos_dir = ensure_downscaled_files_exist(videos_dir, res, force=False, verbose=verbose) # Skip downscaling sources that have a preprocessed output_file — the
# renderer will use the full-res processed version instead, saving disk space.
sources_with_output = {v.source_file for v in videos.values() if v.output_file}
videos_dir = ensure_downscaled_files_exist(videos_dir, res, force=False, verbose=verbose, skip_sources=sources_with_output)
if verbose: if verbose:
print(f" Using {res} dir: {videos_dir}") print(f" Using {res} dir: {videos_dir}")
audio, audio_dir = parse_audio(project_path, config) audio, audio_dir = parse_audio(project_path, config)
@@ -1246,9 +1372,11 @@ def cmd_render(
# Stage 2: Validate # Stage 2: Validate
print("\n[2/4] Validating...") print("\n[2/4] Validating...")
validate_project( warnings = validate_project(
project_path, markers, config, slides, videos, videos_dir, malformed project_path, markers, config, slides, videos, videos_dir, malformed
) )
for w in warnings:
print(f" Warning: {w}")
print(" Passed.") print(" Passed.")
# Stage 3: Transform (includes on-the-fly alignment) # Stage 3: Transform (includes on-the-fly alignment)
@@ -1310,14 +1438,19 @@ def cmd_render(
print(f"\n Continuing anyway due to --force flag...") print(f"\n Continuing anyway due to --force flag...")
# Stage 4: Render # Stage 4: Render
# Generate output filename based on slide range and resolution # Determine output filename and directory
base_name = "preview" if res == "low" else "final" if config.output_video:
if slide_range: out_filename = config.output_video
elif slide_range:
start, end = slide_range start, end = slide_range
range_suffix = f"_{start}-{end}" if end else f"_{start}-end" range_suffix = f"_{start}-{end}" if end else f"_{start}-end"
output_path = project_path / "out" / f"{base_name}{range_suffix}.mp4" out_filename = f"final{range_suffix}.mp4"
else: else:
output_path = project_path / "out" / f"{base_name}.mp4" out_filename = f"{config.co}.mp4"
out_dir = project_path / "out" / res if res != "full" else project_path / "out"
output_path = out_dir / out_filename
plan.output_path = output_path
if dry_run: if dry_run:
print("\n[4/4] FFmpeg command (dry run):") print("\n[4/4] FFmpeg command (dry run):")
@@ -1372,15 +1505,17 @@ def cmd_transcribe(
from .transcriber import transcribe_video, save_transcript, words_to_srt from .transcriber import transcribe_video, save_transcript, words_to_srt
from .parser import parse_project_config, parse_videos from .parser import parse_project_config, parse_videos
from .preprocessor import ensure_downscaled_files_exist from .preprocessor import ensure_downscaled_files_exist
config = parse_project_config(project_path)
# Handle --final mode: transcribe the rendered output for YouTube captions # Handle --final mode: transcribe the rendered output for YouTube captions
if final: if final:
return _transcribe_final(project_path, verbose) path = project_path / "out" / f"{config.output_video}.mp4"
return _transcribe_final(path, verbose)
mode_str = f" ({res.upper()})" if res != "full" else "" mode_str = f" ({res.upper()})" if res != "full" else ""
print(f"Transcribing: {project_path.name}{mode_str}") print(f"Transcribing: {project_path.name}{mode_str}")
config = parse_project_config(project_path)
videos, videos_dir = parse_videos(project_path, config) videos, videos_dir = parse_videos(project_path, config)
if not videos: if not videos:
print("Error: No videos defined in videos.json", file=sys.stderr) print("Error: No videos defined in videos.json", file=sys.stderr)
@@ -1433,23 +1568,20 @@ def cmd_transcribe(
return 0 return 0
def _transcribe_final(project_path: Path, verbose: bool) -> int: def _transcribe_final(final_video: Path, verbose: bool) -> int:
""" """
Transcribe the final rendered video and generate SRT captions for YouTube. Transcribe the final rendered video and generate SRT captions for YouTube.
Looks for out/final.mp4 and creates out/final.srt suitable for upload. Looks and creates out filename.srt suitable for upload.
""" """
from .transcriber import transcribe_video, save_transcript, words_to_srt from .transcriber import transcribe_video, save_transcript, words_to_srt
print(f"Transcribing final output: {project_path.name}") print(f"Transcribing final output: {final_video}")
# Look for the final rendered video
out_dir = project_path / "out"
final_video = out_dir / "final.mp4"
if not final_video.exists(): if not final_video.exists():
print(f"Error: Final video not found: {final_video}", file=sys.stderr) print(f"Error: Final video not found: {final_video}", file=sys.stderr)
print(f"Run 'gnommo -p {project_path.name} render' first.", file=sys.stderr) print("Run 'gnommo render' first.", file=sys.stderr)
return 1 return 1
print(f" Video: {final_video.name}") print(f" Video: {final_video.name}")
@@ -1462,11 +1594,11 @@ def _transcribe_final(project_path: Path, verbose: bool) -> int:
return 1 return 1
# Save JSON transcript # Save JSON transcript
transcript_path = out_dir / "final.transcript.json" transcript_path = final_video.with_suffix(".transcript.json")
save_transcript(words, transcript_path) save_transcript(words, transcript_path)
# Generate SRT captions # Generate SRT captions
srt_path = out_dir / "final.srt" srt_path = final_video.with_suffix(".srt")
srt_content = words_to_srt(words) srt_content = words_to_srt(words)
srt_path.write_text(srt_content, encoding="utf-8") srt_path.write_text(srt_content, encoding="utf-8")
@@ -1597,33 +1729,33 @@ def cmd_all(
res: str = "full", res: str = "full",
force: bool = False, force: bool = False,
) -> int: ) -> int:
"""Run full pipeline: transcribe → render (alignment is automatic).""" """Run full pipeline: preprocess → stitch → render → handoff."""
from .parser import parse_project_config, parse_videos from .handoff import cmd_handoff
print(f"=== Full Pipeline: {project_path.name} ===\n") print(f"=== Full Pipeline: {project_path.name} ===\n")
# Check if transcription exists print(">>> Step 1/5: Import\n")
config = parse_project_config(project_path) result = cmd_import(project_path, force, verbose)
videos, videos_dir = parse_videos(project_path, config) if result != 0:
result = _find_narration_video(config, videos) return result
if result:
video_id, video_source = result
video_path = videos_dir / video_source.source_file
transcript_path = video_path.with_suffix(".transcript.json")
# Try cache fallback for transcript print("\n>>> Step 2/5: Preprocess\n")
resolved_transcript, _ = resolve_with_cache(transcript_path, project_path) result = cmd_preprocess(project_path, verbose, dry_run, force, workers=1, res=res)
if not resolved_transcript.exists(): if result != 0:
print(">>> Step 1/2: Transcribe\n") return result
result = cmd_transcribe(project_path, verbose)
if result != 0:
return result
else:
print(f">>> Step 1/2: Transcribe (cached: {resolved_transcript.name})\n")
# Render (alignment happens automatically) print("\n>>> Step 3/5: Stitch\n")
print("\n>>> Step 2/2: Render\n") result = cmd_stitch(project_path, verbose, force, res=res)
return cmd_render(project_path, verbose, dry_run, res=res, force=force) if result != 0:
return result
print("\n>>> Step 4/5: Render\n")
result = cmd_render(project_path, verbose, dry_run, res=res, force=force)
if result != 0:
return result
print("\n>>> Step 5/5: Handoff\n")
return cmd_handoff(project_path, verbose, file_override=None, prod=False, res=res)
# ============================================================================= # =============================================================================
@@ -1801,7 +1933,7 @@ def cmd_archive(project_path: Path, verbose: bool, dry_run: bool) -> int:
project_json_path = project_path / "project.json" project_json_path = project_path / "project.json"
if project_json_path.exists(): if project_json_path.exists():
try: try:
data = json.loads(project_json_path.read_text(encoding="utf-8")) data = _read_json(project_json_path.read_text(encoding="utf-8"))
data["synced_time"] = datetime.now().isoformat() data["synced_time"] = datetime.now().isoformat()
project_json_path.write_text( project_json_path.write_text(
json.dumps(data, indent=2, ensure_ascii=False) + "\n", json.dumps(data, indent=2, ensure_ascii=False) + "\n",
+1 -4
View File
@@ -176,12 +176,9 @@ def generate_chapters(
for slide_id in slide_ids: for slide_id in slide_ids:
if slide_id not in timing_lookup: if slide_id not in timing_lookup:
continue continue
timestamp = timing_lookup[slide_id] timestamp = timing_lookup[slide_id]
title = _extract_chapter_title(manuscript_text, slide_id, slides) title = _extract_chapter_title(manuscript_text, slide_id, slides)
if chapters and (timestamp - chapters[-1].timestamp) < min_chapter_duration:
# Check if we should merge with previous chapter (too short)
if chapters and (timestamp - chapters[-1].timestamp) < min_chapter_duration:
continue # Skip this chapter, previous one covers it continue # Skip this chapter, previous one covers it
chapters.append( chapters.append(
+4 -4
View File
@@ -23,12 +23,12 @@ import tempfile
import zipfile import zipfile
from pathlib import Path from pathlib import Path
from gnommo.parser import _read_json
def write_manuscript(data: Path, out_path: Path): def write_manuscript(data: Path, out_path: Path):
data = json.loads(
data.read_text(encoding="utf-8") data = _read_json(data.read_text(encoding="utf-8"))
) # list of {"slide_index": int, "notes": str}
lines = [] lines = []
i = 0 i = 0
for item in data: for item in data:
+8 -5
View File
@@ -69,7 +69,7 @@ def _write_sync(project_path: Path, data: dict, prod: bool = False):
json.dump(data, f, indent=2) json.dump(data, f, indent=2)
def cmd_handoff(project_path: Path, verbose: bool = False, file_override: str | None = None, prod: bool = False) -> int: def cmd_handoff(project_path: Path, verbose: bool = False, file_override: str | None = None, prod: bool = False, res: str = "full") -> int:
_load_env_file() _load_env_file()
if prod: if prod:
@@ -104,14 +104,17 @@ def cmd_handoff(project_path: Path, verbose: bool = False, file_override: str |
if file_override: if file_override:
video_path = Path(file_override) video_path = Path(file_override)
else: else:
output_video = project.get("output_video") output_filename = project.get("output") or Path(project.get("output_video", "")).name
if not output_video: if not output_filename:
print( print(
"Error: no 'output_video' field in project.json and no --file provided.", "Error: no 'output' field in project.json and no --file provided.",
file=sys.stderr, file=sys.stderr,
) )
return 1 return 1
video_path = project_path / output_video if res != "full":
video_path = project_path / "out" / res / output_filename
else:
video_path = project_path / "out" / output_filename
if not video_path.exists(): if not video_path.exists():
print(f"Error: video file not found: {video_path}", file=sys.stderr) print(f"Error: video file not found: {video_path}", file=sys.stderr)
+2
View File
@@ -65,6 +65,7 @@ class ProjectConfig:
# YouTube description fields # YouTube description fields
description: str = "" # Video description text for YouTube description: str = "" # Video description text for YouTube
footer: str = "" # Footer text (social links, subscribe CTA, etc.) footer: str = "" # Footer text (social links, subscribe CTA, etc.)
output_video: str = "" # Output filename (e.g. "DISC_INT3.mp4"); placed in out/ or out/<res>/
@dataclass @dataclass
@@ -507,6 +508,7 @@ class RenderPlan:
cached_files: set = field( cached_files: set = field(
default_factory=set default_factory=set
) # Video IDs loaded from external cache (show 📁 indicator) ) # Video IDs loaded from external cache (show 📁 indicator)
output_path: Optional[Path] = None # Final output file path (set after plan is built)
# Slide layout configurations (hardcoded for POC) # Slide layout configurations (hardcoded for POC)
+14 -7
View File
@@ -19,6 +19,12 @@ from .models import (
) )
def _read_json(path: Path) -> Any:
    """Read and parse a JSON file, treating an empty file as {}.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The decoded JSON value, or an empty dict when the file contains
        nothing but whitespace (and/or a byte-order mark).

    Raises:
        json.JSONDecodeError: If the file has content that is not valid JSON.
        OSError: If the file cannot be read.
    """
    # "utf-8-sig" decodes plain UTF-8 identically but drops a leading BOM,
    # which some editors add and which json.loads() rejects outright.
    text = path.read_text(encoding="utf-8-sig").strip()
    return json.loads(text) if text else {}
def parse_manuscript( def parse_manuscript(
project_path: Path, project_path: Path,
) -> tuple[str, list[str], list[tuple[int, str]], list[Citation]]: ) -> tuple[str, list[str], list[tuple[int, str]], list[Citation]]:
@@ -132,7 +138,7 @@ def load_citations(path: Path) -> list[Citation]:
"""Load citations from a JSON file.""" """Load citations from a JSON file."""
if not path.exists(): if not path.exists():
return [] return []
data = json.loads(path.read_text(encoding="utf-8")) data = _read_json(path)
return [ return [
Citation( Citation(
reference=item["reference"], reference=item["reference"],
@@ -151,7 +157,7 @@ def parse_project_config(project_path: Path) -> ProjectConfig:
raise ParseError("project.json not found", config_path) raise ParseError("project.json not found", config_path)
try: try:
data = json.loads(config_path.read_text(encoding="utf-8")) data = _read_json(config_path)
except json.JSONDecodeError as e: except json.JSONDecodeError as e:
raise ParseError(f"Invalid JSON: {e}", config_path) raise ParseError(f"Invalid JSON: {e}", config_path)
@@ -204,6 +210,7 @@ def parse_project_config(project_path: Path) -> ProjectConfig:
outro=data.get("outro", []), outro=data.get("outro", []),
description=data.get("description", ""), description=data.get("description", ""),
footer=data.get("footer", ""), footer=data.get("footer", ""),
output_video=data.get("output_video", ""),
) )
@@ -239,7 +246,7 @@ def parse_slides(
raise ParseError(f"slides file not found: {local_slides_path}", local_slides_path) raise ParseError(f"slides file not found: {local_slides_path}", local_slides_path)
try: try:
data = json.loads(slides_path.read_text(encoding="utf-8")) data = _read_json(slides_path)
except json.JSONDecodeError as e: except json.JSONDecodeError as e:
raise ParseError(f"Invalid JSON: {e}", slides_path) raise ParseError(f"Invalid JSON: {e}", slides_path)
@@ -283,7 +290,7 @@ def parse_audio(
return {}, audio_dir return {}, audio_dir
try: try:
data = json.loads(audio_path.read_text(encoding="utf-8")) data = _read_json(audio_path)
except json.JSONDecodeError as e: except json.JSONDecodeError as e:
raise ParseError(f"Invalid JSON: {e}", audio_path) raise ParseError(f"Invalid JSON: {e}", audio_path)
@@ -382,7 +389,7 @@ def parse_videos(
raise ParseError(f"videos.json not found: {local_videos_path}", local_videos_path) raise ParseError(f"videos.json not found: {local_videos_path}", local_videos_path)
try: try:
data = json.loads(videos_path.read_text(encoding="utf-8")) data = _read_json(videos_path)
except json.JSONDecodeError as e: except json.JSONDecodeError as e:
raise ParseError(f"Invalid JSON: {e}", videos_path) raise ParseError(f"Invalid JSON: {e}", videos_path)
@@ -489,7 +496,7 @@ def parse_narration(
return {}, narration_dir return {}, narration_dir
try: try:
data = json.loads(narration_path.read_text(encoding="utf-8")) data = _read_json(narration_path)
except json.JSONDecodeError as e: except json.JSONDecodeError as e:
raise ParseError(f"Invalid JSON: {e}", narration_path) raise ParseError(f"Invalid JSON: {e}", narration_path)
@@ -594,7 +601,7 @@ def parse_video_metadata(metadata_path: Path) -> VideoMetadata:
raise ParseError(f"Video metadata not found: {metadata_path}", metadata_path) raise ParseError(f"Video metadata not found: {metadata_path}", metadata_path)
try: try:
data = json.loads(metadata_path.read_text(encoding="utf-8")) data = _read_json(metadata_path)
except json.JSONDecodeError as e: except json.JSONDecodeError as e:
raise ParseError(f"Invalid JSON: {e}", metadata_path) raise ParseError(f"Invalid JSON: {e}", metadata_path)
+125 -8
View File
@@ -185,10 +185,14 @@ def ensure_downscaled_files_exist(
res: str, res: str,
force: bool = False, force: bool = False,
verbose: bool = False, verbose: bool = False,
skip_sources: set = None,
) -> Path: ) -> Path:
""" """
Ensure downscaled copies exist for all videos in source_dir for the given res preset. Ensure downscaled copies exist for all videos in source_dir for the given res preset.
Creates them on-the-fly if missing. Returns the output subdirectory. Creates them on-the-fly if missing. Returns the output subdirectory.
skip_sources: optional set of source filenames to skip (e.g. files that have a
preprocessed output_file, where the full-res processed version will be used instead).
""" """
cfg = RES_CONFIGS[res] cfg = RES_CONFIGS[res]
if cfg is None: if cfg is None:
@@ -205,6 +209,7 @@ def ensure_downscaled_files_exist(
and f.suffix.lower() in video_extensions and f.suffix.lower() in video_extensions
and "_processed" not in f.stem and "_processed" not in f.stem
and not f.name.startswith(".") and not f.name.startswith(".")
and (skip_sources is None or f.name not in skip_sources)
] ]
if not video_files: if not video_files:
@@ -359,6 +364,115 @@ def check_audio_channel_silent(input_path: Path, channel: str, threshold_db: flo
return False, 0.0 return False, 0.0
def _resolve_auto_channel(input_path: Path, threshold_db: float = -60.0) -> str:
    """
    Pick the audio channel setting for a file by probing each channel for signal.

    Both the left and the right channel are checked with
    check_audio_channel_silent() using the same threshold.

    Args:
        input_path: Media file whose audio channels are probed.
        threshold_db: Level passed through to check_audio_channel_silent().

    Returns:
        "right" when only the left channel is reported silent,
        "left" when only the right channel is reported silent,
        "both" otherwise (both channels carry signal, or both are silent).
    """
    # Probe left first, then right (same order as callers would expect from
    # the log output of the underlying check).
    silent = {
        side: bool(check_audio_channel_silent(input_path, side, threshold_db)[0])
        for side in ("left", "right")
    }

    # Exactly one silent channel -> use the other one.
    if silent["left"] != silent["right"]:
        return "right" if silent["left"] else "left"

    return "both"
def detect_silence_bounds(
    input_path: Path,
    noise_threshold_db: float = -40.0,
    min_silence_duration: float = 0.3,
    verbose: bool = False,
) -> tuple[float, float]:
    """
    Detect when audio content starts and ends in a file.

    Uses FFmpeg's silencedetect filter to find the first and last
    non-silent moments. Useful for automatically computing skip/take values.

    Two common preamble shapes are handled:
    - File starts with silence → first_sound = end of that silence.
    - File starts with noise (e.g. clothing rustle) followed by a brief
      quiet gap before speech → first_sound = end of that first gap.

    The silence that runs to the end of the file is never treated as a
    preamble gap, so a clip that has content from the start and only a
    trailing silence yields first_sound = 0.0 (not the end of the file),
    and first_sound never exceeds last_sound.

    Args:
        input_path: Video or audio file to analyse.
        noise_threshold_db: dB level below which audio is considered silent.
            Raise (e.g. -25) to treat low-level noise like clothing rustle
            as silence.
        min_silence_duration: Minimum gap length (seconds) that counts as
            silence. Shorter gaps are ignored.
        verbose: Print detected silence periods for debugging.

    Returns:
        (first_sound_time, last_sound_time) in seconds.
        first_sound_time — when the first meaningful sound begins.
        last_sound_time — when the last meaningful sound ends.
        If no silence is detected at all (including when ffmpeg produces no
        silencedetect output), the result is (0.0, total duration).
    """
    total_duration = get_video_duration(input_path)

    cmd = [
        # -nostdin: ffmpeg must not read from (and disturb) the caller's terminal.
        "ffmpeg", "-nostdin", "-i", str(input_path),
        # -vn: only the audio stream is analysed; skip decoding video frames.
        "-vn",
        "-af",
        f"silencedetect=noise={noise_threshold_db}dB:duration={min_silence_duration}",
        "-f", "null", "/dev/null",
    ]
    # errors="replace": stderr may echo file names/metadata in an arbitrary
    # encoding; a decode error must not abort the analysis.
    result = subprocess.run(cmd, capture_output=True, text=True, errors="replace")

    # Parse silence_start / silence_end lines from stderr
    silence_periods: list[tuple[float, float]] = []
    pending_start: float | None = None
    for line in result.stderr.splitlines():
        if "silence_start:" in line:
            try:
                pending_start = float(line.split("silence_start:")[1].strip())
            except ValueError:
                # Best effort: an unparseable log line is ignored.
                pass
        elif "silence_end:" in line and pending_start is not None:
            try:
                end_t = float(line.split("silence_end:")[1].split("|")[0].strip())
            except ValueError:
                # Best effort: keep waiting for a parseable silence_end.
                pass
            else:
                silence_periods.append((pending_start, end_t))
                pending_start = None

    # File ended while still in silence — close the period at total_duration
    if pending_start is not None:
        silence_periods.append((pending_start, total_duration))

    if verbose:
        print(f"\n silence periods ({len(silence_periods)}):")
        for s, e in silence_periods:
            print(f" {s:.3f}s {e:.3f}s")

    # --- Last sound ---
    # Where the trailing silence begins (if the file ends with silence).
    last_sound = total_duration
    trailing_idx: int | None = None
    if silence_periods and silence_periods[-1][1] >= total_duration - 0.05:
        trailing_idx = len(silence_periods) - 1
        # ffmpeg may report a slightly negative silence_start; clamp to 0.
        last_sound = max(0.0, silence_periods[-1][0])

    # --- First sound ---
    # Take the end of the FIRST silence period found in the preamble window
    # (first 60 s). This handles both:
    #   • file starts with silence → silence[0].start ≈ 0
    #   • file starts with noise (crumpling etc.) then has a brief quiet gap
    #     before speech → silence[0].start > 0
    # The trailing silence is excluded: using its end would place first_sound
    # at the end of the file, after last_sound.
    # If no usable silence is found the file is assumed to start with content.
    PREAMBLE_LIMIT = 60.0
    first_sound = 0.0
    for idx, (s_start, s_end) in enumerate(silence_periods):
        if idx == trailing_idx:
            break
        if s_start < PREAMBLE_LIMIT:
            first_sound = s_end
            break

    return first_sound, last_sound
def preprocess_video( def preprocess_video(
videos_dir: Path, videos_dir: Path,
video_id: str, video_id: str,
@@ -402,9 +516,12 @@ def preprocess_video(
filter_type=None, filter_type=None,
) )
# Quick audio sanity check: warn early if selected channel is silent # Resolve channel setting (auto-detect if needed) and sanity check
channel = video_source.use_audio_channels channel = video_source.use_audio_channels
if channel in ("left", "right"): if channel == "auto":
channel = _resolve_auto_channel(current_input)
print(f" Auto channel detection: using '{channel}'")
elif channel in ("left", "right"):
is_silent, max_vol = check_audio_channel_silent(current_input, channel) is_silent, max_vol = check_audio_channel_silent(current_input, channel)
if is_silent: if is_silent:
raise PreprocessError( raise PreprocessError(
@@ -482,7 +599,7 @@ def preprocess_video(
batch[0], batch[0],
verbose, verbose,
take=None, take=None,
use_audio_channels=video_source.use_audio_channels, use_audio_channels=channel,
skip_loudnorm=video_source.defer_loudnorm, skip_loudnorm=video_source.defer_loudnorm,
) )
current_input = step_output current_input = step_output
@@ -2022,12 +2139,12 @@ def stitch_narration_segments(
f" Skip: {skip}s, Take: {take or 'all'}s, Duration: {effective_duration:.1f}s" f" Skip: {skip}s, Take: {take or 'all'}s, Duration: {effective_duration:.1f}s"
) )
# If no trimming needed, use source directly # Always re-encode every segment to normalize fps and timestamps.
if skip == 0 and take is None: # Mixing un-normalized source files (e.g. 60fps camera) with
trimmed_segments.append(source_path) # trimmed-and-re-encoded 30fps segments causes cumulative A/V drift
continue # in the final concat.
# Trim the segment # Trim/normalize the segment
trimmed_path = temp_dir / f"segment_{i:03d}.mov" trimmed_path = temp_dir / f"segment_{i:03d}.mov"
# Check if source has alpha channel (for ProRes 4444, etc.) # Check if source has alpha channel (for ProRes 4444, etc.)
+31 -23
View File
@@ -5,6 +5,8 @@ import subprocess
from pathlib import Path from pathlib import Path
from .errors import RenderError from .errors import RenderError
from .parser import _read_json
from .preprocessor import _resolve_auto_channel
from .models import ( from .models import (
AudioEvent, AudioEvent,
CameraEvent, CameraEvent,
@@ -179,22 +181,23 @@ def _resolve_video_path(
base_dir = videos_dir base_dir = videos_dir
if video_source.output_file: if video_source.output_file:
video_path = base_dir / video_source.output_file for candidate_dir in [base_dir, base_dir.parent]:
# Check with cache fallback video_path = candidate_dir / video_source.output_file
if project_path: # Check with cache fallback
resolved, _ = resolve_with_cache(video_path, project_path) if project_path:
if resolved.exists(): resolved, _ = resolve_with_cache(video_path, project_path)
return resolved if resolved.exists():
elif video_path.exists(): return resolved
return video_path elif video_path.exists():
# Check for WebM variant (preprocessing outputs compressed WebM instead of ProRes) return video_path
webm_path = video_path.with_suffix(".mov") # Check for WebM variant (preprocessing outputs compressed WebM instead of ProRes)
if project_path: webm_path = video_path.with_suffix(".mov")
resolved, _ = resolve_with_cache(webm_path, project_path) if project_path:
if resolved.exists(): resolved, _ = resolve_with_cache(webm_path, project_path)
return resolved if resolved.exists():
elif webm_path.exists(): return resolved
return webm_path elif webm_path.exists():
return webm_path
# Fall back to source_file with cache fallback # Fall back to source_file with cache fallback
source_path = base_dir / video_source.source_file source_path = base_dir / video_source.source_file
@@ -272,7 +275,6 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]:
from .cache import resolve_with_cache from .cache import resolve_with_cache
# Input: background — resolved via handle in shared_assets/videos.json # Input: background — resolved via handle in shared_assets/videos.json
import json as _json
bg_handle = plan.config.background bg_handle = plan.config.background
has_background = bool(bg_handle) has_background = bool(bg_handle)
bg_idx = None bg_idx = None
@@ -282,7 +284,7 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]:
videos_json_bg = shared_assets_dir / "videos.json" videos_json_bg = shared_assets_dir / "videos.json"
if not videos_json_bg.exists(): if not videos_json_bg.exists():
raise RenderError(f"shared_assets/videos.json not found (needed for background handle '{bg_handle}')") raise RenderError(f"shared_assets/videos.json not found (needed for background handle '{bg_handle}')")
bg_videos = _json.loads(videos_json_bg.read_text()) bg_videos = _read_json(videos_json_bg)
if bg_handle not in bg_videos: if bg_handle not in bg_videos:
raise RenderError(f"Background handle '{bg_handle}' not found in shared_assets/videos.json") raise RenderError(f"Background handle '{bg_handle}' not found in shared_assets/videos.json")
bg_path = shared_assets_dir / bg_videos[bg_handle]["source_file"] bg_path = shared_assets_dir / bg_videos[bg_handle]["source_file"]
@@ -719,7 +721,8 @@ def build_filter_complex(
) )
else: else:
filters.append( filters.append(
f"[{bg_idx}:v]scale={width}:{height}:force_original_aspect_ratio=increase," f"[{bg_idx}:v]fps={plan.config.fps},"
f"scale={width}:{height}:force_original_aspect_ratio=increase,"
f"crop={width}:{height}[bg]" f"crop={width}:{height}[bg]"
) )
else: else:
@@ -742,9 +745,12 @@ def build_filter_complex(
if not plan.narration_pauses: if not plan.narration_pauses:
# Simple case: no pauses, continuous overlay # Simple case: no pauses, continuous overlay
# fps+setpts normalise the source to a constant frame rate and reset
# the timeline to 0 so the video stays locked to the audio track.
video_label = f"av{i}" video_label = f"av{i}"
filters.append( filters.append(
f"[{input_idx}:v]format=yuva444p10le," f"[{input_idx}:v]fps={plan.config.fps},setpts=PTS-STARTPTS,"
f"format=yuva444p10le,"
f"scale={zoomed_width}:{zoomed_height}:force_original_aspect_ratio=increase," f"scale={zoomed_width}:{zoomed_height}:force_original_aspect_ratio=increase,"
f"crop={cut_width}:{cut_height}:(iw-{cut_width})/2:(ih-{cut_height})/2," f"crop={cut_width}:{cut_height}:(iw-{cut_width})/2:(ih-{cut_height})/2,"
f"format=rgba[{video_label}]" f"format=rgba[{video_label}]"
@@ -942,9 +948,11 @@ def build_filter_complex(
narration_volume = 1.0 narration_volume = 1.0
if plan.narration_videos: if plan.narration_videos:
_, first_video_source, _ = plan.narration_videos[0] _, first_video_source, _ = plan.narration_videos[0]
channel_filter = _build_audio_channel_filter( use_channels = first_video_source.use_audio_channels
first_video_source.use_audio_channels if use_channels == "auto":
) narration_path = _resolve_video_path(videos_dir, first_video_source, shared_assets_dir, project_path)
use_channels = _resolve_auto_channel(narration_path)
channel_filter = _build_audio_channel_filter(use_channels)
narration_volume = first_video_source.volume narration_volume = first_video_source.volume
# Build volume filter if not 1.0 # Build volume filter if not 1.0
+23 -9
View File
@@ -4,6 +4,7 @@ from pathlib import Path
from .cache import resolve_with_cache from .cache import resolve_with_cache
from .errors import ValidationError, ValidationIssue from .errors import ValidationError, ValidationIssue
from .parser import _read_json
from .models import ( from .models import (
ProjectConfig, ProjectConfig,
SlideDefinition, SlideDefinition,
@@ -21,9 +22,10 @@ def validate_project(
videos: dict[str, VideoSource], videos: dict[str, VideoSource],
videos_dir: Path, videos_dir: Path,
malformed_markers: list[tuple[int, str]] = None, malformed_markers: list[tuple[int, str]] = None,
) -> None: ) -> list[ValidationIssue]:
""" """
Validate all parsed project data. Raises ValidationError if any issues found. Validate all parsed project data. Raises ValidationError if any issues found.
Returns a list of warnings (non-fatal issues).
Checks: Checks:
- All slide markers in manuscript exist in slides.json - All slide markers in manuscript exist in slides.json
@@ -34,6 +36,7 @@ def validate_project(
- No malformed markers in manuscript - No malformed markers in manuscript
""" """
issues: list[ValidationIssue] = [] issues: list[ValidationIssue] = []
warnings: list[ValidationIssue] = []
# Check for malformed markers first (these are likely typos) # Check for malformed markers first (these are likely typos)
if malformed_markers: if malformed_markers:
@@ -64,9 +67,9 @@ def validate_project(
base_name = video_id.rsplit(".", 1)[0] base_name = video_id.rsplit(".", 1)[0]
if base_name in videos: if base_name in videos:
hint = f" (Did you mean [video:{base_name}]? Don't include file extensions in markers)" hint = f" (Did you mean [video:{base_name}]? Don't include file extensions in markers)"
issues.append( warnings.append(
ValidationIssue( ValidationIssue(
f"Video marker [{marker}] referenced in manuscript but '{video_id}' not defined in videos.json{hint}", f"Video marker [{marker}] referenced in manuscript but '{video_id}' not defined in videos.json{hint} — using PlaceholderVideo instead",
project_path / "manuscript.txt", project_path / "manuscript.txt",
) )
) )
@@ -76,9 +79,9 @@ def validate_project(
if marker.startswith("narration:"): if marker.startswith("narration:"):
video_id = marker[10:] # Remove 'narration:' prefix video_id = marker[10:] # Remove 'narration:' prefix
if video_id not in videos: if video_id not in videos:
issues.append( warnings.append(
ValidationIssue( ValidationIssue(
f"Narration marker [{marker}] referenced in manuscript but '{video_id}' not defined in videos.json", f"Narration marker [{marker}] referenced in manuscript but '{video_id}' not defined in videos.json — using PlaceholderVideo instead",
project_path / "manuscript.txt", project_path / "manuscript.txt",
) )
) )
@@ -88,6 +91,16 @@ def validate_project(
if marker.startswith("segment:"): if marker.startswith("segment:"):
continue continue
# Unknown namespaced markers (e.g. [background:xxx]) — not supported, ignore with warning
if ":" in marker:
warnings.append(
ValidationIssue(
f"Unknown marker type [{marker}] — ignoring (no support for '{marker.split(':', 1)[0]}:' markers)",
project_path / "manuscript.txt",
)
)
continue
if marker not in slides: if marker not in slides:
issues.append( issues.append(
ValidationIssue( ValidationIssue(
@@ -150,9 +163,9 @@ def validate_project(
video_path = base_dir / video_source.source_file video_path = base_dir / video_source.source_file
video_path, _ = resolve_with_cache(video_path, project_path) video_path, _ = resolve_with_cache(video_path, project_path)
if not video_path.exists(): if not video_path.exists():
issues.append( warnings.append(
ValidationIssue( ValidationIssue(
f"Video file not found: {video_source.source_file}", f"Video file not found: {video_source.source_file} — falling back to PlaceholderVideo",
videos_json_path, videos_json_path,
) )
) )
@@ -183,8 +196,7 @@ def validate_project(
) )
) )
else: else:
import json as _json bg_videos = _read_json(videos_json_path_bg)
bg_videos = _json.loads(videos_json_path_bg.read_text())
if bg_handle not in bg_videos: if bg_handle not in bg_videos:
issues.append( issues.append(
ValidationIssue( ValidationIssue(
@@ -239,3 +251,5 @@ def validate_project(
# If any issues, raise ValidationError # If any issues, raise ValidationError
if issues: if issues:
raise ValidationError(issues) raise ValidationError(issues)
return warnings
+1 -1
View File
@@ -14,7 +14,7 @@
"fps": 30, "fps": 30,
"duration_seconds": 60, "duration_seconds": 60,
"script": "script.md", "script": "script.md",
"output_video": "export/final.mp4", "output_video": "short_is_universe_pixelated.mp4",
"keynote_file": "../video1/media/video1.key", "keynote_file": "../video1/media/video1.key",
"background": "../video1/shared_assets/BlackBackground.mp4", "background": "../video1/shared_assets/BlackBackground.mp4",
"slides": "../video1/media/slides/Video1/slides.json", "slides": "../video1/media/slides/Video1/slides.json",