Commit prior to change to video tag below / above layering

This commit is contained in:
2026-03-16 16:57:54 +01:00
parent 757d966803
commit e734dbfcac
12 changed files with 416 additions and 154 deletions
+2 -20
View File
@@ -23,26 +23,8 @@
"talkinghead": [
{
"type": "audio_normalize",
"highpass": 100,
"room_eq": true,
"room_eq_freq": 300,
"room_eq_gain": -4,
"room_eq_width": 1.5,
"dereverb_model": "shared_assets/models/std.rnnn",
"dereverb_mix": 0.8,
"denoise": true,
"noise_floor": -25,
"gate": true,
"gate_threshold": -35,
"gate_range": -20,
"compress": true,
"threshold": -20,
"ratio": 4,
"attack": 5,
"release": 50,
"makeup": 2,
"normalize": true,
"target_lufs": -16,
"target_lufs": -14,
"target_lra": 11,
"target_tp": -1.5
},
@@ -101,5 +83,5 @@
},
"manuscript": "manuscript.txt",
"shorts": [],
"output_video": "out/final.mp4"
"output_video": "TRAILER.mp4"
}
+1 -1
View File
@@ -14,7 +14,7 @@
"videos": "media/videos/videos.json",
"slides": "media/slides/Example/slides.json",
"audio": "media/audio/audio.json",
"default_filters": {
"default_filters": {
"talkinghead": [
{
"type": "audio_normalize",
+203 -71
View File
@@ -2,12 +2,15 @@
import argparse
import json
from logging import config
import re
import shutil
import subprocess
import sys
from datetime import datetime
from pathlib import Path
from gnommo.parser import _read_json
from . import __version__
from .errors import GnommoError, ParseError, ValidationError, RenderError
from .cache import get_cache_info, resolve_with_cache
@@ -35,10 +38,15 @@ Examples:
gnommo -p video1 import Generate slides.json from images
gnommo -p video1 pre Preprocess videos (chroma key, etc.)
gnommo -p video1 stitch --res tiny -f Fast stitch with new begin/end values
gnommo -p video1 trim Auto-detect silence and set skip/take in narration.json
gnommo -p video1 trim --force Redo trim even for segments that already have skip/take
gnommo -p video1 trim --threshold -25 Raise threshold to ignore clothing/room noise
gnommo -p video1 trim -v Show detected silence periods for debugging
gnommo -p video1 all Full pipeline: transcribe → align → render
gnommo -p video1 render --dry-run Show FFmpeg command without running
gnommo -p video1 description Generate YouTube description file
gnommo -p video1 transcribe --final Transcribe final.mp4 and generate SRT for YouTube
gnommo -p video1 transcribe Narration file for timing of slides
gnommo -p video1 transcribe --final Transcribe outputted file and generate SRT for YouTube
gnommo -p video1 archive Sync project to external cache storage
gnommo -p video1 archive --dry-run Preview what would be synced
gnommo -p video1 extract-audio --combined Extract audio from narration_combined.mov
@@ -71,6 +79,7 @@ Examples:
"preprocess",
"pre",
"stitch",
"trim",
"render",
"all",
"transcribe",
@@ -156,6 +165,12 @@ Examples:
action="store_true",
help="Target production server (GNOMMOWEB_PROD_URL / GNOMMOWEB_PROD_API_KEY)",
)
parser.add_argument(
"--threshold",
type=float,
default=-40.0,
help="For trim: silence threshold in dB (default: -40). Raise (e.g. -25) to ignore clothing/room noise.",
)
args = parser.parse_args()
@@ -181,6 +196,8 @@ Examples:
args.workers,
args.res,
)
elif action == "trim":
return cmd_trim(project_path, args.verbose, args.force, args.threshold)
elif action in ("stitch"):
return cmd_stitch(
project_path,
@@ -223,7 +240,7 @@ Examples:
return cmd_pull(project_path, args.verbose, args.force, args.prod)
elif action == "handoff":
from .handoff import cmd_handoff
return cmd_handoff(project_path, args.verbose, args.file, args.prod)
return cmd_handoff(project_path, args.verbose, args.file, args.prod, args.res)
except GnommoError as e:
print(f"Error: {e}", file=sys.stderr)
@@ -242,7 +259,7 @@ Examples:
def cmd_import(project_path: Path, force: bool, verbose: bool) -> int:
"""Import assets and generate metadata JSON files."""
from .parser import parse_project_config
from .parser import parse_project_config, _read_json
print(f"Importing assets for: {project_path.name}")
@@ -367,8 +384,7 @@ def _import_shared_assets(shared_assets_dir: Path, verbose: bool) -> None:
videos_json_path = shared_assets_dir / "videos.json"
existing_videos: dict = {}
if videos_json_path.exists():
with open(videos_json_path, "r", encoding="utf-8") as f:
existing_videos = json.load(f)
existing_videos = _read_json(videos_json_path)
# Add new videos (don't overwrite existing)
added_count = 0
@@ -474,8 +490,7 @@ def _import_videos(videos_dir: Path, config, verbose: bool) -> None:
videos_json_path = videos_dir / "videos.json"
existing_videos: dict = {}
if videos_json_path.exists():
with open(videos_json_path, "r", encoding="utf-8") as f:
existing_videos = json.load(f)
existing_videos = _read_json(videos_json_path)
# Get available filter presets from config
default_filters = config.default_filters if config else {}
@@ -558,8 +573,7 @@ def _import_narration_segments(narration_dir: Path, config, verbose: bool) -> No
narration_json_path = narration_dir / "narration.json"
existing_narration: dict = {}
if narration_json_path.exists():
with open(narration_json_path, "r", encoding="utf-8") as f:
existing_narration = json.load(f)
existing_narration = _read_json(narration_json_path)
# Get available filter presets from config
default_filters = config.default_filters if config else {}
@@ -583,9 +597,11 @@ def _import_narration_segments(narration_dir: Path, config, verbose: bool) -> No
# Apply talkinghead preset if available
if "talkinghead" in default_filters:
narration_entry["filter"] = "talkinghead"
narration_entry["cutout"] = "talkinghead"
if "talkinghead" in default_filters:
narration_entry["filter"] = "talkinghead"
# Default audio settings for narration
narration_entry["use_audio_channels"] = "left"
narration_entry["defer_loudnorm"] = True
@@ -656,7 +672,7 @@ def _import_presenter_notes(
# Parse JSON output from JXA script
try:
notes_data = json.loads(proc.stdout)
notes_data = json.loads(proc.stdout) if proc.stdout.strip() else []
except json.JSONDecodeError as e:
print(f" Error parsing notes JSON: {e}", file=sys.stderr)
return
@@ -714,9 +730,11 @@ def cmd_validate(project_path: Path, verbose: bool) -> int:
print(f" - Videos defined: {len(videos)}")
# Validate
validate_project(
warnings = validate_project(
project_path, markers, config, slides, videos, videos_dir, malformed
)
for w in warnings:
print(f" Warning: {w}")
print("Validation passed.")
return 0
@@ -735,9 +753,9 @@ def cmd_preprocess(
workers: int = 1,
res: str = "full",
) -> int:
"""Run preprocessing pipeline on narration segments."""
"""Run preprocessing pipeline on narration segments and videos."""
from concurrent.futures import ThreadPoolExecutor, as_completed
from .parser import parse_project_config, parse_narration
from .parser import parse_project_config, parse_narration, parse_videos
from .preprocessor import (
preprocess_video,
create_downscaled_videos,
@@ -834,10 +852,118 @@ def cmd_preprocess(
)
print(f"\n Run 'gnommo -p <project> stitch' to stitch narration segments into one fulll length narration file.")
# Also preprocess videos from videos.json (e.g. chroma key, color grade)
videos, videos_dir = parse_videos(project_path, config)
videos_to_process = [
(vid_id, vid_src)
for vid_id, vid_src in videos.items()
if vid_src.filter and not vid_src.is_shared
]
if videos_to_process:
print(f"\n Processing {len(videos_to_process)} video(s) from videos.json:")
for video_id, video_source in videos_to_process:
if video_source.output_file:
output_path = videos_dir / video_source.output_file
if output_path.exists() and not force:
print(f" {video_id}: output exists, skipping (use --force to reprocess)")
continue
if dry_run:
print(f" Would preprocess: {video_id} ({len(video_source.filter)} filter(s))")
continue
print(f" Processing: {video_id}")
preprocess_video(videos_dir, video_id, video_source, verbose, force, gnommo_scratch)
print("\nPreprocessing complete.")
return 0
# =============================================================================
# Trim Command — auto-detect silence bounds for narration segments
# =============================================================================
def cmd_trim(
    project_path: Path,
    verbose: bool,
    force: bool = False,
    threshold_db: float = -40.0,
) -> int:
    """
    Auto-detect silence bounds for all narration segments and write skip/take
    values into narration.json.

    For each segment:
        skip = max(0, first_sound_time - 0.5)
        take = last_sound_time + 3.0 - skip   (capped at file duration)

    Segments are left untouched when:
      - they already have an explicit skip or take value (unless ``force``),
      - they have no matching entry in the raw narration.json data,
      - their source file is missing, or
      - no usable sound range was detected (last sound not after first sound),
        so that a meaningless skip/take pair is never written.

    Args:
        project_path: Root directory of the project.
        verbose: Passed to the silence detector to print detected periods.
        force: Recompute skip/take even for segments that already have them.
        threshold_db: Silence threshold in dB, e.g. -25 to ignore
            clothing/room noise that sits above -40 dB.

    Returns:
        0 on success (including when nothing was updated), 1 when no
        narration segments are defined.
    """
    from .parser import parse_project_config, parse_narration
    from .preprocessor import detect_silence_bounds, get_video_duration

    print(f"Auto-trimming narration: {project_path.name}")
    config = parse_project_config(project_path)
    narration, narration_dir = parse_narration(project_path, config)
    if not narration:
        print(" No narration segments found in narration.json")
        print(" Run 'gnommo -p <project> import' first.")
        return 1

    narration_json_path = narration_dir / "narration.json"
    raw_data: dict = _read_json(narration_json_path)

    updated = 0
    for seg_id in sorted(narration):
        seg = narration[seg_id]

        # The values are written back into the raw JSON entry, so it must
        # exist; previously a missing entry crashed with KeyError at write time.
        entry = raw_data.get(seg_id)
        if not isinstance(entry, dict):
            print(f" {seg_id}: no entry in narration.json, skipping")
            continue

        if ("skip" in entry or "take" in entry) and not force:
            print(f" {seg_id}: already trimmed, skipping (use --force to redo)")
            continue

        # Always analyse the raw source file — it's always present and has the
        # same audio as any processed version (processing is video-only).
        source_path = narration_dir / seg.source_file
        if not source_path.exists():
            print(f" {seg_id}: source file not found ({seg.source_file}), skipping")
            continue

        print(f" {seg_id}: analysing...", end="", flush=True)
        first_sound, last_sound = detect_silence_bounds(
            source_path, noise_threshold_db=threshold_db, verbose=verbose
        )

        # A last sound at or before the first sound means no usable content
        # was found (e.g. a silent clip). Leave the segment alone instead of
        # persisting a skip near end-of-file with a zero-length take.
        if last_sound <= first_sound:
            print(
                f" no usable sound detected"
                f" (first={first_sound:.2f}s last={last_sound:.2f}s), skipping"
            )
            continue

        total_dur = get_video_duration(source_path)
        # Keep 0.5 s of lead-in before the first sound and 3.0 s of tail after
        # the last one, never exceeding what is left of the file.
        new_skip = max(0.0, round(first_sound - 0.5, 3))
        new_take = round(min(total_dur - new_skip, last_sound + 3.0 - new_skip), 3)
        new_take = max(0.0, new_take)
        print(
            f" first={first_sound:.2f}s last={last_sound:.2f}s"
            f" → skip={new_skip:.3f}s take={new_take:.3f}s"
        )

        entry["skip"] = new_skip
        entry["take"] = new_take
        updated += 1

    if updated > 0:
        with open(narration_json_path, "w", encoding="utf-8") as f:
            json.dump(raw_data, f, indent=2)
        print(f"\n Updated {updated} segment(s) in narration.json")
    else:
        print("\n No segments updated")
    return 0
# =============================================================================
# Stitch Command (fast iteration on narration segments)
# =============================================================================
@@ -903,19 +1029,17 @@ def cmd_stitch(
if stitch_output.exists() and not force:
print(f"\n Combined narration exists: {stitch_output.name}")
print(" (use --force to regenerate)")
return 0
stitch_narration_segments(
narration_dir,
segment_ids,
narration,
stitch_output,
verbose=verbose,
default_end_trim=config.default_end_trim if config else 0.0,
)
# Run import videos again, because at this point narration_combined might have been created.
_import_videos(videos_dir, config, verbose)
else:
stitch_narration_segments(
narration_dir,
segment_ids,
narration,
stitch_output,
verbose=verbose,
default_end_trim=config.default_end_trim if config else 0.0,
)
# Run import videos again, because at this point narration_combined might have been created.
_import_videos(videos_dir, config, verbose)
# Always update the MAIN videos.json (parent of subdir when using low/tiny res)
# Downscaled dirs only affect file paths, not JSON metadata updates
@@ -924,12 +1048,11 @@ def cmd_stitch(
if True: # Always update JSON regardless of proxy mode
existing_videos: dict = {}
if videos_json_path.exists():
with open(videos_json_path, "r", encoding="utf-8") as f:
existing_videos = json.load(f)
existing_videos = _read_json(videos_json_path)
# Get cutout from first narration segment
first_seg = narration[segment_ids[0]]
cutout = first_seg.cutout or "talkinghead"
cutout = first_seg.cutout or "talkinghead" # Default to audioonly if no cutout specified
# Create/update narration_combined entry
existing_videos["narration_combined"] = {
@@ -1149,7 +1272,10 @@ def cmd_render(
# Non-full res: use downscaled video directory, create on-the-fly if needed
if res != "full":
videos_dir = ensure_downscaled_files_exist(videos_dir, res, force=False, verbose=verbose)
# Skip downscaling sources that have a preprocessed output_file — the
# renderer will use the full-res processed version instead, saving disk space.
sources_with_output = {v.source_file for v in videos.values() if v.output_file}
videos_dir = ensure_downscaled_files_exist(videos_dir, res, force=False, verbose=verbose, skip_sources=sources_with_output)
if verbose:
print(f" Using {res} dir: {videos_dir}")
audio, audio_dir = parse_audio(project_path, config)
@@ -1246,9 +1372,11 @@ def cmd_render(
# Stage 2: Validate
print("\n[2/4] Validating...")
validate_project(
warnings = validate_project(
project_path, markers, config, slides, videos, videos_dir, malformed
)
for w in warnings:
print(f" Warning: {w}")
print(" Passed.")
# Stage 3: Transform (includes on-the-fly alignment)
@@ -1310,14 +1438,19 @@ def cmd_render(
print(f"\n Continuing anyway due to --force flag...")
# Stage 4: Render
# Generate output filename based on slide range and resolution
base_name = "preview" if res == "low" else "final"
if slide_range:
# Determine output filename and directory
if config.output_video:
out_filename = config.output_video
elif slide_range:
start, end = slide_range
range_suffix = f"_{start}-{end}" if end else f"_{start}-end"
output_path = project_path / "out" / f"{base_name}{range_suffix}.mp4"
out_filename = f"final{range_suffix}.mp4"
else:
output_path = project_path / "out" / f"{base_name}.mp4"
out_filename = f"{config.co}.mp4"
out_dir = project_path / "out" / res if res != "full" else project_path / "out"
output_path = out_dir / out_filename
plan.output_path = output_path
if dry_run:
print("\n[4/4] FFmpeg command (dry run):")
@@ -1372,15 +1505,17 @@ def cmd_transcribe(
from .transcriber import transcribe_video, save_transcript, words_to_srt
from .parser import parse_project_config, parse_videos
from .preprocessor import ensure_downscaled_files_exist
config = parse_project_config(project_path)
# Handle --final mode: transcribe the rendered output for YouTube captions
if final:
return _transcribe_final(project_path, verbose)
path = project_path / "out" / f"{config.output_video}.mp4"
return _transcribe_final(path, verbose)
mode_str = f" ({res.upper()})" if res != "full" else ""
print(f"Transcribing: {project_path.name}{mode_str}")
config = parse_project_config(project_path)
videos, videos_dir = parse_videos(project_path, config)
if not videos:
print("Error: No videos defined in videos.json", file=sys.stderr)
@@ -1433,23 +1568,20 @@ def cmd_transcribe(
return 0
def _transcribe_final(project_path: Path, verbose: bool) -> int:
def _transcribe_final(final_video: Path, verbose: bool) -> int:
"""
Transcribe the final rendered video and generate SRT captions for YouTube.
Looks for out/final.mp4 and creates out/final.srt suitable for upload.
Looks and creates out filename.srt suitable for upload.
"""
from .transcriber import transcribe_video, save_transcript, words_to_srt
print(f"Transcribing final output: {project_path.name}")
print(f"Transcribing final output: {final_video}")
# Look for the final rendered video
out_dir = project_path / "out"
final_video = out_dir / "final.mp4"
if not final_video.exists():
print(f"Error: Final video not found: {final_video}", file=sys.stderr)
print(f"Run 'gnommo -p {project_path.name} render' first.", file=sys.stderr)
print("Run 'gnommo render' first.", file=sys.stderr)
return 1
print(f" Video: {final_video.name}")
@@ -1462,11 +1594,11 @@ def _transcribe_final(project_path: Path, verbose: bool) -> int:
return 1
# Save JSON transcript
transcript_path = out_dir / "final.transcript.json"
transcript_path = final_video.with_suffix(".transcript.json")
save_transcript(words, transcript_path)
# Generate SRT captions
srt_path = out_dir / "final.srt"
srt_path = final_video.with_suffix(".srt")
srt_content = words_to_srt(words)
srt_path.write_text(srt_content, encoding="utf-8")
@@ -1597,33 +1729,33 @@ def cmd_all(
res: str = "full",
force: bool = False,
) -> int:
"""Run full pipeline: transcribe → render (alignment is automatic)."""
from .parser import parse_project_config, parse_videos
"""Run full pipeline: preprocess → stitch → render → handoff."""
from .handoff import cmd_handoff
print(f"=== Full Pipeline: {project_path.name} ===\n")
# Check if transcription exists
config = parse_project_config(project_path)
videos, videos_dir = parse_videos(project_path, config)
result = _find_narration_video(config, videos)
if result:
video_id, video_source = result
video_path = videos_dir / video_source.source_file
transcript_path = video_path.with_suffix(".transcript.json")
print(">>> Step 1/5: Import\n")
result = cmd_import(project_path, force, verbose)
if result != 0:
return result
# Try cache fallback for transcript
resolved_transcript, _ = resolve_with_cache(transcript_path, project_path)
if not resolved_transcript.exists():
print(">>> Step 1/2: Transcribe\n")
result = cmd_transcribe(project_path, verbose)
if result != 0:
return result
else:
print(f">>> Step 1/2: Transcribe (cached: {resolved_transcript.name})\n")
print("\n>>> Step 2/5: Preprocess\n")
result = cmd_preprocess(project_path, verbose, dry_run, force, workers=1, res=res)
if result != 0:
return result
# Render (alignment happens automatically)
print("\n>>> Step 2/2: Render\n")
return cmd_render(project_path, verbose, dry_run, res=res, force=force)
print("\n>>> Step 3/5: Stitch\n")
result = cmd_stitch(project_path, verbose, force, res=res)
if result != 0:
return result
print("\n>>> Step 4/5: Render\n")
result = cmd_render(project_path, verbose, dry_run, res=res, force=force)
if result != 0:
return result
print("\n>>> Step 5/5: Handoff\n")
return cmd_handoff(project_path, verbose, file_override=None, prod=False, res=res)
# =============================================================================
@@ -1801,7 +1933,7 @@ def cmd_archive(project_path: Path, verbose: bool, dry_run: bool) -> int:
project_json_path = project_path / "project.json"
if project_json_path.exists():
try:
data = json.loads(project_json_path.read_text(encoding="utf-8"))
data = _read_json(project_json_path.read_text(encoding="utf-8"))
data["synced_time"] = datetime.now().isoformat()
project_json_path.write_text(
json.dumps(data, indent=2, ensure_ascii=False) + "\n",
-3
View File
@@ -176,11 +176,8 @@ def generate_chapters(
for slide_id in slide_ids:
if slide_id not in timing_lookup:
continue
timestamp = timing_lookup[slide_id]
title = _extract_chapter_title(manuscript_text, slide_id, slides)
# Check if we should merge with previous chapter (too short)
if chapters and (timestamp - chapters[-1].timestamp) < min_chapter_duration:
continue # Skip this chapter, previous one covers it
+3 -3
View File
@@ -23,12 +23,12 @@ import tempfile
import zipfile
from pathlib import Path
from gnommo.parser import _read_json
def write_manuscript(data: Path, out_path: Path):
data = json.loads(
data.read_text(encoding="utf-8")
) # list of {"slide_index": int, "notes": str}
data = _read_json(data.read_text(encoding="utf-8"))
lines = []
i = 0
for item in data:
+8 -5
View File
@@ -69,7 +69,7 @@ def _write_sync(project_path: Path, data: dict, prod: bool = False):
json.dump(data, f, indent=2)
def cmd_handoff(project_path: Path, verbose: bool = False, file_override: str | None = None, prod: bool = False) -> int:
def cmd_handoff(project_path: Path, verbose: bool = False, file_override: str | None = None, prod: bool = False, res: str = "full") -> int:
_load_env_file()
if prod:
@@ -104,14 +104,17 @@ def cmd_handoff(project_path: Path, verbose: bool = False, file_override: str |
if file_override:
video_path = Path(file_override)
else:
output_video = project.get("output_video")
if not output_video:
output_filename = project.get("output") or Path(project.get("output_video", "")).name
if not output_filename:
print(
"Error: no 'output_video' field in project.json and no --file provided.",
"Error: no 'output' field in project.json and no --file provided.",
file=sys.stderr,
)
return 1
video_path = project_path / output_video
if res != "full":
video_path = project_path / "out" / res / output_filename
else:
video_path = project_path / "out" / output_filename
if not video_path.exists():
print(f"Error: video file not found: {video_path}", file=sys.stderr)
+2
View File
@@ -65,6 +65,7 @@ class ProjectConfig:
# YouTube description fields
description: str = "" # Video description text for YouTube
footer: str = "" # Footer text (social links, subscribe CTA, etc.)
output_video: str = "" # Output filename (e.g. "DISC_INT3.mp4"); placed in out/ or out/<res>/
@dataclass
@@ -507,6 +508,7 @@ class RenderPlan:
cached_files: set = field(
default_factory=set
) # Video IDs loaded from external cache (show 📁 indicator)
output_path: Optional[Path] = None # Final output file path (set after plan is built)
# Slide layout configurations (hardcoded for POC)
+14 -7
View File
@@ -19,6 +19,12 @@ from .models import (
)
def _read_json(path: Path) -> Any:
    """Load JSON from *path*; a blank or whitespace-only file yields ``{}``."""
    content = path.read_text(encoding="utf-8").strip()
    if not content:
        # Nothing to parse: treat the file as an empty JSON object.
        return {}
    return json.loads(content)
def parse_manuscript(
project_path: Path,
) -> tuple[str, list[str], list[tuple[int, str]], list[Citation]]:
@@ -132,7 +138,7 @@ def load_citations(path: Path) -> list[Citation]:
"""Load citations from a JSON file."""
if not path.exists():
return []
data = json.loads(path.read_text(encoding="utf-8"))
data = _read_json(path)
return [
Citation(
reference=item["reference"],
@@ -151,7 +157,7 @@ def parse_project_config(project_path: Path) -> ProjectConfig:
raise ParseError("project.json not found", config_path)
try:
data = json.loads(config_path.read_text(encoding="utf-8"))
data = _read_json(config_path)
except json.JSONDecodeError as e:
raise ParseError(f"Invalid JSON: {e}", config_path)
@@ -204,6 +210,7 @@ def parse_project_config(project_path: Path) -> ProjectConfig:
outro=data.get("outro", []),
description=data.get("description", ""),
footer=data.get("footer", ""),
output_video=data.get("output_video", ""),
)
@@ -239,7 +246,7 @@ def parse_slides(
raise ParseError(f"slides file not found: {local_slides_path}", local_slides_path)
try:
data = json.loads(slides_path.read_text(encoding="utf-8"))
data = _read_json(slides_path)
except json.JSONDecodeError as e:
raise ParseError(f"Invalid JSON: {e}", slides_path)
@@ -283,7 +290,7 @@ def parse_audio(
return {}, audio_dir
try:
data = json.loads(audio_path.read_text(encoding="utf-8"))
data = _read_json(audio_path)
except json.JSONDecodeError as e:
raise ParseError(f"Invalid JSON: {e}", audio_path)
@@ -382,7 +389,7 @@ def parse_videos(
raise ParseError(f"videos.json not found: {local_videos_path}", local_videos_path)
try:
data = json.loads(videos_path.read_text(encoding="utf-8"))
data = _read_json(videos_path)
except json.JSONDecodeError as e:
raise ParseError(f"Invalid JSON: {e}", videos_path)
@@ -489,7 +496,7 @@ def parse_narration(
return {}, narration_dir
try:
data = json.loads(narration_path.read_text(encoding="utf-8"))
data = _read_json(narration_path)
except json.JSONDecodeError as e:
raise ParseError(f"Invalid JSON: {e}", narration_path)
@@ -594,7 +601,7 @@ def parse_video_metadata(metadata_path: Path) -> VideoMetadata:
raise ParseError(f"Video metadata not found: {metadata_path}", metadata_path)
try:
data = json.loads(metadata_path.read_text(encoding="utf-8"))
data = _read_json(metadata_path)
except json.JSONDecodeError as e:
raise ParseError(f"Invalid JSON: {e}", metadata_path)
+125 -8
View File
@@ -185,10 +185,14 @@ def ensure_downscaled_files_exist(
res: str,
force: bool = False,
verbose: bool = False,
skip_sources: set = None,
) -> Path:
"""
Ensure downscaled copies exist for all videos in source_dir for the given res preset.
Creates them on-the-fly if missing. Returns the output subdirectory.
skip_sources: optional set of source filenames to skip (e.g. files that have a
preprocessed output_file, where the full-res processed version will be used instead).
"""
cfg = RES_CONFIGS[res]
if cfg is None:
@@ -205,6 +209,7 @@ def ensure_downscaled_files_exist(
and f.suffix.lower() in video_extensions
and "_processed" not in f.stem
and not f.name.startswith(".")
and (skip_sources is None or f.name not in skip_sources)
]
if not video_files:
@@ -359,6 +364,115 @@ def check_audio_channel_silent(input_path: Path, channel: str, threshold_db: flo
return False, 0.0
def _resolve_auto_channel(input_path: Path, threshold_db: float = -60.0) -> str:
    """
    Pick the audio channel setting based on which channels carry signal.

    Both channels are probed with check_audio_channel_silent (left first,
    then right) using the same threshold.

    Returns:
        "left" or "right" when exactly one channel is silent (the name of the
        channel that is NOT silent), otherwise "both".
    """
    silent = {
        side: bool(check_audio_channel_silent(input_path, side, threshold_db)[0])
        for side in ("left", "right")
    }
    if silent["left"] == silent["right"]:
        # Either both channels have signal or both are silent.
        return "both"
    return "right" if silent["left"] else "left"
def detect_silence_bounds(
    input_path: Path,
    noise_threshold_db: float = -40.0,
    min_silence_duration: float = 0.3,
    verbose: bool = False,
) -> tuple[float, float]:
    """
    Detect when audio content starts and ends in a file.

    Runs FFmpeg's silencedetect filter and derives the first and last
    non-silent moments from the silence periods it reports on stderr.
    Useful for automatically computing skip/take values.

    Preamble shapes handled:
      - File starts with silence → first_sound = end of that silence.
      - File starts with noise (e.g. clothing rustle) followed by a brief
        quiet gap before speech → first_sound = end of that first gap.
      - The only silence is the trailing one (content starts immediately)
        → first_sound = 0.0. A silence period that runs to the end of the
        file is never treated as preamble, because no sound follows it.

    Args:
        input_path: Video or audio file to analyse.
        noise_threshold_db: dB level below which audio is considered silent.
            Raise (e.g. -25) to treat low-level noise like clothing rustle
            as silence.
        min_silence_duration: Minimum gap length (seconds) that counts as
            silence. Shorter gaps are ignored.
        verbose: Print detected silence periods for debugging.

    Returns:
        (first_sound_time, last_sound_time) in seconds.
        first_sound_time — when the first meaningful sound begins.
        last_sound_time — when the last meaningful sound ends.
        If no silence is reported at all, the whole file is assumed to be
        content: (0.0, total duration). For a file that is silent
        throughout, last_sound_time is not greater than first_sound_time.
    """
    # First-sound search is limited to silence starting in the first 60 s.
    PREAMBLE_LIMIT = 60.0
    # A silence period ending this close to the file end counts as trailing.
    EOF_TOLERANCE = 0.05

    total_duration = get_video_duration(input_path)

    cmd = [
        "ffmpeg",
        "-nostdin",  # never let ffmpeg consume the caller's terminal input
        "-i", str(input_path),
        "-vn",  # audio-only analysis: skip decoding the video stream
        "-af",
        f"silencedetect=noise={noise_threshold_db}dB:duration={min_silence_duration}",
        "-f", "null", "/dev/null",
    ]
    # errors="replace": ffmpeg may echo metadata bytes that are not valid in
    # the locale encoding; they must not abort the analysis.
    result = subprocess.run(cmd, capture_output=True, text=True, errors="replace")

    # Parse silence_start / silence_end lines from stderr
    silence_periods: list[tuple[float, float]] = []
    pending_start: float | None = None
    for line in result.stderr.splitlines():
        if "silence_start:" in line:
            try:
                pending_start = float(line.split("silence_start:")[1].strip())
            except ValueError:
                pass
        elif "silence_end:" in line and pending_start is not None:
            try:
                # Line looks like "silence_end: 12.3 | silence_duration: 4.5"
                end_t = float(line.split("silence_end:")[1].split("|")[0].strip())
            except ValueError:
                continue
            silence_periods.append((pending_start, end_t))
            pending_start = None

    # File ended while still in silence — close the period at total_duration
    if pending_start is not None:
        silence_periods.append((pending_start, total_duration))

    if verbose:
        print(f"\n silence periods ({len(silence_periods)}):")
        for s, e in silence_periods:
            print(f" {s:.3f}s {e:.3f}s")

    trailing_cutoff = total_duration - EOF_TOLERANCE

    # --- First sound ---
    # End of the first silence period in the preamble window, provided sound
    # actually follows it. A period reaching the end of the file is trailing
    # silence; using its end would place the "first sound" at end-of-file.
    first_sound = 0.0
    for s_start, s_end in silence_periods:
        if s_start < PREAMBLE_LIMIT and s_end < trailing_cutoff:
            first_sound = s_end
            break

    # --- Last sound ---
    # Where the trailing silence begins (if the file ends with silence).
    last_sound = total_duration
    if silence_periods and silence_periods[-1][1] >= trailing_cutoff:
        last_sound = silence_periods[-1][0]

    return first_sound, last_sound
def preprocess_video(
videos_dir: Path,
video_id: str,
@@ -402,9 +516,12 @@ def preprocess_video(
filter_type=None,
)
# Quick audio sanity check: warn early if selected channel is silent
# Resolve channel setting (auto-detect if needed) and sanity check
channel = video_source.use_audio_channels
if channel in ("left", "right"):
if channel == "auto":
channel = _resolve_auto_channel(current_input)
print(f" Auto channel detection: using '{channel}'")
elif channel in ("left", "right"):
is_silent, max_vol = check_audio_channel_silent(current_input, channel)
if is_silent:
raise PreprocessError(
@@ -482,7 +599,7 @@ def preprocess_video(
batch[0],
verbose,
take=None,
use_audio_channels=video_source.use_audio_channels,
use_audio_channels=channel,
skip_loudnorm=video_source.defer_loudnorm,
)
current_input = step_output
@@ -2022,12 +2139,12 @@ def stitch_narration_segments(
f" Skip: {skip}s, Take: {take or 'all'}s, Duration: {effective_duration:.1f}s"
)
# If no trimming needed, use source directly
if skip == 0 and take is None:
trimmed_segments.append(source_path)
continue
# Always re-encode every segment to normalize fps and timestamps.
# Mixing un-normalized source files (e.g. 60fps camera) with
# trimmed-and-re-encoded 30fps segments causes cumulative A/V drift
# in the final concat.
# Trim the segment
# Trim/normalize the segment
trimmed_path = temp_dir / f"segment_{i:03d}.mov"
# Check if source has alpha channel (for ProRes 4444, etc.)
+31 -23
View File
@@ -5,6 +5,8 @@ import subprocess
from pathlib import Path
from .errors import RenderError
from .parser import _read_json
from .preprocessor import _resolve_auto_channel
from .models import (
AudioEvent,
CameraEvent,
@@ -179,22 +181,23 @@ def _resolve_video_path(
base_dir = videos_dir
if video_source.output_file:
video_path = base_dir / video_source.output_file
# Check with cache fallback
if project_path:
resolved, _ = resolve_with_cache(video_path, project_path)
if resolved.exists():
return resolved
elif video_path.exists():
return video_path
# Check for WebM variant (preprocessing outputs compressed WebM instead of ProRes)
webm_path = video_path.with_suffix(".mov")
if project_path:
resolved, _ = resolve_with_cache(webm_path, project_path)
if resolved.exists():
return resolved
elif webm_path.exists():
return webm_path
for candidate_dir in [base_dir, base_dir.parent]:
video_path = candidate_dir / video_source.output_file
# Check with cache fallback
if project_path:
resolved, _ = resolve_with_cache(video_path, project_path)
if resolved.exists():
return resolved
elif video_path.exists():
return video_path
# Check for WebM variant (preprocessing outputs compressed WebM instead of ProRes)
webm_path = video_path.with_suffix(".mov")
if project_path:
resolved, _ = resolve_with_cache(webm_path, project_path)
if resolved.exists():
return resolved
elif webm_path.exists():
return webm_path
# Fall back to source_file with cache fallback
source_path = base_dir / video_source.source_file
@@ -272,7 +275,6 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]:
from .cache import resolve_with_cache
# Input: background — resolved via handle in shared_assets/videos.json
import json as _json
bg_handle = plan.config.background
has_background = bool(bg_handle)
bg_idx = None
@@ -282,7 +284,7 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]:
videos_json_bg = shared_assets_dir / "videos.json"
if not videos_json_bg.exists():
raise RenderError(f"shared_assets/videos.json not found (needed for background handle '{bg_handle}')")
bg_videos = _json.loads(videos_json_bg.read_text())
bg_videos = _read_json(videos_json_bg)
if bg_handle not in bg_videos:
raise RenderError(f"Background handle '{bg_handle}' not found in shared_assets/videos.json")
bg_path = shared_assets_dir / bg_videos[bg_handle]["source_file"]
@@ -719,7 +721,8 @@ def build_filter_complex(
)
else:
filters.append(
f"[{bg_idx}:v]scale={width}:{height}:force_original_aspect_ratio=increase,"
f"[{bg_idx}:v]fps={plan.config.fps},"
f"scale={width}:{height}:force_original_aspect_ratio=increase,"
f"crop={width}:{height}[bg]"
)
else:
@@ -742,9 +745,12 @@ def build_filter_complex(
if not plan.narration_pauses:
# Simple case: no pauses, continuous overlay
# fps+setpts normalise the source to a constant frame rate and reset
# the timeline to 0 so the video stays locked to the audio track.
video_label = f"av{i}"
filters.append(
f"[{input_idx}:v]format=yuva444p10le,"
f"[{input_idx}:v]fps={plan.config.fps},setpts=PTS-STARTPTS,"
f"format=yuva444p10le,"
f"scale={zoomed_width}:{zoomed_height}:force_original_aspect_ratio=increase,"
f"crop={cut_width}:{cut_height}:(iw-{cut_width})/2:(ih-{cut_height})/2,"
f"format=rgba[{video_label}]"
@@ -942,9 +948,11 @@ def build_filter_complex(
narration_volume = 1.0
if plan.narration_videos:
_, first_video_source, _ = plan.narration_videos[0]
channel_filter = _build_audio_channel_filter(
first_video_source.use_audio_channels
)
use_channels = first_video_source.use_audio_channels
if use_channels == "auto":
narration_path = _resolve_video_path(videos_dir, first_video_source, shared_assets_dir, project_path)
use_channels = _resolve_auto_channel(narration_path)
channel_filter = _build_audio_channel_filter(use_channels)
narration_volume = first_video_source.volume
# Build volume filter if not 1.0
+23 -9
View File
@@ -4,6 +4,7 @@ from pathlib import Path
from .cache import resolve_with_cache
from .errors import ValidationError, ValidationIssue
from .parser import _read_json
from .models import (
ProjectConfig,
SlideDefinition,
@@ -21,9 +22,10 @@ def validate_project(
videos: dict[str, VideoSource],
videos_dir: Path,
malformed_markers: list[tuple[int, str]] = None,
) -> None:
) -> list[ValidationIssue]:
"""
Validate all parsed project data. Raises ValidationError if any issues found.
Returns a list of warnings (non-fatal issues).
Checks:
- All slide markers in manuscript exist in slides.json
@@ -34,6 +36,7 @@ def validate_project(
- No malformed markers in manuscript
"""
issues: list[ValidationIssue] = []
warnings: list[ValidationIssue] = []
# Check for malformed markers first (these are likely typos)
if malformed_markers:
@@ -64,9 +67,9 @@ def validate_project(
base_name = video_id.rsplit(".", 1)[0]
if base_name in videos:
hint = f" (Did you mean [video:{base_name}]? Don't include file extensions in markers)"
issues.append(
warnings.append(
ValidationIssue(
f"Video marker [{marker}] referenced in manuscript but '{video_id}' not defined in videos.json{hint}",
f"Video marker [{marker}] referenced in manuscript but '{video_id}' not defined in videos.json{hint} — using PlaceholderVideo instead",
project_path / "manuscript.txt",
)
)
@@ -76,9 +79,9 @@ def validate_project(
if marker.startswith("narration:"):
video_id = marker[10:] # Remove 'narration:' prefix
if video_id not in videos:
issues.append(
warnings.append(
ValidationIssue(
f"Narration marker [{marker}] referenced in manuscript but '{video_id}' not defined in videos.json",
f"Narration marker [{marker}] referenced in manuscript but '{video_id}' not defined in videos.json — using PlaceholderVideo instead",
project_path / "manuscript.txt",
)
)
@@ -88,6 +91,16 @@ def validate_project(
if marker.startswith("segment:"):
continue
# Unknown namespaced markers (e.g. [background:xxx]) — not supported, ignore with warning
if ":" in marker:
warnings.append(
ValidationIssue(
f"Unknown marker type [{marker}] — ignoring (no support for '{marker.split(':', 1)[0]}:' markers)",
project_path / "manuscript.txt",
)
)
continue
if marker not in slides:
issues.append(
ValidationIssue(
@@ -150,9 +163,9 @@ def validate_project(
video_path = base_dir / video_source.source_file
video_path, _ = resolve_with_cache(video_path, project_path)
if not video_path.exists():
issues.append(
warnings.append(
ValidationIssue(
f"Video file not found: {video_source.source_file}",
f"Video file not found: {video_source.source_file} — falling back to PlaceholderVideo",
videos_json_path,
)
)
@@ -183,8 +196,7 @@ def validate_project(
)
)
else:
import json as _json
bg_videos = _json.loads(videos_json_path_bg.read_text())
bg_videos = _read_json(videos_json_path_bg)
if bg_handle not in bg_videos:
issues.append(
ValidationIssue(
@@ -239,3 +251,5 @@ def validate_project(
# If any issues, raise ValidationError
if issues:
raise ValidationError(issues)
return warnings
+1 -1
View File
@@ -14,7 +14,7 @@
"fps": 30,
"duration_seconds": 60,
"script": "script.md",
"output_video": "export/final.mp4",
"output_video": "short_is_universe_pixelated.mp4",
"keynote_file": "../video1/media/video1.key",
"background": "../video1/shared_assets/BlackBackground.mp4",
"slides": "../video1/media/slides/Video1/slides.json",