diff --git a/GlitchTrailer/project.json b/GlitchTrailer/project.json index eecabfd..9c3f5ed 100644 --- a/GlitchTrailer/project.json +++ b/GlitchTrailer/project.json @@ -23,26 +23,8 @@ "talkinghead": [ { "type": "audio_normalize", - "highpass": 100, - "room_eq": true, - "room_eq_freq": 300, - "room_eq_gain": -4, - "room_eq_width": 1.5, - "dereverb_model": "shared_assets/models/std.rnnn", - "dereverb_mix": 0.8, - "denoise": true, - "noise_floor": -25, - "gate": true, - "gate_threshold": -35, - "gate_range": -20, - "compress": true, - "threshold": -20, - "ratio": 4, - "attack": 5, - "release": 50, - "makeup": 2, "normalize": true, - "target_lufs": -16, + "target_lufs": -14, "target_lra": 11, "target_tp": -1.5 }, @@ -101,5 +83,5 @@ }, "manuscript": "manuscript.txt", "shorts": [], - "output_video": "out/final.mp4" + "output_video": "TRAILER.mp4" } diff --git a/example/project.json b/example/project.json index eeb35be..fe39e7c 100644 --- a/example/project.json +++ b/example/project.json @@ -14,7 +14,7 @@ "videos": "media/videos/videos.json", "slides": "media/slides/Example/slides.json", "audio": "media/audio/audio.json", - "default_filters": { + "default_filters": { "talkinghead": [ { "type": "audio_normalize", diff --git a/gnommo/cli.py b/gnommo/cli.py index 06e99dc..8182545 100644 --- a/gnommo/cli.py +++ b/gnommo/cli.py @@ -2,12 +2,15 @@ import argparse import json +from logging import config import re import shutil import subprocess import sys from datetime import datetime from pathlib import Path + +from gnommo.parser import _read_json from . import __version__ from .errors import GnommoError, ParseError, ValidationError, RenderError from .cache import get_cache_info, resolve_with_cache @@ -35,10 +38,15 @@ Examples: gnommo -p video1 import Generate slides.json from images gnommo -p video1 pre Preprocess videos (chroma key, etc.) 
gnommo -p video1 stitch --res tiny -f Fast stitch with new begin/end values + gnommo -p video1 trim Auto-detect silence and set skip/take in narration.json + gnommo -p video1 trim --force Redo trim even for segments that already have skip/take + gnommo -p video1 trim --threshold -25 Raise threshold to ignore clothing/room noise + gnommo -p video1 trim -v Show detected silence periods for debugging gnommo -p video1 all Full pipeline: transcribe → align → render gnommo -p video1 render --dry-run Show FFmpeg command without running gnommo -p video1 description Generate YouTube description file - gnommo -p video1 transcribe --final Transcribe final.mp4 and generate SRT for YouTube + gnommo -p video1 transcribe Narration file for timing of slides + gnommo -p video1 transcribe --final Transcribe outputted file and generate SRT for YouTube gnommo -p video1 archive Sync project to external cache storage gnommo -p video1 archive --dry-run Preview what would be synced gnommo -p video1 extract-audio --combined Extract audio from narration_combined.mov @@ -71,6 +79,7 @@ Examples: "preprocess", "pre", "stitch", + "trim", "render", "all", "transcribe", @@ -156,6 +165,12 @@ Examples: action="store_true", help="Target production server (GNOMMOWEB_PROD_URL / GNOMMOWEB_PROD_API_KEY)", ) + parser.add_argument( + "--threshold", + type=float, + default=-40.0, + help="For trim: silence threshold in dB (default: -40). Raise (e.g. 
-25) to ignore clothing/room noise.", + ) args = parser.parse_args() @@ -181,6 +196,8 @@ Examples: args.workers, args.res, ) + elif action == "trim": + return cmd_trim(project_path, args.verbose, args.force, args.threshold) elif action in ("stitch"): return cmd_stitch( project_path, @@ -223,7 +240,7 @@ Examples: return cmd_pull(project_path, args.verbose, args.force, args.prod) elif action == "handoff": from .handoff import cmd_handoff - return cmd_handoff(project_path, args.verbose, args.file, args.prod) + return cmd_handoff(project_path, args.verbose, args.file, args.prod, args.res) except GnommoError as e: print(f"Error: {e}", file=sys.stderr) @@ -242,7 +259,7 @@ Examples: def cmd_import(project_path: Path, force: bool, verbose: bool) -> int: """Import assets and generate metadata JSON files.""" - from .parser import parse_project_config + from .parser import parse_project_config, _read_json print(f"Importing assets for: {project_path.name}") @@ -367,8 +384,7 @@ def _import_shared_assets(shared_assets_dir: Path, verbose: bool) -> None: videos_json_path = shared_assets_dir / "videos.json" existing_videos: dict = {} if videos_json_path.exists(): - with open(videos_json_path, "r", encoding="utf-8") as f: - existing_videos = json.load(f) + existing_videos = _read_json(videos_json_path) # Add new videos (don't overwrite existing) added_count = 0 @@ -474,8 +490,7 @@ def _import_videos(videos_dir: Path, config, verbose: bool) -> None: videos_json_path = videos_dir / "videos.json" existing_videos: dict = {} if videos_json_path.exists(): - with open(videos_json_path, "r", encoding="utf-8") as f: - existing_videos = json.load(f) + existing_videos = _read_json(videos_json_path) # Get available filter presets from config default_filters = config.default_filters if config else {} @@ -558,8 +573,7 @@ def _import_narration_segments(narration_dir: Path, config, verbose: bool) -> No narration_json_path = narration_dir / "narration.json" existing_narration: dict = {} if 
narration_json_path.exists(): - with open(narration_json_path, "r", encoding="utf-8") as f: - existing_narration = json.load(f) + existing_narration = _read_json(narration_json_path) # Get available filter presets from config default_filters = config.default_filters if config else {} @@ -583,9 +597,11 @@ def _import_narration_segments(narration_dir: Path, config, verbose: bool) -> No # Apply talkinghead preset if available if "talkinghead" in default_filters: - narration_entry["filter"] = "talkinghead" narration_entry["cutout"] = "talkinghead" + if "talkinghead" in default_filters: + narration_entry["filter"] = "talkinghead" + # Default audio settings for narration narration_entry["use_audio_channels"] = "left" narration_entry["defer_loudnorm"] = True @@ -656,7 +672,7 @@ def _import_presenter_notes( # Parse JSON output from JXA script try: - notes_data = json.loads(proc.stdout) + notes_data = json.loads(proc.stdout) if proc.stdout.strip() else [] except json.JSONDecodeError as e: print(f" Error parsing notes JSON: {e}", file=sys.stderr) return @@ -714,9 +730,11 @@ def cmd_validate(project_path: Path, verbose: bool) -> int: print(f" - Videos defined: {len(videos)}") # Validate - validate_project( + warnings = validate_project( project_path, markers, config, slides, videos, videos_dir, malformed ) + for w in warnings: + print(f" Warning: {w}") print("Validation passed.") return 0 @@ -735,9 +753,9 @@ def cmd_preprocess( workers: int = 1, res: str = "full", ) -> int: - """Run preprocessing pipeline on narration segments.""" + """Run preprocessing pipeline on narration segments and videos.""" from concurrent.futures import ThreadPoolExecutor, as_completed - from .parser import parse_project_config, parse_narration + from .parser import parse_project_config, parse_narration, parse_videos from .preprocessor import ( preprocess_video, create_downscaled_videos, @@ -834,10 +852,118 @@ def cmd_preprocess( ) print(f"\n Run 'gnommo -p stitch' to stitch narration segments into 
one fulll length narration file.") + + # Also preprocess videos from videos.json (e.g. chroma key, color grade) + videos, videos_dir = parse_videos(project_path, config) + videos_to_process = [ + (vid_id, vid_src) + for vid_id, vid_src in videos.items() + if vid_src.filter and not vid_src.is_shared + ] + + if videos_to_process: + print(f"\n Processing {len(videos_to_process)} video(s) from videos.json:") + for video_id, video_source in videos_to_process: + if video_source.output_file: + output_path = videos_dir / video_source.output_file + if output_path.exists() and not force: + print(f" {video_id}: output exists, skipping (use --force to reprocess)") + continue + if dry_run: + print(f" Would preprocess: {video_id} ({len(video_source.filter)} filter(s))") + continue + print(f" Processing: {video_id}") + preprocess_video(videos_dir, video_id, video_source, verbose, force, gnommo_scratch) + print("\nPreprocessing complete.") return 0 +# ============================================================================= +# Trim Command — auto-detect silence bounds for narration segments +# ============================================================================= + + +def cmd_trim( + project_path: Path, + verbose: bool, + force: bool = False, + threshold_db: float = -40.0, +) -> int: + """ + Auto-detect silence bounds for all narration segments and write skip/take + values into narration.json. + + For each segment: + skip = max(0, first_sound_time - 0.5) + take = last_sound_time + 3.0 - skip (capped at file duration) + + Segments that already have explicit skip or take values are left unchanged + unless --force is passed. + + Use --threshold to adjust sensitivity, e.g. -25 to ignore clothing/room + noise that sits above -40 dB. 
+ """ + from .parser import parse_project_config, parse_narration + from .preprocessor import detect_silence_bounds, get_video_duration + + print(f"Auto-trimming narration: {project_path.name}") + + config = parse_project_config(project_path) + narration, narration_dir = parse_narration(project_path, config) + + if not narration: + print(" No narration segments found in narration.json") + print(" Run 'gnommo -p import' first.") + return 1 + + narration_json_path = narration_dir / "narration.json" + raw_data: dict = _read_json(narration_json_path) + + updated = 0 + for seg_id in sorted(narration.keys()): + seg = narration[seg_id] + + existing = raw_data.get(seg_id, {}) + has_explicit = "skip" in existing or "take" in existing + if has_explicit and not force: + print(f" {seg_id}: already trimmed, skipping (use --force to redo)") + continue + + # Always analyse the raw source file — it's always present and has the + # same audio as any processed version (processing is video-only). + source_path = narration_dir / seg.source_file + if not source_path.exists(): + print(f" {seg_id}: source file not found ({seg.source_file}), skipping") + continue + + print(f" {seg_id}: analysing...", end="", flush=True) + first_sound, last_sound = detect_silence_bounds(source_path, noise_threshold_db=threshold_db, verbose=verbose) + total_dur = get_video_duration(source_path) + + new_skip = max(0.0, round(first_sound - 0.5, 3)) + new_take = round(min(total_dur - new_skip, last_sound + 3.0 - new_skip), 3) + new_take = max(0.0, new_take) + + print( + f" first={first_sound:.2f}s last={last_sound:.2f}s" + f" → skip={new_skip:.3f}s take={new_take:.3f}s" + ) + + raw_data[seg_id]["skip"] = new_skip + raw_data[seg_id]["take"] = new_take + updated += 1 + + if updated > 0: + with open(narration_json_path, "w", encoding="utf-8") as f: + json.dump(raw_data, f, indent=2) + print(f"\n Updated {updated} segment(s) in narration.json") + else: + print(f"\n No segments updated") + + return 0 + + # 
============================================================================= # Stitch Command (fast iteration on narration segments) # ============================================================================= @@ -903,19 +1029,17 @@ def cmd_stitch( if stitch_output.exists() and not force: print(f"\n Combined narration exists: {stitch_output.name}") print(" (use --force to regenerate)") - return 0 - - stitch_narration_segments( - narration_dir, - segment_ids, - narration, - stitch_output, - verbose=verbose, - default_end_trim=config.default_end_trim if config else 0.0, - ) - - # Run import videos again, because at this point narration_combined might have been created. - _import_videos(videos_dir, config, verbose) + else: + stitch_narration_segments( + narration_dir, + segment_ids, + narration, + stitch_output, + verbose=verbose, + default_end_trim=config.default_end_trim if config else 0.0, + ) + # Run import videos again, because at this point narration_combined might have been created. 
+ _import_videos(videos_dir, config, verbose) # Always update the MAIN videos.json (parent of subdir when using low/tiny res) # Downscaled dirs only affect file paths, not JSON metadata updates @@ -924,12 +1048,11 @@ def cmd_stitch( if True: # Always update JSON regardless of proxy mode existing_videos: dict = {} if videos_json_path.exists(): - with open(videos_json_path, "r", encoding="utf-8") as f: - existing_videos = json.load(f) + existing_videos = _read_json(videos_json_path) # Get cutout from first narration segment first_seg = narration[segment_ids[0]] - cutout = first_seg.cutout or "talkinghead" + cutout = first_seg.cutout or "talkinghead" # Default to audioonly if no cutout specified # Create/update narration_combined entry existing_videos["narration_combined"] = { @@ -1149,7 +1272,10 @@ def cmd_render( # Non-full res: use downscaled video directory, create on-the-fly if needed if res != "full": - videos_dir = ensure_downscaled_files_exist(videos_dir, res, force=False, verbose=verbose) + # Skip downscaling sources that have a preprocessed output_file — the + # renderer will use the full-res processed version instead, saving disk space. 
+ sources_with_output = {v.source_file for v in videos.values() if v.output_file} + videos_dir = ensure_downscaled_files_exist(videos_dir, res, force=False, verbose=verbose, skip_sources=sources_with_output) if verbose: print(f" Using {res} dir: {videos_dir}") audio, audio_dir = parse_audio(project_path, config) @@ -1246,9 +1372,11 @@ def cmd_render( # Stage 2: Validate print("\n[2/4] Validating...") - validate_project( + warnings = validate_project( project_path, markers, config, slides, videos, videos_dir, malformed ) + for w in warnings: + print(f" Warning: {w}") print(" Passed.") # Stage 3: Transform (includes on-the-fly alignment) @@ -1310,14 +1438,19 @@ def cmd_render( print(f"\n Continuing anyway due to --force flag...") # Stage 4: Render - # Generate output filename based on slide range and resolution - base_name = "preview" if res == "low" else "final" - if slide_range: + # Determine output filename and directory + if config.output_video: + out_filename = Path(config.output_video).name + elif slide_range: start, end = slide_range range_suffix = f"_{start}-{end}" if end else f"_{start}-end" - output_path = project_path / "out" / f"{base_name}{range_suffix}.mp4" + out_filename = f"final{range_suffix}.mp4" else: - output_path = project_path / "out" / f"{base_name}.mp4" + out_filename = "final.mp4" + + out_dir = project_path / "out" / res if res != "full" else project_path / "out" + output_path = out_dir / out_filename + plan.output_path = output_path if dry_run: print("\n[4/4] FFmpeg command (dry run):") @@ -1372,15 +1505,17 @@ def cmd_transcribe( from .transcriber import transcribe_video, save_transcript, words_to_srt from .parser import parse_project_config, parse_videos from .preprocessor import ensure_downscaled_files_exist - + config = parse_project_config(project_path) + # Handle --final mode: transcribe the rendered output for YouTube captions if final: - return _transcribe_final(project_path, verbose) + path = project_path / "out" / 
(Path(config.output_video).name or "final.mp4") + return _transcribe_final(path, verbose) mode_str = f" ({res.upper()})" if res != "full" else "" print(f"Transcribing: {project_path.name}{mode_str}") - config = parse_project_config(project_path) + videos, videos_dir = parse_videos(project_path, config) if not videos: print("Error: No videos defined in videos.json", file=sys.stderr) @@ -1433,23 +1568,20 @@ def cmd_transcribe( return 0 -def _transcribe_final(project_path: Path, verbose: bool) -> int: +def _transcribe_final(final_video: Path, verbose: bool) -> int: """ Transcribe the final rendered video and generate SRT captions for YouTube. - Looks for out/final.mp4 and creates out/final.srt suitable for upload. + Expects the rendered video at the given path and writes a matching .srt and .transcript.json beside it, suitable for upload. """ from .transcriber import transcribe_video, save_transcript, words_to_srt - print(f"Transcribing final output: {project_path.name}") + print(f"Transcribing final output: {final_video}") - # Look for the final rendered video - out_dir = project_path / "out" - final_video = out_dir / "final.mp4" if not final_video.exists(): print(f"Error: Final video not found: {final_video}", file=sys.stderr) - print(f"Run 'gnommo -p {project_path.name} render' first.", file=sys.stderr) + print("Run 'gnommo render' first.", file=sys.stderr) return 1 print(f" Video: {final_video.name}") @@ -1462,11 +1594,11 @@ def _transcribe_final(project_path: Path, verbose: bool) -> int: return 1 # Save JSON transcript - transcript_path = out_dir / "final.transcript.json" + transcript_path = final_video.with_suffix(".transcript.json") save_transcript(words, transcript_path) # Generate SRT captions - srt_path = out_dir / "final.srt" + srt_path = final_video.with_suffix(".srt") srt_content = words_to_srt(words) srt_path.write_text(srt_content, encoding="utf-8") @@ -1597,33 +1729,33 @@ def cmd_all( res: str = "full", force: bool = False, ) -> int: - """Run full pipeline: transcribe → render (alignment is automatic).""" - from .parser import 
parse_project_config, parse_videos + """Run full pipeline: import → preprocess → stitch → render → handoff (handoff is skipped on dry runs).""" + from .handoff import cmd_handoff print(f"=== Full Pipeline: {project_path.name} ===\n") - # Check if transcription exists - config = parse_project_config(project_path) - videos, videos_dir = parse_videos(project_path, config) - result = _find_narration_video(config, videos) - if result: - video_id, video_source = result - video_path = videos_dir / video_source.source_file - transcript_path = video_path.with_suffix(".transcript.json") + print(">>> Step 1/5: Import\n") + result = cmd_import(project_path, force, verbose) + if result != 0: + return result - # Try cache fallback for transcript - resolved_transcript, _ = resolve_with_cache(transcript_path, project_path) - if not resolved_transcript.exists(): - print(">>> Step 1/2: Transcribe\n") - result = cmd_transcribe(project_path, verbose) - if result != 0: - return result - else: - print(f">>> Step 1/2: Transcribe (cached: {resolved_transcript.name})\n") + print("\n>>> Step 2/5: Preprocess\n") + result = cmd_preprocess(project_path, verbose, dry_run, force, workers=1, res=res) + if result != 0: + return result - # Render (alignment happens automatically) - print("\n>>> Step 2/2: Render\n") - return cmd_render(project_path, verbose, dry_run, res=res, force=force) + print("\n>>> Step 3/5: Stitch\n") + result = cmd_stitch(project_path, verbose, force, res=res) + if result != 0: + return result + + print("\n>>> Step 4/5: Render\n") + result = cmd_render(project_path, verbose, dry_run, res=res, force=force) + if result != 0: + return result + + print("\n>>> Step 5/5: Handoff\n") + return 0 if dry_run else cmd_handoff(project_path, verbose, file_override=None, prod=False, res=res) # ============================================================================= @@ -1801,7 +1933,7 @@ def cmd_archive(project_path: Path, verbose: bool, dry_run: bool) -> int: project_json_path = project_path / "project.json" if project_json_path.exists(): try: 
- data = json.loads(project_json_path.read_text(encoding="utf-8")) + data = _read_json(project_json_path) data["synced_time"] = datetime.now().isoformat() project_json_path.write_text( json.dumps(data, indent=2, ensure_ascii=False) + "\n", diff --git a/gnommo/description.py b/gnommo/description.py index a4cf73c..bd1c671 100644 --- a/gnommo/description.py +++ b/gnommo/description.py @@ -176,12 +176,9 @@ def generate_chapters( for slide_id in slide_ids: if slide_id not in timing_lookup: continue - timestamp = timing_lookup[slide_id] title = _extract_chapter_title(manuscript_text, slide_id, slides) - - # Check if we should merge with previous chapter (too short) - if chapters and (timestamp - chapters[-1].timestamp) < min_chapter_duration: + if chapters and (timestamp - chapters[-1].timestamp) < min_chapter_duration: continue # Skip this chapter, previous one covers it chapters.append( diff --git a/gnommo/extract_presenter_notes.py b/gnommo/extract_presenter_notes.py index 29a04ec..996114f 100644 --- a/gnommo/extract_presenter_notes.py +++ b/gnommo/extract_presenter_notes.py @@ -23,12 +23,12 @@ import tempfile import zipfile from pathlib import Path +from gnommo.parser import _read_json + def write_manuscript(data: Path, out_path: Path): - data = json.loads( - data.read_text(encoding="utf-8") - ) # list of {"slide_index": int, "notes": str} - + + data = _read_json(data) # list of {"slide_index": int, "notes": str} lines = [] i = 0 for item in data: diff --git a/gnommo/handoff.py b/gnommo/handoff.py index 07e148a..a6ebc9c 100644 --- a/gnommo/handoff.py +++ b/gnommo/handoff.py @@ -69,7 +69,7 @@ def _write_sync(project_path: Path, data: dict, prod: bool = False): json.dump(data, f, indent=2) -def cmd_handoff(project_path: Path, verbose: bool = False, file_override: str | None = None, prod: bool = False, res: str = "full") -> int: 
_load_env_file() if prod: @@ -104,14 +104,17 @@ def cmd_handoff(project_path: Path, verbose: bool = False, file_override: str | if file_override: video_path = Path(file_override) else: - output_video = project.get("output_video") - if not output_video: + output_filename = project.get("output") or Path(project.get("output_video", "")).name + if not output_filename: print( - "Error: no 'output_video' field in project.json and no --file provided.", + "Error: no 'output' field in project.json and no --file provided.", file=sys.stderr, ) return 1 - video_path = project_path / output_video + if res != "full": + video_path = project_path / "out" / res / output_filename + else: + video_path = project_path / "out" / output_filename if not video_path.exists(): print(f"Error: video file not found: {video_path}", file=sys.stderr) diff --git a/gnommo/models.py b/gnommo/models.py index e6d24fe..1b88f3e 100644 --- a/gnommo/models.py +++ b/gnommo/models.py @@ -65,6 +65,7 @@ class ProjectConfig: # YouTube description fields description: str = "" # Video description text for YouTube footer: str = "" # Footer text (social links, subscribe CTA, etc.) + output_video: str = "" # Output filename (e.g. 
"DISC_INT3.mp4"); placed in out/ or out// @dataclass @@ -507,6 +508,7 @@ class RenderPlan: cached_files: set = field( default_factory=set ) # Video IDs loaded from external cache (show 📁 indicator) + output_path: Optional[Path] = None # Final output file path (set after plan is built) # Slide layout configurations (hardcoded for POC) diff --git a/gnommo/parser.py b/gnommo/parser.py index 0343443..ae24e63 100644 --- a/gnommo/parser.py +++ b/gnommo/parser.py @@ -19,6 +19,12 @@ from .models import ( ) +def _read_json(path: Path) -> Any: + """Read and parse a JSON file, treating an empty file as {}.""" + text = path.read_text(encoding="utf-8").strip() + return json.loads(text) if text else {} + + def parse_manuscript( project_path: Path, ) -> tuple[str, list[str], list[tuple[int, str]], list[Citation]]: @@ -132,7 +138,7 @@ def load_citations(path: Path) -> list[Citation]: """Load citations from a JSON file.""" if not path.exists(): return [] - data = json.loads(path.read_text(encoding="utf-8")) + data = _read_json(path) return [ Citation( reference=item["reference"], @@ -151,7 +157,7 @@ def parse_project_config(project_path: Path) -> ProjectConfig: raise ParseError("project.json not found", config_path) try: - data = json.loads(config_path.read_text(encoding="utf-8")) + data = _read_json(config_path) except json.JSONDecodeError as e: raise ParseError(f"Invalid JSON: {e}", config_path) @@ -204,6 +210,7 @@ def parse_project_config(project_path: Path) -> ProjectConfig: outro=data.get("outro", []), description=data.get("description", ""), footer=data.get("footer", ""), + output_video=data.get("output_video", ""), ) @@ -239,7 +246,7 @@ def parse_slides( raise ParseError(f"slides file not found: {local_slides_path}", local_slides_path) try: - data = json.loads(slides_path.read_text(encoding="utf-8")) + data = _read_json(slides_path) except json.JSONDecodeError as e: raise ParseError(f"Invalid JSON: {e}", slides_path) @@ -283,7 +290,7 @@ def parse_audio( return {}, audio_dir 
try: - data = json.loads(audio_path.read_text(encoding="utf-8")) + data = _read_json(audio_path) except json.JSONDecodeError as e: raise ParseError(f"Invalid JSON: {e}", audio_path) @@ -382,7 +389,7 @@ def parse_videos( raise ParseError(f"videos.json not found: {local_videos_path}", local_videos_path) try: - data = json.loads(videos_path.read_text(encoding="utf-8")) + data = _read_json(videos_path) except json.JSONDecodeError as e: raise ParseError(f"Invalid JSON: {e}", videos_path) @@ -489,7 +496,7 @@ def parse_narration( return {}, narration_dir try: - data = json.loads(narration_path.read_text(encoding="utf-8")) + data = _read_json(narration_path) except json.JSONDecodeError as e: raise ParseError(f"Invalid JSON: {e}", narration_path) @@ -594,7 +601,7 @@ def parse_video_metadata(metadata_path: Path) -> VideoMetadata: raise ParseError(f"Video metadata not found: {metadata_path}", metadata_path) try: - data = json.loads(metadata_path.read_text(encoding="utf-8")) + data = _read_json(metadata_path) except json.JSONDecodeError as e: raise ParseError(f"Invalid JSON: {e}", metadata_path) diff --git a/gnommo/preprocessor.py b/gnommo/preprocessor.py index 54b6c6c..97fac0d 100644 --- a/gnommo/preprocessor.py +++ b/gnommo/preprocessor.py @@ -185,10 +185,14 @@ def ensure_downscaled_files_exist( res: str, force: bool = False, verbose: bool = False, + skip_sources: set = None, ) -> Path: """ Ensure downscaled copies exist for all videos in source_dir for the given res preset. Creates them on-the-fly if missing. Returns the output subdirectory. + + skip_sources: optional set of source filenames to skip (e.g. files that have a + preprocessed output_file, where the full-res processed version will be used instead). 
""" cfg = RES_CONFIGS[res] if cfg is None: @@ -205,6 +209,7 @@ def ensure_downscaled_files_exist( and f.suffix.lower() in video_extensions and "_processed" not in f.stem and not f.name.startswith(".") + and (skip_sources is None or f.name not in skip_sources) ] if not video_files: @@ -359,6 +364,115 @@ def check_audio_channel_silent(input_path: Path, channel: str, threshold_db: flo return False, 0.0 +def _resolve_auto_channel(input_path: Path, threshold_db: float = -60.0) -> str: + """ + Detect which audio channels have signal and return the appropriate channel setting. + + Logic: + - One channel silent, the other not → return the active channel ("left" or "right") + - Both channels have signal → return "both" + """ + left_silent, _ = check_audio_channel_silent(input_path, "left", threshold_db) + right_silent, _ = check_audio_channel_silent(input_path, "right", threshold_db) + + if left_silent and not right_silent: + return "right" + if right_silent and not left_silent: + return "left" + return "both" + + +def detect_silence_bounds( + input_path: Path, + noise_threshold_db: float = -40.0, + min_silence_duration: float = 0.3, + verbose: bool = False, +) -> tuple[float, float]: + """ + Detect when audio content starts and ends in a file. + + Uses FFmpeg's silencedetect filter to find the first and last + non-silent moments. Useful for automatically computing skip/take values. + + Two common preamble shapes are handled: + - File starts with silence → first_sound = end of that silence. + - File starts with noise (e.g. clothing rustle) followed by a brief + quiet gap before speech → first_sound = end of that first gap. + + Args: + input_path: Video or audio file to analyse. + noise_threshold_db: dB level below which audio is considered silent. + Raise (e.g. -25) to treat low-level noise like clothing rustle + as silence. + min_silence_duration: Minimum gap length (seconds) that counts as + silence. Shorter gaps are ignored. 
+ verbose: Print detected silence periods for debugging. + + Returns: + (first_sound_time, last_sound_time) in seconds. + first_sound_time — when the first meaningful sound begins. + last_sound_time — when the last meaningful sound ends. + """ + total_duration = get_video_duration(input_path) + + cmd = [ + "ffmpeg", "-i", str(input_path), + "-af", + f"silencedetect=noise={noise_threshold_db}dB:duration={min_silence_duration}", + "-f", "null", "/dev/null", + ] + result = subprocess.run(cmd, capture_output=True, text=True) + + # Parse silence_start / silence_end lines from stderr + silence_periods: list[tuple[float, float]] = [] + pending_start: float | None = None + + for line in result.stderr.splitlines(): + if "silence_start:" in line: + try: + pending_start = float(line.split("silence_start:")[1].strip()) + except ValueError: + pass + elif "silence_end:" in line and pending_start is not None: + try: + end_t = float(line.split("silence_end:")[1].split("|")[0].strip()) + silence_periods.append((pending_start, end_t)) + pending_start = None + except ValueError: + pass + + # File ended while still in silence — close the period at total_duration + if pending_start is not None: + silence_periods.append((pending_start, total_duration)) + + if verbose: + print(f"\n silence periods ({len(silence_periods)}):") + for s, e in silence_periods: + print(f" {s:.3f}s – {e:.3f}s") + + # --- First sound --- + # Take the end of the FIRST silence period found in the preamble window + # (first 60 s). This handles both: + # • file starts with silence → silence[0].start ≈ 0 + # • file starts with noise (crumpling etc.) then has a brief quiet gap + # before speech → silence[0].start > 0 + # If no silence is found at all the whole file is assumed to be content. 
+ PREAMBLE_LIMIT = 60.0 + first_sound = 0.0 + for s_start, s_end in silence_periods: + if s_start < PREAMBLE_LIMIT: + first_sound = s_end + break + + # --- Last sound --- + # Where the trailing silence begins (if the file ends with silence). + last_sound = total_duration + if silence_periods and silence_periods[-1][1] >= total_duration - 0.05: + last_sound = silence_periods[-1][0] + + return first_sound, last_sound + + def preprocess_video( videos_dir: Path, video_id: str, @@ -402,9 +516,12 @@ def preprocess_video( filter_type=None, ) - # Quick audio sanity check: warn early if selected channel is silent + # Resolve channel setting (auto-detect if needed) and sanity check channel = video_source.use_audio_channels - if channel in ("left", "right"): + if channel == "auto": + channel = _resolve_auto_channel(current_input) + print(f" Auto channel detection: using '{channel}'") + elif channel in ("left", "right"): is_silent, max_vol = check_audio_channel_silent(current_input, channel) if is_silent: raise PreprocessError( @@ -482,7 +599,7 @@ def preprocess_video( batch[0], verbose, take=None, - use_audio_channels=video_source.use_audio_channels, + use_audio_channels=channel, skip_loudnorm=video_source.defer_loudnorm, ) current_input = step_output @@ -2022,12 +2139,12 @@ def stitch_narration_segments( f" Skip: {skip}s, Take: {take or 'all'}s, Duration: {effective_duration:.1f}s" ) - # If no trimming needed, use source directly - if skip == 0 and take is None: - trimmed_segments.append(source_path) - continue + # Always re-encode every segment to normalize fps and timestamps. + # Mixing un-normalized source files (e.g. 60fps camera) with + # trimmed-and-re-encoded 30fps segments causes cumulative A/V drift + # in the final concat. - # Trim the segment + # Trim/normalize the segment trimmed_path = temp_dir / f"segment_{i:03d}.mov" # Check if source has alpha channel (for ProRes 4444, etc.) 
diff --git a/gnommo/renderer.py b/gnommo/renderer.py index 52c14d4..8971195 100644 --- a/gnommo/renderer.py +++ b/gnommo/renderer.py @@ -5,6 +5,8 @@ import subprocess from pathlib import Path from .errors import RenderError +from .parser import _read_json +from .preprocessor import _resolve_auto_channel from .models import ( AudioEvent, CameraEvent, @@ -179,22 +181,23 @@ def _resolve_video_path( base_dir = videos_dir if video_source.output_file: - video_path = base_dir / video_source.output_file - # Check with cache fallback - if project_path: - resolved, _ = resolve_with_cache(video_path, project_path) - if resolved.exists(): - return resolved - elif video_path.exists(): - return video_path - # Check for WebM variant (preprocessing outputs compressed WebM instead of ProRes) - webm_path = video_path.with_suffix(".mov") - if project_path: - resolved, _ = resolve_with_cache(webm_path, project_path) - if resolved.exists(): - return resolved - elif webm_path.exists(): - return webm_path + for candidate_dir in [base_dir, base_dir.parent]: + video_path = candidate_dir / video_source.output_file + # Check with cache fallback + if project_path: + resolved, _ = resolve_with_cache(video_path, project_path) + if resolved.exists(): + return resolved + elif video_path.exists(): + return video_path + # Check for WebM variant (preprocessing outputs compressed WebM instead of ProRes) + webm_path = video_path.with_suffix(".mov") + if project_path: + resolved, _ = resolve_with_cache(webm_path, project_path) + if resolved.exists(): + return resolved + elif webm_path.exists(): + return webm_path # Fall back to source_file with cache fallback source_path = base_dir / video_source.source_file @@ -272,7 +275,6 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]: from .cache import resolve_with_cache # Input: background — resolved via handle in shared_assets/videos.json - import json as _json bg_handle = plan.config.background has_background = bool(bg_handle) 
bg_idx = None @@ -282,7 +284,7 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]: videos_json_bg = shared_assets_dir / "videos.json" if not videos_json_bg.exists(): raise RenderError(f"shared_assets/videos.json not found (needed for background handle '{bg_handle}')") - bg_videos = _json.loads(videos_json_bg.read_text()) + bg_videos = _read_json(videos_json_bg) if bg_handle not in bg_videos: raise RenderError(f"Background handle '{bg_handle}' not found in shared_assets/videos.json") bg_path = shared_assets_dir / bg_videos[bg_handle]["source_file"] @@ -719,7 +721,8 @@ def build_filter_complex( ) else: filters.append( - f"[{bg_idx}:v]scale={width}:{height}:force_original_aspect_ratio=increase," + f"[{bg_idx}:v]fps={plan.config.fps}," + f"scale={width}:{height}:force_original_aspect_ratio=increase," f"crop={width}:{height}[bg]" ) else: @@ -742,9 +745,12 @@ def build_filter_complex( if not plan.narration_pauses: # Simple case: no pauses, continuous overlay + # fps+setpts normalise the source to a constant frame rate and reset + # the timeline to 0 so the video stays locked to the audio track. 
video_label = f"av{i}" filters.append( - f"[{input_idx}:v]format=yuva444p10le," + f"[{input_idx}:v]fps={plan.config.fps},setpts=PTS-STARTPTS," + f"format=yuva444p10le," f"scale={zoomed_width}:{zoomed_height}:force_original_aspect_ratio=increase," f"crop={cut_width}:{cut_height}:(iw-{cut_width})/2:(ih-{cut_height})/2," f"format=rgba[{video_label}]" @@ -942,9 +948,11 @@ def build_filter_complex( narration_volume = 1.0 if plan.narration_videos: _, first_video_source, _ = plan.narration_videos[0] - channel_filter = _build_audio_channel_filter( - first_video_source.use_audio_channels - ) + use_channels = first_video_source.use_audio_channels + if use_channels == "auto": + narration_path = _resolve_video_path(videos_dir, first_video_source, shared_assets_dir, project_path) + use_channels = _resolve_auto_channel(narration_path) + channel_filter = _build_audio_channel_filter(use_channels) narration_volume = first_video_source.volume # Build volume filter if not 1.0 diff --git a/gnommo/validator.py b/gnommo/validator.py index 160b3e1..ba9e4dc 100644 --- a/gnommo/validator.py +++ b/gnommo/validator.py @@ -4,6 +4,7 @@ from pathlib import Path from .cache import resolve_with_cache from .errors import ValidationError, ValidationIssue +from .parser import _read_json from .models import ( ProjectConfig, SlideDefinition, @@ -21,9 +22,10 @@ def validate_project( videos: dict[str, VideoSource], videos_dir: Path, malformed_markers: list[tuple[int, str]] = None, -) -> None: +) -> list[ValidationIssue]: """ Validate all parsed project data. Raises ValidationError if any issues found. + Returns a list of warnings (non-fatal issues). 
Checks: - All slide markers in manuscript exist in slides.json @@ -34,6 +36,7 @@ def validate_project( - No malformed markers in manuscript """ issues: list[ValidationIssue] = [] + warnings: list[ValidationIssue] = [] # Check for malformed markers first (these are likely typos) if malformed_markers: @@ -64,9 +67,9 @@ def validate_project( base_name = video_id.rsplit(".", 1)[0] if base_name in videos: hint = f" (Did you mean [video:{base_name}]? Don't include file extensions in markers)" - issues.append( + warnings.append( ValidationIssue( - f"Video marker [{marker}] referenced in manuscript but '{video_id}' not defined in videos.json{hint}", + f"Video marker [{marker}] referenced in manuscript but '{video_id}' not defined in videos.json{hint} — using PlaceholderVideo instead", project_path / "manuscript.txt", ) ) @@ -76,9 +79,9 @@ def validate_project( if marker.startswith("narration:"): video_id = marker[10:] # Remove 'narration:' prefix if video_id not in videos: - issues.append( + warnings.append( ValidationIssue( - f"Narration marker [{marker}] referenced in manuscript but '{video_id}' not defined in videos.json", + f"Narration marker [{marker}] referenced in manuscript but '{video_id}' not defined in videos.json — using PlaceholderVideo instead", project_path / "manuscript.txt", ) ) @@ -88,6 +91,16 @@ def validate_project( if marker.startswith("segment:"): continue + # Unknown namespaced markers (e.g. 
[background:xxx]) — not supported, ignore with warning + if ":" in marker: + warnings.append( + ValidationIssue( + f"Unknown marker type [{marker}] — ignoring (no support for '{marker.split(':', 1)[0]}:' markers)", + project_path / "manuscript.txt", + ) + ) + continue + if marker not in slides: issues.append( ValidationIssue( @@ -150,9 +163,9 @@ def validate_project( video_path = base_dir / video_source.source_file video_path, _ = resolve_with_cache(video_path, project_path) if not video_path.exists(): - issues.append( + warnings.append( ValidationIssue( - f"Video file not found: {video_source.source_file}", + f"Video file not found: {video_source.source_file} — falling back to PlaceholderVideo", videos_json_path, ) ) @@ -183,8 +196,7 @@ def validate_project( ) ) else: - import json as _json - bg_videos = _json.loads(videos_json_path_bg.read_text()) + bg_videos = _read_json(videos_json_path_bg) if bg_handle not in bg_videos: issues.append( ValidationIssue( @@ -239,3 +251,5 @@ def validate_project( # If any issues, raise ValidationError if issues: raise ValidationError(issues) + + return warnings diff --git a/short_is_universe_pixelated/project.json b/short_is_universe_pixelated/project.json index e91f7b3..2c90610 100644 --- a/short_is_universe_pixelated/project.json +++ b/short_is_universe_pixelated/project.json @@ -14,7 +14,7 @@ "fps": 30, "duration_seconds": 60, "script": "script.md", - "output_video": "export/final.mp4", + "output_video": "short_is_universe_pixelated.mp4", "keynote_file": "../video1/media/video1.key", "background": "../video1/shared_assets/BlackBackground.mp4", "slides": "../video1/media/slides/Video1/slides.json",