Fixing gnommo
This commit is contained in:
+180
-29
@@ -22,12 +22,46 @@ from .preprocessor import run_ffmpeg_with_progress
|
||||
|
||||
|
||||
def _get_audio_duration(audio_path: Path) -> float:
|
||||
"""Get duration of an audio file using ffprobe."""
|
||||
"""Get duration of an audio file using ffprobe.
|
||||
|
||||
For MP3 files, counts packets directly to get an accurate duration regardless
|
||||
of whether the file has a Xing/VBRI header. Falls back to format duration for
|
||||
other formats.
|
||||
"""
|
||||
if audio_path.suffix.lower() == ".mp3":
|
||||
# Count actual packets rather than trusting the header estimate.
|
||||
# This is slower but accurate for headerless VBR/CBR MP3s.
|
||||
cmd = [
|
||||
"ffprobe",
|
||||
"-v",
|
||||
"error",
|
||||
"-count_packets",
|
||||
"-show_entries",
|
||||
"stream=nb_read_packets,duration",
|
||||
"-select_streams",
|
||||
"a:0",
|
||||
"-of",
|
||||
"default=noprint_wrappers=1:nokey=1",
|
||||
str(audio_path),
|
||||
]
|
||||
result = subprocess.run(cmd, capture_output=True, text=True)
|
||||
if result.returncode == 0:
|
||||
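# Output: duration\nnb_read_packets — return the first line that parses as a positive number. NOTE(review): nb_read_packets is a packet count, not seconds; if duration is N/A the count would be returned as the duration — confirm.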
|
||||
for line in result.stdout.strip().splitlines():
|
||||
try:
|
||||
val = float(line)
|
||||
if val > 0:
|
||||
return val
|
||||
except ValueError:
|
||||
continue
|
||||
cmd = [
|
||||
"ffprobe",
|
||||
"-v", "error",
|
||||
"-show_entries", "format=duration",
|
||||
"-of", "default=noprint_wrappers=1:nokey=1",
|
||||
"-v",
|
||||
"error",
|
||||
"-show_entries",
|
||||
"format=duration",
|
||||
"-of",
|
||||
"default=noprint_wrappers=1:nokey=1",
|
||||
str(audio_path),
|
||||
]
|
||||
result = subprocess.run(cmd, capture_output=True, text=True)
|
||||
@@ -208,16 +242,28 @@ def _resolve_video_path(
|
||||
|
||||
|
||||
def _has_audio_stream(video_path: Path) -> bool:
|
||||
"""Check if a video file contains an audio stream using ffprobe."""
|
||||
"""Check if a video file contains a non-empty audio stream.
|
||||
|
||||
Uses -analyzeduration 0 to avoid the slow avformat_find_stream_info() scan
|
||||
that happens when an MP4 has a declared audio track with no actual frames —
|
||||
ffprobe would otherwise scan the entire file looking for audio packets.
|
||||
|
||||
Also checks nb_frames to reject ghost audio tracks (stream header exists in
|
||||
the moov atom but no sample data in stsc/stsz).
|
||||
"""
|
||||
result = subprocess.run(
|
||||
[
|
||||
"ffprobe",
|
||||
"-v",
|
||||
"error",
|
||||
"-analyzeduration",
|
||||
"0",
|
||||
"-probesize",
|
||||
"1000000",
|
||||
"-select_streams",
|
||||
"a",
|
||||
"a:0",
|
||||
"-show_entries",
|
||||
"stream=index",
|
||||
"stream=index,nb_frames",
|
||||
"-of",
|
||||
"csv=p=0",
|
||||
str(video_path),
|
||||
@@ -225,7 +271,16 @@ def _has_audio_stream(video_path: Path) -> bool:
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
return bool(result.stdout.strip())
|
||||
output = result.stdout.strip()
|
||||
if not output:
|
||||
return False
|
||||
# output is "index" or "index,nb_frames"
|
||||
parts = output.split(",")
|
||||
if len(parts) >= 2:
|
||||
nb_frames = parts[1].strip()
|
||||
if nb_frames == "0":
|
||||
return False # Ghost audio track — declared but no sample data
|
||||
return True
|
||||
|
||||
|
||||
def _build_audio_channel_filter(use_audio_channels: str) -> str:
|
||||
@@ -263,11 +318,18 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]:
|
||||
# Add -ss seek BEFORE -i for skip parameter and/or partial rendering
|
||||
always_visible_inputs: list[int] = []
|
||||
for video_id, video_source, cutout in plan.narration_videos:
|
||||
video_path = _resolve_video_path(videos_dir, video_source, shared_assets_dir, project_path)
|
||||
video_path = _resolve_video_path(
|
||||
videos_dir, video_source, shared_assets_dir, project_path
|
||||
)
|
||||
# Combine video skip setting with partial render offset
|
||||
total_seek = video_source.skip + plan.input_seek_time
|
||||
if total_seek > 0:
|
||||
cmd.extend(["-ss", f"{total_seek:.3f}"])
|
||||
# Skip stream analysis — codec params are in the container header, and
|
||||
# duration is already known by gnommo via ffprobe (plan.total_duration).
|
||||
# Without this, FFmpeg reads 100MB+ of compressed data per input at 4K
|
||||
# bitrates before encoding starts ("Estimating duration from bitrate").
|
||||
cmd.extend(["-analyzeduration", "0", "-probesize", "1000"])
|
||||
cmd.extend(["-i", str(video_path)])
|
||||
always_visible_inputs.append(input_idx)
|
||||
input_idx += 1
|
||||
@@ -283,18 +345,26 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]:
|
||||
shared_assets_dir = project_path.parent / "shared_assets"
|
||||
videos_json_bg = shared_assets_dir / "videos.json"
|
||||
if not videos_json_bg.exists():
|
||||
raise RenderError(f"shared_assets/videos.json not found (needed for background handle '{bg_handle}')")
|
||||
raise RenderError(
|
||||
f"shared_assets/videos.json not found (needed for background handle '{bg_handle}')"
|
||||
)
|
||||
bg_videos = _read_json(videos_json_bg)
|
||||
if bg_handle not in bg_videos:
|
||||
raise RenderError(f"Background handle '{bg_handle}' not found in shared_assets/videos.json")
|
||||
raise RenderError(
|
||||
f"Background handle '{bg_handle}' not found in shared_assets/videos.json"
|
||||
)
|
||||
bg_path = shared_assets_dir / bg_videos[bg_handle]["source_file"]
|
||||
if not bg_path.exists():
|
||||
raise RenderError(f"Background file not found: {bg_path} (from handle '{bg_handle}')")
|
||||
raise RenderError(
|
||||
f"Background file not found: {bg_path} (from handle '{bg_handle}')"
|
||||
)
|
||||
image_extensions = {".png", ".jpg", ".jpeg", ".gif", ".bmp", ".tiff", ".webp"}
|
||||
bg_is_image = bg_path.suffix.lower() in image_extensions
|
||||
# Loop background videos infinitely
|
||||
if not bg_is_image:
|
||||
cmd.extend(["-stream_loop", "-1"])
|
||||
# Duration of background video is irrelevant (looped or image) — skip analysis
|
||||
cmd.extend(["-analyzeduration", "0", "-probesize", "1000"])
|
||||
cmd.extend(["-i", str(bg_path)])
|
||||
bg_idx = input_idx
|
||||
input_idx += 1
|
||||
@@ -325,14 +395,24 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]:
|
||||
video_path = _resolve_video_path(
|
||||
videos_dir, event.video_source, shared_assets_dir, project_path
|
||||
)
|
||||
# Seek to skip point before loading input
|
||||
skip = event.video_source.skip
|
||||
if skip > 0:
|
||||
cmd.extend(["-ss", f"{skip:.3f}"])
|
||||
cmd.extend(["-analyzeduration", "0", "-probesize", "1000"])
|
||||
# Use pre-probed duration to tell FFmpeg exactly how much to read,
|
||||
# preventing scans of ghost audio tracks on empty MP4 audio streams.
|
||||
if event.video_source.duration is not None:
|
||||
remaining = event.video_source.duration - skip
|
||||
if remaining > 0:
|
||||
cmd.extend(["-t", f"{remaining:.3f}"])
|
||||
cmd.extend(["-i", str(video_path)])
|
||||
video_inputs[i] = input_idx
|
||||
input_idx += 1
|
||||
if _has_audio_stream(video_path):
|
||||
has_audio = event.video_source.has_audio
|
||||
if has_audio is None:
|
||||
print(f" Warning: no cached metadata for '{event.video_source.source_file}' — run 'gnommo import' to avoid slow probing")
|
||||
has_audio = _has_audio_stream(video_path)
|
||||
if has_audio:
|
||||
video_events_with_audio.add(i)
|
||||
|
||||
# Input: outro videos (play after narration ends)
|
||||
@@ -343,14 +423,22 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]:
|
||||
video_path = _resolve_video_path(
|
||||
videos_dir, event.video_source, shared_assets_dir, project_path
|
||||
)
|
||||
# Seek to skip point before loading input
|
||||
skip = event.video_source.skip
|
||||
if skip > 0:
|
||||
cmd.extend(["-ss", f"{skip:.3f}"])
|
||||
cmd.extend(["-analyzeduration", "0", "-probesize", "1000"])
|
||||
if event.video_source.duration is not None:
|
||||
remaining = event.video_source.duration - skip
|
||||
if remaining > 0:
|
||||
cmd.extend(["-t", f"{remaining:.3f}"])
|
||||
cmd.extend(["-i", str(video_path)])
|
||||
outro_inputs[i] = input_idx
|
||||
input_idx += 1
|
||||
if _has_audio_stream(video_path):
|
||||
has_audio = event.video_source.has_audio
|
||||
if has_audio is None:
|
||||
print(f" Warning: no cached metadata for '{event.video_source.source_file}' — run 'gnommo import' to avoid slow probing")
|
||||
has_audio = _has_audio_stream(video_path)
|
||||
if has_audio:
|
||||
outro_events_with_audio.add(i)
|
||||
|
||||
# Track where audio inputs start
|
||||
@@ -365,12 +453,24 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]:
|
||||
if event.audio_id not in audio_inputs:
|
||||
audio_path = audio_dir / event.audio_def.file
|
||||
audio_path, _ = resolve_with_cache(audio_path, project_path)
|
||||
# Use pre-probed duration from audio.json if available (set by import).
|
||||
# For MP3 without Xing/VBRI headers this is critical — FFmpeg otherwise
|
||||
# scans the whole file to estimate duration (100s+ for large files).
|
||||
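# For the -t limit, fall back to a live probe only for MP3 when the duration wasn't pre-cached; the crossfade-loop cache further down still probes any format when no duration is known.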
|
||||
file_duration = event.audio_def.duration
|
||||
if file_duration is None and audio_path.suffix.lower() == ".mp3":
|
||||
file_duration = _get_audio_duration(audio_path)
|
||||
if file_duration is not None:
|
||||
cmd.extend(["-t", str(file_duration)])
|
||||
cmd.extend(["-i", str(audio_path)])
|
||||
audio_inputs[event.audio_id] = input_idx
|
||||
input_idx += 1
|
||||
# Cache duration if this audio uses crossfade looping
|
||||
# Cache duration for crossfade loop filter
|
||||
if event.audio_def.loop and event.audio_def.overlap:
|
||||
audio_durations[event.audio_id] = _get_audio_duration(audio_path)
|
||||
audio_durations[event.audio_id] = (
|
||||
file_duration if file_duration is not None
|
||||
else _get_audio_duration(audio_path)
|
||||
)
|
||||
|
||||
# Build filter_complex
|
||||
filter_complex = build_filter_complex(
|
||||
@@ -418,7 +518,7 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]:
|
||||
"-preset",
|
||||
"fast",
|
||||
"-crf",
|
||||
"23",
|
||||
"20",
|
||||
"-c:a",
|
||||
"aac",
|
||||
"-b:a",
|
||||
@@ -793,6 +893,43 @@ def build_filter_complex(
|
||||
)
|
||||
current_label = next_label
|
||||
|
||||
# Add "below-slides" triggered video overlays (vfb/vsb or layer="below")
|
||||
for i, event in enumerate(plan.video_events):
|
||||
if event.layer != "below":
|
||||
continue
|
||||
video_idx = video_inputs[i]
|
||||
cut_x, cut_y, cut_width, cut_height = _calculate_cutout_position(
|
||||
event.cutout, width, height
|
||||
)
|
||||
|
||||
duration = event.end_time - event.start_time
|
||||
if event.video_source.take is not None:
|
||||
duration = min(duration, event.video_source.take)
|
||||
effective_end = event.start_time + duration
|
||||
|
||||
zoom = event.video_source.zoom
|
||||
zoomed_width = int(cut_width * zoom)
|
||||
zoomed_height = int(cut_height * zoom)
|
||||
|
||||
video_label = f"tvb{i}"
|
||||
start_pts = event.start_time
|
||||
filters.append(
|
||||
f"[{video_idx}:v]format=yuva444p10le,"
|
||||
f"setpts=PTS-STARTPTS+{start_pts:.3f}/TB,"
|
||||
f"scale={zoomed_width}:{zoomed_height}:force_original_aspect_ratio=increase,"
|
||||
f"crop={cut_width}:{cut_height}:(iw-{cut_width})/2:(ih-{cut_height})/2,"
|
||||
f"format=rgba[{video_label}]"
|
||||
)
|
||||
|
||||
next_label = f"tvbbase{i}"
|
||||
enable_expr = f"between(t\\,{event.start_time:.3f}\\,{effective_end:.3f})"
|
||||
filters.append(
|
||||
f"[{current_label}][{video_label}]overlay="
|
||||
f"x={cut_x}:y={cut_y}:enable={enable_expr}"
|
||||
f"[{next_label}]"
|
||||
)
|
||||
current_label = next_label
|
||||
|
||||
# Add slide overlays with time-based enable
|
||||
for i, event in enumerate(plan.slide_events):
|
||||
slide_idx = slide_inputs[event.slide_id]
|
||||
@@ -815,8 +952,10 @@ def build_filter_complex(
|
||||
|
||||
current_label = next_label
|
||||
|
||||
# Add triggered video overlays with time-based enable
|
||||
# Add "above-slides" triggered video overlays (vft/vst or layer="above")
|
||||
for i, event in enumerate(plan.video_events):
|
||||
if event.layer != "above":
|
||||
continue
|
||||
video_idx = video_inputs[i]
|
||||
cut_x, cut_y, cut_width, cut_height = _calculate_cutout_position(
|
||||
event.cutout, width, height
|
||||
@@ -836,22 +975,25 @@ def build_filter_complex(
|
||||
# Scale to cover the zoomed area (like CSS object-fit: cover)
|
||||
# Then crop to cutout dimensions (centered)
|
||||
# Use setpts to sync video start with overlay enable time
|
||||
# IMPORTANT: convert to rgba FIRST (before scale/crop) so the alpha channel
|
||||
# is preserved throughout. scale in yuva444p10le can silently strip alpha.
|
||||
video_label = f"tv{i}"
|
||||
start_pts = event.start_time
|
||||
filters.append(
|
||||
f"[{video_idx}:v]format=yuva444p10le,"
|
||||
f"[{video_idx}:v]format=rgba,"
|
||||
f"setpts=PTS-STARTPTS+{start_pts:.3f}/TB,"
|
||||
f"scale={zoomed_width}:{zoomed_height}:force_original_aspect_ratio=increase,"
|
||||
f"crop={cut_width}:{cut_height}:(iw-{cut_width})/2:(ih-{cut_height})/2,"
|
||||
f"format=rgba[{video_label}]"
|
||||
f"crop={cut_width}:{cut_height}:(iw-{cut_width})/2:(ih-{cut_height})/2"
|
||||
f"[{video_label}]"
|
||||
)
|
||||
|
||||
# Overlay with time-based enable
|
||||
# Overlay with time-based enable; format=auto lets FFmpeg pick the right
|
||||
# compositing format so the RGBA alpha channel is respected.
|
||||
next_label = f"tvbase{i}"
|
||||
enable_expr = f"between(t\\,{event.start_time:.3f}\\,{effective_end:.3f})"
|
||||
filters.append(
|
||||
f"[{current_label}][{video_label}]overlay="
|
||||
f"x={cut_x}:y={cut_y}:enable={enable_expr}"
|
||||
f"x={cut_x}:y={cut_y}:enable={enable_expr}:format=auto"
|
||||
f"[{next_label}]"
|
||||
)
|
||||
|
||||
@@ -950,13 +1092,17 @@ def build_filter_complex(
|
||||
_, first_video_source, _ = plan.narration_videos[0]
|
||||
use_channels = first_video_source.use_audio_channels
|
||||
if use_channels == "auto":
|
||||
narration_path = _resolve_video_path(videos_dir, first_video_source, shared_assets_dir, project_path)
|
||||
narration_path = _resolve_video_path(
|
||||
videos_dir, first_video_source, shared_assets_dir, project_path
|
||||
)
|
||||
use_channels = _resolve_auto_channel(narration_path)
|
||||
channel_filter = _build_audio_channel_filter(use_channels)
|
||||
narration_volume = first_video_source.volume
|
||||
|
||||
# Build volume filter if not 1.0
|
||||
volume_filter = f"volume={narration_volume:.2f}" if narration_volume != 1.0 else ""
|
||||
volume_filter = (
|
||||
f"volume={narration_volume:.2f}" if narration_volume != 1.0 else ""
|
||||
)
|
||||
|
||||
# Use narration_end_time to stop audio before outro (if outro exists)
|
||||
audio_end_time = (
|
||||
@@ -980,7 +1126,9 @@ def build_filter_complex(
|
||||
)
|
||||
audio_labels_to_mix.append("[main_aud]")
|
||||
elif filter_parts:
|
||||
filters.append(f"[{main_audio_idx}:a]{','.join(filter_parts)}[main_aud]")
|
||||
filters.append(
|
||||
f"[{main_audio_idx}:a]{','.join(filter_parts)}[main_aud]"
|
||||
)
|
||||
audio_labels_to_mix.append("[main_aud]")
|
||||
else:
|
||||
audio_labels_to_mix.append(f"[{main_audio_idx}:a]")
|
||||
@@ -1066,7 +1214,10 @@ def build_filter_complex(
|
||||
label = f"aud{i}"
|
||||
delay_ms = int(event.start_time * 1000)
|
||||
|
||||
if event.audio_def.overlap and event.audio_id in audio_durations:
|
||||
if (
|
||||
event.audio_def.overlap
|
||||
and event.audio_id in audio_durations
|
||||
):
|
||||
# Crossfade loop: overlap copies with fade in/out
|
||||
audio_dur = audio_durations[event.audio_id]
|
||||
crossfade_filters = _build_crossfade_loop_filter(
|
||||
|
||||
Reference in New Issue
Block a user