Fix layer order: render slides below the talking head so it shows through the slide cutout

This commit is contained in:
2026-05-12 21:11:33 +02:00
parent 87424a6531
commit 5d7c77db91
+28 -53
View File
@@ -814,10 +814,10 @@ def build_filter_complex(
Layer structure (bottom to top): Layer structure (bottom to top):
- Layer 1: Background (solid color, image, or video) - Layer 1: Background (solid color, image, or video)
- Layer 2: "below" triggered videos (vfb/vsb) — behind talking head - Layer 2: "below" triggered videos (vfb/vf2b/vsb) — behind slides, use with slide on top to mask
- Layer 3: Always visible videos (like talking head) in cutouts - Layer 3: Slides (transparent in talking-head cutout area)
- Layer 4: Slides (with time-based enable) - Layer 4: Always visible videos (talking head) — above slides, visible through cutout
- Layer 5: "above" triggered videos (vft/vst) — in front of slides - Layer 5: "above" triggered videos (vft/vf2t/vst) — topmost, covers everything including talking head
- Layer 6: Camera transform - Layer 6: Camera transform
- Layer 7: Outro videos (fullscreen, after narration ends) - Layer 7: Outro videos (fullscreen, after narration ends)
- Audio: Main audio mixed with triggered sound effects and outro audio - Audio: Main audio mixed with triggered sound effects and outro audio
@@ -846,8 +846,7 @@ def build_filter_complex(
current_label = "bg" current_label = "bg"
# Add "below" triggered video overlays (vfb/vsb) BEFORE the talking head # Layer 2: "below" triggered video overlays (vfb/vsb) — behind slides and talking head
# so they sit behind it in the composite stack.
for i, event in enumerate(plan.video_events): for i, event in enumerate(plan.video_events):
if event.layer != "below": if event.layer != "below":
continue continue
@@ -884,23 +883,37 @@ def build_filter_complex(
) )
current_label = next_label current_label = next_label
# Overlay always_visible videos (like talking head) # Layer 3: Slides (transparent in the talking-head cutout area)
# If there are narration pauses, we need to segment the video for i, event in enumerate(plan.slide_events):
slide_idx = slide_inputs[event.slide_id]
slide_label = f"s{i}"
filters.append(
f"[{slide_idx}:v]scale={width}:{height}:"
f"force_original_aspect_ratio=decrease,pad={width}:{height}:(ow-iw)/2:(oh-ih)/2:color=0x00000000[{slide_label}]"
)
next_label = f"sbase{i}"
enable_expr = f"between(t\\,{event.start_time:.3f}\\,{event.end_time:.3f})"
filters.append(
f"[{current_label}][{slide_label}]overlay="
f"x=0:y=0:enable={enable_expr}"
f"[{next_label}]"
)
current_label = next_label
# Layer 4: Always-visible videos (talking head) — above slides, visible through cutout
for i, (video_id, video_source, cutout) in enumerate(plan.narration_videos): for i, (video_id, video_source, cutout) in enumerate(plan.narration_videos):
input_idx = always_visible_inputs[i] input_idx = always_visible_inputs[i]
cut_x, cut_y, cut_width, cut_height = _calculate_cutout_position( cut_x, cut_y, cut_width, cut_height = _calculate_cutout_position(
cutout, width, height cutout, width, height
) )
# Apply zoom factor to cutout dimensions
zoom = video_source.zoom zoom = video_source.zoom
zoomed_width = int(cut_width * zoom) zoomed_width = int(cut_width * zoom)
zoomed_height = int(cut_height * zoom) zoomed_height = int(cut_height * zoom)
if not plan.narration_pauses: if not plan.narration_pauses:
# Simple case: no pauses, continuous overlay
# fps+setpts normalise the source to a constant frame rate and reset
# the timeline to 0 so the video stays locked to the audio track.
video_label = f"av{i}" video_label = f"av{i}"
filters.append( filters.append(
f"[{input_idx}:v]fps={plan.config.fps},setpts=PTS-STARTPTS," f"[{input_idx}:v]fps={plan.config.fps},setpts=PTS-STARTPTS,"
@@ -916,18 +929,12 @@ def build_filter_complex(
) )
current_label = next_label current_label = next_label
else: else:
# Complex case: narration pauses - segment the video
# Each segment is trimmed from source and positioned in output timeline
segments = _build_narration_segments( segments = _build_narration_segments(
plan.narration_pauses, plan.total_duration plan.narration_pauses, plan.total_duration
) )
for seg_idx, (src_start, src_end, out_start, out_end) in enumerate( for seg_idx, (src_start, src_end, out_start, out_end) in enumerate(segments):
segments
):
seg_label = f"av{i}_seg{seg_idx}" seg_label = f"av{i}_seg{seg_idx}"
# Trim to source range, then shift PTS to output position
# setpts=PTS-STARTPTS puts segment at 0, then +offset/TB shifts to output time
pts_offset = out_start pts_offset = out_start
filters.append( filters.append(
f"[{input_idx}:v]trim={src_start:.3f}:{src_end:.3f}," f"[{input_idx}:v]trim={src_start:.3f}:{src_end:.3f},"
@@ -938,7 +945,6 @@ def build_filter_complex(
f"format=rgba[{seg_label}]" f"format=rgba[{seg_label}]"
) )
# Overlay with enable for this segment's output time range
next_label = f"avbase{i}_seg{seg_idx}" next_label = f"avbase{i}_seg{seg_idx}"
enable_expr = f"between(t\\,{out_start:.3f}\\,{out_end:.3f})" enable_expr = f"between(t\\,{out_start:.3f}\\,{out_end:.3f})"
filters.append( filters.append(
@@ -947,29 +953,8 @@ def build_filter_complex(
) )
current_label = next_label current_label = next_label
# Add slide overlays with time-based enable # Layer 5: "above" triggered videos (vft/vf2t/vst) — topmost, covers slides and talking head
for i, event in enumerate(plan.slide_events): # Use case: fullscreen video that intentionally masks the narrator
slide_idx = slide_inputs[event.slide_id]
# Scale slide to full frame size (transparent areas show through)
slide_label = f"s{i}"
filters.append(
f"[{slide_idx}:v]scale={width}:{height}:"
f"force_original_aspect_ratio=decrease,pad={width}:{height}:(ow-iw)/2:(oh-ih)/2:color=0x00000000[{slide_label}]"
)
# Overlay at 0,0 (full frame) with time-based enable
next_label = f"sbase{i}"
enable_expr = f"between(t\\,{event.start_time:.3f}\\,{event.end_time:.3f})"
filters.append(
f"[{current_label}][{slide_label}]overlay="
f"x=0:y=0:enable={enable_expr}"
f"[{next_label}]"
)
current_label = next_label
# Add "above-slides" triggered video overlays (vft/vst or layer="above")
for i, event in enumerate(plan.video_events): for i, event in enumerate(plan.video_events):
if event.layer != "above": if event.layer != "above":
continue continue
@@ -978,22 +963,15 @@ def build_filter_complex(
event.cutout, width, height event.cutout, width, height
) )
# Calculate effective end time (respecting 'take' parameter)
duration = event.end_time - event.start_time duration = event.end_time - event.start_time
if event.video_source.take is not None: if event.video_source.take is not None:
duration = min(duration, event.video_source.take) duration = min(duration, event.video_source.take)
effective_end = event.start_time + duration effective_end = event.start_time + duration
# Apply zoom factor to cutout dimensions
zoom = event.video_source.zoom zoom = event.video_source.zoom
zoomed_width = int(cut_width * zoom) zoomed_width = int(cut_width * zoom)
zoomed_height = int(cut_height * zoom) zoomed_height = int(cut_height * zoom)
# Scale to cover the zoomed area (like CSS object-fit: cover)
# Then crop to cutout dimensions (centered)
# Use setpts to sync video start with overlay enable time
# IMPORTANT: convert to rgba FIRST (before scale/crop) so the alpha channel
# is preserved throughout. scale in yuva444p10le can silently strip alpha.
video_label = f"tv{i}" video_label = f"tv{i}"
start_pts = event.start_time start_pts = event.start_time
filters.append( filters.append(
@@ -1004,8 +982,6 @@ def build_filter_complex(
f"[{video_label}]" f"[{video_label}]"
) )
# Overlay with time-based enable; format=auto lets FFmpeg pick the right
# compositing format so the RGBA alpha channel is respected.
next_label = f"tvbase{i}" next_label = f"tvbase{i}"
enable_expr = f"between(t\\,{event.start_time:.3f}\\,{effective_end:.3f})" enable_expr = f"between(t\\,{event.start_time:.3f}\\,{effective_end:.3f})"
filters.append( filters.append(
@@ -1013,7 +989,6 @@ def build_filter_complex(
f"x={cut_x}:y={cut_y}:enable={enable_expr}:format=auto" f"x={cut_x}:y={cut_y}:enable={enable_expr}:format=auto"
f"[{next_label}]" f"[{next_label}]"
) )
current_label = next_label current_label = next_label
# Scene composition complete - now apply camera transform # Scene composition complete - now apply camera transform