Adding fix to the slide
This commit is contained in:
+28
-53
@@ -814,10 +814,10 @@ def build_filter_complex(
|
|||||||
|
|
||||||
Layer structure (bottom to top):
|
Layer structure (bottom to top):
|
||||||
- Layer 1: Background (solid color, image, or video)
|
- Layer 1: Background (solid color, image, or video)
|
||||||
- Layer 2: "below" triggered videos (vfb/vsb) — behind talking head
|
- Layer 2: "below" triggered videos (vfb/vf2b/vsb) — behind slides; place a slide on top to mask them
|
||||||
- Layer 3: Always visible videos (like talking head) in cutouts
|
- Layer 3: Slides (transparent in talking-head cutout area)
|
||||||
- Layer 4: Slides (with time-based enable)
|
- Layer 4: Always visible videos (talking head) — above slides, visible through cutout
|
||||||
- Layer 5: "above" triggered videos (vft/vst) — in front of slides
|
- Layer 5: "above" triggered videos (vft/vf2t/vst) — topmost, covers everything including talking head
|
||||||
- Layer 6: Camera transform
|
- Layer 6: Camera transform
|
||||||
- Layer 7: Outro videos (fullscreen, after narration ends)
|
- Layer 7: Outro videos (fullscreen, after narration ends)
|
||||||
- Audio: Main audio mixed with triggered sound effects and outro audio
|
- Audio: Main audio mixed with triggered sound effects and outro audio
|
||||||
@@ -846,8 +846,7 @@ def build_filter_complex(
|
|||||||
|
|
||||||
current_label = "bg"
|
current_label = "bg"
|
||||||
|
|
||||||
# Add "below" triggered video overlays (vfb/vsb) BEFORE the talking head
|
# Layer 2: "below" triggered video overlays (vfb/vf2b/vsb) — behind slides and talking head
|
||||||
# so they sit behind it in the composite stack.
|
|
||||||
for i, event in enumerate(plan.video_events):
|
for i, event in enumerate(plan.video_events):
|
||||||
if event.layer != "below":
|
if event.layer != "below":
|
||||||
continue
|
continue
|
||||||
@@ -884,23 +883,37 @@ def build_filter_complex(
|
|||||||
)
|
)
|
||||||
current_label = next_label
|
current_label = next_label
|
||||||
|
|
||||||
# Overlay always_visible videos (like talking head)
|
# Layer 3: Slides (transparent in the talking-head cutout area)
|
||||||
# If there are narration pauses, we need to segment the video
|
for i, event in enumerate(plan.slide_events):
|
||||||
|
slide_idx = slide_inputs[event.slide_id]
|
||||||
|
|
||||||
|
slide_label = f"s{i}"
|
||||||
|
filters.append(
|
||||||
|
f"[{slide_idx}:v]scale={width}:{height}:"
|
||||||
|
f"force_original_aspect_ratio=decrease,pad={width}:{height}:(ow-iw)/2:(oh-ih)/2:color=0x00000000[{slide_label}]"
|
||||||
|
)
|
||||||
|
|
||||||
|
next_label = f"sbase{i}"
|
||||||
|
enable_expr = f"between(t\\,{event.start_time:.3f}\\,{event.end_time:.3f})"
|
||||||
|
filters.append(
|
||||||
|
f"[{current_label}][{slide_label}]overlay="
|
||||||
|
f"x=0:y=0:enable={enable_expr}"
|
||||||
|
f"[{next_label}]"
|
||||||
|
)
|
||||||
|
current_label = next_label
|
||||||
|
|
||||||
|
# Layer 4: Always-visible videos (talking head) — above slides, visible through cutout
|
||||||
for i, (video_id, video_source, cutout) in enumerate(plan.narration_videos):
|
for i, (video_id, video_source, cutout) in enumerate(plan.narration_videos):
|
||||||
input_idx = always_visible_inputs[i]
|
input_idx = always_visible_inputs[i]
|
||||||
cut_x, cut_y, cut_width, cut_height = _calculate_cutout_position(
|
cut_x, cut_y, cut_width, cut_height = _calculate_cutout_position(
|
||||||
cutout, width, height
|
cutout, width, height
|
||||||
)
|
)
|
||||||
|
|
||||||
# Apply zoom factor to cutout dimensions
|
|
||||||
zoom = video_source.zoom
|
zoom = video_source.zoom
|
||||||
zoomed_width = int(cut_width * zoom)
|
zoomed_width = int(cut_width * zoom)
|
||||||
zoomed_height = int(cut_height * zoom)
|
zoomed_height = int(cut_height * zoom)
|
||||||
|
|
||||||
if not plan.narration_pauses:
|
if not plan.narration_pauses:
|
||||||
# Simple case: no pauses, continuous overlay
|
|
||||||
# fps+setpts normalise the source to a constant frame rate and reset
|
|
||||||
# the timeline to 0 so the video stays locked to the audio track.
|
|
||||||
video_label = f"av{i}"
|
video_label = f"av{i}"
|
||||||
filters.append(
|
filters.append(
|
||||||
f"[{input_idx}:v]fps={plan.config.fps},setpts=PTS-STARTPTS,"
|
f"[{input_idx}:v]fps={plan.config.fps},setpts=PTS-STARTPTS,"
|
||||||
@@ -916,18 +929,12 @@ def build_filter_complex(
|
|||||||
)
|
)
|
||||||
current_label = next_label
|
current_label = next_label
|
||||||
else:
|
else:
|
||||||
# Complex case: narration pauses - segment the video
|
|
||||||
# Each segment is trimmed from source and positioned in output timeline
|
|
||||||
segments = _build_narration_segments(
|
segments = _build_narration_segments(
|
||||||
plan.narration_pauses, plan.total_duration
|
plan.narration_pauses, plan.total_duration
|
||||||
)
|
)
|
||||||
|
|
||||||
for seg_idx, (src_start, src_end, out_start, out_end) in enumerate(
|
for seg_idx, (src_start, src_end, out_start, out_end) in enumerate(segments):
|
||||||
segments
|
|
||||||
):
|
|
||||||
seg_label = f"av{i}_seg{seg_idx}"
|
seg_label = f"av{i}_seg{seg_idx}"
|
||||||
# Trim to source range, then shift PTS to output position
|
|
||||||
# setpts=PTS-STARTPTS puts segment at 0, then +offset/TB shifts to output time
|
|
||||||
pts_offset = out_start
|
pts_offset = out_start
|
||||||
filters.append(
|
filters.append(
|
||||||
f"[{input_idx}:v]trim={src_start:.3f}:{src_end:.3f},"
|
f"[{input_idx}:v]trim={src_start:.3f}:{src_end:.3f},"
|
||||||
@@ -938,7 +945,6 @@ def build_filter_complex(
|
|||||||
f"format=rgba[{seg_label}]"
|
f"format=rgba[{seg_label}]"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Overlay with enable for this segment's output time range
|
|
||||||
next_label = f"avbase{i}_seg{seg_idx}"
|
next_label = f"avbase{i}_seg{seg_idx}"
|
||||||
enable_expr = f"between(t\\,{out_start:.3f}\\,{out_end:.3f})"
|
enable_expr = f"between(t\\,{out_start:.3f}\\,{out_end:.3f})"
|
||||||
filters.append(
|
filters.append(
|
||||||
@@ -947,29 +953,8 @@ def build_filter_complex(
|
|||||||
)
|
)
|
||||||
current_label = next_label
|
current_label = next_label
|
||||||
|
|
||||||
# Add slide overlays with time-based enable
|
# Layer 5: "above" triggered videos (vft/vf2t/vst) — topmost, covers slides and talking head
|
||||||
for i, event in enumerate(plan.slide_events):
|
# Use case: fullscreen video that intentionally masks the narrator
|
||||||
slide_idx = slide_inputs[event.slide_id]
|
|
||||||
|
|
||||||
# Scale slide to full frame size (transparent areas show through)
|
|
||||||
slide_label = f"s{i}"
|
|
||||||
filters.append(
|
|
||||||
f"[{slide_idx}:v]scale={width}:{height}:"
|
|
||||||
f"force_original_aspect_ratio=decrease,pad={width}:{height}:(ow-iw)/2:(oh-ih)/2:color=0x00000000[{slide_label}]"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Overlay at 0,0 (full frame) with time-based enable
|
|
||||||
next_label = f"sbase{i}"
|
|
||||||
enable_expr = f"between(t\\,{event.start_time:.3f}\\,{event.end_time:.3f})"
|
|
||||||
filters.append(
|
|
||||||
f"[{current_label}][{slide_label}]overlay="
|
|
||||||
f"x=0:y=0:enable={enable_expr}"
|
|
||||||
f"[{next_label}]"
|
|
||||||
)
|
|
||||||
|
|
||||||
current_label = next_label
|
|
||||||
|
|
||||||
# Add "above-slides" triggered video overlays (vft/vst or layer="above")
|
|
||||||
for i, event in enumerate(plan.video_events):
|
for i, event in enumerate(plan.video_events):
|
||||||
if event.layer != "above":
|
if event.layer != "above":
|
||||||
continue
|
continue
|
||||||
@@ -978,22 +963,15 @@ def build_filter_complex(
|
|||||||
event.cutout, width, height
|
event.cutout, width, height
|
||||||
)
|
)
|
||||||
|
|
||||||
# Calculate effective end time (respecting 'take' parameter)
|
|
||||||
duration = event.end_time - event.start_time
|
duration = event.end_time - event.start_time
|
||||||
if event.video_source.take is not None:
|
if event.video_source.take is not None:
|
||||||
duration = min(duration, event.video_source.take)
|
duration = min(duration, event.video_source.take)
|
||||||
effective_end = event.start_time + duration
|
effective_end = event.start_time + duration
|
||||||
|
|
||||||
# Apply zoom factor to cutout dimensions
|
|
||||||
zoom = event.video_source.zoom
|
zoom = event.video_source.zoom
|
||||||
zoomed_width = int(cut_width * zoom)
|
zoomed_width = int(cut_width * zoom)
|
||||||
zoomed_height = int(cut_height * zoom)
|
zoomed_height = int(cut_height * zoom)
|
||||||
|
|
||||||
# Scale to cover the zoomed area (like CSS object-fit: cover)
|
|
||||||
# Then crop to cutout dimensions (centered)
|
|
||||||
# Use setpts to sync video start with overlay enable time
|
|
||||||
# IMPORTANT: convert to rgba FIRST (before scale/crop) so the alpha channel
|
|
||||||
# is preserved throughout. scale in yuva444p10le can silently strip alpha.
|
|
||||||
video_label = f"tv{i}"
|
video_label = f"tv{i}"
|
||||||
start_pts = event.start_time
|
start_pts = event.start_time
|
||||||
filters.append(
|
filters.append(
|
||||||
@@ -1004,8 +982,6 @@ def build_filter_complex(
|
|||||||
f"[{video_label}]"
|
f"[{video_label}]"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Overlay with time-based enable; format=auto lets FFmpeg pick the right
|
|
||||||
# compositing format so the RGBA alpha channel is respected.
|
|
||||||
next_label = f"tvbase{i}"
|
next_label = f"tvbase{i}"
|
||||||
enable_expr = f"between(t\\,{event.start_time:.3f}\\,{effective_end:.3f})"
|
enable_expr = f"between(t\\,{event.start_time:.3f}\\,{effective_end:.3f})"
|
||||||
filters.append(
|
filters.append(
|
||||||
@@ -1013,7 +989,6 @@ def build_filter_complex(
|
|||||||
f"x={cut_x}:y={cut_y}:enable={enable_expr}:format=auto"
|
f"x={cut_x}:y={cut_y}:enable={enable_expr}:format=auto"
|
||||||
f"[{next_label}]"
|
f"[{next_label}]"
|
||||||
)
|
)
|
||||||
|
|
||||||
current_label = next_label
|
current_label = next_label
|
||||||
|
|
||||||
# Scene composition complete - now apply camera transform
|
# Scene composition complete - now apply camera transform
|
||||||
|
|||||||
Reference in New Issue
Block a user