Performance fixes: limit FFmpeg thread counts, bound input probing, and keep filter chains in RGBA
This commit is contained in:
+28
-25
@@ -19,7 +19,7 @@ from .models import (
|
|||||||
from typing import Union, Optional
|
from typing import Union, Optional
|
||||||
|
|
||||||
# Number of parallel workers for chunk processing
|
# Number of parallel workers for chunk processing
|
||||||
DEFAULT_CHUNK_WORKERS = 4
|
DEFAULT_CHUNK_WORKERS = 1
|
||||||
|
|
||||||
# Chunk duration in seconds for parallel filter processing (avoids huge intermediate files)
|
# Chunk duration in seconds for parallel filter processing (avoids huge intermediate files)
|
||||||
CHUNK_DURATION = 60
|
CHUNK_DURATION = 60
|
||||||
@@ -770,16 +770,17 @@ def apply_combined_video_filters(
|
|||||||
|
|
||||||
# Build FFmpeg command
|
# Build FFmpeg command
|
||||||
cmd = ["ffmpeg", "-y"]
|
cmd = ["ffmpeg", "-y"]
|
||||||
|
# Global options must come before -i; placed after -i they become output options and don't limit filter threads.
|
||||||
|
cmd.extend(["-threads", "1", "-filter_threads", "1"])
|
||||||
|
|
||||||
if take is not None:
|
if take is not None:
|
||||||
cmd.extend(["-t", str(take)])
|
cmd.extend(["-t", str(take)])
|
||||||
|
|
||||||
cmd.extend(
|
cmd.extend(
|
||||||
[
|
[
|
||||||
|
"-probesize", "50000000", "-analyzeduration", "50000000",
|
||||||
"-i",
|
"-i",
|
||||||
str(input_path),
|
str(input_path),
|
||||||
"-filter_threads",
|
|
||||||
"1",
|
|
||||||
"-vf",
|
"-vf",
|
||||||
video_filter,
|
video_filter,
|
||||||
"-c:v",
|
"-c:v",
|
||||||
@@ -887,7 +888,9 @@ def build_mask_filter(config: dict) -> str:
|
|||||||
alpha_expr = "+".join(conditions)
|
alpha_expr = "+".join(conditions)
|
||||||
alpha_expr = f"if({alpha_expr},0,alpha(X,Y))"
|
alpha_expr = f"if({alpha_expr},0,alpha(X,Y))"
|
||||||
|
|
||||||
return f"geq=lum='lum(X,Y)':cb='cb(X,Y)':cr='cr(X,Y)':a='{alpha_expr}'"
|
# Use r/g/b passthrough so this works in rgba space (as output by gnommokey/color_grade)
|
||||||
|
# without triggering an rgba→yuv conversion that would spawn 11 more swscaler threads.
|
||||||
|
return f"geq=r='r(X,Y)':g='g(X,Y)':b='b(X,Y)':a='{alpha_expr}'"
|
||||||
|
|
||||||
|
|
||||||
def build_color_grade_filter(config: dict) -> str:
|
def build_color_grade_filter(config: dict) -> str:
|
||||||
@@ -1140,8 +1143,9 @@ def build_gnommokey_filter(config: dict) -> str:
|
|||||||
parts.append(f"alphaextract,avgblur=sizeX={radius}:sizeY={radius}[blur]")
|
parts.append(f"alphaextract,avgblur=sizeX={radius}:sizeY={radius}[blur]")
|
||||||
# This gets complex - for now, skip alpha blur and just use erosion
|
# This gets complex - for now, skip alpha blur and just use erosion
|
||||||
|
|
||||||
# Ensure output is in a good format
|
# Stay in rgba so downstream filters (color_grade, mask) don't trigger
|
||||||
parts.append("format=yuva444p10le")
|
# a redundant yuva444p10le→rgba round-trip and its 11-thread swscaler call.
|
||||||
|
# The caller (_process_chunk_to_prores4444) appends format=yuva444p10le at the end.
|
||||||
|
|
||||||
return ",".join(parts)
|
return ",".join(parts)
|
||||||
|
|
||||||
@@ -1353,10 +1357,20 @@ def _process_chunk_to_prores4444(
|
|||||||
# Build FFmpeg command
|
# Build FFmpeg command
|
||||||
cmd: list[str] = ["ffmpeg", "-y"]
|
cmd: list[str] = ["ffmpeg", "-y"]
|
||||||
|
|
||||||
|
# Global thread limits MUST be before the first -i.
|
||||||
|
# After -i they become output-stream options and FFmpeg ignores them for the
|
||||||
|
# filter graph — each geq stage then spawns one thread per CPU core (11 on M-series),
|
||||||
|
# causing the N-way RGBA frame buffer explosion that OOM-kills the process.
|
||||||
|
cmd.extend(["-threads", "1", "-filter_threads", "1"])
|
||||||
|
|
||||||
# Seek to start time (before input for fast seeking)
|
# Seek to start time (before input for fast seeking)
|
||||||
if start_time > 0:
|
if start_time > 0:
|
||||||
cmd.extend(["-ss", str(start_time)])
|
cmd.extend(["-ss", str(start_time)])
|
||||||
|
|
||||||
|
# Limit initial file analysis to 50 MB. Without this, FFmpeg scans the entire
|
||||||
|
# source file when moov is at the end (common for camera recordings), which reads
|
||||||
|
# gigabytes of data and triggers OOM when multiple chunk workers run in parallel.
|
||||||
|
cmd.extend(["-probesize", "50000000", "-analyzeduration", "50000000"])
|
||||||
cmd.extend(["-i", str(input_path)])
|
cmd.extend(["-i", str(input_path)])
|
||||||
|
|
||||||
# Limit duration
|
# Limit duration
|
||||||
@@ -1364,13 +1378,8 @@ def _process_chunk_to_prores4444(
|
|||||||
if actual_take is not None:
|
if actual_take is not None:
|
||||||
cmd.extend(["-t", str(actual_take)])
|
cmd.extend(["-t", str(actual_take)])
|
||||||
|
|
||||||
# Video encode: ProRes 4444 with alpha
|
|
||||||
# -filter_threads 1: geq is serial anyway; limiting threads eliminates the N-way
|
|
||||||
# RGBA frame buffer explosion that causes OOM when chunk workers run in parallel.
|
|
||||||
cmd.extend(
|
cmd.extend(
|
||||||
[
|
[
|
||||||
"-filter_threads",
|
|
||||||
"1",
|
|
||||||
"-vf",
|
"-vf",
|
||||||
video_filter,
|
video_filter,
|
||||||
"-c:v",
|
"-c:v",
|
||||||
@@ -1682,10 +1691,9 @@ def apply_chroma_key(
|
|||||||
|
|
||||||
# Build FFmpeg command
|
# Build FFmpeg command
|
||||||
# ProRes 4444 profile for alpha channel support
|
# ProRes 4444 profile for alpha channel support
|
||||||
cmd = [
|
cmd = ["ffmpeg", "-y"]
|
||||||
"ffmpeg",
|
# Global options before -i
|
||||||
"-y", # Overwrite output
|
cmd.extend(["-threads", "1", "-filter_threads", "1"])
|
||||||
]
|
|
||||||
|
|
||||||
# Add duration limit if specified (before input for efficiency)
|
# Add duration limit if specified (before input for efficiency)
|
||||||
if take is not None:
|
if take is not None:
|
||||||
@@ -1695,8 +1703,6 @@ def apply_chroma_key(
|
|||||||
[
|
[
|
||||||
"-i",
|
"-i",
|
||||||
str(input_path),
|
str(input_path),
|
||||||
"-filter_threads",
|
|
||||||
"1",
|
|
||||||
"-vf",
|
"-vf",
|
||||||
video_filter,
|
video_filter,
|
||||||
"-c:v",
|
"-c:v",
|
||||||
@@ -1786,14 +1792,13 @@ def apply_mask(
|
|||||||
# Using: if(condition, 0, alpha(X,Y))
|
# Using: if(condition, 0, alpha(X,Y))
|
||||||
alpha_expr = f"if({alpha_expr},0,alpha(X,Y))"
|
alpha_expr = f"if({alpha_expr},0,alpha(X,Y))"
|
||||||
|
|
||||||
# Build the geq filter - preserve luma, chroma, modify alpha
|
# Build the geq filter - preserve RGB channels, modify alpha
|
||||||
video_filter = f"geq=lum='lum(X,Y)':cb='cb(X,Y)':cr='cr(X,Y)':a='{alpha_expr}'"
|
video_filter = f"geq=r='r(X,Y)':g='g(X,Y)':b='b(X,Y)':a='{alpha_expr}'"
|
||||||
|
|
||||||
# Build FFmpeg command
|
# Build FFmpeg command
|
||||||
cmd = [
|
cmd = ["ffmpeg", "-y"]
|
||||||
"ffmpeg",
|
# Global options before -i
|
||||||
"-y", # Overwrite output
|
cmd.extend(["-threads", "1", "-filter_threads", "1"])
|
||||||
]
|
|
||||||
|
|
||||||
if take is not None:
|
if take is not None:
|
||||||
cmd.extend(["-t", str(take)])
|
cmd.extend(["-t", str(take)])
|
||||||
@@ -1802,8 +1807,6 @@ def apply_mask(
|
|||||||
[
|
[
|
||||||
"-i",
|
"-i",
|
||||||
str(input_path),
|
str(input_path),
|
||||||
"-filter_threads",
|
|
||||||
"1",
|
|
||||||
"-vf",
|
"-vf",
|
||||||
video_filter,
|
video_filter,
|
||||||
"-c:v",
|
"-c:v",
|
||||||
|
|||||||
@@ -303,6 +303,11 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]:
|
|||||||
"""Build the complete FFmpeg command as a list of arguments."""
|
"""Build the complete FFmpeg command as a list of arguments."""
|
||||||
cmd = ["ffmpeg", "-y"] # -y to overwrite output
|
cmd = ["ffmpeg", "-y"] # -y to overwrite output
|
||||||
|
|
||||||
|
# Global thread limits before any -i. Without this, each format=rgba conversion
|
||||||
|
# in the filter graph (one per video layer) spawns one swscaler thread per CPU core,
|
||||||
|
# causing OOM on Apple Silicon where av_cpu_count() returns 10-11.
|
||||||
|
cmd.extend(["-threads", "1", "-filter_threads", "1"])
|
||||||
|
|
||||||
# Resolve paths to absolute
|
# Resolve paths to absolute
|
||||||
project_path = plan.project_path.resolve()
|
project_path = plan.project_path.resolve()
|
||||||
output_path = output_path.resolve()
|
output_path = output_path.resolve()
|
||||||
|
|||||||
Reference in New Issue
Block a user