From 60e2f20b0f0e41feab220ce680df8f279f4be2d9 Mon Sep 17 00:00:00 2001 From: jenstandstad Date: Tue, 12 May 2026 20:22:05 +0200 Subject: [PATCH] Adding performance tuning --- gnommo/cache.py | 32 ++++++++++++++++++++++++++++++++ gnommo/preprocessor.py | 14 ++++++++++---- gnommo/renderer.py | 4 +++- 3 files changed, 45 insertions(+), 5 deletions(-) diff --git a/gnommo/cache.py b/gnommo/cache.py index 36347bd..6f956b6 100644 --- a/gnommo/cache.py +++ b/gnommo/cache.py @@ -11,10 +11,42 @@ Files are looked up first locally, then in the cache at: """ import configparser +import os from pathlib import Path from typing import Optional, Tuple _cache_config: Optional[dict] = None +_perf_config: Optional[dict] = None + + +def get_ffmpeg_thread_count() -> int: + """Return FFmpeg thread count based on [performance] cpu_limit in ~/.gnommo.conf. + + cpu_limit is a fraction of logical CPUs (e.g. 0.8 = 80%). + Defaults to 1 when not configured, which is safe on memory-constrained machines. + + Example ~/.gnommo.conf: + [performance] + cpu_limit = 0.8 + """ + global _perf_config + if _perf_config is None: + config_path = Path.home() / ".gnommo.conf" + _perf_config = {} + if config_path.exists(): + cfg = configparser.ConfigParser() + cfg.read(config_path) + if cfg.has_option("performance", "cpu_limit"): + try: + _perf_config["cpu_limit"] = float(cfg.get("performance", "cpu_limit")) + except ValueError: + pass + + cpu_limit = _perf_config.get("cpu_limit") + if cpu_limit is None: + return 1 + cpu_count = os.cpu_count() or 1 + return max(1, int(cpu_count * cpu_limit)) def load_cache_config() -> Optional[Path]: diff --git a/gnommo/preprocessor.py b/gnommo/preprocessor.py index 66cb5bc..8010605 100644 --- a/gnommo/preprocessor.py +++ b/gnommo/preprocessor.py @@ -18,6 +18,12 @@ from .models import ( ) from typing import Union, Optional +def _tc() -> str: + """Return FFmpeg thread count string from ~/.gnommo.conf [performance] cpu_limit.""" + from .cache import get_ffmpeg_thread_count + return str(get_ffmpeg_thread_count()) + + # Number of parallel workers for chunk processing DEFAULT_CHUNK_WORKERS = 1 @@ -771,7 +777,7 @@ def apply_combined_video_filters( # Build FFmpeg command cmd = ["ffmpeg", "-y"] # Global options before -i (after -i they become output options and don't limit filter threads) - cmd.extend(["-threads", "1", "-filter_threads", "1"]) + cmd.extend(["-threads", _tc(), "-filter_threads", _tc()]) if take is not None: cmd.extend(["-t", str(take)]) @@ -1361,7 +1367,7 @@ def _process_chunk_to_prores4444( # After -i they become output-stream options and FFmpeg ignores them for the # filter graph — each geq stage then spawns one thread per CPU core (11 on M-series), # causing the N-way RGBA frame buffer explosion that OOM-kills the process. - cmd.extend(["-threads", "1", "-filter_threads", "1"]) + cmd.extend(["-threads", _tc(), "-filter_threads", _tc()]) # Seek to start time (before input for fast seeking) if start_time > 0: @@ -1693,7 +1699,7 @@ def apply_chroma_key( # ProRes 4444 profile for alpha channel support cmd = ["ffmpeg", "-y"] # Global options before -i - cmd.extend(["-threads", "1", "-filter_threads", "1"]) + cmd.extend(["-threads", _tc(), "-filter_threads", _tc()]) # Add duration limit if specified (before input for efficiency) if take is not None: @@ -1798,7 +1804,7 @@ def apply_mask( # Build FFmpeg command cmd = ["ffmpeg", "-y"] # Global options before -i - cmd.extend(["-threads", "1", "-filter_threads", "1"]) + cmd.extend(["-threads", _tc(), "-filter_threads", _tc()]) if take is not None: cmd.extend(["-t", str(take)]) diff --git a/gnommo/renderer.py b/gnommo/renderer.py index a2a864a..f67ac34 100644 --- a/gnommo/renderer.py +++ b/gnommo/renderer.py @@ -306,7 +306,9 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]: # Global thread limits before any -i. Without this, each format=rgba conversion # in the filter graph (one per video layer) spawns one swscaler thread per CPU core, # causing OOM on Apple Silicon where av_cpu_count() returns 10-11. - cmd.extend(["-threads", "1", "-filter_threads", "1"]) + from .cache import get_ffmpeg_thread_count + _tc = str(get_ffmpeg_thread_count()) + cmd.extend(["-threads", _tc, "-filter_threads", _tc]) # Resolve paths to absolute project_path = plan.project_path.resolve()