From 2dff8f45b9a4b6d4806ff4a8aba4af65ced55633 Mon Sep 17 00:00:00 2001 From: jenstandstad Date: Sat, 9 May 2026 15:36:15 +0200 Subject: [PATCH] Adding fixes to the publish pipeline --- example/project.json | 1 + gnommo/cli.py | 35 ++++-- gnommo/handoff.py | 97 ++++++++++------ gnommo/parser.py | 2 +- gnommo/push.py | 271 +++++++++++++++++++++++++++++++++---------- 5 files changed, 298 insertions(+), 108 deletions(-) diff --git a/example/project.json b/example/project.json index fe39e7c..4cc3f45 100644 --- a/example/project.json +++ b/example/project.json @@ -14,6 +14,7 @@ "videos": "media/videos/videos.json", "slides": "media/slides/Example/slides.json", "audio": "media/audio/audio.json", + "output": "final.mp4", "default_filters": { "talkinghead": [ { diff --git a/gnommo/cli.py b/gnommo/cli.py index 197e169..088eaa3 100644 --- a/gnommo/cli.py +++ b/gnommo/cli.py @@ -50,7 +50,7 @@ Examples: gnommo -p video1 transcode --processed --alpha-quality 0.5 More aggressive alpha compression gnommo -p video1 transcode --processed --dry-run Preview what would be compressed gnommo -p video1 transcode --force Re-transcode even if output already exists - gnommo -p video1 all Full pipeline: import → preprocess → trim → stitch → render → handoff + gnommo -p video1 all Full pipeline: down → import → preprocess → trim → stitch → render → push → handoff → up gnommo -p video1 render --dry-run Show FFmpeg command without running gnommo -p video1 description Generate YouTube description file gnommo -p video1 transcribe Narration file for timing of slides @@ -2836,21 +2836,27 @@ def cmd_all( res: str = "full", force: bool = False, ) -> int: - """Run full pipeline: import → preprocess → trim → stitch → render → handoff. + """Run full pipeline: down → import → preprocess → trim → stitch → render → push → handoff → up. Cascade rule: if any stage produces output, all subsequent stages are forced to re-run (cascade_force=True), regardless of whether --force was passed. This ensures downstream caches are always consistent with upstream changes. """ from .handoff import cmd_handoff + from .push import cmd_push print(f"=== Full Pipeline: {project_path.name} ===\n") + print(">>> Step 1/9: Download\n") + result = cmd_sync(project_path, verbose, dry_run, download=True) + if result != 0: + return result + # cascade_force starts at --force. Once any stage does real work it flips to # True so all downstream stages re-run unconditionally. cascade_force = force - print(">>> Step 1/6: Import\n") + print("\n>>> Step 2/9: Import\n") t0 = time.time() result = cmd_import(project_path, cascade_force, verbose) if result != 0: @@ -2860,7 +2866,7 @@ def cmd_all( ): cascade_force = True - print("\n>>> Step 2/6: Preprocess\n") + print("\n>>> Step 3/9: Preprocess\n") t0 = time.time() result = cmd_preprocess( project_path, verbose, dry_run, cascade_force, workers=1, res=res @@ -2872,7 +2878,7 @@ def cmd_all( ) or _files_modified_since(project_path, t0, "*_processed.webm"): cascade_force = True - print("\n>>> Step 3/6: Trim\n") + print("\n>>> Step 4/9: Trim\n") t0 = time.time() result = cmd_trim(project_path, verbose, force=cascade_force, threshold_db=-40.0) if result != 0: @@ -2881,7 +2887,7 @@ def cmd_all( if _files_modified_since(project_path, t0, "narration.json"): cascade_force = True - print("\n>>> Step 4/6: Stitch\n") + print("\n>>> Step 5/9: Stitch\n") t0 = time.time() result = cmd_stitch(project_path, verbose, cascade_force, res=res) if result != 0: @@ -2889,13 +2895,23 @@ def cmd_all( if _files_modified_since(project_path, t0, "narration_combined.mov"): cascade_force = True - print("\n>>> Step 5/6: Render\n") + print("\n>>> Step 6/9: Render\n") result = cmd_render(project_path, verbose, dry_run, res=res, force=cascade_force) if result != 0: return result - print("\n>>> Step 6/6: Handoff\n") - return cmd_handoff(project_path, verbose, file_override=None, prod=False, res=res) + print("\n>>> Step 7/9: Push\n") + result = cmd_push(project_path, verbose, force=False, prod=True) + if result != 0: + return result + + print("\n>>> Step 8/9: Handoff\n") + result = cmd_handoff(project_path, verbose, file_override=None, prod=True, res=res) + if result != 0: + return result + + print("\n>>> Step 9/9: Upload\n") + return cmd_sync(project_path, verbose, dry_run, download=False) # ============================================================================= @@ -3214,7 +3230,6 @@ def cmd_sync(project_path: Path, verbose: bool, dry_run: bool, download: bool) - print(f"Error: could not create remote directory {remote_dir}") return 1 - rsync_cmd = [ "rsync", "-av", diff --git a/gnommo/handoff.py b/gnommo/handoff.py index 5215c35..1c4373a 100644 --- a/gnommo/handoff.py +++ b/gnommo/handoff.py @@ -1,4 +1,4 @@ -"""Hand off a finished video to the gnommoweb server. +"""Hand off a finished video to MinIO storage via gnommoeditor (prod) or gnommoweb (local). Works for any gnommo project type: parent videos and shorts alike. @@ -10,14 +10,17 @@ Usage: Reads project.json for the 'output_video' field (path relative to the project directory). Override with --file. -On success: - - Uploads the video to MinIO via POST /api/projects/:handle/handoff - - For shorts: server auto-advances status to 'processed' - - Bumps video_version on every upload +On success (production): + - Uploads the video to MinIO via POST /api/assets/upload on gnommoeditor + - Updates .gnommo_sync.prod.json with asset URL + +On success (local): + - Uploads via POST /api/projects/:handle/handoff on gnommoweb - Updates .gnommo_sync.json with new video_version Configuration (from .env or environment): - GNOMMOWEB_URL Base URL (e.g. http://localhost:3001) + GNOMMOEDITOR_URL Base URL for production (e.g. https://editor.glitch.university) + GNOMMOWEB_URL Base URL for local dev (e.g. http://localhost:3001) GNOMMOWEB_API_KEY Bearer token (CONTENT_API_KEY from gnommoweb) """ @@ -83,13 +86,9 @@ def cmd_handoff( _load_env_file() if prod: - api_url = os.environ.get("GNOMMOWEB_PROD_URL", "").rstrip("/") - api_key = os.environ.get("GNOMMOWEB_PROD_API_KEY", "") + api_url = os.environ.get("GNOMMOEDITOR_URL", "").rstrip("/") if not api_url: - print("Error: GNOMMOWEB_PROD_URL is not set.", file=sys.stderr) - return 1 - if not api_key: - print("Error: GNOMMOWEB_PROD_API_KEY is not set.", file=sys.stderr) + print("Error: GNOMMOEDITOR_URL is not set.", file=sys.stderr) return 1 else: api_url = os.environ.get("GNOMMOWEB_URL", "").rstrip("/") @@ -102,7 +101,7 @@ def cmd_handoff( return 1 if verbose: - target = "production" if prod else "local" + target = "production (gnommoeditor)" if prod else "local" print(f" → {target}: {api_url}") project_file = project_path / "project.json" @@ -147,13 +146,23 @@ def cmd_handoff( # ── Upload ───────────────────────────────────────────────────────────────── try: - with open(video_path, "rb") as vf: - r = requests.post( - f"{api_url}/api/projects/{project_id}/handoff", - files={"video": (video_path.name, vf, _mime_type(video_path))}, - headers={"Authorization": f"Bearer {api_key}"}, - timeout=None, # large files may take a while - ) + if prod: + # gnommoeditor: POST /api/assets/upload — field name is 'file', no auth + with open(video_path, "rb") as vf: + r = requests.post( + f"{api_url}/api/assets/upload", + files={"file": (video_path.name, vf, _mime_type(video_path))}, + timeout=None, + ) + else: + # gnommoweb: POST /api/projects/:id/handoff + with open(video_path, "rb") as vf: + r = requests.post( + f"{api_url}/api/projects/{project_id}/handoff", + files={"video": (video_path.name, vf, _mime_type(video_path))}, + headers={"Authorization": f"Bearer {api_key}"}, + timeout=None, + ) except requests.exceptions.ConnectionError: print(f"✗ Could not connect to {api_url}") return 1 @@ -167,28 +176,42 @@ def cmd_handoff( return 1 result = r.json() - video_version = result.get("video_version", "?") - video_url = result.get("video_url", "") # ── Write sync state ─────────────────────────────────────────────────────── now_iso = datetime.now(tz=timezone.utc).isoformat(timespec="seconds") existing_sync = _read_sync(project_path, prod) - _write_sync( - project_path, - { - **existing_sync, - "last_handoff_at": now_iso, - "video_version": video_version, - "server_updated_at": result.get("asset", {}).get( - "updated_at", existing_sync.get("server_updated_at") - ), - }, - prod, - ) - print(f"✓ {project_id} → v{video_version} [processed]") - if video_url: - print(f" {video_url}") + if prod: + # gnommoeditor response: { asset: { id, url, minio_object_key, ... } } + asset = result.get("asset", {}) + asset_url = asset.get("url", "") + _write_sync( + project_path, + {**existing_sync, "last_handoff_at": now_iso, "asset_url": asset_url}, + prod, + ) + print(f"✓ {project_id} → uploaded [asset #{asset.get('id')}]") + if asset_url: + print(f" {asset_url}") + else: + # gnommoweb response: { video_version, video_url, asset: { updated_at } } + video_version = result.get("video_version", "?") + video_url = result.get("video_url", "") + _write_sync( + project_path, + { + **existing_sync, + "last_handoff_at": now_iso, + "video_version": video_version, + "server_updated_at": result.get("asset", {}).get( + "updated_at", existing_sync.get("server_updated_at") + ), + }, + prod, + ) + print(f"✓ {project_id} → v{video_version} [processed]") + if video_url: + print(f" {video_url}") return 0 diff --git a/gnommo/parser.py b/gnommo/parser.py index 5b7ce6e..fdd243c 100644 --- a/gnommo/parser.py +++ b/gnommo/parser.py @@ -38,7 +38,7 @@ def _resolve_case_insensitive(path: Path) -> Path: return path resolved = path.anchor and Path(path.anchor) or Path(".") - for part in path.parts[len(Path(path.anchor).parts):]: + for part in path.parts[len(Path(path.anchor).parts) :]: if (resolved / part).exists(): resolved = resolved / part else: diff --git a/gnommo/push.py b/gnommo/push.py index 8e332e5..08de2b0 100644 --- a/gnommo/push.py +++ b/gnommo/push.py @@ -1,36 +1,19 @@ -"""Push project metadata to gnommoweb server. +"""Push project metadata to gnommoeditor (prod) or gnommoweb (local). Usage: gnommo push -p video1 # push parent video project gnommo push -p short_pixelated_universe # push a short project gnommo push -p myproject --force # force push, overwrite server -Reads project.json and POSTs to POST /api/projects/push. - -If project.json contains a "parent_project" field, the project is pushed -as a short and registered under that parent. Otherwise it is pushed as a -parent video project. - -Parent project.json "shorts" field is a list of slugs (just an index): - "shorts": ["short_pixelated_universe", "short_planck_length"] - -Short project.json has its own full config plus a parent_project field: - { - "id": "short_pixelated_universe", - "parent_project": "Video1", - "resolution": [1080, 1920], - "fps": 30, - "duration_seconds": 60, - ... - } - -Conflict detection: - - If server.updated_at > our recorded server_updated_at → server has newer changes - → warn and abort unless --force +Reads project.json and companion JSON files, then POSTs to: + Production: POST /api/ingest (gnommoeditor, uses INGEST_API_KEY) + Local: POST /api/projects/push (gnommoweb, uses GNOMMOWEB_API_KEY) Configuration (from .env or environment): - GNOMMOWEB_URL Base URL (e.g. http://localhost:3001) - GNOMMOWEB_API_KEY Bearer token (CONTENT_API_KEY from gnommoweb) + GNOMMOEDITOR_URL Base URL for production (e.g. https://editor.glitch.university) + INGEST_API_KEY Bearer token for gnommoeditor ingest endpoint + GNOMMOWEB_URL Base URL for local dev (e.g. http://localhost:3001) + GNOMMOWEB_API_KEY Bearer token for local gnommoweb """ import json @@ -85,43 +68,135 @@ def _write_sync(project_path: Path, data: dict, prod: bool = False): json.dump(data, f, indent=2) -def _parse_ts(ts_str) -> datetime | None: - if not ts_str: +def _load_json_file(path: Path, label: str, verbose: bool) -> dict | list | None: + """Load a JSON file, returning None if it doesn't exist.""" + if not path.exists(): + if verbose: + print(f" {label}: not found at {path}") return None try: - return datetime.fromisoformat(ts_str.replace("Z", "+00:00")) - except ValueError: + with open(path) as f: + return json.load(f) + except json.JSONDecodeError as e: + print(f" Warning: could not parse {label} ({path}): {e}", file=sys.stderr) return None +def _load_text_file(path: Path, label: str) -> str | None: + """Load a text file, returning None if it doesn't exist.""" + if not path.exists(): + return None + try: + return path.read_text(encoding="utf-8") + except UnicodeDecodeError: + return path.read_text(encoding="latin-1") + + +def _parse_seconds(value) -> float | None: + """Convert a time value like '30s', '1:30', or 30 into a plain float of seconds.""" + if value is None: + return None + if isinstance(value, (int, float)): + return float(value) + value = str(value).strip() + if value.endswith("s"): + value = value[:-1] + if ":" in value: + parts = value.split(":") + if len(parts) == 2: + return float(parts[0]) * 60 + float(parts[1]) + elif len(parts) == 3: + return float(parts[0]) * 3600 + float(parts[1]) * 60 + float(parts[2]) + return float(value) + + +def _sanitize_time_fields(data: dict | None, fields: list[str]) -> dict | None: + """Return a copy of dict with the given fields converted to plain floats.""" + if not data: + return data + result = dict(data) + for field in fields: + if field in result and result[field] is not None: + try: + result[field] = _parse_seconds(result[field]) + except (ValueError, TypeError): + pass # leave invalid values for the server to reject with a clear error + return result + + +def _build_ingest_payload(project: dict, project_path: Path, verbose: bool) -> dict: + """Build the rich ingest payload for gnommoeditor POST /api/ingest.""" + + # ── slides ──────────────────────────────────────────────────────────────── + slides_path_str = project.get("slides", "slides.json") + slides_path = project_path / slides_path_str + slides = _load_json_file(slides_path, "slides", verbose) + if slides and verbose: + print(f" slides: {len(slides)} entries") + + # ── manuscript ──────────────────────────────────────────────────────────── + manuscript_path_str = project.get("manuscript", "manuscript.txt") + manuscript_path = project_path / manuscript_path_str + manuscript = _load_text_file(manuscript_path, "manuscript") + if manuscript: + print(f" manuscript: {len(manuscript)} chars") + elif verbose: + print(f" manuscript: not found at {manuscript_path}") + + # ── narration ───────────────────────────────────────────────────────────── + narration_path_str = project.get("narration", "narration.json") + narration_path = project_path / narration_path_str + narration = _load_json_file(narration_path, "narration", verbose) + + # ── audio ───────────────────────────────────────────────────────────────── + audio_path_str = project.get("audio_tracks", "audio.json") + audio_path = project_path / audio_path_str + audio = _load_json_file(audio_path, "audio", verbose) + + # ── videos ──────────────────────────────────────────────────────────────── + videos_path_str = project.get("videos", "videos.json") + videos_path = project_path / videos_path_str + videos = _load_json_file(videos_path, "videos", verbose) + + # ── citations ───────────────────────────────────────────────────────────── + citations_path = project_path / "citations.json" + citations = _load_json_file(citations_path, "citations", verbose) + + # Sanitize time fields — convert "30s", "1:30" etc. to plain floats + _VIDEO_TIME_FIELDS = ["duration", "pause_narration", "skip", "take"] + _NARRATION_TIME_FIELDS = ["skip", "take"] + _AUDIO_TIME_FIELDS = ["overlap", "duration"] + + if videos: + videos = { + k: _sanitize_time_fields(v, _VIDEO_TIME_FIELDS) for k, v in videos.items() + } + if narration: + narration = { + k: _sanitize_time_fields(v, _NARRATION_TIME_FIELDS) + for k, v in narration.items() + } + if audio: + audio = { + k: _sanitize_time_fields(v, _AUDIO_TIME_FIELDS) for k, v in audio.items() + } + + return { + "project": project, + "slides": slides, + "manuscript": manuscript, + "narration": narration, + "audio": audio, + "videos": videos, + "citations": citations, + } + + def cmd_push( project_path: Path, verbose: bool = False, force: bool = False, prod: bool = False ) -> int: _load_env_file() - if prod: - api_url = os.environ.get("GNOMMOWEB_PROD_URL", "").rstrip("/") - api_key = os.environ.get("GNOMMOWEB_PROD_API_KEY", "") - if not api_url: - print("Error: GNOMMOWEB_PROD_URL is not set.", file=sys.stderr) - return 1 - if not api_key: - print("Error: GNOMMOWEB_PROD_API_KEY is not set.", file=sys.stderr) - return 1 - else: - api_url = os.environ.get("GNOMMOWEB_URL", "").rstrip("/") - api_key = os.environ.get("GNOMMOWEB_API_KEY", "") - if not api_url: - print("Error: GNOMMOWEB_URL is not set.", file=sys.stderr) - return 1 - if not api_key: - print("Error: GNOMMOWEB_API_KEY is not set.", file=sys.stderr) - return 1 - - if verbose: - target = "production" if prod else "local" - print(f" → {target}: {api_url}") - project_file = project_path / "project.json" if not project_file.exists(): print(f"Error: {project_file} not found", file=sys.stderr) @@ -136,9 +211,90 @@ def cmd_push( print("Error: project.json must have 'id' and 'name' fields.", file=sys.stderr) return 1 + if prod: + return _push_prod(project, project_path, verbose) + else: + return _push_local(project, project_path, verbose, force) + + +# ── Production: gnommoeditor POST /api/ingest ───────────────────────────────── + + +def _push_prod(project: dict, project_path: Path, verbose: bool) -> int: + api_url = os.environ.get("GNOMMOEDITOR_URL", "").rstrip("/") + api_key = os.environ.get("INGEST_API_KEY", "") + if not api_url: + print("Error: GNOMMOEDITOR_URL is not set.", file=sys.stderr) + return 1 + if not api_key: + print("Error: INGEST_API_KEY is not set.", file=sys.stderr) + return 1 + + project_id = project["id"] + payload = _build_ingest_payload(project, project_path, verbose) + + # Attach sync state so the server can record it + sync = _read_sync(project_path, prod=True) + if sync: + payload["sync"] = sync + + print(f" → {api_url}/api/ingest") + + try: + r = requests.post( + f"{api_url}/api/ingest", + json=payload, + headers={"Authorization": f"Bearer {api_key}"}, + timeout=30, + ) + except requests.exceptions.ConnectionError: + print(f"✗ Could not connect to {api_url}") + return 1 + + if not r.ok: + try: + body = r.json() + except Exception: + body = r.text[:500] + print(f"✗ Server returned {r.status_code}: {body}") + return 1 + + result = r.json() + video_id = result.get("video_id") + slides_upserted = result.get("slides_upserted", 0) + + # Update sync state + now_iso = datetime.now(tz=timezone.utc).isoformat(timespec="seconds") + existing_sync = _read_sync(project_path, prod=True) + _write_sync( + project_path, + {**existing_sync, "last_pushed_at": now_iso}, + prod=True, + ) + + print(f"✓ {project_id} → video #{video_id} ({slides_upserted} slides)") + return 0 + + +# ── Local dev: gnommoweb POST /api/projects/push ────────────────────────────── + + +def _push_local(project: dict, project_path: Path, verbose: bool, force: bool) -> int: + api_url = os.environ.get("GNOMMOWEB_URL", "").rstrip("/") + api_key = os.environ.get("GNOMMOWEB_API_KEY", "") + if not api_url: + print("Error: GNOMMOWEB_URL is not set.", file=sys.stderr) + return 1 + if not api_key: + print("Error: GNOMMOWEB_API_KEY is not set.", file=sys.stderr) + return 1 + + if verbose: + print(f" → local: {api_url}") + + project_id = project["id"] parent_project = project.get("parent_project") - # ── Build payload ───────────────────────────────────────────────────────── if parent_project: payload = _build_short_payload(project, project_path, verbose) else: @@ -148,7 +304,6 @@ def cmd_push( kind = "short" if parent_project else "parent video" print(f"Pushing {project_id} ({kind}) to {api_url}") - # ── POST ────────────────────────────────────────────────────────────────── try: r = requests.post( f"{api_url}/api/projects/push", @@ -171,9 +326,8 @@ def cmd_push( result = r.json() server_updated_at = result.get("server_updated_at") - # ── Write sync state ────────────────────────────────────────────────────── now_iso = datetime.now(tz=timezone.utc).isoformat(timespec="seconds") - existing_sync = _read_sync(project_path, prod) + existing_sync = _read_sync(project_path, prod=False) _write_sync( project_path, { @@ -181,10 +335,9 @@ def cmd_push( "last_pushed_at": now_iso, "server_updated_at": server_updated_at, }, - prod, + prod=False, ) - # ── Print summary ───────────────────────────────────────────────────────── asset = result.get("asset", {}) if result.get("type") == "short": print(f"✓ {project_id} → gn_asset #{asset.get('id')} [{asset.get('status')}]") @@ -202,7 +355,6 @@ def cmd_push( def _build_parent_payload(project: dict, project_path: Path, verbose: bool) -> dict: - # Read the manuscript file if one is specified script_content = None manuscript_str = project.get("manuscript") if manuscript_str: @@ -238,7 +390,6 @@ def _build_parent_payload(project: dict, project_path: Path, verbose: bool) -> d def _build_short_payload(project: dict, project_path: Path, verbose: bool) -> dict: - # Read the script file if one is specified script_content = None script_path_str = project.get("script") if script_path_str: