Adding fixes to the publish pipeline

This commit is contained in:
2026-05-09 15:36:15 +02:00
parent 00e01237ed
commit 2dff8f45b9
5 changed files with 298 additions and 108 deletions
+1
View File
@@ -14,6 +14,7 @@
"videos": "media/videos/videos.json",
"slides": "media/slides/Example/slides.json",
"audio": "media/audio/audio.json",
"output": "final.mp4",
"default_filters": {
"talkinghead": [
{
+25 -10
View File
@@ -50,7 +50,7 @@ Examples:
gnommo -p video1 transcode --processed --alpha-quality 0.5 More aggressive alpha compression
gnommo -p video1 transcode --processed --dry-run Preview what would be compressed
gnommo -p video1 transcode --force Re-transcode even if output already exists
gnommo -p video1 all Full pipeline: import → preprocess → trim → stitch → render → handoff
gnommo -p video1 all Full pipeline: down → import → preprocess → trim → stitch → render → push → handoff → up
gnommo -p video1 render --dry-run Show FFmpeg command without running
gnommo -p video1 description Generate YouTube description file
gnommo -p video1 transcribe Narration file for timing of slides
@@ -2836,21 +2836,27 @@ def cmd_all(
res: str = "full",
force: bool = False,
) -> int:
"""Run full pipeline: import → preprocess → trim → stitch → render → handoff.
"""Run full pipeline: down → import → preprocess → trim → stitch → render → push → handoff → up.
Cascade rule: if any stage produces output, all subsequent stages are forced
to re-run (cascade_force=True), regardless of whether --force was passed.
This ensures downstream caches are always consistent with upstream changes.
"""
from .handoff import cmd_handoff
from .push import cmd_push
print(f"=== Full Pipeline: {project_path.name} ===\n")
print(">>> Step 1/9: Download\n")
result = cmd_sync(project_path, verbose, dry_run, download=True)
if result != 0:
return result
# cascade_force starts at --force. Once any stage does real work it flips to
# True so all downstream stages re-run unconditionally.
cascade_force = force
print(">>> Step 1/6: Import\n")
print("\n>>> Step 2/9: Import\n")
t0 = time.time()
result = cmd_import(project_path, cascade_force, verbose)
if result != 0:
@@ -2860,7 +2866,7 @@ def cmd_all(
):
cascade_force = True
print("\n>>> Step 2/6: Preprocess\n")
print("\n>>> Step 3/9: Preprocess\n")
t0 = time.time()
result = cmd_preprocess(
project_path, verbose, dry_run, cascade_force, workers=1, res=res
@@ -2872,7 +2878,7 @@ def cmd_all(
) or _files_modified_since(project_path, t0, "*_processed.webm"):
cascade_force = True
print("\n>>> Step 3/6: Trim\n")
print("\n>>> Step 4/9: Trim\n")
t0 = time.time()
result = cmd_trim(project_path, verbose, force=cascade_force, threshold_db=-40.0)
if result != 0:
@@ -2881,7 +2887,7 @@ def cmd_all(
if _files_modified_since(project_path, t0, "narration.json"):
cascade_force = True
print("\n>>> Step 4/6: Stitch\n")
print("\n>>> Step 5/9: Stitch\n")
t0 = time.time()
result = cmd_stitch(project_path, verbose, cascade_force, res=res)
if result != 0:
@@ -2889,13 +2895,23 @@ def cmd_all(
if _files_modified_since(project_path, t0, "narration_combined.mov"):
cascade_force = True
print("\n>>> Step 5/6: Render\n")
print("\n>>> Step 6/9: Render\n")
result = cmd_render(project_path, verbose, dry_run, res=res, force=cascade_force)
if result != 0:
return result
print("\n>>> Step 6/6: Handoff\n")
return cmd_handoff(project_path, verbose, file_override=None, prod=False, res=res)
print("\n>>> Step 7/9: Push\n")
result = cmd_push(project_path, verbose, force=False, prod=True)
if result != 0:
return result
print("\n>>> Step 8/9: Handoff\n")
result = cmd_handoff(project_path, verbose, file_override=None, prod=True, res=res)
if result != 0:
return result
print("\n>>> Step 9/9: Upload\n")
return cmd_sync(project_path, verbose, dry_run, download=False)
# =============================================================================
@@ -3214,7 +3230,6 @@ def cmd_sync(project_path: Path, verbose: bool, dry_run: bool, download: bool) -
print(f"Error: could not create remote directory {remote_dir}")
return 1
rsync_cmd = [
"rsync",
"-av",
+40 -17
View File
@@ -1,4 +1,4 @@
"""Hand off a finished video to the gnommoweb server.
"""Hand off a finished video to MinIO storage via gnommoeditor (prod) or gnommoweb (local).
Works for any gnommo project type: parent videos and shorts alike.
@@ -10,14 +10,17 @@ Usage:
Reads project.json for the 'output_video' field (path relative to the
project directory). Override with --file.
On success:
- Uploads the video to MinIO via POST /api/projects/:handle/handoff
- For shorts: server auto-advances status to 'processed'
- Bumps video_version on every upload
On success (production):
- Uploads the video to MinIO via POST /api/assets/upload on gnommoeditor
- Updates .gnommo_sync.prod.json with asset URL
On success (local):
- Uploads via POST /api/projects/:handle/handoff on gnommoweb
- Updates .gnommo_sync.json with new video_version
Configuration (from .env or environment):
GNOMMOWEB_URL Base URL (e.g. http://localhost:3001)
GNOMMOEDITOR_URL Base URL for production (e.g. https://editor.glitch.university)
GNOMMOWEB_URL Base URL for local dev (e.g. http://localhost:3001)
GNOMMOWEB_API_KEY Bearer token (CONTENT_API_KEY from gnommoweb)
"""
@@ -83,13 +86,9 @@ def cmd_handoff(
_load_env_file()
if prod:
api_url = os.environ.get("GNOMMOWEB_PROD_URL", "").rstrip("/")
api_key = os.environ.get("GNOMMOWEB_PROD_API_KEY", "")
api_url = os.environ.get("GNOMMOEDITOR_URL", "").rstrip("/")
if not api_url:
print("Error: GNOMMOWEB_PROD_URL is not set.", file=sys.stderr)
return 1
if not api_key:
print("Error: GNOMMOWEB_PROD_API_KEY is not set.", file=sys.stderr)
print("Error: GNOMMOEDITOR_URL is not set.", file=sys.stderr)
return 1
else:
api_url = os.environ.get("GNOMMOWEB_URL", "").rstrip("/")
@@ -102,7 +101,7 @@ def cmd_handoff(
return 1
if verbose:
target = "production" if prod else "local"
target = "production (gnommoeditor)" if prod else "local"
print(f"{target}: {api_url}")
project_file = project_path / "project.json"
@@ -147,12 +146,22 @@ def cmd_handoff(
# ── Upload ─────────────────────────────────────────────────────────────────
try:
if prod:
# gnommoeditor: POST /api/assets/upload — field name is 'file', no auth
with open(video_path, "rb") as vf:
r = requests.post(
f"{api_url}/api/assets/upload",
files={"file": (video_path.name, vf, _mime_type(video_path))},
timeout=None,
)
else:
# gnommoweb: POST /api/projects/:id/handoff
with open(video_path, "rb") as vf:
r = requests.post(
f"{api_url}/api/projects/{project_id}/handoff",
files={"video": (video_path.name, vf, _mime_type(video_path))},
headers={"Authorization": f"Bearer {api_key}"},
timeout=None, # large files may take a while
timeout=None,
)
except requests.exceptions.ConnectionError:
print(f"✗ Could not connect to {api_url}")
@@ -167,12 +176,27 @@ def cmd_handoff(
return 1
result = r.json()
video_version = result.get("video_version", "?")
video_url = result.get("video_url", "")
# ── Write sync state ───────────────────────────────────────────────────────
now_iso = datetime.now(tz=timezone.utc).isoformat(timespec="seconds")
existing_sync = _read_sync(project_path, prod)
if prod:
# gnommoeditor response: { asset: { id, url, minio_object_key, ... } }
asset = result.get("asset", {})
asset_url = asset.get("url", "")
_write_sync(
project_path,
{**existing_sync, "last_handoff_at": now_iso, "asset_url": asset_url},
prod,
)
print(f"{project_id} → uploaded [asset #{asset.get('id')}]")
if asset_url:
print(f" {asset_url}")
else:
# gnommoweb response: { video_version, video_url, asset: { updated_at } }
video_version = result.get("video_version", "?")
video_url = result.get("video_url", "")
_write_sync(
project_path,
{
@@ -185,7 +209,6 @@ def cmd_handoff(
},
prod,
)
print(f"{project_id} → v{video_version} [processed]")
if video_url:
print(f" {video_url}")
+1 -1
View File
@@ -38,7 +38,7 @@ def _resolve_case_insensitive(path: Path) -> Path:
return path
resolved = path.anchor and Path(path.anchor) or Path(".")
for part in path.parts[len(Path(path.anchor).parts):]:
for part in path.parts[len(Path(path.anchor).parts) :]:
if (resolved / part).exists():
resolved = resolved / part
else:
+211 -60
View File
@@ -1,36 +1,19 @@
"""Push project metadata to gnommoweb server.
"""Push project metadata to gnommoeditor (prod) or gnommoweb (local).
Usage:
gnommo push -p video1 # push parent video project
gnommo push -p short_pixelated_universe # push a short project
gnommo push -p myproject --force # force push, overwrite server
Reads project.json and POSTs to POST /api/projects/push.
If project.json contains a "parent_project" field, the project is pushed
as a short and registered under that parent. Otherwise it is pushed as a
parent video project.
Parent project.json "shorts" field is a list of slugs (just an index):
"shorts": ["short_pixelated_universe", "short_planck_length"]
Short project.json has its own full config plus a parent_project field:
{
"id": "short_pixelated_universe",
"parent_project": "Video1",
"resolution": [1080, 1920],
"fps": 30,
"duration_seconds": 60,
...
}
Conflict detection:
- If server.updated_at > our recorded server_updated_at → server has newer changes
→ warn and abort unless --force
Reads project.json and companion JSON files, then POSTs to:
Production: POST /api/ingest (gnommoeditor, uses INGEST_API_KEY)
Local: POST /api/projects/push (gnommoweb, uses GNOMMOWEB_API_KEY)
Configuration (from .env or environment):
GNOMMOWEB_URL Base URL (e.g. http://localhost:3001)
GNOMMOWEB_API_KEY Bearer token (CONTENT_API_KEY from gnommoweb)
GNOMMOEDITOR_URL Base URL for production (e.g. https://editor.glitch.university)
INGEST_API_KEY Bearer token for gnommoeditor ingest endpoint
GNOMMOWEB_URL Base URL for local dev (e.g. http://localhost:3001)
GNOMMOWEB_API_KEY Bearer token for local gnommoweb
"""
import json
@@ -85,43 +68,135 @@ def _write_sync(project_path: Path, data: dict, prod: bool = False):
json.dump(data, f, indent=2)
def _parse_ts(ts_str) -> datetime | None:
if not ts_str:
def _load_json_file(path: Path, label: str, verbose: bool) -> dict | list | None:
"""Load a JSON file, returning None if it doesn't exist."""
if not path.exists():
if verbose:
print(f" {label}: not found at {path}")
return None
try:
return datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
except ValueError:
with open(path) as f:
return json.load(f)
except json.JSONDecodeError as e:
print(f" Warning: could not parse {label} ({path}): {e}", file=sys.stderr)
return None
def _load_text_file(path: Path, label: str) -> str | None:
"""Load a text file, returning None if it doesn't exist."""
if not path.exists():
return None
try:
return path.read_text(encoding="utf-8")
except UnicodeDecodeError:
return path.read_text(encoding="latin-1")
def _parse_seconds(value) -> float | None:
"""Convert a time value like '30s', '1:30', or 30 into a plain float of seconds."""
if value is None:
return None
if isinstance(value, (int, float)):
return float(value)
value = str(value).strip()
if value.endswith("s"):
value = value[:-1]
if ":" in value:
parts = value.split(":")
if len(parts) == 2:
return float(parts[0]) * 60 + float(parts[1])
elif len(parts) == 3:
return float(parts[0]) * 3600 + float(parts[1]) * 60 + float(parts[2])
return float(value)
def _sanitize_time_fields(data: dict | None, fields: list[str]) -> dict | None:
"""Return a copy of dict with the given fields converted to plain floats."""
if not data:
return data
result = dict(data)
for field in fields:
if field in result and result[field] is not None:
try:
result[field] = _parse_seconds(result[field])
except (ValueError, TypeError):
pass # leave invalid values for the server to reject with a clear error
return result
def _build_ingest_payload(project: dict, project_path: Path, verbose: bool) -> dict:
    """Build the rich ingest payload for gnommoeditor POST /api/ingest.

    Loads the companion files referenced by ``project`` (paths are relative to
    ``project_path``) and bundles them with the project config.

    Args:
        project: Parsed project.json.
        project_path: Project directory that relative paths are resolved against.
        verbose: When true, print details about missing or loaded files.

    Returns:
        A dict with the keys ``project``, ``slides``, ``manuscript``,
        ``narration``, ``audio``, ``videos`` and ``citations``. A companion
        that could not be loaded is None.
    """

    def _sanitize_entries(entries, time_fields):
        """Sanitize the time fields of every dict entry in a name -> entry mapping."""
        # _load_json_file may return a list (or None); only a mapping can be
        # walked with .items(), anything else is sent to the server as-is.
        if not isinstance(entries, dict):
            return entries
        return {
            key: (
                _sanitize_time_fields(entry, time_fields)
                if isinstance(entry, dict)
                else entry
            )
            for key, entry in entries.items()
        }

    # ── slides ────────────────────────────────────────────────────────────────
    slides_path = project_path / project.get("slides", "slides.json")
    slides = _load_json_file(slides_path, "slides", verbose)
    if slides and verbose:
        print(f" slides: {len(slides)} entries")

    # ── manuscript ────────────────────────────────────────────────────────────
    manuscript_path = project_path / project.get("manuscript", "manuscript.txt")
    manuscript = _load_text_file(manuscript_path, "manuscript")
    if manuscript:
        print(f" manuscript: {len(manuscript)} chars")
    elif verbose:
        print(f" manuscript: not found at {manuscript_path}")

    # ── narration ─────────────────────────────────────────────────────────────
    narration_path = project_path / project.get("narration", "narration.json")
    narration = _load_json_file(narration_path, "narration", verbose)

    # ── audio ─────────────────────────────────────────────────────────────────
    # "audio_tracks" keeps priority for backward compatibility.
    # NOTE(review): the project.json shown in this change stores the path under
    # "audio" (next to "videos" and "slides"), so fall back to that key before
    # the bare default — confirm which key is the intended one.
    audio_path_str = project.get("audio_tracks")
    if audio_path_str is None:
        audio_ref = project.get("audio")
        audio_path_str = audio_ref if isinstance(audio_ref, str) else "audio.json"
    audio_path = project_path / audio_path_str
    audio = _load_json_file(audio_path, "audio", verbose)

    # ── videos ────────────────────────────────────────────────────────────────
    videos_path = project_path / project.get("videos", "videos.json")
    videos = _load_json_file(videos_path, "videos", verbose)

    # ── citations ─────────────────────────────────────────────────────────────
    citations_path = project_path / "citations.json"
    citations = _load_json_file(citations_path, "citations", verbose)

    # Sanitize time fields — convert "30s", "1:30" etc. to plain floats
    video_time_fields = ["duration", "pause_narration", "skip", "take"]
    narration_time_fields = ["skip", "take"]
    audio_time_fields = ["overlap", "duration"]

    videos = _sanitize_entries(videos, video_time_fields)
    narration = _sanitize_entries(narration, narration_time_fields)
    audio = _sanitize_entries(audio, audio_time_fields)

    return {
        "project": project,
        "slides": slides,
        "manuscript": manuscript,
        "narration": narration,
        "audio": audio,
        "videos": videos,
        "citations": citations,
    }
def cmd_push(
project_path: Path, verbose: bool = False, force: bool = False, prod: bool = False
) -> int:
_load_env_file()
if prod:
api_url = os.environ.get("GNOMMOWEB_PROD_URL", "").rstrip("/")
api_key = os.environ.get("GNOMMOWEB_PROD_API_KEY", "")
if not api_url:
print("Error: GNOMMOWEB_PROD_URL is not set.", file=sys.stderr)
return 1
if not api_key:
print("Error: GNOMMOWEB_PROD_API_KEY is not set.", file=sys.stderr)
return 1
else:
api_url = os.environ.get("GNOMMOWEB_URL", "").rstrip("/")
api_key = os.environ.get("GNOMMOWEB_API_KEY", "")
if not api_url:
print("Error: GNOMMOWEB_URL is not set.", file=sys.stderr)
return 1
if not api_key:
print("Error: GNOMMOWEB_API_KEY is not set.", file=sys.stderr)
return 1
if verbose:
target = "production" if prod else "local"
print(f"{target}: {api_url}")
project_file = project_path / "project.json"
if not project_file.exists():
print(f"Error: {project_file} not found", file=sys.stderr)
@@ -136,9 +211,90 @@ def cmd_push(
print("Error: project.json must have 'id' and 'name' fields.", file=sys.stderr)
return 1
if prod:
return _push_prod(project, project_path, verbose)
else:
return _push_local(project, project_path, verbose, force)
# ── Production: gnommoeditor POST /api/ingest ─────────────────────────────────
def _push_prod(project: dict, project_path: Path, verbose: bool) -> int:
    """Push the project to gnommoeditor via POST /api/ingest.

    Reads GNOMMOEDITOR_URL and INGEST_API_KEY from the environment, posts the
    ingest payload (plus the stored prod sync state, if any) and records
    ``last_pushed_at`` in the prod sync state on success.

    Args:
        project: Parsed project.json; must contain ``id``.
        project_path: Project directory.
        verbose: Forwarded to the payload builder.

    Returns:
        0 on success, 1 on any configuration, connection or server error.
    """
    api_url = os.environ.get("GNOMMOEDITOR_URL", "").rstrip("/")
    api_key = os.environ.get("INGEST_API_KEY", "")
    if not api_url:
        print("Error: GNOMMOEDITOR_URL is not set.", file=sys.stderr)
        return 1
    if not api_key:
        print("Error: INGEST_API_KEY is not set.", file=sys.stderr)
        return 1

    project_id = project["id"]
    payload = _build_ingest_payload(project, project_path, verbose)

    # Attach sync state so the server can record it
    sync = _read_sync(project_path, prod=True)
    if sync:
        payload["sync"] = sync

    print(f"{api_url}/api/ingest")
    try:
        r = requests.post(
            f"{api_url}/api/ingest",
            json=payload,
            headers={"Authorization": f"Bearer {api_key}"},
            timeout=30,
        )
    except requests.exceptions.ConnectionError:
        print(f"✗ Could not connect to {api_url}")
        return 1
    except requests.exceptions.Timeout:
        # A read timeout is not a ConnectionError, so with timeout=30 it
        # needs its own handler to avoid a traceback.
        print(f"✗ Request to {api_url} timed out")
        return 1
    except requests.exceptions.RequestException as e:
        # Any other request failure (e.g. a payload that cannot be encoded).
        print(f"✗ Request to {api_url} failed: {e}")
        return 1

    if not r.ok:
        try:
            body = r.json()
        except ValueError:
            # Error responses are not guaranteed to be JSON; show raw text.
            body = r.text[:500]
        print(f"✗ Server returned {r.status_code}: {body}")
        return 1

    try:
        result = r.json()
    except ValueError:
        print(f"✗ Server returned {r.status_code} with a non-JSON body: {r.text[:500]}")
        return 1

    video_id = result.get("video_id")
    slides_upserted = result.get("slides_upserted", 0)

    # Update sync state, preserving whatever was recorded before this push.
    now_iso = datetime.now(tz=timezone.utc).isoformat(timespec="seconds")
    _write_sync(
        project_path,
        {**(sync or {}), "last_pushed_at": now_iso},
        prod=True,
    )

    print(f"{project_id} → video #{video_id} ({slides_upserted} slides)")
    return 0
# ── Local dev: gnommoweb POST /api/projects/push ──────────────────────────────
def _push_local(project: dict, project_path: Path, verbose: bool, force: bool) -> int:
api_url = os.environ.get("GNOMMOWEB_URL", "").rstrip("/")
api_key = os.environ.get("GNOMMOWEB_API_KEY", "")
if not api_url:
print("Error: GNOMMOWEB_URL is not set.", file=sys.stderr)
return 1
if not api_key:
print("Error: GNOMMOWEB_API_KEY is not set.", file=sys.stderr)
return 1
if verbose:
print(f" → local: {api_url}")
project_id = project["id"]
parent_project = project.get("parent_project")
# ── Build payload ─────────────────────────────────────────────────────────
if parent_project:
payload = _build_short_payload(project, project_path, verbose)
else:
@@ -148,7 +304,6 @@ def cmd_push(
kind = "short" if parent_project else "parent video"
print(f"Pushing {project_id} ({kind}) to {api_url}")
# ── POST ──────────────────────────────────────────────────────────────────
try:
r = requests.post(
f"{api_url}/api/projects/push",
@@ -171,9 +326,8 @@ def cmd_push(
result = r.json()
server_updated_at = result.get("server_updated_at")
# ── Write sync state ──────────────────────────────────────────────────────
now_iso = datetime.now(tz=timezone.utc).isoformat(timespec="seconds")
existing_sync = _read_sync(project_path, prod)
existing_sync = _read_sync(project_path, prod=False)
_write_sync(
project_path,
{
@@ -181,10 +335,9 @@ def cmd_push(
"last_pushed_at": now_iso,
"server_updated_at": server_updated_at,
},
prod,
prod=False,
)
# ── Print summary ─────────────────────────────────────────────────────────
asset = result.get("asset", {})
if result.get("type") == "short":
print(f"{project_id} → gn_asset #{asset.get('id')} [{asset.get('status')}]")
@@ -202,7 +355,6 @@ def cmd_push(
def _build_parent_payload(project: dict, project_path: Path, verbose: bool) -> dict:
# Read the manuscript file if one is specified
script_content = None
manuscript_str = project.get("manuscript")
if manuscript_str:
@@ -238,7 +390,6 @@ def _build_parent_payload(project: dict, project_path: Path, verbose: bool) -> d
def _build_short_payload(project: dict, project_path: Path, verbose: bool) -> dict:
# Read the script file if one is specified
script_content = None
script_path_str = project.get("script")
if script_path_str: