Adding lots of skills

This commit is contained in:
2026-05-09 22:44:17 +02:00
parent b57fc43fc0
commit bb444759f6
58 changed files with 3585 additions and 86 deletions
+34 -19
View File
@@ -1,39 +1,47 @@
apple-notes:16ffca134c5590714781d8aeef51f8f3
apple-reminders:0273a9a17f6d07c55c84735c4366186b
architecture-diagram:999ab6d4445dbd407a82031857aa9791
airtable:dec8bcab05383e0ca8ae0e3c241d3a48
apple-notes:5e448abf984561fb33b197045ce41388
apple-reminders:cda2963c73800643faf4a34ef813879a
architecture-diagram:8ed67034726b0ac3639d9c009d166222
arxiv:0ad5eb32727a1cb2bbff9e1e8e4dbff7
ascii-art:6eed9eb0c7cedf2bccd3cb7b7c91271c
ascii-video:93697173a0a33f7ecb7c4dc1c27f80e8
audiocraft-audio-generation:41d06b6ec94d1cdb3d864efe452780fd
axolotl:710b8e88805a85efc461dcd70c937cae
baoyu-comic:0be1250d5433538d71a4ab6d81b359dc
baoyu-infographic:d00f808010611c77d3fe00f58d2d7176
baoyu-infographic:567069c2548a69eafcbce09c028438dd
blogwatcher:d0b55ef6acff9ad26f1febace610ca3b
claude-code:88bbb9f0e26f8148141da379e4e837c5
codebase-inspection:5b1f99e926f347fe7c8c2658c9cc15b9
claude-design:6607092a7d19705b9647067a09afd733
codebase-inspection:97bf36f290117abc11ffde72535713e2
codex:79bb6b5d9b47453cd0d7ac25df5a3c97
design-md:267d0d8c363c9809744d1c62d561805e
dogfood:fc03244c3237e6b7325dc8aef387f2e3
comfyui:d6f42584ff328d6aa6a4b2e8e678c030
debugging-hermes-tui-commands:f992bee7976a1d0f59884fa57e58f314
design-md:a09844075e6e856a4a256dbc5f9e899a
dogfood:77ff237be7db22a4ef3850b411d915ed
dspy:5e0770e2563d11d9d4cc040681277c1c
evaluating-llms-harness:784cd66354b654dedf7541cd9b9e4c91
excalidraw:1679ad1d31a591fa3cb636d9150adcc7
findmy:bd50940d7b0104f6d6bf8981fc54b827
fine-tuning-with-trl:b2f0948b0f6e7202a452d9569bbd8f64
findmy:1d7dd3ae39cf25357a374c6bfb956442
fine-tuning-with-trl:f73c765998375978e9fe529cafa6054a
gif-search:dc9206e5c5c2d648774864df5222c95f
github-auth:909ef9bbff492b214a625179f704c09a
github-code-review:e56793f8efef112bbcdad96f69b45ddd
github-issues:ecb864a88aeea8f88f5b8742fec8806b
github-pr-workflow:cab1d57b84e253dddff37bd212f469ca
github-repo-management:7d7131b113d4dc2509a47501a6638e76
github-auth:6afa4cccb1eacad83dcdae2930b818a9
github-code-review:41071b74c0222d4e784de8f0927f757d
github-issues:3e4d98c7a6b1ebd0a55c752abb7a612b
github-pr-workflow:834e9cd72f18ea4598934d8d253b5858
github-repo-management:8479a9fb418f8dcfbbb191caaeccaa37
godmode:c592b460bf06e1f31b51bc6ac299e111
google-workspace:cf9028aff358f6c6b6ebc183672ad947
heartmula:ce53b2e6c9d68238cae5ae727738ecde
hermes-agent:1c55510fc8a7a8c0fee3134866ca5dc2
hermes-agent:286e1312a50b53f11b9714f506989e4f
hermes-agent-skill-authoring:d5b8b704b92d44ffa1e44f8b3d795037
himalaya:9da608734d1af8dab132406492bd5828
huggingface-hub:14002a449cb5f9a5ff8bdc7f730bcb2f
humanizer:0a006757e41d605ba0818ecca10288ed
ideation:0d1719daa364f2c5badd40c94620360f
imessage:f545da0f5cc64dd9ee1ffd2b7733a11b
jupyter-live-kernel:6bda9690d8c71095ac738bd9825e32f2
kanban-orchestrator:1636b60c79180ee89108727bff9383c7
kanban-worker:bc9124639762b2a5c20cd85580ae92e4
linear:ab7a5dbd4001e31e2bd888d86ab699f8
llama-cpp:fcfa4c23d52ac84abccf0b38e9844e07
llm-wiki:9cb710c49d1af6fdba54d06a835a5498
@@ -41,7 +49,8 @@ manim-video:86ba8c24fdd57771d68bea812d3b2466
maps:285f3436aafadf452fac8c0bb5715e40
minecraft-modpack-server:3cc682f8aef5f86d3580601ba28f9ba3
nano-pdf:dd55aca10b8e2844a0cda3c68c757e83
native-mcp:a8644a4f45c8403c1ad3342230b5c154
native-mcp:5564a9d31ce4165b532c575a315ddca4
node-inspect-debugger:e8f38e8586a090b880edcdbcba67ec76
notion:ac54a68c490d4cf1604bc24160083d43
obliteratus:98dfcbfcad4416d27d5dcbd0a491d772
obsidian:1dde562f384c6dc5eaec0b7c214caab4
@@ -49,26 +58,32 @@ ocr-and-documents:0fe461668b245d894370b8b40c3eb012
opencode:e3583bfa72da47385f6466eaf226faef
openhue:0487b4695a071cc62da64c79935bc8d1
outlines:8efbd31f1252f6c8fb340db4d9dcce2f
p5js:80de285f6ef54c19c22e4eafd1877fe4
p5js:5879c824a5487d6553d9380e37aa9c5e
pixel-art:f94fe511926a222052ec8d2dc892b112
plan:6a014103919a9b11d60e2d6267055871
pokemon-player:2a30ed51c1179b22967fb4a33e6e57e4
polymarket:b4a7d758f2fb29efb290dce1094cc625
popular-web-designs:a77ef442dcf747d8d534f5acb6b6f0cf
powerpoint:6ae6326c8fc5ff5a67b8e5283437ec30
requesting-code-review:f9cc90df11a9ce1cc23595c574eacd75
pretext:1a72b0c0b65188ce43917cac6d5b8973
python-debugpy:d40cd39a90885e2c5ac7be13bbf5e832
requesting-code-review:f76de34aee69387c297cf982c85fd6fe
research-paper-writing:e1fa7bb71e73fbc74ea017720f971e9a
segment-anything-model:a2403c1bf179c28cbac2ba7d56357b69
serving-llms-vllm:a8b5453a5316da8df055a0f23c3cbd25
sketch:56b3e77b9ff82d38fe1c7b8c6067de5d
songsee:7738e32bff3ca9ec32b37b32e0a8c9ca
songwriting-and-ai-music:65b4a6757901021ca16d9c8ecab62f7c
spike:a1034fab3d8669745ee75474dd9c3a6b
spotify:af733b32166f235fe3e0026e213ff2d4
subagent-driven-development:3d4c3f5060b7e1577fc3306b9ca36ffd
systematic-debugging:a02cf3ccd7b79909137ac1af46d01ed6
test-driven-development:32bc0784dc0720a9e536ba1ce559fedf
touchdesigner-mcp:3a428984eb83905c5ae89d0abf0ef866
unsloth:6482bcde01d0a9aeaddc247932c3c69c
webhook-subscriptions:edce3200566edfa7259718b51b8f52f3
weights-and-biases:91fd048a0b693f6d74a4639ea08bbd1d
writing-plans:5b72a4318524fd7ffb37fd43e51e3954
writing-plans:c91061baf59682c9b10a317b5ff25617
xurl:97a1749bd7274b93c631d71d2cf92e52
youtube-content:c448e213097433492d51a063d34eb9ae
yuanbao:69fa2e9e8b534a633443d47262e86855
@@ -1,6 +1,6 @@
---
name: apple-notes
description: Manage Apple Notes via the memo CLI on macOS (create, view, search, edit).
description: "Manage Apple Notes via memo CLI: create, search, edit."
version: 1.0.0
author: Hermes Agent
license: MIT
@@ -1,6 +1,6 @@
---
name: apple-reminders
description: Manage Apple Reminders via remindctl CLI (list, add, complete, delete).
description: "Apple Reminders via remindctl: add, list, complete."
version: 1.0.0
author: Hermes Agent
license: MIT
@@ -1,6 +1,6 @@
---
name: findmy
description: Track Apple devices and AirTags via FindMy.app on macOS using AppleScript and screen capture.
description: "Track Apple devices/AirTags via FindMy.app on macOS."
version: 1.0.0
author: Hermes Agent
license: MIT
@@ -1,6 +1,6 @@
---
name: hermes-agent
description: Complete guide to using and extending Hermes Agent — CLI usage, setup, configuration, spawning additional agents, gateway platforms, skills, voice, tools, profiles, and a concise contributor reference. Load this skill when helping users configure Hermes, troubleshoot issues, spawn agent instances, or make code contributions.
description: "Configure, extend, or contribute to Hermes Agent."
version: 2.0.0
author: Hermes Agent + Teknium
license: MIT
@@ -115,7 +115,7 @@ hermes tools disable NAME Disable a toolset
hermes skills list List installed skills
hermes skills search QUERY Search the skills hub
hermes skills install ID Install a skill
hermes skills install ID Install a skill (ID can be a hub identifier OR a direct https://…/SKILL.md URL; pass --name to override when frontmatter has no name)
hermes skills inspect ID Preview without installing
hermes skills config Enable/disable skills per platform
hermes skills check Check for updates
@@ -281,7 +281,6 @@ Type these during an interactive chat session.
### Utility
```
/branch (/fork) Branch the current session
/btw Ephemeral side question (doesn't interrupt main task)
/fast Toggle priority/fast processing
/browser Open CDP browser connection
/history Show conversation history (CLI)
@@ -403,6 +402,63 @@ Tool changes take effect on `/reset` (new session). They do NOT apply mid-conver
---
## Security & Privacy Toggles
Common "why is Hermes doing X to my output / tool calls / commands?" toggles — and the exact commands to change them. Most of these need a fresh session (`/reset` in chat, or start a new `hermes` invocation) because they're read once at startup.
### Secret redaction in tool output
Secret redaction is **off by default** — tool output (terminal stdout, `read_file`, web content, subagent summaries, etc.) passes through unmodified. If the user wants Hermes to auto-mask strings that look like API keys, tokens, and secrets before they enter the conversation context and logs:
```bash
hermes config set security.redact_secrets true # enable globally
```
**Restart required.** `security.redact_secrets` is snapshotted at import time — toggling it mid-session (e.g. via `export HERMES_REDACT_SECRETS=true` from a tool call) will NOT take effect for the running process. Tell the user to run `hermes config set security.redact_secrets true` in a terminal, then start a new session. This is deliberate — it prevents an LLM from flipping the toggle on itself mid-task.
Disable again with:
```bash
hermes config set security.redact_secrets false
```
### PII redaction in gateway messages
PII redaction is separate from secret redaction. When enabled, the gateway hashes user IDs and strips phone numbers from the session context before it reaches the model:
```bash
hermes config set privacy.redact_pii true # enable
hermes config set privacy.redact_pii false # disable (default)
```
### Command approval prompts
By default (`approvals.mode: manual`), Hermes prompts the user before running shell commands flagged as destructive (`rm -rf`, `git reset --hard`, etc.). The modes are:
- `manual` — always prompt before running a flagged command (default)
- `smart` — use an auxiliary LLM to auto-approve low-risk commands, prompt on high-risk
- `off` — skip all approval prompts (equivalent to `--yolo`)
```bash
hermes config set approvals.mode smart # recommended middle ground
hermes config set approvals.mode off # bypass everything (not recommended)
```
Per-invocation bypass without changing config:
- `hermes --yolo …`
- `export HERMES_YOLO_MODE=1`
Note: YOLO / `approvals.mode: off` does NOT turn off secret redaction. They are independent.
### Shell hooks allowlist
Some shell-hook integrations require explicit allowlisting before they fire. The allowlist is managed via `~/.hermes/shell-hooks-allowlist.json`, and you are prompted interactively the first time a hook wants to run.
### Disabling the web/browser/image-gen tools
To keep the model away from network or media tools entirely, open `hermes tools` and toggle them off per platform. The change takes effect on the next session (`/reset`). See the Tools & Skills section above.
---
## Voice & Transcription
### STT (Voice → Text)
@@ -1,6 +1,6 @@
---
name: architecture-diagram
description: Generate dark-themed SVG diagrams of software systems and cloud infrastructure as standalone HTML files with inline SVG graphics. Semantic component colors (cyan=frontend, emerald=backend, violet=database, amber=cloud/AWS, rose=security, orange=message bus), JetBrains Mono font, grid background. Best suited for software architecture, cloud/VPC topology, microservice maps, service-mesh diagrams, database + API layer diagrams, security groups, message buses — anything that fits a tech-infra deck with a dark aesthetic. If a more specialized diagramming skill exists for the subject (scientific, educational, hand-drawn, animated, etc.), prefer that — otherwise this skill can also serve as a general-purpose SVG diagram fallback. Based on Cocoon AI's architecture-diagram-generator (MIT).
description: "Dark-themed SVG architecture/cloud/infra diagrams as HTML."
version: 1.0.0
author: Cocoon AI (hello@cocoon-ai.com), ported by Hermes Agent
license: MIT
@@ -1,10 +1,18 @@
---
name: ascii-video
description: "Production pipeline for ASCII art video — any format. Converts video/audio/images/generative input into colored ASCII character video output (MP4, GIF, image sequence). Covers: video-to-ASCII conversion, audio-reactive music visualizers, generative ASCII art animations, hybrid video+audio reactive, text/lyrics overlays, real-time terminal rendering. Use when users request: ASCII video, text art video, terminal-style video, character art animation, retro text visualization, audio visualizer in ASCII, converting video to ASCII art, matrix-style effects, or any animated ASCII output."
description: "ASCII video: convert video/audio to colored ASCII MP4/GIF."
---
# ASCII Video Production Pipeline
## When to use
Use when users request: ASCII video, text art video, terminal-style video, character art animation, retro text visualization, audio visualizer in ASCII, converting video to ASCII art, matrix-style effects, or any animated ASCII output.
## What's inside
Production pipeline for ASCII art video — any format. Converts video/audio/images/generative input into colored ASCII character video output (MP4, GIF, image sequence). Covers: video-to-ASCII conversion, audio-reactive music visualizers, generative ASCII art animations, hybrid video+audio reactive, text/lyrics overlays, real-time terminal rendering.
## Creative Standard
This is visual art. ASCII characters are the medium; cinema is the standard.
@@ -1,6 +1,6 @@
---
name: baoyu-infographic
description: Generate professional infographics with 21 layout types and 21 visual styles. Analyzes content, recommends layout×style combinations, and generates publication-ready infographics. Use when user asks to create "infographic", "visual summary", "信息图", "可视化", or "高密度信息大图".
description: "Infographics: 21 layouts x 21 styles (信息图, 可视化)."
version: 1.56.1
author: 宝玉 (JimLiu)
license: MIT
@@ -1,13 +1,13 @@
---
name: design-md
description: Author, validate, diff, and export DESIGN.md files — Google's open-source format spec that gives coding agents a persistent, structured understanding of a design system (tokens + rationale in one file). Use when building a design system, porting style rules between projects, generating UI with consistent brand, or auditing accessibility/contrast.
description: Author/validate/export Google's DESIGN.md token spec files.
version: 1.0.0
author: Hermes Agent
license: MIT
metadata:
hermes:
tags: [design, design-system, tokens, ui, accessibility, wcag, tailwind, dtcg, google]
related_skills: [popular-web-designs, excalidraw, architecture-diagram]
related_skills: [popular-web-designs, claude-design, excalidraw, architecture-diagram]
---
# DESIGN.md Skill
@@ -31,7 +31,9 @@ diffs versions for regressions, and exports to Tailwind or W3C DTCG JSON.
- User wants contrast / WCAG accessibility validation on their color palette
For purely visual inspiration or layout examples, use `popular-web-designs`
instead. This skill is for the *formal spec file* itself.
instead. For *process and taste* when designing a one-off HTML artifact
from scratch (prototype, deck, landing page, component lab), use
`claude-design`. This skill is for the *formal spec file* itself.
## File anatomy
@@ -1,6 +1,6 @@
---
name: p5js
description: "Production pipeline for interactive and generative visual art using p5.js. Creates browser-based sketches, generative art, data visualizations, interactive experiences, 3D scenes, audio-reactive visuals, and motion graphics — exported as HTML, PNG, GIF, MP4, or SVG. Covers: 2D/3D rendering, noise and particle systems, flow fields, shaders (GLSL), pixel manipulation, kinetic typography, WebGL scenes, audio analysis, mouse/keyboard interaction, and headless high-res export. Use when users request: p5.js sketches, creative coding, generative art, interactive visualizations, canvas animations, browser-based visual art, data viz, shader effects, or any p5.js project."
description: "p5.js sketches: gen art, shaders, interactive, 3D."
version: 1.0.0
metadata:
hermes:
@@ -10,6 +10,14 @@ metadata:
# p5.js Production Pipeline
## When to use
Use when users request: p5.js sketches, creative coding, generative art, interactive visualizations, canvas animations, browser-based visual art, data viz, shader effects, or any p5.js project.
## What's inside
Production pipeline for interactive and generative visual art using p5.js. Creates browser-based sketches, generative art, data visualizations, interactive experiences, 3D scenes, audio-reactive visuals, and motion graphics — exported as HTML, PNG, GIF, MP4, or SVG. Covers: 2D/3D rendering, noise and particle systems, flow fields, shaders (GLSL), pixel manipulation, kinetic typography, WebGL scenes, audio analysis, mouse/keyboard interaction, and headless high-res export.
## Creative Standard
This is visual art rendered in the browser. The canvas is the medium; the algorithm is the brush.
@@ -1,11 +1,6 @@
---
name: jupyter-live-kernel
description: >
Use a live Jupyter kernel for stateful, iterative Python execution via hamelnb.
Load this skill when the task involves exploration, iteration, or inspecting
intermediate results — data science, ML experimentation, API exploration, or
building up complex code step-by-step. Uses terminal to run CLI commands against
a live Jupyter kernel. No new tools required.
description: "Iterative Python via live Jupyter kernel (hamelnb)."
version: 1.0.0
author: Hermes Agent
license: MIT
@@ -1,6 +1,6 @@
---
name: dogfood
description: Systematic exploratory QA testing of web applications — find bugs, capture evidence, and generate structured reports
description: "Exploratory QA of web apps: find bugs, evidence, reports."
version: 1.0.0
metadata:
hermes:
@@ -1,6 +1,6 @@
---
name: codebase-inspection
description: Inspect and analyze codebases using pygount for LOC counting, language breakdown, and code-vs-comment ratios. Use when asked to check lines of code, repo size, language composition, or codebase stats.
description: "Inspect codebases w/ pygount: LOC, languages, ratios."
version: 1.0.0
author: Hermes Agent
license: MIT
@@ -1,6 +1,6 @@
---
name: github-auth
description: Set up GitHub authentication for the agent using git (universally available) or the gh CLI. Covers HTTPS tokens, SSH keys, credential helpers, and gh auth — with a detection flow to pick the right method automatically.
description: "GitHub auth setup: HTTPS tokens, SSH keys, gh CLI login."
version: 1.1.0
author: Hermes Agent
license: MIT
@@ -1,6 +1,6 @@
---
name: github-code-review
description: Review code changes by analyzing git diffs, leaving inline comments on PRs, and performing thorough pre-push review. Works with gh CLI or falls back to git + GitHub REST API via curl.
description: "Review PRs: diffs, inline comments via gh or REST."
version: 1.1.0
author: Hermes Agent
license: MIT
@@ -1,6 +1,6 @@
---
name: github-issues
description: Create, manage, triage, and close GitHub issues. Search existing issues, add labels, assign people, and link to PRs. Works with gh CLI or falls back to git + GitHub REST API via curl.
description: "Create, triage, label, assign GitHub issues via gh or REST."
version: 1.1.0
author: Hermes Agent
license: MIT
@@ -1,6 +1,6 @@
---
name: github-pr-workflow
description: Full pull request lifecycle — create branches, commit changes, open PRs, monitor CI status, auto-fix failures, and merge. Works with gh CLI or falls back to git + GitHub REST API via curl.
description: "GitHub PR lifecycle: branch, commit, open, CI, merge."
version: 1.1.0
author: Hermes Agent
license: MIT
@@ -1,6 +1,6 @@
---
name: github-repo-management
description: Clone, create, fork, configure, and manage GitHub repositories. Manage remotes, secrets, releases, and workflows. Works with gh CLI or falls back to git + GitHub REST API via curl.
description: "Clone/create/fork repos; manage remotes, releases."
version: 1.1.0
author: Hermes Agent
license: MIT
@@ -1,6 +1,6 @@
---
name: native-mcp
description: Built-in MCP (Model Context Protocol) client that connects to external MCP servers, discovers their tools, and registers them as native Hermes Agent tools. Supports stdio and HTTP transports with automatic reconnection, security filtering, and zero-config tool injection.
description: "MCP client: connect servers, register tools (stdio/HTTP)."
version: 1.0.0
author: Hermes Agent
license: MIT
@@ -1,6 +1,6 @@
---
name: huggingface-hub
description: Hugging Face Hub CLI (hf) — search, download, and upload models and datasets, manage repos, query datasets with SQL, deploy inference endpoints, manage Spaces and buckets.
description: "HuggingFace hf CLI: search/download/upload models, datasets."
version: 1.0.0
author: Hugging Face
license: MIT
@@ -0,0 +1,655 @@
---
name: outlines
description: Guarantee valid JSON/XML/code structure during generation, use Pydantic models for type-safe outputs, support local models (Transformers, vLLM), and maximize inference speed with Outlines - dottxt.ai's structured generation library
version: 1.0.0
author: Orchestra Research
license: MIT
dependencies: [outlines, transformers, vllm, pydantic]
metadata:
hermes:
tags: [Prompt Engineering, Outlines, Structured Generation, JSON Schema, Pydantic, Local Models, Grammar-Based Generation, vLLM, Transformers, Type Safety]
---
# Outlines: Structured Text Generation
## When to Use This Skill
Use Outlines when you need to:
- **Guarantee valid JSON/XML/code** structure during generation
- **Use Pydantic models** for type-safe outputs
- **Support local models** (Transformers, llama.cpp, vLLM)
- **Maximize inference speed** with zero-overhead structured generation
- **Generate against JSON schemas** automatically
- **Control token sampling** at the grammar level
**GitHub Stars**: 8,000+ | **From**: dottxt.ai (formerly .txt)
## Installation
```bash
# Base installation
pip install outlines
# With specific backends
pip install outlines transformers # Hugging Face models
pip install outlines llama-cpp-python # llama.cpp
pip install outlines vllm # vLLM for high-throughput
```
## Quick Start
### Basic Example: Classification
```python
import outlines
from typing import Literal
# Load model
model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")
# Generate with type constraint
prompt = "Sentiment of 'This product is amazing!': "
generator = outlines.generate.choice(model, ["positive", "negative", "neutral"])
sentiment = generator(prompt)
print(sentiment) # "positive" (guaranteed one of these)
```
### With Pydantic Models
```python
from pydantic import BaseModel
import outlines
class User(BaseModel):
name: str
age: int
email: str
model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")
# Generate structured output
prompt = "Extract user: John Doe, 30 years old, john@example.com"
generator = outlines.generate.json(model, User)
user = generator(prompt)
print(user.name) # "John Doe"
print(user.age) # 30
print(user.email) # "john@example.com"
```
## Core Concepts
### 1. Constrained Token Sampling
Outlines uses Finite State Machines (FSM) to constrain token generation at the logit level.
**How it works:**
1. Convert schema (JSON/Pydantic/regex) to context-free grammar (CFG)
2. Transform CFG into Finite State Machine (FSM)
3. Filter invalid tokens at each step during generation
4. Fast-forward when only one valid token exists
**Benefits:**
- **Zero overhead**: Filtering happens at token level
- **Speed improvement**: Fast-forward through deterministic paths
- **Guaranteed validity**: Invalid outputs impossible
```python
from pydantic import BaseModel
import outlines
# Pydantic model -> JSON schema -> CFG -> FSM
class Person(BaseModel):
name: str
age: int
model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")
# Behind the scenes:
# 1. Person -> JSON schema
# 2. JSON schema -> CFG
# 3. CFG -> FSM
# 4. FSM filters tokens during generation
generator = outlines.generate.json(model, Person)
result = generator("Generate person: Alice, 25")
```
### 2. Structured Generators
Outlines provides specialized generators for different output types.
#### Choice Generator
```python
# Multiple choice selection
generator = outlines.generate.choice(
model,
["positive", "negative", "neutral"]
)
sentiment = generator("Review: This is great!")
# Result: One of the three choices
```
#### JSON Generator
```python
from pydantic import BaseModel
class Product(BaseModel):
name: str
price: float
in_stock: bool
# Generate valid JSON matching schema
generator = outlines.generate.json(model, Product)
product = generator("Extract: iPhone 15, $999, available")
# Guaranteed valid Product instance
print(type(product)) # <class '__main__.Product'>
```
#### Regex Generator
```python
# Generate text matching regex
generator = outlines.generate.regex(
model,
r"[0-9]{3}-[0-9]{3}-[0-9]{4}" # Phone number pattern
)
phone = generator("Generate phone number:")
# Result: "555-123-4567" (guaranteed to match pattern)
```
#### Integer/Float Generators
```python
# Generate specific numeric types
int_generator = outlines.generate.integer(model)
age = int_generator("Person's age:") # Guaranteed integer
float_generator = outlines.generate.float(model)
price = float_generator("Product price:") # Guaranteed float
```
### 3. Model Backends
Outlines supports multiple local and API-based backends.
#### Transformers (Hugging Face)
```python
import outlines
# Load from Hugging Face
model = outlines.models.transformers(
"microsoft/Phi-3-mini-4k-instruct",
device="cuda" # Or "cpu"
)
# Use with any generator
generator = outlines.generate.json(model, YourModel)
```
#### llama.cpp
```python
# Load GGUF model
model = outlines.models.llamacpp(
"./models/llama-3.1-8b-instruct.Q4_K_M.gguf",
n_gpu_layers=35
)
generator = outlines.generate.json(model, YourModel)
```
#### vLLM (High Throughput)
```python
# For production deployments
model = outlines.models.vllm(
"meta-llama/Llama-3.1-8B-Instruct",
tensor_parallel_size=2 # Multi-GPU
)
generator = outlines.generate.json(model, YourModel)
```
#### OpenAI (Limited Support)
```python
# Basic OpenAI support
model = outlines.models.openai(
"gpt-4o-mini",
api_key="your-api-key"
)
# Note: Some features limited with API models
generator = outlines.generate.json(model, YourModel)
```
### 4. Pydantic Integration
Outlines has first-class Pydantic support with automatic schema translation.
#### Basic Models
```python
from pydantic import BaseModel, Field
class Article(BaseModel):
title: str = Field(description="Article title")
author: str = Field(description="Author name")
word_count: int = Field(description="Number of words", gt=0)
tags: list[str] = Field(description="List of tags")
model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")
generator = outlines.generate.json(model, Article)
article = generator("Generate article about AI")
print(article.title)
print(article.word_count) # Guaranteed > 0
```
#### Nested Models
```python
class Address(BaseModel):
street: str
city: str
country: str
class Person(BaseModel):
name: str
age: int
address: Address # Nested model
generator = outlines.generate.json(model, Person)
person = generator("Generate person in New York")
print(person.address.city) # "New York"
```
#### Enums and Literals
```python
from enum import Enum
from typing import Literal
class Status(str, Enum):
PENDING = "pending"
APPROVED = "approved"
REJECTED = "rejected"
class Application(BaseModel):
applicant: str
status: Status # Must be one of enum values
priority: Literal["low", "medium", "high"] # Must be one of literals
generator = outlines.generate.json(model, Application)
app = generator("Generate application")
print(app.status) # Status.PENDING (or APPROVED/REJECTED)
```
## Common Patterns
### Pattern 1: Data Extraction
```python
from pydantic import BaseModel
import outlines
class CompanyInfo(BaseModel):
name: str
founded_year: int
industry: str
employees: int
model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")
generator = outlines.generate.json(model, CompanyInfo)
text = """
Apple Inc. was founded in 1976 in the technology industry.
The company employs approximately 164,000 people worldwide.
"""
prompt = f"Extract company information:\n{text}\n\nCompany:"
company = generator(prompt)
print(f"Name: {company.name}")
print(f"Founded: {company.founded_year}")
print(f"Industry: {company.industry}")
print(f"Employees: {company.employees}")
```
### Pattern 2: Classification
```python
from typing import Literal
from pydantic import BaseModel
import outlines
model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")
# Binary classification
generator = outlines.generate.choice(model, ["spam", "not_spam"])
result = generator("Email: Buy now! 50% off!")
# Multi-class classification
categories = ["technology", "business", "sports", "entertainment"]
category_gen = outlines.generate.choice(model, categories)
category = category_gen("Article: Apple announces new iPhone...")
# With confidence
class Classification(BaseModel):
label: Literal["positive", "negative", "neutral"]
confidence: float
classifier = outlines.generate.json(model, Classification)
result = classifier("Review: This product is okay, nothing special")
```
### Pattern 3: Structured Forms
```python
class UserProfile(BaseModel):
full_name: str
age: int
email: str
phone: str
country: str
interests: list[str]
model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")
generator = outlines.generate.json(model, UserProfile)
prompt = """
Extract user profile from:
Name: Alice Johnson
Age: 28
Email: alice@example.com
Phone: 555-0123
Country: USA
Interests: hiking, photography, cooking
"""
profile = generator(prompt)
print(profile.full_name)
print(profile.interests) # ["hiking", "photography", "cooking"]
```
### Pattern 4: Multi-Entity Extraction
```python
class Entity(BaseModel):
name: str
type: Literal["PERSON", "ORGANIZATION", "LOCATION"]
class DocumentEntities(BaseModel):
entities: list[Entity]
model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")
generator = outlines.generate.json(model, DocumentEntities)
text = "Tim Cook met with Satya Nadella at Microsoft headquarters in Redmond."
prompt = f"Extract entities from: {text}"
result = generator(prompt)
for entity in result.entities:
print(f"{entity.name} ({entity.type})")
```
### Pattern 5: Code Generation
```python
class PythonFunction(BaseModel):
function_name: str
parameters: list[str]
docstring: str
body: str
model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")
generator = outlines.generate.json(model, PythonFunction)
prompt = "Generate a Python function to calculate factorial"
func = generator(prompt)
print(f"def {func.function_name}({', '.join(func.parameters)}):")
print(f' """{func.docstring}"""')
print(f" {func.body}")
```
### Pattern 6: Batch Processing
```python
def batch_extract(texts: list[str], schema: type[BaseModel]):
"""Extract structured data from multiple texts."""
model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")
generator = outlines.generate.json(model, schema)
results = []
for text in texts:
result = generator(f"Extract from: {text}")
results.append(result)
return results
class Person(BaseModel):
name: str
age: int
texts = [
"John is 30 years old",
"Alice is 25 years old",
"Bob is 40 years old"
]
people = batch_extract(texts, Person)
for person in people:
print(f"{person.name}: {person.age}")
```
## Backend Configuration
### Transformers
```python
import outlines
# Basic usage
model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")
# GPU configuration
model = outlines.models.transformers(
"microsoft/Phi-3-mini-4k-instruct",
device="cuda",
model_kwargs={"torch_dtype": "float16"}
)
# Popular models
model = outlines.models.transformers("meta-llama/Llama-3.1-8B-Instruct")
model = outlines.models.transformers("mistralai/Mistral-7B-Instruct-v0.3")
model = outlines.models.transformers("Qwen/Qwen2.5-7B-Instruct")
```
### llama.cpp
```python
# Load GGUF model
model = outlines.models.llamacpp(
"./models/llama-3.1-8b.Q4_K_M.gguf",
n_ctx=4096, # Context window
n_gpu_layers=35, # GPU layers
n_threads=8 # CPU threads
)
# Full GPU offload
model = outlines.models.llamacpp(
"./models/model.gguf",
n_gpu_layers=-1 # All layers on GPU
)
```
### vLLM (Production)
```python
# Single GPU
model = outlines.models.vllm("meta-llama/Llama-3.1-8B-Instruct")
# Multi-GPU
model = outlines.models.vllm(
"meta-llama/Llama-3.1-70B-Instruct",
tensor_parallel_size=4 # 4 GPUs
)
# With quantization
model = outlines.models.vllm(
"meta-llama/Llama-3.1-8B-Instruct",
quantization="awq" # Or "gptq"
)
```
## Best Practices
### 1. Use Specific Types
```python
# ✅ Good: Specific types
class Product(BaseModel):
name: str
price: float # Not str
quantity: int # Not str
in_stock: bool # Not str
# ❌ Bad: Everything as string
class Product(BaseModel):
name: str
price: str # Should be float
quantity: str # Should be int
```
### 2. Add Constraints
```python
from pydantic import Field
# ✅ Good: With constraints
class User(BaseModel):
name: str = Field(min_length=1, max_length=100)
age: int = Field(ge=0, le=120)
email: str = Field(pattern=r"^[\w\.-]+@[\w\.-]+\.\w+$")
# ❌ Bad: No constraints
class User(BaseModel):
name: str
age: int
email: str
```
### 3. Use Enums for Categories
```python
# ✅ Good: Enum for fixed set
class Priority(str, Enum):
LOW = "low"
MEDIUM = "medium"
HIGH = "high"
class Task(BaseModel):
title: str
priority: Priority
# ❌ Bad: Free-form string
class Task(BaseModel):
title: str
priority: str # Can be anything
```
### 4. Provide Context in Prompts
```python
# ✅ Good: Clear context
prompt = """
Extract product information from the following text.
Text: iPhone 15 Pro costs $999 and is currently in stock.
Product:
"""
# ❌ Bad: Minimal context
prompt = "iPhone 15 Pro costs $999 and is currently in stock."
```
### 5. Handle Optional Fields
```python
from typing import Optional
# ✅ Good: Optional fields for incomplete data
class Article(BaseModel):
title: str # Required
author: Optional[str] = None # Optional
date: Optional[str] = None # Optional
tags: list[str] = [] # Default empty list
# Can succeed even if author/date missing
```
## Comparison to Alternatives
| Feature | Outlines | Instructor | Guidance | LMQL |
|---------|----------|------------|----------|------|
| Pydantic Support | ✅ Native | ✅ Native | ❌ No | ❌ No |
| JSON Schema | ✅ Yes | ✅ Yes | ⚠️ Limited | ✅ Yes |
| Regex Constraints | ✅ Yes | ❌ No | ✅ Yes | ✅ Yes |
| Local Models | ✅ Full | ⚠️ Limited | ✅ Full | ✅ Full |
| API Models | ⚠️ Limited | ✅ Full | ✅ Full | ✅ Full |
| Zero Overhead | ✅ Yes | ❌ No | ⚠️ Partial | ✅ Yes |
| Automatic Retrying | ❌ No | ✅ Yes | ❌ No | ❌ No |
| Learning Curve | Low | Low | Low | High |
**When to choose Outlines:**
- Using local models (Transformers, llama.cpp, vLLM)
- Need maximum inference speed
- Want Pydantic model support
- Require zero-overhead structured generation
- Control token sampling process
**When to choose alternatives:**
- Instructor: Need API models with automatic retrying
- Guidance: Need token healing and complex workflows
- LMQL: Prefer declarative query syntax
## Performance Characteristics
**Speed:**
- **Zero overhead**: Structured generation as fast as unconstrained
- **Fast-forward optimization**: Skips deterministic tokens
- **1.2-2x faster** than post-generation validation approaches
**Memory:**
- FSM compiled once per schema (cached)
- Minimal runtime overhead
- Efficient with vLLM for high throughput
**Accuracy:**
- **100% valid outputs** (guaranteed by FSM)
- No retry loops needed
- Deterministic token filtering
## Resources
- **Documentation**: https://outlines-dev.github.io/outlines
- **GitHub**: https://github.com/outlines-dev/outlines (8k+ stars)
- **Discord**: https://discord.gg/R9DSu34mGd
- **Blog**: https://blog.dottxt.co
## See Also
- `references/json_generation.md` - Comprehensive JSON and Pydantic patterns
- `references/backends.md` - Backend-specific configuration
- `references/examples.md` - Production-ready examples
@@ -0,0 +1,615 @@
# Backend Configuration Guide
Complete guide to configuring Outlines with different model backends.
## Table of Contents
- Local Models (Transformers, llama.cpp, vLLM)
- API Models (OpenAI)
- Backend Comparison
- Performance Tuning
- Production Deployment
## Transformers (Hugging Face)
### Basic Setup
```python
import outlines
# Load model from Hugging Face
model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")
# Use with generator
generator = outlines.generate.json(model, YourModel)
result = generator("Your prompt")
```
### GPU Configuration
```python
# Use CUDA GPU
model = outlines.models.transformers(
"microsoft/Phi-3-mini-4k-instruct",
device="cuda"
)
# Use specific GPU
model = outlines.models.transformers(
"microsoft/Phi-3-mini-4k-instruct",
device="cuda:0" # GPU 0
)
# Use CPU
model = outlines.models.transformers(
"microsoft/Phi-3-mini-4k-instruct",
device="cpu"
)
# Use Apple Silicon MPS
model = outlines.models.transformers(
"microsoft/Phi-3-mini-4k-instruct",
device="mps"
)
```
### Advanced Configuration
```python
# FP16 for faster inference
model = outlines.models.transformers(
"microsoft/Phi-3-mini-4k-instruct",
device="cuda",
model_kwargs={
"torch_dtype": "float16"
}
)
# 8-bit quantization (less memory)
model = outlines.models.transformers(
"microsoft/Phi-3-mini-4k-instruct",
device="cuda",
model_kwargs={
"load_in_8bit": True,
"device_map": "auto"
}
)
# 4-bit quantization (even less memory)
model = outlines.models.transformers(
"meta-llama/Llama-3.1-70B-Instruct",
device="cuda",
model_kwargs={
"load_in_4bit": True,
"device_map": "auto",
"bnb_4bit_compute_dtype": "float16"
}
)
# Multi-GPU
# NOTE: do not pass device= here. Outlines copies a non-None `device` into
# model_kwargs["device_map"], which would replace "auto" and place the whole
# model on a single device instead of sharding it.
model = outlines.models.transformers(
    "meta-llama/Llama-3.1-70B-Instruct",
    model_kwargs={
        "device_map": "auto",  # Automatic GPU distribution
        "max_memory": {0: "40GB", 1: "40GB"}  # Per-GPU limits
    }
)
```
### Popular Models
```python
# Phi family (Microsoft)
model = outlines.models.transformers("microsoft/Phi-4-mini-instruct")
model = outlines.models.transformers("microsoft/Phi-3-medium-4k-instruct")
# Llama 3.1 (Meta)
model = outlines.models.transformers("meta-llama/Llama-3.1-8B-Instruct")
model = outlines.models.transformers("meta-llama/Llama-3.1-70B-Instruct")
model = outlines.models.transformers("meta-llama/Llama-3.1-405B-Instruct")
# Mistral (Mistral AI)
model = outlines.models.transformers("mistralai/Mistral-7B-Instruct-v0.3")
model = outlines.models.transformers("mistralai/Mixtral-8x7B-Instruct-v0.1")
model = outlines.models.transformers("mistralai/Mixtral-8x22B-Instruct-v0.1")
# Qwen (Alibaba)
model = outlines.models.transformers("Qwen/Qwen2.5-7B-Instruct")
model = outlines.models.transformers("Qwen/Qwen2.5-14B-Instruct")
model = outlines.models.transformers("Qwen/Qwen2.5-72B-Instruct")
# Gemma (Google)
model = outlines.models.transformers("google/gemma-2-9b-it")
model = outlines.models.transformers("google/gemma-2-27b-it")
# Llava (Vision)
model = outlines.models.transformers("llava-hf/llava-v1.6-mistral-7b-hf")
```
### Custom Model Loading
```python
from transformers import AutoTokenizer, AutoModelForCausalLM
import outlines
# Load model manually
tokenizer = AutoTokenizer.from_pretrained("your-model")
model_hf = AutoModelForCausalLM.from_pretrained(
"your-model",
device_map="auto",
torch_dtype="float16"
)
# Use with Outlines
# The transformers() helper loads a model by name; objects that are already
# loaded are wrapped with the Transformers class instead.
model = outlines.models.Transformers(model_hf, tokenizer)
```
## llama.cpp
### Basic Setup
```python
import outlines
# Load GGUF model
model = outlines.models.llamacpp(
"./models/llama-3.1-8b-instruct.Q4_K_M.gguf",
n_ctx=4096 # Context window
)
# Use with generator
generator = outlines.generate.json(model, YourModel)
```
### GPU Configuration
```python
# CPU only
model = outlines.models.llamacpp(
"./models/model.gguf",
n_ctx=4096,
n_threads=8 # Use 8 CPU threads
)
# GPU offload (partial)
model = outlines.models.llamacpp(
"./models/model.gguf",
n_ctx=4096,
n_gpu_layers=35, # Offload 35 layers to GPU
n_threads=4 # CPU threads for remaining layers
)
# Full GPU offload
model = outlines.models.llamacpp(
"./models/model.gguf",
n_ctx=8192,
n_gpu_layers=-1 # All layers on GPU
)
```
### Advanced Configuration
```python
model = outlines.models.llamacpp(
"./models/llama-3.1-8b.Q4_K_M.gguf",
n_ctx=8192, # Context window (tokens)
n_gpu_layers=35, # GPU layers
n_threads=8, # CPU threads
n_batch=512, # Batch size for prompt processing
use_mmap=True, # Memory-map model file (faster loading)
use_mlock=False, # Lock model in RAM (prevents swapping)
seed=42, # Random seed for reproducibility
verbose=False # Suppress verbose output
)
```
### Quantization Formats
```python
# Q4_K_M (4-bit, recommended for most cases)
# - Size: ~4.5GB for 7B model
# - Quality: Good
# - Speed: Fast
model = outlines.models.llamacpp("./models/model.Q4_K_M.gguf")
# Q5_K_M (5-bit, better quality)
# - Size: ~5.5GB for 7B model
# - Quality: Very good
# - Speed: Slightly slower than Q4
model = outlines.models.llamacpp("./models/model.Q5_K_M.gguf")
# Q6_K (6-bit, high quality)
# - Size: ~6.5GB for 7B model
# - Quality: Excellent
# - Speed: Slower than Q5
model = outlines.models.llamacpp("./models/model.Q6_K.gguf")
# Q8_0 (8-bit, near-original quality)
# - Size: ~8GB for 7B model
# - Quality: Near FP16
# - Speed: Slower than Q6
model = outlines.models.llamacpp("./models/model.Q8_0.gguf")
# F16 (16-bit float, original quality)
# - Size: ~14GB for 7B model
# - Quality: Original
# - Speed: Slowest
model = outlines.models.llamacpp("./models/model.F16.gguf")
```
### Popular GGUF Models
```python
# Llama 3.1
model = outlines.models.llamacpp("llama-3.1-8b-instruct.Q4_K_M.gguf")
model = outlines.models.llamacpp("llama-3.1-70b-instruct.Q4_K_M.gguf")
# Mistral
model = outlines.models.llamacpp("mistral-7b-instruct-v0.3.Q4_K_M.gguf")
# Phi-4
model = outlines.models.llamacpp("phi-4-mini-instruct.Q4_K_M.gguf")
# Qwen
model = outlines.models.llamacpp("qwen2.5-7b-instruct.Q4_K_M.gguf")
```
### Apple Silicon Optimization
```python
# Optimized for M1/M2/M3 Macs
model = outlines.models.llamacpp(
"./models/llama-3.1-8b.Q4_K_M.gguf",
n_ctx=4096,
n_gpu_layers=-1, # Use Metal GPU acceleration
use_mmap=True, # Efficient memory mapping
n_threads=8 # Use performance cores
)
```
## vLLM (Production)
### Basic Setup
```python
import outlines
# Load model with vLLM
model = outlines.models.vllm("meta-llama/Llama-3.1-8B-Instruct")
# Use with generator
generator = outlines.generate.json(model, YourModel)
```
### Single GPU
```python
model = outlines.models.vllm(
"meta-llama/Llama-3.1-8B-Instruct",
gpu_memory_utilization=0.9, # Use 90% of GPU memory
max_model_len=4096 # Max sequence length
)
```
### Multi-GPU
```python
# Tensor parallelism (split model across GPUs)
model = outlines.models.vllm(
"meta-llama/Llama-3.1-70B-Instruct",
tensor_parallel_size=4, # Use 4 GPUs
gpu_memory_utilization=0.9
)
# Pipeline parallelism (rare, for very large models)
model = outlines.models.vllm(
"meta-llama/Llama-3.1-405B-Instruct",
pipeline_parallel_size=8, # 8-GPU pipeline
tensor_parallel_size=4 # 4-GPU tensor split
# Total: 32 GPUs
)
```
### Quantization
```python
# AWQ quantization (4-bit)
model = outlines.models.vllm(
"meta-llama/Llama-3.1-8B-Instruct",
quantization="awq",
dtype="float16"
)
# GPTQ quantization (4-bit)
model = outlines.models.vllm(
"meta-llama/Llama-3.1-8B-Instruct",
quantization="gptq"
)
# SqueezeLLM quantization
model = outlines.models.vllm(
"meta-llama/Llama-3.1-8B-Instruct",
quantization="squeezellm"
)
```
### Advanced Configuration
```python
model = outlines.models.vllm(
"meta-llama/Llama-3.1-8B-Instruct",
tensor_parallel_size=1,
gpu_memory_utilization=0.9,
max_model_len=8192,
max_num_seqs=256, # Max concurrent sequences
max_num_batched_tokens=8192, # Max tokens per batch
dtype="float16",
trust_remote_code=True,
enforce_eager=False, # Use CUDA graphs (faster)
swap_space=4 # CPU swap space (GB)
)
```
### Batch Processing
```python
# vLLM optimized for high-throughput batch processing
model = outlines.models.vllm(
    "meta-llama/Llama-3.1-8B-Instruct",
    max_num_seqs=128  # Process 128 sequences in parallel
)
generator = outlines.generate.json(model, YourModel)
# Build the full list of prompts up front
prompts = [f"prompt{i}" for i in range(1, 101)]
# Pass the whole list in ONE call so vLLM can batch the requests;
# calling generator(p) once per prompt would run them one at a time.
results = generator(prompts)
```
## OpenAI (Limited Support)
### Basic Setup
```python
import outlines
# Basic OpenAI support
model = outlines.models.openai("gpt-4o-mini", api_key="your-api-key")
# Use with generator
generator = outlines.generate.json(model, YourModel)
result = generator("Your prompt")
```
### Configuration
```python
model = outlines.models.openai(
"gpt-4o-mini",
api_key="your-api-key", # Or set OPENAI_API_KEY env var
max_tokens=2048,
temperature=0.7
)
```
### Available Models
```python
# GPT-4o (latest)
model = outlines.models.openai("gpt-4o")
# GPT-4o Mini (cost-effective)
model = outlines.models.openai("gpt-4o-mini")
# GPT-4 Turbo
model = outlines.models.openai("gpt-4-turbo")
# GPT-3.5 Turbo
model = outlines.models.openai("gpt-3.5-turbo")
```
**Note**: OpenAI support is limited compared to local models. Some advanced features may not work.
## Backend Comparison
### Feature Matrix
| Feature | Transformers | llama.cpp | vLLM | OpenAI |
|---------|-------------|-----------|------|--------|
| Structured Generation | ✅ Full | ✅ Full | ✅ Full | ⚠️ Limited |
| FSM Optimization | ✅ Yes | ✅ Yes | ✅ Yes | ❌ No |
| GPU Support | ✅ Yes | ✅ Yes | ✅ Yes | N/A |
| Multi-GPU | ✅ Yes | ✅ Yes | ✅ Yes | N/A |
| Quantization | ✅ Yes | ✅ Yes | ✅ Yes | N/A |
| High Throughput | ⚠️ Medium | ⚠️ Medium | ✅ Excellent | ⚠️ API-limited |
| Setup Difficulty | Easy | Medium | Medium | Easy |
| Cost | Hardware | Hardware | Hardware | API usage |
### Performance Characteristics
**Transformers:**
- **Latency**: 50-200ms (single request, GPU)
- **Throughput**: 10-50 tokens/sec (depends on hardware)
- **Memory**: 2-4GB per 1B parameters (FP16)
- **Best for**: Development, small-scale deployment, flexibility
**llama.cpp:**
- **Latency**: 30-150ms (single request)
- **Throughput**: 20-150 tokens/sec (depends on quantization)
- **Memory**: 0.5-2GB per 1B parameters (Q4-Q8)
- **Best for**: CPU inference, Apple Silicon, edge deployment, low memory
**vLLM:**
- **Latency**: 30-100ms (single request)
- **Throughput**: 100-1000+ tokens/sec (batch processing)
- **Memory**: 2-4GB per 1B parameters (FP16)
- **Best for**: Production, high-throughput, batch processing, serving
**OpenAI:**
- **Latency**: 200-500ms (API call)
- **Throughput**: API rate limits
- **Memory**: N/A (cloud-based)
- **Best for**: Quick prototyping, no infrastructure
### Memory Requirements
**7B Model:**
- FP16: ~14GB
- 8-bit: ~7GB
- 4-bit: ~4GB
- Q4_K_M (GGUF): ~4.5GB
**13B Model:**
- FP16: ~26GB
- 8-bit: ~13GB
- 4-bit: ~7GB
- Q4_K_M (GGUF): ~8GB
**70B Model:**
- FP16: ~140GB (multi-GPU)
- 8-bit: ~70GB (multi-GPU)
- 4-bit: ~35GB (single A100/H100)
- Q4_K_M (GGUF): ~40GB
## Performance Tuning
### Transformers Optimization
```python
# Use FP16
model = outlines.models.transformers(
"meta-llama/Llama-3.1-8B-Instruct",
device="cuda",
model_kwargs={"torch_dtype": "float16"}
)
# Use flash attention (2-4x faster)
model = outlines.models.transformers(
    "meta-llama/Llama-3.1-8B-Instruct",
    device="cuda",
    model_kwargs={
        "torch_dtype": "float16",
        # attn_implementation supersedes the deprecated use_flash_attention_2 flag
        "attn_implementation": "flash_attention_2"
    }
)
# Use 8-bit quantization (2x less memory)
model = outlines.models.transformers(
"meta-llama/Llama-3.1-8B-Instruct",
device="cuda",
model_kwargs={
"load_in_8bit": True,
"device_map": "auto"
}
)
```
### llama.cpp Optimization
```python
# Maximize GPU usage
model = outlines.models.llamacpp(
"./models/model.Q4_K_M.gguf",
n_gpu_layers=-1, # All layers on GPU
n_ctx=8192,
n_batch=512 # Larger batch = faster
)
# Optimize for CPU (Apple Silicon)
model = outlines.models.llamacpp(
"./models/model.Q4_K_M.gguf",
n_ctx=4096,
n_threads=8, # Use all performance cores
use_mmap=True
)
```
### vLLM Optimization
```python
# High throughput
model = outlines.models.vllm(
"meta-llama/Llama-3.1-8B-Instruct",
gpu_memory_utilization=0.95, # Use 95% of GPU
max_num_seqs=256, # High concurrency
enforce_eager=False # Use CUDA graphs
)
# Multi-GPU
model = outlines.models.vllm(
"meta-llama/Llama-3.1-70B-Instruct",
tensor_parallel_size=4, # 4 GPUs
gpu_memory_utilization=0.9
)
```
## Production Deployment
### Docker with vLLM
```dockerfile
FROM vllm/vllm-openai:latest
# Install outlines
RUN pip install outlines
# Copy your code
COPY app.py /app/
# Run
# The base image's ENTRYPOINT starts the vLLM OpenAI-compatible API server, so
# a plain CMD would only be appended to that command as arguments. Override
# ENTRYPOINT so the container runs the application instead.
ENTRYPOINT ["python3", "/app/app.py"]
```
### Environment Variables
```bash
# Transformers cache
export HF_HOME="/path/to/cache"
export TRANSFORMERS_CACHE="/path/to/cache"
# GPU selection
export CUDA_VISIBLE_DEVICES=0,1,2,3
# OpenAI API key
export OPENAI_API_KEY="sk-..."
# Disable tokenizers parallelism warning
export TOKENIZERS_PARALLELISM=false
```
### Model Serving
```python
# Simple HTTP server with vLLM
import outlines
from fastapi import FastAPI
from pydantic import BaseModel
app = FastAPI()
# Load model once at startup
model = outlines.models.vllm("meta-llama/Llama-3.1-8B-Instruct")
class User(BaseModel):
name: str
age: int
email: str
generator = outlines.generate.json(model, User)
@app.post("/extract")
def extract(text: str):
    """Run the module-level ``generator`` on ``text`` and return its ``model_dump()``.

    NOTE(review): ``text`` is a bare ``str`` parameter, so FastAPI presumably
    reads it from the query string rather than the request body - confirm
    this is what callers of this POST endpoint expect.
    """
    # The generator is built once at import time and reused for every request.
    result = generator(f"Extract user from: {text}")
    return result.model_dump()
```
## Resources
- **Transformers**: https://huggingface.co/docs/transformers
- **llama.cpp**: https://github.com/ggerganov/llama.cpp
- **vLLM**: https://docs.vllm.ai
- **Outlines**: https://github.com/outlines-dev/outlines
@@ -0,0 +1,773 @@
# Production-Ready Examples
Real-world examples of using Outlines for structured generation in production systems.
## Table of Contents
- Data Extraction
- Classification Systems
- Form Processing
- Multi-Entity Extraction
- Code Generation
- Batch Processing
- Production Patterns
## Data Extraction
### Basic Information Extraction
```python
from pydantic import BaseModel, Field
import outlines
class PersonInfo(BaseModel):
name: str = Field(description="Full name")
age: int = Field(ge=0, le=120)
occupation: str
email: str = Field(pattern=r"^[\w\.-]+@[\w\.-]+\.\w+$")
location: str
model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")
generator = outlines.generate.json(model, PersonInfo)
text = """
Dr. Sarah Johnson is a 42-year-old research scientist at MIT.
She can be reached at sarah.j@mit.edu and currently lives in Cambridge, MA.
"""
prompt = f"Extract person information from:\n{text}\n\nPerson:"
person = generator(prompt)
print(f"Name: {person.name}")
print(f"Age: {person.age}")
print(f"Occupation: {person.occupation}")
print(f"Email: {person.email}")
print(f"Location: {person.location}")
```
### Company Information
```python
from typing import Optional


class CompanyInfo(BaseModel):
    """Structured company facts to extract from free text."""

    name: str
    founded_year: int = Field(ge=1800, le=2025)
    industry: str
    headquarters: str
    employees: int = Field(gt=0)
    # Optional with a None default, so this field may be left out.
    revenue: Optional[str] = None
model = outlines.models.transformers("meta-llama/Llama-3.1-8B-Instruct")
generator = outlines.generate.json(model, CompanyInfo)
text = """
Tesla, Inc. was founded in 2003 and operates primarily in the automotive
and energy industries. The company is headquartered in Austin, Texas,
and employs approximately 140,000 people worldwide.
"""
company = generator(f"Extract company information:\n{text}\n\nCompany:")
print(f"Company: {company.name}")
print(f"Founded: {company.founded_year}")
print(f"Industry: {company.industry}")
print(f"HQ: {company.headquarters}")
print(f"Employees: {company.employees:,}")
```
### Product Specifications
```python
class ProductSpec(BaseModel):
name: str
brand: str
price: float = Field(gt=0)
dimensions: str
weight: str
features: list[str]
rating: Optional[float] = Field(None, ge=0, le=5)
generator = outlines.generate.json(model, ProductSpec)
text = """
The Apple iPhone 15 Pro is priced at $999. It measures 146.6 x 70.6 x 8.25 mm
and weighs 187 grams. Key features include the A17 Pro chip, titanium design,
action button, and USB-C port. It has an average customer rating of 4.5 stars.
"""
product = generator(f"Extract product specifications:\n{text}\n\nProduct:")
print(f"Product: {product.brand} {product.name}")
print(f"Price: ${product.price}")
print(f"Features: {', '.join(product.features)}")
```
## Classification Systems
### Sentiment Analysis
```python
from typing import Literal
from enum import Enum
class Sentiment(str, Enum):
VERY_POSITIVE = "very_positive"
POSITIVE = "positive"
NEUTRAL = "neutral"
NEGATIVE = "negative"
VERY_NEGATIVE = "very_negative"
class SentimentAnalysis(BaseModel):
text: str
sentiment: Sentiment
confidence: float = Field(ge=0.0, le=1.0)
aspects: list[str] # What aspects were mentioned
reasoning: str
model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")
generator = outlines.generate.json(model, SentimentAnalysis)
review = """
This product completely exceeded my expectations! The build quality is
outstanding, and customer service was incredibly helpful. My only minor
complaint is the packaging could be better.
"""
result = generator(f"Analyze sentiment:\n{review}\n\nAnalysis:")
print(f"Sentiment: {result.sentiment.value}")
print(f"Confidence: {result.confidence:.2%}")
print(f"Aspects: {', '.join(result.aspects)}")
print(f"Reasoning: {result.reasoning}")
```
### Content Classification
```python
class Category(str, Enum):
TECHNOLOGY = "technology"
BUSINESS = "business"
SCIENCE = "science"
POLITICS = "politics"
ENTERTAINMENT = "entertainment"
SPORTS = "sports"
HEALTH = "health"
class ArticleClassification(BaseModel):
    """Classification result for an article: categories, keywords, audience."""

    primary_category: Category
    secondary_categories: list[Category]
    # Pydantic v2 spells list-size limits min_length/max_length;
    # min_items/max_items are the deprecated v1 names.
    keywords: list[str] = Field(min_length=3, max_length=10)
    target_audience: Literal["general", "expert", "beginner"]
    reading_level: Literal["elementary", "intermediate", "advanced"]
generator = outlines.generate.json(model, ArticleClassification)
article = """
Apple announced groundbreaking advancements in its AI capabilities with the
release of iOS 18. The new features leverage machine learning to significantly
improve battery life and overall device performance. Industry analysts predict
this will strengthen Apple's position in the competitive smartphone market.
"""
classification = generator(f"Classify article:\n{article}\n\nClassification:")
print(f"Primary: {classification.primary_category.value}")
print(f"Secondary: {[c.value for c in classification.secondary_categories]}")
print(f"Keywords: {classification.keywords}")
print(f"Audience: {classification.target_audience}")
```
### Intent Recognition
```python
class Intent(str, Enum):
QUESTION = "question"
COMPLAINT = "complaint"
REQUEST = "request"
FEEDBACK = "feedback"
CANCEL = "cancel"
UPGRADE = "upgrade"
class UserMessage(BaseModel):
original_message: str
intent: Intent
urgency: Literal["low", "medium", "high", "critical"]
department: Literal["support", "sales", "billing", "technical"]
sentiment: Literal["positive", "neutral", "negative"]
action_required: bool
summary: str
generator = outlines.generate.json(model, UserMessage)
message = """
I've been charged twice for my subscription this month! This is the third
time this has happened. I need someone to fix this immediately and refund
the extra charge. Very disappointed with this service.
"""
result = generator(f"Analyze message:\n{message}\n\nAnalysis:")
print(f"Intent: {result.intent.value}")
print(f"Urgency: {result.urgency}")
print(f"Route to: {result.department}")
print(f"Action required: {result.action_required}")
print(f"Summary: {result.summary}")
```
## Form Processing
### Job Application
```python
class Education(BaseModel):
degree: str
field: str
institution: str
year: int
class Experience(BaseModel):
title: str
company: str
duration: str
responsibilities: list[str]
class JobApplication(BaseModel):
full_name: str
email: str
phone: str
education: list[Education]
experience: list[Experience]
skills: list[str]
availability: str
model = outlines.models.transformers("meta-llama/Llama-3.1-8B-Instruct")
generator = outlines.generate.json(model, JobApplication)
resume_text = """
John Smith
Email: john.smith@email.com | Phone: 555-0123
EDUCATION
- BS in Computer Science, MIT, 2018
- MS in Artificial Intelligence, Stanford, 2020
EXPERIENCE
Software Engineer, Google (2020-2023)
- Developed ML pipelines for search ranking
- Led team of 5 engineers
- Improved search quality by 15%
SKILLS: Python, Machine Learning, TensorFlow, System Design
AVAILABILITY: Immediate
"""
application = generator(f"Extract job application:\n{resume_text}\n\nApplication:")
print(f"Applicant: {application.full_name}")
print(f"Email: {application.email}")
print(f"Education: {len(application.education)} degrees")
for edu in application.education:
print(f" - {edu.degree} in {edu.field}, {edu.institution} ({edu.year})")
print(f"Experience: {len(application.experience)} positions")
```
### Invoice Processing
```python
class InvoiceItem(BaseModel):
description: str
quantity: int = Field(gt=0)
unit_price: float = Field(gt=0)
total: float = Field(gt=0)
class Invoice(BaseModel):
invoice_number: str
date: str = Field(pattern=r"\d{4}-\d{2}-\d{2}")
vendor: str
customer: str
items: list[InvoiceItem]
subtotal: float = Field(gt=0)
tax: float = Field(ge=0)
total: float = Field(gt=0)
generator = outlines.generate.json(model, Invoice)
invoice_text = """
INVOICE #INV-2024-001
Date: 2024-01-15
From: Acme Corp
To: Smith & Co
Items:
- Widget A: 10 units @ $50.00 = $500.00
- Widget B: 5 units @ $75.00 = $375.00
- Service Fee: 1 @ $100.00 = $100.00
Subtotal: $975.00
Tax (8%): $78.00
TOTAL: $1,053.00
"""
invoice = generator(f"Extract invoice:\n{invoice_text}\n\nInvoice:")
print(f"Invoice: {invoice.invoice_number}")
print(f"From: {invoice.vendor} → To: {invoice.customer}")
print(f"Items: {len(invoice.items)}")
for item in invoice.items:
print(f" - {item.description}: {item.quantity} × ${item.unit_price} = ${item.total}")
print(f"Total: ${invoice.total}")
```
### Survey Responses
```python
class SurveyResponse(BaseModel):
respondent_id: str
completion_date: str
satisfaction: Literal[1, 2, 3, 4, 5]
would_recommend: bool
favorite_features: list[str]
improvement_areas: list[str]
additional_comments: Optional[str] = None
generator = outlines.generate.json(model, SurveyResponse)
survey_text = """
Survey ID: RESP-12345
Completed: 2024-01-20
How satisfied are you with our product? 4 out of 5
Would you recommend to a friend? Yes
What features do you like most?
- Fast performance
- Easy to use
- Great customer support
What could we improve?
- Better documentation
- More integrations
Additional feedback: Overall great product, keep up the good work!
"""
response = generator(f"Extract survey response:\n{survey_text}\n\nResponse:")
print(f"Respondent: {response.respondent_id}")
print(f"Satisfaction: {response.satisfaction}/5")
print(f"Would recommend: {response.would_recommend}")
print(f"Favorite features: {response.favorite_features}")
print(f"Improvement areas: {response.improvement_areas}")
```
## Multi-Entity Extraction
### News Article Entities
```python
class Person(BaseModel):
name: str
role: Optional[str] = None
affiliation: Optional[str] = None
class Organization(BaseModel):
name: str
type: Optional[str] = None
class Location(BaseModel):
name: str
type: Literal["city", "state", "country", "region"]
class Event(BaseModel):
name: str
date: Optional[str] = None
location: Optional[str] = None
class ArticleEntities(BaseModel):
people: list[Person]
organizations: list[Organization]
locations: list[Location]
events: list[Event]
dates: list[str]
model = outlines.models.transformers("meta-llama/Llama-3.1-8B-Instruct")
generator = outlines.generate.json(model, ArticleEntities)
article = """
Apple CEO Tim Cook met with Microsoft CEO Satya Nadella at Microsoft
headquarters in Redmond, Washington on September 15, 2024, to discuss
potential collaboration opportunities. The meeting was attended by executives
from both companies and focused on AI integration strategies. Apple's
Cupertino offices will host a follow-up meeting on October 20, 2024.
"""
entities = generator(f"Extract all entities:\n{article}\n\nEntities:")
print("People:")
for person in entities.people:
print(f" - {person.name} ({person.role}) @ {person.affiliation}")
print("\nOrganizations:")
for org in entities.organizations:
print(f" - {org.name} ({org.type})")
print("\nLocations:")
for loc in entities.locations:
print(f" - {loc.name} ({loc.type})")
print("\nEvents:")
for event in entities.events:
print(f" - {event.name} on {event.date}")
```
### Document Metadata
```python
class Author(BaseModel):
name: str
email: Optional[str] = None
affiliation: Optional[str] = None
class Reference(BaseModel):
title: str
authors: list[str]
year: int
source: str
class DocumentMetadata(BaseModel):
title: str
authors: list[Author]
abstract: str
keywords: list[str]
publication_date: str
journal: str
doi: Optional[str] = None
references: list[Reference]
generator = outlines.generate.json(model, DocumentMetadata)
paper = """
Title: Advances in Neural Machine Translation
Authors:
- Dr. Jane Smith (jane@university.edu), MIT
- Prof. John Doe (jdoe@stanford.edu), Stanford University
Abstract: This paper presents novel approaches to neural machine translation
using transformer architectures. We demonstrate significant improvements in
translation quality across multiple language pairs.
Keywords: Neural Networks, Machine Translation, Transformers, NLP
Published: Journal of AI Research, 2024-03-15
DOI: 10.1234/jair.2024.001
References:
1. "Attention Is All You Need" by Vaswani et al., 2017, NeurIPS
2. "BERT: Pre-training of Deep Bidirectional Transformers" by Devlin et al., 2019, NAACL
"""
metadata = generator(f"Extract document metadata:\n{paper}\n\nMetadata:")
print(f"Title: {metadata.title}")
print(f"Authors: {', '.join(a.name for a in metadata.authors)}")
print(f"Keywords: {', '.join(metadata.keywords)}")
print(f"References: {len(metadata.references)}")
```
## Code Generation
### Python Function Generation
```python
class Parameter(BaseModel):
name: str = Field(pattern=r"^[a-z_][a-z0-9_]*$")
type_hint: str
default: Optional[str] = None
class PythonFunction(BaseModel):
function_name: str = Field(pattern=r"^[a-z_][a-z0-9_]*$")
parameters: list[Parameter]
return_type: str
docstring: str
body: list[str] # Lines of code
model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")
generator = outlines.generate.json(model, PythonFunction)
spec = "Create a function to calculate the factorial of a number"
func = generator(f"Generate Python function:\n{spec}\n\nFunction:")
print(f"def {func.function_name}(", end="")
print(", ".join(f"{p.name}: {p.type_hint}" for p in func.parameters), end="")
print(f") -> {func.return_type}:")
print(f' """{func.docstring}"""')
for line in func.body:
print(f" {line}")
```
### SQL Query Generation
```python
class SQLQuery(BaseModel):
query_type: Literal["SELECT", "INSERT", "UPDATE", "DELETE"]
select_columns: Optional[list[str]] = None
from_tables: list[str]
joins: Optional[list[str]] = None
where_conditions: Optional[list[str]] = None
group_by: Optional[list[str]] = None
order_by: Optional[list[str]] = None
limit: Optional[int] = None
generator = outlines.generate.json(model, SQLQuery)
request = "Get top 10 users who made purchases in the last 30 days, ordered by total spent"
sql = generator(f"Generate SQL query:\n{request}\n\nQuery:")
# Render the structured query clause by clause. Every Optional field is
# guarded, because the schema allows the model to leave it as None.
print(f"Query type: {sql.query_type}")
# select_columns is Optional: fall back to "*" instead of crashing on None
print(f"SELECT {', '.join(sql.select_columns or ['*'])}")
print(f"FROM {', '.join(sql.from_tables)}")
for join in sql.joins or []:
    print(f" {join}")
if sql.where_conditions:
    print(f"WHERE {' AND '.join(sql.where_conditions)}")
# group_by is part of the schema, so render it too
if sql.group_by:
    print(f"GROUP BY {', '.join(sql.group_by)}")
if sql.order_by:
    print(f"ORDER BY {', '.join(sql.order_by)}")
# Compare against None so an explicit LIMIT 0 is still printed
if sql.limit is not None:
    print(f"LIMIT {sql.limit}")
```
### API Endpoint Spec
```python
class Parameter(BaseModel):
name: str
type: str
required: bool
description: str
class APIEndpoint(BaseModel):
method: Literal["GET", "POST", "PUT", "DELETE", "PATCH"]
path: str
description: str
parameters: list[Parameter]
request_body: Optional[dict] = None
response_schema: dict
status_codes: dict[int, str]
generator = outlines.generate.json(model, APIEndpoint)
spec = "Create user endpoint"
endpoint = generator(f"Generate API endpoint:\n{spec}\n\nEndpoint:")
print(f"{endpoint.method} {endpoint.path}")
print(f"Description: {endpoint.description}")
print("\nParameters:")
for param in endpoint.parameters:
req = "required" if param.required else "optional"
print(f" - {param.name} ({param.type}, {req}): {param.description}")
```
## Batch Processing
### Sequential Batch Extraction
```python
def batch_extract(texts: list[str], schema: type[BaseModel], model_name: str):
    """Extract one structured result per text, printing a progress counter.

    ``model_name`` is loaded once and a single JSON generator for ``schema``
    is reused for every text. Progress is written with a trailing carriage
    return so each update overwrites the previous one. Results are returned
    in input order.
    """
    llm = outlines.models.transformers(model_name)
    run = outlines.generate.json(llm, schema)
    total = len(texts)
    extracted = []
    for position, text in enumerate(texts, start=1):
        print(f"Processing {position}/{total}...", end="\r")
        extracted.append(run(f"Extract:\n{text}\n\nData:"))
    return extracted
# Target structure extracted from each product description.
class Product(BaseModel):
    name: str
    price: float
    category: str


# One free-text product description per entry.
texts = [
    "iPhone 15 Pro costs $999 in Electronics",
    "Running Shoes are $89.99 in Sports",
    "Coffee Maker priced at $49.99 in Home & Kitchen"
]
# batch_extract returns one result per text, in the same order.
products = batch_extract(texts, Product, "microsoft/Phi-3-mini-4k-instruct")
for product in products:
    print(f"{product.name}: ${product.price} ({product.category})")
```
### CSV Processing
```python
import csv
def process_csv(csv_file: str, schema: type[BaseModel]):
    """Extract one structured record per row of a CSV file.

    Each row is flattened to ``"column: value | column: value"`` text and
    passed to a JSON generator built for ``schema``.

    Args:
        csv_file: Path to the CSV file; its first row supplies the column
            names used by ``csv.DictReader``.
        schema: Pydantic model class passed to ``outlines.generate.json``.

    Returns:
        A list with one generated result per data row, in file order.
    """
    model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")
    generator = outlines.generate.json(model, schema)
    results = []
    # The csv module requires newline="" so that newlines embedded in quoted
    # fields are parsed correctly; the explicit encoding avoids depending on
    # the platform default.
    with open(csv_file, newline="", encoding="utf-8") as f:
        for row in csv.DictReader(f):
            text = " | ".join(f"{k}: {v}" for k, v in row.items())
            results.append(generator(f"Extract:\n{text}\n\nData:"))
    return results
# Target structure for one customer row; `tier` is limited to the three
# listed plan names.
class Customer(BaseModel):
    name: str
    email: str
    tier: Literal["basic", "premium", "enterprise"]
    mrr: float


# Example call (left commented out because it needs a real CSV file):
# customers = process_csv("customers.csv", Customer)
```
## Production Patterns
### Error Handling
```python
from pydantic import ValidationError
def safe_extract(text: str, schema: type[BaseModel], retries: int = 3):
    """Run one extraction, retrying on failure up to ``retries`` times.

    Args:
        text: Passage to extract from.
        schema: Pydantic model class passed to ``outlines.generate.json``.
        retries: Maximum number of attempts.

    Returns:
        The first successfully generated result, or ``None`` when
        ``retries`` is zero or negative (no attempt is made).

    Raises:
        Exception: Whatever the final attempt raised, re-raised unchanged.
    """
    model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")
    generator = outlines.generate.json(model, schema)
    prompt = f"Extract:\n{text}\n\nData:"
    final_attempt = retries - 1
    for attempt in range(retries):
        try:
            return generator(prompt)
        except ValidationError as e:
            print(f"Attempt {attempt + 1} failed: {e}")
            if attempt == final_attempt:
                raise
        except Exception as e:
            print(f"Unexpected error: {e}")
            if attempt == final_attempt:
                raise
    return None
```
### Caching
```python
from functools import lru_cache
import hashlib
# Extraction results keyed by (text hash, schema name). A plain dict is used
# instead of lru_cache on the lookup itself: memoising the lookup would also
# memoise every miss, so a result could never be served from the cache.
_CACHE_MAX_ENTRIES = 1000
_extraction_cache = {}


def cached_extract(text_hash: str, schema_name: str):
    """Return the cached result for this text/schema pair, or None on a miss."""
    return _extraction_cache.get((text_hash, schema_name))


def extract_with_cache(text: str, schema: type[BaseModel]):
    """Extract structured data from ``text``, reusing earlier results.

    Args:
        text: Passage to extract from.
        schema: Pydantic model class passed to ``outlines.generate.json``.

    Returns:
        The cached result when the same text/schema pair was seen before,
        otherwise a freshly generated result (which is then cached).
    """
    # MD5 is used only as a compact cache key, not for security.
    text_hash = hashlib.md5(text.encode()).hexdigest()
    schema_name = schema.__name__
    cached_result = cached_extract(text_hash, schema_name)
    if cached_result is not None:
        return cached_result
    # Perform actual extraction
    model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")
    generator = outlines.generate.json(model, schema)
    result = generator(f"Extract:\n{text}\n\nData:")
    # Keep the cache bounded: dicts preserve insertion order, so the first
    # key is the oldest entry.
    if len(_extraction_cache) >= _CACHE_MAX_ENTRIES:
        _extraction_cache.pop(next(iter(_extraction_cache)))
    _extraction_cache[(text_hash, schema_name)] = result
    return result
```
### Monitoring
```python
import time
import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def monitored_extract(text: str, schema: type[BaseModel]):
    """Run one extraction and log its duration and input size.

    Args:
        text: Passage to extract from.
        schema: Pydantic model class passed to ``outlines.generate.json``.

    Returns:
        The generated result.

    Raises:
        Exception: Any error from model loading or generation, re-raised
            after the failure and elapsed time have been logged.
    """
    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments the way time.time() differences can.
    start_time = time.perf_counter()
    try:
        model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")
        generator = outlines.generate.json(model, schema)
        result = generator(f"Extract:\n{text}\n\nData:")
    except Exception as e:
        elapsed = time.perf_counter() - start_time
        logger.error(f"Extraction failed after {elapsed:.2f}s: {e}")
        raise
    else:
        elapsed = time.perf_counter() - start_time
        logger.info(f"Extraction succeeded in {elapsed:.2f}s")
        logger.info(f"Input length: {len(text)} chars")
        return result
```
### Rate Limiting
```python
import time
from threading import Lock
class RateLimiter:
    """Sliding-window limiter: at most ``max_requests`` per ``time_window`` seconds.

    ``requests`` holds the ``time.monotonic()`` timestamp of every request
    granted within the current window, oldest first.
    """

    def __init__(self, max_requests: int, time_window: int):
        """Create a limiter.

        Args:
            max_requests: Maximum requests allowed inside one window (>= 1).
            time_window: Window length in seconds.

        Raises:
            ValueError: If ``max_requests`` is smaller than 1.
        """
        if max_requests < 1:
            raise ValueError("max_requests must be at least 1")
        self.max_requests = max_requests
        self.time_window = time_window
        self.requests = []
        self.lock = Lock()

    def wait_if_needed(self):
        """Block until a request slot is free, then record the request."""
        while True:
            with self.lock:
                now = time.monotonic()
                # Remove old requests
                self.requests = [r for r in self.requests if now - r < self.time_window]
                if len(self.requests) < self.max_requests:
                    self.requests.append(now)
                    return
                # Time until the oldest in-window request expires.
                sleep_time = self.time_window - (now - self.requests[0])
            # Sleep outside the lock so other threads are not blocked, then
            # loop to re-check with a fresh timestamp. Only expired entries
            # are dropped, so requests still inside the window keep counting
            # against the limit.
            time.sleep(sleep_time)
def rate_limited_extract(texts: list[str], schema: type[BaseModel]):
    """Extract from each text, pausing as needed to respect a rate limit.

    Args:
        texts: Input passages, processed in order.
        schema: Pydantic model class passed to ``outlines.generate.json``.

    Returns:
        A list with one generated result per input text, in input order.
    """
    limiter = RateLimiter(max_requests=10, time_window=60)  # 10 req/min
    model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")
    generator = outlines.generate.json(model, schema)

    def extract_one(text):
        # Wait for a free slot before every generation call.
        limiter.wait_if_needed()
        return generator(f"Extract:\n{text}\n\nData:")

    return [extract_one(text) for text in texts]
```
## Resources
- **Outlines Documentation**: https://outlines-dev.github.io/outlines
- **Pydantic Documentation**: https://docs.pydantic.dev
- **GitHub Examples**: https://github.com/outlines-dev/outlines/tree/main/examples
@@ -0,0 +1,652 @@
# Comprehensive JSON Generation Guide
Complete guide to JSON generation with Outlines using Pydantic models and JSON schemas.
## Table of Contents
- Pydantic Models
- JSON Schema Support
- Advanced Patterns
- Nested Structures
- Complex Types
- Validation
- Performance Optimization
## Pydantic Models
### Basic Models
```python
from pydantic import BaseModel
import outlines
# Minimal model: three required fields.
class User(BaseModel):
    name: str
    age: int
    email: str


# Load the model once, then build a JSON generator for the User model.
model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")
generator = outlines.generate.json(model, User)
user = generator("Generate user: Alice, 25, alice@example.com")
print(user.name)  # "Alice"
print(user.age)  # 25
print(user.email)  # "alice@example.com"
```
### Field Constraints
```python
from pydantic import BaseModel, Field
# Each field carries a Field(...) constraint: length bounds on name, numeric
# bounds on price/discount/quantity, and a regex pattern on sku.
class Product(BaseModel):
    name: str = Field(min_length=1, max_length=100)
    price: float = Field(gt=0, description="Price in USD")
    discount: float = Field(ge=0, le=100, description="Discount percentage")
    quantity: int = Field(ge=0, description="Available quantity")
    sku: str = Field(pattern=r"^[A-Z]{3}-\d{6}$")  # three capitals, dash, six digits


model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")
generator = outlines.generate.json(model, Product)
product = generator("Generate product: iPhone 15, $999")
# All fields guaranteed to meet constraints
```
**Available Constraints:**
- `min_length`, `max_length`: String length
- `gt`, `ge`, `lt`, `le`: Numeric comparisons
- `multiple_of`: Number must be multiple of value
- `pattern`: Regex pattern for strings
- `min_length`, `max_length`: List length as well (Pydantic v2; `min_items`/`max_items` are the Pydantic v1 names)
### Optional Fields
```python
from typing import Optional
# Only `title` is required; every other field has a default.
class Article(BaseModel):
    title: str  # Required
    author: Optional[str] = None  # Optional
    published_date: Optional[str] = None  # Optional
    tags: list[str] = []  # Default empty list
    view_count: int = 0  # Default value


# NOTE(review): `model` is not defined in this snippet; presumably it is the
# model loaded in an earlier example — confirm.
generator = outlines.generate.json(model, Article)
# Can generate even if optional fields missing
article = generator("Title: Introduction to AI")
print(article.author)  # None (not provided)
print(article.tags)  # [] (default)
```
### Default Values
```python
# Every field has a default, so none is required.
class Config(BaseModel):
    debug: bool = False
    max_retries: int = 3
    timeout: float = 30.0
    log_level: str = "INFO"


# Generator uses defaults when not specified
generator = outlines.generate.json(model, Config)
config = generator("Generate config with debug enabled")
print(config.debug)  # True (from prompt)
print(config.timeout)  # 30.0 (default)
```
## Enums and Literals
### Enum Fields
```python
from enum import Enum
# String-valued enum: each member's value is its lowercase name.
class Status(str, Enum):
    PENDING = "pending"
    APPROVED = "approved"
    REJECTED = "rejected"
    CANCELLED = "cancelled"


# `status` is typed with the enum above rather than a free-form string.
class Application(BaseModel):
    applicant_name: str
    status: Status  # Must be one of enum values
    submitted_date: str


generator = outlines.generate.json(model, Application)
app = generator("Generate application for John Doe")
print(app.status)  # Status.PENDING (or one of the enum values)
print(type(app.status))  # <enum 'Status'>
```
### Literal Types
```python
from typing import Literal
# `priority` and `status` are each limited to a fixed set of string literals.
class Task(BaseModel):
    title: str
    priority: Literal["low", "medium", "high", "critical"]
    status: Literal["todo", "in_progress", "done"]
    assigned_to: str


generator = outlines.generate.json(model, Task)
task = generator("Create high priority task: Fix bug")
print(task.priority)  # One of: "low", "medium", "high", "critical"
```
### Multiple Choice Fields
```python
# Multiple-choice answer: a five-point agreement scale plus a three-level
# confidence rating, both expressed as Literal types.
class Survey(BaseModel):
    question: str
    answer: Literal["strongly_disagree", "disagree", "neutral", "agree", "strongly_agree"]
    confidence: Literal["low", "medium", "high"]


generator = outlines.generate.json(model, Survey)
survey = generator("Rate: 'I enjoy using this product'")
```
## Nested Structures
### Nested Models
```python
# Postal address; `country` defaults to "USA".
class Address(BaseModel):
    street: str
    city: str
    state: str
    zip_code: str
    country: str = "USA"


# Person embeds an Address as a nested model.
class Person(BaseModel):
    name: str
    age: int
    email: str
    address: Address  # Nested model


model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")
generator = outlines.generate.json(model, Person)
prompt = """
Extract person:
Name: Alice Johnson
Age: 28
Email: alice@example.com
Address: 123 Main St, Boston, MA, 02101
"""
person = generator(prompt)
# Nested fields are reached through attribute access on the nested model.
print(person.name)  # "Alice Johnson"
print(person.address.city)  # "Boston"
print(person.address.state)  # "MA"
```
### Deep Nesting
```python
# Three levels of nesting: Event -> Location -> Coordinates.
class Coordinates(BaseModel):
    latitude: float
    longitude: float


class Location(BaseModel):
    name: str
    coordinates: Coordinates


class Event(BaseModel):
    title: str
    date: str
    location: Location


generator = outlines.generate.json(model, Event)
event = generator("Generate event: Tech Conference in San Francisco")
print(event.title)  # "Tech Conference"
print(event.location.name)  # "San Francisco"
print(event.location.coordinates.latitude)  # 37.7749
```
### Lists of Nested Models
```python
# One line item of an order.
class Item(BaseModel):
    name: str
    quantity: int
    price: float


# An order holds any number of Item entries.
class Order(BaseModel):
    order_id: str
    customer: str
    items: list[Item]  # List of nested models
    total: float


generator = outlines.generate.json(model, Order)
prompt = """
Generate order for John:
- 2x Widget ($10 each)
- 3x Gadget ($15 each)
Order ID: ORD-001
"""
order = generator(prompt)
# Print the order header, one line per item, then the total.
print(f"Order ID: {order.order_id}")
for item in order.items:
    print(f"- {item.quantity}x {item.name} @ ${item.price}")
print(f"Total: ${order.total}")
```
## Complex Types
### Union Types
```python
from typing import Union
# Two content variants, each tagged by a single-value Literal `type` field.
class TextContent(BaseModel):
    type: Literal["text"]
    content: str


class ImageContent(BaseModel):
    type: Literal["image"]
    url: str
    caption: str


class Post(BaseModel):
    title: str
    content: Union[TextContent, ImageContent]  # Either type


generator = outlines.generate.json(model, Post)
# Can generate either text or image content
post = generator("Generate blog post with image")
# Branch on the `type` tag to know which variant's fields are available.
if post.content.type == "text":
    print(post.content.content)
elif post.content.type == "image":
    print(post.content.url)
```
### Lists and Arrays
```python
# List-typed fields with different element types.
class Article(BaseModel):
    title: str
    authors: list[str]  # List of strings
    tags: list[str]
    sections: list[dict[str, str]]  # List of dicts
    related_ids: list[int]


generator = outlines.generate.json(model, Article)
article = generator("Generate article about AI")
print(article.authors)  # ["Alice", "Bob"]
print(article.tags)  # ["AI", "Machine Learning", "Technology"]
```
### Dictionaries
```python
# Dictionary-typed fields; all three use string keys.
class Metadata(BaseModel):
    title: str
    properties: dict[str, str]  # String keys and values
    counts: dict[str, int]  # String keys, int values
    settings: dict[str, Union[str, int, bool]]  # Mixed value types


generator = outlines.generate.json(model, Metadata)
meta = generator("Generate metadata")
print(meta.properties)  # {"author": "Alice", "version": "1.0"}
print(meta.counts)  # {"views": 1000, "likes": 50}
```
### Any Type (Use Sparingly)
```python
from typing import Any
# Mixes typed fields with one unconstrained `Any` field.
class FlexibleData(BaseModel):
    name: str
    structured_field: str
    flexible_field: Any  # Can be anything


# Note: Any reduces type safety, use only when necessary
generator = outlines.generate.json(model, FlexibleData)
```
## JSON Schema Support
### Direct Schema Usage
```python
import outlines
import json

model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")

# Define JSON schema
schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "age": {"type": "integer", "minimum": 0, "maximum": 120},
        "email": {"type": "string", "format": "email"}
    },
    "required": ["name", "age", "email"]
}

# Generate from schema. The dict is serialized with json.dumps because a
# JSON-Schema string is the schema form accepted alongside Pydantic models
# and callables; a string works regardless of whether raw dicts are accepted.
generator = outlines.generate.json(model, json.dumps(schema))
result = generator("Generate person: Alice, 25, alice@example.com")
print(result)  # Valid JSON matching schema
```
### Schema from Pydantic
```python
import json


class User(BaseModel):
    name: str
    age: int
    email: str


# Get JSON schema from Pydantic model
schema = User.model_json_schema()
print(schema)
# Simplified output (Pydantic also adds "title" entries):
# {
#     "type": "object",
#     "properties": {
#         "name": {"type": "string"},
#         "age": {"type": "integer"},
#         "email": {"type": "string"}
#     },
#     "required": ["name", "age", "email"]
# }

# Both approaches constrain generation to the same structure. The schema
# dict is serialized to a JSON string before being handed to Outlines.
# NOTE(review): the Pydantic form presumably returns a User instance while
# the schema-string form returns parsed JSON — confirm for your version.
generator1 = outlines.generate.json(model, User)
generator2 = outlines.generate.json(model, json.dumps(schema))
```
## Advanced Patterns
### Conditional Fields
```python
# `express_fee` is optional, so it can stay None for standard orders.
class Order(BaseModel):
    order_type: Literal["standard", "express"]
    delivery_date: str
    express_fee: Optional[float] = None  # Only for express orders


generator = outlines.generate.json(model, Order)
# Express order
order1 = generator("Create express order for tomorrow")
print(order1.express_fee)  # 25.0
# Standard order
order2 = generator("Create standard order")
print(order2.express_fee)  # None
```
### Recursive Models
```python
from typing import Optional, List
# Self-referential model: `children` refers to TreeNode by its quoted name
# because the class is not yet defined while its body is evaluated.
class TreeNode(BaseModel):
    value: str
    children: Optional[List['TreeNode']] = None


# Enable forward references
TreeNode.model_rebuild()
generator = outlines.generate.json(model, TreeNode)
tree = generator("Generate file tree with subdirectories")
print(tree.value)  # "root"
# NOTE(review): `children` defaults to None, so this indexing assumes the
# generated root actually has children — guard in real code.
print(tree.children[0].value)  # "subdir1"
```
### Model with Validation
```python
from pydantic import field_validator
# A pair of 'YYYY-MM-DD' date strings with a cross-field check on end_date.
class DateRange(BaseModel):
    start_date: str
    end_date: str

    @field_validator('end_date')
    @classmethod
    def end_after_start(cls, v, info):
        """Reject an end_date that is earlier than start_date.

        Args:
            v: The end_date value being validated ('YYYY-MM-DD').
            info: Validation context; ``info.data`` holds the fields
                validated so far.

        Returns:
            ``v`` unchanged when the check passes.

        Raises:
            ValueError: If end_date precedes start_date, or if either value
                does not parse as 'YYYY-MM-DD' (raised by ``strptime``).
        """
        # start_date is absent from info.data when it failed its own
        # validation; the comparison is skipped in that case.
        if 'start_date' in info.data:
            from datetime import datetime
            start = datetime.strptime(info.data['start_date'], '%Y-%m-%d')
            end = datetime.strptime(v, '%Y-%m-%d')
            if end < start:
                raise ValueError('end_date must be after start_date')
        return v


generator = outlines.generate.json(model, DateRange)
# Validation happens after generation
```
## Multiple Objects
### Generate List of Objects
```python
# A team is a name plus a list of Person entries.
class Person(BaseModel):
    name: str
    age: int


class Team(BaseModel):
    team_name: str
    members: list[Person]


generator = outlines.generate.json(model, Team)
team = generator("Generate engineering team with 5 members")
# Print the team name followed by one line per member.
print(f"Team: {team.team_name}")
for member in team.members:
    print(f"- {member.name}, {member.age}")
```
### Batch Generation
```python
def generate_batch(prompts: list[str], schema: type[BaseModel]):
    """Generate one structured result per prompt using a single generator.

    Args:
        prompts: Prompts to run, in order.
        schema: Pydantic model class passed to ``outlines.generate.json``.

    Returns:
        A list of generated results, in the same order as ``prompts``.
    """
    model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")
    generator = outlines.generate.json(model, schema)
    # The model and generator are built once and reused for every prompt.
    return [generator(prompt) for prompt in prompts]
# Target structure for each prompt.
class Product(BaseModel):
    name: str
    price: float


# One product description per prompt.
prompts = [
    "Product: iPhone 15, $999",
    "Product: MacBook Pro, $2499",
    "Product: AirPods, $179"
]
products = generate_batch(prompts, Product)
for product in products:
    print(f"{product.name}: ${product.price}")
```
## Performance Optimization
### Caching Generators
```python
from functools import lru_cache
@lru_cache(maxsize=10)
def get_generator(model_name: str, schema: type[BaseModel]):
    """Build, or return from cache, the JSON generator for a model/schema pair.

    The schema class itself is the cache key (classes are hashable), so the
    function body can use it directly to construct the generator.

    Args:
        model_name: Identifier passed to ``outlines.models.transformers``.
        schema: Pydantic model class passed to ``outlines.generate.json``.

    Returns:
        The generator for this model/schema pair.
    """
    model = outlines.models.transformers(model_name)
    return outlines.generate.json(model, schema)


# First call: creates generator
gen1 = get_generator("microsoft/Phi-3-mini-4k-instruct", User)
# Second call: returns cached generator (fast!)
gen2 = get_generator("microsoft/Phi-3-mini-4k-instruct", User)
```
### Batch Processing
```python
# Process multiple items efficiently: load the model and build the generator
# once, outside the loop.
model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")
generator = outlines.generate.json(model, User)
texts = ["User: Alice, 25", "User: Bob, 30", "User: Carol, 35"]
# Reuse generator (model stays loaded)
users = [generator(text) for text in texts]
```
### Minimize Schema Complexity
```python
# ✅ Good: Simple, flat structure (faster)
class SimplePerson(BaseModel):
    name: str
    age: int
    city: str


# ⚠️ Slower: Deep nesting
# (PersonalInfo, Address and Employment stand for further nested models;
# they are not defined in this snippet.)
class ComplexPerson(BaseModel):
    personal_info: PersonalInfo
    address: Address
    employment: Employment
    # ... many nested levels
```
## Error Handling
### Handle Missing Fields
```python
from pydantic import ValidationError
# All three fields are required.
class User(BaseModel):
    name: str
    age: int
    email: str


# NOTE(review): `generator` is not defined in this snippet; presumably it was
# built for User beforehand — confirm.
try:
    user = generator("Generate user")  # May not include all fields
except ValidationError as e:
    print(f"Validation error: {e}")
    # Handle gracefully
```
### Fallback with Optional Fields
```python
# Only `name` is required; the other fields fall back to None.
class RobustUser(BaseModel):
    name: str  # Required
    age: Optional[int] = None  # Optional
    email: Optional[str] = None  # Optional


# Build the generator for RobustUser itself; without this the example would
# keep using whatever generator an earlier snippet created.
generator = outlines.generate.json(model, RobustUser)
# More likely to succeed even with incomplete data
user = generator("Generate user: Alice")
print(user.name)  # "Alice"
print(user.age)  # None (not provided)
```
## Best Practices
### 1. Use Specific Types
```python
# ✅ Good: Specific types
class Product(BaseModel):
    name: str
    price: float  # Not Any or str
    quantity: int  # Not str
    in_stock: bool  # Not int


# ❌ Bad: Generic types
# (shown for contrast only; it reuses the name Product and would replace the
# definition above if both were executed)
class Product(BaseModel):
    name: Any
    price: str  # Should be float
    quantity: str  # Should be int
```
### 2. Add Descriptions
```python
# ✅ Good: Clear descriptions
# Each field's expected content is stated through Field(description=...).
class Article(BaseModel):
    title: str = Field(description="Article title, 10-100 characters")
    content: str = Field(description="Main article content in paragraphs")
    tags: list[str] = Field(description="List of relevant topic tags")


# Descriptions help the model understand expected output
```
### 3. Use Constraints
```python
# ✅ Good: With constraints
# ge/le bound the value to the inclusive range 0..120.
class Age(BaseModel):
    value: int = Field(ge=0, le=120, description="Age in years")


# ❌ Bad: No constraints
# (shown for contrast only; it reuses the name Age)
class Age(BaseModel):
    value: int  # Could be negative or > 120
```
### 4. Prefer Enums Over Strings
```python
# ✅ Good: Enum for fixed set
class Priority(str, Enum):
    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"


class Task(BaseModel):
    priority: Priority  # Guaranteed valid


# ❌ Bad: Free-form string
# (shown for contrast only; it reuses the name Task)
class Task(BaseModel):
    priority: str  # Could be "urgent", "ASAP", "!!", etc.
```
### 5. Test Your Models
```python
# Test models work as expected
def test_product_model():
    """Check that Product accepts well-typed values and keeps the price."""
    field_values = {
        "name": "Test Product",
        "price": 19.99,
        "quantity": 10,
        "in_stock": True,
    }
    product = Product(**field_values)
    assert product.price == 19.99
    assert isinstance(product, Product)


# Run tests before using in production
```
## Resources
- **Pydantic Docs**: https://docs.pydantic.dev
- **JSON Schema**: https://json-schema.org
- **Outlines GitHub**: https://github.com/outlines-dev/outlines
@@ -1,6 +1,6 @@
---
name: outlines
description: Guarantee valid JSON/XML/code structure during generation, use Pydantic models for type-safe outputs, support local models (Transformers, vLLM), and maximize inference speed with Outlines - dottxt.ai's structured generation library
description: "Outlines: structured JSON/regex/Pydantic LLM generation."
version: 1.0.0
author: Orchestra Research
license: MIT
@@ -1,6 +1,6 @@
---
name: fine-tuning-with-trl
description: Fine-tune LLMs using reinforcement learning with TRL - SFT for instruction tuning, DPO for preference alignment, PPO/GRPO for reward optimization, and reward model training. Use when need RLHF, align model with preferences, or train from human feedback. Works with HuggingFace Transformers.
description: "TRL: SFT, DPO, PPO, GRPO, reward modeling for LLM RLHF."
version: 1.0.0
author: Orchestra Research
license: MIT
@@ -1,11 +1,6 @@
---
name: maps
description: >
Location intelligence — geocode a place, reverse-geocode coordinates,
find nearby places (46 POI categories), driving/walking/cycling
distance + time, turn-by-turn directions, timezone lookup, bounding
box + area for a named place, and POI search within a rectangle.
Uses OpenStreetMap + Overpass + OSRM. Free, no API key.
description: "Geocode, POIs, routes, timezones via OpenStreetMap/OSRM."
version: 1.2.0
author: Mibayy
license: MIT
@@ -926,13 +926,18 @@ def cmd_timezone(args):
os_ = offset_info.get("seconds", 0)
sign = "+" if oh >= 0 else "-"
utc_offset = f"{sign}{abs(oh):02d}:{om:02d}"
if os_:
utc_offset = f"{utc_offset}:{os_:02d}"
elif tz_data.get("standardUtcOffset"):
offset_info2 = tz_data["standardUtcOffset"]
if isinstance(offset_info2, dict):
oh = offset_info2.get("hours", 0)
om = abs(offset_info2.get("minutes", 0))
os_ = offset_info2.get("seconds", 0)
sign = "+" if oh >= 0 else "-"
utc_offset = f"{sign}{abs(oh):02d}:{om:02d}"
if os_:
utc_offset = f"{utc_offset}:{os_:02d}"
timezone_src = "timeapi.io"
except (RuntimeError, KeyError, TypeError):
pass # API may be down; continue to fallback
@@ -1,6 +1,6 @@
---
name: notion
description: Notion API for creating and managing pages, databases, and blocks via curl. Search, create, update, and query Notion workspaces directly from the terminal.
description: "Notion API via curl: pages, databases, blocks, search."
version: 1.0.0
author: community
license: MIT
@@ -1,9 +1,6 @@
---
name: requesting-code-review
description: >
Pre-commit verification pipeline — static security scan, baseline-aware
quality gates, independent reviewer subagent, and auto-fix loop. Use after
code changes and before committing, pushing, or opening a PR.
description: "Pre-commit review: security scan, quality gates, auto-fix."
version: 2.0.0
author: Hermes Agent (adapted from obra/superpowers + MorAlekss)
license: MIT
@@ -1,6 +1,6 @@
---
name: writing-plans
description: Use when you have a spec or requirements for a multi-step task. Creates comprehensive implementation plans with bite-sized tasks, exact file paths, and complete code examples.
description: "Write implementation plans: bite-sized tasks, paths, code."
version: 1.1.0
author: Hermes Agent (adapted from obra/superpowers)
license: MIT