Skip to content

Index

axm_smelt

axm-smelt - Deterministic token compaction for LLM inputs.

Format

Bases: Enum

Supported input formats.

Source code in packages/axm-smelt/src/axm_smelt/core/models.py
Python
class Format(enum.Enum):
    """Supported input formats.

    Each member's value is the lowercase name of the format, so a member
    can be obtained from its string form (for example ``Format("json")``).
    """

    # Structured / tabular data formats.
    JSON = "json"
    YAML = "yaml"
    XML = "xml"
    TOML = "toml"
    CSV = "csv"
    # Document and plain-text formats.
    MARKDOWN = "markdown"
    TEXT = "text"

SmeltReport

Bases: BaseModel

Report produced by the smelt pipeline.

Source code in packages/axm-smelt/src/axm_smelt/core/models.py
Python
class SmeltReport(BaseModel):
    """Report produced by the smelt pipeline.

    Bundles the analysed input, the resulting text, the token counts of
    both, the detected format and the strategies involved.  Both ``smelt``
    and ``check`` return this model; ``check`` fills it with "no change"
    values plus per-strategy estimates.
    """

    # Input text as analysed (the compact JSON dump when ``parsed`` was given).
    original: str
    # Text after compaction; identical to ``original`` in reports from ``check``.
    compacted: str
    # Token count of ``original``.
    original_tokens: int
    # Token count of ``compacted``.
    compacted_tokens: int
    # Percentage of tokens saved: (1 - compacted_tokens / original_tokens) * 100,
    # or 0.0 when ``original_tokens`` is 0.  ``check`` always reports 0.0.
    savings_pct: float
    # Format detected for the input text.
    format: Format
    # Names of the strategies whose output was kept, in application order
    # (empty in reports from ``check``).
    strategies_applied: list[str]
    # Strategy name -> estimated savings percentage, filled in by ``check``;
    # ``smelt`` leaves this at its default.
    # NOTE(review): mutable ``{}`` default presumably relies on pydantic
    # copying field defaults per instance -- confirm for the pinned version.
    strategy_estimates: dict[str, float] = {}

check(text=None, *, parsed=None)

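Analyze text without transforming it: report its detected format and token count, plus an estimated percentage saving for each registered strategy that would reduce the token count.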

Source code in packages/axm-smelt/src/axm_smelt/core/pipeline.py
Python
def check(
    text: str | None = None,
    *,
    parsed: dict[str, Any] | list[Any] | None = None,
) -> SmeltReport:
    """Estimate what each registered strategy would save, without compacting.

    Args:
        text: Raw input to analyse.  Ignored when *parsed* is given.
        parsed: Already-decoded JSON-like data; when supplied it is dumped
            to compact JSON and that string is analysed instead of *text*.

    Returns:
        A report whose ``compacted`` text and token count equal the
        original, with ``savings_pct`` of ``0.0``, no applied strategies,
        and ``strategy_estimates`` mapping each strategy name to its
        estimated savings percentage (rounded to two decimals).  Only
        strategies with a strictly positive saving are listed.

    Raises:
        ValueError: If neither *text* nor *parsed* is provided.
    """
    from axm_smelt.strategies import _REGISTRY

    if parsed is None:
        if text is None:
            raise ValueError("Either text or parsed must be provided")
    else:
        text = json.dumps(parsed, separators=(",", ":"))

    detected = detect_format(text)
    baseline_tokens = count(text)
    context = SmeltContext(text=text, format=detected)

    per_strategy: dict[str, float] = {}
    for strategy_name, strategy_cls in _REGISTRY.items():
        # Every strategy is tried against the same starting context.
        outcome = strategy_cls().apply(context)
        if outcome.text == context.text:
            continue  # the strategy left the text untouched

        candidate_tokens = count(outcome.text)
        if baseline_tokens > 0:
            pct = (1 - candidate_tokens / baseline_tokens) * 100
        else:
            pct = 0.0  # nothing to save on an input with zero tokens
        if pct > 0:
            per_strategy[strategy_name] = round(pct, 2)

    return SmeltReport(
        original=text,
        compacted=text,
        original_tokens=baseline_tokens,
        compacted_tokens=baseline_tokens,
        savings_pct=0.0,
        format=detected,
        strategies_applied=[],
        strategy_estimates=per_strategy,
    )

count(text, model='o200k_base')

Return the token count for text.

Uses tiktoken with the encoding named by model. Falls back to len(text) // 4 when tiktoken is unavailable or the encoding cannot be loaded or applied.

Source code in packages/axm-smelt/src/axm_smelt/core/counter.py
Python
def count(text: str, model: str = "o200k_base") -> int:
    """Count the tokens in *text*.

    The text is encoded with the tiktoken encoding named by *model*.  If
    anything goes wrong along the way (tiktoken not installed, encoding
    not loadable, encoding failure), the count is approximated as
    ``len(text) // 4``.

    Args:
        text: The string to measure.
        model: Name of the tiktoken encoding to use.

    Returns:
        The number of tokens, or the length-based approximation.
    """
    try:
        from tiktoken import get_encoding

        encoder = get_encoding(model)
        token_ids = encoder.encode(text)
        return len(token_ids)
    except Exception:  # noqa: BLE001
        # Deliberate best-effort: any failure yields the rough estimate.
        return len(text) // 4

smelt(text=None, strategies=None, preset=None, *, parsed=None)

Run the compaction pipeline and return a report.

Source code in packages/axm-smelt/src/axm_smelt/core/pipeline.py
Python
def smelt(
    text: str | None = None,
    strategies: list[str] | None = None,
    preset: str | None = None,
    *,
    parsed: dict[str, Any] | list[Any] | None = None,
) -> SmeltReport:
    """Compact the input with a chain of strategies and report the result.

    Args:
        text: Raw input to compact.  Ignored when *parsed* is given.
        strategies: Names of strategies to run, in order.  Takes
            precedence over *preset* when non-empty.
        preset: Name of a strategy preset, used when *strategies* is
            empty or ``None``.  The ``"safe"`` preset is used when both
            are missing.
        parsed: Already-decoded JSON-like data; when supplied it is dumped
            to compact JSON (which becomes the original text) and reused
            as the context's parsed value.

    Returns:
        A report with the original and compacted text, their token
        counts, the savings percentage, the detected format and the names
        of the strategies whose output was kept.

    Raises:
        ValueError: If neither *text* nor *parsed* is provided.
    """
    if parsed is None:
        if text is None:
            raise ValueError("Either text or parsed must be provided")
    else:
        text = json.dumps(parsed, separators=(",", ":"))

    fmt, detected_tree = detect_format_parsed(text)
    # Caller-supplied data wins over whatever format detection decoded.
    tree = parsed if parsed is not None else detected_tree
    baseline_tokens = count(text)

    pipeline: list[SmeltStrategy]
    if strategies:
        pipeline = [get_strategy(name) for name in strategies]
    else:
        pipeline = get_preset(preset if preset else "safe")

    state = SmeltContext(text=text, format=fmt)
    if tree is not None:
        state._parsed = tree

    kept: list[str] = []
    best_tokens = baseline_tokens
    for strategy in pipeline:
        candidate = strategy.apply(state)
        if candidate.text == state.text:
            continue  # no-op strategy

        candidate_tokens = count(candidate.text)
        # Accept on fewer tokens, or on a tie that yields shorter text.
        accept = candidate_tokens < best_tokens
        if not accept and candidate_tokens == best_tokens:
            accept = len(candidate.text) < len(state.text)
        if not accept:
            continue

        kept.append(strategy.name)
        state = candidate
        best_tokens = candidate_tokens

    final_text = state.text
    final_tokens = count(final_text)
    if baseline_tokens > 0:
        saved_pct = (1 - final_tokens / baseline_tokens) * 100
    else:
        saved_pct = 0.0

    return SmeltReport(
        original=text,
        compacted=final_text,
        original_tokens=baseline_tokens,
        compacted_tokens=final_tokens,
        savings_pct=saved_pct,
        format=fmt,
        strategies_applied=kept,
    )