Collapse-whitespace strategy — reduce redundant blank lines and trailing spaces.
CollapseWhitespaceStrategy
Bases: SmeltStrategy
Collapse consecutive blank lines and strip trailing whitespace.
Source code in packages/axm-smelt/src/axm_smelt/strategies/collapse_whitespace.py
| Python |
|---|
class CollapseWhitespaceStrategy(SmeltStrategy):
    """Collapse consecutive blank lines and strip trailing whitespace.

    Fenced code blocks are protected: they are swapped out for placeholders
    before the text is collapsed and swapped back in afterwards.
    """

    @property
    def name(self) -> str:
        """Return the identifier of this strategy."""
        return "collapse_whitespace"

    @property
    def category(self) -> str:
        """Return the category this strategy is filed under."""
        return "whitespace"

    def apply(self, ctx: SmeltContext) -> SmeltContext:
        """Collapse redundant whitespace outside fenced code blocks.

        Args:
            ctx: Context providing the ``text`` to process and its ``format``.

        Returns:
            ``ctx`` itself when its format is in ``_STRUCTURED_FORMATS`` or
            when the processed text equals the input; otherwise a new
            ``SmeltContext`` with the processed text and the same format.
        """
        if ctx.format in _STRUCTURED_FORMATS:
            return ctx

        text = ctx.text

        # Replace fenced code blocks with placeholders, collapse, then restore.
        # Keyed by placeholder so restoration is a single dictionary lookup.
        blocks: dict[str, str] = {}

        def _save_block(m: re.Match[str]) -> str:
            placeholder = f"\x00CODEBLOCK{len(blocks)}\x00"
            blocks[placeholder] = m.group(0)
            return placeholder

        stripped = _FENCED_BLOCK_RE.sub(_save_block, text)
        collapsed = _collapse(stripped)

        # Normalize newlines around placeholders so that code blocks don't
        # introduce extra blank lines: one \n before the first placeholder,
        # no separator between adjacent placeholders, one \n after the last.
        collapsed = re.sub(r"\n{2,}(\x00CODEBLOCK)", r"\n\1", collapsed)
        collapsed = re.sub(
            r"(CODEBLOCK\d+\x00)\n+(\x00CODEBLOCK)", r"\1\2", collapsed
        )
        collapsed = re.sub(r"(CODEBLOCK\d+\x00)\n{2,}", r"\1\n", collapsed)

        # Restore code blocks in ONE pass. A per-block str.replace loop
        # rescans the whole text for every block and could re-expand
        # placeholder-looking text inside an already restored block.
        # Unknown placeholders are left as they are.
        result = re.sub(
            r"\x00CODEBLOCK[0-9]+\x00",
            lambda m: blocks.get(m.group(0), m.group(0)),
            collapsed,
        )

        if result == text:
            return ctx
        return SmeltContext(text=result, format=ctx.format)
|
apply(ctx)
Collapse redundant whitespace outside fenced code blocks.
Source code in packages/axm-smelt/src/axm_smelt/strategies/collapse_whitespace.py
| Python |
|---|
def apply(self, ctx: SmeltContext) -> SmeltContext:
    """Collapse redundant whitespace outside fenced code blocks.

    Args:
        ctx: Context providing the ``text`` to process and its ``format``.

    Returns:
        ``ctx`` itself when its format is in ``_STRUCTURED_FORMATS`` or
        when the processed text equals the input; otherwise a new
        ``SmeltContext`` with the processed text and the same format.
    """
    if ctx.format in _STRUCTURED_FORMATS:
        return ctx

    text = ctx.text

    # Replace fenced code blocks with placeholders, collapse, then restore.
    # Keyed by placeholder so restoration is a single dictionary lookup.
    blocks: dict[str, str] = {}

    def _save_block(m: re.Match[str]) -> str:
        placeholder = f"\x00CODEBLOCK{len(blocks)}\x00"
        blocks[placeholder] = m.group(0)
        return placeholder

    stripped = _FENCED_BLOCK_RE.sub(_save_block, text)
    collapsed = _collapse(stripped)

    # Normalize newlines around placeholders so that code blocks don't
    # introduce extra blank lines: one \n before the first placeholder,
    # no separator between adjacent placeholders, one \n after the last.
    collapsed = re.sub(r"\n{2,}(\x00CODEBLOCK)", r"\n\1", collapsed)
    collapsed = re.sub(
        r"(CODEBLOCK\d+\x00)\n+(\x00CODEBLOCK)", r"\1\2", collapsed
    )
    collapsed = re.sub(r"(CODEBLOCK\d+\x00)\n{2,}", r"\1\n", collapsed)

    # Restore code blocks in ONE pass. A per-block str.replace loop rescans
    # the whole text for every block and could re-expand placeholder-looking
    # text inside an already restored block. Unknown placeholders are left
    # as they are.
    result = re.sub(
        r"\x00CODEBLOCK[0-9]+\x00",
        lambda m: blocks.get(m.group(0), m.group(0)),
        collapsed,
    )

    if result == text:
        return ctx
    return SmeltContext(text=result, format=ctx.format)
|