Strip-html-comments strategy — remove HTML comments from markdown/text.
Bases: SmeltStrategy
Remove HTML comments from markdown and plain-text content.
Source code in packages/axm-smelt/src/axm_smelt/strategies/strip_html_comments.py
| Python |
|---|
class StripHtmlCommentsStrategy(SmeltStrategy):
    """Strategy that removes HTML comments from markdown and plain-text content.

    Fenced code blocks are swapped out for placeholders before the comments
    are stripped and are put back afterwards, so their contents are not
    modified by the comment removal.
    """

    @property
    def name(self) -> str:
        """Return the identifier of this strategy."""
        return "strip_html_comments"

    @property
    def category(self) -> str:
        """Return the category this strategy is filed under."""
        return "cosmetic"

    def apply(self, ctx: SmeltContext) -> SmeltContext:
        """Strip HTML comments that sit outside fenced code blocks.

        Args:
            ctx: Context carrying the text to process and its format.

        Returns:
            ``ctx`` itself when its format is not in ``_APPLICABLE_FORMATS``
            or when processing leaves the text unchanged; otherwise a new
            ``SmeltContext`` holding the processed text and the same format.
        """
        if ctx.format not in _APPLICABLE_FORMATS:
            return ctx

        original = ctx.text
        saved: list[str] = []

        def _stash(match: re.Match[str]) -> str:
            # Remember the fenced block and leave a NUL-delimited marker
            # carrying its position in ``saved``.
            saved.append(match.group(0))
            return f"\x00CODEBLOCK{len(saved) - 1}\x00"

        # 1. Hide fenced code blocks, 2. drop comments, 3. collapse the
        # runs of blank lines that the removal may have left behind.
        working = _FENCED_BLOCK_RE.sub(_stash, original)
        working = _strip_comments(working)
        working = _MULTI_BLANK_RE.sub("\n\n", working)

        # 4. Put every fenced block back in place of its marker.
        for position, fenced in enumerate(saved):
            working = working.replace(f"\x00CODEBLOCK{position}\x00", fenced)

        if working != original:
            return SmeltContext(text=working, format=ctx.format)
        return ctx
|
Strip HTML comments outside fenced code blocks.
Source code in packages/axm-smelt/src/axm_smelt/strategies/strip_html_comments.py
| Python |
|---|
def apply(self, ctx: SmeltContext) -> SmeltContext:
    """Strip HTML comments that sit outside fenced code blocks.

    Args:
        ctx: Context carrying the text to process and its format.

    Returns:
        ``ctx`` itself when its format is not in ``_APPLICABLE_FORMATS``
        or when processing leaves the text unchanged; otherwise a new
        ``SmeltContext`` holding the processed text and the same format.
    """
    if ctx.format not in _APPLICABLE_FORMATS:
        return ctx

    source_text = ctx.text
    fenced_blocks: list[str] = []

    def _park(found: re.Match[str]) -> str:
        # Keep the fenced block aside; the marker encodes its list index.
        slot = len(fenced_blocks)
        fenced_blocks.append(found.group(0))
        return f"\x00CODEBLOCK{slot}\x00"

    # Comments are removed only after fenced blocks have been replaced by
    # markers; blank-line runs are then collapsed to a single blank line.
    without_fences = _FENCED_BLOCK_RE.sub(_park, source_text)
    cleaned = _MULTI_BLANK_RE.sub("\n\n", _strip_comments(without_fences))

    # Swap each marker back for the fenced block it stands for.
    for slot, fenced in enumerate(fenced_blocks):
        cleaned = cleaned.replace(f"\x00CODEBLOCK{slot}\x00", fenced)

    if cleaned != source_text:
        return SmeltContext(text=cleaned, format=ctx.format)
    return ctx
|