Skip to content

Strip HTML comments

strip_html_comments

Strip-html-comments strategy — remove HTML comments from markdown/text.

StripHtmlCommentsStrategy

Bases: SmeltStrategy

Remove HTML comments from markdown and plain-text content.

Source code in packages/axm-smelt/src/axm_smelt/strategies/strip_html_comments.py
Python
class StripHtmlCommentsStrategy(SmeltStrategy):
    """Strategy that drops HTML comments from markdown and plain-text input."""

    @property
    def name(self) -> str:
        """Identifier string for this strategy."""
        return "strip_html_comments"

    @property
    def category(self) -> str:
        """Category label for this strategy."""
        return "cosmetic"

    def apply(self, ctx: SmeltContext) -> SmeltContext:
        """Return *ctx* with HTML comments removed, leaving fenced code intact.

        Contexts whose format is not in ``_APPLICABLE_FORMATS`` are returned
        untouched, as are contexts whose text comes out unchanged. Otherwise
        a new ``SmeltContext`` carrying the cleaned text and the original
        format is returned.
        """
        if ctx.format not in _APPLICABLE_FORMATS:
            return ctx

        original = ctx.text
        saved: list[str] = []

        def _token(position: int) -> str:
            # NUL-delimited marker standing in for the fenced block at *position*.
            return f"\x00CODEBLOCK{position}\x00"

        def _stash(match: re.Match[str]) -> str:
            # Remember the fenced block verbatim and hand back its marker.
            saved.append(match.group(0))
            return _token(len(saved) - 1)

        # Hide fenced blocks, strip comments, then squeeze the blank-line runs
        # that the removal may have left behind.
        shielded = _FENCED_BLOCK_RE.sub(_stash, original)
        cleaned = _MULTI_BLANK_RE.sub("\n\n", _strip_comments(shielded))

        # Put every fenced block back where its marker sits.
        for position, fence in enumerate(saved):
            cleaned = cleaned.replace(_token(position), fence)

        if cleaned != original:
            return SmeltContext(text=cleaned, format=ctx.format)
        return ctx
apply(ctx)

Strip HTML comments outside fenced code blocks.

Source code in packages/axm-smelt/src/axm_smelt/strategies/strip_html_comments.py
Python
def apply(self, ctx: SmeltContext) -> SmeltContext:
    """Remove HTML comments from the context text, skipping fenced code blocks.

    The input context is returned as-is when its format is not listed in
    ``_APPLICABLE_FORMATS`` or when processing leaves the text unchanged;
    otherwise a fresh ``SmeltContext`` with the new text and the same format
    is returned.
    """
    if ctx.format not in _APPLICABLE_FORMATS:
        return ctx

    source_text = ctx.text

    # Fenced code blocks are swapped for NUL-delimited markers so that the
    # comment removal and blank-line cleanup cannot touch their contents.
    fences: list[str] = []

    def _swap_for_marker(found: re.Match[str]) -> str:
        marker = f"\x00CODEBLOCK{len(fences)}\x00"
        fences.append(found.group(0))
        return marker

    masked = _FENCED_BLOCK_RE.sub(_swap_for_marker, source_text)

    # Drop the comments, then collapse the blank-line runs left behind.
    without_comments = _strip_comments(masked)
    output = _MULTI_BLANK_RE.sub("\n\n", without_comments)

    # Reinstate each fenced block in place of its marker.
    for number, fence in enumerate(fences):
        output = output.replace(f"\x00CODEBLOCK{number}\x00", fence)

    return ctx if output == source_text else SmeltContext(
        text=output, format=ctx.format
    )