from __future__ import annotations import re from html import escape from typing import Dict, Iterable, List, Optional, Tuple import bleach from markdown_it import MarkdownIt from mdit_py_plugins.footnote import footnote_plugin from mdit_py_plugins.tasklists import tasklists_plugin from mdit_py_plugins.deflist import deflist_plugin from mdit_py_plugins.admon import admon_plugin from mdit_py_plugins.anchors import anchors_plugin from flask import url_for from app.utils.slugs import slugify # Build a MarkdownIt renderer with useful plugins _md = ( MarkdownIt("commonmark", {"linkify": True}) .use(footnote_plugin) .use(tasklists_plugin, enabled=True, label=True) .use(deflist_plugin) .use(admon_plugin) # Use only supported options here to avoid version mismatches .use(anchors_plugin, permalink=True, max_level=6, slug_func=slugify) ) # Bleach sanitization configuration ALLOWED_TAGS = [ # Basic text "p", "div", "span", "br", "hr", "blockquote", "pre", "code", # Headings "h1", "h2", "h3", "h4", "h5", "h6", # Lists "ul", "ol", "li", "dl", "dt", "dd", # Tables "table", "thead", "tbody", "tr", "th", "td", # Inline "em", "strong", "kbd", "sup", "sub", "abbr", # Links and images "a", "img", ] ALLOWED_ATTRS = { "*": ["class", "id", "title"], "a": ["href", "name", "target", "rel"], "img": ["src", "alt", "title", "width", "height", "loading"], } ALLOWED_PROTOCOLS = ["http", "https", "mailto", "tel", "data"] # data for images (pasted) class WikiIndex: """ Simple index for resolving [[WikiLinks]] by title, alias, or slug. """ def __init__(self): self._map: Dict[str, Dict] = {} @staticmethod def _norm(key: str) -> str: return key.strip().lower() def add(self, *, id: str, title: str, slug: str, aliases: Iterable[str] = ()): keys = [title, slug, *list(aliases)] for k in keys: n = self._norm(k) if n and n not in self._map: self._map[n] = {"id": id, "title": title, "slug": slug} def resolve(self, key: str) -> Optional[Dict]: return self._map.get(self._norm(key)) def build_wiki_index(notes: Iterable) -> WikiIndex: idx = WikiIndex() for n in notes: idx.add(id=n.id, title=n.title, slug=n.slug, aliases=getattr(n, "aliases", [])) return idx def _rewrite_attachment_paths(md_text: str) -> str: """ Turn relative markdown links/images to attachments (attachments/... or ./attachments/...) into absolute app URLs (/attachments/...) so they work from any route. """ # Replace ![alt](attachments/...) and [text](attachments/...) patterns def repl(m: re.Match) -> str: prefix = m.group(1) # ]( or ](./ or ](../ etc) path = m.group(2) # Normalize to /attachments/ clean = re.sub(r"^(\./|/)?attachments/", "", path) return f"{prefix}/attachments/{clean}" pattern = re.compile(r"(\]\()(\.?/?attachments/[^\)]+)") text = pattern.sub(repl, md_text) # Images img_pattern = re.compile(r"(!\[[^\]]*\]\()(\.?/?attachments/[^\)]+)") text = img_pattern.sub(repl, text) return text def _wikilink_to_md_link(wikilink: str, idx: WikiIndex) -> Tuple[str, Optional[str]]: """ Convert a single [[...]] to a markdown [text](url) if resolvable. Returns (replacement_text, resolved_id or None). Supports [[Note]], [[Note|Alias]], [[Note#Heading]], [[Note#Heading|Alias]]. """ inner = wikilink.strip()[2:-2] # remove [[ ]] # Split alias part if "|" in inner: target_part, alias_text = inner.split("|", 1) else: target_part, alias_text = inner, None # Handle heading fragment if "#" in target_part: target_title, header = target_part.split("#", 1) else: target_title, header = target_part, None target_title = target_title.strip() alias_text = alias_text.strip() if alias_text else None hit = idx.resolve(target_title) link_text = alias_text or (hit["title"] if hit else target_title) if hit: href = url_for("notes.notes_view", note_id=hit["id"]) if header: href = f"{href}#{slugify(header)}" return f"[{link_text}]({href})", hit["id"] else: # Unresolved — return a stylized placeholder anchor that won't navigate safe_label = escape(target_title) if header: safe_label += f"#{escape(header)}" disp = escape(alias_text) if alias_text else safe_label html = f'[[{disp}]]' return html, None def _rewrite_wikilinks(md_text: str, idx: WikiIndex) -> Tuple[str, List[str], List[str]]: """ Replace all [[...]] links. Returns (new_text, resolved_ids, unresolved_texts) """ resolved: List[str] = [] unresolved: List[str] = [] def repl(m: re.Match) -> str: w = m.group(0) replacement, note_id = _wikilink_to_md_link(w, idx) if note_id: resolved.append(note_id) else: unresolved.append(w) return replacement # Match anything between [[ ]] non-greedy pattern = re.compile(r"\[\[(.+?)\]\]") new_text = pattern.sub(repl, md_text) return new_text, resolved, unresolved def sanitize_html(html: str) -> str: cleaned = bleach.clean( html, tags=ALLOWED_TAGS, attributes=ALLOWED_ATTRS, protocols=ALLOWED_PROTOCOLS, strip=True, ) # Make external links open safely cleaned = bleach.linkify( cleaned, callbacks=[], skip_tags=["pre", "code"], parse_email=True, ) return cleaned def render_markdown(body_md: str, all_notes: Iterable) -> dict: """ Render Markdown with: - Obsidian-style [[WikiLinks]] (resolved by title/alias/slug). - Attachment path rewriting to /attachments/... - Markdown-it-py with useful plugins. - Bleach sanitization. Returns: { "html": "", "outbound_note_ids": [...], "unresolved_wikilinks": ["[[...]]", ...] } """ idx = build_wiki_index(all_notes) # Preprocess: wikilinks -> markdown links (or styled unresolved spans), attachments -> absolute text_with_attachments = _rewrite_attachment_paths(body_md or "") text_with_links, resolved_ids, unresolved = _rewrite_wikilinks(text_with_attachments, idx) # Render to HTML raw_html = _md.render(text_with_links) # Sanitize safe_html = sanitize_html(raw_html) return { "html": safe_html, "outbound_note_ids": resolved_ids, "unresolved_wikilinks": unresolved, }