# File-viewer header (extraction residue): Python source, 212 lines, 6.6 KiB.
from __future__ import annotations
|
|
|
|
import re
|
|
from html import escape
|
|
from typing import Dict, Iterable, List, Optional, Tuple
|
|
|
|
import bleach
|
|
from markdown_it import MarkdownIt
|
|
from mdit_py_plugins.footnote import footnote_plugin
|
|
from mdit_py_plugins.tasklists import tasklists_plugin
|
|
from mdit_py_plugins.deflist import deflist_plugin
|
|
from mdit_py_plugins.admon import admon_plugin
|
|
from mdit_py_plugins.anchors import anchors_plugin
|
|
from flask import url_for
|
|
|
|
from app.utils.slugs import slugify
|
|
|
|
|
|
# Module-wide MarkdownIt renderer: CommonMark preset with plugins registered
# one at a time (each .use() call configures the same instance).
# NOTE(review): "linkify" is only passed as an option here; confirm whether the
# linkify rule must also be enabled for it to have any effect.
_md = MarkdownIt("commonmark", {"linkify": True})
_md.use(footnote_plugin)
_md.use(tasklists_plugin, enabled=True, label=True)
_md.use(deflist_plugin)
_md.use(admon_plugin)
# Use only supported options here to avoid version mismatches
_md.use(anchors_plugin, permalink=True, max_level=6, slug_func=slugify)
|
|
|
|
# Bleach sanitization configuration
ALLOWED_TAGS = [
    # Basic text
    "p", "div", "span", "br", "hr", "blockquote", "pre", "code",
    # Headings
    "h1", "h2", "h3", "h4", "h5", "h6",
    # Lists
    "ul", "ol", "li", "dl", "dt", "dd",
    # Tables
    "table", "thead", "tbody", "tr", "th", "td",
    # Inline
    "em", "strong", "kbd", "sup", "sub", "abbr",
    # Links and images
    "a", "img",
    # Task lists: the tasklists plugin (registered with label=True) renders
    # <input type="checkbox"> wrapped in <label>. Without these entries the
    # sanitizer strips the checkboxes from every task list.
    "input", "label",
]
ALLOWED_ATTRS = {
    "*": ["class", "id", "title"],
    "a": ["href", "name", "target", "rel"],
    "img": ["src", "alt", "title", "width", "height", "loading"],
    # Just enough to keep a task-list checkbox and its state.
    # NOTE(review): "type" is not restricted to "checkbox" here; tighten with an
    # attribute callable if raw-HTML inputs of other types are a concern.
    "input": ["type", "checked", "disabled"],
    "label": ["for"],
}
# NOTE(review): this protocol list is shared by all tags, so presumably data:
# URLs are accepted in <a href> as well as <img src> — confirm that is intended.
ALLOWED_PROTOCOLS = ["http", "https", "mailto", "tel", "data"]  # data for images (pasted)
|
|
|
|
|
|
class WikiIndex:
    """
    Simple index for resolving [[WikiLinks]] by title, alias, or slug.

    Keys are compared after stripping surrounding whitespace and lowercasing.
    The first note registered under a key keeps it; later registrations of the
    same key are ignored.
    """

    def __init__(self):
        # normalized key -> {"id", "title", "slug"} of the note that owns it
        self._map: Dict[str, Dict] = {}

    @staticmethod
    def _norm(key: Optional[str]) -> str:
        """Return the normalized lookup form of ``key`` ("" when it is None)."""
        if key is None:
            return ""
        return key.strip().lower()

    def add(self, *, id: str, title: str, slug: str, aliases: Iterable[str] = ()):
        """
        Register a note under its title, slug and every alias.

        ``aliases`` may be None (treated as empty) or a single string (treated
        as one alias rather than iterated character by character). Keys that
        are None or blank are skipped.
        """
        if aliases is None:
            aliases = ()
        elif isinstance(aliases, str):
            aliases = (aliases,)

        for k in (title, slug, *aliases):
            n = self._norm(k)
            # An empty key can never be looked up meaningfully; first writer wins.
            if n and n not in self._map:
                self._map[n] = {"id": id, "title": title, "slug": slug}

    def resolve(self, key: str) -> Optional[Dict]:
        """Return the record registered under ``key``, or None if there is none."""
        return self._map.get(self._norm(key))
|
|
|
|
|
|
def build_wiki_index(notes: Iterable) -> WikiIndex:
    """
    Build a WikiIndex from an iterable of note objects.

    Each note must expose ``id``, ``title`` and ``slug``; ``aliases`` is
    optional. Notes are added in iteration order.
    """
    idx = WikiIndex()
    for note in notes:
        idx.add(
            id=note.id,
            title=note.title,
            slug=note.slug,
            # ``or ()`` covers both a missing attribute and one that is None,
            # which would otherwise fail when the aliases are iterated.
            aliases=getattr(note, "aliases", None) or (),
        )
    return idx
|
|
|
|
|
|
def _rewrite_attachment_paths(md_text: str) -> str:
    """
    Turn relative markdown links/images to attachments (attachments/... or ./attachments/...)
    into absolute app URLs (/attachments/...) so they work from any route.

    Only destinations that begin right after ``](`` are rewritten, which covers
    both ``[text](attachments/...)`` links and ``![alt](attachments/...)``
    images in a single pass. Destinations already written as
    ``/attachments/...`` come out unchanged.
    """
    # Optional "./" or "/" prefix, then "attachments/", then a non-empty rest.
    # The prefix is an explicit alternation so that ".attachments/..." (a dot
    # with no slash) is not mistaken for an attachment path.
    pattern = re.compile(r"(\]\()(?:\./|/)?attachments/([^\)]+)")
    return pattern.sub(r"\1/attachments/\2", md_text)
|
|
|
|
|
|
def _wikilink_to_md_link(wikilink: str, idx: WikiIndex) -> Tuple[str, Optional[str]]:
    """
    Translate one [[...]] token into its replacement text.

    Accepted forms: [[Note]], [[Note|Alias]], [[Note#Heading]], [[Note#Heading|Alias]].
    Returns (replacement_text, resolved_id or None): a markdown [text](url) link
    when the target resolves through ``idx``, otherwise a styled <span>
    placeholder and None.
    """
    body = wikilink.strip()[2:-2]  # drop the surrounding [[ and ]]

    # Everything after the first "|" is the display text.
    target_part, _, raw_alias = body.partition("|")
    # Everything after the first "#" in the target is the heading.
    raw_title, _, header = target_part.partition("#")

    target_title = raw_title.strip()
    alias_text = raw_alias.strip() if raw_alias else None

    hit = idx.resolve(target_title)

    if not hit:
        # Unresolved — return a stylized placeholder anchor that won't navigate
        safe_label = escape(target_title) + (f"#{escape(header)}" if header else "")
        disp = escape(alias_text) if alias_text else safe_label
        return (
            f'<span class="text-warning/90 underline decoration-dotted" title="Unresolved wikilink">[[{disp}]]</span>',
            None,
        )

    # Resolved: prefer the explicit alias, else the note's own title.
    link_text = alias_text or hit["title"]
    href = url_for("notes.notes_view", note_id=hit["id"])
    if header:
        href = f"{href}#{slugify(header)}"
    return f"[{link_text}]({href})", hit["id"]
|
|
|
|
|
|
def _rewrite_wikilinks(md_text: str, idx: WikiIndex) -> Tuple[str, List[str], List[str]]:
    """
    Substitute every [[...]] occurrence in ``md_text``.

    Returns (new_text, resolved_ids, unresolved_texts): the rewritten text, one
    note id per occurrence that resolved, and the raw [[...]] tokens that did
    not, each list in document order.
    """
    linked_ids: List[str] = []
    dangling: List[str] = []
    pieces: List[str] = []
    cursor = 0

    # Match anything between [[ ]] non-greedy
    for match in re.finditer(r"\[\[(.+?)\]\]", md_text):
        token = match.group(0)
        rendered, target_id = _wikilink_to_md_link(token, idx)
        if target_id:
            linked_ids.append(target_id)
        else:
            dangling.append(token)
        # Copy the untouched text before this match, then the replacement.
        pieces.append(md_text[cursor:match.start()])
        pieces.append(rendered)
        cursor = match.end()

    pieces.append(md_text[cursor:])
    return "".join(pieces), linked_ids, dangling
|
|
|
|
|
|
def _harden_external_link(attrs, new=False):
    """Linkify callback: add ``noopener noreferrer`` to links that leave the site."""
    href = attrs.get((None, "href")) or ""
    if href.lower().startswith(("http://", "https://", "//")):
        rel_tokens = (attrs.get((None, "rel")) or "").split()
        for token in ("noopener", "noreferrer"):
            if token not in rel_tokens:
                rel_tokens.append(token)
        attrs[(None, "rel")] = " ".join(rel_tokens)
    return attrs


def sanitize_html(html: str) -> str:
    """
    Sanitize rendered HTML and auto-link bare URLs and e-mail addresses.

    The markup is first cleaned against ALLOWED_TAGS / ALLOWED_ATTRS /
    ALLOWED_PROTOCOLS, with disallowed tags stripped rather than escaped. It is
    then linkified outside <pre>/<code>; every external link, existing or newly
    created, gets ``rel="noopener noreferrer"``. Relative and mailto links are
    left as they are.
    """
    cleaned = bleach.clean(
        html,
        tags=ALLOWED_TAGS,
        attributes=ALLOWED_ATTRS,
        protocols=ALLOWED_PROTOCOLS,
        strip=True,
    )
    # Make external links open safely. An empty callback list applied nothing,
    # so the hardening callback is passed explicitly.
    cleaned = bleach.linkify(
        cleaned,
        callbacks=[_harden_external_link],
        skip_tags=["pre", "code"],
        parse_email=True,
    )
    return cleaned
|
|
|
|
|
|
def render_markdown(body_md: str, all_notes: Iterable) -> dict:
    """
    Convert a note body from Markdown to sanitized HTML.

    Steps, in order:
    - Build a wiki index from ``all_notes`` (title/alias/slug lookup).
    - Rewrite attachment paths to /attachments/...
    - Replace Obsidian-style [[WikiLinks]] with markdown links or styled
      unresolved spans.
    - Render with the module's markdown-it-py renderer.
    - Sanitize with Bleach.

    Returns:
    {
    "html": "<sanitized html>",
    "outbound_note_ids": [...],
    "unresolved_wikilinks": ["[[...]]", ...]
    }
    """
    wiki_index = build_wiki_index(all_notes)

    # A missing body is treated as empty text.
    source = _rewrite_attachment_paths(body_md or "")
    source, outbound_ids, dangling = _rewrite_wikilinks(source, wiki_index)

    # Render first, then sanitize the renderer's output.
    rendered = sanitize_html(_md.render(source))

    result = {}
    result["html"] = rendered
    result["outbound_note_ids"] = outbound_ids
    result["unresolved_wikilinks"] = dangling
    return result