Milestone 2.

2025-08-18 21:40:41 +02:00
parent 1646d7b827
commit e283e9f696
13 changed files with 615 additions and 107 deletions
--- a/app/services/notes_fs.py
+++ b/app/services/notes_fs.py
@@ -1,7 +1,7 @@
 from __future__ import annotations

-import json
 import os
+import re
 import uuid
 from dataclasses import dataclass, field
 from typing import List, Optional, Tuple
@@ -15,7 +15,6 @@ from app.utils.slugs import slugify
 from app.utils.time import now_iso_utc, is_iso_utc


-REQUIRED_FIELDS = ("id", "title", "created", "updated")
 OPTIONAL_LIST_FIELDS = ("aliases", "tags")


@@ -42,7 +41,6 @@ def _normalize_metadata(meta: dict) -> dict:
        if val is None:
            val = []
        if isinstance(val, str):
-            # Allow comma-separated string as input
            val = [x.strip() for x in val.split(",") if x.strip()]
        if not isinstance(val, list):
            val = [str(val)]
@@ -112,15 +110,17 @@ def note_path_for_slug(vault: Vault, slug: str) -> Tuple[str, str]:
    return rel, abs_path


-def load_note_from_file(vault: Vault, abs_path: str) -> Note:
-    with open(abs_path, "rb") as f:
-        post = frontmatter.load(f)
+def _load_frontmatter_text(abs_path: str):
+    """Parse the file at ``abs_path`` with python-frontmatter and return the result.
+
+    The file is read as UTF-8 text; bytes that cannot be decoded are substituted
+    (``errors="replace"``) instead of raising.
+    """
+    with open(abs_path, "r", encoding="utf-8", errors="replace") as handle:
+        raw_text = handle.read()
+    return frontmatter.loads(raw_text)

+
+def load_note_from_file(vault: Vault, abs_path: str) -> Note:
+    post = _load_frontmatter_text(abs_path)
    meta = _normalize_metadata(post.metadata)
    title = meta["title"]
    slug = slugify(title)
-    rel, _ = note_path_for_slug(vault, slug)
-    # Use actual rel path of the file (could differ from current slug if renamed later)
    rel = vault.relpath(abs_path)

    note = Note(
@@ -161,8 +161,6 @@ def create_note(
    slug = slugify(meta["title"])
    rel, abs_path = note_path_for_slug(vault, slug)
    abs_path = ensure_unique_path(abs_path)
-
-    # If collision caused a suffix, recompute rel path accordingly
    rel = vault.relpath(abs_path)

    note = Note(
@@ -193,29 +191,46 @@ def save_note(vault: Vault, note: Note) -> None:
 def list_notes() -> list[Note]:
+    """Return every loadable note of the vault, sorted by ``updated`` in descending order.
+
+    A file whose loading raises any exception is skipped without being reported.
+    """
    vault = get_vault()
    notes: list[Note] = []
-    for path in vault.iter_markdown_files():
+    for p in vault.iter_markdown_files():
        try:
-            notes.append(load_note_from_file(vault, path))
+            # Paths that are not already absolute are resolved through the vault.
+            abs_path = p if os.path.isabs(p) else vault.abspath(p)
+            notes.append(load_note_from_file(vault, abs_path))
        except Exception:
-            # Ignore malformed files for now (could log)
            continue
-    # Sort by updated desc
    notes.sort(key=lambda n: n.updated, reverse=True)
    return notes


+_hex_re = re.compile(r"[0-9a-fA-F]")
+
+
+def _uuid_key(value: str) -> Optional[str]:
+    """Reduce an id-like value to a 32-character lowercase hex key.
+
+    The hex digits of ``str(value)`` are collected, ignoring every other
+    character. Exactly 32 of them form the key directly; otherwise the text is
+    handed to ``uuid.UUID`` as a fallback. Returns ``None`` for ``None`` input
+    or when neither approach yields a key.
+    """
+    if value is None:
+        return None
+    text = str(value)
+    hex_digits = _hex_re.findall(text)
+    if len(hex_digits) == 32:
+        return "".join(hex_digits).lower()
+    try:
+        parsed = uuid.UUID(text)
+    except Exception:
+        return None
+    return parsed.hex
+
+
 def load_note_by_id(note_id: str) -> Optional[Note]:
+    """Return the first note whose front-matter ``id`` matches ``note_id``.
+
+    Both ids are reduced with ``_uuid_key`` before they are compared. Returns
+    ``None`` when ``note_id`` yields no key or when no file matches; a file that
+    raises while being read or loaded is skipped.
+    """
    vault = get_vault()
-    for path in vault.iter_markdown_files():
+    target_key = _uuid_key(note_id)
+    if not target_key:
+        return None
+
+    for p in vault.iter_markdown_files():
        try:
-            with open(path, "rb") as f:
-                post = frontmatter.load(f)
-            meta = _normalize_metadata(post.metadata)
-            if str(meta.get("id")) == str(note_id):
-                # Load fully to construct Note with body and rel path
-                return load_note_from_file(vault, path)
+            abs_path = p if os.path.isabs(p) else vault.abspath(p)
+            post = _load_frontmatter_text(abs_path)
+            # The id is taken from the raw metadata, without _normalize_metadata.
+            raw_id = (post.metadata or {}).get("id")
+            cand_key = _uuid_key(raw_id)
+            if cand_key and cand_key == target_key:
+                # The matching file is parsed again here to build the Note itself.
+                return load_note_from_file(vault, abs_path)
        except Exception:
            continue
+
    return None


@@ -224,7 +239,6 @@ def update_note_body(note_id: str, new_body: str) -> Optional[Note]:
    if not note:
        return None
    note.body = new_body
-    # Update timestamp
    note.updated = now_iso_utc()
    vault = get_vault()
    save_note(vault, note)
--- a/app/services/renderer.py
+++ b/app/services/renderer.py
@@ -0,0 +1,212 @@
+from __future__ import annotations
+
+import re
+from html import escape
+from typing import Dict, Iterable, List, Optional, Tuple
+
+import bleach
+from markdown_it import MarkdownIt
+from mdit_py_plugins.footnote import footnote_plugin
+from mdit_py_plugins.tasklists import tasklists_plugin
+from mdit_py_plugins.deflist import deflist_plugin
+from mdit_py_plugins.admon import admon_plugin
+from mdit_py_plugins.anchors import anchors_plugin
+from flask import url_for
+
+from app.utils.slugs import slugify
+
+
+# Shared MarkdownIt instance: CommonMark preset plus the plugins registered below.
+_md = MarkdownIt("commonmark", {"linkify": True})
+_md.use(footnote_plugin)
+_md.use(tasklists_plugin, enabled=True, label=True)
+_md.use(deflist_plugin)
+_md.use(admon_plugin)
+# Only supported options are passed to the anchors plugin, to avoid version mismatches.
+_md.use(anchors_plugin, permalink=True, max_level=6, slug_func=slugify)
+
+# Bleach sanitization configuration, passed to bleach.clean() by sanitize_html().
+# Tags that survive sanitization.
+ALLOWED_TAGS = [
+    # Basic text
+    "p", "div", "span", "br", "hr", "blockquote", "pre", "code",
+    # Headings
+    "h1", "h2", "h3", "h4", "h5", "h6",
+    # Lists
+    "ul", "ol", "li", "dl", "dt", "dd",
+    # Tables
+    "table", "thead", "tbody", "tr", "th", "td",
+    # Inline
+    "em", "strong", "kbd", "sup", "sub", "abbr",
+    # Links and images
+    "a", "img",
+]
+# Attributes kept per tag; the "*" entry applies to every allowed tag.
+ALLOWED_ATTRS = {
+    "*": ["class", "id", "title"],
+    "a": ["href", "name", "target", "rel"],
+    "img": ["src", "alt", "title", "width", "height", "loading"],
+}
+# NOTE(review): "data" is listed for pasted images, but bleach presumably applies
+# this list to every URL attribute, which would also let data: URLs through in
+# <a href> — confirm, and restrict to <img src> if so.
+ALLOWED_PROTOCOLS = ["http", "https", "mailto", "tel", "data"]  # data for images (pasted)
+
+
+class WikiIndex:
+    """
+    Case-insensitive lookup table for resolving [[WikiLinks]] by title, slug, or alias.
+
+    Keys are normalized by trimming surrounding whitespace and lowercasing. The
+    first note registered under a normalized key keeps it; later notes never
+    overwrite an existing key.
+    """
+
+    def __init__(self):
+        self._map: Dict[str, Dict] = {}
+
+    @staticmethod
+    def _norm(key) -> str:
+        """Return the normalized lookup form of ``key`` ("" for ``None``)."""
+        # Values may not be strings (for example a numeric alias), so coerce
+        # instead of calling str methods on them directly.
+        if key is None:
+            return ""
+        return str(key).strip().lower()
+
+    def add(self, *, id: str, title: str, slug: str, aliases: Iterable[str] = ()):
+        """
+        Register a note under its title, slug, and each alias.
+
+        ``aliases`` may be ``None`` (treated as empty) or a single string (treated
+        as one alias). Keys that normalize to "" are ignored, and keys already
+        present are left untouched.
+        """
+        if aliases is None:
+            aliases = ()
+        elif isinstance(aliases, str):
+            # A bare string would otherwise be iterated character by character.
+            aliases = (aliases,)
+        for key in (title, slug, *aliases):
+            normalized = self._norm(key)
+            if normalized and normalized not in self._map:
+                self._map[normalized] = {"id": id, "title": title, "slug": slug}
+
+    def resolve(self, key: str) -> Optional[Dict]:
+        """Return the ``{"id", "title", "slug"}`` entry for ``key``, or ``None`` if unknown."""
+        return self._map.get(self._norm(key))
+
+
+def build_wiki_index(notes: Iterable) -> WikiIndex:
+    """Create a WikiIndex and register every item of ``notes`` in iteration order.
+
+    Each note must expose ``id``, ``title`` and ``slug``; a missing ``aliases``
+    attribute is treated as an empty list.
+    """
+    index = WikiIndex()
+    for note in notes:
+        entry = {
+            "id": note.id,
+            "title": note.title,
+            "slug": note.slug,
+            "aliases": getattr(note, "aliases", []),
+        }
+        index.add(**entry)
+    return index
+
+
+# Matches the opening "](" of a markdown link or image whose target starts with
+# attachments/, ./attachments/ or /attachments/ and has at least one more
+# character before the closing parenthesis.
+_ATTACHMENT_TARGET_RE = re.compile(r"(\]\()(?:\./|/)?attachments/(?=[^)])")
+
+
+def _rewrite_attachment_paths(md_text: str) -> str:
+    """
+    Turn markdown link/image targets that point at attachments (attachments/...,
+    ./attachments/... or /attachments/...) into absolute app URLs
+    (/attachments/...) so they work from any route.
+
+    Images need no separate pass: ``![alt](`` contains the same ``](`` opener as
+    a plain link. Targets such as ``.attachments/...`` or full URLs are left
+    untouched, and the rewrite is idempotent.
+    """
+    return _ATTACHMENT_TARGET_RE.sub(r"\1/attachments/", md_text)
+
+
+def _wikilink_to_md_link(wikilink: str, idx: WikiIndex) -> Tuple[str, Optional[str]]:
+    """
+    Translate one [[...]] token into a markdown link or placeholder HTML.
+
+    Accepted forms: [[Note]], [[Note|Alias]], [[Note#Heading]], [[Note#Heading|Alias]].
+    Returns (replacement_text, note_id); note_id is None when the target cannot
+    be resolved through ``idx``.
+    """
+    body = wikilink.strip()[2:-2]  # drop the surrounding [[ and ]]
+
+    # Everything after the first "|" is display text; an empty alias counts as absent.
+    target_part, _, alias_raw = body.partition("|")
+    alias_text = alias_raw.strip() if alias_raw else None
+
+    # Everything after the first "#" of the target is a heading fragment.
+    target_title, _, header = target_part.partition("#")
+    target_title = target_title.strip()
+
+    hit = idx.resolve(target_title)
+    if not hit:
+        # Unresolved — emit an escaped, styled span that does not navigate anywhere.
+        label = escape(target_title)
+        if header:
+            label += f"#{escape(header)}"
+        shown = escape(alias_text) if alias_text else label
+        return (
+            f'<span class="text-warning/90 underline decoration-dotted" title="Unresolved wikilink">[[{shown}]]</span>',
+            None,
+        )
+
+    href = url_for("notes.notes_view", note_id=hit["id"])
+    if header:
+        href = f"{href}#{slugify(header)}"
+    link_text = alias_text or hit["title"]
+    return f"[{link_text}]({href})", hit["id"]
+
+
+def _rewrite_wikilinks(md_text: str, idx: WikiIndex) -> Tuple[str, List[str], List[str]]:
+    """
+    Substitute every [[...]] token in ``md_text``.
+
+    Returns (new_text, resolved_ids, unresolved_texts): the ids of resolved
+    targets and the original text of unresolved tokens, each in order of
+    appearance.
+    """
+    resolved_ids: List[str] = []
+    unresolved_links: List[str] = []
+
+    def _substitute(match: re.Match) -> str:
+        token = match.group(0)
+        replacement, note_id = _wikilink_to_md_link(token, idx)
+        bucket, item = (resolved_ids, note_id) if note_id else (unresolved_links, token)
+        bucket.append(item)
+        return replacement
+
+    # Non-greedy, so each token ends at the nearest closing ]].
+    rewritten = re.sub(r"\[\[(.+?)\]\]", _substitute, md_text)
+    return rewritten, resolved_ids, unresolved_links
+
+
+def sanitize_html(html: str) -> str:
+    """Clean ``html`` with bleach, then pass the result through ``bleach.linkify``.
+
+    Cleaning uses ALLOWED_TAGS, ALLOWED_ATTRS and ALLOWED_PROTOCOLS with
+    ``strip=True``. Linkification skips ``pre`` and ``code`` content and also
+    parses e-mail addresses.
+    """
+    allowed_only = bleach.clean(
+        html,
+        tags=ALLOWED_TAGS,
+        attributes=ALLOWED_ATTRS,
+        protocols=ALLOWED_PROTOCOLS,
+        strip=True,
+    )
+    # The callback list is explicitly empty, so no link callbacks are applied here.
+    return bleach.linkify(
+        allowed_only,
+        callbacks=[],
+        skip_tags=["pre", "code"],
+        parse_email=True,
+    )
+
+
+def render_markdown(body_md: str, all_notes: Iterable) -> dict:
+    """
+    Render a note body to sanitized HTML.
+
+    Pipeline, in order:
+    - attachment paths are rewritten to /attachments/...
+    - Obsidian-style [[WikiLinks]] are resolved against ``all_notes``
+      (by title/alias/slug)
+    - the text is rendered with the shared markdown-it-py instance
+    - the HTML is sanitized with bleach
+
+    A falsy ``body_md`` is rendered as an empty document.
+    Returns:
+      {
+        "html": "<sanitized html>",
+        "outbound_note_ids": [...],
+        "unresolved_wikilinks": ["[[...]]", ...]
+      }
+    """
+    wiki_index = build_wiki_index(all_notes)
+
+    source = _rewrite_attachment_paths(body_md or "")
+    source, outbound_ids, unresolved_links = _rewrite_wikilinks(source, wiki_index)
+
+    rendered = sanitize_html(_md.render(source))
+
+    return {
+        "html": rendered,
+        "outbound_note_ids": outbound_ids,
+        "unresolved_wikilinks": unresolved_links,
+    }
--- a/app/services/vault.py
+++ b/app/services/vault.py
@@ -2,7 +2,8 @@ from __future__ import annotations

 import os
 from dataclasses import dataclass
-from typing import Iterable
+from pathlib import Path
+from typing import Iterator


@dataclass(frozen=True)
@@ -15,31 +16,48 @@ class VaultPaths:

 class Vault:
+    """Resolve a vault root directory and expose its standard locations via ``self.paths``."""
+
    def __init__(self, root_path: str):
-        if not root_path:
-            raise ValueError("Vault root path must be provided")
-        self.root_path = os.path.abspath(root_path)
-        self.paths = VaultPaths(
-            root=self.root_path,
-            notes=os.path.join(self.root_path, "notes"),
-            attachments=os.path.join(self.root_path, "attachments"),
-            kb=os.path.join(self.root_path, ".kb"),
+        # NOTE(review): the removed lines raised ValueError for an empty ``root_path``
+        # and set ``self.root_path``; the added lines do neither — confirm that no
+        # caller depends on the check or on the attribute.
+        root = Path(root_path).expanduser().resolve()
+        # NOTE(review): no frozen declaration on Vault is visible in this hunk, so a
+        # plain ``self.paths = ...`` would presumably behave the same — confirm.
+        object.__setattr__(
+            self,
+            "paths",
+            VaultPaths(
+                root=str(root),
+                notes=str(root / "notes"),
+                attachments=str(root / "attachments"),
+                kb=str(root / ".kb"),
+            ),
        )

    def ensure_structure(self) -> None:
-        os.makedirs(self.paths.root, exist_ok=True)
-        os.makedirs(self.paths.notes, exist_ok=True)
-        os.makedirs(self.paths.attachments, exist_ok=True)
-        os.makedirs(self.paths.kb, exist_ok=True)
-
-    def relpath(self, abs_path: str) -> str:
-        return os.path.relpath(abs_path, self.paths.root)
+        # Create notes/, attachments/ and .kb/ together with any missing parents;
+        # directories that already exist are left alone.
+        Path(self.paths.notes).mkdir(parents=True, exist_ok=True)
+        Path(self.paths.attachments).mkdir(parents=True, exist_ok=True)
+        Path(self.paths.kb).mkdir(parents=True, exist_ok=True)

    def abspath(self, rel_path: str) -> str:
-        return os.path.join(self.paths.root, rel_path)
+        # Join rel_path onto the vault root and resolve the result.
+        # If rel_path is absolute, Path(root) / rel_path will return rel_path as-is.
+        # NOTE(review): nothing here checks that the result stays inside the root.
+        return str((Path(self.paths.root) / rel_path).resolve())

-    def iter_markdown_files(self) -> Iterable[str]:
-        """Yield absolute file paths for all .md files under notes/ recursively."""
-        for dirpath, _, filenames in os.walk(self.paths.notes):
-            for fn in filenames:
-                if fn.lower().endswith(".md"):
-                    yield os.path.join(dirpath, fn)
+    def relpath(self, abs_path: str) -> str:
+        # Both sides are resolved before the root prefix is stripped.
+        # NOTE(review): pathlib's relative_to raises ValueError for a path outside the
+        # root, whereas the removed os.path.relpath version returned a "../" path.
+        return str(Path(abs_path).resolve().relative_to(Path(self.paths.root).resolve()))
+
+    def iter_markdown_files(self) -> Iterator[str]:
+        """
+        Yield absolute paths to .md files under <vault>/notes recursively.
+        - Allows the vault root to be hidden or not.
+        - Skips hidden subdirectories within notes/ (names starting with '.').
+        - Skips hidden files (names starting with '.').
+        """
+        notes_dir = Path(self.paths.notes)
+        if not notes_dir.exists():
+            # This function contains a yield, so it is a generator: the returned
+            # value is not seen by callers and iteration simply ends.
+            return iter(())
+
+        # Walk manually to filter hidden dirs/files
+        for dirpath, dirnames, filenames in os.walk(notes_dir):
+            # Remove hidden subdirectories in-place (prevents os.walk from entering them)
+            dirnames[:] = [d for d in dirnames if not d.startswith(".")]
+            for fname in filenames:
+                if fname.startswith("."):
+                    continue
+                # NOTE(review): this suffix test is case-sensitive; the removed version
+                # lowercased the name first, so "X.MD" is no longer yielded — confirm.
+                if not fname.endswith(".md"):
+                    continue
+                yield str(Path(dirpath, fname).resolve())