Milestone 2.
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from typing import List, Optional, Tuple
|
||||
@@ -15,7 +15,6 @@ from app.utils.slugs import slugify
|
||||
from app.utils.time import now_iso_utc, is_iso_utc
|
||||
|
||||
|
||||
# Frontmatter fields every note must carry after normalization.
REQUIRED_FIELDS = ("id", "title", "created", "updated")

# Frontmatter fields that, when present, are coerced to lists.
OPTIONAL_LIST_FIELDS = ("aliases", "tags")
|
||||
|
||||
|
||||
@@ -42,7 +41,6 @@ def _normalize_metadata(meta: dict) -> dict:
|
||||
if val is None:
|
||||
val = []
|
||||
if isinstance(val, str):
|
||||
# Allow comma-separated string as input
|
||||
val = [x.strip() for x in val.split(",") if x.strip()]
|
||||
if not isinstance(val, list):
|
||||
val = [str(val)]
|
||||
@@ -112,15 +110,17 @@ def note_path_for_slug(vault: Vault, slug: str) -> Tuple[str, str]:
|
||||
return rel, abs_path
|
||||
|
||||
|
||||
def load_note_from_file(vault: Vault, abs_path: str) -> Note:
|
||||
with open(abs_path, "rb") as f:
|
||||
post = frontmatter.load(f)
|
||||
def _load_frontmatter_text(abs_path: str):
    """Read the file at *abs_path* as UTF-8 text and parse its frontmatter.

    Undecodable bytes are replaced rather than raising, so a note with a
    stray bad byte still loads. Returns the parsed frontmatter post object.
    """
    # python-frontmatter expects str input, so decode explicitly here.
    with open(abs_path, "r", encoding="utf-8", errors="replace") as fh:
        return frontmatter.loads(fh.read())
|
||||
|
||||
|
||||
def load_note_from_file(vault: Vault, abs_path: str) -> Note:
|
||||
post = _load_frontmatter_text(abs_path)
|
||||
meta = _normalize_metadata(post.metadata)
|
||||
title = meta["title"]
|
||||
slug = slugify(title)
|
||||
rel, _ = note_path_for_slug(vault, slug)
|
||||
# Use actual rel path of the file (could differ from current slug if renamed later)
|
||||
rel = vault.relpath(abs_path)
|
||||
|
||||
note = Note(
|
||||
@@ -161,8 +161,6 @@ def create_note(
|
||||
slug = slugify(meta["title"])
|
||||
rel, abs_path = note_path_for_slug(vault, slug)
|
||||
abs_path = ensure_unique_path(abs_path)
|
||||
|
||||
# If collision caused a suffix, recompute rel path accordingly
|
||||
rel = vault.relpath(abs_path)
|
||||
|
||||
note = Note(
|
||||
@@ -193,29 +191,46 @@ def save_note(vault: Vault, note: Note) -> None:
|
||||
def list_notes() -> list[Note]:
    """Load every readable note in the vault, most recently updated first.

    Files that fail to parse are skipped silently (best-effort listing;
    could log instead).
    """
    vault = get_vault()
    loaded: list[Note] = []
    for candidate in vault.iter_markdown_files():
        try:
            # iter_markdown_files may yield absolute or vault-relative paths.
            full_path = candidate if os.path.isabs(candidate) else vault.abspath(candidate)
            loaded.append(load_note_from_file(vault, full_path))
        except Exception:
            # Ignore malformed files for now (could log)
            continue
    # Sort by updated desc
    loaded.sort(key=lambda note: note.updated, reverse=True)
    return loaded
|
||||
|
||||
|
||||
_hex_re = re.compile(r"[0-9a-fA-F]")
|
||||
def _uuid_key(value: str) -> Optional[str]:
|
||||
if value is None:
|
||||
return None
|
||||
s = "".join(_hex_re.findall(str(value)))
|
||||
if len(s) == 32:
|
||||
return s.lower()
|
||||
try:
|
||||
return uuid.UUID(str(value)).hex
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def load_note_by_id(note_id: str) -> Optional[Note]:
    """Find the note whose frontmatter ``id`` matches *note_id*.

    Matching is done on the canonical 32-hex-char key from ``_uuid_key``,
    so dashed, undashed, and mixed-case ids all compare equal. Returns the
    fully loaded Note, or None when no file matches or the id is invalid.
    """
    vault = get_vault()
    target_key = _uuid_key(note_id)
    if not target_key:
        return None

    for candidate in vault.iter_markdown_files():
        try:
            full_path = candidate if os.path.isabs(candidate) else vault.abspath(candidate)
            post = _load_frontmatter_text(full_path)
            candidate_key = _uuid_key((post.metadata or {}).get("id"))
            if candidate_key and candidate_key == target_key:
                # Reload through the normal path to get body + rel path.
                return load_note_from_file(vault, full_path)
        except Exception:
            # Unreadable/malformed files are skipped, not fatal.
            continue

    return None
|
||||
|
||||
|
||||
@@ -224,7 +239,6 @@ def update_note_body(note_id: str, new_body: str) -> Optional[Note]:
|
||||
if not note:
|
||||
return None
|
||||
note.body = new_body
|
||||
# Update timestamp
|
||||
note.updated = now_iso_utc()
|
||||
vault = get_vault()
|
||||
save_note(vault, note)
|
||||
|
212
app/services/renderer.py
Normal file
212
app/services/renderer.py
Normal file
@@ -0,0 +1,212 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from html import escape
|
||||
from typing import Dict, Iterable, List, Optional, Tuple
|
||||
|
||||
import bleach
|
||||
from markdown_it import MarkdownIt
|
||||
from mdit_py_plugins.footnote import footnote_plugin
|
||||
from mdit_py_plugins.tasklists import tasklists_plugin
|
||||
from mdit_py_plugins.deflist import deflist_plugin
|
||||
from mdit_py_plugins.admon import admon_plugin
|
||||
from mdit_py_plugins.anchors import anchors_plugin
|
||||
from flask import url_for
|
||||
|
||||
from app.utils.slugs import slugify
|
||||
|
||||
|
||||
# Module-level MarkdownIt renderer, configured once and reused per render.
# ``MarkdownIt.use`` returns the instance, so sequential calls compose.
_md = MarkdownIt("commonmark", {"linkify": True})
_md = _md.use(footnote_plugin)
_md = _md.use(tasklists_plugin, enabled=True, label=True)
_md = _md.use(deflist_plugin)
_md = _md.use(admon_plugin)
# Only pass options supported across mdit-py-plugins versions.
_md = _md.use(anchors_plugin, permalink=True, max_level=6, slug_func=slugify)
|
||||
|
||||
# Bleach sanitization whitelists: the only HTML the renderer may emit.
ALLOWED_TAGS = [
    "p", "div", "span", "br", "hr", "blockquote", "pre", "code",  # basic text
    "h1", "h2", "h3", "h4", "h5", "h6",                           # headings
    "ul", "ol", "li", "dl", "dt", "dd",                           # lists
    "table", "thead", "tbody", "tr", "th", "td",                  # tables
    "em", "strong", "kbd", "sup", "sub", "abbr",                  # inline
    "a", "img",                                                   # links and images
]

# Per-tag attribute whitelist; "*" applies to every allowed tag.
ALLOWED_ATTRS = {
    "*": ["class", "id", "title"],
    "a": ["href", "name", "target", "rel"],
    "img": ["src", "alt", "title", "width", "height", "loading"],
}

# "data" is permitted so pasted inline images (data: URIs) keep working.
ALLOWED_PROTOCOLS = ["http", "https", "mailto", "tel", "data"]
|
||||
|
||||
|
||||
class WikiIndex:
    """Lookup table for resolving [[WikiLinks]] by title, alias, or slug.

    Keys are case-insensitive and whitespace-trimmed; the first note to
    claim a key wins (later duplicates are ignored).
    """

    def __init__(self):
        self._map: Dict[str, Dict] = {}

    @staticmethod
    def _norm(key: str) -> str:
        """Canonicalize a lookup key: trim whitespace, lowercase."""
        return key.strip().lower()

    def add(self, *, id: str, title: str, slug: str, aliases: Iterable[str] = ()):
        """Register a note under its title, slug, and every alias."""
        for candidate in (title, slug, *aliases):
            normed = self._norm(candidate)
            # First writer wins: keep the earliest note for a given key.
            if normed and normed not in self._map:
                self._map[normed] = {"id": id, "title": title, "slug": slug}

    def resolve(self, key: str) -> Optional[Dict]:
        """Return {"id", "title", "slug"} for *key*, or None if unknown."""
        return self._map.get(self._norm(key))


def build_wiki_index(notes: Iterable) -> WikiIndex:
    """Build a WikiIndex from an iterable of note-like objects.

    Each note needs ``id``, ``title`` and ``slug`` attributes; ``aliases``
    is optional and defaults to empty.
    """
    index = WikiIndex()
    for note in notes:
        index.add(
            id=note.id,
            title=note.title,
            slug=note.slug,
            aliases=getattr(note, "aliases", []),
        )
    return index
|
||||
|
||||
|
||||
def _rewrite_attachment_paths(md_text: str) -> str:
|
||||
"""
|
||||
Turn relative markdown links/images to attachments (attachments/... or ./attachments/...)
|
||||
into absolute app URLs (/attachments/...) so they work from any route.
|
||||
"""
|
||||
# Replace  and [text](attachments/...) patterns
|
||||
def repl(m: re.Match) -> str:
|
||||
prefix = m.group(1) # ]( or ](./ or ](../ etc)
|
||||
path = m.group(2)
|
||||
# Normalize to /attachments/<path>
|
||||
clean = re.sub(r"^(\./|/)?attachments/", "", path)
|
||||
return f"{prefix}/attachments/{clean}"
|
||||
|
||||
pattern = re.compile(r"(\]\()(\.?/?attachments/[^\)]+)")
|
||||
text = pattern.sub(repl, md_text)
|
||||
# Images
|
||||
img_pattern = re.compile(r"(!\[[^\]]*\]\()(\.?/?attachments/[^\)]+)")
|
||||
text = img_pattern.sub(repl, text)
|
||||
return text
|
||||
|
||||
|
||||
def _wikilink_to_md_link(wikilink: str, idx: WikiIndex) -> Tuple[str, Optional[str]]:
|
||||
"""
|
||||
Convert a single [[...]] to a markdown [text](url) if resolvable.
|
||||
Returns (replacement_text, resolved_id or None).
|
||||
Supports [[Note]], [[Note|Alias]], [[Note#Heading]], [[Note#Heading|Alias]].
|
||||
"""
|
||||
inner = wikilink.strip()[2:-2] # remove [[ ]]
|
||||
# Split alias part
|
||||
if "|" in inner:
|
||||
target_part, alias_text = inner.split("|", 1)
|
||||
else:
|
||||
target_part, alias_text = inner, None
|
||||
|
||||
# Handle heading fragment
|
||||
if "#" in target_part:
|
||||
target_title, header = target_part.split("#", 1)
|
||||
else:
|
||||
target_title, header = target_part, None
|
||||
|
||||
target_title = target_title.strip()
|
||||
alias_text = alias_text.strip() if alias_text else None
|
||||
|
||||
hit = idx.resolve(target_title)
|
||||
link_text = alias_text or (hit["title"] if hit else target_title)
|
||||
if hit:
|
||||
href = url_for("notes.notes_view", note_id=hit["id"])
|
||||
if header:
|
||||
href = f"{href}#{slugify(header)}"
|
||||
return f"[{link_text}]({href})", hit["id"]
|
||||
else:
|
||||
# Unresolved — return a stylized placeholder anchor that won't navigate
|
||||
safe_label = escape(target_title)
|
||||
if header:
|
||||
safe_label += f"#{escape(header)}"
|
||||
disp = escape(alias_text) if alias_text else safe_label
|
||||
html = f'<span class="text-warning/90 underline decoration-dotted" title="Unresolved wikilink">[[{disp}]]</span>'
|
||||
return html, None
|
||||
|
||||
|
||||
def _rewrite_wikilinks(md_text: str, idx: WikiIndex) -> Tuple[str, List[str], List[str]]:
|
||||
"""
|
||||
Replace all [[...]] links. Returns (new_text, resolved_ids, unresolved_texts)
|
||||
"""
|
||||
resolved: List[str] = []
|
||||
unresolved: List[str] = []
|
||||
|
||||
def repl(m: re.Match) -> str:
|
||||
w = m.group(0)
|
||||
replacement, note_id = _wikilink_to_md_link(w, idx)
|
||||
if note_id:
|
||||
resolved.append(note_id)
|
||||
else:
|
||||
unresolved.append(w)
|
||||
return replacement
|
||||
|
||||
# Match anything between [[ ]] non-greedy
|
||||
pattern = re.compile(r"\[\[(.+?)\]\]")
|
||||
new_text = pattern.sub(repl, md_text)
|
||||
return new_text, resolved, unresolved
|
||||
|
||||
|
||||
def sanitize_html(html: str) -> str:
    """Whitelist-sanitize rendered HTML, then linkify bare URLs.

    Uses the module-level ALLOWED_* whitelists; everything outside them is
    stripped. Linkification skips <pre>/<code> so code samples stay intact,
    and also turns bare e-mail addresses into mailto links.
    """
    scrubbed = bleach.clean(
        html,
        tags=ALLOWED_TAGS,
        attributes=ALLOWED_ATTRS,
        protocols=ALLOWED_PROTOCOLS,
        strip=True,
    )
    # Make external links open safely
    return bleach.linkify(
        scrubbed,
        callbacks=[],
        skip_tags=["pre", "code"],
        parse_email=True,
    )
|
||||
|
||||
|
||||
def render_markdown(body_md: str, all_notes: Iterable) -> dict:
    """
    Render note Markdown to sanitized HTML.

    Pipeline:
      1. Rewrite relative attachment links to absolute /attachments/... URLs.
      2. Resolve Obsidian-style [[WikiLinks]] against *all_notes*
         (by title, alias, or slug).
      3. Render with markdown-it-py plus its configured plugins.
      4. Sanitize the result through bleach.

    Returns:
        {
            "html": "<sanitized html>",
            "outbound_note_ids": [...],
            "unresolved_wikilinks": ["[[...]]", ...]
        }
    """
    index = build_wiki_index(all_notes)

    # Preprocess: attachments -> absolute, wikilinks -> markdown links
    # (or styled unresolved spans).
    preprocessed = _rewrite_attachment_paths(body_md or "")
    linked, outbound_ids, dangling = _rewrite_wikilinks(preprocessed, index)

    rendered = _md.render(linked)

    return {
        "html": sanitize_html(rendered),
        "outbound_note_ids": outbound_ids,
        "unresolved_wikilinks": dangling,
    }
|
@@ -2,7 +2,8 @@ from __future__ import annotations
|
||||
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from typing import Iterable
|
||||
from pathlib import Path
|
||||
from typing import Iterator
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
@@ -15,31 +16,48 @@ class VaultPaths:
|
||||
|
||||
class Vault:
    """Filesystem layout manager for a knowledge-base vault.

    Layout under the resolved root:
      notes/        markdown notes (recursive)
      attachments/  binary attachments
      .kb/          app-private metadata
    """

    def __init__(self, root_path: str):
        if not root_path:
            raise ValueError("Vault root path must be provided")
        resolved = Path(root_path).expanduser().resolve()
        # NOTE(review): object.__setattr__ mirrors the original code;
        # presumably kept so frozen/slotted variants stay compatible — confirm.
        object.__setattr__(
            self,
            "paths",
            VaultPaths(
                root=str(resolved),
                notes=str(resolved / "notes"),
                attachments=str(resolved / "attachments"),
                kb=str(resolved / ".kb"),
            ),
        )

    def ensure_structure(self) -> None:
        """Create notes/, attachments/ and .kb/ if missing.

        parents=True also creates the vault root itself when absent.
        """
        for directory in (self.paths.notes, self.paths.attachments, self.paths.kb):
            Path(directory).mkdir(parents=True, exist_ok=True)

    def abspath(self, rel_path: str) -> str:
        """Join *rel_path* onto the vault root and resolve it.

        If rel_path is already absolute, Path(root) / rel_path yields
        rel_path unchanged, so absolute inputs pass through.
        """
        return str((Path(self.paths.root) / rel_path).resolve())

    def relpath(self, abs_path: str) -> str:
        """Return *abs_path* relative to the vault root.

        Raises ValueError (from ``Path.relative_to``) when the path lies
        outside the vault.
        """
        return str(Path(abs_path).resolve().relative_to(Path(self.paths.root).resolve()))

    def iter_markdown_files(self) -> Iterator[str]:
        """
        Yield absolute paths to .md files under <vault>/notes recursively.

        - Allows the vault root to be hidden or not.
        - Skips hidden subdirectories within notes/ (names starting with '.').
        - Skips hidden files (names starting with '.').
        """
        notes_dir = Path(self.paths.notes)
        if not notes_dir.exists():
            return iter(())

        for dirpath, dirnames, filenames in os.walk(notes_dir):
            # Prune hidden dirs in place so os.walk never descends into them.
            dirnames[:] = [name for name in dirnames if not name.startswith(".")]
            for fname in filenames:
                if fname.startswith(".") or not fname.endswith(".md"):
                    continue
                yield str(Path(dirpath, fname).resolve())
|
Reference in New Issue
Block a user