notedeck

One damus client to rule them all
git clone git://jb55.com/notedeck
Log | Files | Refs | README | LICENSE

commit bdd53f4e0be7c3d1d01e6db501226d857e02b303
parent ee38bcceef0d0924c91607d4e77e395e9f15244e
Author: William Casarin <jb55@jb55.com>
Date:   Tue, 17 Feb 2026 10:47:31 -0800

sand-themed syntax highlighting with tokenizer tests

Replace egui_extras CodeTheme with custom warm sand palette for code
blocks. Extract tokenizer into testable pure function with 32 tests
covering edge cases (unicode, unclosed strings, roundtrip invariants).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Diffstat:
M crates/notedeck_dave/src/ui/markdown_ui.rs | 501 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 486 insertions(+), 15 deletions(-)

diff --git a/crates/notedeck_dave/src/ui/markdown_ui.rs b/crates/notedeck_dave/src/ui/markdown_ui.rs @@ -225,9 +225,236 @@ fn render_inlines(inlines: &[InlineElement], theme: &MdTheme, buffer: &str, ui: flush_job(&mut job, ui); } -fn render_code_block(language: Option<&str>, content: &str, theme: &MdTheme, ui: &mut Ui) { - use egui_extras::syntax_highlighting::{self, CodeTheme}; +/// Sand-themed syntax highlighting colors (warm, Claude-Code-esque palette) +struct SandCodeTheme { + comment: Color32, + keyword: Color32, + literal: Color32, + string: Color32, + punctuation: Color32, + plain: Color32, +} + +impl SandCodeTheme { + fn from_visuals(visuals: &egui::Visuals) -> Self { + if visuals.dark_mode { + Self { + comment: Color32::from_rgb(0x8A, 0x80, 0x72), // Warm gray-brown + keyword: Color32::from_rgb(0xD4, 0xA5, 0x74), // Amber sand + literal: Color32::from_rgb(0xC4, 0x8A, 0x6A), // Terra cotta + string: Color32::from_rgb(0xC6, 0xB4, 0x6A), // Golden wheat + punctuation: Color32::from_rgb(0xA0, 0x96, 0x88), // Light sand + plain: Color32::from_rgb(0xD5, 0xCE, 0xC4), // Warm off-white + } + } else { + Self { + comment: Color32::from_rgb(0x8A, 0x7E, 0x6E), // Warm gray + keyword: Color32::from_rgb(0x9A, 0x60, 0x2A), // Dark amber + literal: Color32::from_rgb(0x8B, 0x4C, 0x30), // Dark terra cotta + string: Color32::from_rgb(0x6B, 0x5C, 0x1A), // Dark golden + punctuation: Color32::from_rgb(0x6E, 0x64, 0x56), // Dark sand + plain: Color32::from_rgb(0x3A, 0x35, 0x2E), // Dark brown-black + } + } + } + + fn format(&self, token: SandToken, font_id: &FontId) -> TextFormat { + let color = match token { + SandToken::Comment => self.comment, + SandToken::Keyword => self.keyword, + SandToken::Literal => self.literal, + SandToken::String => self.string, + SandToken::Punctuation => self.punctuation, + SandToken::Plain => self.plain, + SandToken::Whitespace => Color32::TRANSPARENT, + }; + TextFormat::simple(font_id.clone(), color) + } +} + +#[derive(Debug, Clone, Copy, 
PartialEq, Eq)] +enum SandToken { + Comment, + Keyword, + Literal, + String, + Punctuation, + Plain, + Whitespace, +} + +struct LangConfig<'a> { + keywords: &'a [&'a str], + double_slash_comments: bool, + hash_comments: bool, +} + +impl<'a> LangConfig<'a> { + fn from_language(language: &str) -> Option<Self> { + match language.to_lowercase().as_str() { + "rs" | "rust" => Some(Self { + keywords: &[ + "as", "async", "await", "break", "const", "continue", "crate", "dyn", "else", + "enum", "extern", "false", "fn", "for", "if", "impl", "in", "let", "loop", + "match", "mod", "move", "mut", "pub", "ref", "return", "self", "Self", + "static", "struct", "super", "trait", "true", "type", "unsafe", "use", "where", + "while", + ], + double_slash_comments: true, + hash_comments: false, + }), + "c" | "h" | "hpp" | "cpp" | "c++" => Some(Self { + keywords: &[ + "auto", + "break", + "case", + "char", + "const", + "continue", + "default", + "do", + "double", + "else", + "enum", + "extern", + "false", + "float", + "for", + "goto", + "if", + "inline", + "int", + "long", + "namespace", + "new", + "nullptr", + "return", + "short", + "signed", + "sizeof", + "static", + "struct", + "switch", + "template", + "this", + "true", + "typedef", + "union", + "unsigned", + "using", + "virtual", + "void", + "volatile", + "while", + "class", + "public", + "private", + "protected", + ], + double_slash_comments: true, + hash_comments: false, + }), + "py" | "python" => Some(Self { + keywords: &[ + "and", "as", "assert", "break", "class", "continue", "def", "del", "elif", + "else", "except", "False", "finally", "for", "from", "global", "if", "import", + "in", "is", "lambda", "None", "nonlocal", "not", "or", "pass", "raise", + "return", "True", "try", "while", "with", "yield", + ], + double_slash_comments: false, + hash_comments: true, + }), + "toml" => Some(Self { + keywords: &[], + double_slash_comments: false, + hash_comments: true, + }), + "bash" | "sh" | "zsh" => Some(Self { + keywords: &[ + "if", 
"then", "else", "elif", "fi", "case", "esac", "for", "while", "until", + "do", "done", "in", "function", "return", "local", "export", "set", "unset", + ], + double_slash_comments: false, + hash_comments: true, + }), + _ => None, + } + } +} + +/// Tokenize source code into (token_type, text_slice) pairs. +/// Separated from rendering so it can be unit tested. +fn tokenize_code<'a>(code: &'a str, language: &str) -> Vec<(SandToken, &'a str)> { + let Some(lang) = LangConfig::from_language(language) else { + return vec![(SandToken::Plain, code)]; + }; + + let mut tokens = Vec::new(); + let mut text = code; + + while !text.is_empty() { + if (lang.double_slash_comments && text.starts_with("//")) + || (lang.hash_comments && text.starts_with('#')) + { + let end = text.find('\n').unwrap_or(text.len()); + tokens.push((SandToken::Comment, &text[..end])); + text = &text[end..]; + } else if text.starts_with('"') { + let end = text[1..] + .find('"') + .map(|i| i + 2) + .or_else(|| text.find('\n')) + .unwrap_or(text.len()); + tokens.push((SandToken::String, &text[..end])); + text = &text[end..]; + } else if text.starts_with(|c: char| c.is_ascii_alphanumeric() || c == '_') { + let end = text[1..] + .find(|c: char| !c.is_ascii_alphanumeric() && c != '_') + .map_or_else(|| text.len(), |i| i + 1); + let word = &text[..end]; + let token = if lang.keywords.contains(&word) { + SandToken::Keyword + } else { + SandToken::Literal + }; + tokens.push((token, word)); + text = &text[end..]; + } else if text.starts_with(|c: char| c.is_ascii_whitespace()) { + let end = text[1..] 
+ .find(|c: char| !c.is_ascii_whitespace()) + .map_or_else(|| text.len(), |i| i + 1); + tokens.push((SandToken::Whitespace, &text[..end])); + text = &text[end..]; + } else { + let mut it = text.char_indices(); + it.next(); + let end = it.next().map_or(text.len(), |(idx, _)| idx); + tokens.push((SandToken::Punctuation, &text[..end])); + text = &text[end..]; + } + } + + tokens +} + +/// Simple syntax highlighter with sand-colored theme. +/// Supports Rust, C/C++, Python, TOML, bash, and falls back to plain text. +fn highlight_sand(code: &str, language: &str, ui: &Ui) -> LayoutJob { + let theme = SandCodeTheme::from_visuals(ui.visuals()); + let font_id = ui + .style() + .override_font_id + .clone() + .unwrap_or_else(|| egui::TextStyle::Monospace.resolve(ui.style())); + + let mut job = LayoutJob::default(); + for (token, text) in tokenize_code(code, language) { + job.append(text, 0.0, theme.format(token, &font_id)); + } + job +} +fn render_code_block(language: Option<&str>, content: &str, theme: &MdTheme, ui: &mut Ui) { egui::Frame::default() .fill(theme.code_bg) .inner_margin(8.0) @@ -238,9 +465,7 @@ fn render_code_block(language: Option<&str>, content: &str, theme: &MdTheme, ui: } let lang = language.unwrap_or("text"); - let code_theme = CodeTheme::from_style(ui.style()); - let layout_job = - syntax_highlighting::highlight(ui.ctx(), ui.style(), &code_theme, content, lang); + let layout_job = highlight_sand(content, lang, ui); ui.add(egui::Label::new(layout_job).wrap()); }); ui.add_space(8.0); @@ -319,8 +544,6 @@ fn render_partial(partial: &Partial, theme: &MdTheme, buffer: &str, ui: &mut Ui) match &partial.kind { PartialKind::CodeFence { language, .. 
} => { - use egui_extras::syntax_highlighting::{self, CodeTheme}; - egui::Frame::default() .fill(theme.code_bg) .inner_margin(8.0) @@ -332,14 +555,7 @@ fn render_partial(partial: &Partial, theme: &MdTheme, buffer: &str, ui: &mut Ui) } let lang = lang_str.unwrap_or("text"); - let code_theme = CodeTheme::from_style(ui.style()); - let layout_job = syntax_highlighting::highlight( - ui.ctx(), - ui.style(), - &code_theme, - content, - lang, - ); + let layout_job = highlight_sand(content, lang, ui); ui.add(egui::Label::new(layout_job).wrap()); ui.label(RichText::new("_").weak()); }); @@ -379,3 +595,258 @@ fn render_partial(partial: &Partial, theme: &MdTheme, buffer: &str, ui: &mut Ui) } } } + +#[cfg(test)] +mod tests { + use super::*; + + /// Helper: collect (token, text) pairs + fn tokens<'a>(code: &'a str, lang: &str) -> Vec<(SandToken, &'a str)> { + tokenize_code(code, lang) + } + + /// Reassembled tokens must equal the original input (no bytes lost or duplicated) + fn assert_roundtrip(code: &str, lang: &str) { + let result: String = tokenize_code(code, lang) + .into_iter() + .map(|(_, s)| s) + .collect(); + assert_eq!(result, code, "roundtrip failed for lang={lang}"); + } + + // ---- Basic token classification ---- + + #[test] + fn test_rust_keyword() { + let toks = tokens("fn main", "rust"); + assert_eq!(toks[0], (SandToken::Keyword, "fn")); + assert_eq!(toks[1], (SandToken::Whitespace, " ")); + assert_eq!(toks[2], (SandToken::Literal, "main")); + } + + #[test] + fn test_rust_comment() { + let toks = tokens("// hello", "rust"); + assert_eq!(toks, vec![(SandToken::Comment, "// hello")]); + } + + #[test] + fn test_rust_string() { + let toks = tokens("\"hello world\"", "rust"); + assert_eq!(toks, vec![(SandToken::String, "\"hello world\"")]); + } + + #[test] + fn test_python_hash_comment() { + let toks = tokens("# comment", "python"); + assert_eq!(toks, vec![(SandToken::Comment, "# comment")]); + } + + #[test] + fn test_python_keyword() { + let toks = tokens("def foo", 
"py"); + assert_eq!(toks[0], (SandToken::Keyword, "def")); + } + + #[test] + fn test_punctuation() { + let toks = tokens("();", "rust"); + assert_eq!( + toks, + vec![ + (SandToken::Punctuation, "("), + (SandToken::Punctuation, ")"), + (SandToken::Punctuation, ";"), + ] + ); + } + + #[test] + fn test_underscore_identifier() { + let toks = tokens("_foo_bar", "rust"); + assert_eq!(toks, vec![(SandToken::Literal, "_foo_bar")]); + } + + // ---- Unsupported languages ---- + + #[test] + fn test_unknown_lang_plain() { + let toks = tokens("anything goes here", "brainfuck"); + assert_eq!(toks, vec![(SandToken::Plain, "anything goes here")]); + } + + #[test] + fn test_text_lang_plain() { + let toks = tokens("plain text", "text"); + assert_eq!(toks, vec![(SandToken::Plain, "plain text")]); + } + + // ---- Edge cases for string indexing ---- + + #[test] + fn test_empty_input() { + assert!(tokenize_code("", "rust").is_empty()); + } + + #[test] + fn test_single_char_keyword() { + // "if" is a keyword, "i" is not + let toks = tokens("i", "rust"); + assert_eq!(toks, vec![(SandToken::Literal, "i")]); + } + + #[test] + fn test_unclosed_string() { + // String that never closes — should consume to end of line or end of input + let toks = tokens("\"unclosed", "rust"); + assert_eq!(toks, vec![(SandToken::String, "\"unclosed")]); + } + + #[test] + fn test_unclosed_string_with_newline() { + let toks = tokens("\"unclosed\nnext", "rust"); + // Should stop the string at the newline + assert_eq!(toks[0], (SandToken::String, "\"unclosed")); + } + + #[test] + fn test_empty_string() { + let toks = tokens("\"\"", "rust"); + assert_eq!(toks, vec![(SandToken::String, "\"\"")]); + } + + #[test] + fn test_comment_at_end_no_newline() { + let toks = tokens("// no newline", "rust"); + assert_eq!(toks, vec![(SandToken::Comment, "// no newline")]); + } + + #[test] + fn test_comment_with_newline() { + let toks = tokens("// comment\ncode", "rust"); + assert_eq!(toks[0], (SandToken::Comment, "// comment")); + 
assert_eq!(toks[1], (SandToken::Whitespace, "\n")); + assert_eq!(toks[2], (SandToken::Literal, "code")); + } + + #[test] + fn test_multibyte_unicode_punctuation() { + // Ensure multi-byte chars don't cause panics from byte indexing + let toks = tokens("→", "rust"); + assert_eq!(toks, vec![(SandToken::Punctuation, "→")]); + } + + #[test] + fn test_mixed_unicode_and_ascii() { + let code = "let x = «val»;"; + assert_roundtrip(code, "rust"); + } + + #[test] + fn test_only_whitespace() { + let toks = tokens(" \n\t", "rust"); + assert_eq!(toks, vec![(SandToken::Whitespace, " \n\t")]); + } + + #[test] + fn test_only_punctuation() { + let toks = tokens("()", "rust"); + assert_eq!( + toks, + vec![(SandToken::Punctuation, "("), (SandToken::Punctuation, ")"),] + ); + } + + // ---- Roundtrip (no bytes lost) ---- + + #[test] + fn test_roundtrip_rust() { + assert_roundtrip( + "fn main() {\n let x = \"hello\";\n // done\n}", + "rust", + ); + } + + #[test] + fn test_roundtrip_python() { + assert_roundtrip("def foo():\n # comment\n return \"bar\"", "python"); + } + + #[test] + fn test_roundtrip_cpp() { + assert_roundtrip("#include <stdio.h>\nint main() { return 0; }", "cpp"); + } + + #[test] + fn test_roundtrip_unknown() { + assert_roundtrip("anything goes 🎉 here!", "unknown"); + } + + #[test] + fn test_roundtrip_empty() { + assert_roundtrip("", "rust"); + } + + #[test] + fn test_roundtrip_bash() { + assert_roundtrip( + "#!/bin/bash\nif [ -f \"$1\" ]; then\n echo \"exists\"\nfi", + "bash", + ); + } + + // ---- Multi-line code blocks ---- + + #[test] + fn test_multiline_rust() { + let code = "use std::io;\n\nfn main() {\n let x = 42;\n println!(\"{}\", x);\n}"; + assert_roundtrip(code, "rust"); + let toks = tokens(code, "rust"); + assert_eq!(toks[0], (SandToken::Keyword, "use")); + } + + // ---- Language detection ---- + + #[test] + fn test_case_insensitive_language() { + let toks = tokens("fn test", "Rust"); + assert_eq!(toks[0], (SandToken::Keyword, "fn")); + + let toks = 
tokens("def test", "PYTHON"); + assert_eq!(toks[0], (SandToken::Keyword, "def")); + } + + // ---- Bash support ---- + + #[test] + fn test_bash_keywords() { + let toks = tokens("if then fi", "bash"); + assert_eq!(toks[0], (SandToken::Keyword, "if")); + assert_eq!(toks[2], (SandToken::Keyword, "then")); + assert_eq!(toks[4], (SandToken::Keyword, "fi")); + } + + #[test] + fn test_bash_hash_comment() { + let toks = tokens("# this is a comment", "sh"); + assert_eq!(toks, vec![(SandToken::Comment, "# this is a comment")]); + } + + // ---- TOML ---- + + #[test] + fn test_toml_hash_comment() { + let toks = tokens("# config", "toml"); + assert_eq!(toks, vec![(SandToken::Comment, "# config")]); + } + + #[test] + fn test_toml_key_value() { + let toks = tokens("name = \"notedeck\"", "toml"); + assert_eq!(toks[0], (SandToken::Literal, "name")); + // = is punctuation + assert!(toks + .iter() + .any(|(t, s)| *t == SandToken::String && *s == "\"notedeck\"")); + } +}