notedeck

One damus client to rule them all
git clone git://jb55.com/notedeck
Log | Files | Refs | README | LICENSE

markdown_ui.rs (28218B)


      1 //! Markdown rendering for assistant messages using egui.
      2 
      3 use egui::text::LayoutJob;
      4 use egui::{Color32, FontFamily, FontId, RichText, TextFormat, Ui};
      5 use md_stream::{
      6     parse_inline, CodeBlock, InlineElement, InlineStyle, ListItem, MdElement, Partial, PartialKind,
      7     Span,
      8 };
      9 
     10 /// Theme for markdown rendering, derived from egui visuals.
     11 pub struct MdTheme {
     12     pub heading_sizes: [f32; 6],
     13     pub code_bg: Color32,
     14     pub code_text: Color32,
     15     pub link_color: Color32,
     16     pub blockquote_border: Color32,
     17     pub blockquote_bg: Color32,
     18 }
     19 
     20 impl MdTheme {
     21     pub fn from_visuals(visuals: &egui::Visuals) -> Self {
     22         let bg = visuals.panel_fill;
     23         // Code bg: slightly lighter than panel background
     24         let code_bg = Color32::from_rgb(
     25             bg.r().saturating_add(25),
     26             bg.g().saturating_add(25),
     27             bg.b().saturating_add(25),
     28         );
     29         Self {
     30             heading_sizes: [24.0, 20.0, 18.0, 16.0, 14.0, 12.0],
     31             code_bg,
     32             code_text: Color32::from_rgb(0xD4, 0xA5, 0x74), // Muted amber/sand
     33             link_color: Color32::from_rgb(100, 149, 237),   // Cornflower blue
     34             blockquote_border: visuals.widgets.noninteractive.bg_stroke.color,
     35             blockquote_bg: visuals.faint_bg_color,
     36         }
     37     }
     38 }
     39 
     40 /// Render all parsed markdown elements plus any partial state.
     41 pub fn render_assistant_message(
     42     elements: &[MdElement],
     43     partial: Option<&Partial>,
     44     buffer: &str,
     45     ui: &mut Ui,
     46 ) {
     47     let theme = MdTheme::from_visuals(ui.visuals());
     48 
     49     ui.vertical(|ui| {
     50         for element in elements {
     51             render_element(element, &theme, buffer, ui);
     52         }
     53 
     54         // Render partial (speculative) content for immediate feedback
     55         if let Some(partial) = partial {
     56             render_partial(partial, &theme, buffer, ui);
     57         }
     58     });
     59 }
     60 
     61 fn render_element(element: &MdElement, theme: &MdTheme, buffer: &str, ui: &mut Ui) {
     62     match element {
     63         MdElement::Heading { level, content } => {
     64             let size = theme.heading_sizes[(*level as usize).saturating_sub(1).min(5)];
     65             ui.add(
     66                 egui::Label::new(RichText::new(content.resolve(buffer)).size(size).strong()).wrap(),
     67             );
     68             ui.add_space(4.0);
     69         }
     70 
     71         MdElement::Paragraph(inlines) => {
     72             ui.horizontal_wrapped(|ui| {
     73                 render_inlines(inlines, theme, buffer, ui);
     74             });
     75             ui.add_space(8.0);
     76         }
     77 
     78         MdElement::CodeBlock(CodeBlock { language, content }) => {
     79             render_code_block(
     80                 language.map(|s| s.resolve(buffer)),
     81                 content.resolve(buffer),
     82                 theme,
     83                 ui,
     84             );
     85         }
     86 
     87         MdElement::BlockQuote(nested) => {
     88             egui::Frame::default()
     89                 .fill(theme.blockquote_bg)
     90                 .stroke(egui::Stroke::new(2.0, theme.blockquote_border))
     91                 .inner_margin(egui::Margin::symmetric(8, 4))
     92                 .show(ui, |ui| {
     93                     for elem in nested {
     94                         render_element(elem, theme, buffer, ui);
     95                     }
     96                 });
     97             ui.add_space(8.0);
     98         }
     99 
    100         MdElement::UnorderedList(items) => {
    101             for item in items {
    102                 render_list_item(item, "\u{2022}", theme, buffer, ui);
    103             }
    104             ui.add_space(8.0);
    105         }
    106 
    107         MdElement::OrderedList { start, items } => {
    108             for (i, item) in items.iter().enumerate() {
    109                 let marker = format!("{}.", start + i as u32);
    110                 render_list_item(item, &marker, theme, buffer, ui);
    111             }
    112             ui.add_space(8.0);
    113         }
    114 
    115         MdElement::Table { headers, rows } => {
    116             render_table(headers, rows, theme, buffer, ui);
    117         }
    118 
    119         MdElement::ThematicBreak => {
    120             ui.separator();
    121             ui.add_space(8.0);
    122         }
    123 
    124         MdElement::Text(span) => {
    125             ui.label(span.resolve(buffer));
    126         }
    127     }
    128 }
    129 
    130 /// Flush a LayoutJob as a wrapped label if it has any content.
    131 fn flush_job(job: &mut LayoutJob, ui: &mut Ui) {
    132     if !job.text.is_empty() {
    133         job.wrap.max_width = ui.available_width();
    134         ui.add(egui::Label::new(std::mem::take(job)).wrap());
    135     }
    136 }
    137 
    138 fn render_inlines(inlines: &[InlineElement], theme: &MdTheme, buffer: &str, ui: &mut Ui) {
    139     let font_size = ui.style().text_styles[&egui::TextStyle::Body].size;
    140     let text_color = ui.visuals().text_color();
    141 
    142     let text_fmt = TextFormat {
    143         font_id: FontId::new(font_size, FontFamily::Proportional),
    144         color: text_color,
    145         ..Default::default()
    146     };
    147 
    148     let code_fmt = TextFormat {
    149         font_id: FontId::new(font_size, FontFamily::Monospace),
    150         color: theme.code_text,
    151         background: theme.code_bg,
    152         ..Default::default()
    153     };
    154 
    155     let italic_fmt = TextFormat {
    156         font_id: FontId::new(font_size, FontFamily::Proportional),
    157         color: text_color,
    158         italics: true,
    159         ..Default::default()
    160     };
    161 
    162     let strikethrough_fmt = TextFormat {
    163         font_id: FontId::new(font_size, FontFamily::Proportional),
    164         color: text_color,
    165         strikethrough: egui::Stroke::new(1.0, text_color),
    166         ..Default::default()
    167     };
    168 
    169     let mut job = LayoutJob::default();
    170 
    171     for inline in inlines {
    172         match inline {
    173             InlineElement::Text(span) => {
    174                 job.append(span.resolve(buffer), 0.0, text_fmt.clone());
    175             }
    176 
    177             InlineElement::Code(span) => {
    178                 job.append(span.resolve(buffer), 0.0, code_fmt.clone());
    179             }
    180 
    181             InlineElement::Styled { style, content } => {
    182                 let text = content.resolve(buffer);
    183                 match style {
    184                     InlineStyle::Italic => {
    185                         job.append(text, 0.0, italic_fmt.clone());
    186                     }
    187                     InlineStyle::Strikethrough => {
    188                         job.append(text, 0.0, strikethrough_fmt.clone());
    189                     }
    190                     InlineStyle::Bold | InlineStyle::BoldItalic => {
    191                         // TextFormat has no bold/weight — flush and render as separate label
    192                         flush_job(&mut job, ui);
    193                         let rt = if matches!(style, InlineStyle::BoldItalic) {
    194                             RichText::new(text).strong().italics()
    195                         } else {
    196                             RichText::new(text).strong()
    197                         };
    198                         ui.label(rt);
    199                     }
    200                 }
    201             }
    202 
    203             InlineElement::Link { text, url } => {
    204                 flush_job(&mut job, ui);
    205                 ui.hyperlink_to(
    206                     RichText::new(text.resolve(buffer)).color(theme.link_color),
    207                     url.resolve(buffer),
    208                 );
    209             }
    210 
    211             InlineElement::Image { alt, url } => {
    212                 flush_job(&mut job, ui);
    213                 ui.hyperlink_to(
    214                     format!("[Image: {}]", alt.resolve(buffer)),
    215                     url.resolve(buffer),
    216                 );
    217             }
    218 
    219             InlineElement::LineBreak => {
    220                 job.append("\n", 0.0, text_fmt.clone());
    221             }
    222         }
    223     }
    224 
    225     flush_job(&mut job, ui);
    226 }
    227 
    228 /// Sand-themed syntax highlighting colors (warm, Claude-Code-esque palette)
    229 pub(crate) struct SandCodeTheme {
    230     comment: Color32,
    231     keyword: Color32,
    232     literal: Color32,
    233     string: Color32,
    234     punctuation: Color32,
    235     plain: Color32,
    236 }
    237 
    238 impl SandCodeTheme {
    239     pub(crate) fn from_visuals(visuals: &egui::Visuals) -> Self {
    240         if visuals.dark_mode {
    241             Self {
    242                 comment: Color32::from_rgb(0x8A, 0x80, 0x72), // Warm gray-brown
    243                 keyword: Color32::from_rgb(0xD4, 0xA5, 0x74), // Amber sand
    244                 literal: Color32::from_rgb(0xC4, 0x8A, 0x6A), // Terra cotta
    245                 string: Color32::from_rgb(0xC6, 0xB4, 0x6A),  // Golden wheat
    246                 punctuation: Color32::from_rgb(0xA0, 0x96, 0x88), // Light sand
    247                 plain: Color32::from_rgb(0xD5, 0xCE, 0xC4),   // Warm off-white
    248             }
    249         } else {
    250             Self {
    251                 comment: Color32::from_rgb(0x8A, 0x7E, 0x6E), // Warm gray
    252                 keyword: Color32::from_rgb(0x9A, 0x60, 0x2A), // Dark amber
    253                 literal: Color32::from_rgb(0x8B, 0x4C, 0x30), // Dark terra cotta
    254                 string: Color32::from_rgb(0x6B, 0x5C, 0x1A),  // Dark golden
    255                 punctuation: Color32::from_rgb(0x6E, 0x64, 0x56), // Dark sand
    256                 plain: Color32::from_rgb(0x3A, 0x35, 0x2E),   // Dark brown-black
    257             }
    258         }
    259     }
    260 
    261     pub(crate) fn format(&self, token: SandToken, font_id: &FontId) -> TextFormat {
    262         let color = match token {
    263             SandToken::Comment => self.comment,
    264             SandToken::Keyword => self.keyword,
    265             SandToken::Literal => self.literal,
    266             SandToken::String => self.string,
    267             SandToken::Punctuation => self.punctuation,
    268             SandToken::Plain => self.plain,
    269             SandToken::Whitespace => Color32::TRANSPARENT,
    270         };
    271         TextFormat::simple(font_id.clone(), color)
    272     }
    273 }
    274 
    275 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
    276 pub(crate) enum SandToken {
    277     Comment,
    278     Keyword,
    279     Literal,
    280     String,
    281     Punctuation,
    282     Plain,
    283     Whitespace,
    284 }
    285 
    286 struct LangConfig<'a> {
    287     keywords: &'a [&'a str],
    288     double_slash_comments: bool,
    289     hash_comments: bool,
    290 }
    291 
    292 impl<'a> LangConfig<'a> {
    293     fn from_language(language: &str) -> Option<Self> {
    294         match language.to_lowercase().as_str() {
    295             "rs" | "rust" => Some(Self {
    296                 keywords: &[
    297                     "as", "async", "await", "break", "const", "continue", "crate", "dyn", "else",
    298                     "enum", "extern", "false", "fn", "for", "if", "impl", "in", "let", "loop",
    299                     "match", "mod", "move", "mut", "pub", "ref", "return", "self", "Self",
    300                     "static", "struct", "super", "trait", "true", "type", "unsafe", "use", "where",
    301                     "while",
    302                 ],
    303                 double_slash_comments: true,
    304                 hash_comments: false,
    305             }),
    306             "c" | "h" | "hpp" | "cpp" | "c++" => Some(Self {
    307                 keywords: &[
    308                     "auto",
    309                     "break",
    310                     "case",
    311                     "char",
    312                     "const",
    313                     "continue",
    314                     "default",
    315                     "do",
    316                     "double",
    317                     "else",
    318                     "enum",
    319                     "extern",
    320                     "false",
    321                     "float",
    322                     "for",
    323                     "goto",
    324                     "if",
    325                     "inline",
    326                     "int",
    327                     "long",
    328                     "namespace",
    329                     "new",
    330                     "nullptr",
    331                     "return",
    332                     "short",
    333                     "signed",
    334                     "sizeof",
    335                     "static",
    336                     "struct",
    337                     "switch",
    338                     "template",
    339                     "this",
    340                     "true",
    341                     "typedef",
    342                     "union",
    343                     "unsigned",
    344                     "using",
    345                     "virtual",
    346                     "void",
    347                     "volatile",
    348                     "while",
    349                     "class",
    350                     "public",
    351                     "private",
    352                     "protected",
    353                 ],
    354                 double_slash_comments: true,
    355                 hash_comments: false,
    356             }),
    357             "py" | "python" => Some(Self {
    358                 keywords: &[
    359                     "and", "as", "assert", "break", "class", "continue", "def", "del", "elif",
    360                     "else", "except", "False", "finally", "for", "from", "global", "if", "import",
    361                     "in", "is", "lambda", "None", "nonlocal", "not", "or", "pass", "raise",
    362                     "return", "True", "try", "while", "with", "yield",
    363                 ],
    364                 double_slash_comments: false,
    365                 hash_comments: true,
    366             }),
    367             "toml" => Some(Self {
    368                 keywords: &[],
    369                 double_slash_comments: false,
    370                 hash_comments: true,
    371             }),
    372             "bash" | "sh" | "zsh" => Some(Self {
    373                 keywords: &[
    374                     "if", "then", "else", "elif", "fi", "case", "esac", "for", "while", "until",
    375                     "do", "done", "in", "function", "return", "local", "export", "set", "unset",
    376                 ],
    377                 double_slash_comments: false,
    378                 hash_comments: true,
    379             }),
    380             _ => None,
    381         }
    382     }
    383 }
    384 
    385 /// Tokenize source code into (token_type, text_slice) pairs.
    386 /// Separated from rendering so it can be unit tested.
    387 pub(crate) fn tokenize_code<'a>(code: &'a str, language: &str) -> Vec<(SandToken, &'a str)> {
    388     let Some(lang) = LangConfig::from_language(language) else {
    389         return vec![(SandToken::Plain, code)];
    390     };
    391 
    392     let mut tokens = Vec::new();
    393     let mut text = code;
    394 
    395     while !text.is_empty() {
    396         if (lang.double_slash_comments && text.starts_with("//"))
    397             || (lang.hash_comments && text.starts_with('#'))
    398         {
    399             let end = text.find('\n').unwrap_or(text.len());
    400             tokens.push((SandToken::Comment, &text[..end]));
    401             text = &text[end..];
    402         } else if text.starts_with('"') {
    403             let end = text[1..]
    404                 .find('"')
    405                 .map(|i| i + 2)
    406                 .or_else(|| text.find('\n'))
    407                 .unwrap_or(text.len());
    408             tokens.push((SandToken::String, &text[..end]));
    409             text = &text[end..];
    410         } else if text.starts_with(|c: char| c.is_ascii_alphanumeric() || c == '_') {
    411             let end = text[1..]
    412                 .find(|c: char| !c.is_ascii_alphanumeric() && c != '_')
    413                 .map_or_else(|| text.len(), |i| i + 1);
    414             let word = &text[..end];
    415             let token = if lang.keywords.contains(&word) {
    416                 SandToken::Keyword
    417             } else {
    418                 SandToken::Literal
    419             };
    420             tokens.push((token, word));
    421             text = &text[end..];
    422         } else if text.starts_with(|c: char| c.is_ascii_whitespace()) {
    423             let end = text[1..]
    424                 .find(|c: char| !c.is_ascii_whitespace())
    425                 .map_or_else(|| text.len(), |i| i + 1);
    426             tokens.push((SandToken::Whitespace, &text[..end]));
    427             text = &text[end..];
    428         } else {
    429             let mut it = text.char_indices();
    430             it.next();
    431             let end = it.next().map_or(text.len(), |(idx, _)| idx);
    432             tokens.push((SandToken::Punctuation, &text[..end]));
    433             text = &text[end..];
    434         }
    435     }
    436 
    437     tokens
    438 }
    439 
    440 /// Simple syntax highlighter with sand-colored theme.
    441 /// Supports Rust, C/C++, Python, TOML, bash, and falls back to plain text.
    442 fn highlight_sand(code: &str, language: &str, ui: &Ui) -> LayoutJob {
    443     let theme = SandCodeTheme::from_visuals(ui.visuals());
    444     let font_id = ui
    445         .style()
    446         .override_font_id
    447         .clone()
    448         .unwrap_or_else(|| egui::TextStyle::Monospace.resolve(ui.style()));
    449 
    450     let mut job = LayoutJob::default();
    451     for (token, text) in tokenize_code(code, language) {
    452         job.append(text, 0.0, theme.format(token, &font_id));
    453     }
    454     job
    455 }
    456 
    457 fn render_code_block(language: Option<&str>, content: &str, theme: &MdTheme, ui: &mut Ui) {
    458     egui::Frame::default()
    459         .fill(theme.code_bg)
    460         .inner_margin(8.0)
    461         .corner_radius(4.0)
    462         .show(ui, |ui| {
    463             if let Some(lang) = language {
    464                 ui.label(RichText::new(lang).small().weak());
    465             }
    466 
    467             let lang = language.unwrap_or("text");
    468             let layout_job = highlight_sand(content, lang, ui);
    469             ui.add(egui::Label::new(layout_job).wrap());
    470         });
    471     ui.add_space(8.0);
    472 }
    473 
    474 fn render_list_item(item: &ListItem, marker: &str, theme: &MdTheme, buffer: &str, ui: &mut Ui) {
    475     ui.horizontal(|ui| {
    476         ui.label(RichText::new(marker).weak());
    477         ui.vertical(|ui| {
    478             ui.horizontal_wrapped(|ui| {
    479                 render_inlines(&item.content, theme, buffer, ui);
    480             });
    481             // Render nested list if present
    482             if let Some(nested) = &item.nested {
    483                 ui.indent("nested", |ui| {
    484                     render_element(nested, theme, buffer, ui);
    485                 });
    486             }
    487         });
    488     });
    489 }
    490 
    491 fn render_table(headers: &[Span], rows: &[Vec<Span>], theme: &MdTheme, buffer: &str, ui: &mut Ui) {
    492     use egui_extras::{Column, TableBuilder};
    493 
    494     let num_cols = headers.len();
    495     if num_cols == 0 {
    496         return;
    497     }
    498 
    499     let cell_padding = egui::Margin::symmetric(8, 4);
    500 
    501     // Use first header's byte offset as id_salt so multiple tables don't clash
    502     let salt = headers.first().map_or(0, |h| h.start);
    503 
    504     // Wrap in horizontal scroll so wide tables don't break layout on small screens
    505     egui::ScrollArea::horizontal()
    506         .id_salt(("md_table_scroll", salt))
    507         .show(ui, |ui| {
    508             let mut builder = TableBuilder::new(ui)
    509                 .id_salt(salt)
    510                 .vscroll(false)
    511                 .auto_shrink([false, false]);
    512             for _ in 0..num_cols {
    513                 builder = builder.column(Column::auto().resizable(true));
    514             }
    515 
    516             let header_bg = theme.code_bg;
    517 
    518             builder
    519                 .header(28.0, |mut header| {
    520                     for h in headers {
    521                         header.col(|ui| {
    522                             ui.painter().rect_filled(ui.max_rect(), 0.0, header_bg);
    523                             egui::Frame::NONE.inner_margin(cell_padding).show(ui, |ui| {
    524                                 ui.strong(h.resolve(buffer));
    525                             });
    526                         });
    527                     }
    528                 })
    529                 .body(|mut body| {
    530                     for row in rows {
    531                         body.row(28.0, |mut table_row| {
    532                             for i in 0..num_cols {
    533                                 table_row.col(|ui| {
    534                                     egui::Frame::NONE.inner_margin(cell_padding).show(ui, |ui| {
    535                                         if let Some(cell) = row.get(i) {
    536                                             ui.label(cell.resolve(buffer));
    537                                         }
    538                                     });
    539                                 });
    540                             }
    541                         });
    542                     }
    543                 });
    544         });
    545     ui.add_space(8.0);
    546 }
    547 
    548 fn render_partial(partial: &Partial, theme: &MdTheme, buffer: &str, ui: &mut Ui) {
    549     let content = partial.content(buffer);
    550     if content.is_empty() {
    551         return;
    552     }
    553 
    554     match &partial.kind {
    555         PartialKind::CodeFence { language, .. } => {
    556             egui::Frame::default()
    557                 .fill(theme.code_bg)
    558                 .inner_margin(8.0)
    559                 .corner_radius(4.0)
    560                 .show(ui, |ui| {
    561                     let lang_str = language.map(|s| s.resolve(buffer));
    562                     if let Some(lang) = lang_str {
    563                         ui.label(RichText::new(lang).small().weak());
    564                     }
    565 
    566                     let lang = lang_str.unwrap_or("text");
    567                     let layout_job = highlight_sand(content, lang, ui);
    568                     ui.add(egui::Label::new(layout_job).wrap());
    569                     ui.label(RichText::new("_").weak());
    570                 });
    571         }
    572 
    573         PartialKind::Heading { level } => {
    574             let size = theme.heading_sizes[(*level as usize).saturating_sub(1).min(5)];
    575             ui.add(egui::Label::new(RichText::new(content).size(size).strong()).wrap());
    576         }
    577 
    578         PartialKind::Table {
    579             headers,
    580             rows,
    581             seen_separator,
    582         } => {
    583             if *seen_separator {
    584                 render_table(headers, rows, theme, buffer, ui);
    585             } else {
    586                 ui.label(content);
    587             }
    588         }
    589 
    590         PartialKind::Paragraph => {
    591             // Parse inline elements from the partial content for proper formatting
    592             let inlines = parse_inline(content, partial.content_start);
    593             ui.horizontal_wrapped(|ui| {
    594                 render_inlines(&inlines, theme, buffer, ui);
    595             });
    596         }
    597 
    598         _ => {
    599             // Other partial kinds - parse inline elements too
    600             let inlines = parse_inline(content, partial.content_start);
    601             ui.horizontal_wrapped(|ui| {
    602                 render_inlines(&inlines, theme, buffer, ui);
    603             });
    604         }
    605     }
    606 }
    607 
// Unit tests for `tokenize_code`. The egui rendering functions are not
// exercised here.
#[cfg(test)]
mod tests {
    use super::*;

    /// Helper: shorthand for `tokenize_code`, returning (token, text) pairs.
    fn tokens<'a>(code: &'a str, lang: &str) -> Vec<(SandToken, &'a str)> {
        tokenize_code(code, lang)
    }

    /// Reassembled tokens must equal the original input (no bytes lost or duplicated)
    fn assert_roundtrip(code: &str, lang: &str) {
        let result: String = tokenize_code(code, lang)
            .into_iter()
            .map(|(_, s)| s)
            .collect();
        assert_eq!(result, code, "roundtrip failed for lang={lang}");
    }

    // ---- Basic token classification ----

    #[test]
    fn test_rust_keyword() {
        // Non-keyword words are classified as Literal.
        let toks = tokens("fn main", "rust");
        assert_eq!(toks[0], (SandToken::Keyword, "fn"));
        assert_eq!(toks[1], (SandToken::Whitespace, " "));
        assert_eq!(toks[2], (SandToken::Literal, "main"));
    }

    #[test]
    fn test_rust_comment() {
        let toks = tokens("// hello", "rust");
        assert_eq!(toks, vec![(SandToken::Comment, "// hello")]);
    }

    #[test]
    fn test_rust_string() {
        // The String token includes both quotes.
        let toks = tokens("\"hello world\"", "rust");
        assert_eq!(toks, vec![(SandToken::String, "\"hello world\"")]);
    }

    #[test]
    fn test_python_hash_comment() {
        let toks = tokens("# comment", "python");
        assert_eq!(toks, vec![(SandToken::Comment, "# comment")]);
    }

    #[test]
    fn test_python_keyword() {
        // "py" is accepted as an alias for Python.
        let toks = tokens("def foo", "py");
        assert_eq!(toks[0], (SandToken::Keyword, "def"));
    }

    #[test]
    fn test_punctuation() {
        // Each punctuation character becomes its own token.
        let toks = tokens("();", "rust");
        assert_eq!(
            toks,
            vec![
                (SandToken::Punctuation, "("),
                (SandToken::Punctuation, ")"),
                (SandToken::Punctuation, ";"),
            ]
        );
    }

    #[test]
    fn test_underscore_identifier() {
        // Underscores are word characters, including in leading position.
        let toks = tokens("_foo_bar", "rust");
        assert_eq!(toks, vec![(SandToken::Literal, "_foo_bar")]);
    }

    // ---- Unsupported languages ----

    #[test]
    fn test_unknown_lang_plain() {
        // Unknown languages yield the whole input as one Plain token.
        let toks = tokens("anything goes here", "brainfuck");
        assert_eq!(toks, vec![(SandToken::Plain, "anything goes here")]);
    }

    #[test]
    fn test_text_lang_plain() {
        // "text" is the language the renderer passes for untagged code blocks.
        let toks = tokens("plain text", "text");
        assert_eq!(toks, vec![(SandToken::Plain, "plain text")]);
    }

    // ---- Edge cases for string indexing ----

    #[test]
    fn test_empty_input() {
        assert!(tokenize_code("", "rust").is_empty());
    }

    #[test]
    fn test_single_char_keyword() {
        // "if" is a keyword, "i" is not
        let toks = tokens("i", "rust");
        assert_eq!(toks, vec![(SandToken::Literal, "i")]);
    }

    #[test]
    fn test_unclosed_string() {
        // String that never closes — should consume to end of line or end of input
        let toks = tokens("\"unclosed", "rust");
        assert_eq!(toks, vec![(SandToken::String, "\"unclosed")]);
    }

    #[test]
    fn test_unclosed_string_with_newline() {
        let toks = tokens("\"unclosed\nnext", "rust");
        // Should stop the string at the newline
        assert_eq!(toks[0], (SandToken::String, "\"unclosed"));
    }

    #[test]
    fn test_empty_string() {
        let toks = tokens("\"\"", "rust");
        assert_eq!(toks, vec![(SandToken::String, "\"\"")]);
    }

    #[test]
    fn test_comment_at_end_no_newline() {
        let toks = tokens("// no newline", "rust");
        assert_eq!(toks, vec![(SandToken::Comment, "// no newline")]);
    }

    #[test]
    fn test_comment_with_newline() {
        // The newline is not part of the comment; it starts the next Whitespace token.
        let toks = tokens("// comment\ncode", "rust");
        assert_eq!(toks[0], (SandToken::Comment, "// comment"));
        assert_eq!(toks[1], (SandToken::Whitespace, "\n"));
        assert_eq!(toks[2], (SandToken::Literal, "code"));
    }

    #[test]
    fn test_multibyte_unicode_punctuation() {
        // Ensure multi-byte chars don't cause panics from byte indexing
        let toks = tokens("→", "rust");
        assert_eq!(toks, vec![(SandToken::Punctuation, "→")]);
    }

    #[test]
    fn test_mixed_unicode_and_ascii() {
        let code = "let x = «val»;";
        assert_roundtrip(code, "rust");
    }

    #[test]
    fn test_only_whitespace() {
        // Consecutive whitespace of any kind is merged into one token.
        let toks = tokens("   \n\t", "rust");
        assert_eq!(toks, vec![(SandToken::Whitespace, "   \n\t")]);
    }

    #[test]
    fn test_only_punctuation() {
        let toks = tokens("()", "rust");
        assert_eq!(
            toks,
            vec![(SandToken::Punctuation, "("), (SandToken::Punctuation, ")"),]
        );
    }

    // ---- Roundtrip (no bytes lost) ----

    #[test]
    fn test_roundtrip_rust() {
        assert_roundtrip(
            "fn main() {\n    let x = \"hello\";\n    // done\n}",
            "rust",
        );
    }

    #[test]
    fn test_roundtrip_python() {
        assert_roundtrip("def foo():\n    # comment\n    return \"bar\"", "python");
    }

    #[test]
    fn test_roundtrip_cpp() {
        // `#` is not a comment marker for C/C++, so the include line is tokenized normally.
        assert_roundtrip("#include <stdio.h>\nint main() { return 0; }", "cpp");
    }

    #[test]
    fn test_roundtrip_unknown() {
        assert_roundtrip("anything goes 🎉 here!", "unknown");
    }

    #[test]
    fn test_roundtrip_empty() {
        assert_roundtrip("", "rust");
    }

    #[test]
    fn test_roundtrip_bash() {
        assert_roundtrip(
            "#!/bin/bash\nif [ -f \"$1\" ]; then\n  echo \"exists\"\nfi",
            "bash",
        );
    }

    // ---- Multi-line code blocks ----

    #[test]
    fn test_multiline_rust() {
        let code = "use std::io;\n\nfn main() {\n    let x = 42;\n    println!(\"{}\", x);\n}";
        assert_roundtrip(code, "rust");
        let toks = tokens(code, "rust");
        assert_eq!(toks[0], (SandToken::Keyword, "use"));
    }

    // ---- Language detection ----

    #[test]
    fn test_case_insensitive_language() {
        let toks = tokens("fn test", "Rust");
        assert_eq!(toks[0], (SandToken::Keyword, "fn"));

        let toks = tokens("def test", "PYTHON");
        assert_eq!(toks[0], (SandToken::Keyword, "def"));
    }

    // ---- Bash support ----

    #[test]
    fn test_bash_keywords() {
        // Odd indices hold the Whitespace tokens between the words.
        let toks = tokens("if then fi", "bash");
        assert_eq!(toks[0], (SandToken::Keyword, "if"));
        assert_eq!(toks[2], (SandToken::Keyword, "then"));
        assert_eq!(toks[4], (SandToken::Keyword, "fi"));
    }

    #[test]
    fn test_bash_hash_comment() {
        // "sh" is accepted as an alias for bash.
        let toks = tokens("# this is a comment", "sh");
        assert_eq!(toks, vec![(SandToken::Comment, "# this is a comment")]);
    }

    // ---- TOML ----

    #[test]
    fn test_toml_hash_comment() {
        let toks = tokens("# config", "toml");
        assert_eq!(toks, vec![(SandToken::Comment, "# config")]);
    }

    #[test]
    fn test_toml_key_value() {
        // TOML has no keywords, so the key is a Literal.
        let toks = tokens("name = \"notedeck\"", "toml");
        assert_eq!(toks[0], (SandToken::Literal, "name"));
        // = is punctuation
        assert!(toks
            .iter()
            .any(|(t, s)| *t == SandToken::String && *s == "\"notedeck\""));
    }
}