markdown_ui.rs (28218B)
1 //! Markdown rendering for assistant messages using egui. 2 3 use egui::text::LayoutJob; 4 use egui::{Color32, FontFamily, FontId, RichText, TextFormat, Ui}; 5 use md_stream::{ 6 parse_inline, CodeBlock, InlineElement, InlineStyle, ListItem, MdElement, Partial, PartialKind, 7 Span, 8 }; 9 10 /// Theme for markdown rendering, derived from egui visuals. 11 pub struct MdTheme { 12 pub heading_sizes: [f32; 6], 13 pub code_bg: Color32, 14 pub code_text: Color32, 15 pub link_color: Color32, 16 pub blockquote_border: Color32, 17 pub blockquote_bg: Color32, 18 } 19 20 impl MdTheme { 21 pub fn from_visuals(visuals: &egui::Visuals) -> Self { 22 let bg = visuals.panel_fill; 23 // Code bg: slightly lighter than panel background 24 let code_bg = Color32::from_rgb( 25 bg.r().saturating_add(25), 26 bg.g().saturating_add(25), 27 bg.b().saturating_add(25), 28 ); 29 Self { 30 heading_sizes: [24.0, 20.0, 18.0, 16.0, 14.0, 12.0], 31 code_bg, 32 code_text: Color32::from_rgb(0xD4, 0xA5, 0x74), // Muted amber/sand 33 link_color: Color32::from_rgb(100, 149, 237), // Cornflower blue 34 blockquote_border: visuals.widgets.noninteractive.bg_stroke.color, 35 blockquote_bg: visuals.faint_bg_color, 36 } 37 } 38 } 39 40 /// Render all parsed markdown elements plus any partial state. 41 pub fn render_assistant_message( 42 elements: &[MdElement], 43 partial: Option<&Partial>, 44 buffer: &str, 45 ui: &mut Ui, 46 ) { 47 let theme = MdTheme::from_visuals(ui.visuals()); 48 49 ui.vertical(|ui| { 50 for element in elements { 51 render_element(element, &theme, buffer, ui); 52 } 53 54 // Render partial (speculative) content for immediate feedback 55 if let Some(partial) = partial { 56 render_partial(partial, &theme, buffer, ui); 57 } 58 }); 59 } 60 61 fn render_element(element: &MdElement, theme: &MdTheme, buffer: &str, ui: &mut Ui) { 62 match element { 63 MdElement::Heading { level, content } => { 64 let size = theme.heading_sizes[(*level as usize).saturating_sub(1).min(5)]; 65 ui.add( 66 egui::Label::new(RichText::new(content.resolve(buffer)).size(size).strong()).wrap(), 67 ); 68 ui.add_space(4.0); 69 } 70 71 MdElement::Paragraph(inlines) => { 72 ui.horizontal_wrapped(|ui| { 73 render_inlines(inlines, theme, buffer, ui); 74 }); 75 ui.add_space(8.0); 76 } 77 78 MdElement::CodeBlock(CodeBlock { language, content }) => { 79 render_code_block( 80 language.map(|s| s.resolve(buffer)), 81 content.resolve(buffer), 82 theme, 83 ui, 84 ); 85 } 86 87 MdElement::BlockQuote(nested) => { 88 egui::Frame::default() 89 .fill(theme.blockquote_bg) 90 .stroke(egui::Stroke::new(2.0, theme.blockquote_border)) 91 .inner_margin(egui::Margin::symmetric(8, 4)) 92 .show(ui, |ui| { 93 for elem in nested { 94 render_element(elem, theme, buffer, ui); 95 } 96 }); 97 ui.add_space(8.0); 98 } 99 100 MdElement::UnorderedList(items) => { 101 for item in items { 102 render_list_item(item, "\u{2022}", theme, buffer, ui); 103 } 104 ui.add_space(8.0); 105 } 106 107 MdElement::OrderedList { start, items } => { 108 for (i, item) in items.iter().enumerate() { 109 let marker = format!("{}.", start + i as u32); 110 render_list_item(item, &marker, theme, buffer, ui); 111 } 112 ui.add_space(8.0); 113 } 114 115 MdElement::Table { headers, rows } => { 116 render_table(headers, rows, theme, buffer, ui); 117 } 118 119 MdElement::ThematicBreak => { 120 ui.separator(); 121 ui.add_space(8.0); 122 } 123 124 MdElement::Text(span) => { 125 ui.label(span.resolve(buffer)); 126 } 127 } 128 } 129 130 /// Flush a LayoutJob as a wrapped label if it has any content. 131 fn flush_job(job: &mut LayoutJob, ui: &mut Ui) { 132 if !job.text.is_empty() { 133 job.wrap.max_width = ui.available_width(); 134 ui.add(egui::Label::new(std::mem::take(job)).wrap()); 135 } 136 } 137 138 fn render_inlines(inlines: &[InlineElement], theme: &MdTheme, buffer: &str, ui: &mut Ui) { 139 let font_size = ui.style().text_styles[&egui::TextStyle::Body].size; 140 let text_color = ui.visuals().text_color(); 141 142 let text_fmt = TextFormat { 143 font_id: FontId::new(font_size, FontFamily::Proportional), 144 color: text_color, 145 ..Default::default() 146 }; 147 148 let code_fmt = TextFormat { 149 font_id: FontId::new(font_size, FontFamily::Monospace), 150 color: theme.code_text, 151 background: theme.code_bg, 152 ..Default::default() 153 }; 154 155 let italic_fmt = TextFormat { 156 font_id: FontId::new(font_size, FontFamily::Proportional), 157 color: text_color, 158 italics: true, 159 ..Default::default() 160 }; 161 162 let strikethrough_fmt = TextFormat { 163 font_id: FontId::new(font_size, FontFamily::Proportional), 164 color: text_color, 165 strikethrough: egui::Stroke::new(1.0, text_color), 166 ..Default::default() 167 }; 168 169 let mut job = LayoutJob::default(); 170 171 for inline in inlines { 172 match inline { 173 InlineElement::Text(span) => { 174 job.append(span.resolve(buffer), 0.0, text_fmt.clone()); 175 } 176 177 InlineElement::Code(span) => { 178 job.append(span.resolve(buffer), 0.0, code_fmt.clone()); 179 } 180 181 InlineElement::Styled { style, content } => { 182 let text = content.resolve(buffer); 183 match style { 184 InlineStyle::Italic => { 185 job.append(text, 0.0, italic_fmt.clone()); 186 } 187 InlineStyle::Strikethrough => { 188 job.append(text, 0.0, strikethrough_fmt.clone()); 189 } 190 InlineStyle::Bold | InlineStyle::BoldItalic => { 191 // TextFormat has no bold/weight — flush and render as separate label 192 flush_job(&mut job, ui); 193 let rt = if matches!(style, InlineStyle::BoldItalic) { 194 RichText::new(text).strong().italics() 195 } else { 196 RichText::new(text).strong() 197 }; 198 ui.label(rt); 199 } 200 } 201 } 202 203 InlineElement::Link { text, url } => { 204 flush_job(&mut job, ui); 205 ui.hyperlink_to( 206 RichText::new(text.resolve(buffer)).color(theme.link_color), 207 url.resolve(buffer), 208 ); 209 } 210 211 InlineElement::Image { alt, url } => { 212 flush_job(&mut job, ui); 213 ui.hyperlink_to( 214 format!("[Image: {}]", alt.resolve(buffer)), 215 url.resolve(buffer), 216 ); 217 } 218 219 InlineElement::LineBreak => { 220 job.append("\n", 0.0, text_fmt.clone()); 221 } 222 } 223 } 224 225 flush_job(&mut job, ui); 226 } 227 228 /// Sand-themed syntax highlighting colors (warm, Claude-Code-esque palette) 229 pub(crate) struct SandCodeTheme { 230 comment: Color32, 231 keyword: Color32, 232 literal: Color32, 233 string: Color32, 234 punctuation: Color32, 235 plain: Color32, 236 } 237 238 impl SandCodeTheme { 239 pub(crate) fn from_visuals(visuals: &egui::Visuals) -> Self { 240 if visuals.dark_mode { 241 Self { 242 comment: Color32::from_rgb(0x8A, 0x80, 0x72), // Warm gray-brown 243 keyword: Color32::from_rgb(0xD4, 0xA5, 0x74), // Amber sand 244 literal: Color32::from_rgb(0xC4, 0x8A, 0x6A), // Terra cotta 245 string: Color32::from_rgb(0xC6, 0xB4, 0x6A), // Golden wheat 246 punctuation: Color32::from_rgb(0xA0, 0x96, 0x88), // Light sand 247 plain: Color32::from_rgb(0xD5, 0xCE, 0xC4), // Warm off-white 248 } 249 } else { 250 Self { 251 comment: Color32::from_rgb(0x8A, 0x7E, 0x6E), // Warm gray 252 keyword: Color32::from_rgb(0x9A, 0x60, 0x2A), // Dark amber 253 literal: Color32::from_rgb(0x8B, 0x4C, 0x30), // Dark terra cotta 254 string: Color32::from_rgb(0x6B, 0x5C, 0x1A), // Dark golden 255 punctuation: Color32::from_rgb(0x6E, 0x64, 0x56), // Dark sand 256 plain: Color32::from_rgb(0x3A, 0x35, 0x2E), // Dark brown-black 257 } 258 } 259 } 260 261 pub(crate) fn format(&self, token: SandToken, font_id: &FontId) -> TextFormat { 262 let color = match token { 263 SandToken::Comment => self.comment, 264 SandToken::Keyword => self.keyword, 265 SandToken::Literal => self.literal, 266 SandToken::String => self.string, 267 SandToken::Punctuation => self.punctuation, 268 SandToken::Plain => self.plain, 269 SandToken::Whitespace => Color32::TRANSPARENT, 270 }; 271 TextFormat::simple(font_id.clone(), color) 272 } 273 } 274 275 #[derive(Debug, Clone, Copy, PartialEq, Eq)] 276 pub(crate) enum SandToken { 277 Comment, 278 Keyword, 279 Literal, 280 String, 281 Punctuation, 282 Plain, 283 Whitespace, 284 } 285 286 struct LangConfig<'a> { 287 keywords: &'a [&'a str], 288 double_slash_comments: bool, 289 hash_comments: bool, 290 } 291 292 impl<'a> LangConfig<'a> { 293 fn from_language(language: &str) -> Option<Self> { 294 match language.to_lowercase().as_str() { 295 "rs" | "rust" => Some(Self { 296 keywords: &[ 297 "as", "async", "await", "break", "const", "continue", "crate", "dyn", "else", 298 "enum", "extern", "false", "fn", "for", "if", "impl", "in", "let", "loop", 299 "match", "mod", "move", "mut", "pub", "ref", "return", "self", "Self", 300 "static", "struct", "super", "trait", "true", "type", "unsafe", "use", "where", 301 "while", 302 ], 303 double_slash_comments: true, 304 hash_comments: false, 305 }), 306 "c" | "h" | "hpp" | "cpp" | "c++" => Some(Self { 307 keywords: &[ 308 "auto", 309 "break", 310 "case", 311 "char", 312 "const", 313 "continue", 314 "default", 315 "do", 316 "double", 317 "else", 318 "enum", 319 "extern", 320 "false", 321 "float", 322 "for", 323 "goto", 324 "if", 325 "inline", 326 "int", 327 "long", 328 "namespace", 329 "new", 330 "nullptr", 331 "return", 332 "short", 333 "signed", 334 "sizeof", 335 "static", 336 "struct", 337 "switch", 338 "template", 339 "this", 340 "true", 341 "typedef", 342 "union", 343 "unsigned", 344 "using", 345 "virtual", 346 "void", 347 "volatile", 348 "while", 349 "class", 350 "public", 351 "private", 352 "protected", 353 ], 354 double_slash_comments: true, 355 hash_comments: false, 356 }), 357 "py" | "python" => Some(Self { 358 keywords: &[ 359 "and", "as", "assert", "break", "class", "continue", "def", "del", "elif", 360 "else", "except", "False", "finally", "for", "from", "global", "if", "import", 361 "in", "is", "lambda", "None", "nonlocal", "not", "or", "pass", "raise", 362 "return", "True", "try", "while", "with", "yield", 363 ], 364 double_slash_comments: false, 365 hash_comments: true, 366 }), 367 "toml" => Some(Self { 368 keywords: &[], 369 double_slash_comments: false, 370 hash_comments: true, 371 }), 372 "bash" | "sh" | "zsh" => Some(Self { 373 keywords: &[ 374 "if", "then", "else", "elif", "fi", "case", "esac", "for", "while", "until", 375 "do", "done", "in", "function", "return", "local", "export", "set", "unset", 376 ], 377 double_slash_comments: false, 378 hash_comments: true, 379 }), 380 _ => None, 381 } 382 } 383 } 384 385 /// Tokenize source code into (token_type, text_slice) pairs. 386 /// Separated from rendering so it can be unit tested. 387 pub(crate) fn tokenize_code<'a>(code: &'a str, language: &str) -> Vec<(SandToken, &'a str)> { 388 let Some(lang) = LangConfig::from_language(language) else { 389 return vec![(SandToken::Plain, code)]; 390 }; 391 392 let mut tokens = Vec::new(); 393 let mut text = code; 394 395 while !text.is_empty() { 396 if (lang.double_slash_comments && text.starts_with("//")) 397 || (lang.hash_comments && text.starts_with('#')) 398 { 399 let end = text.find('\n').unwrap_or(text.len()); 400 tokens.push((SandToken::Comment, &text[..end])); 401 text = &text[end..]; 402 } else if text.starts_with('"') { 403 let end = text[1..] 404 .find('"') 405 .map(|i| i + 2) 406 .or_else(|| text.find('\n')) 407 .unwrap_or(text.len()); 408 tokens.push((SandToken::String, &text[..end])); 409 text = &text[end..]; 410 } else if text.starts_with(|c: char| c.is_ascii_alphanumeric() || c == '_') { 411 let end = text[1..] 412 .find(|c: char| !c.is_ascii_alphanumeric() && c != '_') 413 .map_or_else(|| text.len(), |i| i + 1); 414 let word = &text[..end]; 415 let token = if lang.keywords.contains(&word) { 416 SandToken::Keyword 417 } else { 418 SandToken::Literal 419 }; 420 tokens.push((token, word)); 421 text = &text[end..]; 422 } else if text.starts_with(|c: char| c.is_ascii_whitespace()) { 423 let end = text[1..] 424 .find(|c: char| !c.is_ascii_whitespace()) 425 .map_or_else(|| text.len(), |i| i + 1); 426 tokens.push((SandToken::Whitespace, &text[..end])); 427 text = &text[end..]; 428 } else { 429 let mut it = text.char_indices(); 430 it.next(); 431 let end = it.next().map_or(text.len(), |(idx, _)| idx); 432 tokens.push((SandToken::Punctuation, &text[..end])); 433 text = &text[end..]; 434 } 435 } 436 437 tokens 438 } 439 440 /// Simple syntax highlighter with sand-colored theme. 441 /// Supports Rust, C/C++, Python, TOML, bash, and falls back to plain text. 442 fn highlight_sand(code: &str, language: &str, ui: &Ui) -> LayoutJob { 443 let theme = SandCodeTheme::from_visuals(ui.visuals()); 444 let font_id = ui 445 .style() 446 .override_font_id 447 .clone() 448 .unwrap_or_else(|| egui::TextStyle::Monospace.resolve(ui.style())); 449 450 let mut job = LayoutJob::default(); 451 for (token, text) in tokenize_code(code, language) { 452 job.append(text, 0.0, theme.format(token, &font_id)); 453 } 454 job 455 } 456 457 fn render_code_block(language: Option<&str>, content: &str, theme: &MdTheme, ui: &mut Ui) { 458 egui::Frame::default() 459 .fill(theme.code_bg) 460 .inner_margin(8.0) 461 .corner_radius(4.0) 462 .show(ui, |ui| { 463 if let Some(lang) = language { 464 ui.label(RichText::new(lang).small().weak()); 465 } 466 467 let lang = language.unwrap_or("text"); 468 let layout_job = highlight_sand(content, lang, ui); 469 ui.add(egui::Label::new(layout_job).wrap()); 470 }); 471 ui.add_space(8.0); 472 } 473 474 fn render_list_item(item: &ListItem, marker: &str, theme: &MdTheme, buffer: &str, ui: &mut Ui) { 475 ui.horizontal(|ui| { 476 ui.label(RichText::new(marker).weak()); 477 ui.vertical(|ui| { 478 ui.horizontal_wrapped(|ui| { 479 render_inlines(&item.content, theme, buffer, ui); 480 }); 481 // Render nested list if present 482 if let Some(nested) = &item.nested { 483 ui.indent("nested", |ui| { 484 render_element(nested, theme, buffer, ui); 485 }); 486 } 487 }); 488 }); 489 } 490 491 fn render_table(headers: &[Span], rows: &[Vec<Span>], theme: &MdTheme, buffer: &str, ui: &mut Ui) { 492 use egui_extras::{Column, TableBuilder}; 493 494 let num_cols = headers.len(); 495 if num_cols == 0 { 496 return; 497 } 498 499 let cell_padding = egui::Margin::symmetric(8, 4); 500 501 // Use first header's byte offset as id_salt so multiple tables don't clash 502 let salt = headers.first().map_or(0, |h| h.start); 503 504 // Wrap in horizontal scroll so wide tables don't break layout on small screens 505 egui::ScrollArea::horizontal() 506 .id_salt(("md_table_scroll", salt)) 507 .show(ui, |ui| { 508 let mut builder = TableBuilder::new(ui) 509 .id_salt(salt) 510 .vscroll(false) 511 .auto_shrink([false, false]); 512 for _ in 0..num_cols { 513 builder = builder.column(Column::auto().resizable(true)); 514 } 515 516 let header_bg = theme.code_bg; 517 518 builder 519 .header(28.0, |mut header| { 520 for h in headers { 521 header.col(|ui| { 522 ui.painter().rect_filled(ui.max_rect(), 0.0, header_bg); 523 egui::Frame::NONE.inner_margin(cell_padding).show(ui, |ui| { 524 ui.strong(h.resolve(buffer)); 525 }); 526 }); 527 } 528 }) 529 .body(|mut body| { 530 for row in rows { 531 body.row(28.0, |mut table_row| { 532 for i in 0..num_cols { 533 table_row.col(|ui| { 534 egui::Frame::NONE.inner_margin(cell_padding).show(ui, |ui| { 535 if let Some(cell) = row.get(i) { 536 ui.label(cell.resolve(buffer)); 537 } 538 }); 539 }); 540 } 541 }); 542 } 543 }); 544 }); 545 ui.add_space(8.0); 546 } 547 548 fn render_partial(partial: &Partial, theme: &MdTheme, buffer: &str, ui: &mut Ui) { 549 let content = partial.content(buffer); 550 if content.is_empty() { 551 return; 552 } 553 554 match &partial.kind { 555 PartialKind::CodeFence { language, .. } => { 556 egui::Frame::default() 557 .fill(theme.code_bg) 558 .inner_margin(8.0) 559 .corner_radius(4.0) 560 .show(ui, |ui| { 561 let lang_str = language.map(|s| s.resolve(buffer)); 562 if let Some(lang) = lang_str { 563 ui.label(RichText::new(lang).small().weak()); 564 } 565 566 let lang = lang_str.unwrap_or("text"); 567 let layout_job = highlight_sand(content, lang, ui); 568 ui.add(egui::Label::new(layout_job).wrap()); 569 ui.label(RichText::new("_").weak()); 570 }); 571 } 572 573 PartialKind::Heading { level } => { 574 let size = theme.heading_sizes[(*level as usize).saturating_sub(1).min(5)]; 575 ui.add(egui::Label::new(RichText::new(content).size(size).strong()).wrap()); 576 } 577 578 PartialKind::Table { 579 headers, 580 rows, 581 seen_separator, 582 } => { 583 if *seen_separator { 584 render_table(headers, rows, theme, buffer, ui); 585 } else { 586 ui.label(content); 587 } 588 } 589 590 PartialKind::Paragraph => { 591 // Parse inline elements from the partial content for proper formatting 592 let inlines = parse_inline(content, partial.content_start); 593 ui.horizontal_wrapped(|ui| { 594 render_inlines(&inlines, theme, buffer, ui); 595 }); 596 } 597 598 _ => { 599 // Other partial kinds - parse inline elements too 600 let inlines = parse_inline(content, partial.content_start); 601 ui.horizontal_wrapped(|ui| { 602 render_inlines(&inlines, theme, buffer, ui); 603 }); 604 } 605 } 606 } 607 608 #[cfg(test)] 609 mod tests { 610 use super::*; 611 612 /// Helper: collect (token, text) pairs 613 fn tokens<'a>(code: &'a str, lang: &str) -> Vec<(SandToken, &'a str)> { 614 tokenize_code(code, lang) 615 } 616 617 /// Reassembled tokens must equal the original input (no bytes lost or duplicated) 618 fn assert_roundtrip(code: &str, lang: &str) { 619 let result: String = tokenize_code(code, lang) 620 .into_iter() 621 .map(|(_, s)| s) 622 .collect(); 623 assert_eq!(result, code, "roundtrip failed for lang={lang}"); 624 } 625 626 // ---- Basic token classification ---- 627 628 #[test] 629 fn test_rust_keyword() { 630 let toks = tokens("fn main", "rust"); 631 assert_eq!(toks[0], (SandToken::Keyword, "fn")); 632 assert_eq!(toks[1], (SandToken::Whitespace, " ")); 633 assert_eq!(toks[2], (SandToken::Literal, "main")); 634 } 635 636 #[test] 637 fn test_rust_comment() { 638 let toks = tokens("// hello", "rust"); 639 assert_eq!(toks, vec![(SandToken::Comment, "// hello")]); 640 } 641 642 #[test] 643 fn test_rust_string() { 644 let toks = tokens("\"hello world\"", "rust"); 645 assert_eq!(toks, vec![(SandToken::String, "\"hello world\"")]); 646 } 647 648 #[test] 649 fn test_python_hash_comment() { 650 let toks = tokens("# comment", "python"); 651 assert_eq!(toks, vec![(SandToken::Comment, "# comment")]); 652 } 653 654 #[test] 655 fn test_python_keyword() { 656 let toks = tokens("def foo", "py"); 657 assert_eq!(toks[0], (SandToken::Keyword, "def")); 658 } 659 660 #[test] 661 fn test_punctuation() { 662 let toks = tokens("();", "rust"); 663 assert_eq!( 664 toks, 665 vec![ 666 (SandToken::Punctuation, "("), 667 (SandToken::Punctuation, ")"), 668 (SandToken::Punctuation, ";"), 669 ] 670 ); 671 } 672 673 #[test] 674 fn test_underscore_identifier() { 675 let toks = tokens("_foo_bar", "rust"); 676 assert_eq!(toks, vec![(SandToken::Literal, "_foo_bar")]); 677 } 678 679 // ---- Unsupported languages ---- 680 681 #[test] 682 fn test_unknown_lang_plain() { 683 let toks = tokens("anything goes here", "brainfuck"); 684 assert_eq!(toks, vec![(SandToken::Plain, "anything goes here")]); 685 } 686 687 #[test] 688 fn test_text_lang_plain() { 689 let toks = tokens("plain text", "text"); 690 assert_eq!(toks, vec![(SandToken::Plain, "plain text")]); 691 } 692 693 // ---- Edge cases for string indexing ---- 694 695 #[test] 696 fn test_empty_input() { 697 assert!(tokenize_code("", "rust").is_empty()); 698 } 699 700 #[test] 701 fn test_single_char_keyword() { 702 // "if" is a keyword, "i" is not 703 let toks = tokens("i", "rust"); 704 assert_eq!(toks, vec![(SandToken::Literal, "i")]); 705 } 706 707 #[test] 708 fn test_unclosed_string() { 709 // String that never closes — should consume to end of line or end of input 710 let toks = tokens("\"unclosed", "rust"); 711 assert_eq!(toks, vec![(SandToken::String, "\"unclosed")]); 712 } 713 714 #[test] 715 fn test_unclosed_string_with_newline() { 716 let toks = tokens("\"unclosed\nnext", "rust"); 717 // Should stop the string at the newline 718 assert_eq!(toks[0], (SandToken::String, "\"unclosed")); 719 } 720 721 #[test] 722 fn test_empty_string() { 723 let toks = tokens("\"\"", "rust"); 724 assert_eq!(toks, vec![(SandToken::String, "\"\"")]); 725 } 726 727 #[test] 728 fn test_comment_at_end_no_newline() { 729 let toks = tokens("// no newline", "rust"); 730 assert_eq!(toks, vec![(SandToken::Comment, "// no newline")]); 731 } 732 733 #[test] 734 fn test_comment_with_newline() { 735 let toks = tokens("// comment\ncode", "rust"); 736 assert_eq!(toks[0], (SandToken::Comment, "// comment")); 737 assert_eq!(toks[1], (SandToken::Whitespace, "\n")); 738 assert_eq!(toks[2], (SandToken::Literal, "code")); 739 } 740 741 #[test] 742 fn test_multibyte_unicode_punctuation() { 743 // Ensure multi-byte chars don't cause panics from byte indexing 744 let toks = tokens("→", "rust"); 745 assert_eq!(toks, vec![(SandToken::Punctuation, "→")]); 746 } 747 748 #[test] 749 fn test_mixed_unicode_and_ascii() { 750 let code = "let x = «val»;"; 751 assert_roundtrip(code, "rust"); 752 } 753 754 #[test] 755 fn test_only_whitespace() { 756 let toks = tokens(" \n\t", "rust"); 757 assert_eq!(toks, vec![(SandToken::Whitespace, " \n\t")]); 758 } 759 760 #[test] 761 fn test_only_punctuation() { 762 let toks = tokens("()", "rust"); 763 assert_eq!( 764 toks, 765 vec![(SandToken::Punctuation, "("), (SandToken::Punctuation, ")"),] 766 ); 767 } 768 769 // ---- Roundtrip (no bytes lost) ---- 770 771 #[test] 772 fn test_roundtrip_rust() { 773 assert_roundtrip( 774 "fn main() {\n let x = \"hello\";\n // done\n}", 775 "rust", 776 ); 777 } 778 779 #[test] 780 fn test_roundtrip_python() { 781 assert_roundtrip("def foo():\n # comment\n return \"bar\"", "python"); 782 } 783 784 #[test] 785 fn test_roundtrip_cpp() { 786 assert_roundtrip("#include <stdio.h>\nint main() { return 0; }", "cpp"); 787 } 788 789 #[test] 790 fn test_roundtrip_unknown() { 791 assert_roundtrip("anything goes 🎉 here!", "unknown"); 792 } 793 794 #[test] 795 fn test_roundtrip_empty() { 796 assert_roundtrip("", "rust"); 797 } 798 799 #[test] 800 fn test_roundtrip_bash() { 801 assert_roundtrip( 802 "#!/bin/bash\nif [ -f \"$1\" ]; then\n echo \"exists\"\nfi", 803 "bash", 804 ); 805 } 806 807 // ---- Multi-line code blocks ---- 808 809 #[test] 810 fn test_multiline_rust() { 811 let code = "use std::io;\n\nfn main() {\n let x = 42;\n println!(\"{}\", x);\n}"; 812 assert_roundtrip(code, "rust"); 813 let toks = tokens(code, "rust"); 814 assert_eq!(toks[0], (SandToken::Keyword, "use")); 815 } 816 817 // ---- Language detection ---- 818 819 #[test] 820 fn test_case_insensitive_language() { 821 let toks = tokens("fn test", "Rust"); 822 assert_eq!(toks[0], (SandToken::Keyword, "fn")); 823 824 let toks = tokens("def test", "PYTHON"); 825 assert_eq!(toks[0], (SandToken::Keyword, "def")); 826 } 827 828 // ---- Bash support ---- 829 830 #[test] 831 fn test_bash_keywords() { 832 let toks = tokens("if then fi", "bash"); 833 assert_eq!(toks[0], (SandToken::Keyword, "if")); 834 assert_eq!(toks[2], (SandToken::Keyword, "then")); 835 assert_eq!(toks[4], (SandToken::Keyword, "fi")); 836 } 837 838 #[test] 839 fn test_bash_hash_comment() { 840 let toks = tokens("# this is a comment", "sh"); 841 assert_eq!(toks, vec![(SandToken::Comment, "# this is a comment")]); 842 } 843 844 // ---- TOML ---- 845 846 #[test] 847 fn test_toml_hash_comment() { 848 let toks = tokens("# config", "toml"); 849 assert_eq!(toks, vec![(SandToken::Comment, "# config")]); 850 } 851 852 #[test] 853 fn test_toml_key_value() { 854 let toks = tokens("name = \"notedeck\"", "toml"); 855 assert_eq!(toks[0], (SandToken::Literal, "name")); 856 // = is punctuation 857 assert!(toks 858 .iter() 859 .any(|(t, s)| *t == SandToken::String && *s == "\"notedeck\"")); 860 } 861 }