notedeck

One damus client to rule them all
git clone git://jb55.com/notedeck
Log | Files | Refs | README | LICENSE

commit 42baa19529e8e36f2fb98cc8709282b1b5b1d790
parent 92a1f6b7b8f07580deea2e6ce83aeac18e917699
Author: William Casarin <jb55@jb55.com>
Date:   Sun, 15 Feb 2026 20:59:37 -0800

md-stream: zero-copy parser using Span indices instead of String allocations

Eliminate all heap allocations from the parsing hot path:
- Replace remaining().to_string() clone with internal self.remaining() calls
- Add PartialDispatch enum to avoid cloning PartialKind (which contains Vecs)
- All process_* methods now compute remaining slice internally

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Diffstat:
M crates/md-stream/src/element.rs | 48 +++++++++++++++++++++++++++++++++++++-----------
M crates/md-stream/src/inline.rs | 221 ++++++++++++++++++++++++++++++++++++++++++++++---------------------------------
M crates/md-stream/src/lib.rs | 2 +-
M crates/md-stream/src/parser.rs | 485 +++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------
M crates/md-stream/src/partial.rs | 39 ++++++++++++++++++++++++++++++---------
M crates/md-stream/src/tests.rs | 313 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------
M crates/notedeck_dave/src/backend/claude.rs | 11 ++++++-----
M crates/notedeck_dave/src/messages.rs | 18 ++++++++++++++++--
M crates/notedeck_dave/src/ui/dave.rs | 16 +++++++++++-----
M crates/notedeck_dave/src/ui/markdown_ui.rs | 129 ++++++++++++++++++++++++++++++++++++++++++++++---------------------------------
10 files changed, 884 insertions(+), 398 deletions(-)

diff --git a/crates/md-stream/src/element.rs b/crates/md-stream/src/element.rs @@ -1,10 +1,36 @@ //! Markdown elements - the stable output of parsing. +/// A byte range into the parser's source buffer. Zero-copy reference to content. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct Span { + pub start: usize, + pub end: usize, +} + +impl Span { + pub fn new(start: usize, end: usize) -> Self { + debug_assert!(start <= end); + Self { start, end } + } + + pub fn resolve<'a>(&self, buffer: &'a str) -> &'a str { + &buffer[self.start..self.end] + } + + pub fn is_empty(&self) -> bool { + self.start == self.end + } + + pub fn len(&self) -> usize { + self.end - self.start + } +} + /// A complete, stable markdown element ready for rendering. #[derive(Debug, Clone, PartialEq)] pub enum MdElement { /// Heading with level (1-6) and content - Heading { level: u8, content: String }, + Heading { level: u8, content: Span }, /// Paragraph of text (may contain inline elements) Paragraph(Vec<InlineElement>), @@ -23,22 +49,22 @@ pub enum MdElement { /// Markdown table with headers and data rows Table { - headers: Vec<String>, - rows: Vec<Vec<String>>, + headers: Vec<Span>, + rows: Vec<Vec<Span>>, }, /// Thematic break (---, ***, ___) ThematicBreak, /// Raw text (when nothing else matches) - Text(String), + Text(Span), } /// A fenced code block with optional language. #[derive(Debug, Clone, PartialEq)] pub struct CodeBlock { - pub language: Option<String>, - pub content: String, + pub language: Option<Span>, + pub content: Span, } /// A list item (may contain nested elements). @@ -52,19 +78,19 @@ pub struct ListItem { #[derive(Debug, Clone, PartialEq)] pub enum InlineElement { /// Plain text - Text(String), + Text(Span), /// Styled text (bold, italic, etc.) 
- Styled { style: InlineStyle, content: String }, + Styled { style: InlineStyle, content: Span }, /// Inline code (`code`) - Code(String), + Code(Span), /// Link [text](url) - Link { text: String, url: String }, + Link { text: Span, url: Span }, /// Image ![alt](url) - Image { alt: String, url: String }, + Image { alt: Span, url: Span }, /// Hard line break LineBreak, diff --git a/crates/md-stream/src/inline.rs b/crates/md-stream/src/inline.rs @@ -1,14 +1,15 @@ //! Inline element parsing for bold, italic, code, links, etc. -use crate::element::{InlineElement, InlineStyle}; +use crate::element::{InlineElement, InlineStyle, Span}; use crate::partial::PartialKind; /// Parses inline elements from text. -/// Returns a vector of inline elements. +/// `base_offset` is the position of `text` within the parser's buffer. +/// All returned Spans are absolute buffer positions. /// /// Note: This is called on complete paragraph text, not streaming. /// For streaming, we use PartialKind to track incomplete markers. 
-pub fn parse_inline(text: &str) -> Vec<InlineElement> { +pub fn parse_inline(text: &str, base_offset: usize) -> Vec<InlineElement> { let mut result = Vec::new(); let mut chars = text.char_indices().peekable(); let mut plain_start = 0; @@ -19,7 +20,10 @@ pub fn parse_inline(text: &str) -> Vec<InlineElement> { '`' => { // Flush any pending plain text if i > plain_start { - result.push(InlineElement::Text(text[plain_start..i].to_string())); + result.push(InlineElement::Text(Span::new( + base_offset + plain_start, + base_offset + i, + ))); } // Count backticks @@ -33,17 +37,22 @@ pub fn parse_inline(text: &str) -> Vec<InlineElement> { // Find closing backticks (same count) if let Some(end_pos) = find_closing_backticks(&text[start_pos..], backtick_count) { - let code_content = &text[start_pos..start_pos + end_pos]; + let code_start = start_pos; + let code_end = start_pos + end_pos; + let code_content = &text[code_start..code_end]; // Strip single leading/trailing space if present (CommonMark rule) - let trimmed = if code_content.starts_with(' ') + let (trim_start, trim_end) = if code_content.starts_with(' ') && code_content.ends_with(' ') && code_content.len() > 1 { - &code_content[1..code_content.len() - 1] + (code_start + 1, code_end - 1) } else { - code_content + (code_start, code_end) }; - result.push(InlineElement::Code(trimmed.to_string())); + result.push(InlineElement::Code(Span::new( + base_offset + trim_start, + base_offset + trim_end, + ))); // Advance past closing backticks let skip_to = start_pos + end_pos + backtick_count; @@ -93,14 +102,15 @@ pub fn parse_inline(text: &str) -> Vec<InlineElement> { let content_start = marker_start + count; // Look for closing marker - if let Some((content, close_len, end_pos)) = + if let Some((content_end_local, close_len)) = find_closing_emphasis(&text[content_start..], marker, effective_count) { // Flush pending plain text if marker_start > plain_start { - result.push(InlineElement::Text( - 
text[plain_start..marker_start].to_string(), - )); + result.push(InlineElement::Text(Span::new( + base_offset + plain_start, + base_offset + marker_start, + ))); } let style = match close_len { @@ -111,11 +121,14 @@ pub fn parse_inline(text: &str) -> Vec<InlineElement> { result.push(InlineElement::Styled { style, - content: content.to_string(), + content: Span::new( + base_offset + content_start, + base_offset + content_start + content_end_local, + ), }); // Advance past the content and closing marker - let skip_to = content_start + end_pos + close_len; + let skip_to = content_start + content_end_local + close_len; while chars.peek().map(|(idx, _)| *idx < skip_to).unwrap_or(false) { chars.next(); } @@ -131,17 +144,22 @@ pub fn parse_inline(text: &str) -> Vec<InlineElement> { // Flush pending text if i > plain_start { - result.push(InlineElement::Text(text[plain_start..i].to_string())); + result.push(InlineElement::Text(Span::new( + base_offset + plain_start, + base_offset + i, + ))); } let content_start = i + 2; // Find closing ~~ if let Some(end_pos) = text[content_start..].find("~~") { - let content = &text[content_start..content_start + end_pos]; result.push(InlineElement::Styled { style: InlineStyle::Strikethrough, - content: content.to_string(), + content: Span::new( + base_offset + content_start, + base_offset + content_start + end_pos, + ), }); let skip_to = content_start + end_pos + 2; @@ -160,13 +178,18 @@ pub fn parse_inline(text: &str) -> Vec<InlineElement> { '[' => { // Flush pending text if i > plain_start { - result.push(InlineElement::Text(text[plain_start..i].to_string())); + result.push(InlineElement::Text(Span::new( + base_offset + plain_start, + base_offset + i, + ))); } - if let Some((text_content, url, total_len)) = parse_link(&text[i..]) { + if let Some((text_span, url_span, total_len)) = + parse_link(&text[i..], base_offset + i) + { result.push(InlineElement::Link { - text: text_content, - url, + text: text_span, + url: url_span, }); let 
skip_to = i + total_len; @@ -185,13 +208,21 @@ pub fn parse_inline(text: &str) -> Vec<InlineElement> { if chars.peek().map(|(_, c)| *c == '[').unwrap_or(false) { // Flush pending text if i > plain_start { - result.push(InlineElement::Text(text[plain_start..i].to_string())); + result.push(InlineElement::Text(Span::new( + base_offset + plain_start, + base_offset + i, + ))); } chars.next(); // consume [ - if let Some((alt, url, link_len)) = parse_link(&text[i + 1..]) { - result.push(InlineElement::Image { alt, url }); + if let Some((alt_span, url_span, link_len)) = + parse_link(&text[i + 1..], base_offset + i + 1) + { + result.push(InlineElement::Image { + alt: alt_span, + url: url_span, + }); let skip_to = i + 1 + link_len; while chars.peek().map(|(idx, _)| *idx < skip_to).unwrap_or(false) { @@ -212,7 +243,10 @@ pub fn parse_inline(text: &str) -> Vec<InlineElement> { // Flush text without trailing spaces let text_end = i - 2; if text_end > plain_start { - result.push(InlineElement::Text(text[plain_start..text_end].to_string())); + result.push(InlineElement::Text(Span::new( + base_offset + plain_start, + base_offset + text_end, + ))); } result.push(InlineElement::LineBreak); plain_start = i + 1; @@ -228,10 +262,10 @@ pub fn parse_inline(text: &str) -> Vec<InlineElement> { // Flush remaining plain text if plain_start < text.len() { - let remaining = &text[plain_start..]; - if !remaining.is_empty() { - result.push(InlineElement::Text(remaining.to_string())); - } + result.push(InlineElement::Text(Span::new( + base_offset + plain_start, + base_offset + text.len(), + ))); } // Collapse adjacent Text elements @@ -242,21 +276,23 @@ pub fn parse_inline(text: &str) -> Vec<InlineElement> { /// Find closing backticks matching the opening count. 
fn find_closing_backticks(text: &str, count: usize) -> Option<usize> { - let target: String = "`".repeat(count); + let bytes = text.as_bytes(); let mut i = 0; - while i < text.len() { - if text[i..].starts_with(&target) { - // Make sure it's exactly this many backticks - let after = i + count; - if after >= text.len() || !text[after..].starts_with('`') { - return Some(i); - } - // More backticks - skip them - while i < text.len() && text[i..].starts_with('`') { + while i < bytes.len() { + if bytes[i] == b'`' { + // Count consecutive backticks at this position + let run_start = i; + while i < bytes.len() && bytes[i] == b'`' { i += 1; } + let run_len = i - run_start; + if run_len == count { + return Some(run_start); + } + // Not the right count, continue } else { + // Skip non-backtick character (handle UTF-8) i += text[i..].chars().next().map(|c| c.len_utf8()).unwrap_or(1); } } @@ -264,53 +300,40 @@ fn find_closing_backticks(text: &str, count: usize) -> Option<usize> { } /// Find closing emphasis marker. -/// Returns (content, actual_close_len, end_position) if found. -fn find_closing_emphasis( - text: &str, - marker: char, - open_count: usize, -) -> Option<(&str, usize, usize)> { - let chars: Vec<(usize, char)> = text.char_indices().collect(); - let mut i = 0; - - while i < chars.len() { - let (pos, c) = chars[i]; +/// Returns (end_position, actual_close_len) if found. 
+fn find_closing_emphasis(text: &str, marker: char, open_count: usize) -> Option<(usize, usize)> { + let mut chars = text.char_indices().peekable(); + while let Some((pos, c)) = chars.next() { if c == marker { // Count consecutive markers let mut count = 1; - while i + count < chars.len() && chars[i + count].1 == marker { + while chars.peek().map(|(_, ch)| *ch == marker).unwrap_or(false) { + chars.next(); count += 1; } // Check if this could close (not followed by alphanumeric for _) let can_close = if marker == '_' { - i + count >= chars.len() || { - let next_char = chars.get(i + count).map(|(_, c)| *c); - next_char - .map(|c| c.is_whitespace() || c.is_ascii_punctuation()) - .unwrap_or(true) - } + chars.peek().is_none_or(|(_, next_c)| { + next_c.is_whitespace() || next_c.is_ascii_punctuation() + }) } else { true }; if can_close && count >= open_count.min(3) { let close_len = count.min(open_count).min(3); - return Some((&text[..pos], close_len, pos)); + return Some((pos, close_len)); } - - i += count; - } else { - i += 1; } } None } /// Parse a link starting with [ -/// Returns (text, url, total_bytes_consumed) -fn parse_link(text: &str) -> Option<(String, String, usize)> { +/// Returns (text_span, url_span, total_bytes_consumed) +fn parse_link(text: &str, base_offset: usize) -> Option<(Span, Span, usize)> { if !text.starts_with('[') { return None; } @@ -334,7 +357,6 @@ fn parse_link(text: &str) -> Option<(String, String, usize)> { } let bracket_end = bracket_end?; - let link_text = &text[1..bracket_end]; // Check for ( immediately after ] let rest = &text[bracket_end + 1..]; @@ -361,12 +383,18 @@ fn parse_link(text: &str) -> Option<(String, String, usize)> { } let paren_end = paren_end?; - let url = &rest[1..paren_end]; + + // text_span: content between [ and ] + let text_span = Span::new(base_offset + 1, base_offset + bracket_end); + // url_span: content between ( and ) + let url_start = bracket_end + 1 + 1; // ] + ( + let url_end = bracket_end + 1 + paren_end; 
// position of ) + let url_span = Span::new(base_offset + url_start, base_offset + url_end); // Total consumed: [ + text + ] + ( + url + ) let total = bracket_end + 1 + paren_end + 1; - Some((link_text.to_string(), url.to_string(), total)) + Some((text_span, url_span, total)) } /// Collapse adjacent Text elements into one. @@ -380,8 +408,9 @@ fn collapse_text_elements(elements: &mut Vec<InlineElement>) { if let (InlineElement::Text(a), InlineElement::Text(b)) = (&elements[write], &elements[read]) { - let combined = format!("{}{}", a, b); - elements[write] = InlineElement::Text(combined); + // Merge spans — contiguous or not, just extend to cover both + let merged = Span::new(a.start, b.end); + elements[write] = InlineElement::Text(merged); } else { write += 1; if write != read { @@ -434,18 +463,12 @@ impl InlineState { /// Finalize - return whatever we have as parsed elements. pub fn finalize(self) -> Vec<InlineElement> { - parse_inline(&self.buffer) + parse_inline(&self.buffer, 0) } /// Extract complete inline elements from the buffer. fn extract_complete(&mut self) -> Vec<InlineElement> { - // For streaming, we're conservative - only return elements when - // we're confident they won't change. - // - // Strategy: Parse the whole buffer, but only return elements that - // end before any trailing ambiguous characters. 
- - let result = parse_inline(&self.buffer); + let result = parse_inline(&self.buffer, 0); // Check if the buffer might have incomplete markers at the end if self.has_incomplete_tail() { @@ -494,62 +517,74 @@ impl Default for InlineState { mod tests { use super::*; + fn resolve<'a>(span: &Span, text: &'a str) -> &'a str { + span.resolve(text) + } + #[test] fn test_inline_code() { - let result = parse_inline("some `code` here"); - assert!(result - .iter() - .any(|e| matches!(e, InlineElement::Code(s) if s == "code"))); + let text = "some `code` here"; + let result = parse_inline(text, 0); + assert!(result.iter().any(|e| matches!( + e, + InlineElement::Code(s) if resolve(s, text) == "code" + ))); } #[test] fn test_bold() { - let result = parse_inline("some **bold** text"); + let text = "some **bold** text"; + let result = parse_inline(text, 0); assert!(result.iter().any(|e| matches!( e, - InlineElement::Styled { style: InlineStyle::Bold, content } if content == "bold" + InlineElement::Styled { style: InlineStyle::Bold, content } if resolve(content, text) == "bold" ))); } #[test] fn test_italic() { - let result = parse_inline("some *italic* text"); + let text = "some *italic* text"; + let result = parse_inline(text, 0); assert!(result.iter().any(|e| matches!( e, - InlineElement::Styled { style: InlineStyle::Italic, content } if content == "italic" + InlineElement::Styled { style: InlineStyle::Italic, content } if resolve(content, text) == "italic" ))); } #[test] fn test_link() { - let result = parse_inline("check [this](https://example.com) out"); + let text = "check [this](https://example.com) out"; + let result = parse_inline(text, 0); assert!(result.iter().any(|e| matches!( e, - InlineElement::Link { text, url } if text == "this" && url == "https://example.com" + InlineElement::Link { text: t, url } if resolve(t, text) == "this" && resolve(url, text) == "https://example.com" ))); } #[test] fn test_image() { - let result = parse_inline("see ![alt](img.png) here"); + 
let text = "see ![alt](img.png) here"; + let result = parse_inline(text, 0); assert!(result.iter().any(|e| matches!( e, - InlineElement::Image { alt, url } if alt == "alt" && url == "img.png" + InlineElement::Image { alt, url } if resolve(alt, text) == "alt" && resolve(url, text) == "img.png" ))); } #[test] fn test_strikethrough() { - let result = parse_inline("some ~~deleted~~ text"); + let text = "some ~~deleted~~ text"; + let result = parse_inline(text, 0); assert!(result.iter().any(|e| matches!( e, - InlineElement::Styled { style: InlineStyle::Strikethrough, content } if content == "deleted" + InlineElement::Styled { style: InlineStyle::Strikethrough, content } if resolve(content, text) == "deleted" ))); } #[test] fn test_mixed() { - let result = parse_inline("**bold** and *italic* and `code`"); + let text = "**bold** and *italic* and `code`"; + let result = parse_inline(text, 0); assert_eq!( result .iter() diff --git a/crates/md-stream/src/lib.rs b/crates/md-stream/src/lib.rs @@ -8,7 +8,7 @@ mod inline; mod parser; mod partial; -pub use element::{CodeBlock, InlineElement, InlineStyle, ListItem, MdElement}; +pub use element::{CodeBlock, InlineElement, InlineStyle, ListItem, MdElement, Span}; pub use inline::{parse_inline, InlineState}; pub use parser::StreamParser; pub use partial::{LinkState, Partial, PartialKind}; diff --git a/crates/md-stream/src/parser.rs b/crates/md-stream/src/parser.rs @@ -1,6 +1,6 @@ //! Core streaming parser implementation. -use crate::element::{CodeBlock, MdElement}; +use crate::element::{CodeBlock, MdElement, Span}; use crate::inline::parse_inline; use crate::partial::{Partial, PartialKind}; @@ -25,6 +25,17 @@ pub struct StreamParser { at_line_start: bool, } +/// Lightweight dispatch tag for partial state, avoiding Clone on PartialKind +/// which contains Vecs (table headers/rows). 
+#[derive(Clone, Copy)] +enum PartialDispatch { + CodeFence { fence_char: char, fence_len: usize }, + Heading { level: u8 }, + Table, + Paragraph, + Other, +} + impl StreamParser { pub fn new() -> Self { Self { @@ -51,6 +62,16 @@ impl StreamParser { &self.parsed } + /// Get the parser's buffer for resolving spans. + pub fn buffer(&self) -> &str { + &self.buffer + } + + /// Consume the parser and return the completed elements and buffer. + pub fn into_parts(self) -> (Vec<MdElement>, String) { + (self.parsed, self.buffer) + } + /// Consume the parser and return the completed elements. pub fn into_parsed(self) -> Vec<MdElement> { self.parsed @@ -64,7 +85,7 @@ impl StreamParser { /// Get the speculative content that would render from partial state. /// Returns the raw accumulated text that isn't yet a complete element. pub fn partial_content(&self) -> Option<&str> { - self.partial.as_ref().map(|p| p.content.as_str()) + self.partial.as_ref().map(|p| p.content(&self.buffer)) } /// Check if we're currently inside a code block. @@ -80,50 +101,78 @@ impl StreamParser { &self.buffer[self.process_pos..] } + /// Compute a trimmed span (strip leading/trailing whitespace). + fn trim_span(&self, span: Span) -> Span { + let s = &self.buffer[span.start..span.end]; + let trimmed = s.trim(); + if trimmed.is_empty() { + return Span::new(span.start, span.start); + } + let ltrim = s.len() - s.trim_start().len(); + Span::new(span.start + ltrim, span.start + ltrim + trimmed.len()) + } + + /// Extract the dispatch info from the current partial state. + /// Returns only small Copy data to avoid cloning Vecs in PartialKind::Table. + fn partial_dispatch(&self) -> Option<PartialDispatch> { + self.partial.as_ref().map(|p| match &p.kind { + PartialKind::CodeFence { + fence_char, + fence_len, + .. + } => PartialDispatch::CodeFence { + fence_char: *fence_char, + fence_len: *fence_len, + }, + PartialKind::Heading { level } => PartialDispatch::Heading { level: *level }, + PartialKind::Table { .. 
} => PartialDispatch::Table, + PartialKind::Paragraph => PartialDispatch::Paragraph, + _ => PartialDispatch::Other, + }) + } + /// Process newly added content. fn process_new_content(&mut self) { while self.process_pos < self.buffer.len() { - let remaining = self.remaining().to_string(); - // Handle based on current partial state - let partial_kind = self.partial.as_ref().map(|p| p.kind.clone()); - if let Some(kind) = partial_kind { - match kind { - PartialKind::CodeFence { + if let Some(dispatch) = self.partial_dispatch() { + match dispatch { + PartialDispatch::CodeFence { fence_char, fence_len, - .. } => { - if self.process_code_fence(fence_char, fence_len, &remaining) { + if self.process_code_fence(fence_char, fence_len) { continue; } return; // Need more input } - PartialKind::Heading { level } => { - if self.process_heading(level, &remaining) { + PartialDispatch::Heading { level } => { + if self.process_heading(level) { continue; } return; } - PartialKind::Table { .. } => { - if self.process_table(&remaining) { + PartialDispatch::Table => { + if self.process_table() { continue; } return; } - PartialKind::Paragraph => { + PartialDispatch::Paragraph => { // For paragraphs, check if we're at a line start that could be a block element if self.at_line_start { // Take the paragraph partial first — try_block_start may // replace self.partial with the new block element let para_partial = self.partial.take(); - if let Some(consumed) = self.try_block_start(&remaining) { + if let Some(consumed) = self.try_block_start() { // Emit the saved paragraph before the new block if let Some(partial) = para_partial { - let trimmed = partial.content.trim(); + let span = partial.content_span(); + let trimmed = self.trim_span(span); if !trimmed.is_empty() { - let inline_elements = parse_inline(trimmed); + let content = trimmed.resolve(&self.buffer); + let inline_elements = parse_inline(content, trimmed.start); self.parsed.push(MdElement::Paragraph(inline_elements)); } } @@ -136,19 
+185,19 @@ impl StreamParser { // If remaining could be the start of a block element but we // don't have enough chars yet, wait for more input rather than // consuming into the paragraph (e.g. "`" could become "```") - if self.could_be_block_start(&remaining) { + if self.could_be_block_start() { return; } } // Continue with inline processing - if self.process_inline(&remaining) { + if self.process_inline() { continue; } return; } - _ => { + PartialDispatch::Other => { // For other inline elements, process character by character - if self.process_inline(&remaining) { + if self.process_inline() { continue; } return; @@ -158,28 +207,28 @@ impl StreamParser { // No partial state - detect new elements if self.at_line_start { - if let Some(consumed) = self.try_block_start(&remaining) { + if let Some(consumed) = self.try_block_start() { self.advance(consumed); continue; } - if self.could_be_block_start(&remaining) { + if self.could_be_block_start() { return; } } // Fall back to inline processing - if self.process_inline(&remaining) { + if self.process_inline() { continue; } return; } } - /// Check if text could be the start of a block element but we don't + /// Check if remaining text could be the start of a block element but we don't /// have enough characters to confirm yet. Used to defer consuming /// ambiguous prefixes like "`" or "``" that might become "```". - fn could_be_block_start(&self, text: &str) -> bool { - let trimmed = text.trim_start(); + fn could_be_block_start(&self) -> bool { + let trimmed = self.remaining().trim_start(); if trimmed.is_empty() { return false; } @@ -211,7 +260,8 @@ impl StreamParser { /// Try to detect a block-level element at line start. /// Returns bytes consumed if successful. 
- fn try_block_start(&mut self, text: &str) -> Option<usize> { + fn try_block_start(&mut self) -> Option<usize> { + let text = self.remaining(); let trimmed = text.trim_start(); let leading_space = text.len() - trimmed.len(); @@ -221,12 +271,17 @@ impl StreamParser { if level <= 6 { if let Some(rest) = trimmed.get(level..) { if rest.starts_with(' ') || rest.is_empty() { - self.partial = Some(Partial::new( + let consumed = leading_space + level + rest.starts_with(' ') as usize; + let content_start = self.process_pos + consumed; + let mut partial = Partial::new( PartialKind::Heading { level: level as u8 }, self.process_pos, - )); + ); + partial.content_start = content_start; + partial.content_end = content_start; + self.partial = Some(partial); self.at_line_start = false; - return Some(leading_space + level + rest.starts_with(' ') as usize); + return Some(consumed); } } } @@ -241,37 +296,49 @@ impl StreamParser { let after_fence = &trimmed[fence_len..]; let (language, consumed_lang) = if let Some(nl_pos) = after_fence.find('\n') { let lang = after_fence[..nl_pos].trim(); - ( - if lang.is_empty() { - None - } else { - Some(lang.to_string()) - }, - nl_pos + 1, - ) + let lang_span = if lang.is_empty() { + None + } else { + // Compute absolute span for the language + let lang_start_in_after = after_fence[..nl_pos].as_ptr() as usize + - after_fence.as_ptr() as usize + + (after_fence[..nl_pos].len() + - after_fence[..nl_pos].trim_start().len()); + let abs_start = + self.process_pos + leading_space + fence_len + lang_start_in_after; + Some(Span::new(abs_start, abs_start + lang.len())) + }; + (lang_span, nl_pos + 1) } else { // No newline yet - language might be incomplete let lang = after_fence.trim(); - ( - if lang.is_empty() { - None - } else { - Some(lang.to_string()) - }, - after_fence.len(), - ) + let lang_span = if lang.is_empty() { + None + } else { + let lang_start_in_after = + after_fence.len() - after_fence.trim_start().len(); + let abs_start = + self.process_pos 
+ leading_space + fence_len + lang_start_in_after; + Some(Span::new(abs_start, abs_start + lang.len())) + }; + (lang_span, after_fence.len()) }; - self.partial = Some(Partial::new( + let consumed = leading_space + fence_len + consumed_lang; + let content_start = self.process_pos + consumed; + let mut partial = Partial::new( PartialKind::CodeFence { fence_char, fence_len, language, }, self.process_pos, - )); + ); + partial.content_start = content_start; + partial.content_end = content_start; + self.partial = Some(partial); self.at_line_start = false; - return Some(leading_space + fence_len + consumed_lang); + return Some(consumed); } } @@ -296,16 +363,20 @@ impl StreamParser { if trimmed.starts_with('|') { if let Some(nl_pos) = trimmed.find('\n') { let line = &trimmed[..nl_pos]; - let cells = parse_table_row(line); + let line_abs_offset = self.process_pos + leading_space; + let cells = parse_table_row(line, line_abs_offset); if !cells.is_empty() { - self.partial = Some(Partial::new( + let mut partial = Partial::new( PartialKind::Table { headers: cells, rows: Vec::new(), seen_separator: false, }, self.process_pos, - )); + ); + partial.content_start = self.process_pos; + partial.content_end = self.process_pos + leading_space + nl_pos; + self.partial = Some(partial); self.at_line_start = true; return Some(leading_space + nl_pos + 1); } @@ -317,40 +388,56 @@ impl StreamParser { /// Process content inside a code fence. /// Returns true if we should continue processing, false if we need more input. 
- fn process_code_fence(&mut self, fence_char: char, fence_len: usize, text: &str) -> bool { - let partial = self.partial.as_mut().unwrap(); + fn process_code_fence(&mut self, fence_char: char, fence_len: usize) -> bool { + let text_start = self.process_pos; + let text_end = self.buffer.len(); + let mut pos = text_start; + + while pos < text_end { + // Find next line boundary + let line_end = self.buffer[pos..text_end] + .find('\n') + .map(|i| pos + i + 1) + .unwrap_or(text_end); + let line = &self.buffer[pos..line_end]; + + let partial = self.partial.as_mut().unwrap(); - for line in text.split_inclusive('\n') { // Check if we're at a line start within the code fence let at_content_line_start = - partial.content.is_empty() || partial.content.ends_with('\n'); + partial.content_is_empty() || self.buffer[..partial.content_end].ends_with('\n'); if at_content_line_start { let trimmed = line.trim_start(); // Check for closing fence if trimmed.len() >= fence_len - && trimmed.as_bytes().iter().take(fence_len).all(|&b| b == fence_char as u8) + && trimmed + .as_bytes() + .iter() + .take(fence_len) + .all(|&b| b == fence_char as u8) { let after_fence = &trimmed[fence_len..]; if after_fence.trim().is_empty() || after_fence.starts_with('\n') { // Found closing fence! Complete the code block let language = if let PartialKind::CodeFence { language, .. 
} = &partial.kind { - language.clone() + *language } else { None }; - let content = std::mem::take(&mut partial.content); - self.parsed - .push(MdElement::CodeBlock(CodeBlock { language, content })); + let content_span = partial.content_span(); + self.parsed.push(MdElement::CodeBlock(CodeBlock { + language, + content: content_span, + })); self.partial = None; self.at_line_start = true; // Advance past the closing fence line - let consumed = text.find(line).unwrap() + line.len(); - self.advance(consumed); + self.advance(line_end - text_start); return true; } } @@ -367,46 +454,63 @@ impl StreamParser { } } - // Not a closing fence - add to content - partial.content.push_str(line); + // Not a closing fence - extend content span to include this line + partial.content_end += line.len(); + pos = line_end; } // Consumed all available text, need more - self.advance(text.len()); + self.advance(text_end - text_start); false } /// Process heading content until newline. - fn process_heading(&mut self, level: u8, text: &str) -> bool { - if let Some(nl_pos) = text.find('\n') { + fn process_heading(&mut self, level: u8) -> bool { + let remaining = self.remaining(); + if let Some(nl_pos) = remaining.find('\n') { let partial = self.partial.as_mut().unwrap(); - partial.content.push_str(&text[..nl_pos]); + partial.content_end += nl_pos; - let content = std::mem::take(&mut partial.content).trim().to_string(); - self.parsed.push(MdElement::Heading { level, content }); + let content_span = partial.content_span(); + let trimmed = self.trim_span(content_span); + self.parsed.push(MdElement::Heading { + level, + content: trimmed, + }); self.partial = None; self.at_line_start = true; self.advance(nl_pos + 1); true } else { // No newline yet - accumulate + let len = remaining.len(); let partial = self.partial.as_mut().unwrap(); - partial.content.push_str(text); - self.advance(text.len()); + partial.content_end += len; + self.advance(len); false } } /// Process table content line by line. 
/// Returns true if we should continue processing, false if we need more input. - fn process_table(&mut self, text: &str) -> bool { + fn process_table(&mut self) -> bool { + let remaining = self.remaining(); // We need at least one complete line to process - if let Some(nl_pos) = text.find('\n') { - let line = &text[..nl_pos]; + if let Some(nl_pos) = remaining.find('\n') { + let line = &remaining[..nl_pos]; let trimmed = line.trim(); // Check if this line continues the table if trimmed.starts_with('|') { + // Capture everything we need from remaining before dropping the borrow + let is_sep = is_separator_row(trimmed); + let line_abs_offset = self.process_pos; + let trim_offset = line.len() - trimmed.len(); + let trimmed_span = Span::new( + self.process_pos + trim_offset, + self.process_pos + trim_offset + trimmed.len(), + ); + let cells = parse_table_row(trimmed, line_abs_offset + trim_offset); let partial = self.partial.as_mut().unwrap(); if let PartialKind::Table { ref mut rows, @@ -417,15 +521,22 @@ impl StreamParser { { if !*seen_separator { // Expecting separator row - if is_separator_row(trimmed) { + if is_sep { *seen_separator = true; } else { // Not a valid table — emit header as paragraph - let header_text = format!("| {} |", headers.join(" | ")); - let row_text = trimmed.to_string(); + let header_text = format!( + "| {} |", + headers + .iter() + .map(|s| s.resolve(&self.buffer)) + .collect::<Vec<_>>() + .join(" | ") + ); + let row_text = trimmed_span.resolve(&self.buffer); self.partial = None; let combined = format!("{}\n{}", header_text, row_text); - let inlines = parse_inline(&combined); + let inlines = parse_inline(&combined, 0); self.parsed.push(MdElement::Paragraph(inlines)); self.at_line_start = true; self.advance(nl_pos + 1); @@ -433,7 +544,6 @@ impl StreamParser { } } else { // Data row - let cells = parse_table_row(trimmed); rows.push(cells); } } @@ -453,8 +563,15 @@ impl StreamParser { self.parsed.push(MdElement::Table { headers, rows }); } else 
{ // Never saw separator — emit as paragraph - let text = format!("| {} |", headers.join(" | ")); - let inlines = parse_inline(&text); + let text = format!( + "| {} |", + headers + .iter() + .map(|s| s.resolve(&self.buffer)) + .collect::<Vec<_>>() + .join(" | ") + ); + let inlines = parse_inline(&text, 0); self.parsed.push(MdElement::Paragraph(inlines)); } } @@ -465,7 +582,7 @@ impl StreamParser { // No newline yet — check if we have a partial line starting with | // If so, wait for more input. If not, table is done. - let trimmed = text.trim(); + let trimmed = remaining.trim(); if trimmed.starts_with('|') || trimmed.is_empty() { // Could be another table row, wait for newline return false; @@ -482,8 +599,15 @@ impl StreamParser { if seen_separator { self.parsed.push(MdElement::Table { headers, rows }); } else { - let text = format!("| {} |", headers.join(" | ")); - let inlines = parse_inline(&text); + let text = format!( + "| {} |", + headers + .iter() + .map(|s| s.resolve(&self.buffer)) + .collect::<Vec<_>>() + .join(" | ") + ); + let inlines = parse_inline(&text, 0); self.parsed.push(MdElement::Paragraph(inlines)); } } @@ -492,18 +616,22 @@ impl StreamParser { } /// Process inline content. 
- fn process_inline(&mut self, text: &str) -> bool { + fn process_inline(&mut self) -> bool { + let remaining = self.remaining(); + // Check for paragraph break split across tokens: // partial content ends with \n and new text starts with \n - if text.starts_with('\n') { + if remaining.starts_with('\n') { if let Some(ref partial) = self.partial { - if partial.content.ends_with('\n') { + if self.buffer[..partial.content_end].ends_with('\n') { // Double newline split across token boundary — emit paragraph - let para_text = std::mem::take(&mut self.partial.as_mut().unwrap().content); + let span = partial.content_span(); + let trimmed = self.trim_span(span); self.partial = None; - if !para_text.trim().is_empty() { - let inline_elements = parse_inline(para_text.trim()); + if !trimmed.is_empty() { + let content = trimmed.resolve(&self.buffer); + let inline_elements = parse_inline(content, trimmed.start); self.parsed.push(MdElement::Paragraph(inline_elements)); } self.at_line_start = true; @@ -513,13 +641,11 @@ impl StreamParser { } } - if let Some(nl_pos) = text.find('\n') { - let after_nl = &text[nl_pos + 1..]; + if let Some(nl_pos) = remaining.find('\n') { + let after_nl = &remaining[nl_pos + 1..]; // Check if text after the newline starts a block element (code fence, heading, etc.) // If so, emit the current paragraph and let the block parser handle the rest. - // This must happen before the \n\n check so that block starts aren't - // gobbled into paragraph text by a later double-newline. 
if !after_nl.is_empty() { let trimmed_after = after_nl.trim_start(); let is_block_start = trimmed_after.starts_with("```") @@ -528,17 +654,28 @@ impl StreamParser { || trimmed_after.starts_with('|'); if is_block_start { // Accumulate text before the newline into the paragraph - let para_text = if let Some(ref mut partial) = self.partial { - partial.content.push_str(&text[..nl_pos]); - std::mem::take(&mut partial.content) - } else { - text[..nl_pos].to_string() - }; - self.partial = None; + if let Some(ref mut partial) = self.partial { + partial.content_end += nl_pos; + let span = partial.content_span(); + let trimmed = self.trim_span(span); + self.partial = None; - if !para_text.trim().is_empty() { - let inline_elements = parse_inline(para_text.trim()); - self.parsed.push(MdElement::Paragraph(inline_elements)); + if !trimmed.is_empty() { + let content = trimmed.resolve(&self.buffer); + let inline_elements = parse_inline(content, trimmed.start); + self.parsed.push(MdElement::Paragraph(inline_elements)); + } + } else { + let start = self.process_pos; + let end = self.process_pos + nl_pos; + let span = Span::new(start, end); + let trimmed = self.trim_span(span); + + if !trimmed.is_empty() { + let content = trimmed.resolve(&self.buffer); + let inline_elements = parse_inline(content, trimmed.start); + self.parsed.push(MdElement::Paragraph(inline_elements)); + } } self.at_line_start = true; self.advance(nl_pos + 1); @@ -547,39 +684,53 @@ impl StreamParser { } } - if let Some(nl_pos) = text.find("\n\n") { + // Re-borrow remaining since prior branches may not have taken + let remaining = self.remaining(); + + if let Some(nl_pos) = remaining.find("\n\n") { // Double newline = paragraph break // Combine accumulated partial content with text before \n\n - let para_text = if let Some(ref mut partial) = self.partial { - partial.content.push_str(&text[..nl_pos]); - std::mem::take(&mut partial.content) + if let Some(ref mut partial) = self.partial { + partial.content_end += 
nl_pos; + let span = partial.content_span(); + let trimmed = self.trim_span(span); + self.partial = None; + + if !trimmed.is_empty() { + let content = trimmed.resolve(&self.buffer); + let inline_elements = parse_inline(content, trimmed.start); + self.parsed.push(MdElement::Paragraph(inline_elements)); + } } else { - text[..nl_pos].to_string() - }; - self.partial = None; - - if !para_text.trim().is_empty() { - // Parse inline elements from the full paragraph text - let inline_elements = parse_inline(para_text.trim()); - self.parsed.push(MdElement::Paragraph(inline_elements)); + let start = self.process_pos; + let end = self.process_pos + nl_pos; + let span = Span::new(start, end); + let trimmed = self.trim_span(span); + + if !trimmed.is_empty() { + let content = trimmed.resolve(&self.buffer); + let inline_elements = parse_inline(content, trimmed.start); + self.parsed.push(MdElement::Paragraph(inline_elements)); + } } self.at_line_start = true; self.advance(nl_pos + 2); return true; } - if let Some(nl_pos) = text.find('\n') { - + if let Some(nl_pos) = remaining.find('\n') { // Single newline - continue accumulating but track position if let Some(ref mut partial) = self.partial { - partial.content.push_str(&text[..=nl_pos]); + partial.content_end += nl_pos + 1; } else { // Start accumulating paragraph - let content = text[..=nl_pos].to_string(); + let content_start = self.process_pos; + let content_end = self.process_pos + nl_pos + 1; self.partial = Some(Partial { kind: PartialKind::Paragraph, start_pos: self.process_pos, - content, + content_start, + content_end, }); } self.at_line_start = true; @@ -588,17 +739,21 @@ impl StreamParser { } // No newline - accumulate + let len = remaining.len(); if let Some(ref mut partial) = self.partial { - partial.content.push_str(text); + partial.content_end += len; } else { + let content_start = self.process_pos; + let content_end = self.process_pos + len; self.partial = Some(Partial { kind: PartialKind::Paragraph, start_pos: 
self.process_pos, - content: text.to_string(), + content_start, + content_end, }); } self.at_line_start = false; - self.advance(text.len()); + self.advance(len); false } @@ -616,13 +771,14 @@ impl StreamParser { // Unclosed code block - emit what we have self.parsed.push(MdElement::CodeBlock(CodeBlock { language, - content: partial.content, + content: partial.content_span(), })); } PartialKind::Heading { level } => { + let trimmed = self.trim_span(partial.content_span()); self.parsed.push(MdElement::Heading { level, - content: partial.content.trim().to_string(), + content: trimmed, }); } PartialKind::Table { @@ -634,21 +790,32 @@ impl StreamParser { self.parsed.push(MdElement::Table { headers, rows }); } else { // Never saw separator — not a real table, emit as paragraph - let text = format!("| {} |", headers.join(" | ")); - let inlines = parse_inline(&text); + let text = format!( + "| {} |", + headers + .iter() + .map(|s| s.resolve(&self.buffer)) + .collect::<Vec<_>>() + .join(" | ") + ); + let inlines = parse_inline(&text, 0); self.parsed.push(MdElement::Paragraph(inlines)); } } PartialKind::Paragraph => { - if !partial.content.trim().is_empty() { - let inline_elements = parse_inline(partial.content.trim()); + let trimmed = self.trim_span(partial.content_span()); + if !trimmed.is_empty() { + let content = trimmed.resolve(&self.buffer); + let inline_elements = parse_inline(content, trimmed.start); self.parsed.push(MdElement::Paragraph(inline_elements)); } } _ => { // Other partial kinds (lists, blockquotes, etc.) 
- emit as paragraph for now - if !partial.content.trim().is_empty() { - let inline_elements = parse_inline(partial.content.trim()); + let trimmed = self.trim_span(partial.content_span()); + if !trimmed.is_empty() { + let content = trimmed.resolve(&self.buffer); + let inline_elements = parse_inline(content, trimmed.start); self.parsed.push(MdElement::Paragraph(inline_elements)); } } @@ -663,21 +830,51 @@ impl Default for StreamParser { } } -/// Parse a table row into cells by splitting on `|`. -/// Strips outer pipes and trims each cell. -fn parse_table_row(line: &str) -> Vec<String> { +/// Parse a table row into cell spans by splitting on `|`. +/// `line_offset` is the absolute buffer position of `line`. +fn parse_table_row(line: &str, line_offset: usize) -> Vec<Span> { let trimmed = line.trim(); - let inner = trimmed.strip_prefix('|').unwrap_or(trimmed); - let inner = inner.strip_suffix('|').unwrap_or(inner); - inner.split('|').map(|c| c.trim().to_string()).collect() + let trim_start = line.len() - line.trim_start().len(); + let base = line_offset + trim_start; + + let inner_start; + let inner; + if let Some(stripped) = trimmed.strip_prefix('|') { + inner_start = base + 1; + inner = stripped.strip_suffix('|').unwrap_or(stripped); + } else { + inner_start = base; + inner = trimmed.strip_suffix('|').unwrap_or(trimmed); + }; + + let mut result = Vec::new(); + let mut pos = 0; + for cell in inner.split('|') { + let cell_start = inner_start + pos; + let cell_trimmed = cell.trim(); + if cell_trimmed.is_empty() { + // Empty cell — use a zero-length span at the position + result.push(Span::new(cell_start, cell_start)); + } else { + let ltrim = cell.len() - cell.trim_start().len(); + let span_start = cell_start + ltrim; + let span_end = span_start + cell_trimmed.len(); + result.push(Span::new(span_start, span_end)); + } + pos += cell.len() + 1; // +1 for the | delimiter + } + result } /// Check if a line is a table separator row (e.g. `|---|---|`). 
fn is_separator_row(line: &str) -> bool { - let cells = parse_table_row(line); + let trimmed = line.trim(); + let inner = trimmed.strip_prefix('|').unwrap_or(trimmed); + let inner = inner.strip_suffix('|').unwrap_or(inner); + let cells: Vec<&str> = inner.split('|').map(|c| c.trim()).collect(); !cells.is_empty() && cells.iter().all(|c| { - let t = c.trim().trim_matches(':'); + let t = c.trim_matches(':'); !t.is_empty() && t.chars().all(|ch| ch == '-') }) } diff --git a/crates/md-stream/src/partial.rs b/crates/md-stream/src/partial.rs @@ -1,5 +1,7 @@ //! Partial state tracking for incomplete markdown elements. +use crate::element::Span; + /// Tracks an in-progress markdown element that might be completed /// when more tokens arrive. #[derive(Debug, Clone)] @@ -10,8 +12,11 @@ pub struct Partial { /// Byte offset into the buffer where this element starts pub start_pos: usize, - /// Accumulated content so far (for elements that need it) - pub content: String, + /// Start of content region (after markers like "# ") + pub content_start: usize, + + /// End of content accumulated so far + pub content_end: usize, } impl Partial { @@ -19,9 +24,25 @@ impl Partial { Self { kind, start_pos, - content: String::new(), + content_start: start_pos, + content_end: start_pos, } } + + /// Get the content span + pub fn content_span(&self) -> Span { + Span::new(self.content_start, self.content_end) + } + + /// Get the content as a string slice from the buffer + pub fn content<'a>(&self, buffer: &'a str) -> &'a str { + &buffer[self.content_start..self.content_end] + } + + /// Check if content is empty + pub fn content_is_empty(&self) -> bool { + self.content_start == self.content_end + } } /// The kind of partial element being tracked. 
@@ -32,7 +53,7 @@ pub enum PartialKind { CodeFence { fence_char: char, // ` or ~ fence_len: usize, // typically 3 - language: Option<String>, + language: Option<Span>, }, /// Inline code waiting for closing backtick(s) @@ -54,10 +75,10 @@ pub enum PartialKind { /// Link: seen [, waiting for ](url) /// States: text, post-bracket, url - Link { state: LinkState, text: String }, + Link { state: LinkState, text: Span }, /// Image: seen ![, waiting for ](url) - Image { state: LinkState, alt: String }, + Image { state: LinkState, alt: Span }, /// Heading started with # at line start, collecting content Heading { level: u8 }, @@ -74,8 +95,8 @@ pub enum PartialKind { /// Table being accumulated row by row Table { - headers: Vec<String>, - rows: Vec<Vec<String>>, + headers: Vec<Span>, + rows: Vec<Vec<Span>>, seen_separator: bool, }, @@ -91,5 +112,5 @@ pub enum LinkState { /// Seen ], expecting ( PostBracket, /// Collecting URL between ( and ) - Url(String), + Url(Span), } diff --git a/crates/md-stream/src/tests.rs b/crates/md-stream/src/tests.rs @@ -1,21 +1,27 @@ //! Tests for streaming parser behavior. +use crate::element::Span; use crate::partial::PartialKind; use crate::{InlineElement, InlineStyle, MdElement, StreamParser}; +/// Helper to resolve a Span against a parser's buffer. 
+fn r<'a>(span: &Span, buf: &'a str) -> &'a str { + span.resolve(buf) +} + #[test] fn test_heading_complete() { let mut parser = StreamParser::new(); parser.push("# Hello World\n"); assert_eq!(parser.parsed().len(), 1); - assert_eq!( - parser.parsed()[0], - MdElement::Heading { - level: 1, - content: "Hello World".to_string() + match &parser.parsed()[0] { + MdElement::Heading { level, content } => { + assert_eq!(*level, 1); + assert_eq!(r(content, parser.buffer()), "Hello World"); } - ); + other => panic!("Expected heading, got {:?}", other), + } } #[test] @@ -32,13 +38,13 @@ fn test_heading_streaming() { parser.push("ld\n"); assert_eq!(parser.parsed().len(), 1); - assert_eq!( - parser.parsed()[0], - MdElement::Heading { - level: 1, - content: "Hello World".to_string() + match &parser.parsed()[0] { + MdElement::Heading { level, content } => { + assert_eq!(*level, 1); + assert_eq!(r(content, parser.buffer()), "Hello World"); } - ); + other => panic!("Expected heading, got {:?}", other), + } } #[test] @@ -49,8 +55,8 @@ fn test_code_block_complete() { assert_eq!(parser.parsed().len(), 1); match &parser.parsed()[0] { MdElement::CodeBlock(cb) => { - assert_eq!(cb.language.as_deref(), Some("rust")); - assert_eq!(cb.content, "fn main() {}\n"); + assert_eq!(cb.language.map(|s| r(&s, parser.buffer())), Some("rust")); + assert_eq!(r(&cb.content, parser.buffer()), "fn main() {}\n"); } _ => panic!("Expected code block"), } @@ -104,7 +110,7 @@ fn test_finalize_incomplete_code() { assert_eq!(parser.parsed().len(), 1); match &parser.parsed()[0] { MdElement::CodeBlock(cb) => { - assert!(cb.content.contains("unclosed code")); + assert!(r(&cb.content, parser.buffer()).contains("unclosed code")); } _ => panic!("Expected code block"), } @@ -198,10 +204,11 @@ fn test_inline_bold() { assert_eq!(parser.parsed().len(), 1); if let MdElement::Paragraph(inlines) = &parser.parsed()[0] { + let buf = parser.buffer(); assert!( inlines.iter().any(|e| matches!( e, - InlineElement::Styled { style: 
InlineStyle::Bold, content } if content == "bold" + InlineElement::Styled { style: InlineStyle::Bold, content } if r(content, buf) == "bold" )), "Expected bold element, got: {:?}", inlines @@ -218,10 +225,11 @@ fn test_inline_italic() { assert_eq!(parser.parsed().len(), 1); if let MdElement::Paragraph(inlines) = &parser.parsed()[0] { + let buf = parser.buffer(); assert!( inlines.iter().any(|e| matches!( e, - InlineElement::Styled { style: InlineStyle::Italic, content } if content == "italic" + InlineElement::Styled { style: InlineStyle::Italic, content } if r(content, buf) == "italic" )), "Expected italic element, got: {:?}", inlines @@ -238,10 +246,11 @@ fn test_inline_code() { assert_eq!(parser.parsed().len(), 1); if let MdElement::Paragraph(inlines) = &parser.parsed()[0] { + let buf = parser.buffer(); assert!( inlines.iter().any(|e| matches!( e, - InlineElement::Code(s) if s == "code" + InlineElement::Code(s) if r(s, buf) == "code" )), "Expected code element, got: {:?}", inlines @@ -258,9 +267,10 @@ fn test_inline_link() { assert_eq!(parser.parsed().len(), 1); if let MdElement::Paragraph(inlines) = &parser.parsed()[0] { + let buf = parser.buffer(); assert!(inlines.iter().any(|e| matches!( e, - InlineElement::Link { text, url } if text == "this link" && url == "https://example.com" + InlineElement::Link { text, url } if r(text, buf) == "this link" && r(url, buf) == "https://example.com" )), "Expected link element, got: {:?}", inlines); } else { panic!("Expected paragraph"); @@ -274,10 +284,11 @@ fn test_inline_image() { assert_eq!(parser.parsed().len(), 1); if let MdElement::Paragraph(inlines) = &parser.parsed()[0] { + let buf = parser.buffer(); assert!( inlines.iter().any(|e| matches!( e, - InlineElement::Image { alt, url } if alt == "alt text" && url == "image.png" + InlineElement::Image { alt, url } if r(alt, buf) == "alt text" && r(url, buf) == "image.png" )), "Expected image element, got: {:?}", inlines @@ -294,9 +305,10 @@ fn test_inline_strikethrough() { 
assert_eq!(parser.parsed().len(), 1); if let MdElement::Paragraph(inlines) = &parser.parsed()[0] { + let buf = parser.buffer(); assert!(inlines.iter().any(|e| matches!( e, - InlineElement::Styled { style: InlineStyle::Strikethrough, content } if content == "deleted" + InlineElement::Styled { style: InlineStyle::Strikethrough, content } if r(content, buf) == "deleted" )), "Expected strikethrough element, got: {:?}", inlines); } else { panic!("Expected paragraph"); @@ -351,9 +363,10 @@ fn test_inline_finalize() { // Now should have parsed with inline formatting assert_eq!(parser.parsed().len(), 1); if let MdElement::Paragraph(inlines) = &parser.parsed()[0] { + let buf = parser.buffer(); assert!(inlines.iter().any(|e| matches!( e, - InlineElement::Styled { style: InlineStyle::Bold, content } if content == "bold" + InlineElement::Styled { style: InlineStyle::Bold, content } if r(content, buf) == "bold" ))); } else { panic!("Expected paragraph"); @@ -422,9 +435,10 @@ fn test_paragraph_finalize_emits_content() { assert_eq!(parser.parsed().len(), 1); if let MdElement::Paragraph(inlines) = &parser.parsed()[0] { + let buf = parser.buffer(); assert!(inlines.iter().any(|e| matches!( e, - InlineElement::Text(s) if s.contains("Incomplete paragraph") + InlineElement::Text(s) if r(s, buf).contains("Incomplete paragraph") ))); } else { panic!("Expected paragraph"); @@ -435,21 +449,27 @@ fn test_paragraph_finalize_emits_content() { fn test_inline_code_with_angle_brackets() { // Test parse_inline directly let input = "Generic Rust: `impl Iterator<Item = &str>` returns a `Result<(), anyhow::Error>`"; - let result = crate::parse_inline(input); + let result = crate::parse_inline(input, 0); eprintln!("parse_inline result: {:#?}", result); let code_elements: Vec<_> = result .iter() .filter(|e| matches!(e, InlineElement::Code(_))) .collect(); - assert_eq!(code_elements.len(), 2, "Expected 2 code spans, got: {:#?}", result); + assert_eq!( + code_elements.len(), + 2, + "Expected 2 code 
spans, got: {:#?}", + result + ); } #[test] fn test_streaming_inline_code_with_angle_brackets() { // Test streaming parser with token-by-token delivery let mut parser = StreamParser::new(); - let input = "5. Generic Rust: `impl Iterator<Item = &str>` returns a `Result<(), anyhow::Error>`\n\n"; + let input = + "5. Generic Rust: `impl Iterator<Item = &str>` returns a `Result<(), anyhow::Error>`\n\n"; // Simulate streaming token by token for ch in input.chars() { @@ -467,7 +487,12 @@ fn test_streaming_inline_code_with_angle_brackets() { .iter() .filter(|e| matches!(e, InlineElement::Code(_))) .collect(); - assert_eq!(code_elements.len(), 2, "Expected 2 code spans, got: {:#?}", inlines); + assert_eq!( + code_elements.len(), + 2, + "Expected 2 code spans, got: {:#?}", + inlines + ); } else { panic!("Expected paragraph, got: {:?}", parser.parsed()[0]); } @@ -477,7 +502,8 @@ fn test_streaming_inline_code_with_angle_brackets() { fn test_streaming_multiple_code_spans_with_angle_brackets() { // From the screenshot: multiple code spans with nested angle brackets let mut parser = StreamParser::new(); - let input = "use `HashMap<K, V>` or `Vec<String>` or `Option<Box<dyn Error>>` in your types\n\n"; + let input = + "use `HashMap<K, V>` or `Vec<String>` or `Option<Box<dyn Error>>` in your types\n\n"; for ch in input.chars() { parser.push(&ch.to_string()); @@ -490,7 +516,12 @@ fn test_streaming_multiple_code_spans_with_angle_brackets() { .iter() .filter(|e| matches!(e, InlineElement::Code(_))) .collect(); - assert_eq!(code_elements.len(), 3, "Expected 3 code spans, got: {:#?}", inlines); + assert_eq!( + code_elements.len(), + 3, + "Expected 3 code spans, got: {:#?}", + inlines + ); } else { panic!("Expected paragraph, got: {:?}", parser.parsed()[0]); } @@ -499,7 +530,6 @@ fn test_streaming_multiple_code_spans_with_angle_brackets() { #[test] fn test_code_block_after_paragraph_single_newline() { // Reproduces: paragraph text ending with ":\n" then "```\n" code block - // This is 
the common pattern: "All events share these common tags:\n```\n..." let mut parser = StreamParser::new(); let input = "All events share these common tags:\n```\n[\"d\", \"<session-id>\"]\n```\n"; parser.push(input); @@ -512,8 +542,14 @@ fn test_code_block_after_paragraph_single_newline() { eprintln!("After finalize - parsed: {:#?}", parser.parsed()); // Should have: paragraph + code block - let has_paragraph = parser.parsed().iter().any(|e| matches!(e, MdElement::Paragraph(_))); - let has_code_block = parser.parsed().iter().any(|e| matches!(e, MdElement::CodeBlock(_))); + let has_paragraph = parser + .parsed() + .iter() + .any(|e| matches!(e, MdElement::Paragraph(_))); + let has_code_block = parser + .parsed() + .iter() + .any(|e| matches!(e, MdElement::CodeBlock(_))); assert!(has_paragraph, "Missing paragraph element"); assert!(has_code_block, "Missing code block element"); @@ -531,14 +567,23 @@ fn test_code_block_after_paragraph_single_newline_streaming() { eprintln!("Before finalize - parsed: {:#?}", parser.parsed()); eprintln!("Before finalize - partial: {:#?}", parser.partial()); - eprintln!("Before finalize - in_code_block: {}", parser.in_code_block()); + eprintln!( + "Before finalize - in_code_block: {}", + parser.in_code_block() + ); parser.finalize(); eprintln!("After finalize - parsed: {:#?}", parser.parsed()); - let has_paragraph = parser.parsed().iter().any(|e| matches!(e, MdElement::Paragraph(_))); - let has_code_block = parser.parsed().iter().any(|e| matches!(e, MdElement::CodeBlock(_))); + let has_paragraph = parser + .parsed() + .iter() + .any(|e| matches!(e, MdElement::Paragraph(_))); + let has_code_block = parser + .parsed() + .iter() + .any(|e| matches!(e, MdElement::CodeBlock(_))); assert!(has_paragraph, "Missing paragraph element"); assert!(has_code_block, "Missing code block element"); @@ -564,14 +609,27 @@ fn test_table_basic_batch() { let mut parser = StreamParser::new(); parser.push("| Name | Age |\n|------|-----|\n| Alice | 30 |\n| Bob | 
25 |\n\n"); - let tables: Vec<_> = parser.parsed().iter().filter(|e| matches!(e, MdElement::Table { .. })).collect(); - assert_eq!(tables.len(), 1, "Expected 1 table, got: {:#?}", parser.parsed()); + let tables: Vec<_> = parser + .parsed() + .iter() + .filter(|e| matches!(e, MdElement::Table { .. })) + .collect(); + assert_eq!( + tables.len(), + 1, + "Expected 1 table, got: {:#?}", + parser.parsed() + ); if let MdElement::Table { headers, rows } = &tables[0] { - assert_eq!(headers, &["Name", "Age"]); + let buf = parser.buffer(); + let h: Vec<&str> = headers.iter().map(|s| r(s, buf)).collect(); + assert_eq!(h, &["Name", "Age"]); assert_eq!(rows.len(), 2); - assert_eq!(rows[0], &["Alice", "30"]); - assert_eq!(rows[1], &["Bob", "25"]); + let r0: Vec<&str> = rows[0].iter().map(|s| r(s, buf)).collect(); + let r1: Vec<&str> = rows[1].iter().map(|s| r(s, buf)).collect(); + assert_eq!(r0, &["Alice", "30"]); + assert_eq!(r1, &["Bob", "25"]); } } @@ -584,14 +642,27 @@ fn test_table_streaming_char_by_char() { parser.push(&ch.to_string()); } - let tables: Vec<_> = parser.parsed().iter().filter(|e| matches!(e, MdElement::Table { .. })).collect(); - assert_eq!(tables.len(), 1, "Expected 1 table, got: {:#?}", parser.parsed()); + let tables: Vec<_> = parser + .parsed() + .iter() + .filter(|e| matches!(e, MdElement::Table { .. 
})) + .collect(); + assert_eq!( + tables.len(), + 1, + "Expected 1 table, got: {:#?}", + parser.parsed() + ); if let MdElement::Table { headers, rows } = &tables[0] { - assert_eq!(headers, &["Name", "Age"]); + let buf = parser.buffer(); + let h: Vec<&str> = headers.iter().map(|s| r(s, buf)).collect(); + assert_eq!(h, &["Name", "Age"]); assert_eq!(rows.len(), 2); - assert_eq!(rows[0], &["Alice", "30"]); - assert_eq!(rows[1], &["Bob", "25"]); + let r0: Vec<&str> = rows[0].iter().map(|s| r(s, buf)).collect(); + let r1: Vec<&str> = rows[1].iter().map(|s| r(s, buf)).collect(); + assert_eq!(r0, &["Alice", "30"]); + assert_eq!(r1, &["Bob", "25"]); } } @@ -600,10 +671,20 @@ fn test_table_after_paragraph() { let mut parser = StreamParser::new(); parser.push("Here is a comparison:\n| A | B |\n|---|---|\n| 1 | 2 |\n\n"); - let has_paragraph = parser.parsed().iter().any(|e| matches!(e, MdElement::Paragraph(_))); - let has_table = parser.parsed().iter().any(|e| matches!(e, MdElement::Table { .. })); + let has_paragraph = parser + .parsed() + .iter() + .any(|e| matches!(e, MdElement::Paragraph(_))); + let has_table = parser + .parsed() + .iter() + .any(|e| matches!(e, MdElement::Table { .. })); - assert!(has_paragraph, "Missing paragraph, got: {:#?}", parser.parsed()); + assert!( + has_paragraph, + "Missing paragraph, got: {:#?}", + parser.parsed() + ); assert!(has_table, "Missing table, got: {:#?}", parser.parsed()); } @@ -616,10 +697,20 @@ fn test_table_after_paragraph_streaming() { parser.push(&ch.to_string()); } - let has_paragraph = parser.parsed().iter().any(|e| matches!(e, MdElement::Paragraph(_))); - let has_table = parser.parsed().iter().any(|e| matches!(e, MdElement::Table { .. })); + let has_paragraph = parser + .parsed() + .iter() + .any(|e| matches!(e, MdElement::Paragraph(_))); + let has_table = parser + .parsed() + .iter() + .any(|e| matches!(e, MdElement::Table { .. 
})); - assert!(has_paragraph, "Missing paragraph, got: {:#?}", parser.parsed()); + assert!( + has_paragraph, + "Missing paragraph, got: {:#?}", + parser.parsed() + ); assert!(has_table, "Missing table, got: {:#?}", parser.parsed()); } @@ -628,11 +719,21 @@ fn test_table_then_paragraph() { let mut parser = StreamParser::new(); parser.push("| X | Y |\n|---|---|\n| a | b |\n\nSome text after.\n\n"); - let has_table = parser.parsed().iter().any(|e| matches!(e, MdElement::Table { .. })); - let has_paragraph = parser.parsed().iter().any(|e| matches!(e, MdElement::Paragraph(_))); + let has_table = parser + .parsed() + .iter() + .any(|e| matches!(e, MdElement::Table { .. })); + let has_paragraph = parser + .parsed() + .iter() + .any(|e| matches!(e, MdElement::Paragraph(_))); assert!(has_table, "Missing table, got: {:#?}", parser.parsed()); - assert!(has_paragraph, "Missing paragraph, got: {:#?}", parser.parsed()); + assert!( + has_paragraph, + "Missing paragraph, got: {:#?}", + parser.parsed() + ); } #[test] @@ -641,8 +742,15 @@ fn test_table_no_separator_not_a_table() { // Two pipe rows but no separator — should not be a table parser.push("| foo | bar |\n| baz | qux |\n\n"); - let has_table = parser.parsed().iter().any(|e| matches!(e, MdElement::Table { .. })); - assert!(!has_table, "Should NOT be a table without separator row, got: {:#?}", parser.parsed()); + let has_table = parser + .parsed() + .iter() + .any(|e| matches!(e, MdElement::Table { .. })); + assert!( + !has_table, + "Should NOT be a table without separator row, got: {:#?}", + parser.parsed() + ); } #[test] @@ -650,7 +758,11 @@ fn test_table_uneven_columns() { let mut parser = StreamParser::new(); parser.push("| A | B | C |\n|---|---|---|\n| 1 | 2 |\n| x | y | z |\n\n"); - let tables: Vec<_> = parser.parsed().iter().filter(|e| matches!(e, MdElement::Table { .. })).collect(); + let tables: Vec<_> = parser + .parsed() + .iter() + .filter(|e| matches!(e, MdElement::Table { .. 
})) + .collect(); assert_eq!(tables.len(), 1); if let MdElement::Table { headers, rows } = &tables[0] { @@ -666,13 +778,25 @@ fn test_table_with_alignment() { let mut parser = StreamParser::new(); parser.push("| Left | Center | Right |\n|:-----|:------:|------:|\n| a | b | c |\n\n"); - let tables: Vec<_> = parser.parsed().iter().filter(|e| matches!(e, MdElement::Table { .. })).collect(); - assert_eq!(tables.len(), 1, "Expected table with alignment separators, got: {:#?}", parser.parsed()); + let tables: Vec<_> = parser + .parsed() + .iter() + .filter(|e| matches!(e, MdElement::Table { .. })) + .collect(); + assert_eq!( + tables.len(), + 1, + "Expected table with alignment separators, got: {:#?}", + parser.parsed() + ); if let MdElement::Table { headers, rows } = &tables[0] { - assert_eq!(headers, &["Left", "Center", "Right"]); + let buf = parser.buffer(); + let h: Vec<&str> = headers.iter().map(|s| r(s, buf)).collect(); + assert_eq!(h, &["Left", "Center", "Right"]); assert_eq!(rows.len(), 1); - assert_eq!(rows[0], &["a", "b", "c"]); + let r0: Vec<&str> = rows[0].iter().map(|s| r(s, buf)).collect(); + assert_eq!(r0, &["a", "b", "c"]); } } @@ -686,8 +810,15 @@ fn test_table_finalize_incomplete() { parser.finalize(); - let has_table = parser.parsed().iter().any(|e| matches!(e, MdElement::Table { .. })); - assert!(has_table, "Finalize should emit the table, got: {:#?}", parser.parsed()); + let has_table = parser + .parsed() + .iter() + .any(|e| matches!(e, MdElement::Table { .. })); + assert!( + has_table, + "Finalize should emit the table, got: {:#?}", + parser.parsed() + ); } #[test] @@ -695,11 +826,17 @@ fn test_table_single_column() { let mut parser = StreamParser::new(); parser.push("| Item |\n|------|\n| Apple |\n| Banana |\n\n"); - let tables: Vec<_> = parser.parsed().iter().filter(|e| matches!(e, MdElement::Table { .. })).collect(); + let tables: Vec<_> = parser + .parsed() + .iter() + .filter(|e| matches!(e, MdElement::Table { .. 
})) + .collect(); assert_eq!(tables.len(), 1); if let MdElement::Table { headers, rows } = &tables[0] { - assert_eq!(headers, &["Item"]); + let buf = parser.buffer(); + let h: Vec<&str> = headers.iter().map(|s| r(s, buf)).collect(); + assert_eq!(h, &["Item"]); assert_eq!(rows.len(), 2); } } @@ -709,13 +846,21 @@ fn test_table_empty_cells() { let mut parser = StreamParser::new(); parser.push("| A | B |\n|---|---|\n| | val |\n| val | |\n\n"); - let tables: Vec<_> = parser.parsed().iter().filter(|e| matches!(e, MdElement::Table { .. })).collect(); + let tables: Vec<_> = parser + .parsed() + .iter() + .filter(|e| matches!(e, MdElement::Table { .. })) + .collect(); assert_eq!(tables.len(), 1); if let MdElement::Table { headers, rows } = &tables[0] { - assert_eq!(headers, &["A", "B"]); - assert_eq!(rows[0], &["", "val"]); - assert_eq!(rows[1], &["val", ""]); + let buf = parser.buffer(); + let h: Vec<&str> = headers.iter().map(|s| r(s, buf)).collect(); + assert_eq!(h, &["A", "B"]); + let r0: Vec<&str> = rows[0].iter().map(|s| r(s, buf)).collect(); + let r1: Vec<&str> = rows[1].iter().map(|s| r(s, buf)).collect(); + assert_eq!(r0, &["", "val"]); + assert_eq!(r1, &["val", ""]); } } @@ -747,13 +892,27 @@ fn test_table_streaming_realistic_llm_chunks() { } parser.finalize(); - let has_paragraph = parser.parsed().iter().any(|e| matches!(e, MdElement::Paragraph(_))); - let has_table = parser.parsed().iter().any(|e| matches!(e, MdElement::Table { .. })); + let has_paragraph = parser + .parsed() + .iter() + .any(|e| matches!(e, MdElement::Paragraph(_))); + let has_table = parser + .parsed() + .iter() + .any(|e| matches!(e, MdElement::Table { .. 
})); - assert!(has_paragraph, "Missing paragraph, got: {:#?}", parser.parsed()); + assert!( + has_paragraph, + "Missing paragraph, got: {:#?}", + parser.parsed() + ); assert!(has_table, "Missing table, got: {:#?}", parser.parsed()); - if let Some(MdElement::Table { headers, rows }) = parser.parsed().iter().find(|e| matches!(e, MdElement::Table { .. })) { + if let Some(MdElement::Table { headers, rows }) = parser + .parsed() + .iter() + .find(|e| matches!(e, MdElement::Table { .. })) + { assert_eq!(headers.len(), 3, "Expected 3 headers, got: {:?}", headers); assert_eq!(rows.len(), 2, "Expected 2 rows, got: {:?}", rows); } @@ -768,7 +927,13 @@ fn test_table_partial_shows_during_streaming() { // Should have a table partial with seen_separator=true let partial = parser.partial().expect("Should have partial"); assert!( - matches!(&partial.kind, PartialKind::Table { seen_separator: true, .. }), + matches!( + &partial.kind, + PartialKind::Table { + seen_separator: true, + .. + } + ), "Expected table partial with seen_separator=true, got: {:?}", partial.kind ); diff --git a/crates/notedeck_dave/src/backend/claude.rs b/crates/notedeck_dave/src/backend/claude.rs @@ -5,8 +5,8 @@ use crate::backend::tool_summary::{ }; use crate::backend::traits::AiBackend; use crate::messages::{ - CompactionInfo, DaveApiResponse, PendingPermission, PermissionRequest, PermissionResponse, - SubagentInfo, SubagentStatus, ToolResult, + CompactionInfo, DaveApiResponse, ParsedMarkdown, PendingPermission, PermissionRequest, + PermissionResponse, SubagentInfo, SubagentStatus, ToolResult, }; use crate::tools::Tool; use crate::Message; @@ -341,14 +341,15 @@ async fn session_actor( let request_id = Uuid::new_v4(); let (ui_resp_tx, ui_resp_rx) = oneshot::channel(); - let cached_plan_elements = if perm_req.tool_name == "ExitPlanMode" { + let cached_plan = if perm_req.tool_name == "ExitPlanMode" { perm_req.tool_input.get("plan") .and_then(|v| v.as_str()) .map(|plan| { let mut parser = 
md_stream::StreamParser::new(); parser.push(plan); parser.finalize(); - parser.into_parsed() + let (elements, source) = parser.into_parts(); + ParsedMarkdown { source, elements } }) } else { None @@ -360,7 +361,7 @@ async fn session_actor( tool_input: perm_req.tool_input.clone(), response: None, answer_summary: None, - cached_plan_elements, + cached_plan, }; let pending = PendingPermission { diff --git a/crates/notedeck_dave/src/messages.rs b/crates/notedeck_dave/src/messages.rs @@ -1,6 +1,13 @@ use crate::tools::{ToolCall, ToolResponse}; use async_openai::types::*; use md_stream::{MdElement, Partial, StreamParser}; + +/// Pre-parsed markdown with source text for span resolution. +#[derive(Debug, Clone)] +pub struct ParsedMarkdown { + pub source: String, + pub elements: Vec<MdElement>, +} use nostrdb::{Ndb, Transaction}; use serde::{Deserialize, Serialize}; use tokio::sync::oneshot; @@ -52,8 +59,8 @@ pub struct PermissionRequest { pub response: Option<PermissionResponseType>, /// For AskUserQuestion: pre-computed summary of answers for display pub answer_summary: Option<AnswerSummary>, - /// For ExitPlanMode: pre-parsed markdown elements from the plan content - pub cached_plan_elements: Option<Vec<MdElement>>, + /// For ExitPlanMode: pre-parsed markdown with source text for span resolution + pub cached_plan: Option<ParsedMarkdown>, } /// A single entry in an answer summary @@ -247,6 +254,13 @@ impl AssistantMessage { &self.text } + /// Get the buffer for resolving spans in parsed elements. + /// This is the same as text() — both the parser and AssistantMessage + /// maintain identical buffers via push_str(token). + pub fn buffer(&self) -> &str { + &self.text + } + /// Get parsed markdown elements. 
pub fn parsed_elements(&self) -> &[MdElement] { if let Some(cached) = &self.cached_elements { diff --git a/crates/notedeck_dave/src/ui/dave.rs b/crates/notedeck_dave/src/ui/dave.rs @@ -698,13 +698,18 @@ impl<'a> DaveUi<'a> { ui.add_space(8.0); // Render plan content as markdown (pre-parsed at construction) - if let Some(elements) = &request.cached_plan_elements { - markdown_ui::render_assistant_message(elements, None, ui); - } else if let Some(plan) = + if let Some(plan) = &request.cached_plan { + markdown_ui::render_assistant_message( + &plan.elements, + None, + &plan.source, + ui, + ); + } else if let Some(plan_text) = request.tool_input.get("plan").and_then(|v| v.as_str()) { // Fallback: render as plain text - ui.label(plan); + ui.label(plan_text); } ui.add_space(8.0); @@ -1113,6 +1118,7 @@ impl<'a> DaveUi<'a> { fn assistant_chat(&self, msg: &AssistantMessage, ui: &mut egui::Ui) { let elements = msg.parsed_elements(); let partial = msg.partial(); - markdown_ui::render_assistant_message(elements, partial, ui); + let buffer = msg.buffer(); + markdown_ui::render_assistant_message(elements, partial, buffer, ui); } } diff --git a/crates/notedeck_dave/src/ui/markdown_ui.rs b/crates/notedeck_dave/src/ui/markdown_ui.rs @@ -4,6 +4,7 @@ use egui::text::LayoutJob; use egui::{Color32, FontFamily, FontId, RichText, TextFormat, Ui}; use md_stream::{ parse_inline, CodeBlock, InlineElement, InlineStyle, ListItem, MdElement, Partial, PartialKind, + Span, }; /// Theme for markdown rendering, derived from egui visuals. 
@@ -29,7 +30,7 @@ impl MdTheme { heading_sizes: [24.0, 20.0, 18.0, 16.0, 14.0, 12.0], code_bg, code_text: Color32::from_rgb(0xD4, 0xA5, 0x74), // Muted amber/sand - link_color: Color32::from_rgb(100, 149, 237), // Cornflower blue + link_color: Color32::from_rgb(100, 149, 237), // Cornflower blue blockquote_border: visuals.widgets.noninteractive.bg_stroke.color, blockquote_bg: visuals.faint_bg_color, } @@ -37,38 +38,50 @@ impl MdTheme { } /// Render all parsed markdown elements plus any partial state. -pub fn render_assistant_message(elements: &[MdElement], partial: Option<&Partial>, ui: &mut Ui) { +pub fn render_assistant_message( + elements: &[MdElement], + partial: Option<&Partial>, + buffer: &str, + ui: &mut Ui, +) { let theme = MdTheme::from_visuals(ui.visuals()); ui.vertical(|ui| { for element in elements { - render_element(element, &theme, ui); + render_element(element, &theme, buffer, ui); } // Render partial (speculative) content for immediate feedback if let Some(partial) = partial { - render_partial(partial, &theme, ui); + render_partial(partial, &theme, buffer, ui); } }); } -fn render_element(element: &MdElement, theme: &MdTheme, ui: &mut Ui) { +fn render_element(element: &MdElement, theme: &MdTheme, buffer: &str, ui: &mut Ui) { match element { MdElement::Heading { level, content } => { let size = theme.heading_sizes[(*level as usize).saturating_sub(1).min(5)]; - ui.add(egui::Label::new(RichText::new(content).size(size).strong()).wrap()); + ui.add( + egui::Label::new(RichText::new(content.resolve(buffer)).size(size).strong()).wrap(), + ); ui.add_space(4.0); } MdElement::Paragraph(inlines) => { ui.horizontal_wrapped(|ui| { - render_inlines(inlines, theme, ui); + render_inlines(inlines, theme, buffer, ui); }); ui.add_space(8.0); } MdElement::CodeBlock(CodeBlock { language, content }) => { - render_code_block(language.as_deref(), content, theme, ui); + render_code_block( + language.map(|s| s.resolve(buffer)), + content.resolve(buffer), + theme, + ui, + ); } 
MdElement::BlockQuote(nested) => { @@ -78,7 +91,7 @@ fn render_element(element: &MdElement, theme: &MdTheme, ui: &mut Ui) { .inner_margin(egui::Margin::symmetric(8, 4)) .show(ui, |ui| { for elem in nested { - render_element(elem, theme, ui); + render_element(elem, theme, buffer, ui); } }); ui.add_space(8.0); @@ -86,7 +99,7 @@ fn render_element(element: &MdElement, theme: &MdTheme, ui: &mut Ui) { MdElement::UnorderedList(items) => { for item in items { - render_list_item(item, "\u{2022}", theme, ui); + render_list_item(item, "\u{2022}", theme, buffer, ui); } ui.add_space(8.0); } @@ -94,13 +107,13 @@ fn render_element(element: &MdElement, theme: &MdTheme, ui: &mut Ui) { MdElement::OrderedList { start, items } => { for (i, item) in items.iter().enumerate() { let marker = format!("{}.", start + i as u32); - render_list_item(item, &marker, theme, ui); + render_list_item(item, &marker, theme, buffer, ui); } ui.add_space(8.0); } MdElement::Table { headers, rows } => { - render_table(headers, rows, theme, ui); + render_table(headers, rows, theme, buffer, ui); } MdElement::ThematicBreak => { @@ -108,8 +121,8 @@ fn render_element(element: &MdElement, theme: &MdTheme, ui: &mut Ui) { ui.add_space(8.0); } - MdElement::Text(text) => { - ui.label(text); + MdElement::Text(span) => { + ui.label(span.resolve(buffer)); } } } @@ -122,7 +135,7 @@ fn flush_job(job: &mut LayoutJob, ui: &mut Ui) { } } -fn render_inlines(inlines: &[InlineElement], theme: &MdTheme, ui: &mut Ui) { +fn render_inlines(inlines: &[InlineElement], theme: &MdTheme, buffer: &str, ui: &mut Ui) { let font_size = ui.style().text_styles[&egui::TextStyle::Body].size; let text_color = ui.visuals().text_color(); @@ -157,41 +170,50 @@ fn render_inlines(inlines: &[InlineElement], theme: &MdTheme, ui: &mut Ui) { for inline in inlines { match inline { - InlineElement::Text(text) => { - job.append(text, 0.0, text_fmt.clone()); + InlineElement::Text(span) => { + job.append(span.resolve(buffer), 0.0, text_fmt.clone()); } - 
InlineElement::Code(code) => { - job.append(code, 0.0, code_fmt.clone()); + InlineElement::Code(span) => { + job.append(span.resolve(buffer), 0.0, code_fmt.clone()); } - InlineElement::Styled { style, content } => match style { - InlineStyle::Italic => { - job.append(content, 0.0, italic_fmt.clone()); - } - InlineStyle::Strikethrough => { - job.append(content, 0.0, strikethrough_fmt.clone()); - } - InlineStyle::Bold | InlineStyle::BoldItalic => { - // TextFormat has no bold/weight — flush and render as separate label - flush_job(&mut job, ui); - let rt = if matches!(style, InlineStyle::BoldItalic) { - RichText::new(content).strong().italics() - } else { - RichText::new(content).strong() - }; - ui.label(rt); + InlineElement::Styled { style, content } => { + let text = content.resolve(buffer); + match style { + InlineStyle::Italic => { + job.append(text, 0.0, italic_fmt.clone()); + } + InlineStyle::Strikethrough => { + job.append(text, 0.0, strikethrough_fmt.clone()); + } + InlineStyle::Bold | InlineStyle::BoldItalic => { + // TextFormat has no bold/weight — flush and render as separate label + flush_job(&mut job, ui); + let rt = if matches!(style, InlineStyle::BoldItalic) { + RichText::new(text).strong().italics() + } else { + RichText::new(text).strong() + }; + ui.label(rt); + } } - }, + } InlineElement::Link { text, url } => { flush_job(&mut job, ui); - ui.hyperlink_to(RichText::new(text).color(theme.link_color), url); + ui.hyperlink_to( + RichText::new(text.resolve(buffer)).color(theme.link_color), + url.resolve(buffer), + ); } InlineElement::Image { alt, url } => { flush_job(&mut job, ui); - ui.hyperlink_to(format!("[Image: {}]", alt), url); + ui.hyperlink_to( + format!("[Image: {}]", alt.resolve(buffer)), + url.resolve(buffer), + ); } InlineElement::LineBreak => { @@ -222,24 +244,24 @@ fn render_code_block(language: Option<&str>, content: &str, theme: &MdTheme, ui: ui.add_space(8.0); } -fn render_list_item(item: &ListItem, marker: &str, theme: &MdTheme, ui: 
&mut Ui) { +fn render_list_item(item: &ListItem, marker: &str, theme: &MdTheme, buffer: &str, ui: &mut Ui) { ui.horizontal(|ui| { ui.label(RichText::new(marker).weak()); ui.vertical(|ui| { ui.horizontal_wrapped(|ui| { - render_inlines(&item.content, theme, ui); + render_inlines(&item.content, theme, buffer, ui); }); // Render nested list if present if let Some(nested) = &item.nested { ui.indent("nested", |ui| { - render_element(nested, theme, ui); + render_element(nested, theme, buffer, ui); }); } }); }); } -fn render_table(headers: &[String], rows: &[Vec<String>], theme: &MdTheme, ui: &mut Ui) { +fn render_table(headers: &[Span], rows: &[Vec<Span>], theme: &MdTheme, buffer: &str, ui: &mut Ui) { use egui_extras::{Column, TableBuilder}; let num_cols = headers.len(); @@ -260,10 +282,9 @@ fn render_table(headers: &[String], rows: &[Vec<String>], theme: &MdTheme, ui: & .header(28.0, |mut header| { for h in headers { header.col(|ui| { - ui.painter() - .rect_filled(ui.max_rect(), 0.0, header_bg); + ui.painter().rect_filled(ui.max_rect(), 0.0, header_bg); egui::Frame::NONE.inner_margin(cell_padding).show(ui, |ui| { - ui.strong(h); + ui.strong(h.resolve(buffer)); }); }); } @@ -275,7 +296,7 @@ fn render_table(headers: &[String], rows: &[Vec<String>], theme: &MdTheme, ui: & table_row.col(|ui| { egui::Frame::NONE.inner_margin(cell_padding).show(ui, |ui| { if let Some(cell) = row.get(i) { - ui.label(cell); + ui.label(cell.resolve(buffer)); } }); }); @@ -286,8 +307,8 @@ fn render_table(headers: &[String], rows: &[Vec<String>], theme: &MdTheme, ui: & ui.add_space(8.0); } -fn render_partial(partial: &Partial, theme: &MdTheme, ui: &mut Ui) { - let content = &partial.content; +fn render_partial(partial: &Partial, theme: &MdTheme, buffer: &str, ui: &mut Ui) { + let content = partial.content(buffer); if content.is_empty() { return; } @@ -301,7 +322,7 @@ fn render_partial(partial: &Partial, theme: &MdTheme, ui: &mut Ui) { .corner_radius(4.0) .show(ui, |ui| { if let Some(lang) = 
language { - ui.label(RichText::new(lang).small().weak()); + ui.label(RichText::new(lang.resolve(buffer)).small().weak()); } ui.add( egui::Label::new(RichText::new(content).monospace().color(theme.code_text)) @@ -323,7 +344,7 @@ fn render_partial(partial: &Partial, theme: &MdTheme, ui: &mut Ui) { seen_separator, } => { if *seen_separator { - render_table(headers, rows, theme, ui); + render_table(headers, rows, theme, buffer, ui); } else { ui.label(content); } @@ -331,17 +352,17 @@ fn render_partial(partial: &Partial, theme: &MdTheme, ui: &mut Ui) { PartialKind::Paragraph => { // Parse inline elements from the partial content for proper formatting - let inlines = parse_inline(content); + let inlines = parse_inline(content, partial.content_start); ui.horizontal_wrapped(|ui| { - render_inlines(&inlines, theme, ui); + render_inlines(&inlines, theme, buffer, ui); }); } _ => { // Other partial kinds - parse inline elements too - let inlines = parse_inline(content); + let inlines = parse_inline(content, partial.content_start); ui.horizontal_wrapped(|ui| { - render_inlines(&inlines, theme, ui); + render_inlines(&inlines, theme, buffer, ui); }); } }