commit 42baa19529e8e36f2fb98cc8709282b1b5b1d790
parent 92a1f6b7b8f07580deea2e6ce83aeac18e917699
Author: William Casarin <jb55@jb55.com>
Date: Sun, 15 Feb 2026 20:59:37 -0800
md-stream: zero-copy parser using Span indices instead of String allocations
Eliminate String allocations and clones from the parsing hot path:
- Replace remaining().to_string() clone with internal self.remaining() calls
- Add PartialDispatch enum to avoid cloning PartialKind (which contains Vecs)
- All process_* methods now compute remaining slice internally
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Diffstat:
10 files changed, 884 insertions(+), 398 deletions(-)
diff --git a/crates/md-stream/src/element.rs b/crates/md-stream/src/element.rs
@@ -1,10 +1,36 @@
//! Markdown elements - the stable output of parsing.
+/// A byte range into the parser's source buffer. Zero-copy reference to content.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct Span {
+ pub start: usize,
+ pub end: usize,
+}
+
+impl Span {
+ pub fn new(start: usize, end: usize) -> Self {
+ debug_assert!(start <= end);
+ Self { start, end }
+ }
+
+ pub fn resolve<'a>(&self, buffer: &'a str) -> &'a str {
+ &buffer[self.start..self.end]
+ }
+
+ pub fn is_empty(&self) -> bool {
+ self.start == self.end
+ }
+
+ pub fn len(&self) -> usize {
+ self.end - self.start
+ }
+}
+
/// A complete, stable markdown element ready for rendering.
#[derive(Debug, Clone, PartialEq)]
pub enum MdElement {
/// Heading with level (1-6) and content
- Heading { level: u8, content: String },
+ Heading { level: u8, content: Span },
/// Paragraph of text (may contain inline elements)
Paragraph(Vec<InlineElement>),
@@ -23,22 +49,22 @@ pub enum MdElement {
/// Markdown table with headers and data rows
Table {
- headers: Vec<String>,
- rows: Vec<Vec<String>>,
+ headers: Vec<Span>,
+ rows: Vec<Vec<Span>>,
},
/// Thematic break (---, ***, ___)
ThematicBreak,
/// Raw text (when nothing else matches)
- Text(String),
+ Text(Span),
}
/// A fenced code block with optional language.
#[derive(Debug, Clone, PartialEq)]
pub struct CodeBlock {
- pub language: Option<String>,
- pub content: String,
+ pub language: Option<Span>,
+ pub content: Span,
}
/// A list item (may contain nested elements).
@@ -52,19 +78,19 @@ pub struct ListItem {
#[derive(Debug, Clone, PartialEq)]
pub enum InlineElement {
/// Plain text
- Text(String),
+ Text(Span),
/// Styled text (bold, italic, etc.)
- Styled { style: InlineStyle, content: String },
+ Styled { style: InlineStyle, content: Span },
/// Inline code (`code`)
- Code(String),
+ Code(Span),
/// Link [text](url)
- Link { text: String, url: String },
+ Link { text: Span, url: Span },
/// Image ![alt](url)
- Image { alt: String, url: String },
+ Image { alt: Span, url: Span },
/// Hard line break
LineBreak,
diff --git a/crates/md-stream/src/inline.rs b/crates/md-stream/src/inline.rs
@@ -1,14 +1,15 @@
//! Inline element parsing for bold, italic, code, links, etc.
-use crate::element::{InlineElement, InlineStyle};
+use crate::element::{InlineElement, InlineStyle, Span};
use crate::partial::PartialKind;
/// Parses inline elements from text.
-/// Returns a vector of inline elements.
+/// `base_offset` is the byte offset of `text` within the parser's buffer.
+/// All returned Spans are absolute byte ranges (`base_offset` + index into `text`).
///
/// Note: This is called on complete paragraph text, not streaming.
/// For streaming, we use PartialKind to track incomplete markers.
-pub fn parse_inline(text: &str) -> Vec<InlineElement> {
+pub fn parse_inline(text: &str, base_offset: usize) -> Vec<InlineElement> {
let mut result = Vec::new();
let mut chars = text.char_indices().peekable();
let mut plain_start = 0;
@@ -19,7 +20,10 @@ pub fn parse_inline(text: &str) -> Vec<InlineElement> {
'`' => {
// Flush any pending plain text
if i > plain_start {
- result.push(InlineElement::Text(text[plain_start..i].to_string()));
+ result.push(InlineElement::Text(Span::new(
+ base_offset + plain_start,
+ base_offset + i,
+ )));
}
// Count backticks
@@ -33,17 +37,22 @@ pub fn parse_inline(text: &str) -> Vec<InlineElement> {
// Find closing backticks (same count)
if let Some(end_pos) = find_closing_backticks(&text[start_pos..], backtick_count) {
- let code_content = &text[start_pos..start_pos + end_pos];
+ let code_start = start_pos;
+ let code_end = start_pos + end_pos;
+ let code_content = &text[code_start..code_end];
// Strip single leading/trailing space if present (CommonMark rule)
- let trimmed = if code_content.starts_with(' ')
+ let (trim_start, trim_end) = if code_content.starts_with(' ')
&& code_content.ends_with(' ')
&& code_content.len() > 1
{
- &code_content[1..code_content.len() - 1]
+ (code_start + 1, code_end - 1)
} else {
- code_content
+ (code_start, code_end)
};
- result.push(InlineElement::Code(trimmed.to_string()));
+ result.push(InlineElement::Code(Span::new(
+ base_offset + trim_start,
+ base_offset + trim_end,
+ )));
// Advance past closing backticks
let skip_to = start_pos + end_pos + backtick_count;
@@ -93,14 +102,15 @@ pub fn parse_inline(text: &str) -> Vec<InlineElement> {
let content_start = marker_start + count;
// Look for closing marker
- if let Some((content, close_len, end_pos)) =
+ if let Some((content_end_local, close_len)) =
find_closing_emphasis(&text[content_start..], marker, effective_count)
{
// Flush pending plain text
if marker_start > plain_start {
- result.push(InlineElement::Text(
- text[plain_start..marker_start].to_string(),
- ));
+ result.push(InlineElement::Text(Span::new(
+ base_offset + plain_start,
+ base_offset + marker_start,
+ )));
}
let style = match close_len {
@@ -111,11 +121,14 @@ pub fn parse_inline(text: &str) -> Vec<InlineElement> {
result.push(InlineElement::Styled {
style,
- content: content.to_string(),
+ content: Span::new(
+ base_offset + content_start,
+ base_offset + content_start + content_end_local,
+ ),
});
// Advance past the content and closing marker
- let skip_to = content_start + end_pos + close_len;
+ let skip_to = content_start + content_end_local + close_len;
while chars.peek().map(|(idx, _)| *idx < skip_to).unwrap_or(false) {
chars.next();
}
@@ -131,17 +144,22 @@ pub fn parse_inline(text: &str) -> Vec<InlineElement> {
// Flush pending text
if i > plain_start {
- result.push(InlineElement::Text(text[plain_start..i].to_string()));
+ result.push(InlineElement::Text(Span::new(
+ base_offset + plain_start,
+ base_offset + i,
+ )));
}
let content_start = i + 2;
// Find closing ~~
if let Some(end_pos) = text[content_start..].find("~~") {
- let content = &text[content_start..content_start + end_pos];
result.push(InlineElement::Styled {
style: InlineStyle::Strikethrough,
- content: content.to_string(),
+ content: Span::new(
+ base_offset + content_start,
+ base_offset + content_start + end_pos,
+ ),
});
let skip_to = content_start + end_pos + 2;
@@ -160,13 +178,18 @@ pub fn parse_inline(text: &str) -> Vec<InlineElement> {
'[' => {
// Flush pending text
if i > plain_start {
- result.push(InlineElement::Text(text[plain_start..i].to_string()));
+ result.push(InlineElement::Text(Span::new(
+ base_offset + plain_start,
+ base_offset + i,
+ )));
}
- if let Some((text_content, url, total_len)) = parse_link(&text[i..]) {
+ if let Some((text_span, url_span, total_len)) =
+ parse_link(&text[i..], base_offset + i)
+ {
result.push(InlineElement::Link {
- text: text_content,
- url,
+ text: text_span,
+ url: url_span,
});
let skip_to = i + total_len;
@@ -185,13 +208,21 @@ pub fn parse_inline(text: &str) -> Vec<InlineElement> {
if chars.peek().map(|(_, c)| *c == '[').unwrap_or(false) {
// Flush pending text
if i > plain_start {
- result.push(InlineElement::Text(text[plain_start..i].to_string()));
+ result.push(InlineElement::Text(Span::new(
+ base_offset + plain_start,
+ base_offset + i,
+ )));
}
chars.next(); // consume [
- if let Some((alt, url, link_len)) = parse_link(&text[i + 1..]) {
- result.push(InlineElement::Image { alt, url });
+ if let Some((alt_span, url_span, link_len)) =
+ parse_link(&text[i + 1..], base_offset + i + 1)
+ {
+ result.push(InlineElement::Image {
+ alt: alt_span,
+ url: url_span,
+ });
let skip_to = i + 1 + link_len;
while chars.peek().map(|(idx, _)| *idx < skip_to).unwrap_or(false) {
@@ -212,7 +243,10 @@ pub fn parse_inline(text: &str) -> Vec<InlineElement> {
// Flush text without trailing spaces
let text_end = i - 2;
if text_end > plain_start {
- result.push(InlineElement::Text(text[plain_start..text_end].to_string()));
+ result.push(InlineElement::Text(Span::new(
+ base_offset + plain_start,
+ base_offset + text_end,
+ )));
}
result.push(InlineElement::LineBreak);
plain_start = i + 1;
@@ -228,10 +262,10 @@ pub fn parse_inline(text: &str) -> Vec<InlineElement> {
// Flush remaining plain text
if plain_start < text.len() {
- let remaining = &text[plain_start..];
- if !remaining.is_empty() {
- result.push(InlineElement::Text(remaining.to_string()));
- }
+ result.push(InlineElement::Text(Span::new(
+ base_offset + plain_start,
+ base_offset + text.len(),
+ )));
}
// Collapse adjacent Text elements
@@ -242,21 +276,23 @@ pub fn parse_inline(text: &str) -> Vec<InlineElement> {
/// Find closing backticks matching the opening count.
fn find_closing_backticks(text: &str, count: usize) -> Option<usize> {
- let target: String = "`".repeat(count);
+ let bytes = text.as_bytes();
let mut i = 0;
- while i < text.len() {
- if text[i..].starts_with(&target) {
- // Make sure it's exactly this many backticks
- let after = i + count;
- if after >= text.len() || !text[after..].starts_with('`') {
- return Some(i);
- }
- // More backticks - skip them
- while i < text.len() && text[i..].starts_with('`') {
+ while i < bytes.len() {
+ if bytes[i] == b'`' {
+ // Count consecutive backticks at this position
+ let run_start = i;
+ while i < bytes.len() && bytes[i] == b'`' {
i += 1;
}
+ let run_len = i - run_start;
+ if run_len == count {
+ return Some(run_start);
+ }
+ // Not the right count, continue
} else {
+ // Skip non-backtick character (handle UTF-8)
i += text[i..].chars().next().map(|c| c.len_utf8()).unwrap_or(1);
}
}
@@ -264,53 +300,40 @@ fn find_closing_backticks(text: &str, count: usize) -> Option<usize> {
}
/// Find closing emphasis marker.
-/// Returns (content, actual_close_len, end_position) if found.
-fn find_closing_emphasis(
- text: &str,
- marker: char,
- open_count: usize,
-) -> Option<(&str, usize, usize)> {
- let chars: Vec<(usize, char)> = text.char_indices().collect();
- let mut i = 0;
-
- while i < chars.len() {
- let (pos, c) = chars[i];
+/// Returns (byte offset within `text` where the closing marker run starts, actual_close_len) if found.
+fn find_closing_emphasis(text: &str, marker: char, open_count: usize) -> Option<(usize, usize)> {
+ let mut chars = text.char_indices().peekable();
+ while let Some((pos, c)) = chars.next() {
if c == marker {
// Count consecutive markers
let mut count = 1;
- while i + count < chars.len() && chars[i + count].1 == marker {
+ while chars.peek().map(|(_, ch)| *ch == marker).unwrap_or(false) {
+ chars.next();
count += 1;
}
// Check if this could close (not followed by alphanumeric for _)
let can_close = if marker == '_' {
- i + count >= chars.len() || {
- let next_char = chars.get(i + count).map(|(_, c)| *c);
- next_char
- .map(|c| c.is_whitespace() || c.is_ascii_punctuation())
- .unwrap_or(true)
- }
+ chars.peek().is_none_or(|(_, next_c)| {
+ next_c.is_whitespace() || next_c.is_ascii_punctuation()
+ })
} else {
true
};
if can_close && count >= open_count.min(3) {
let close_len = count.min(open_count).min(3);
- return Some((&text[..pos], close_len, pos));
+ return Some((pos, close_len));
}
-
- i += count;
- } else {
- i += 1;
}
}
None
}
/// Parse a link starting with [
-/// Returns (text, url, total_bytes_consumed)
-fn parse_link(text: &str) -> Option<(String, String, usize)> {
+/// Returns (text_span, url_span, total_bytes_consumed); the spans are shifted by `base_offset`, the byte count is relative to `text`.
+fn parse_link(text: &str, base_offset: usize) -> Option<(Span, Span, usize)> {
if !text.starts_with('[') {
return None;
}
@@ -334,7 +357,6 @@ fn parse_link(text: &str) -> Option<(String, String, usize)> {
}
let bracket_end = bracket_end?;
- let link_text = &text[1..bracket_end];
// Check for ( immediately after ]
let rest = &text[bracket_end + 1..];
@@ -361,12 +383,18 @@ fn parse_link(text: &str) -> Option<(String, String, usize)> {
}
let paren_end = paren_end?;
- let url = &rest[1..paren_end];
+
+ // text_span: content between [ and ]
+ let text_span = Span::new(base_offset + 1, base_offset + bracket_end);
+ // url_span: content between ( and )
+ let url_start = bracket_end + 1 + 1; // ] + (
+ let url_end = bracket_end + 1 + paren_end; // position of )
+ let url_span = Span::new(base_offset + url_start, base_offset + url_end);
// Total consumed: [ + text + ] + ( + url + )
let total = bracket_end + 1 + paren_end + 1;
- Some((link_text.to_string(), url.to_string(), total))
+ Some((text_span, url_span, total))
}
/// Collapse adjacent Text elements into one.
@@ -380,8 +408,9 @@ fn collapse_text_elements(elements: &mut Vec<InlineElement>) {
if let (InlineElement::Text(a), InlineElement::Text(b)) =
(&elements[write], &elements[read])
{
- let combined = format!("{}{}", a, b);
- elements[write] = InlineElement::Text(combined);
+ // Merge by extending from a.start to b.end; if the spans are not contiguous, the merged span also covers the bytes between them
+ let merged = Span::new(a.start, b.end);
+ elements[write] = InlineElement::Text(merged);
} else {
write += 1;
if write != read {
@@ -434,18 +463,12 @@ impl InlineState {
/// Finalize - return whatever we have as parsed elements.
pub fn finalize(self) -> Vec<InlineElement> {
- parse_inline(&self.buffer)
+ parse_inline(&self.buffer, 0)
}
/// Extract complete inline elements from the buffer.
fn extract_complete(&mut self) -> Vec<InlineElement> {
- // For streaming, we're conservative - only return elements when
- // we're confident they won't change.
- //
- // Strategy: Parse the whole buffer, but only return elements that
- // end before any trailing ambiguous characters.
-
- let result = parse_inline(&self.buffer);
+ let result = parse_inline(&self.buffer, 0);
// Check if the buffer might have incomplete markers at the end
if self.has_incomplete_tail() {
@@ -494,62 +517,74 @@ impl Default for InlineState {
mod tests {
use super::*;
+ fn resolve<'a>(span: &Span, text: &'a str) -> &'a str {
+ span.resolve(text)
+ }
+
#[test]
fn test_inline_code() {
- let result = parse_inline("some `code` here");
- assert!(result
- .iter()
- .any(|e| matches!(e, InlineElement::Code(s) if s == "code")));
+ let text = "some `code` here";
+ let result = parse_inline(text, 0);
+ assert!(result.iter().any(|e| matches!(
+ e,
+ InlineElement::Code(s) if resolve(s, text) == "code"
+ )));
}
#[test]
fn test_bold() {
- let result = parse_inline("some **bold** text");
+ let text = "some **bold** text";
+ let result = parse_inline(text, 0);
assert!(result.iter().any(|e| matches!(
e,
- InlineElement::Styled { style: InlineStyle::Bold, content } if content == "bold"
+ InlineElement::Styled { style: InlineStyle::Bold, content } if resolve(content, text) == "bold"
)));
}
#[test]
fn test_italic() {
- let result = parse_inline("some *italic* text");
+ let text = "some *italic* text";
+ let result = parse_inline(text, 0);
assert!(result.iter().any(|e| matches!(
e,
- InlineElement::Styled { style: InlineStyle::Italic, content } if content == "italic"
+ InlineElement::Styled { style: InlineStyle::Italic, content } if resolve(content, text) == "italic"
)));
}
#[test]
fn test_link() {
- let result = parse_inline("check [this](https://example.com) out");
+ let text = "check [this](https://example.com) out";
+ let result = parse_inline(text, 0);
assert!(result.iter().any(|e| matches!(
e,
- InlineElement::Link { text, url } if text == "this" && url == "https://example.com"
+ InlineElement::Link { text: t, url } if resolve(t, text) == "this" && resolve(url, text) == "https://example.com"
)));
}
#[test]
fn test_image() {
- let result = parse_inline("see ![alt](img.png) here");
+ let text = "see ![alt](img.png) here";
+ let result = parse_inline(text, 0);
assert!(result.iter().any(|e| matches!(
e,
- InlineElement::Image { alt, url } if alt == "alt" && url == "img.png"
+ InlineElement::Image { alt, url } if resolve(alt, text) == "alt" && resolve(url, text) == "img.png"
)));
}
#[test]
fn test_strikethrough() {
- let result = parse_inline("some ~~deleted~~ text");
+ let text = "some ~~deleted~~ text";
+ let result = parse_inline(text, 0);
assert!(result.iter().any(|e| matches!(
e,
- InlineElement::Styled { style: InlineStyle::Strikethrough, content } if content == "deleted"
+ InlineElement::Styled { style: InlineStyle::Strikethrough, content } if resolve(content, text) == "deleted"
)));
}
#[test]
fn test_mixed() {
- let result = parse_inline("**bold** and *italic* and `code`");
+ let text = "**bold** and *italic* and `code`";
+ let result = parse_inline(text, 0);
assert_eq!(
result
.iter()
diff --git a/crates/md-stream/src/lib.rs b/crates/md-stream/src/lib.rs
@@ -8,7 +8,7 @@ mod inline;
mod parser;
mod partial;
-pub use element::{CodeBlock, InlineElement, InlineStyle, ListItem, MdElement};
+pub use element::{CodeBlock, InlineElement, InlineStyle, ListItem, MdElement, Span};
pub use inline::{parse_inline, InlineState};
pub use parser::StreamParser;
pub use partial::{LinkState, Partial, PartialKind};
diff --git a/crates/md-stream/src/parser.rs b/crates/md-stream/src/parser.rs
@@ -1,6 +1,6 @@
//! Core streaming parser implementation.
-use crate::element::{CodeBlock, MdElement};
+use crate::element::{CodeBlock, MdElement, Span};
use crate::inline::parse_inline;
use crate::partial::{Partial, PartialKind};
@@ -25,6 +25,17 @@ pub struct StreamParser {
at_line_start: bool,
}
+/// Lightweight dispatch tag for partial state, avoiding Clone on PartialKind
+/// which contains Vecs (table headers/rows).
+#[derive(Clone, Copy)]
+enum PartialDispatch {
+ CodeFence { fence_char: char, fence_len: usize },
+ Heading { level: u8 },
+ Table,
+ Paragraph,
+ Other,
+}
+
impl StreamParser {
pub fn new() -> Self {
Self {
@@ -51,6 +62,16 @@ impl StreamParser {
&self.parsed
}
+ /// Get the parser's buffer for resolving spans.
+ pub fn buffer(&self) -> &str {
+ &self.buffer
+ }
+
+ /// Consume the parser and return the completed elements and buffer.
+ pub fn into_parts(self) -> (Vec<MdElement>, String) {
+ (self.parsed, self.buffer)
+ }
+
/// Consume the parser and return the completed elements.
pub fn into_parsed(self) -> Vec<MdElement> {
self.parsed
@@ -64,7 +85,7 @@ impl StreamParser {
/// Get the speculative content that would render from partial state.
/// Returns the raw accumulated text that isn't yet a complete element.
pub fn partial_content(&self) -> Option<&str> {
- self.partial.as_ref().map(|p| p.content.as_str())
+ self.partial.as_ref().map(|p| p.content(&self.buffer))
}
/// Check if we're currently inside a code block.
@@ -80,50 +101,78 @@ impl StreamParser {
&self.buffer[self.process_pos..]
}
+ /// Compute a trimmed span (strip leading/trailing whitespace).
+ fn trim_span(&self, span: Span) -> Span {
+ let s = &self.buffer[span.start..span.end];
+ let trimmed = s.trim();
+ if trimmed.is_empty() {
+ return Span::new(span.start, span.start);
+ }
+ let ltrim = s.len() - s.trim_start().len();
+ Span::new(span.start + ltrim, span.start + ltrim + trimmed.len())
+ }
+
+ /// Extract the dispatch info from the current partial state.
+ /// Returns only small Copy data to avoid cloning Vecs in PartialKind::Table.
+ fn partial_dispatch(&self) -> Option<PartialDispatch> {
+ self.partial.as_ref().map(|p| match &p.kind {
+ PartialKind::CodeFence {
+ fence_char,
+ fence_len,
+ ..
+ } => PartialDispatch::CodeFence {
+ fence_char: *fence_char,
+ fence_len: *fence_len,
+ },
+ PartialKind::Heading { level } => PartialDispatch::Heading { level: *level },
+ PartialKind::Table { .. } => PartialDispatch::Table,
+ PartialKind::Paragraph => PartialDispatch::Paragraph,
+ _ => PartialDispatch::Other,
+ })
+ }
+
/// Process newly added content.
fn process_new_content(&mut self) {
while self.process_pos < self.buffer.len() {
- let remaining = self.remaining().to_string();
-
// Handle based on current partial state
- let partial_kind = self.partial.as_ref().map(|p| p.kind.clone());
- if let Some(kind) = partial_kind {
- match kind {
- PartialKind::CodeFence {
+ if let Some(dispatch) = self.partial_dispatch() {
+ match dispatch {
+ PartialDispatch::CodeFence {
fence_char,
fence_len,
- ..
} => {
- if self.process_code_fence(fence_char, fence_len, &remaining) {
+ if self.process_code_fence(fence_char, fence_len) {
continue;
}
return; // Need more input
}
- PartialKind::Heading { level } => {
- if self.process_heading(level, &remaining) {
+ PartialDispatch::Heading { level } => {
+ if self.process_heading(level) {
continue;
}
return;
}
- PartialKind::Table { .. } => {
- if self.process_table(&remaining) {
+ PartialDispatch::Table => {
+ if self.process_table() {
continue;
}
return;
}
- PartialKind::Paragraph => {
+ PartialDispatch::Paragraph => {
// For paragraphs, check if we're at a line start that could be a block element
if self.at_line_start {
// Take the paragraph partial first — try_block_start may
// replace self.partial with the new block element
let para_partial = self.partial.take();
- if let Some(consumed) = self.try_block_start(&remaining) {
+ if let Some(consumed) = self.try_block_start() {
// Emit the saved paragraph before the new block
if let Some(partial) = para_partial {
- let trimmed = partial.content.trim();
+ let span = partial.content_span();
+ let trimmed = self.trim_span(span);
if !trimmed.is_empty() {
- let inline_elements = parse_inline(trimmed);
+ let content = trimmed.resolve(&self.buffer);
+ let inline_elements = parse_inline(content, trimmed.start);
self.parsed.push(MdElement::Paragraph(inline_elements));
}
}
@@ -136,19 +185,19 @@ impl StreamParser {
// If remaining could be the start of a block element but we
// don't have enough chars yet, wait for more input rather than
// consuming into the paragraph (e.g. "`" could become "```")
- if self.could_be_block_start(&remaining) {
+ if self.could_be_block_start() {
return;
}
}
// Continue with inline processing
- if self.process_inline(&remaining) {
+ if self.process_inline() {
continue;
}
return;
}
- _ => {
+ PartialDispatch::Other => {
// For other inline elements, process character by character
- if self.process_inline(&remaining) {
+ if self.process_inline() {
continue;
}
return;
@@ -158,28 +207,28 @@ impl StreamParser {
// No partial state - detect new elements
if self.at_line_start {
- if let Some(consumed) = self.try_block_start(&remaining) {
+ if let Some(consumed) = self.try_block_start() {
self.advance(consumed);
continue;
}
- if self.could_be_block_start(&remaining) {
+ if self.could_be_block_start() {
return;
}
}
// Fall back to inline processing
- if self.process_inline(&remaining) {
+ if self.process_inline() {
continue;
}
return;
}
}
- /// Check if text could be the start of a block element but we don't
+ /// Check if remaining text could be the start of a block element but we don't
/// have enough characters to confirm yet. Used to defer consuming
/// ambiguous prefixes like "`" or "``" that might become "```".
- fn could_be_block_start(&self, text: &str) -> bool {
- let trimmed = text.trim_start();
+ fn could_be_block_start(&self) -> bool {
+ let trimmed = self.remaining().trim_start();
if trimmed.is_empty() {
return false;
}
@@ -211,7 +260,8 @@ impl StreamParser {
/// Try to detect a block-level element at line start.
/// Returns bytes consumed if successful.
- fn try_block_start(&mut self, text: &str) -> Option<usize> {
+ fn try_block_start(&mut self) -> Option<usize> {
+ let text = self.remaining();
let trimmed = text.trim_start();
let leading_space = text.len() - trimmed.len();
@@ -221,12 +271,17 @@ impl StreamParser {
if level <= 6 {
if let Some(rest) = trimmed.get(level..) {
if rest.starts_with(' ') || rest.is_empty() {
- self.partial = Some(Partial::new(
+ let consumed = leading_space + level + rest.starts_with(' ') as usize;
+ let content_start = self.process_pos + consumed;
+ let mut partial = Partial::new(
PartialKind::Heading { level: level as u8 },
self.process_pos,
- ));
+ );
+ partial.content_start = content_start;
+ partial.content_end = content_start;
+ self.partial = Some(partial);
self.at_line_start = false;
- return Some(leading_space + level + rest.starts_with(' ') as usize);
+ return Some(consumed);
}
}
}
@@ -241,37 +296,49 @@ impl StreamParser {
let after_fence = &trimmed[fence_len..];
let (language, consumed_lang) = if let Some(nl_pos) = after_fence.find('\n') {
let lang = after_fence[..nl_pos].trim();
- (
- if lang.is_empty() {
- None
- } else {
- Some(lang.to_string())
- },
- nl_pos + 1,
- )
+ let lang_span = if lang.is_empty() {
+ None
+ } else {
+ // Compute absolute span for the language
+ let lang_start_in_after = after_fence[..nl_pos].as_ptr() as usize
+ - after_fence.as_ptr() as usize
+ + (after_fence[..nl_pos].len()
+ - after_fence[..nl_pos].trim_start().len());
+ let abs_start =
+ self.process_pos + leading_space + fence_len + lang_start_in_after;
+ Some(Span::new(abs_start, abs_start + lang.len()))
+ };
+ (lang_span, nl_pos + 1)
} else {
// No newline yet - language might be incomplete
let lang = after_fence.trim();
- (
- if lang.is_empty() {
- None
- } else {
- Some(lang.to_string())
- },
- after_fence.len(),
- )
+ let lang_span = if lang.is_empty() {
+ None
+ } else {
+ let lang_start_in_after =
+ after_fence.len() - after_fence.trim_start().len();
+ let abs_start =
+ self.process_pos + leading_space + fence_len + lang_start_in_after;
+ Some(Span::new(abs_start, abs_start + lang.len()))
+ };
+ (lang_span, after_fence.len())
};
- self.partial = Some(Partial::new(
+ let consumed = leading_space + fence_len + consumed_lang;
+ let content_start = self.process_pos + consumed;
+ let mut partial = Partial::new(
PartialKind::CodeFence {
fence_char,
fence_len,
language,
},
self.process_pos,
- ));
+ );
+ partial.content_start = content_start;
+ partial.content_end = content_start;
+ self.partial = Some(partial);
self.at_line_start = false;
- return Some(leading_space + fence_len + consumed_lang);
+ return Some(consumed);
}
}
@@ -296,16 +363,20 @@ impl StreamParser {
if trimmed.starts_with('|') {
if let Some(nl_pos) = trimmed.find('\n') {
let line = &trimmed[..nl_pos];
- let cells = parse_table_row(line);
+ let line_abs_offset = self.process_pos + leading_space;
+ let cells = parse_table_row(line, line_abs_offset);
if !cells.is_empty() {
- self.partial = Some(Partial::new(
+ let mut partial = Partial::new(
PartialKind::Table {
headers: cells,
rows: Vec::new(),
seen_separator: false,
},
self.process_pos,
- ));
+ );
+ partial.content_start = self.process_pos;
+ partial.content_end = self.process_pos + leading_space + nl_pos;
+ self.partial = Some(partial);
self.at_line_start = true;
return Some(leading_space + nl_pos + 1);
}
@@ -317,40 +388,56 @@ impl StreamParser {
/// Process content inside a code fence.
/// Returns true if we should continue processing, false if we need more input.
- fn process_code_fence(&mut self, fence_char: char, fence_len: usize, text: &str) -> bool {
- let partial = self.partial.as_mut().unwrap();
+ fn process_code_fence(&mut self, fence_char: char, fence_len: usize) -> bool {
+ let text_start = self.process_pos;
+ let text_end = self.buffer.len();
+ let mut pos = text_start;
+
+ while pos < text_end {
+ // Find next line boundary
+ let line_end = self.buffer[pos..text_end]
+ .find('\n')
+ .map(|i| pos + i + 1)
+ .unwrap_or(text_end);
+ let line = &self.buffer[pos..line_end];
+
+ let partial = self.partial.as_mut().unwrap();
- for line in text.split_inclusive('\n') {
// Check if we're at a line start within the code fence
let at_content_line_start =
- partial.content.is_empty() || partial.content.ends_with('\n');
+ partial.content_is_empty() || self.buffer[..partial.content_end].ends_with('\n');
if at_content_line_start {
let trimmed = line.trim_start();
// Check for closing fence
if trimmed.len() >= fence_len
- && trimmed.as_bytes().iter().take(fence_len).all(|&b| b == fence_char as u8)
+ && trimmed
+ .as_bytes()
+ .iter()
+ .take(fence_len)
+ .all(|&b| b == fence_char as u8)
{
let after_fence = &trimmed[fence_len..];
if after_fence.trim().is_empty() || after_fence.starts_with('\n') {
// Found closing fence! Complete the code block
let language =
if let PartialKind::CodeFence { language, .. } = &partial.kind {
- language.clone()
+ *language
} else {
None
};
- let content = std::mem::take(&mut partial.content);
- self.parsed
- .push(MdElement::CodeBlock(CodeBlock { language, content }));
+ let content_span = partial.content_span();
+ self.parsed.push(MdElement::CodeBlock(CodeBlock {
+ language,
+ content: content_span,
+ }));
self.partial = None;
self.at_line_start = true;
// Advance past the closing fence line
- let consumed = text.find(line).unwrap() + line.len();
- self.advance(consumed);
+ self.advance(line_end - text_start);
return true;
}
}
@@ -367,46 +454,63 @@ impl StreamParser {
}
}
- // Not a closing fence - add to content
- partial.content.push_str(line);
+ // Not a closing fence - extend content span to include this line
+ partial.content_end += line.len();
+ pos = line_end;
}
// Consumed all available text, need more
- self.advance(text.len());
+ self.advance(text_end - text_start);
false
}
/// Process heading content until newline.
- fn process_heading(&mut self, level: u8, text: &str) -> bool {
- if let Some(nl_pos) = text.find('\n') {
+ fn process_heading(&mut self, level: u8) -> bool {
+ let remaining = self.remaining();
+ if let Some(nl_pos) = remaining.find('\n') {
let partial = self.partial.as_mut().unwrap();
- partial.content.push_str(&text[..nl_pos]);
+ partial.content_end += nl_pos;
- let content = std::mem::take(&mut partial.content).trim().to_string();
- self.parsed.push(MdElement::Heading { level, content });
+ let content_span = partial.content_span();
+ let trimmed = self.trim_span(content_span);
+ self.parsed.push(MdElement::Heading {
+ level,
+ content: trimmed,
+ });
self.partial = None;
self.at_line_start = true;
self.advance(nl_pos + 1);
true
} else {
// No newline yet - accumulate
+ let len = remaining.len();
let partial = self.partial.as_mut().unwrap();
- partial.content.push_str(text);
- self.advance(text.len());
+ partial.content_end += len;
+ self.advance(len);
false
}
}
/// Process table content line by line.
/// Returns true if we should continue processing, false if we need more input.
- fn process_table(&mut self, text: &str) -> bool {
+ fn process_table(&mut self) -> bool {
+ let remaining = self.remaining();
// We need at least one complete line to process
- if let Some(nl_pos) = text.find('\n') {
- let line = &text[..nl_pos];
+ if let Some(nl_pos) = remaining.find('\n') {
+ let line = &remaining[..nl_pos];
let trimmed = line.trim();
// Check if this line continues the table
if trimmed.starts_with('|') {
+ // Capture everything we need from remaining before dropping the borrow
+ let is_sep = is_separator_row(trimmed);
+ let line_abs_offset = self.process_pos;
+ let trim_offset = line.len() - trimmed.len();
+ let trimmed_span = Span::new(
+ self.process_pos + trim_offset,
+ self.process_pos + trim_offset + trimmed.len(),
+ );
+ let cells = parse_table_row(trimmed, line_abs_offset + trim_offset);
let partial = self.partial.as_mut().unwrap();
if let PartialKind::Table {
ref mut rows,
@@ -417,15 +521,22 @@ impl StreamParser {
{
if !*seen_separator {
// Expecting separator row
- if is_separator_row(trimmed) {
+ if is_sep {
*seen_separator = true;
} else {
// Not a valid table — emit header as paragraph
- let header_text = format!("| {} |", headers.join(" | "));
- let row_text = trimmed.to_string();
+ let header_text = format!(
+ "| {} |",
+ headers
+ .iter()
+ .map(|s| s.resolve(&self.buffer))
+ .collect::<Vec<_>>()
+ .join(" | ")
+ );
+ let row_text = trimmed_span.resolve(&self.buffer);
self.partial = None;
let combined = format!("{}\n{}", header_text, row_text);
- let inlines = parse_inline(&combined);
+ let inlines = parse_inline(&combined, 0);
self.parsed.push(MdElement::Paragraph(inlines));
self.at_line_start = true;
self.advance(nl_pos + 1);
@@ -433,7 +544,6 @@ impl StreamParser {
}
} else {
// Data row
- let cells = parse_table_row(trimmed);
rows.push(cells);
}
}
@@ -453,8 +563,15 @@ impl StreamParser {
self.parsed.push(MdElement::Table { headers, rows });
} else {
// Never saw separator — emit as paragraph
- let text = format!("| {} |", headers.join(" | "));
- let inlines = parse_inline(&text);
+ let text = format!(
+ "| {} |",
+ headers
+ .iter()
+ .map(|s| s.resolve(&self.buffer))
+ .collect::<Vec<_>>()
+ .join(" | ")
+ );
+ let inlines = parse_inline(&text, 0);
self.parsed.push(MdElement::Paragraph(inlines));
}
}
@@ -465,7 +582,7 @@ impl StreamParser {
// No newline yet — check if we have a partial line starting with |
// If so, wait for more input. If not, table is done.
- let trimmed = text.trim();
+ let trimmed = remaining.trim();
if trimmed.starts_with('|') || trimmed.is_empty() {
// Could be another table row, wait for newline
return false;
@@ -482,8 +599,15 @@ impl StreamParser {
if seen_separator {
self.parsed.push(MdElement::Table { headers, rows });
} else {
- let text = format!("| {} |", headers.join(" | "));
- let inlines = parse_inline(&text);
+ let text = format!(
+ "| {} |",
+ headers
+ .iter()
+ .map(|s| s.resolve(&self.buffer))
+ .collect::<Vec<_>>()
+ .join(" | ")
+ );
+ let inlines = parse_inline(&text, 0);
self.parsed.push(MdElement::Paragraph(inlines));
}
}
@@ -492,18 +616,22 @@ impl StreamParser {
}
/// Process inline content.
- fn process_inline(&mut self, text: &str) -> bool {
+ fn process_inline(&mut self) -> bool {
+ let remaining = self.remaining();
+
// Check for paragraph break split across tokens:
// partial content ends with \n and new text starts with \n
- if text.starts_with('\n') {
+ if remaining.starts_with('\n') {
if let Some(ref partial) = self.partial {
- if partial.content.ends_with('\n') {
+ if self.buffer[..partial.content_end].ends_with('\n') {
// Double newline split across token boundary — emit paragraph
- let para_text = std::mem::take(&mut self.partial.as_mut().unwrap().content);
+ let span = partial.content_span();
+ let trimmed = self.trim_span(span);
self.partial = None;
- if !para_text.trim().is_empty() {
- let inline_elements = parse_inline(para_text.trim());
+ if !trimmed.is_empty() {
+ let content = trimmed.resolve(&self.buffer);
+ let inline_elements = parse_inline(content, trimmed.start);
self.parsed.push(MdElement::Paragraph(inline_elements));
}
self.at_line_start = true;
@@ -513,13 +641,11 @@ impl StreamParser {
}
}
- if let Some(nl_pos) = text.find('\n') {
- let after_nl = &text[nl_pos + 1..];
+ if let Some(nl_pos) = remaining.find('\n') {
+ let after_nl = &remaining[nl_pos + 1..];
// Check if text after the newline starts a block element (code fence, heading, etc.)
// If so, emit the current paragraph and let the block parser handle the rest.
- // This must happen before the \n\n check so that block starts aren't
- // gobbled into paragraph text by a later double-newline.
if !after_nl.is_empty() {
let trimmed_after = after_nl.trim_start();
let is_block_start = trimmed_after.starts_with("```")
@@ -528,17 +654,28 @@ impl StreamParser {
|| trimmed_after.starts_with('|');
if is_block_start {
// Accumulate text before the newline into the paragraph
- let para_text = if let Some(ref mut partial) = self.partial {
- partial.content.push_str(&text[..nl_pos]);
- std::mem::take(&mut partial.content)
- } else {
- text[..nl_pos].to_string()
- };
- self.partial = None;
+ if let Some(ref mut partial) = self.partial {
+ partial.content_end += nl_pos;
+ let span = partial.content_span();
+ let trimmed = self.trim_span(span);
+ self.partial = None;
- if !para_text.trim().is_empty() {
- let inline_elements = parse_inline(para_text.trim());
- self.parsed.push(MdElement::Paragraph(inline_elements));
+ if !trimmed.is_empty() {
+ let content = trimmed.resolve(&self.buffer);
+ let inline_elements = parse_inline(content, trimmed.start);
+ self.parsed.push(MdElement::Paragraph(inline_elements));
+ }
+ } else {
+ let start = self.process_pos;
+ let end = self.process_pos + nl_pos;
+ let span = Span::new(start, end);
+ let trimmed = self.trim_span(span);
+
+ if !trimmed.is_empty() {
+ let content = trimmed.resolve(&self.buffer);
+ let inline_elements = parse_inline(content, trimmed.start);
+ self.parsed.push(MdElement::Paragraph(inline_elements));
+ }
}
self.at_line_start = true;
self.advance(nl_pos + 1);
@@ -547,39 +684,53 @@ impl StreamParser {
}
}
- if let Some(nl_pos) = text.find("\n\n") {
+ // Re-borrow remaining since the prior branches may not have been taken
+ let remaining = self.remaining();
+
+ if let Some(nl_pos) = remaining.find("\n\n") {
// Double newline = paragraph break
// Combine accumulated partial content with text before \n\n
- let para_text = if let Some(ref mut partial) = self.partial {
- partial.content.push_str(&text[..nl_pos]);
- std::mem::take(&mut partial.content)
+ if let Some(ref mut partial) = self.partial {
+ partial.content_end += nl_pos;
+ let span = partial.content_span();
+ let trimmed = self.trim_span(span);
+ self.partial = None;
+
+ if !trimmed.is_empty() {
+ let content = trimmed.resolve(&self.buffer);
+ let inline_elements = parse_inline(content, trimmed.start);
+ self.parsed.push(MdElement::Paragraph(inline_elements));
+ }
} else {
- text[..nl_pos].to_string()
- };
- self.partial = None;
-
- if !para_text.trim().is_empty() {
- // Parse inline elements from the full paragraph text
- let inline_elements = parse_inline(para_text.trim());
- self.parsed.push(MdElement::Paragraph(inline_elements));
+ let start = self.process_pos;
+ let end = self.process_pos + nl_pos;
+ let span = Span::new(start, end);
+ let trimmed = self.trim_span(span);
+
+ if !trimmed.is_empty() {
+ let content = trimmed.resolve(&self.buffer);
+ let inline_elements = parse_inline(content, trimmed.start);
+ self.parsed.push(MdElement::Paragraph(inline_elements));
+ }
}
self.at_line_start = true;
self.advance(nl_pos + 2);
return true;
}
- if let Some(nl_pos) = text.find('\n') {
-
+ if let Some(nl_pos) = remaining.find('\n') {
// Single newline - continue accumulating but track position
if let Some(ref mut partial) = self.partial {
- partial.content.push_str(&text[..=nl_pos]);
+ partial.content_end += nl_pos + 1;
} else {
// Start accumulating paragraph
- let content = text[..=nl_pos].to_string();
+ let content_start = self.process_pos;
+ let content_end = self.process_pos + nl_pos + 1;
self.partial = Some(Partial {
kind: PartialKind::Paragraph,
start_pos: self.process_pos,
- content,
+ content_start,
+ content_end,
});
}
self.at_line_start = true;
@@ -588,17 +739,21 @@ impl StreamParser {
}
// No newline - accumulate
+ let len = remaining.len();
if let Some(ref mut partial) = self.partial {
- partial.content.push_str(text);
+ partial.content_end += len;
} else {
+ let content_start = self.process_pos;
+ let content_end = self.process_pos + len;
self.partial = Some(Partial {
kind: PartialKind::Paragraph,
start_pos: self.process_pos,
- content: text.to_string(),
+ content_start,
+ content_end,
});
}
self.at_line_start = false;
- self.advance(text.len());
+ self.advance(len);
false
}
@@ -616,13 +771,14 @@ impl StreamParser {
// Unclosed code block - emit what we have
self.parsed.push(MdElement::CodeBlock(CodeBlock {
language,
- content: partial.content,
+ content: partial.content_span(),
}));
}
PartialKind::Heading { level } => {
+ let trimmed = self.trim_span(partial.content_span());
self.parsed.push(MdElement::Heading {
level,
- content: partial.content.trim().to_string(),
+ content: trimmed,
});
}
PartialKind::Table {
@@ -634,21 +790,32 @@ impl StreamParser {
self.parsed.push(MdElement::Table { headers, rows });
} else {
// Never saw separator — not a real table, emit as paragraph
- let text = format!("| {} |", headers.join(" | "));
- let inlines = parse_inline(&text);
+ let text = format!(
+ "| {} |",
+ headers
+ .iter()
+ .map(|s| s.resolve(&self.buffer))
+ .collect::<Vec<_>>()
+ .join(" | ")
+ );
+ let inlines = parse_inline(&text, 0);
self.parsed.push(MdElement::Paragraph(inlines));
}
}
PartialKind::Paragraph => {
- if !partial.content.trim().is_empty() {
- let inline_elements = parse_inline(partial.content.trim());
+ let trimmed = self.trim_span(partial.content_span());
+ if !trimmed.is_empty() {
+ let content = trimmed.resolve(&self.buffer);
+ let inline_elements = parse_inline(content, trimmed.start);
self.parsed.push(MdElement::Paragraph(inline_elements));
}
}
_ => {
// Other partial kinds (lists, blockquotes, etc.) - emit as paragraph for now
- if !partial.content.trim().is_empty() {
- let inline_elements = parse_inline(partial.content.trim());
+ let trimmed = self.trim_span(partial.content_span());
+ if !trimmed.is_empty() {
+ let content = trimmed.resolve(&self.buffer);
+ let inline_elements = parse_inline(content, trimmed.start);
self.parsed.push(MdElement::Paragraph(inline_elements));
}
}
@@ -663,21 +830,51 @@ impl Default for StreamParser {
}
}
-/// Parse a table row into cells by splitting on `|`.
-/// Strips outer pipes and trims each cell.
-fn parse_table_row(line: &str) -> Vec<String> {
+/// Parse a table row into trimmed cell spans by splitting on `|`; outer pipes are stripped and empty cells become zero-length spans.
+/// `line_offset` is the absolute buffer position of the first byte of `line`, so returned spans index into the parser buffer.
+fn parse_table_row(line: &str, line_offset: usize) -> Vec<Span> {
let trimmed = line.trim();
- let inner = trimmed.strip_prefix('|').unwrap_or(trimmed);
- let inner = inner.strip_suffix('|').unwrap_or(inner);
- inner.split('|').map(|c| c.trim().to_string()).collect()
+ let trim_start = line.len() - line.trim_start().len();
+ let base = line_offset + trim_start;
+
+ let inner_start;
+ let inner;
+ if let Some(stripped) = trimmed.strip_prefix('|') {
+ inner_start = base + 1;
+ inner = stripped.strip_suffix('|').unwrap_or(stripped);
+ } else {
+ inner_start = base;
+ inner = trimmed.strip_suffix('|').unwrap_or(trimmed);
+ };
+
+ let mut result = Vec::new();
+ let mut pos = 0;
+ for cell in inner.split('|') {
+ let cell_start = inner_start + pos;
+ let cell_trimmed = cell.trim();
+ if cell_trimmed.is_empty() {
+ // Empty cell — use a zero-length span at the cell's start position
+ result.push(Span::new(cell_start, cell_start));
+ } else {
+ let ltrim = cell.len() - cell.trim_start().len();
+ let span_start = cell_start + ltrim;
+ let span_end = span_start + cell_trimmed.len();
+ result.push(Span::new(span_start, span_end));
+ }
+ pos += cell.len() + 1; // +1 for the | delimiter
+ }
+ result
}
/// Check if a line is a table separator row (e.g. `|---|---|`).
fn is_separator_row(line: &str) -> bool {
- let cells = parse_table_row(line);
+ let trimmed = line.trim();
+ let inner = trimmed.strip_prefix('|').unwrap_or(trimmed);
+ let inner = inner.strip_suffix('|').unwrap_or(inner);
+ let cells: Vec<&str> = inner.split('|').map(|c| c.trim()).collect();
!cells.is_empty()
&& cells.iter().all(|c| {
- let t = c.trim().trim_matches(':');
+ let t = c.trim_matches(':');
!t.is_empty() && t.chars().all(|ch| ch == '-')
})
}
diff --git a/crates/md-stream/src/partial.rs b/crates/md-stream/src/partial.rs
@@ -1,5 +1,7 @@
//! Partial state tracking for incomplete markdown elements.
+use crate::element::Span;
+
/// Tracks an in-progress markdown element that might be completed
/// when more tokens arrive.
#[derive(Debug, Clone)]
@@ -10,8 +12,11 @@ pub struct Partial {
/// Byte offset into the buffer where this element starts
pub start_pos: usize,
- /// Accumulated content so far (for elements that need it)
- pub content: String,
+ /// Start of content region (after markers like "# ")
+ pub content_start: usize,
+
+ /// End of content accumulated so far (exclusive byte offset into the buffer)
+ pub content_end: usize,
}
impl Partial {
@@ -19,9 +24,25 @@ impl Partial {
Self {
kind,
start_pos,
- content: String::new(),
+ content_start: start_pos,
+ content_end: start_pos,
}
}
+
+ /// Get the content span
+ pub fn content_span(&self) -> Span {
+ Span::new(self.content_start, self.content_end)
+ }
+
+ /// Get the content as a string slice from the buffer
+ pub fn content<'a>(&self, buffer: &'a str) -> &'a str {
+ &buffer[self.content_start..self.content_end]
+ }
+
+ /// Check if content is empty
+ pub fn content_is_empty(&self) -> bool {
+ self.content_start == self.content_end
+ }
}
/// The kind of partial element being tracked.
@@ -32,7 +53,7 @@ pub enum PartialKind {
CodeFence {
fence_char: char, // ` or ~
fence_len: usize, // typically 3
- language: Option<String>,
+ language: Option<Span>,
},
/// Inline code waiting for closing backtick(s)
@@ -54,10 +75,10 @@ pub enum PartialKind {
/// Link: seen [, waiting for ](url)
/// States: text, post-bracket, url
- Link { state: LinkState, text: String },
+ Link { state: LinkState, text: Span },
/// Image: seen 
- Image { state: LinkState, alt: String },
+ Image { state: LinkState, alt: Span },
/// Heading started with # at line start, collecting content
Heading { level: u8 },
@@ -74,8 +95,8 @@ pub enum PartialKind {
/// Table being accumulated row by row
Table {
- headers: Vec<String>,
- rows: Vec<Vec<String>>,
+ headers: Vec<Span>,
+ rows: Vec<Vec<Span>>,
seen_separator: bool,
},
@@ -91,5 +112,5 @@ pub enum LinkState {
/// Seen ], expecting (
PostBracket,
/// Collecting URL between ( and )
- Url(String),
+ Url(Span),
}
diff --git a/crates/md-stream/src/tests.rs b/crates/md-stream/src/tests.rs
@@ -1,21 +1,27 @@
//! Tests for streaming parser behavior.
+use crate::element::Span;
use crate::partial::PartialKind;
use crate::{InlineElement, InlineStyle, MdElement, StreamParser};
+/// Helper to resolve a Span against a parser's buffer.
+fn r<'a>(span: &Span, buf: &'a str) -> &'a str {
+ span.resolve(buf)
+}
+
#[test]
fn test_heading_complete() {
let mut parser = StreamParser::new();
parser.push("# Hello World\n");
assert_eq!(parser.parsed().len(), 1);
- assert_eq!(
- parser.parsed()[0],
- MdElement::Heading {
- level: 1,
- content: "Hello World".to_string()
+ match &parser.parsed()[0] {
+ MdElement::Heading { level, content } => {
+ assert_eq!(*level, 1);
+ assert_eq!(r(content, parser.buffer()), "Hello World");
}
- );
+ other => panic!("Expected heading, got {:?}", other),
+ }
}
#[test]
@@ -32,13 +38,13 @@ fn test_heading_streaming() {
parser.push("ld\n");
assert_eq!(parser.parsed().len(), 1);
- assert_eq!(
- parser.parsed()[0],
- MdElement::Heading {
- level: 1,
- content: "Hello World".to_string()
+ match &parser.parsed()[0] {
+ MdElement::Heading { level, content } => {
+ assert_eq!(*level, 1);
+ assert_eq!(r(content, parser.buffer()), "Hello World");
}
- );
+ other => panic!("Expected heading, got {:?}", other),
+ }
}
#[test]
@@ -49,8 +55,8 @@ fn test_code_block_complete() {
assert_eq!(parser.parsed().len(), 1);
match &parser.parsed()[0] {
MdElement::CodeBlock(cb) => {
- assert_eq!(cb.language.as_deref(), Some("rust"));
- assert_eq!(cb.content, "fn main() {}\n");
+ assert_eq!(cb.language.map(|s| r(&s, parser.buffer())), Some("rust"));
+ assert_eq!(r(&cb.content, parser.buffer()), "fn main() {}\n");
}
_ => panic!("Expected code block"),
}
@@ -104,7 +110,7 @@ fn test_finalize_incomplete_code() {
assert_eq!(parser.parsed().len(), 1);
match &parser.parsed()[0] {
MdElement::CodeBlock(cb) => {
- assert!(cb.content.contains("unclosed code"));
+ assert!(r(&cb.content, parser.buffer()).contains("unclosed code"));
}
_ => panic!("Expected code block"),
}
@@ -198,10 +204,11 @@ fn test_inline_bold() {
assert_eq!(parser.parsed().len(), 1);
if let MdElement::Paragraph(inlines) = &parser.parsed()[0] {
+ let buf = parser.buffer();
assert!(
inlines.iter().any(|e| matches!(
e,
- InlineElement::Styled { style: InlineStyle::Bold, content } if content == "bold"
+ InlineElement::Styled { style: InlineStyle::Bold, content } if r(content, buf) == "bold"
)),
"Expected bold element, got: {:?}",
inlines
@@ -218,10 +225,11 @@ fn test_inline_italic() {
assert_eq!(parser.parsed().len(), 1);
if let MdElement::Paragraph(inlines) = &parser.parsed()[0] {
+ let buf = parser.buffer();
assert!(
inlines.iter().any(|e| matches!(
e,
- InlineElement::Styled { style: InlineStyle::Italic, content } if content == "italic"
+ InlineElement::Styled { style: InlineStyle::Italic, content } if r(content, buf) == "italic"
)),
"Expected italic element, got: {:?}",
inlines
@@ -238,10 +246,11 @@ fn test_inline_code() {
assert_eq!(parser.parsed().len(), 1);
if let MdElement::Paragraph(inlines) = &parser.parsed()[0] {
+ let buf = parser.buffer();
assert!(
inlines.iter().any(|e| matches!(
e,
- InlineElement::Code(s) if s == "code"
+ InlineElement::Code(s) if r(s, buf) == "code"
)),
"Expected code element, got: {:?}",
inlines
@@ -258,9 +267,10 @@ fn test_inline_link() {
assert_eq!(parser.parsed().len(), 1);
if let MdElement::Paragraph(inlines) = &parser.parsed()[0] {
+ let buf = parser.buffer();
assert!(inlines.iter().any(|e| matches!(
e,
- InlineElement::Link { text, url } if text == "this link" && url == "https://example.com"
+ InlineElement::Link { text, url } if r(text, buf) == "this link" && r(url, buf) == "https://example.com"
)), "Expected link element, got: {:?}", inlines);
} else {
panic!("Expected paragraph");
@@ -274,10 +284,11 @@ fn test_inline_image() {
assert_eq!(parser.parsed().len(), 1);
if let MdElement::Paragraph(inlines) = &parser.parsed()[0] {
+ let buf = parser.buffer();
assert!(
inlines.iter().any(|e| matches!(
e,
- InlineElement::Image { alt, url } if alt == "alt text" && url == "image.png"
+ InlineElement::Image { alt, url } if r(alt, buf) == "alt text" && r(url, buf) == "image.png"
)),
"Expected image element, got: {:?}",
inlines
@@ -294,9 +305,10 @@ fn test_inline_strikethrough() {
assert_eq!(parser.parsed().len(), 1);
if let MdElement::Paragraph(inlines) = &parser.parsed()[0] {
+ let buf = parser.buffer();
assert!(inlines.iter().any(|e| matches!(
e,
- InlineElement::Styled { style: InlineStyle::Strikethrough, content } if content == "deleted"
+ InlineElement::Styled { style: InlineStyle::Strikethrough, content } if r(content, buf) == "deleted"
)), "Expected strikethrough element, got: {:?}", inlines);
} else {
panic!("Expected paragraph");
@@ -351,9 +363,10 @@ fn test_inline_finalize() {
// Now should have parsed with inline formatting
assert_eq!(parser.parsed().len(), 1);
if let MdElement::Paragraph(inlines) = &parser.parsed()[0] {
+ let buf = parser.buffer();
assert!(inlines.iter().any(|e| matches!(
e,
- InlineElement::Styled { style: InlineStyle::Bold, content } if content == "bold"
+ InlineElement::Styled { style: InlineStyle::Bold, content } if r(content, buf) == "bold"
)));
} else {
panic!("Expected paragraph");
@@ -422,9 +435,10 @@ fn test_paragraph_finalize_emits_content() {
assert_eq!(parser.parsed().len(), 1);
if let MdElement::Paragraph(inlines) = &parser.parsed()[0] {
+ let buf = parser.buffer();
assert!(inlines.iter().any(|e| matches!(
e,
- InlineElement::Text(s) if s.contains("Incomplete paragraph")
+ InlineElement::Text(s) if r(s, buf).contains("Incomplete paragraph")
)));
} else {
panic!("Expected paragraph");
@@ -435,21 +449,27 @@ fn test_paragraph_finalize_emits_content() {
fn test_inline_code_with_angle_brackets() {
// Test parse_inline directly
let input = "Generic Rust: `impl Iterator<Item = &str>` returns a `Result<(), anyhow::Error>`";
- let result = crate::parse_inline(input);
+ let result = crate::parse_inline(input, 0);
eprintln!("parse_inline result: {:#?}", result);
let code_elements: Vec<_> = result
.iter()
.filter(|e| matches!(e, InlineElement::Code(_)))
.collect();
- assert_eq!(code_elements.len(), 2, "Expected 2 code spans, got: {:#?}", result);
+ assert_eq!(
+ code_elements.len(),
+ 2,
+ "Expected 2 code spans, got: {:#?}",
+ result
+ );
}
#[test]
fn test_streaming_inline_code_with_angle_brackets() {
// Test streaming parser with token-by-token delivery
let mut parser = StreamParser::new();
- let input = "5. Generic Rust: `impl Iterator<Item = &str>` returns a `Result<(), anyhow::Error>`\n\n";
+ let input =
+ "5. Generic Rust: `impl Iterator<Item = &str>` returns a `Result<(), anyhow::Error>`\n\n";
// Simulate streaming token by token
for ch in input.chars() {
@@ -467,7 +487,12 @@ fn test_streaming_inline_code_with_angle_brackets() {
.iter()
.filter(|e| matches!(e, InlineElement::Code(_)))
.collect();
- assert_eq!(code_elements.len(), 2, "Expected 2 code spans, got: {:#?}", inlines);
+ assert_eq!(
+ code_elements.len(),
+ 2,
+ "Expected 2 code spans, got: {:#?}",
+ inlines
+ );
} else {
panic!("Expected paragraph, got: {:?}", parser.parsed()[0]);
}
@@ -477,7 +502,8 @@ fn test_streaming_inline_code_with_angle_brackets() {
fn test_streaming_multiple_code_spans_with_angle_brackets() {
// From the screenshot: multiple code spans with nested angle brackets
let mut parser = StreamParser::new();
- let input = "use `HashMap<K, V>` or `Vec<String>` or `Option<Box<dyn Error>>` in your types\n\n";
+ let input =
+ "use `HashMap<K, V>` or `Vec<String>` or `Option<Box<dyn Error>>` in your types\n\n";
for ch in input.chars() {
parser.push(&ch.to_string());
@@ -490,7 +516,12 @@ fn test_streaming_multiple_code_spans_with_angle_brackets() {
.iter()
.filter(|e| matches!(e, InlineElement::Code(_)))
.collect();
- assert_eq!(code_elements.len(), 3, "Expected 3 code spans, got: {:#?}", inlines);
+ assert_eq!(
+ code_elements.len(),
+ 3,
+ "Expected 3 code spans, got: {:#?}",
+ inlines
+ );
} else {
panic!("Expected paragraph, got: {:?}", parser.parsed()[0]);
}
@@ -499,7 +530,6 @@ fn test_streaming_multiple_code_spans_with_angle_brackets() {
#[test]
fn test_code_block_after_paragraph_single_newline() {
// Reproduces: paragraph text ending with ":\n" then "```\n" code block
- // This is the common pattern: "All events share these common tags:\n```\n..."
let mut parser = StreamParser::new();
let input = "All events share these common tags:\n```\n[\"d\", \"<session-id>\"]\n```\n";
parser.push(input);
@@ -512,8 +542,14 @@ fn test_code_block_after_paragraph_single_newline() {
eprintln!("After finalize - parsed: {:#?}", parser.parsed());
// Should have: paragraph + code block
- let has_paragraph = parser.parsed().iter().any(|e| matches!(e, MdElement::Paragraph(_)));
- let has_code_block = parser.parsed().iter().any(|e| matches!(e, MdElement::CodeBlock(_)));
+ let has_paragraph = parser
+ .parsed()
+ .iter()
+ .any(|e| matches!(e, MdElement::Paragraph(_)));
+ let has_code_block = parser
+ .parsed()
+ .iter()
+ .any(|e| matches!(e, MdElement::CodeBlock(_)));
assert!(has_paragraph, "Missing paragraph element");
assert!(has_code_block, "Missing code block element");
@@ -531,14 +567,23 @@ fn test_code_block_after_paragraph_single_newline_streaming() {
eprintln!("Before finalize - parsed: {:#?}", parser.parsed());
eprintln!("Before finalize - partial: {:#?}", parser.partial());
- eprintln!("Before finalize - in_code_block: {}", parser.in_code_block());
+ eprintln!(
+ "Before finalize - in_code_block: {}",
+ parser.in_code_block()
+ );
parser.finalize();
eprintln!("After finalize - parsed: {:#?}", parser.parsed());
- let has_paragraph = parser.parsed().iter().any(|e| matches!(e, MdElement::Paragraph(_)));
- let has_code_block = parser.parsed().iter().any(|e| matches!(e, MdElement::CodeBlock(_)));
+ let has_paragraph = parser
+ .parsed()
+ .iter()
+ .any(|e| matches!(e, MdElement::Paragraph(_)));
+ let has_code_block = parser
+ .parsed()
+ .iter()
+ .any(|e| matches!(e, MdElement::CodeBlock(_)));
assert!(has_paragraph, "Missing paragraph element");
assert!(has_code_block, "Missing code block element");
@@ -564,14 +609,27 @@ fn test_table_basic_batch() {
let mut parser = StreamParser::new();
parser.push("| Name | Age |\n|------|-----|\n| Alice | 30 |\n| Bob | 25 |\n\n");
- let tables: Vec<_> = parser.parsed().iter().filter(|e| matches!(e, MdElement::Table { .. })).collect();
- assert_eq!(tables.len(), 1, "Expected 1 table, got: {:#?}", parser.parsed());
+ let tables: Vec<_> = parser
+ .parsed()
+ .iter()
+ .filter(|e| matches!(e, MdElement::Table { .. }))
+ .collect();
+ assert_eq!(
+ tables.len(),
+ 1,
+ "Expected 1 table, got: {:#?}",
+ parser.parsed()
+ );
if let MdElement::Table { headers, rows } = &tables[0] {
- assert_eq!(headers, &["Name", "Age"]);
+ let buf = parser.buffer();
+ let h: Vec<&str> = headers.iter().map(|s| r(s, buf)).collect();
+ assert_eq!(h, &["Name", "Age"]);
assert_eq!(rows.len(), 2);
- assert_eq!(rows[0], &["Alice", "30"]);
- assert_eq!(rows[1], &["Bob", "25"]);
+ let r0: Vec<&str> = rows[0].iter().map(|s| r(s, buf)).collect();
+ let r1: Vec<&str> = rows[1].iter().map(|s| r(s, buf)).collect();
+ assert_eq!(r0, &["Alice", "30"]);
+ assert_eq!(r1, &["Bob", "25"]);
}
}
@@ -584,14 +642,27 @@ fn test_table_streaming_char_by_char() {
parser.push(&ch.to_string());
}
- let tables: Vec<_> = parser.parsed().iter().filter(|e| matches!(e, MdElement::Table { .. })).collect();
- assert_eq!(tables.len(), 1, "Expected 1 table, got: {:#?}", parser.parsed());
+ let tables: Vec<_> = parser
+ .parsed()
+ .iter()
+ .filter(|e| matches!(e, MdElement::Table { .. }))
+ .collect();
+ assert_eq!(
+ tables.len(),
+ 1,
+ "Expected 1 table, got: {:#?}",
+ parser.parsed()
+ );
if let MdElement::Table { headers, rows } = &tables[0] {
- assert_eq!(headers, &["Name", "Age"]);
+ let buf = parser.buffer();
+ let h: Vec<&str> = headers.iter().map(|s| r(s, buf)).collect();
+ assert_eq!(h, &["Name", "Age"]);
assert_eq!(rows.len(), 2);
- assert_eq!(rows[0], &["Alice", "30"]);
- assert_eq!(rows[1], &["Bob", "25"]);
+ let r0: Vec<&str> = rows[0].iter().map(|s| r(s, buf)).collect();
+ let r1: Vec<&str> = rows[1].iter().map(|s| r(s, buf)).collect();
+ assert_eq!(r0, &["Alice", "30"]);
+ assert_eq!(r1, &["Bob", "25"]);
}
}
@@ -600,10 +671,20 @@ fn test_table_after_paragraph() {
let mut parser = StreamParser::new();
parser.push("Here is a comparison:\n| A | B |\n|---|---|\n| 1 | 2 |\n\n");
- let has_paragraph = parser.parsed().iter().any(|e| matches!(e, MdElement::Paragraph(_)));
- let has_table = parser.parsed().iter().any(|e| matches!(e, MdElement::Table { .. }));
+ let has_paragraph = parser
+ .parsed()
+ .iter()
+ .any(|e| matches!(e, MdElement::Paragraph(_)));
+ let has_table = parser
+ .parsed()
+ .iter()
+ .any(|e| matches!(e, MdElement::Table { .. }));
- assert!(has_paragraph, "Missing paragraph, got: {:#?}", parser.parsed());
+ assert!(
+ has_paragraph,
+ "Missing paragraph, got: {:#?}",
+ parser.parsed()
+ );
assert!(has_table, "Missing table, got: {:#?}", parser.parsed());
}
@@ -616,10 +697,20 @@ fn test_table_after_paragraph_streaming() {
parser.push(&ch.to_string());
}
- let has_paragraph = parser.parsed().iter().any(|e| matches!(e, MdElement::Paragraph(_)));
- let has_table = parser.parsed().iter().any(|e| matches!(e, MdElement::Table { .. }));
+ let has_paragraph = parser
+ .parsed()
+ .iter()
+ .any(|e| matches!(e, MdElement::Paragraph(_)));
+ let has_table = parser
+ .parsed()
+ .iter()
+ .any(|e| matches!(e, MdElement::Table { .. }));
- assert!(has_paragraph, "Missing paragraph, got: {:#?}", parser.parsed());
+ assert!(
+ has_paragraph,
+ "Missing paragraph, got: {:#?}",
+ parser.parsed()
+ );
assert!(has_table, "Missing table, got: {:#?}", parser.parsed());
}
@@ -628,11 +719,21 @@ fn test_table_then_paragraph() {
let mut parser = StreamParser::new();
parser.push("| X | Y |\n|---|---|\n| a | b |\n\nSome text after.\n\n");
- let has_table = parser.parsed().iter().any(|e| matches!(e, MdElement::Table { .. }));
- let has_paragraph = parser.parsed().iter().any(|e| matches!(e, MdElement::Paragraph(_)));
+ let has_table = parser
+ .parsed()
+ .iter()
+ .any(|e| matches!(e, MdElement::Table { .. }));
+ let has_paragraph = parser
+ .parsed()
+ .iter()
+ .any(|e| matches!(e, MdElement::Paragraph(_)));
assert!(has_table, "Missing table, got: {:#?}", parser.parsed());
- assert!(has_paragraph, "Missing paragraph, got: {:#?}", parser.parsed());
+ assert!(
+ has_paragraph,
+ "Missing paragraph, got: {:#?}",
+ parser.parsed()
+ );
}
#[test]
@@ -641,8 +742,15 @@ fn test_table_no_separator_not_a_table() {
// Two pipe rows but no separator — should not be a table
parser.push("| foo | bar |\n| baz | qux |\n\n");
- let has_table = parser.parsed().iter().any(|e| matches!(e, MdElement::Table { .. }));
- assert!(!has_table, "Should NOT be a table without separator row, got: {:#?}", parser.parsed());
+ let has_table = parser
+ .parsed()
+ .iter()
+ .any(|e| matches!(e, MdElement::Table { .. }));
+ assert!(
+ !has_table,
+ "Should NOT be a table without separator row, got: {:#?}",
+ parser.parsed()
+ );
}
#[test]
@@ -650,7 +758,11 @@ fn test_table_uneven_columns() {
let mut parser = StreamParser::new();
parser.push("| A | B | C |\n|---|---|---|\n| 1 | 2 |\n| x | y | z |\n\n");
- let tables: Vec<_> = parser.parsed().iter().filter(|e| matches!(e, MdElement::Table { .. })).collect();
+ let tables: Vec<_> = parser
+ .parsed()
+ .iter()
+ .filter(|e| matches!(e, MdElement::Table { .. }))
+ .collect();
assert_eq!(tables.len(), 1);
if let MdElement::Table { headers, rows } = &tables[0] {
@@ -666,13 +778,25 @@ fn test_table_with_alignment() {
let mut parser = StreamParser::new();
parser.push("| Left | Center | Right |\n|:-----|:------:|------:|\n| a | b | c |\n\n");
- let tables: Vec<_> = parser.parsed().iter().filter(|e| matches!(e, MdElement::Table { .. })).collect();
- assert_eq!(tables.len(), 1, "Expected table with alignment separators, got: {:#?}", parser.parsed());
+ let tables: Vec<_> = parser
+ .parsed()
+ .iter()
+ .filter(|e| matches!(e, MdElement::Table { .. }))
+ .collect();
+ assert_eq!(
+ tables.len(),
+ 1,
+ "Expected table with alignment separators, got: {:#?}",
+ parser.parsed()
+ );
if let MdElement::Table { headers, rows } = &tables[0] {
- assert_eq!(headers, &["Left", "Center", "Right"]);
+ let buf = parser.buffer();
+ let h: Vec<&str> = headers.iter().map(|s| r(s, buf)).collect();
+ assert_eq!(h, &["Left", "Center", "Right"]);
assert_eq!(rows.len(), 1);
- assert_eq!(rows[0], &["a", "b", "c"]);
+ let r0: Vec<&str> = rows[0].iter().map(|s| r(s, buf)).collect();
+ assert_eq!(r0, &["a", "b", "c"]);
}
}
@@ -686,8 +810,15 @@ fn test_table_finalize_incomplete() {
parser.finalize();
- let has_table = parser.parsed().iter().any(|e| matches!(e, MdElement::Table { .. }));
- assert!(has_table, "Finalize should emit the table, got: {:#?}", parser.parsed());
+ let has_table = parser
+ .parsed()
+ .iter()
+ .any(|e| matches!(e, MdElement::Table { .. }));
+ assert!(
+ has_table,
+ "Finalize should emit the table, got: {:#?}",
+ parser.parsed()
+ );
}
#[test]
@@ -695,11 +826,17 @@ fn test_table_single_column() {
let mut parser = StreamParser::new();
parser.push("| Item |\n|------|\n| Apple |\n| Banana |\n\n");
- let tables: Vec<_> = parser.parsed().iter().filter(|e| matches!(e, MdElement::Table { .. })).collect();
+ let tables: Vec<_> = parser
+ .parsed()
+ .iter()
+ .filter(|e| matches!(e, MdElement::Table { .. }))
+ .collect();
assert_eq!(tables.len(), 1);
if let MdElement::Table { headers, rows } = &tables[0] {
- assert_eq!(headers, &["Item"]);
+ let buf = parser.buffer();
+ let h: Vec<&str> = headers.iter().map(|s| r(s, buf)).collect();
+ assert_eq!(h, &["Item"]);
assert_eq!(rows.len(), 2);
}
}
@@ -709,13 +846,21 @@ fn test_table_empty_cells() {
let mut parser = StreamParser::new();
parser.push("| A | B |\n|---|---|\n| | val |\n| val | |\n\n");
- let tables: Vec<_> = parser.parsed().iter().filter(|e| matches!(e, MdElement::Table { .. })).collect();
+ let tables: Vec<_> = parser
+ .parsed()
+ .iter()
+ .filter(|e| matches!(e, MdElement::Table { .. }))
+ .collect();
assert_eq!(tables.len(), 1);
if let MdElement::Table { headers, rows } = &tables[0] {
- assert_eq!(headers, &["A", "B"]);
- assert_eq!(rows[0], &["", "val"]);
- assert_eq!(rows[1], &["val", ""]);
+ let buf = parser.buffer();
+ let h: Vec<&str> = headers.iter().map(|s| r(s, buf)).collect();
+ assert_eq!(h, &["A", "B"]);
+ let r0: Vec<&str> = rows[0].iter().map(|s| r(s, buf)).collect();
+ let r1: Vec<&str> = rows[1].iter().map(|s| r(s, buf)).collect();
+ assert_eq!(r0, &["", "val"]);
+ assert_eq!(r1, &["val", ""]);
}
}
@@ -747,13 +892,27 @@ fn test_table_streaming_realistic_llm_chunks() {
}
parser.finalize();
- let has_paragraph = parser.parsed().iter().any(|e| matches!(e, MdElement::Paragraph(_)));
- let has_table = parser.parsed().iter().any(|e| matches!(e, MdElement::Table { .. }));
+ let has_paragraph = parser
+ .parsed()
+ .iter()
+ .any(|e| matches!(e, MdElement::Paragraph(_)));
+ let has_table = parser
+ .parsed()
+ .iter()
+ .any(|e| matches!(e, MdElement::Table { .. }));
- assert!(has_paragraph, "Missing paragraph, got: {:#?}", parser.parsed());
+ assert!(
+ has_paragraph,
+ "Missing paragraph, got: {:#?}",
+ parser.parsed()
+ );
assert!(has_table, "Missing table, got: {:#?}", parser.parsed());
- if let Some(MdElement::Table { headers, rows }) = parser.parsed().iter().find(|e| matches!(e, MdElement::Table { .. })) {
+ if let Some(MdElement::Table { headers, rows }) = parser
+ .parsed()
+ .iter()
+ .find(|e| matches!(e, MdElement::Table { .. }))
+ {
assert_eq!(headers.len(), 3, "Expected 3 headers, got: {:?}", headers);
assert_eq!(rows.len(), 2, "Expected 2 rows, got: {:?}", rows);
}
@@ -768,7 +927,13 @@ fn test_table_partial_shows_during_streaming() {
// Should have a table partial with seen_separator=true
let partial = parser.partial().expect("Should have partial");
assert!(
- matches!(&partial.kind, PartialKind::Table { seen_separator: true, .. }),
+ matches!(
+ &partial.kind,
+ PartialKind::Table {
+ seen_separator: true,
+ ..
+ }
+ ),
"Expected table partial with seen_separator=true, got: {:?}",
partial.kind
);
diff --git a/crates/notedeck_dave/src/backend/claude.rs b/crates/notedeck_dave/src/backend/claude.rs
@@ -5,8 +5,8 @@ use crate::backend::tool_summary::{
};
use crate::backend::traits::AiBackend;
use crate::messages::{
- CompactionInfo, DaveApiResponse, PendingPermission, PermissionRequest, PermissionResponse,
- SubagentInfo, SubagentStatus, ToolResult,
+ CompactionInfo, DaveApiResponse, ParsedMarkdown, PendingPermission, PermissionRequest,
+ PermissionResponse, SubagentInfo, SubagentStatus, ToolResult,
};
use crate::tools::Tool;
use crate::Message;
@@ -341,14 +341,15 @@ async fn session_actor(
let request_id = Uuid::new_v4();
let (ui_resp_tx, ui_resp_rx) = oneshot::channel();
- let cached_plan_elements = if perm_req.tool_name == "ExitPlanMode" {
+ let cached_plan = if perm_req.tool_name == "ExitPlanMode" {
perm_req.tool_input.get("plan")
.and_then(|v| v.as_str())
.map(|plan| {
let mut parser = md_stream::StreamParser::new();
parser.push(plan);
parser.finalize();
- parser.into_parsed()
+ let (elements, source) = parser.into_parts();
+ ParsedMarkdown { source, elements }
})
} else {
None
@@ -360,7 +361,7 @@ async fn session_actor(
tool_input: perm_req.tool_input.clone(),
response: None,
answer_summary: None,
- cached_plan_elements,
+ cached_plan,
};
let pending = PendingPermission {
diff --git a/crates/notedeck_dave/src/messages.rs b/crates/notedeck_dave/src/messages.rs
@@ -1,6 +1,13 @@
use crate::tools::{ToolCall, ToolResponse};
use async_openai::types::*;
use md_stream::{MdElement, Partial, StreamParser};
+
+/// Pre-parsed markdown: the elements plus the source text their spans are resolved against.
+#[derive(Debug, Clone)]
+pub struct ParsedMarkdown {
+ pub source: String,
+ pub elements: Vec<MdElement>,
+}
use nostrdb::{Ndb, Transaction};
use serde::{Deserialize, Serialize};
use tokio::sync::oneshot;
@@ -52,8 +59,8 @@ pub struct PermissionRequest {
pub response: Option<PermissionResponseType>,
/// For AskUserQuestion: pre-computed summary of answers for display
pub answer_summary: Option<AnswerSummary>,
- /// For ExitPlanMode: pre-parsed markdown elements from the plan content
- pub cached_plan_elements: Option<Vec<MdElement>>,
+ /// For ExitPlanMode: pre-parsed markdown with source text for span resolution
+ pub cached_plan: Option<ParsedMarkdown>,
}
/// A single entry in an answer summary
@@ -247,6 +254,13 @@ impl AssistantMessage {
&self.text
}
+ /// Get the buffer for resolving spans in parsed elements.
+ /// This is the same string as text(): the parser and AssistantMessage each
+ /// append every token via push_str(token), so their buffers stay identical.
+ pub fn buffer(&self) -> &str {
+ &self.text
+ }
+
/// Get parsed markdown elements.
pub fn parsed_elements(&self) -> &[MdElement] {
if let Some(cached) = &self.cached_elements {
diff --git a/crates/notedeck_dave/src/ui/dave.rs b/crates/notedeck_dave/src/ui/dave.rs
@@ -698,13 +698,18 @@ impl<'a> DaveUi<'a> {
ui.add_space(8.0);
// Render plan content as markdown (pre-parsed at construction)
- if let Some(elements) = &request.cached_plan_elements {
- markdown_ui::render_assistant_message(elements, None, ui);
- } else if let Some(plan) =
+ if let Some(plan) = &request.cached_plan {
+ markdown_ui::render_assistant_message(
+ &plan.elements,
+ None,
+ &plan.source,
+ ui,
+ );
+ } else if let Some(plan_text) =
request.tool_input.get("plan").and_then(|v| v.as_str())
{
// Fallback: render as plain text
- ui.label(plan);
+ ui.label(plan_text);
}
ui.add_space(8.0);
@@ -1113,6 +1118,7 @@ impl<'a> DaveUi<'a> {
fn assistant_chat(&self, msg: &AssistantMessage, ui: &mut egui::Ui) {
let elements = msg.parsed_elements();
let partial = msg.partial();
- markdown_ui::render_assistant_message(elements, partial, ui);
+ let buffer = msg.buffer();
+ markdown_ui::render_assistant_message(elements, partial, buffer, ui);
}
}
diff --git a/crates/notedeck_dave/src/ui/markdown_ui.rs b/crates/notedeck_dave/src/ui/markdown_ui.rs
@@ -4,6 +4,7 @@ use egui::text::LayoutJob;
use egui::{Color32, FontFamily, FontId, RichText, TextFormat, Ui};
use md_stream::{
parse_inline, CodeBlock, InlineElement, InlineStyle, ListItem, MdElement, Partial, PartialKind,
+ Span,
};
/// Theme for markdown rendering, derived from egui visuals.
@@ -29,7 +30,7 @@ impl MdTheme {
heading_sizes: [24.0, 20.0, 18.0, 16.0, 14.0, 12.0],
code_bg,
code_text: Color32::from_rgb(0xD4, 0xA5, 0x74), // Muted amber/sand
- link_color: Color32::from_rgb(100, 149, 237), // Cornflower blue
+ link_color: Color32::from_rgb(100, 149, 237), // Cornflower blue
blockquote_border: visuals.widgets.noninteractive.bg_stroke.color,
blockquote_bg: visuals.faint_bg_color,
}
@@ -37,38 +38,50 @@ impl MdTheme {
}
/// Render all parsed markdown elements plus any partial state.
-pub fn render_assistant_message(elements: &[MdElement], partial: Option<&Partial>, ui: &mut Ui) {
+pub fn render_assistant_message(
+ elements: &[MdElement],
+ partial: Option<&Partial>,
+ buffer: &str,
+ ui: &mut Ui,
+) {
let theme = MdTheme::from_visuals(ui.visuals());
ui.vertical(|ui| {
for element in elements {
- render_element(element, &theme, ui);
+ render_element(element, &theme, buffer, ui);
}
// Render partial (speculative) content for immediate feedback
if let Some(partial) = partial {
- render_partial(partial, &theme, ui);
+ render_partial(partial, &theme, buffer, ui);
}
});
}
-fn render_element(element: &MdElement, theme: &MdTheme, ui: &mut Ui) {
+fn render_element(element: &MdElement, theme: &MdTheme, buffer: &str, ui: &mut Ui) {
match element {
MdElement::Heading { level, content } => {
let size = theme.heading_sizes[(*level as usize).saturating_sub(1).min(5)];
- ui.add(egui::Label::new(RichText::new(content).size(size).strong()).wrap());
+ ui.add(
+ egui::Label::new(RichText::new(content.resolve(buffer)).size(size).strong()).wrap(),
+ );
ui.add_space(4.0);
}
MdElement::Paragraph(inlines) => {
ui.horizontal_wrapped(|ui| {
- render_inlines(inlines, theme, ui);
+ render_inlines(inlines, theme, buffer, ui);
});
ui.add_space(8.0);
}
MdElement::CodeBlock(CodeBlock { language, content }) => {
- render_code_block(language.as_deref(), content, theme, ui);
+ render_code_block(
+ language.map(|s| s.resolve(buffer)),
+ content.resolve(buffer),
+ theme,
+ ui,
+ );
}
MdElement::BlockQuote(nested) => {
@@ -78,7 +91,7 @@ fn render_element(element: &MdElement, theme: &MdTheme, ui: &mut Ui) {
.inner_margin(egui::Margin::symmetric(8, 4))
.show(ui, |ui| {
for elem in nested {
- render_element(elem, theme, ui);
+ render_element(elem, theme, buffer, ui);
}
});
ui.add_space(8.0);
@@ -86,7 +99,7 @@ fn render_element(element: &MdElement, theme: &MdTheme, ui: &mut Ui) {
MdElement::UnorderedList(items) => {
for item in items {
- render_list_item(item, "\u{2022}", theme, ui);
+ render_list_item(item, "\u{2022}", theme, buffer, ui);
}
ui.add_space(8.0);
}
@@ -94,13 +107,13 @@ fn render_element(element: &MdElement, theme: &MdTheme, ui: &mut Ui) {
MdElement::OrderedList { start, items } => {
for (i, item) in items.iter().enumerate() {
let marker = format!("{}.", start + i as u32);
- render_list_item(item, &marker, theme, ui);
+ render_list_item(item, &marker, theme, buffer, ui);
}
ui.add_space(8.0);
}
MdElement::Table { headers, rows } => {
- render_table(headers, rows, theme, ui);
+ render_table(headers, rows, theme, buffer, ui);
}
MdElement::ThematicBreak => {
@@ -108,8 +121,8 @@ fn render_element(element: &MdElement, theme: &MdTheme, ui: &mut Ui) {
ui.add_space(8.0);
}
- MdElement::Text(text) => {
- ui.label(text);
+ MdElement::Text(span) => {
+ ui.label(span.resolve(buffer));
}
}
}
@@ -122,7 +135,7 @@ fn flush_job(job: &mut LayoutJob, ui: &mut Ui) {
}
}
-fn render_inlines(inlines: &[InlineElement], theme: &MdTheme, ui: &mut Ui) {
+fn render_inlines(inlines: &[InlineElement], theme: &MdTheme, buffer: &str, ui: &mut Ui) {
let font_size = ui.style().text_styles[&egui::TextStyle::Body].size;
let text_color = ui.visuals().text_color();
@@ -157,41 +170,50 @@ fn render_inlines(inlines: &[InlineElement], theme: &MdTheme, ui: &mut Ui) {
for inline in inlines {
match inline {
- InlineElement::Text(text) => {
- job.append(text, 0.0, text_fmt.clone());
+ InlineElement::Text(span) => {
+ job.append(span.resolve(buffer), 0.0, text_fmt.clone());
}
- InlineElement::Code(code) => {
- job.append(code, 0.0, code_fmt.clone());
+ InlineElement::Code(span) => {
+ job.append(span.resolve(buffer), 0.0, code_fmt.clone());
}
- InlineElement::Styled { style, content } => match style {
- InlineStyle::Italic => {
- job.append(content, 0.0, italic_fmt.clone());
- }
- InlineStyle::Strikethrough => {
- job.append(content, 0.0, strikethrough_fmt.clone());
- }
- InlineStyle::Bold | InlineStyle::BoldItalic => {
- // TextFormat has no bold/weight — flush and render as separate label
- flush_job(&mut job, ui);
- let rt = if matches!(style, InlineStyle::BoldItalic) {
- RichText::new(content).strong().italics()
- } else {
- RichText::new(content).strong()
- };
- ui.label(rt);
+ InlineElement::Styled { style, content } => {
+ let text = content.resolve(buffer);
+ match style {
+ InlineStyle::Italic => {
+ job.append(text, 0.0, italic_fmt.clone());
+ }
+ InlineStyle::Strikethrough => {
+ job.append(text, 0.0, strikethrough_fmt.clone());
+ }
+ InlineStyle::Bold | InlineStyle::BoldItalic => {
+ // TextFormat has no bold/weight — flush and render as separate label
+ flush_job(&mut job, ui);
+ let rt = if matches!(style, InlineStyle::BoldItalic) {
+ RichText::new(text).strong().italics()
+ } else {
+ RichText::new(text).strong()
+ };
+ ui.label(rt);
+ }
}
- },
+ }
InlineElement::Link { text, url } => {
flush_job(&mut job, ui);
- ui.hyperlink_to(RichText::new(text).color(theme.link_color), url);
+ ui.hyperlink_to(
+ RichText::new(text.resolve(buffer)).color(theme.link_color),
+ url.resolve(buffer),
+ );
}
InlineElement::Image { alt, url } => {
flush_job(&mut job, ui);
- ui.hyperlink_to(format!("[Image: {}]", alt), url);
+ ui.hyperlink_to(
+ format!("[Image: {}]", alt.resolve(buffer)),
+ url.resolve(buffer),
+ );
}
InlineElement::LineBreak => {
@@ -222,24 +244,24 @@ fn render_code_block(language: Option<&str>, content: &str, theme: &MdTheme, ui:
ui.add_space(8.0);
}
-fn render_list_item(item: &ListItem, marker: &str, theme: &MdTheme, ui: &mut Ui) {
+fn render_list_item(item: &ListItem, marker: &str, theme: &MdTheme, buffer: &str, ui: &mut Ui) {
ui.horizontal(|ui| {
ui.label(RichText::new(marker).weak());
ui.vertical(|ui| {
ui.horizontal_wrapped(|ui| {
- render_inlines(&item.content, theme, ui);
+ render_inlines(&item.content, theme, buffer, ui);
});
// Render nested list if present
if let Some(nested) = &item.nested {
ui.indent("nested", |ui| {
- render_element(nested, theme, ui);
+ render_element(nested, theme, buffer, ui);
});
}
});
});
}
-fn render_table(headers: &[String], rows: &[Vec<String>], theme: &MdTheme, ui: &mut Ui) {
+fn render_table(headers: &[Span], rows: &[Vec<Span>], theme: &MdTheme, buffer: &str, ui: &mut Ui) {
use egui_extras::{Column, TableBuilder};
let num_cols = headers.len();
@@ -260,10 +282,9 @@ fn render_table(headers: &[String], rows: &[Vec<String>], theme: &MdTheme, ui: &
.header(28.0, |mut header| {
for h in headers {
header.col(|ui| {
- ui.painter()
- .rect_filled(ui.max_rect(), 0.0, header_bg);
+ ui.painter().rect_filled(ui.max_rect(), 0.0, header_bg);
egui::Frame::NONE.inner_margin(cell_padding).show(ui, |ui| {
- ui.strong(h);
+ ui.strong(h.resolve(buffer));
});
});
}
@@ -275,7 +296,7 @@ fn render_table(headers: &[String], rows: &[Vec<String>], theme: &MdTheme, ui: &
table_row.col(|ui| {
egui::Frame::NONE.inner_margin(cell_padding).show(ui, |ui| {
if let Some(cell) = row.get(i) {
- ui.label(cell);
+ ui.label(cell.resolve(buffer));
}
});
});
@@ -286,8 +307,8 @@ fn render_table(headers: &[String], rows: &[Vec<String>], theme: &MdTheme, ui: &
ui.add_space(8.0);
}
-fn render_partial(partial: &Partial, theme: &MdTheme, ui: &mut Ui) {
- let content = &partial.content;
+fn render_partial(partial: &Partial, theme: &MdTheme, buffer: &str, ui: &mut Ui) {
+ let content = partial.content(buffer);
if content.is_empty() {
return;
}
@@ -301,7 +322,7 @@ fn render_partial(partial: &Partial, theme: &MdTheme, ui: &mut Ui) {
.corner_radius(4.0)
.show(ui, |ui| {
if let Some(lang) = language {
- ui.label(RichText::new(lang).small().weak());
+ ui.label(RichText::new(lang.resolve(buffer)).small().weak());
}
ui.add(
egui::Label::new(RichText::new(content).monospace().color(theme.code_text))
@@ -323,7 +344,7 @@ fn render_partial(partial: &Partial, theme: &MdTheme, ui: &mut Ui) {
seen_separator,
} => {
if *seen_separator {
- render_table(headers, rows, theme, ui);
+ render_table(headers, rows, theme, buffer, ui);
} else {
ui.label(content);
}
@@ -331,17 +352,17 @@ fn render_partial(partial: &Partial, theme: &MdTheme, ui: &mut Ui) {
PartialKind::Paragraph => {
// Parse inline elements from the partial content for proper formatting
- let inlines = parse_inline(content);
+ let inlines = parse_inline(content, partial.content_start);
ui.horizontal_wrapped(|ui| {
- render_inlines(&inlines, theme, ui);
+ render_inlines(&inlines, theme, buffer, ui);
});
}
_ => {
// Other partial kinds - parse inline elements too
- let inlines = parse_inline(content);
+ let inlines = parse_inline(content, partial.content_start);
ui.horizontal_wrapped(|ui| {
- render_inlines(&inlines, theme, ui);
+ render_inlines(&inlines, theme, buffer, ui);
});
}
}