commit 769f7cd497e249e6a7823925bca438ff3b09d18b
parent 3c6d9b65a4d9a84522efc08d81af8db47612444c
Author: William Casarin <jb55@jb55.com>
Date: Sun, 15 Feb 2026 09:57:32 -0800
md-stream: fix paragraph break detection when streaming char-by-char
When tokens arrive one character at a time, \n\n was never detected
as a paragraph break because each \n was processed separately.
Check if partial content ends with \n when new text starts with \n
to handle the split boundary case.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Diffstat:
2 files changed, 85 insertions(+), 0 deletions(-)
diff --git a/crates/md-stream/src/parser.rs b/crates/md-stream/src/parser.rs
@@ -318,6 +318,26 @@ impl StreamParser {
/// Process inline content.
fn process_inline(&mut self, text: &str) -> bool {
+ // Check for paragraph break split across tokens:
+ // partial content ends with \n and new text starts with \n
+ if text.starts_with('\n') {
+ if let Some(ref partial) = self.partial {
+ if partial.content.ends_with('\n') {
+ // Double newline split across token boundary — emit paragraph
+ let para_text = std::mem::take(&mut self.partial.as_mut().unwrap().content);
+ self.partial = None;
+
+ if !para_text.trim().is_empty() {
+ let inline_elements = parse_inline(para_text.trim());
+ self.parsed.push(MdElement::Paragraph(inline_elements));
+ }
+ self.at_line_start = true;
+ self.advance(1); // consume the \n
+ return true;
+ }
+ }
+ }
+
if let Some(nl_pos) = text.find("\n\n") {
// Double newline = paragraph break
// Combine accumulated partial content with text before \n\n
diff --git a/crates/md-stream/src/tests.rs b/crates/md-stream/src/tests.rs
@@ -432,6 +432,71 @@ fn test_paragraph_finalize_emits_content() {
}
#[test]
+fn test_inline_code_with_angle_brackets() {
+ // Test parse_inline directly (no StreamParser): both backtick spans contain '<' and '>' and must each come back as InlineElement::Code
+ let input = "Generic Rust: `impl Iterator<Item = &str>` returns a `Result<(), anyhow::Error>`";
+ let result = crate::parse_inline(input);
+ eprintln!("parse_inline result: {:#?}", result); // debug aid: dump the parsed elements to stderr
+
+ let code_elements: Vec<_> = result
+ .iter()
+ .filter(|e| matches!(e, InlineElement::Code(_)))
+ .collect();
+ assert_eq!(code_elements.len(), 2, "Expected 2 code spans, got: {:#?}", result);
+}
+
+#[test]
+fn test_streaming_inline_code_with_angle_brackets() {
+ // Test streaming parser with token-by-token delivery: same two code spans as the direct parse_inline test, but fed through StreamParser::push
+ let mut parser = StreamParser::new();
+ let input = "5. Generic Rust: `impl Iterator<Item = &str>` returns a `Result<(), anyhow::Error>`\n\n";
+
+ // Simulate streaming: every char is pushed as its own token, so the trailing double newline arrives as two separate pushes
+ for ch in input.chars() {
+ parser.push(&ch.to_string());
+ }
+
+ eprintln!("Parsed elements: {:#?}", parser.parsed());
+ eprintln!("Partial: {:#?}", parser.partial());
+
+ // At least one element must be parsed, and the first one must be a paragraph holding both code spans
+ assert!(!parser.parsed().is_empty(), "Should have parsed elements");
+
+ if let MdElement::Paragraph(inlines) = &parser.parsed()[0] {
+ let code_elements: Vec<_> = inlines
+ .iter()
+ .filter(|e| matches!(e, InlineElement::Code(_)))
+ .collect();
+ assert_eq!(code_elements.len(), 2, "Expected 2 code spans, got: {:#?}", inlines);
+ } else {
+ panic!("Expected paragraph, got: {:?}", parser.parsed()[0]);
+ }
+}
+
+#[test]
+fn test_streaming_multiple_code_spans_with_angle_brackets() {
+ // From the screenshot (not included in this file): three code spans, one with nested angle brackets, pushed one char per token
+ let mut parser = StreamParser::new();
+ let input = "use `HashMap<K, V>` or `Vec<String>` or `Option<Box<dyn Error>>` in your types\n\n";
+
+ for ch in input.chars() {
+ parser.push(&ch.to_string());
+ }
+
+ assert!(!parser.parsed().is_empty(), "Should have parsed elements"); // guards the [0] index below
+
+ if let MdElement::Paragraph(inlines) = &parser.parsed()[0] {
+ let code_elements: Vec<_> = inlines
+ .iter()
+ .filter(|e| matches!(e, InlineElement::Code(_)))
+ .collect();
+ assert_eq!(code_elements.len(), 3, "Expected 3 code spans, got: {:#?}", inlines);
+ } else {
+ panic!("Expected paragraph, got: {:?}", parser.parsed()[0]);
+ }
+}
+
+#[test]
fn test_heading_partial_kind_distinct_from_paragraph() {
let mut parser = StreamParser::new();
parser.push("# Heading without newline");