parser.rs (35078B)
1 //! Core streaming parser implementation. 2 3 use crate::element::{CodeBlock, MdElement, Span}; 4 use crate::inline::parse_inline; 5 use crate::partial::{Partial, PartialKind}; 6 7 /// Incremental markdown parser for streaming input. 8 /// 9 /// Maintains a single contiguous buffer of incoming text and tracks 10 /// a processing cursor to allow progressive rendering as content streams in. 11 pub struct StreamParser { 12 /// Contiguous buffer of all pushed text 13 buffer: String, 14 15 /// Completed markdown elements 16 parsed: Vec<MdElement>, 17 18 /// Current in-progress element (if any) 19 partial: Option<Partial>, 20 21 /// Byte offset of first unprocessed content in buffer 22 process_pos: usize, 23 24 /// Are we at the start of a line? (for block-level detection) 25 at_line_start: bool, 26 } 27 28 /// Lightweight dispatch tag for partial state, avoiding Clone on PartialKind 29 /// which contains Vecs (table headers/rows). 30 #[derive(Clone, Copy)] 31 enum PartialDispatch { 32 CodeFence { fence_char: char, fence_len: usize }, 33 Heading { level: u8 }, 34 Table, 35 Paragraph, 36 Other, 37 } 38 39 impl StreamParser { 40 pub fn new() -> Self { 41 Self { 42 buffer: String::new(), 43 parsed: Vec::new(), 44 partial: None, 45 process_pos: 0, 46 at_line_start: true, 47 } 48 } 49 50 /// Push a new token chunk and process it. 51 pub fn push(&mut self, token: &str) { 52 if token.is_empty() { 53 return; 54 } 55 56 self.buffer.push_str(token); 57 self.process_new_content(); 58 } 59 60 /// Get completed elements for rendering. 61 pub fn parsed(&self) -> &[MdElement] { 62 &self.parsed 63 } 64 65 /// Get the parser's buffer for resolving spans. 66 pub fn buffer(&self) -> &str { 67 &self.buffer 68 } 69 70 /// Consume the parser and return the completed elements and buffer. 71 pub fn into_parts(self) -> (Vec<MdElement>, String) { 72 (self.parsed, self.buffer) 73 } 74 75 /// Consume the parser and return the completed elements. 76 pub fn into_parsed(self) -> Vec<MdElement> { 77 self.parsed 78 } 79 80 /// Get the current partial state (for speculative rendering). 81 pub fn partial(&self) -> Option<&Partial> { 82 self.partial.as_ref() 83 } 84 85 /// Get the speculative content that would render from partial state. 86 /// Returns the raw accumulated text that isn't yet a complete element. 87 pub fn partial_content(&self) -> Option<&str> { 88 self.partial.as_ref().map(|p| p.content(&self.buffer)) 89 } 90 91 /// Check if we're currently inside a code block. 92 pub fn in_code_block(&self) -> bool { 93 matches!( 94 self.partial.as_ref().map(|p| &p.kind), 95 Some(PartialKind::CodeFence { .. }) 96 ) 97 } 98 99 /// Get the unprocessed portion of the buffer. 100 fn remaining(&self) -> &str { 101 &self.buffer[self.process_pos..] 102 } 103 104 /// Compute a trimmed span (strip leading/trailing whitespace). 105 fn trim_span(&self, span: Span) -> Span { 106 let s = &self.buffer[span.start..span.end]; 107 let trimmed = s.trim(); 108 if trimmed.is_empty() { 109 return Span::new(span.start, span.start); 110 } 111 let ltrim = s.len() - s.trim_start().len(); 112 Span::new(span.start + ltrim, span.start + ltrim + trimmed.len()) 113 } 114 115 /// Extract the dispatch info from the current partial state. 116 /// Returns only small Copy data to avoid cloning Vecs in PartialKind::Table. 117 fn partial_dispatch(&self) -> Option<PartialDispatch> { 118 self.partial.as_ref().map(|p| match &p.kind { 119 PartialKind::CodeFence { 120 fence_char, 121 fence_len, 122 .. 123 } => PartialDispatch::CodeFence { 124 fence_char: *fence_char, 125 fence_len: *fence_len, 126 }, 127 PartialKind::Heading { level } => PartialDispatch::Heading { level: *level }, 128 PartialKind::Table { .. } => PartialDispatch::Table, 129 PartialKind::Paragraph => PartialDispatch::Paragraph, 130 _ => PartialDispatch::Other, 131 }) 132 } 133 134 /// Process newly added content. 135 fn process_new_content(&mut self) { 136 while self.process_pos < self.buffer.len() { 137 // Handle based on current partial state 138 if let Some(dispatch) = self.partial_dispatch() { 139 match dispatch { 140 PartialDispatch::CodeFence { 141 fence_char, 142 fence_len, 143 } => { 144 if self.process_code_fence(fence_char, fence_len) { 145 continue; 146 } 147 return; // Need more input 148 } 149 PartialDispatch::Heading { level } => { 150 if self.process_heading(level) { 151 continue; 152 } 153 return; 154 } 155 PartialDispatch::Table => { 156 if self.process_table() { 157 continue; 158 } 159 return; 160 } 161 PartialDispatch::Paragraph => { 162 // For paragraphs, check if we're at a line start that could be a block element 163 if self.at_line_start { 164 // Take the paragraph partial first — try_block_start may 165 // replace self.partial with the new block element 166 let para_partial = self.partial.take(); 167 168 if let Some(consumed) = self.try_block_start() { 169 // Emit the saved paragraph before the new block 170 if let Some(partial) = para_partial { 171 let span = partial.content_span(); 172 let trimmed = self.trim_span(span); 173 if !trimmed.is_empty() { 174 let content = trimmed.resolve(&self.buffer); 175 let inline_elements = parse_inline(content, trimmed.start); 176 self.parsed.push(MdElement::Paragraph(inline_elements)); 177 } 178 } 179 self.advance(consumed); 180 continue; 181 } 182 183 // Block start failed — restore the paragraph partial 184 self.partial = para_partial; 185 // If remaining could be the start of a block element but we 186 // don't have enough chars yet, wait for more input rather than 187 // consuming into the paragraph (e.g. "`" could become "```") 188 if self.could_be_block_start() { 189 return; 190 } 191 } 192 // Continue with inline processing 193 if self.process_inline() { 194 continue; 195 } 196 return; 197 } 198 PartialDispatch::Other => { 199 // For other inline elements, process character by character 200 if self.process_inline() { 201 continue; 202 } 203 return; 204 } 205 } 206 } 207 208 // No partial state - detect new elements 209 if self.at_line_start { 210 if let Some(consumed) = self.try_block_start() { 211 self.advance(consumed); 212 continue; 213 } 214 if self.could_be_block_start() { 215 return; 216 } 217 } 218 219 // Fall back to inline processing 220 if self.process_inline() { 221 continue; 222 } 223 return; 224 } 225 } 226 227 /// Check if remaining text could be the start of a block element but we don't 228 /// have enough characters to confirm yet. Used to defer consuming 229 /// ambiguous prefixes like "`" or "``" that might become "```". 230 fn could_be_block_start(&self) -> bool { 231 let trimmed = self.remaining().trim_start(); 232 if trimmed.is_empty() { 233 return false; 234 } 235 236 // Could be a code fence: need at least 3 backticks or tildes 237 let first = trimmed.as_bytes()[0]; 238 if first == b'`' || first == b'~' { 239 if trimmed.len() < 3 { 240 // All chars so far are the same fence char 241 return trimmed.bytes().all(|b| b == first); 242 } 243 // Have 3+ fence chars — still need the newline to finalize 244 // the opening line (language tag may be incomplete) 245 let fence_len = trimmed.bytes().take_while(|&b| b == first).count(); 246 if fence_len >= 3 && !trimmed[fence_len..].contains('\n') { 247 return true; 248 } 249 } 250 251 // Could be a thematic break: need "---", "***", or "___" 252 if trimmed.len() < 3 { 253 let first = trimmed.as_bytes()[0]; 254 if first == b'-' || first == b'*' || first == b'_' { 255 return trimmed.bytes().all(|b| b == first); 256 } 257 } 258 259 // Could be a table row: starts with | but no newline yet 260 if trimmed.starts_with('|') && !trimmed.contains('\n') { 261 return true; 262 } 263 264 false 265 } 266 267 /// Try to detect a block-level element at line start. 268 /// Returns bytes consumed if successful. 269 fn try_block_start(&mut self) -> Option<usize> { 270 let text = self.remaining(); 271 let trimmed = text.trim_start(); 272 let leading_space = text.len() - trimmed.len(); 273 274 // Heading: # ## ### etc 275 if trimmed.starts_with('#') { 276 let level = trimmed.chars().take_while(|&c| c == '#').count(); 277 if level <= 6 { 278 if let Some(rest) = trimmed.get(level..) { 279 if rest.starts_with(' ') || rest.is_empty() { 280 let consumed = leading_space + level + rest.starts_with(' ') as usize; 281 let content_start = self.process_pos + consumed; 282 let mut partial = Partial::new( 283 PartialKind::Heading { level: level as u8 }, 284 self.process_pos, 285 ); 286 partial.content_start = content_start; 287 partial.content_end = content_start; 288 self.partial = Some(partial); 289 self.at_line_start = false; 290 return Some(consumed); 291 } 292 } 293 } 294 } 295 296 // Code fence: ``` or ~~~ 297 if trimmed.starts_with("```") || trimmed.starts_with("~~~") { 298 let fence_char = trimmed.chars().next().unwrap(); 299 let fence_len = trimmed.chars().take_while(|&c| c == fence_char).count(); 300 301 if fence_len >= 3 { 302 let after_fence = &trimmed[fence_len..]; 303 if let Some(nl_pos) = after_fence.find('\n') { 304 let lang = after_fence[..nl_pos].trim(); 305 let lang_span = if lang.is_empty() { 306 None 307 } else { 308 // Compute absolute span for the language 309 let lang_start_in_after = after_fence[..nl_pos].as_ptr() as usize 310 - after_fence.as_ptr() as usize 311 + (after_fence[..nl_pos].len() 312 - after_fence[..nl_pos].trim_start().len()); 313 let abs_start = 314 self.process_pos + leading_space + fence_len + lang_start_in_after; 315 Some(Span::new(abs_start, abs_start + lang.len())) 316 }; 317 let consumed_lang = nl_pos + 1; 318 319 let consumed = leading_space + fence_len + consumed_lang; 320 let content_start = self.process_pos + consumed; 321 let mut partial = Partial::new( 322 PartialKind::CodeFence { 323 fence_char, 324 fence_len, 325 language: lang_span, 326 }, 327 self.process_pos, 328 ); 329 partial.content_start = content_start; 330 partial.content_end = content_start; 331 self.partial = Some(partial); 332 self.at_line_start = false; 333 return Some(consumed); 334 } else { 335 // No newline yet — the language tag may be incomplete. 336 // Wait for more input so we don't commit a truncated span. 337 return None; 338 } 339 } 340 } 341 342 // Thematic break: --- *** ___ 343 if (trimmed.starts_with("---") || trimmed.starts_with("***") || trimmed.starts_with("___")) 344 && trimmed.chars().filter(|&c| !c.is_whitespace()).count() >= 3 345 { 346 let break_char = trimmed.chars().next().unwrap(); 347 if trimmed 348 .chars() 349 .all(|c| c == break_char || c.is_whitespace()) 350 { 351 if let Some(nl_pos) = text.find('\n') { 352 self.parsed.push(MdElement::ThematicBreak); 353 self.at_line_start = true; 354 return Some(nl_pos + 1); 355 } 356 } 357 } 358 359 // Table row: starts with | 360 if trimmed.starts_with('|') { 361 if let Some(nl_pos) = trimmed.find('\n') { 362 let line = &trimmed[..nl_pos]; 363 let line_abs_offset = self.process_pos + leading_space; 364 let cells = parse_table_row(line, line_abs_offset); 365 if !cells.is_empty() { 366 let mut partial = Partial::new( 367 PartialKind::Table { 368 headers: cells, 369 rows: Vec::new(), 370 seen_separator: false, 371 }, 372 self.process_pos, 373 ); 374 partial.content_start = self.process_pos; 375 partial.content_end = self.process_pos + leading_space + nl_pos; 376 self.partial = Some(partial); 377 self.at_line_start = true; 378 return Some(leading_space + nl_pos + 1); 379 } 380 } 381 } 382 383 None 384 } 385 386 /// Process content inside a code fence. 387 /// Returns true if we should continue processing, false if we need more input. 388 fn process_code_fence(&mut self, fence_char: char, fence_len: usize) -> bool { 389 let text_start = self.process_pos; 390 let text_end = self.buffer.len(); 391 let mut pos = text_start; 392 393 while pos < text_end { 394 // Find next line boundary 395 let line_end = self.buffer[pos..text_end] 396 .find('\n') 397 .map(|i| pos + i + 1) 398 .unwrap_or(text_end); 399 let line = &self.buffer[pos..line_end]; 400 401 let partial = self.partial.as_mut().unwrap(); 402 403 // Check if we're at a line start within the code fence 404 let at_content_line_start = 405 partial.content_is_empty() || self.buffer[..partial.content_end].ends_with('\n'); 406 407 if at_content_line_start { 408 let trimmed = line.trim_start(); 409 410 // Check for closing fence 411 if trimmed.len() >= fence_len 412 && trimmed 413 .as_bytes() 414 .iter() 415 .take(fence_len) 416 .all(|&b| b == fence_char as u8) 417 { 418 let after_fence = &trimmed[fence_len..]; 419 if after_fence.trim().is_empty() || after_fence.starts_with('\n') { 420 // Found closing fence! Complete the code block 421 let language = 422 if let PartialKind::CodeFence { language, .. } = &partial.kind { 423 *language 424 } else { 425 None 426 }; 427 428 let content_span = partial.content_span(); 429 self.parsed.push(MdElement::CodeBlock(CodeBlock { 430 language, 431 content: content_span, 432 })); 433 self.partial = None; 434 self.at_line_start = true; 435 436 // Advance past the closing fence line 437 self.advance(line_end - text_start); 438 return true; 439 } 440 } 441 442 // If this could be the start of a closing fence but we don't 443 // have enough chars yet, wait for more input 444 if !trimmed.is_empty() 445 && trimmed.len() < fence_len 446 && trimmed.bytes().all(|b| b == fence_char as u8) 447 && !line.contains('\n') 448 { 449 // Advance past content lines we already processed, 450 // but stop before the partial fence so we re-check it 451 // when more data arrives. 452 self.advance(pos - text_start); 453 return false; 454 } 455 } 456 457 // Not a closing fence - extend content span to include this line 458 partial.content_end += line.len(); 459 pos = line_end; 460 } 461 462 // Consumed all available text, need more 463 self.advance(text_end - text_start); 464 false 465 } 466 467 /// Process heading content until newline. 468 fn process_heading(&mut self, level: u8) -> bool { 469 let remaining = self.remaining(); 470 if let Some(nl_pos) = remaining.find('\n') { 471 let partial = self.partial.as_mut().unwrap(); 472 partial.content_end += nl_pos; 473 474 let content_span = partial.content_span(); 475 let trimmed = self.trim_span(content_span); 476 self.parsed.push(MdElement::Heading { 477 level, 478 content: trimmed, 479 }); 480 self.partial = None; 481 self.at_line_start = true; 482 self.advance(nl_pos + 1); 483 true 484 } else { 485 // No newline yet - accumulate 486 let len = remaining.len(); 487 let partial = self.partial.as_mut().unwrap(); 488 partial.content_end += len; 489 self.advance(len); 490 false 491 } 492 } 493 494 /// Process table content line by line. 495 /// Returns true if we should continue processing, false if we need more input. 496 fn process_table(&mut self) -> bool { 497 let remaining = self.remaining(); 498 // We need at least one complete line to process 499 if let Some(nl_pos) = remaining.find('\n') { 500 let line = &remaining[..nl_pos]; 501 let trimmed = line.trim(); 502 503 // Check if this line continues the table 504 if trimmed.starts_with('|') { 505 // Capture everything we need from remaining before dropping the borrow 506 let is_sep = is_separator_row(trimmed); 507 let line_abs_offset = self.process_pos; 508 let trim_offset = line.len() - trimmed.len(); 509 let trimmed_span = Span::new( 510 self.process_pos + trim_offset, 511 self.process_pos + trim_offset + trimmed.len(), 512 ); 513 let cells = parse_table_row(trimmed, line_abs_offset + trim_offset); 514 let partial = self.partial.as_mut().unwrap(); 515 if let PartialKind::Table { 516 ref mut rows, 517 ref mut seen_separator, 518 ref headers, 519 .. 520 } = partial.kind 521 { 522 if !*seen_separator { 523 // Expecting separator row 524 if is_sep { 525 *seen_separator = true; 526 } else { 527 // Not a valid table — emit header as paragraph 528 let header_text = format!( 529 "| {} |", 530 headers 531 .iter() 532 .map(|s| s.resolve(&self.buffer)) 533 .collect::<Vec<_>>() 534 .join(" | ") 535 ); 536 let row_text = trimmed_span.resolve(&self.buffer); 537 self.partial = None; 538 let combined = format!("{}\n{}", header_text, row_text); 539 let inlines = parse_inline(&combined, 0); 540 self.parsed.push(MdElement::Paragraph(inlines)); 541 self.at_line_start = true; 542 self.advance(nl_pos + 1); 543 return true; 544 } 545 } else { 546 // Data row 547 rows.push(cells); 548 } 549 } 550 self.advance(nl_pos + 1); 551 return true; 552 } 553 554 // Line doesn't start with | — table is complete 555 let partial = self.partial.take().unwrap(); 556 if let PartialKind::Table { 557 headers, 558 rows, 559 seen_separator, 560 } = partial.kind 561 { 562 if seen_separator { 563 self.parsed.push(MdElement::Table { headers, rows }); 564 } else { 565 // Never saw separator — emit as paragraph 566 let text = format!( 567 "| {} |", 568 headers 569 .iter() 570 .map(|s| s.resolve(&self.buffer)) 571 .collect::<Vec<_>>() 572 .join(" | ") 573 ); 574 let inlines = parse_inline(&text, 0); 575 self.parsed.push(MdElement::Paragraph(inlines)); 576 } 577 } 578 self.at_line_start = true; 579 // Don't advance — let the non-table line be re-processed 580 return true; 581 } 582 583 // No newline yet — check if we have a partial line starting with | 584 // If so, wait for more input. If not, table is done. 585 let trimmed = remaining.trim(); 586 if trimmed.starts_with('|') || trimmed.is_empty() { 587 // Could be another table row, wait for newline 588 return false; 589 } 590 591 // Non-pipe content without newline — table is complete 592 let partial = self.partial.take().unwrap(); 593 if let PartialKind::Table { 594 headers, 595 rows, 596 seen_separator, 597 } = partial.kind 598 { 599 if seen_separator { 600 self.parsed.push(MdElement::Table { headers, rows }); 601 } else { 602 let text = format!( 603 "| {} |", 604 headers 605 .iter() 606 .map(|s| s.resolve(&self.buffer)) 607 .collect::<Vec<_>>() 608 .join(" | ") 609 ); 610 let inlines = parse_inline(&text, 0); 611 self.parsed.push(MdElement::Paragraph(inlines)); 612 } 613 } 614 self.at_line_start = true; 615 true 616 } 617 618 /// Process inline content. 619 fn process_inline(&mut self) -> bool { 620 let remaining = self.remaining(); 621 622 // Check for paragraph break split across tokens: 623 // partial content ends with \n and new text starts with \n 624 if remaining.starts_with('\n') { 625 if let Some(ref partial) = self.partial { 626 if self.buffer[..partial.content_end].ends_with('\n') { 627 // Double newline split across token boundary — emit paragraph 628 let span = partial.content_span(); 629 let trimmed = self.trim_span(span); 630 self.partial = None; 631 632 if !trimmed.is_empty() { 633 let content = trimmed.resolve(&self.buffer); 634 let inline_elements = parse_inline(content, trimmed.start); 635 self.parsed.push(MdElement::Paragraph(inline_elements)); 636 } 637 self.at_line_start = true; 638 self.advance(1); // consume the \n 639 return true; 640 } 641 } 642 } 643 644 if let Some(nl_pos) = remaining.find('\n') { 645 let after_nl = &remaining[nl_pos + 1..]; 646 647 // Check if text after the newline starts a block element (code fence, heading, etc.) 648 // If so, emit the current paragraph and let the block parser handle the rest. 649 if !after_nl.is_empty() { 650 let trimmed_after = after_nl.trim_start(); 651 let is_block_start = trimmed_after.starts_with("```") 652 || trimmed_after.starts_with("~~~") 653 || trimmed_after.starts_with('#') 654 || trimmed_after.starts_with('|'); 655 if is_block_start { 656 // Accumulate text before the newline into the paragraph 657 if let Some(ref mut partial) = self.partial { 658 partial.content_end += nl_pos; 659 let span = partial.content_span(); 660 let trimmed = self.trim_span(span); 661 self.partial = None; 662 663 if !trimmed.is_empty() { 664 let content = trimmed.resolve(&self.buffer); 665 let inline_elements = parse_inline(content, trimmed.start); 666 self.parsed.push(MdElement::Paragraph(inline_elements)); 667 } 668 } else { 669 let start = self.process_pos; 670 let end = self.process_pos + nl_pos; 671 let span = Span::new(start, end); 672 let trimmed = self.trim_span(span); 673 674 if !trimmed.is_empty() { 675 let content = trimmed.resolve(&self.buffer); 676 let inline_elements = parse_inline(content, trimmed.start); 677 self.parsed.push(MdElement::Paragraph(inline_elements)); 678 } 679 } 680 self.at_line_start = true; 681 self.advance(nl_pos + 1); 682 return true; 683 } 684 } 685 } 686 687 // Re-borrow remaining since prior branches may not have taken 688 let remaining = self.remaining(); 689 690 if let Some(nl_pos) = remaining.find("\n\n") { 691 // Double newline = paragraph break 692 // Combine accumulated partial content with text before \n\n 693 if let Some(ref mut partial) = self.partial { 694 partial.content_end += nl_pos; 695 let span = partial.content_span(); 696 let trimmed = self.trim_span(span); 697 self.partial = None; 698 699 if !trimmed.is_empty() { 700 let content = trimmed.resolve(&self.buffer); 701 let inline_elements = parse_inline(content, trimmed.start); 702 self.parsed.push(MdElement::Paragraph(inline_elements)); 703 } 704 } else { 705 let start = self.process_pos; 706 let end = self.process_pos + nl_pos; 707 let span = Span::new(start, end); 708 let trimmed = self.trim_span(span); 709 710 if !trimmed.is_empty() { 711 let content = trimmed.resolve(&self.buffer); 712 let inline_elements = parse_inline(content, trimmed.start); 713 self.parsed.push(MdElement::Paragraph(inline_elements)); 714 } 715 } 716 self.at_line_start = true; 717 self.advance(nl_pos + 2); 718 return true; 719 } 720 721 if let Some(nl_pos) = remaining.find('\n') { 722 // Single newline - continue accumulating but track position 723 if let Some(ref mut partial) = self.partial { 724 partial.content_end += nl_pos + 1; 725 } else { 726 // Start accumulating paragraph 727 let content_start = self.process_pos; 728 let content_end = self.process_pos + nl_pos + 1; 729 self.partial = Some(Partial { 730 kind: PartialKind::Paragraph, 731 start_pos: self.process_pos, 732 content_start, 733 content_end, 734 }); 735 } 736 self.at_line_start = true; 737 self.advance(nl_pos + 1); 738 return true; 739 } 740 741 // No newline - accumulate 742 let len = remaining.len(); 743 if let Some(ref mut partial) = self.partial { 744 partial.content_end += len; 745 } else { 746 let content_start = self.process_pos; 747 let content_end = self.process_pos + len; 748 self.partial = Some(Partial { 749 kind: PartialKind::Paragraph, 750 start_pos: self.process_pos, 751 content_start, 752 content_end, 753 }); 754 } 755 self.at_line_start = false; 756 self.advance(len); 757 false 758 } 759 760 /// Advance the processing position by n bytes. 761 fn advance(&mut self, n: usize) { 762 self.process_pos += n; 763 } 764 765 /// Finalize parsing (call when stream ends). 766 /// Converts any remaining partial state to complete elements. 767 pub fn finalize(&mut self) { 768 if let Some(partial) = self.partial.take() { 769 match partial.kind { 770 PartialKind::CodeFence { language, .. } => { 771 // Unclosed code block - emit what we have 772 self.parsed.push(MdElement::CodeBlock(CodeBlock { 773 language, 774 content: partial.content_span(), 775 })); 776 } 777 PartialKind::Heading { level } => { 778 let trimmed = self.trim_span(partial.content_span()); 779 self.parsed.push(MdElement::Heading { 780 level, 781 content: trimmed, 782 }); 783 } 784 PartialKind::Table { 785 headers, 786 rows, 787 seen_separator, 788 } => { 789 if seen_separator { 790 self.parsed.push(MdElement::Table { headers, rows }); 791 } else { 792 // Never saw separator — not a real table, emit as paragraph 793 let text = format!( 794 "| {} |", 795 headers 796 .iter() 797 .map(|s| s.resolve(&self.buffer)) 798 .collect::<Vec<_>>() 799 .join(" | ") 800 ); 801 let inlines = parse_inline(&text, 0); 802 self.parsed.push(MdElement::Paragraph(inlines)); 803 } 804 } 805 PartialKind::Paragraph => { 806 let trimmed = self.trim_span(partial.content_span()); 807 if !trimmed.is_empty() { 808 let content = trimmed.resolve(&self.buffer); 809 let inline_elements = parse_inline(content, trimmed.start); 810 self.parsed.push(MdElement::Paragraph(inline_elements)); 811 } 812 } 813 _ => { 814 // Other partial kinds (lists, blockquotes, etc.) - emit as paragraph for now 815 let trimmed = self.trim_span(partial.content_span()); 816 if !trimmed.is_empty() { 817 let content = trimmed.resolve(&self.buffer); 818 let inline_elements = parse_inline(content, trimmed.start); 819 self.parsed.push(MdElement::Paragraph(inline_elements)); 820 } 821 } 822 } 823 } 824 } 825 } 826 827 impl Default for StreamParser { 828 fn default() -> Self { 829 Self::new() 830 } 831 } 832 833 /// Parse a table row into cell spans by splitting on `|`. 834 /// `line_offset` is the absolute buffer position of `line`. 835 fn parse_table_row(line: &str, line_offset: usize) -> Vec<Span> { 836 let trimmed = line.trim(); 837 let trim_start = line.len() - line.trim_start().len(); 838 let base = line_offset + trim_start; 839 840 let inner_start; 841 let inner; 842 if let Some(stripped) = trimmed.strip_prefix('|') { 843 inner_start = base + 1; 844 inner = stripped.strip_suffix('|').unwrap_or(stripped); 845 } else { 846 inner_start = base; 847 inner = trimmed.strip_suffix('|').unwrap_or(trimmed); 848 }; 849 850 let mut result = Vec::new(); 851 let mut pos = 0; 852 for cell in inner.split('|') { 853 let cell_start = inner_start + pos; 854 let cell_trimmed = cell.trim(); 855 if cell_trimmed.is_empty() { 856 // Empty cell — use a zero-length span at the position 857 result.push(Span::new(cell_start, cell_start)); 858 } else { 859 let ltrim = cell.len() - cell.trim_start().len(); 860 let span_start = cell_start + ltrim; 861 let span_end = span_start + cell_trimmed.len(); 862 result.push(Span::new(span_start, span_end)); 863 } 864 pos += cell.len() + 1; // +1 for the | delimiter 865 } 866 result 867 } 868 869 /// Check if a line is a table separator row (e.g. `|---|---|`). 870 fn is_separator_row(line: &str) -> bool { 871 let trimmed = line.trim(); 872 let inner = trimmed.strip_prefix('|').unwrap_or(trimmed); 873 let inner = inner.strip_suffix('|').unwrap_or(inner); 874 let cells: Vec<&str> = inner.split('|').map(|c| c.trim()).collect(); 875 !cells.is_empty() 876 && cells.iter().all(|c| { 877 let t = c.trim_matches(':'); 878 !t.is_empty() && t.chars().all(|ch| ch == '-') 879 }) 880 }