notedeck

One damus client to rule them all
git clone git://jb55.com/notedeck
Log | Files | Refs | README | LICENSE

parser.rs (35078B)


      1 //! Core streaming parser implementation.
      2 
      3 use crate::element::{CodeBlock, MdElement, Span};
      4 use crate::inline::parse_inline;
      5 use crate::partial::{Partial, PartialKind};
      6 
      7 /// Incremental markdown parser for streaming input.
      8 ///
      9 /// Maintains a single contiguous buffer of incoming text and tracks
     10 /// a processing cursor to allow progressive rendering as content streams in.
     11 pub struct StreamParser {
     12     /// Contiguous buffer of all pushed text
     13     buffer: String,
     14 
     15     /// Completed markdown elements
     16     parsed: Vec<MdElement>,
     17 
     18     /// Current in-progress element (if any)
     19     partial: Option<Partial>,
     20 
     21     /// Byte offset of first unprocessed content in buffer
     22     process_pos: usize,
     23 
     24     /// Are we at the start of a line? (for block-level detection)
     25     at_line_start: bool,
     26 }
     27 
     28 /// Lightweight dispatch tag for partial state, avoiding Clone on PartialKind
     29 /// which contains Vecs (table headers/rows).
     30 #[derive(Clone, Copy)]
     31 enum PartialDispatch {
     32     CodeFence { fence_char: char, fence_len: usize },
     33     Heading { level: u8 },
     34     Table,
     35     Paragraph,
     36     Other,
     37 }
     38 
     39 impl StreamParser {
     40     pub fn new() -> Self {
     41         Self {
     42             buffer: String::new(),
     43             parsed: Vec::new(),
     44             partial: None,
     45             process_pos: 0,
     46             at_line_start: true,
     47         }
     48     }
     49 
     50     /// Push a new token chunk and process it.
     51     pub fn push(&mut self, token: &str) {
     52         if token.is_empty() {
     53             return;
     54         }
     55 
     56         self.buffer.push_str(token);
     57         self.process_new_content();
     58     }
     59 
     60     /// Get completed elements for rendering.
     61     pub fn parsed(&self) -> &[MdElement] {
     62         &self.parsed
     63     }
     64 
     65     /// Get the parser's buffer for resolving spans.
     66     pub fn buffer(&self) -> &str {
     67         &self.buffer
     68     }
     69 
     70     /// Consume the parser and return the completed elements and buffer.
     71     pub fn into_parts(self) -> (Vec<MdElement>, String) {
     72         (self.parsed, self.buffer)
     73     }
     74 
     75     /// Consume the parser and return the completed elements.
     76     pub fn into_parsed(self) -> Vec<MdElement> {
     77         self.parsed
     78     }
     79 
     80     /// Get the current partial state (for speculative rendering).
     81     pub fn partial(&self) -> Option<&Partial> {
     82         self.partial.as_ref()
     83     }
     84 
     85     /// Get the speculative content that would render from partial state.
     86     /// Returns the raw accumulated text that isn't yet a complete element.
     87     pub fn partial_content(&self) -> Option<&str> {
     88         self.partial.as_ref().map(|p| p.content(&self.buffer))
     89     }
     90 
     91     /// Check if we're currently inside a code block.
     92     pub fn in_code_block(&self) -> bool {
     93         matches!(
     94             self.partial.as_ref().map(|p| &p.kind),
     95             Some(PartialKind::CodeFence { .. })
     96         )
     97     }
     98 
     99     /// Get the unprocessed portion of the buffer.
    100     fn remaining(&self) -> &str {
    101         &self.buffer[self.process_pos..]
    102     }
    103 
    104     /// Compute a trimmed span (strip leading/trailing whitespace).
    105     fn trim_span(&self, span: Span) -> Span {
    106         let s = &self.buffer[span.start..span.end];
    107         let trimmed = s.trim();
    108         if trimmed.is_empty() {
    109             return Span::new(span.start, span.start);
    110         }
    111         let ltrim = s.len() - s.trim_start().len();
    112         Span::new(span.start + ltrim, span.start + ltrim + trimmed.len())
    113     }
    114 
    115     /// Extract the dispatch info from the current partial state.
    116     /// Returns only small Copy data to avoid cloning Vecs in PartialKind::Table.
    117     fn partial_dispatch(&self) -> Option<PartialDispatch> {
    118         self.partial.as_ref().map(|p| match &p.kind {
    119             PartialKind::CodeFence {
    120                 fence_char,
    121                 fence_len,
    122                 ..
    123             } => PartialDispatch::CodeFence {
    124                 fence_char: *fence_char,
    125                 fence_len: *fence_len,
    126             },
    127             PartialKind::Heading { level } => PartialDispatch::Heading { level: *level },
    128             PartialKind::Table { .. } => PartialDispatch::Table,
    129             PartialKind::Paragraph => PartialDispatch::Paragraph,
    130             _ => PartialDispatch::Other,
    131         })
    132     }
    133 
    134     /// Process newly added content.
    135     fn process_new_content(&mut self) {
    136         while self.process_pos < self.buffer.len() {
    137             // Handle based on current partial state
    138             if let Some(dispatch) = self.partial_dispatch() {
    139                 match dispatch {
    140                     PartialDispatch::CodeFence {
    141                         fence_char,
    142                         fence_len,
    143                     } => {
    144                         if self.process_code_fence(fence_char, fence_len) {
    145                             continue;
    146                         }
    147                         return; // Need more input
    148                     }
    149                     PartialDispatch::Heading { level } => {
    150                         if self.process_heading(level) {
    151                             continue;
    152                         }
    153                         return;
    154                     }
    155                     PartialDispatch::Table => {
    156                         if self.process_table() {
    157                             continue;
    158                         }
    159                         return;
    160                     }
    161                     PartialDispatch::Paragraph => {
    162                         // For paragraphs, check if we're at a line start that could be a block element
    163                         if self.at_line_start {
    164                             // Take the paragraph partial first — try_block_start may
    165                             // replace self.partial with the new block element
    166                             let para_partial = self.partial.take();
    167 
    168                             if let Some(consumed) = self.try_block_start() {
    169                                 // Emit the saved paragraph before the new block
    170                                 if let Some(partial) = para_partial {
    171                                     let span = partial.content_span();
    172                                     let trimmed = self.trim_span(span);
    173                                     if !trimmed.is_empty() {
    174                                         let content = trimmed.resolve(&self.buffer);
    175                                         let inline_elements = parse_inline(content, trimmed.start);
    176                                         self.parsed.push(MdElement::Paragraph(inline_elements));
    177                                     }
    178                                 }
    179                                 self.advance(consumed);
    180                                 continue;
    181                             }
    182 
    183                             // Block start failed — restore the paragraph partial
    184                             self.partial = para_partial;
    185                             // If remaining could be the start of a block element but we
    186                             // don't have enough chars yet, wait for more input rather than
    187                             // consuming into the paragraph (e.g. "`" could become "```")
    188                             if self.could_be_block_start() {
    189                                 return;
    190                             }
    191                         }
    192                         // Continue with inline processing
    193                         if self.process_inline() {
    194                             continue;
    195                         }
    196                         return;
    197                     }
    198                     PartialDispatch::Other => {
    199                         // For other inline elements, process character by character
    200                         if self.process_inline() {
    201                             continue;
    202                         }
    203                         return;
    204                     }
    205                 }
    206             }
    207 
    208             // No partial state - detect new elements
    209             if self.at_line_start {
    210                 if let Some(consumed) = self.try_block_start() {
    211                     self.advance(consumed);
    212                     continue;
    213                 }
    214                 if self.could_be_block_start() {
    215                     return;
    216                 }
    217             }
    218 
    219             // Fall back to inline processing
    220             if self.process_inline() {
    221                 continue;
    222             }
    223             return;
    224         }
    225     }
    226 
    227     /// Check if remaining text could be the start of a block element but we don't
    228     /// have enough characters to confirm yet. Used to defer consuming
    229     /// ambiguous prefixes like "`" or "``" that might become "```".
    230     fn could_be_block_start(&self) -> bool {
    231         let trimmed = self.remaining().trim_start();
    232         if trimmed.is_empty() {
    233             return false;
    234         }
    235 
    236         // Could be a code fence: need at least 3 backticks or tildes
    237         let first = trimmed.as_bytes()[0];
    238         if first == b'`' || first == b'~' {
    239             if trimmed.len() < 3 {
    240                 // All chars so far are the same fence char
    241                 return trimmed.bytes().all(|b| b == first);
    242             }
    243             // Have 3+ fence chars — still need the newline to finalize
    244             // the opening line (language tag may be incomplete)
    245             let fence_len = trimmed.bytes().take_while(|&b| b == first).count();
    246             if fence_len >= 3 && !trimmed[fence_len..].contains('\n') {
    247                 return true;
    248             }
    249         }
    250 
    251         // Could be a thematic break: need "---", "***", or "___"
    252         if trimmed.len() < 3 {
    253             let first = trimmed.as_bytes()[0];
    254             if first == b'-' || first == b'*' || first == b'_' {
    255                 return trimmed.bytes().all(|b| b == first);
    256             }
    257         }
    258 
    259         // Could be a table row: starts with | but no newline yet
    260         if trimmed.starts_with('|') && !trimmed.contains('\n') {
    261             return true;
    262         }
    263 
    264         false
    265     }
    266 
    267     /// Try to detect a block-level element at line start.
    268     /// Returns bytes consumed if successful.
    269     fn try_block_start(&mut self) -> Option<usize> {
    270         let text = self.remaining();
    271         let trimmed = text.trim_start();
    272         let leading_space = text.len() - trimmed.len();
    273 
    274         // Heading: # ## ### etc
    275         if trimmed.starts_with('#') {
    276             let level = trimmed.chars().take_while(|&c| c == '#').count();
    277             if level <= 6 {
    278                 if let Some(rest) = trimmed.get(level..) {
    279                     if rest.starts_with(' ') || rest.is_empty() {
    280                         let consumed = leading_space + level + rest.starts_with(' ') as usize;
    281                         let content_start = self.process_pos + consumed;
    282                         let mut partial = Partial::new(
    283                             PartialKind::Heading { level: level as u8 },
    284                             self.process_pos,
    285                         );
    286                         partial.content_start = content_start;
    287                         partial.content_end = content_start;
    288                         self.partial = Some(partial);
    289                         self.at_line_start = false;
    290                         return Some(consumed);
    291                     }
    292                 }
    293             }
    294         }
    295 
    296         // Code fence: ``` or ~~~
    297         if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
    298             let fence_char = trimmed.chars().next().unwrap();
    299             let fence_len = trimmed.chars().take_while(|&c| c == fence_char).count();
    300 
    301             if fence_len >= 3 {
    302                 let after_fence = &trimmed[fence_len..];
    303                 if let Some(nl_pos) = after_fence.find('\n') {
    304                     let lang = after_fence[..nl_pos].trim();
    305                     let lang_span = if lang.is_empty() {
    306                         None
    307                     } else {
    308                         // Compute absolute span for the language
    309                         let lang_start_in_after = after_fence[..nl_pos].as_ptr() as usize
    310                             - after_fence.as_ptr() as usize
    311                             + (after_fence[..nl_pos].len()
    312                                 - after_fence[..nl_pos].trim_start().len());
    313                         let abs_start =
    314                             self.process_pos + leading_space + fence_len + lang_start_in_after;
    315                         Some(Span::new(abs_start, abs_start + lang.len()))
    316                     };
    317                     let consumed_lang = nl_pos + 1;
    318 
    319                     let consumed = leading_space + fence_len + consumed_lang;
    320                     let content_start = self.process_pos + consumed;
    321                     let mut partial = Partial::new(
    322                         PartialKind::CodeFence {
    323                             fence_char,
    324                             fence_len,
    325                             language: lang_span,
    326                         },
    327                         self.process_pos,
    328                     );
    329                     partial.content_start = content_start;
    330                     partial.content_end = content_start;
    331                     self.partial = Some(partial);
    332                     self.at_line_start = false;
    333                     return Some(consumed);
    334                 } else {
    335                     // No newline yet — the language tag may be incomplete.
    336                     // Wait for more input so we don't commit a truncated span.
    337                     return None;
    338                 }
    339             }
    340         }
    341 
    342         // Thematic break: --- *** ___
    343         if (trimmed.starts_with("---") || trimmed.starts_with("***") || trimmed.starts_with("___"))
    344             && trimmed.chars().filter(|&c| !c.is_whitespace()).count() >= 3
    345         {
    346             let break_char = trimmed.chars().next().unwrap();
    347             if trimmed
    348                 .chars()
    349                 .all(|c| c == break_char || c.is_whitespace())
    350             {
    351                 if let Some(nl_pos) = text.find('\n') {
    352                     self.parsed.push(MdElement::ThematicBreak);
    353                     self.at_line_start = true;
    354                     return Some(nl_pos + 1);
    355                 }
    356             }
    357         }
    358 
    359         // Table row: starts with |
    360         if trimmed.starts_with('|') {
    361             if let Some(nl_pos) = trimmed.find('\n') {
    362                 let line = &trimmed[..nl_pos];
    363                 let line_abs_offset = self.process_pos + leading_space;
    364                 let cells = parse_table_row(line, line_abs_offset);
    365                 if !cells.is_empty() {
    366                     let mut partial = Partial::new(
    367                         PartialKind::Table {
    368                             headers: cells,
    369                             rows: Vec::new(),
    370                             seen_separator: false,
    371                         },
    372                         self.process_pos,
    373                     );
    374                     partial.content_start = self.process_pos;
    375                     partial.content_end = self.process_pos + leading_space + nl_pos;
    376                     self.partial = Some(partial);
    377                     self.at_line_start = true;
    378                     return Some(leading_space + nl_pos + 1);
    379                 }
    380             }
    381         }
    382 
    383         None
    384     }
    385 
    386     /// Process content inside a code fence.
    387     /// Returns true if we should continue processing, false if we need more input.
    388     fn process_code_fence(&mut self, fence_char: char, fence_len: usize) -> bool {
    389         let text_start = self.process_pos;
    390         let text_end = self.buffer.len();
    391         let mut pos = text_start;
    392 
    393         while pos < text_end {
    394             // Find next line boundary
    395             let line_end = self.buffer[pos..text_end]
    396                 .find('\n')
    397                 .map(|i| pos + i + 1)
    398                 .unwrap_or(text_end);
    399             let line = &self.buffer[pos..line_end];
    400 
    401             let partial = self.partial.as_mut().unwrap();
    402 
    403             // Check if we're at a line start within the code fence
    404             let at_content_line_start =
    405                 partial.content_is_empty() || self.buffer[..partial.content_end].ends_with('\n');
    406 
    407             if at_content_line_start {
    408                 let trimmed = line.trim_start();
    409 
    410                 // Check for closing fence
    411                 if trimmed.len() >= fence_len
    412                     && trimmed
    413                         .as_bytes()
    414                         .iter()
    415                         .take(fence_len)
    416                         .all(|&b| b == fence_char as u8)
    417                 {
    418                     let after_fence = &trimmed[fence_len..];
    419                     if after_fence.trim().is_empty() || after_fence.starts_with('\n') {
    420                         // Found closing fence! Complete the code block
    421                         let language =
    422                             if let PartialKind::CodeFence { language, .. } = &partial.kind {
    423                                 *language
    424                             } else {
    425                                 None
    426                             };
    427 
    428                         let content_span = partial.content_span();
    429                         self.parsed.push(MdElement::CodeBlock(CodeBlock {
    430                             language,
    431                             content: content_span,
    432                         }));
    433                         self.partial = None;
    434                         self.at_line_start = true;
    435 
    436                         // Advance past the closing fence line
    437                         self.advance(line_end - text_start);
    438                         return true;
    439                     }
    440                 }
    441 
    442                 // If this could be the start of a closing fence but we don't
    443                 // have enough chars yet, wait for more input
    444                 if !trimmed.is_empty()
    445                     && trimmed.len() < fence_len
    446                     && trimmed.bytes().all(|b| b == fence_char as u8)
    447                     && !line.contains('\n')
    448                 {
    449                     // Advance past content lines we already processed,
    450                     // but stop before the partial fence so we re-check it
    451                     // when more data arrives.
    452                     self.advance(pos - text_start);
    453                     return false;
    454                 }
    455             }
    456 
    457             // Not a closing fence - extend content span to include this line
    458             partial.content_end += line.len();
    459             pos = line_end;
    460         }
    461 
    462         // Consumed all available text, need more
    463         self.advance(text_end - text_start);
    464         false
    465     }
    466 
    467     /// Process heading content until newline.
    468     fn process_heading(&mut self, level: u8) -> bool {
    469         let remaining = self.remaining();
    470         if let Some(nl_pos) = remaining.find('\n') {
    471             let partial = self.partial.as_mut().unwrap();
    472             partial.content_end += nl_pos;
    473 
    474             let content_span = partial.content_span();
    475             let trimmed = self.trim_span(content_span);
    476             self.parsed.push(MdElement::Heading {
    477                 level,
    478                 content: trimmed,
    479             });
    480             self.partial = None;
    481             self.at_line_start = true;
    482             self.advance(nl_pos + 1);
    483             true
    484         } else {
    485             // No newline yet - accumulate
    486             let len = remaining.len();
    487             let partial = self.partial.as_mut().unwrap();
    488             partial.content_end += len;
    489             self.advance(len);
    490             false
    491         }
    492     }
    493 
    494     /// Process table content line by line.
    495     /// Returns true if we should continue processing, false if we need more input.
    496     fn process_table(&mut self) -> bool {
    497         let remaining = self.remaining();
    498         // We need at least one complete line to process
    499         if let Some(nl_pos) = remaining.find('\n') {
    500             let line = &remaining[..nl_pos];
    501             let trimmed = line.trim();
    502 
    503             // Check if this line continues the table
    504             if trimmed.starts_with('|') {
    505                 // Capture everything we need from remaining before dropping the borrow
    506                 let is_sep = is_separator_row(trimmed);
    507                 let line_abs_offset = self.process_pos;
    508                 let trim_offset = line.len() - trimmed.len();
    509                 let trimmed_span = Span::new(
    510                     self.process_pos + trim_offset,
    511                     self.process_pos + trim_offset + trimmed.len(),
    512                 );
    513                 let cells = parse_table_row(trimmed, line_abs_offset + trim_offset);
    514                 let partial = self.partial.as_mut().unwrap();
    515                 if let PartialKind::Table {
    516                     ref mut rows,
    517                     ref mut seen_separator,
    518                     ref headers,
    519                     ..
    520                 } = partial.kind
    521                 {
    522                     if !*seen_separator {
    523                         // Expecting separator row
    524                         if is_sep {
    525                             *seen_separator = true;
    526                         } else {
    527                             // Not a valid table — emit header as paragraph
    528                             let header_text = format!(
    529                                 "| {} |",
    530                                 headers
    531                                     .iter()
    532                                     .map(|s| s.resolve(&self.buffer))
    533                                     .collect::<Vec<_>>()
    534                                     .join(" | ")
    535                             );
    536                             let row_text = trimmed_span.resolve(&self.buffer);
    537                             self.partial = None;
    538                             let combined = format!("{}\n{}", header_text, row_text);
    539                             let inlines = parse_inline(&combined, 0);
    540                             self.parsed.push(MdElement::Paragraph(inlines));
    541                             self.at_line_start = true;
    542                             self.advance(nl_pos + 1);
    543                             return true;
    544                         }
    545                     } else {
    546                         // Data row
    547                         rows.push(cells);
    548                     }
    549                 }
    550                 self.advance(nl_pos + 1);
    551                 return true;
    552             }
    553 
    554             // Line doesn't start with | — table is complete
    555             let partial = self.partial.take().unwrap();
    556             if let PartialKind::Table {
    557                 headers,
    558                 rows,
    559                 seen_separator,
    560             } = partial.kind
    561             {
    562                 if seen_separator {
    563                     self.parsed.push(MdElement::Table { headers, rows });
    564                 } else {
    565                     // Never saw separator — emit as paragraph
    566                     let text = format!(
    567                         "| {} |",
    568                         headers
    569                             .iter()
    570                             .map(|s| s.resolve(&self.buffer))
    571                             .collect::<Vec<_>>()
    572                             .join(" | ")
    573                     );
    574                     let inlines = parse_inline(&text, 0);
    575                     self.parsed.push(MdElement::Paragraph(inlines));
    576                 }
    577             }
    578             self.at_line_start = true;
    579             // Don't advance — let the non-table line be re-processed
    580             return true;
    581         }
    582 
    583         // No newline yet — check if we have a partial line starting with |
    584         // If so, wait for more input. If not, table is done.
    585         let trimmed = remaining.trim();
    586         if trimmed.starts_with('|') || trimmed.is_empty() {
    587             // Could be another table row, wait for newline
    588             return false;
    589         }
    590 
    591         // Non-pipe content without newline — table is complete
    592         let partial = self.partial.take().unwrap();
    593         if let PartialKind::Table {
    594             headers,
    595             rows,
    596             seen_separator,
    597         } = partial.kind
    598         {
    599             if seen_separator {
    600                 self.parsed.push(MdElement::Table { headers, rows });
    601             } else {
    602                 let text = format!(
    603                     "| {} |",
    604                     headers
    605                         .iter()
    606                         .map(|s| s.resolve(&self.buffer))
    607                         .collect::<Vec<_>>()
    608                         .join(" | ")
    609                 );
    610                 let inlines = parse_inline(&text, 0);
    611                 self.parsed.push(MdElement::Paragraph(inlines));
    612             }
    613         }
    614         self.at_line_start = true;
    615         true
    616     }
    617 
    618     /// Process inline content.
    619     fn process_inline(&mut self) -> bool {
    620         let remaining = self.remaining();
    621 
    622         // Check for paragraph break split across tokens:
    623         // partial content ends with \n and new text starts with \n
    624         if remaining.starts_with('\n') {
    625             if let Some(ref partial) = self.partial {
    626                 if self.buffer[..partial.content_end].ends_with('\n') {
    627                     // Double newline split across token boundary — emit paragraph
    628                     let span = partial.content_span();
    629                     let trimmed = self.trim_span(span);
    630                     self.partial = None;
    631 
    632                     if !trimmed.is_empty() {
    633                         let content = trimmed.resolve(&self.buffer);
    634                         let inline_elements = parse_inline(content, trimmed.start);
    635                         self.parsed.push(MdElement::Paragraph(inline_elements));
    636                     }
    637                     self.at_line_start = true;
    638                     self.advance(1); // consume the \n
    639                     return true;
    640                 }
    641             }
    642         }
    643 
    644         if let Some(nl_pos) = remaining.find('\n') {
    645             let after_nl = &remaining[nl_pos + 1..];
    646 
    647             // Check if text after the newline starts a block element (code fence, heading, etc.)
    648             // If so, emit the current paragraph and let the block parser handle the rest.
    649             if !after_nl.is_empty() {
    650                 let trimmed_after = after_nl.trim_start();
    651                 let is_block_start = trimmed_after.starts_with("```")
    652                     || trimmed_after.starts_with("~~~")
    653                     || trimmed_after.starts_with('#')
    654                     || trimmed_after.starts_with('|');
    655                 if is_block_start {
    656                     // Accumulate text before the newline into the paragraph
    657                     if let Some(ref mut partial) = self.partial {
    658                         partial.content_end += nl_pos;
    659                         let span = partial.content_span();
    660                         let trimmed = self.trim_span(span);
    661                         self.partial = None;
    662 
    663                         if !trimmed.is_empty() {
    664                             let content = trimmed.resolve(&self.buffer);
    665                             let inline_elements = parse_inline(content, trimmed.start);
    666                             self.parsed.push(MdElement::Paragraph(inline_elements));
    667                         }
    668                     } else {
    669                         let start = self.process_pos;
    670                         let end = self.process_pos + nl_pos;
    671                         let span = Span::new(start, end);
    672                         let trimmed = self.trim_span(span);
    673 
    674                         if !trimmed.is_empty() {
    675                             let content = trimmed.resolve(&self.buffer);
    676                             let inline_elements = parse_inline(content, trimmed.start);
    677                             self.parsed.push(MdElement::Paragraph(inline_elements));
    678                         }
    679                     }
    680                     self.at_line_start = true;
    681                     self.advance(nl_pos + 1);
    682                     return true;
    683                 }
    684             }
    685         }
    686 
    687         // Re-borrow remaining since prior branches may not have taken
    688         let remaining = self.remaining();
    689 
    690         if let Some(nl_pos) = remaining.find("\n\n") {
    691             // Double newline = paragraph break
    692             // Combine accumulated partial content with text before \n\n
    693             if let Some(ref mut partial) = self.partial {
    694                 partial.content_end += nl_pos;
    695                 let span = partial.content_span();
    696                 let trimmed = self.trim_span(span);
    697                 self.partial = None;
    698 
    699                 if !trimmed.is_empty() {
    700                     let content = trimmed.resolve(&self.buffer);
    701                     let inline_elements = parse_inline(content, trimmed.start);
    702                     self.parsed.push(MdElement::Paragraph(inline_elements));
    703                 }
    704             } else {
    705                 let start = self.process_pos;
    706                 let end = self.process_pos + nl_pos;
    707                 let span = Span::new(start, end);
    708                 let trimmed = self.trim_span(span);
    709 
    710                 if !trimmed.is_empty() {
    711                     let content = trimmed.resolve(&self.buffer);
    712                     let inline_elements = parse_inline(content, trimmed.start);
    713                     self.parsed.push(MdElement::Paragraph(inline_elements));
    714                 }
    715             }
    716             self.at_line_start = true;
    717             self.advance(nl_pos + 2);
    718             return true;
    719         }
    720 
    721         if let Some(nl_pos) = remaining.find('\n') {
    722             // Single newline - continue accumulating but track position
    723             if let Some(ref mut partial) = self.partial {
    724                 partial.content_end += nl_pos + 1;
    725             } else {
    726                 // Start accumulating paragraph
    727                 let content_start = self.process_pos;
    728                 let content_end = self.process_pos + nl_pos + 1;
    729                 self.partial = Some(Partial {
    730                     kind: PartialKind::Paragraph,
    731                     start_pos: self.process_pos,
    732                     content_start,
    733                     content_end,
    734                 });
    735             }
    736             self.at_line_start = true;
    737             self.advance(nl_pos + 1);
    738             return true;
    739         }
    740 
    741         // No newline - accumulate
    742         let len = remaining.len();
    743         if let Some(ref mut partial) = self.partial {
    744             partial.content_end += len;
    745         } else {
    746             let content_start = self.process_pos;
    747             let content_end = self.process_pos + len;
    748             self.partial = Some(Partial {
    749                 kind: PartialKind::Paragraph,
    750                 start_pos: self.process_pos,
    751                 content_start,
    752                 content_end,
    753             });
    754         }
    755         self.at_line_start = false;
    756         self.advance(len);
    757         false
    758     }
    759 
    760     /// Advance the processing position by n bytes.
    761     fn advance(&mut self, n: usize) {
    762         self.process_pos += n;
    763     }
    764 
    765     /// Finalize parsing (call when stream ends).
    766     /// Converts any remaining partial state to complete elements.
    767     pub fn finalize(&mut self) {
    768         if let Some(partial) = self.partial.take() {
    769             match partial.kind {
    770                 PartialKind::CodeFence { language, .. } => {
    771                     // Unclosed code block - emit what we have
    772                     self.parsed.push(MdElement::CodeBlock(CodeBlock {
    773                         language,
    774                         content: partial.content_span(),
    775                     }));
    776                 }
    777                 PartialKind::Heading { level } => {
    778                     let trimmed = self.trim_span(partial.content_span());
    779                     self.parsed.push(MdElement::Heading {
    780                         level,
    781                         content: trimmed,
    782                     });
    783                 }
    784                 PartialKind::Table {
    785                     headers,
    786                     rows,
    787                     seen_separator,
    788                 } => {
    789                     if seen_separator {
    790                         self.parsed.push(MdElement::Table { headers, rows });
    791                     } else {
    792                         // Never saw separator — not a real table, emit as paragraph
    793                         let text = format!(
    794                             "| {} |",
    795                             headers
    796                                 .iter()
    797                                 .map(|s| s.resolve(&self.buffer))
    798                                 .collect::<Vec<_>>()
    799                                 .join(" | ")
    800                         );
    801                         let inlines = parse_inline(&text, 0);
    802                         self.parsed.push(MdElement::Paragraph(inlines));
    803                     }
    804                 }
    805                 PartialKind::Paragraph => {
    806                     let trimmed = self.trim_span(partial.content_span());
    807                     if !trimmed.is_empty() {
    808                         let content = trimmed.resolve(&self.buffer);
    809                         let inline_elements = parse_inline(content, trimmed.start);
    810                         self.parsed.push(MdElement::Paragraph(inline_elements));
    811                     }
    812                 }
    813                 _ => {
    814                     // Other partial kinds (lists, blockquotes, etc.) - emit as paragraph for now
    815                     let trimmed = self.trim_span(partial.content_span());
    816                     if !trimmed.is_empty() {
    817                         let content = trimmed.resolve(&self.buffer);
    818                         let inline_elements = parse_inline(content, trimmed.start);
    819                         self.parsed.push(MdElement::Paragraph(inline_elements));
    820                     }
    821                 }
    822             }
    823         }
    824     }
    825 }
    826 
    827 impl Default for StreamParser {
    828     fn default() -> Self {
    829         Self::new()
    830     }
    831 }
    832 
    833 /// Parse a table row into cell spans by splitting on `|`.
    834 /// `line_offset` is the absolute buffer position of `line`.
    835 fn parse_table_row(line: &str, line_offset: usize) -> Vec<Span> {
    836     let trimmed = line.trim();
    837     let trim_start = line.len() - line.trim_start().len();
    838     let base = line_offset + trim_start;
    839 
    840     let inner_start;
    841     let inner;
    842     if let Some(stripped) = trimmed.strip_prefix('|') {
    843         inner_start = base + 1;
    844         inner = stripped.strip_suffix('|').unwrap_or(stripped);
    845     } else {
    846         inner_start = base;
    847         inner = trimmed.strip_suffix('|').unwrap_or(trimmed);
    848     };
    849 
    850     let mut result = Vec::new();
    851     let mut pos = 0;
    852     for cell in inner.split('|') {
    853         let cell_start = inner_start + pos;
    854         let cell_trimmed = cell.trim();
    855         if cell_trimmed.is_empty() {
    856             // Empty cell — use a zero-length span at the position
    857             result.push(Span::new(cell_start, cell_start));
    858         } else {
    859             let ltrim = cell.len() - cell.trim_start().len();
    860             let span_start = cell_start + ltrim;
    861             let span_end = span_start + cell_trimmed.len();
    862             result.push(Span::new(span_start, span_end));
    863         }
    864         pos += cell.len() + 1; // +1 for the | delimiter
    865     }
    866     result
    867 }
    868 
    869 /// Check if a line is a table separator row (e.g. `|---|---|`).
    870 fn is_separator_row(line: &str) -> bool {
    871     let trimmed = line.trim();
    872     let inner = trimmed.strip_prefix('|').unwrap_or(trimmed);
    873     let inner = inner.strip_suffix('|').unwrap_or(inner);
    874     let cells: Vec<&str> = inner.split('|').map(|c| c.trim()).collect();
    875     !cells.is_empty()
    876         && cells.iter().all(|c| {
    877             let t = c.trim_matches(':');
    878             !t.is_empty() && t.chars().all(|ch| ch == '-')
    879         })
    880 }