inline.rs - notedeck - One damus client to rule them all

inline.rs (19983B)
      1 //! Inline element parsing for bold, italic, code, links, etc.
      2 
      3 use crate::element::{InlineElement, InlineStyle, Span};
      4 use crate::partial::PartialKind;
      5 
      6 /// Parses inline elements from text.
      7 /// `base_offset` is the position of `text` within the parser's buffer.
      8 /// All returned Spans are absolute buffer positions.
      9 ///
     10 /// Note: This is called on complete paragraph text, not streaming.
     11 /// For streaming, we use PartialKind to track incomplete markers.
     12 pub fn parse_inline(text: &str, base_offset: usize) -> Vec<InlineElement> {
     13     let mut result = Vec::new();
     14     let mut chars = text.char_indices().peekable();
     15     let mut plain_start = 0;
     16 
     17     while let Some((i, c)) = chars.next() {
     18         match c {
     19             // Backtick - inline code
     20             '`' => {
     21                 // Flush any pending plain text
     22                 if i > plain_start {
     23                     result.push(InlineElement::Text(Span::new(
     24                         base_offset + plain_start,
     25                         base_offset + i,
     26                     )));
     27                 }
     28 
     29                 // Count backticks
     30                 let mut backtick_count = 1;
     31                 while chars.peek().map(|(_, c)| *c == '`').unwrap_or(false) {
     32                     chars.next();
     33                     backtick_count += 1;
     34                 }
     35 
     36                 let start_pos = i + backtick_count;
     37 
     38                 // Find closing backticks (same count)
     39                 if let Some(end_pos) = find_closing_backticks(&text[start_pos..], backtick_count) {
     40                     let code_start = start_pos;
     41                     let code_end = start_pos + end_pos;
     42                     let code_content = &text[code_start..code_end];
     43                     // Strip single leading/trailing space if present (CommonMark rule)
     44                     let (trim_start, trim_end) = if code_content.starts_with(' ')
     45                         && code_content.ends_with(' ')
     46                         && code_content.len() > 1
     47                     {
     48                         (code_start + 1, code_end - 1)
     49                     } else {
     50                         (code_start, code_end)
     51                     };
     52                     result.push(InlineElement::Code(Span::new(
     53                         base_offset + trim_start,
     54                         base_offset + trim_end,
     55                     )));
     56 
     57                     // Advance past closing backticks
     58                     let skip_to = start_pos + end_pos + backtick_count;
     59                     while chars.peek().map(|(idx, _)| *idx < skip_to).unwrap_or(false) {
     60                         chars.next();
     61                     }
     62                     plain_start = skip_to;
     63                 } else {
     64                     // No closing - treat as plain text
     65                     plain_start = i;
     66                 }
     67             }
     68 
     69             // Asterisk or underscore - potential bold/italic
     70             '*' | '_' => {
     71                 let marker = c;
     72                 let marker_start = i;
     73 
     74                 // Count consecutive markers
     75                 let mut count = 1;
     76                 while chars.peek().map(|(_, ch)| *ch == marker).unwrap_or(false) {
     77                     chars.next();
     78                     count += 1;
     79                 }
     80 
     81                 // Limit to 3 for bold+italic
     82                 let effective_count = count.min(3);
     83 
     84                 // Check if this could be an opener (not preceded by whitespace at word boundary for _)
     85                 let can_open = if marker == '_' {
     86                     // Underscore: check word boundary rules
     87                     i == 0
     88                         || text[..i]
     89                             .chars()
     90                             .last()
     91                             .map(|c| c.is_whitespace() || c.is_ascii_punctuation())
     92                             .unwrap_or(true)
     93                 } else {
     94                     true // Asterisk can always open
     95                 };
     96 
     97                 if !can_open {
     98                     // Not a valid opener, treat as plain text
     99                     continue;
    100                 }
    101 
    102                 let content_start = marker_start + count;
    103 
    104                 // Look for closing marker
    105                 if let Some((content_end_local, close_len)) =
    106                     find_closing_emphasis(&text[content_start..], marker, effective_count)
    107                 {
    108                     // Flush pending plain text
    109                     if marker_start > plain_start {
    110                         result.push(InlineElement::Text(Span::new(
    111                             base_offset + plain_start,
    112                             base_offset + marker_start,
    113                         )));
    114                     }
    115 
    116                     let style = match close_len {
    117                         1 => InlineStyle::Italic,
    118                         2 => InlineStyle::Bold,
    119                         _ => InlineStyle::BoldItalic,
    120                     };
    121 
    122                     result.push(InlineElement::Styled {
    123                         style,
    124                         content: Span::new(
    125                             base_offset + content_start,
    126                             base_offset + content_start + content_end_local,
    127                         ),
    128                     });
    129 
    130                     // Advance past the content and closing marker
    131                     let skip_to = content_start + content_end_local + close_len;
    132                     while chars.peek().map(|(idx, _)| *idx < skip_to).unwrap_or(false) {
    133                         chars.next();
    134                     }
    135                     plain_start = skip_to;
    136                 }
    137                 // If no closing found, leave as plain text (will be collected)
    138             }
    139 
    140             // Tilde - potential strikethrough
    141             '~' => {
    142                 if chars.peek().map(|(_, c)| *c == '~').unwrap_or(false) {
    143                     chars.next(); // consume second ~
    144 
    145                     // Flush pending text
    146                     if i > plain_start {
    147                         result.push(InlineElement::Text(Span::new(
    148                             base_offset + plain_start,
    149                             base_offset + i,
    150                         )));
    151                     }
    152 
    153                     let content_start = i + 2;
    154 
    155                     // Find closing ~~
    156                     if let Some(end_pos) = text[content_start..].find("~~") {
    157                         result.push(InlineElement::Styled {
    158                             style: InlineStyle::Strikethrough,
    159                             content: Span::new(
    160                                 base_offset + content_start,
    161                                 base_offset + content_start + end_pos,
    162                             ),
    163                         });
    164 
    165                         let skip_to = content_start + end_pos + 2;
    166                         while chars.peek().map(|(idx, _)| *idx < skip_to).unwrap_or(false) {
    167                             chars.next();
    168                         }
    169                         plain_start = skip_to;
    170                     } else {
    171                         // No closing, revert
    172                         plain_start = i;
    173                     }
    174                 }
    175             }
    176 
    177             // Square bracket - potential link or image
    178             '[' => {
    179                 // Flush pending text
    180                 if i > plain_start {
    181                     result.push(InlineElement::Text(Span::new(
    182                         base_offset + plain_start,
    183                         base_offset + i,
    184                     )));
    185                 }
    186 
    187                 if let Some((text_span, url_span, total_len)) =
    188                     parse_link(&text[i..], base_offset + i)
    189                 {
    190                     result.push(InlineElement::Link {
    191                         text: text_span,
    192                         url: url_span,
    193                     });
    194 
    195                     let skip_to = i + total_len;
    196                     while chars.peek().map(|(idx, _)| *idx < skip_to).unwrap_or(false) {
    197                         chars.next();
    198                     }
    199                     plain_start = skip_to;
    200                 } else {
    201                     // Not a valid link, treat [ as plain text
    202                     plain_start = i;
    203                 }
    204             }
    205 
    206             // Exclamation - potential image
    207             '!' => {
    208                 if chars.peek().map(|(_, c)| *c == '[').unwrap_or(false) {
    209                     // Flush pending text
    210                     if i > plain_start {
    211                         result.push(InlineElement::Text(Span::new(
    212                             base_offset + plain_start,
    213                             base_offset + i,
    214                         )));
    215                     }
    216 
    217                     chars.next(); // consume [
    218 
    219                     if let Some((alt_span, url_span, link_len)) =
    220                         parse_link(&text[i + 1..], base_offset + i + 1)
    221                     {
    222                         result.push(InlineElement::Image {
    223                             alt: alt_span,
    224                             url: url_span,
    225                         });
    226 
    227                         let skip_to = i + 1 + link_len;
    228                         while chars.peek().map(|(idx, _)| *idx < skip_to).unwrap_or(false) {
    229                             chars.next();
    230                         }
    231                         plain_start = skip_to;
    232                     } else {
    233                         // Not a valid image
    234                         plain_start = i;
    235                     }
    236                 }
    237             }
    238 
    239             // Newline - could be hard break
    240             '\n' => {
    241                 // Check for hard line break (two spaces before newline)
    242                 if i >= 2 && text[..i].ends_with("  ") {
    243                     // Flush text without trailing spaces
    244                     let text_end = i - 2;
    245                     if text_end > plain_start {
    246                         result.push(InlineElement::Text(Span::new(
    247                             base_offset + plain_start,
    248                             base_offset + text_end,
    249                         )));
    250                     }
    251                     result.push(InlineElement::LineBreak);
    252                     plain_start = i + 1;
    253                 }
    254                 // Otherwise soft line break, keep in text
    255             }
    256 
    257             _ => {
    258                 // Regular character, continue
    259             }
    260         }
    261     }
    262 
    263     // Flush remaining plain text
    264     if plain_start < text.len() {
    265         result.push(InlineElement::Text(Span::new(
    266             base_offset + plain_start,
    267             base_offset + text.len(),
    268         )));
    269     }
    270 
    271     // Collapse adjacent Text elements
    272     collapse_text_elements(&mut result);
    273 
    274     result
    275 }
    276 
    277 /// Find closing backticks matching the opening count.
    278 fn find_closing_backticks(text: &str, count: usize) -> Option<usize> {
    279     let bytes = text.as_bytes();
    280     let mut i = 0;
    281 
    282     while i < bytes.len() {
    283         if bytes[i] == b'`' {
    284             // Count consecutive backticks at this position
    285             let run_start = i;
    286             while i < bytes.len() && bytes[i] == b'`' {
    287                 i += 1;
    288             }
    289             let run_len = i - run_start;
    290             if run_len == count {
    291                 return Some(run_start);
    292             }
    293             // Not the right count, continue
    294         } else {
    295             // Skip non-backtick character (handle UTF-8)
    296             i += text[i..].chars().next().map(|c| c.len_utf8()).unwrap_or(1);
    297         }
    298     }
    299     None
    300 }
    301 
    302 /// Find closing emphasis marker.
    303 /// Returns (end_position, actual_close_len) if found.
    304 fn find_closing_emphasis(text: &str, marker: char, open_count: usize) -> Option<(usize, usize)> {
    305     let mut chars = text.char_indices().peekable();
    306 
    307     while let Some((pos, c)) = chars.next() {
    308         if c == marker {
    309             // Count consecutive markers
    310             let mut count = 1;
    311             while chars.peek().map(|(_, ch)| *ch == marker).unwrap_or(false) {
    312                 chars.next();
    313                 count += 1;
    314             }
    315 
    316             // Check if this could close (not followed by alphanumeric for _)
    317             let can_close = if marker == '_' {
    318                 chars.peek().is_none_or(|(_, next_c)| {
    319                     next_c.is_whitespace() || next_c.is_ascii_punctuation()
    320                 })
    321             } else {
    322                 true
    323             };
    324 
    325             if can_close && count >= open_count.min(3) {
    326                 let close_len = count.min(open_count).min(3);
    327                 return Some((pos, close_len));
    328             }
    329         }
    330     }
    331     None
    332 }
    333 
    334 /// Parse a link starting with [
    335 /// Returns (text_span, url_span, total_bytes_consumed)
    336 fn parse_link(text: &str, base_offset: usize) -> Option<(Span, Span, usize)> {
    337     if !text.starts_with('[') {
    338         return None;
    339     }
    340 
    341     // Find closing ]
    342     let mut bracket_depth = 0;
    343     let mut bracket_end = None;
    344 
    345     for (i, c) in text.char_indices() {
    346         match c {
    347             '[' => bracket_depth += 1,
    348             ']' => {
    349                 bracket_depth -= 1;
    350                 if bracket_depth == 0 {
    351                     bracket_end = Some(i);
    352                     break;
    353                 }
    354             }
    355             _ => {}
    356         }
    357     }
    358 
    359     let bracket_end = bracket_end?;
    360 
    361     // Check for ( immediately after ]
    362     let rest = &text[bracket_end + 1..];
    363     if !rest.starts_with('(') {
    364         return None;
    365     }
    366 
    367     // Find closing )
    368     let mut paren_depth = 0;
    369     let mut paren_end = None;
    370 
    371     for (i, c) in rest.char_indices() {
    372         match c {
    373             '(' => paren_depth += 1,
    374             ')' => {
    375                 paren_depth -= 1;
    376                 if paren_depth == 0 {
    377                     paren_end = Some(i);
    378                     break;
    379                 }
    380             }
    381             _ => {}
    382         }
    383     }
    384 
    385     let paren_end = paren_end?;
    386 
    387     // text_span: content between [ and ]
    388     let text_span = Span::new(base_offset + 1, base_offset + bracket_end);
    389     // url_span: content between ( and )
    390     let url_start = bracket_end + 1 + 1; // ] + (
    391     let url_end = bracket_end + 1 + paren_end; // position of )
    392     let url_span = Span::new(base_offset + url_start, base_offset + url_end);
    393 
    394     // Total consumed: [ + text + ] + ( + url + )
    395     let total = bracket_end + 1 + paren_end + 1;
    396 
    397     Some((text_span, url_span, total))
    398 }
    399 
    400 /// Collapse adjacent Text elements into one.
    401 fn collapse_text_elements(elements: &mut Vec<InlineElement>) {
    402     if elements.len() < 2 {
    403         return;
    404     }
    405 
    406     let mut write = 0;
    407     for read in 1..elements.len() {
    408         if let (InlineElement::Text(a), InlineElement::Text(b)) =
    409             (&elements[write], &elements[read])
    410         {
    411             // Merge spans — contiguous or not, just extend to cover both
    412             let merged = Span::new(a.start, b.end);
    413             elements[write] = InlineElement::Text(merged);
    414         } else {
    415             write += 1;
    416             if write != read {
    417                 elements.swap(write, read);
    418             }
    419         }
    420     }
    421     elements.truncate(write + 1);
    422 }
    423 
/// Streaming inline parser state.
///
/// Text arrives in arbitrary chunks via `push`; it is accumulated in `buffer`
/// until the buffer no longer appears to end in the middle of an inline
/// element, at which point the whole buffer is parsed and cleared.
pub struct InlineState {
    /// Accumulated text waiting to be parsed
    buffer: String,
    /// Current partial element being built.
    /// Within this file it is initialised to `None` and only read by
    /// `has_potential_partial`; nothing here assigns another value.
    partial: Option<PartialKind>,
}
    432 
    433 impl InlineState {
    434     pub fn new() -> Self {
    435         Self {
    436             buffer: String::new(),
    437             partial: None,
    438         }
    439     }
    440 
    441     /// Push new text and try to extract complete inline elements.
    442     /// Returns elements that are definitely complete.
    443     pub fn push(&mut self, text: &str) -> Vec<InlineElement> {
    444         self.buffer.push_str(text);
    445         self.extract_complete()
    446     }
    447 
    448     /// Get current buffer content for speculative rendering.
    449     pub fn buffer(&self) -> &str {
    450         &self.buffer
    451     }
    452 
    453     /// Check if we might be in the middle of an inline element.
    454     pub fn has_potential_partial(&self) -> bool {
    455         self.partial.is_some()
    456             || self.buffer.ends_with('`')
    457             || self.buffer.ends_with('*')
    458             || self.buffer.ends_with('_')
    459             || self.buffer.ends_with('~')
    460             || self.buffer.ends_with('[')
    461             || self.buffer.ends_with('!')
    462     }
    463 
    464     /// Finalize - return whatever we have as parsed elements.
    465     pub fn finalize(self) -> Vec<InlineElement> {
    466         parse_inline(&self.buffer, 0)
    467     }
    468 
    469     /// Extract complete inline elements from the buffer.
    470     fn extract_complete(&mut self) -> Vec<InlineElement> {
    471         let result = parse_inline(&self.buffer, 0);
    472 
    473         // Check if the buffer might have incomplete markers at the end
    474         if self.has_incomplete_tail() {
    475             // Keep the buffer, don't return anything yet
    476             return Vec::new();
    477         }
    478 
    479         // Buffer is stable, clear it and return parsed result
    480         self.buffer.clear();
    481         result
    482     }
    483 
    484     /// Check if the buffer ends with potentially incomplete markers.
    485     fn has_incomplete_tail(&self) -> bool {
    486         let s = &self.buffer;
    487 
    488         // Check for unclosed backticks
    489         let backtick_count = s.chars().filter(|&c| c == '`').count();
    490         if backtick_count % 2 != 0 {
    491             return true;
    492         }
    493 
    494         // Check for unclosed brackets
    495         let open_brackets = s.chars().filter(|&c| c == '[').count();
    496         let close_brackets = s.chars().filter(|&c| c == ']').count();
    497         if open_brackets > close_brackets {
    498             return true;
    499         }
    500 
    501         // Check for trailing asterisks/underscores that might start formatting
    502         if s.ends_with('*') || s.ends_with('_') || s.ends_with('~') {
    503             return true;
    504         }
    505 
    506         false
    507     }
    508 }
    509 
    510 impl Default for InlineState {
    511     fn default() -> Self {
    512         Self::new()
    513     }
    514 }
    515 
    516 #[cfg(test)]
    517 mod tests {
    518     use super::*;
    519 
    520     fn resolve<'a>(span: &Span, text: &'a str) -> &'a str {
    521         span.resolve(text)
    522     }
    523 
    524     #[test]
    525     fn test_inline_code() {
    526         let text = "some `code` here";
    527         let result = parse_inline(text, 0);
    528         assert!(result.iter().any(|e| matches!(
    529             e,
    530             InlineElement::Code(s) if resolve(s, text) == "code"
    531         )));
    532     }
    533 
    534     #[test]
    535     fn test_bold() {
    536         let text = "some **bold** text";
    537         let result = parse_inline(text, 0);
    538         assert!(result.iter().any(|e| matches!(
    539             e,
    540             InlineElement::Styled { style: InlineStyle::Bold, content } if resolve(content, text) == "bold"
    541         )));
    542     }
    543 
    544     #[test]
    545     fn test_italic() {
    546         let text = "some *italic* text";
    547         let result = parse_inline(text, 0);
    548         assert!(result.iter().any(|e| matches!(
    549             e,
    550             InlineElement::Styled { style: InlineStyle::Italic, content } if resolve(content, text) == "italic"
    551         )));
    552     }
    553 
    554     #[test]
    555     fn test_link() {
    556         let text = "check [this](https://example.com) out";
    557         let result = parse_inline(text, 0);
    558         assert!(result.iter().any(|e| matches!(
    559             e,
    560             InlineElement::Link { text: t, url } if resolve(t, text) == "this" && resolve(url, text) == "https://example.com"
    561         )));
    562     }
    563 
    564     #[test]
    565     fn test_image() {
    566         let text = "see ![alt](img.png) here";
    567         let result = parse_inline(text, 0);
    568         assert!(result.iter().any(|e| matches!(
    569             e,
    570             InlineElement::Image { alt, url } if resolve(alt, text) == "alt" && resolve(url, text) == "img.png"
    571         )));
    572     }
    573 
    574     #[test]
    575     fn test_strikethrough() {
    576         let text = "some ~~deleted~~ text";
    577         let result = parse_inline(text, 0);
    578         assert!(result.iter().any(|e| matches!(
    579             e,
    580             InlineElement::Styled { style: InlineStyle::Strikethrough, content } if resolve(content, text) == "deleted"
    581         )));
    582     }
    583 
    584     #[test]
    585     fn test_mixed() {
    586         let text = "**bold** and *italic* and `code`";
    587         let result = parse_inline(text, 0);
    588         assert_eq!(
    589             result
    590                 .iter()
    591                 .filter(|e| !matches!(e, InlineElement::Text(_)))
    592                 .count(),
    593             3
    594         );
    595     }
    596 }
	notedeck One damus client to rule them all
	git clone git://jb55.com/notedeck
	Log \| Files \| Refs \| README \| LICENSE