inline.rs (19983B)
1 //! Inline element parsing for bold, italic, code, links, etc. 2 3 use crate::element::{InlineElement, InlineStyle, Span}; 4 use crate::partial::PartialKind; 5 6 /// Parses inline elements from text. 7 /// `base_offset` is the position of `text` within the parser's buffer. 8 /// All returned Spans are absolute buffer positions. 9 /// 10 /// Note: This is called on complete paragraph text, not streaming. 11 /// For streaming, we use PartialKind to track incomplete markers. 12 pub fn parse_inline(text: &str, base_offset: usize) -> Vec<InlineElement> { 13 let mut result = Vec::new(); 14 let mut chars = text.char_indices().peekable(); 15 let mut plain_start = 0; 16 17 while let Some((i, c)) = chars.next() { 18 match c { 19 // Backtick - inline code 20 '`' => { 21 // Flush any pending plain text 22 if i > plain_start { 23 result.push(InlineElement::Text(Span::new( 24 base_offset + plain_start, 25 base_offset + i, 26 ))); 27 } 28 29 // Count backticks 30 let mut backtick_count = 1; 31 while chars.peek().map(|(_, c)| *c == '`').unwrap_or(false) { 32 chars.next(); 33 backtick_count += 1; 34 } 35 36 let start_pos = i + backtick_count; 37 38 // Find closing backticks (same count) 39 if let Some(end_pos) = find_closing_backticks(&text[start_pos..], backtick_count) { 40 let code_start = start_pos; 41 let code_end = start_pos + end_pos; 42 let code_content = &text[code_start..code_end]; 43 // Strip single leading/trailing space if present (CommonMark rule) 44 let (trim_start, trim_end) = if code_content.starts_with(' ') 45 && code_content.ends_with(' ') 46 && code_content.len() > 1 47 { 48 (code_start + 1, code_end - 1) 49 } else { 50 (code_start, code_end) 51 }; 52 result.push(InlineElement::Code(Span::new( 53 base_offset + trim_start, 54 base_offset + trim_end, 55 ))); 56 57 // Advance past closing backticks 58 let skip_to = start_pos + end_pos + backtick_count; 59 while chars.peek().map(|(idx, _)| *idx < skip_to).unwrap_or(false) { 60 chars.next(); 61 } 62 plain_start = skip_to; 63 } else { 64 // No closing - treat as plain text 65 plain_start = i; 66 } 67 } 68 69 // Asterisk or underscore - potential bold/italic 70 '*' | '_' => { 71 let marker = c; 72 let marker_start = i; 73 74 // Count consecutive markers 75 let mut count = 1; 76 while chars.peek().map(|(_, ch)| *ch == marker).unwrap_or(false) { 77 chars.next(); 78 count += 1; 79 } 80 81 // Limit to 3 for bold+italic 82 let effective_count = count.min(3); 83 84 // Check if this could be an opener (not preceded by whitespace at word boundary for _) 85 let can_open = if marker == '_' { 86 // Underscore: check word boundary rules 87 i == 0 88 || text[..i] 89 .chars() 90 .last() 91 .map(|c| c.is_whitespace() || c.is_ascii_punctuation()) 92 .unwrap_or(true) 93 } else { 94 true // Asterisk can always open 95 }; 96 97 if !can_open { 98 // Not a valid opener, treat as plain text 99 continue; 100 } 101 102 let content_start = marker_start + count; 103 104 // Look for closing marker 105 if let Some((content_end_local, close_len)) = 106 find_closing_emphasis(&text[content_start..], marker, effective_count) 107 { 108 // Flush pending plain text 109 if marker_start > plain_start { 110 result.push(InlineElement::Text(Span::new( 111 base_offset + plain_start, 112 base_offset + marker_start, 113 ))); 114 } 115 116 let style = match close_len { 117 1 => InlineStyle::Italic, 118 2 => InlineStyle::Bold, 119 _ => InlineStyle::BoldItalic, 120 }; 121 122 result.push(InlineElement::Styled { 123 style, 124 content: Span::new( 125 base_offset + content_start, 126 base_offset + content_start + content_end_local, 127 ), 128 }); 129 130 // Advance past the content and closing marker 131 let skip_to = content_start + content_end_local + close_len; 132 while chars.peek().map(|(idx, _)| *idx < skip_to).unwrap_or(false) { 133 chars.next(); 134 } 135 plain_start = skip_to; 136 } 137 // If no closing found, leave as plain text (will be collected) 138 } 139 140 // Tilde - potential strikethrough 141 '~' => { 142 if chars.peek().map(|(_, c)| *c == '~').unwrap_or(false) { 143 chars.next(); // consume second ~ 144 145 // Flush pending text 146 if i > plain_start { 147 result.push(InlineElement::Text(Span::new( 148 base_offset + plain_start, 149 base_offset + i, 150 ))); 151 } 152 153 let content_start = i + 2; 154 155 // Find closing ~~ 156 if let Some(end_pos) = text[content_start..].find("~~") { 157 result.push(InlineElement::Styled { 158 style: InlineStyle::Strikethrough, 159 content: Span::new( 160 base_offset + content_start, 161 base_offset + content_start + end_pos, 162 ), 163 }); 164 165 let skip_to = content_start + end_pos + 2; 166 while chars.peek().map(|(idx, _)| *idx < skip_to).unwrap_or(false) { 167 chars.next(); 168 } 169 plain_start = skip_to; 170 } else { 171 // No closing, revert 172 plain_start = i; 173 } 174 } 175 } 176 177 // Square bracket - potential link or image 178 '[' => { 179 // Flush pending text 180 if i > plain_start { 181 result.push(InlineElement::Text(Span::new( 182 base_offset + plain_start, 183 base_offset + i, 184 ))); 185 } 186 187 if let Some((text_span, url_span, total_len)) = 188 parse_link(&text[i..], base_offset + i) 189 { 190 result.push(InlineElement::Link { 191 text: text_span, 192 url: url_span, 193 }); 194 195 let skip_to = i + total_len; 196 while chars.peek().map(|(idx, _)| *idx < skip_to).unwrap_or(false) { 197 chars.next(); 198 } 199 plain_start = skip_to; 200 } else { 201 // Not a valid link, treat [ as plain text 202 plain_start = i; 203 } 204 } 205 206 // Exclamation - potential image 207 '!' => { 208 if chars.peek().map(|(_, c)| *c == '[').unwrap_or(false) { 209 // Flush pending text 210 if i > plain_start { 211 result.push(InlineElement::Text(Span::new( 212 base_offset + plain_start, 213 base_offset + i, 214 ))); 215 } 216 217 chars.next(); // consume [ 218 219 if let Some((alt_span, url_span, link_len)) = 220 parse_link(&text[i + 1..], base_offset + i + 1) 221 { 222 result.push(InlineElement::Image { 223 alt: alt_span, 224 url: url_span, 225 }); 226 227 let skip_to = i + 1 + link_len; 228 while chars.peek().map(|(idx, _)| *idx < skip_to).unwrap_or(false) { 229 chars.next(); 230 } 231 plain_start = skip_to; 232 } else { 233 // Not a valid image 234 plain_start = i; 235 } 236 } 237 } 238 239 // Newline - could be hard break 240 '\n' => { 241 // Check for hard line break (two spaces before newline) 242 if i >= 2 && text[..i].ends_with(" ") { 243 // Flush text without trailing spaces 244 let text_end = i - 2; 245 if text_end > plain_start { 246 result.push(InlineElement::Text(Span::new( 247 base_offset + plain_start, 248 base_offset + text_end, 249 ))); 250 } 251 result.push(InlineElement::LineBreak); 252 plain_start = i + 1; 253 } 254 // Otherwise soft line break, keep in text 255 } 256 257 _ => { 258 // Regular character, continue 259 } 260 } 261 } 262 263 // Flush remaining plain text 264 if plain_start < text.len() { 265 result.push(InlineElement::Text(Span::new( 266 base_offset + plain_start, 267 base_offset + text.len(), 268 ))); 269 } 270 271 // Collapse adjacent Text elements 272 collapse_text_elements(&mut result); 273 274 result 275 } 276 277 /// Find closing backticks matching the opening count. 278 fn find_closing_backticks(text: &str, count: usize) -> Option<usize> { 279 let bytes = text.as_bytes(); 280 let mut i = 0; 281 282 while i < bytes.len() { 283 if bytes[i] == b'`' { 284 // Count consecutive backticks at this position 285 let run_start = i; 286 while i < bytes.len() && bytes[i] == b'`' { 287 i += 1; 288 } 289 let run_len = i - run_start; 290 if run_len == count { 291 return Some(run_start); 292 } 293 // Not the right count, continue 294 } else { 295 // Skip non-backtick character (handle UTF-8) 296 i += text[i..].chars().next().map(|c| c.len_utf8()).unwrap_or(1); 297 } 298 } 299 None 300 } 301 302 /// Find closing emphasis marker. 303 /// Returns (end_position, actual_close_len) if found. 304 fn find_closing_emphasis(text: &str, marker: char, open_count: usize) -> Option<(usize, usize)> { 305 let mut chars = text.char_indices().peekable(); 306 307 while let Some((pos, c)) = chars.next() { 308 if c == marker { 309 // Count consecutive markers 310 let mut count = 1; 311 while chars.peek().map(|(_, ch)| *ch == marker).unwrap_or(false) { 312 chars.next(); 313 count += 1; 314 } 315 316 // Check if this could close (not followed by alphanumeric for _) 317 let can_close = if marker == '_' { 318 chars.peek().is_none_or(|(_, next_c)| { 319 next_c.is_whitespace() || next_c.is_ascii_punctuation() 320 }) 321 } else { 322 true 323 }; 324 325 if can_close && count >= open_count.min(3) { 326 let close_len = count.min(open_count).min(3); 327 return Some((pos, close_len)); 328 } 329 } 330 } 331 None 332 } 333 334 /// Parse a link starting with [ 335 /// Returns (text_span, url_span, total_bytes_consumed) 336 fn parse_link(text: &str, base_offset: usize) -> Option<(Span, Span, usize)> { 337 if !text.starts_with('[') { 338 return None; 339 } 340 341 // Find closing ] 342 let mut bracket_depth = 0; 343 let mut bracket_end = None; 344 345 for (i, c) in text.char_indices() { 346 match c { 347 '[' => bracket_depth += 1, 348 ']' => { 349 bracket_depth -= 1; 350 if bracket_depth == 0 { 351 bracket_end = Some(i); 352 break; 353 } 354 } 355 _ => {} 356 } 357 } 358 359 let bracket_end = bracket_end?; 360 361 // Check for ( immediately after ] 362 let rest = &text[bracket_end + 1..]; 363 if !rest.starts_with('(') { 364 return None; 365 } 366 367 // Find closing ) 368 let mut paren_depth = 0; 369 let mut paren_end = None; 370 371 for (i, c) in rest.char_indices() { 372 match c { 373 '(' => paren_depth += 1, 374 ')' => { 375 paren_depth -= 1; 376 if paren_depth == 0 { 377 paren_end = Some(i); 378 break; 379 } 380 } 381 _ => {} 382 } 383 } 384 385 let paren_end = paren_end?; 386 387 // text_span: content between [ and ] 388 let text_span = Span::new(base_offset + 1, base_offset + bracket_end); 389 // url_span: content between ( and ) 390 let url_start = bracket_end + 1 + 1; // ] + ( 391 let url_end = bracket_end + 1 + paren_end; // position of ) 392 let url_span = Span::new(base_offset + url_start, base_offset + url_end); 393 394 // Total consumed: [ + text + ] + ( + url + ) 395 let total = bracket_end + 1 + paren_end + 1; 396 397 Some((text_span, url_span, total)) 398 } 399 400 /// Collapse adjacent Text elements into one. 401 fn collapse_text_elements(elements: &mut Vec<InlineElement>) { 402 if elements.len() < 2 { 403 return; 404 } 405 406 let mut write = 0; 407 for read in 1..elements.len() { 408 if let (InlineElement::Text(a), InlineElement::Text(b)) = 409 (&elements[write], &elements[read]) 410 { 411 // Merge spans — contiguous or not, just extend to cover both 412 let merged = Span::new(a.start, b.end); 413 elements[write] = InlineElement::Text(merged); 414 } else { 415 write += 1; 416 if write != read { 417 elements.swap(write, read); 418 } 419 } 420 } 421 elements.truncate(write + 1); 422 } 423 424 /// Streaming inline parser state. 425 /// Tracks partial inline elements across token boundaries. 426 pub struct InlineState { 427 /// Accumulated text waiting to be parsed 428 buffer: String, 429 /// Current partial element being built 430 partial: Option<PartialKind>, 431 } 432 433 impl InlineState { 434 pub fn new() -> Self { 435 Self { 436 buffer: String::new(), 437 partial: None, 438 } 439 } 440 441 /// Push new text and try to extract complete inline elements. 442 /// Returns elements that are definitely complete. 443 pub fn push(&mut self, text: &str) -> Vec<InlineElement> { 444 self.buffer.push_str(text); 445 self.extract_complete() 446 } 447 448 /// Get current buffer content for speculative rendering. 449 pub fn buffer(&self) -> &str { 450 &self.buffer 451 } 452 453 /// Check if we might be in the middle of an inline element. 454 pub fn has_potential_partial(&self) -> bool { 455 self.partial.is_some() 456 || self.buffer.ends_with('`') 457 || self.buffer.ends_with('*') 458 || self.buffer.ends_with('_') 459 || self.buffer.ends_with('~') 460 || self.buffer.ends_with('[') 461 || self.buffer.ends_with('!') 462 } 463 464 /// Finalize - return whatever we have as parsed elements. 465 pub fn finalize(self) -> Vec<InlineElement> { 466 parse_inline(&self.buffer, 0) 467 } 468 469 /// Extract complete inline elements from the buffer. 470 fn extract_complete(&mut self) -> Vec<InlineElement> { 471 let result = parse_inline(&self.buffer, 0); 472 473 // Check if the buffer might have incomplete markers at the end 474 if self.has_incomplete_tail() { 475 // Keep the buffer, don't return anything yet 476 return Vec::new(); 477 } 478 479 // Buffer is stable, clear it and return parsed result 480 self.buffer.clear(); 481 result 482 } 483 484 /// Check if the buffer ends with potentially incomplete markers. 485 fn has_incomplete_tail(&self) -> bool { 486 let s = &self.buffer; 487 488 // Check for unclosed backticks 489 let backtick_count = s.chars().filter(|&c| c == '`').count(); 490 if backtick_count % 2 != 0 { 491 return true; 492 } 493 494 // Check for unclosed brackets 495 let open_brackets = s.chars().filter(|&c| c == '[').count(); 496 let close_brackets = s.chars().filter(|&c| c == ']').count(); 497 if open_brackets > close_brackets { 498 return true; 499 } 500 501 // Check for trailing asterisks/underscores that might start formatting 502 if s.ends_with('*') || s.ends_with('_') || s.ends_with('~') { 503 return true; 504 } 505 506 false 507 } 508 } 509 510 impl Default for InlineState { 511 fn default() -> Self { 512 Self::new() 513 } 514 } 515 516 #[cfg(test)] 517 mod tests { 518 use super::*; 519 520 fn resolve<'a>(span: &Span, text: &'a str) -> &'a str { 521 span.resolve(text) 522 } 523 524 #[test] 525 fn test_inline_code() { 526 let text = "some `code` here"; 527 let result = parse_inline(text, 0); 528 assert!(result.iter().any(|e| matches!( 529 e, 530 InlineElement::Code(s) if resolve(s, text) == "code" 531 ))); 532 } 533 534 #[test] 535 fn test_bold() { 536 let text = "some **bold** text"; 537 let result = parse_inline(text, 0); 538 assert!(result.iter().any(|e| matches!( 539 e, 540 InlineElement::Styled { style: InlineStyle::Bold, content } if resolve(content, text) == "bold" 541 ))); 542 } 543 544 #[test] 545 fn test_italic() { 546 let text = "some *italic* text"; 547 let result = parse_inline(text, 0); 548 assert!(result.iter().any(|e| matches!( 549 e, 550 InlineElement::Styled { style: InlineStyle::Italic, content } if resolve(content, text) == "italic" 551 ))); 552 } 553 554 #[test] 555 fn test_link() { 556 let text = "check [this](https://example.com) out"; 557 let result = parse_inline(text, 0); 558 assert!(result.iter().any(|e| matches!( 559 e, 560 InlineElement::Link { text: t, url } if resolve(t, text) == "this" && resolve(url, text) == "https://example.com" 561 ))); 562 } 563 564 #[test] 565 fn test_image() { 566 let text = "see  here"; 567 let result = parse_inline(text, 0); 568 assert!(result.iter().any(|e| matches!( 569 e, 570 InlineElement::Image { alt, url } if resolve(alt, text) == "alt" && resolve(url, text) == "img.png" 571 ))); 572 } 573 574 #[test] 575 fn test_strikethrough() { 576 let text = "some ~~deleted~~ text"; 577 let result = parse_inline(text, 0); 578 assert!(result.iter().any(|e| matches!( 579 e, 580 InlineElement::Styled { style: InlineStyle::Strikethrough, content } if resolve(content, text) == "deleted" 581 ))); 582 } 583 584 #[test] 585 fn test_mixed() { 586 let text = "**bold** and *italic* and `code`"; 587 let result = parse_inline(text, 0); 588 assert_eq!( 589 result 590 .iter() 591 .filter(|e| !matches!(e, InlineElement::Text(_))) 592 .count(), 593 3 594 ); 595 } 596 }