shatter

A fast, zero-copy nostr content parser in Rust
git clone git://jb55.com/shatter
Log | Files | Refs | README

commit 821cf87c434012a2f590c0dc533456298ac9f470
parent d89ddb0c3b0b3dc4056059753ff5481ff0d1a3e5
Author: William Casarin <jb55@jb55.com>
Date:   Mon,  3 Jul 2023 11:21:15 -0700

slice: use u32 for storage

We can always cast this to usize if needed

Diffstat:
M src/shard.rs | 33 +++++++++++++++++++++++++--------
1 file changed, 25 insertions(+), 8 deletions(-)

diff --git a/src/shard.rs b/src/shard.rs
@@ -1,26 +1,41 @@
 use crate::parser::{Bound, Error, Parser, Result};
 use log::debug;
 
+/// A slice into the original buffer. Contains a position, which is an
+/// index into the buffer, and the length of the segment.
 #[derive(Debug, PartialEq, Eq)]
 pub struct ByteSlice {
-    pos: usize,
-    len: usize,
+    pos: u32,
+    len: u32,
 }
 
 impl ByteSlice {
-    pub fn new(pos: usize, len: usize) -> ByteSlice {
+    pub fn new(pos: u32, len: u32) -> ByteSlice {
         ByteSlice { pos, len }
     }
 
+    #[inline(always)]
+    fn pos_usize(&self) -> usize {
+        self.pos as usize
+    }
+
+    #[inline(always)]
+    fn len_usize(&self) -> usize {
+        self.len as usize
+    }
+
+    /// Get the slice of the buffer as a native array slice
     pub fn bytes<'a>(&self, data: &'a [u8]) -> &'a [u8] {
-        &data[self.pos..self.pos + self.len]
+        &data[self.pos_usize()..self.pos_usize() + self.len_usize()]
     }
 
-    pub fn str<'a>(&self, data: &'a [u8]) -> Option<&'a str> {
-        std::str::from_utf8(self.bytes(data)).ok()
+    /// Get the slice of the buffer as a string
+    pub fn str<'a>(&self, data: &'a [u8]) -> std::result::Result<&'a str, std::str::Utf8Error> {
+        std::str::from_utf8(self.bytes(data))
     }
 }
 
+/// A Shard represents a part of the shattered content.
 #[derive(Debug, PartialEq, Eq)]
 pub enum Shard {
     Text(ByteSlice),
@@ -45,6 +60,7 @@ impl Shards {
         }
     }
 
+    /// Parse a hashtag (content after the #)
     pub fn parse_hashtag(parser: &mut Parser) -> Result<ByteSlice> {
         let start = parser.pos();
         match parser.parse_until(is_boundary_char) {
@@ -54,7 +70,7 @@ impl Shards {
                 if len <= 0 {
                     return Err(Error::NotFound);
                 }
-                return Ok(ByteSlice::new(start, len));
+                return Ok(ByteSlice::new(start as u32, len as u32));
             }
             Err(err) => Err(err.into()),
         }
@@ -66,7 +82,7 @@ impl Shards {
             return;
         }
 
-        let txt_slice = ByteSlice::new(start, len);
+        let txt_slice = ByteSlice::new(start as u32, len as u32);
         /*
         debug!(
             "pushing text block {:?} @ {} '{:?}'",
@@ -78,6 +94,7 @@ impl Shards {
         self.shards.push(Shard::Text(txt_slice));
     }
 
+    /// Parse (shatter) content into shards
     pub fn parse(content: &str) -> Result<Shards> {
         let mut parser = Parser::from_str(content);
         let len = parser.len();