notedeck

One damus client to rule them all
git clone git://jb55.com/notedeck
Log | Files | Refs | README | LICENSE

commit 03eac33213245aa33bfb3f8e43f6414d21fba48e
parent 5f040d5ff8be0b4aff4b7987c65e73039fb3f26e
Author: William Casarin <jb55@jb55.com>
Date:   Sun, 22 Feb 2026 11:18:28 -0800

protoverse: add s-expression parser crate

Port the protoverse C parser to Rust as a standalone crate with zero
dependencies. Implements a flat arena AST (CellId-indexed), zero-copy
tokenizer, recursive descent parser with checkpoint/restore backtracking,
s-expression serializer (round-trips correctly), and natural language
describe engine matching the C reference output.

Supports room/space/group/object cell types with 11 attribute types
(id, name, shape, material, condition, width/height/depth, location,
state, type). Tested against satoshis-citadel.space.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Diffstat:
M Cargo.lock | 4 ++++
M Cargo.toml | 1 +
A crates/protoverse/Cargo.toml | 5 +++++
A crates/protoverse/src/ast.rs | 160 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A crates/protoverse/src/describe.rs | 204 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A crates/protoverse/src/lib.rs | 272 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A crates/protoverse/src/parser.rs | 445 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A crates/protoverse/src/serializer.rs | 108 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A crates/protoverse/src/tokenizer.rs | 230 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
9 files changed, 1429 insertions(+), 0 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock @@ -5251,6 +5251,10 @@ dependencies = [ ] [[package]] +name = "protoverse" +version = "0.1.0" + +[[package]] name = "puffin" version = "0.19.1" source = "git+https://github.com/jb55/puffin?rev=c6a6242adaf90b6292c0f462d2acd34d96d224d2#c6a6242adaf90b6292c0f462d2acd34d96d224d2" diff --git a/Cargo.toml b/Cargo.toml @@ -15,6 +15,7 @@ members = [ "crates/md-stream", "crates/tokenator", "crates/enostr", + "crates/protoverse", ] [workspace.dependencies] diff --git a/crates/protoverse/Cargo.toml b/crates/protoverse/Cargo.toml @@ -0,0 +1,5 @@ +[package] +name = "protoverse" +version = "0.1.0" +edition = "2021" +description = "S-expression parser for protoverse spatial descriptions" diff --git a/crates/protoverse/src/ast.rs b/crates/protoverse/src/ast.rs @@ -0,0 +1,160 @@ +use std::fmt; + +/// Index into Space.cells +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub struct CellId(pub u32); + +/// The parsed space — a flat arena of cells and attributes. +/// +/// Cells and attributes are stored contiguously. Each cell references +/// its attributes via a range into `attributes`, and its children +/// via a range into `child_ids` (which itself stores CellIds). 
+pub struct Space { + /// All cells, indexed by CellId + pub cells: Vec<Cell>, + /// All attributes, contiguous per cell + pub attributes: Vec<Attribute>, + /// Flat child reference array — cells index into this + pub child_ids: Vec<CellId>, + /// Root cell of the space + pub root: CellId, +} + +pub struct Cell { + pub cell_type: CellType, + /// Index of first attribute in Space.attributes + pub first_attr: u32, + /// Number of attributes + pub attr_count: u16, + /// Index of first child reference in Space.child_ids + pub first_child: u32, + /// Number of children + pub child_count: u16, + /// Parent cell + pub parent: Option<CellId>, +} + +#[derive(Clone, Debug, PartialEq)] +pub enum CellType { + Room, + Space, + Group, + Object(ObjectType), +} + +#[derive(Clone, Debug, PartialEq)] +pub enum ObjectType { + Table, + Chair, + Door, + Light, +} + +#[derive(Clone, Debug, PartialEq)] +pub enum Attribute { + Id(String), + Type(String), + Name(String), + Material(String), + Condition(String), + Shape(Shape), + Width(f64), + Depth(f64), + Height(f64), + Location(String), + State(CellState), +} + +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum Shape { + Rectangle, + Circle, + Square, +} + +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum CellState { + On, + Off, + Sleeping, +} + +// --- Display implementations --- + +impl fmt::Display for ObjectType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + ObjectType::Table => write!(f, "table"), + ObjectType::Chair => write!(f, "chair"), + ObjectType::Door => write!(f, "door"), + ObjectType::Light => write!(f, "light"), + } + } +} + +impl fmt::Display for CellType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + CellType::Room => write!(f, "room"), + CellType::Space => write!(f, "space"), + CellType::Group => write!(f, "group"), + CellType::Object(o) => write!(f, "{}", o), + } + } +} + +impl fmt::Display for Shape { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> 
fmt::Result { + match self { + Shape::Rectangle => write!(f, "rectangle"), + Shape::Circle => write!(f, "circle"), + Shape::Square => write!(f, "square"), + } + } +} + +impl fmt::Display for CellState { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + CellState::On => write!(f, "on"), + CellState::Off => write!(f, "off"), + CellState::Sleeping => write!(f, "sleeping"), + } + } +} + +// --- Space accessor methods --- + +impl Space { + pub fn cell(&self, id: CellId) -> &Cell { + &self.cells[id.0 as usize] + } + + pub fn children(&self, id: CellId) -> &[CellId] { + let cell = self.cell(id); + let start = cell.first_child as usize; + let end = start + cell.child_count as usize; + &self.child_ids[start..end] + } + + pub fn attrs(&self, id: CellId) -> &[Attribute] { + let cell = self.cell(id); + let start = cell.first_attr as usize; + let end = start + cell.attr_count as usize; + &self.attributes[start..end] + } + + pub fn name(&self, id: CellId) -> Option<&str> { + self.attrs(id).iter().find_map(|a| match a { + Attribute::Name(s) => Some(s.as_str()), + _ => None, + }) + } + + pub fn find_attr<F>(&self, id: CellId, pred: F) -> Option<&Attribute> + where + F: Fn(&Attribute) -> bool, + { + self.attrs(id).iter().find(|a| pred(a)) + } +} diff --git a/crates/protoverse/src/describe.rs b/crates/protoverse/src/describe.rs @@ -0,0 +1,204 @@ +use crate::ast::*; + +/// Generate a natural language description of a space. +pub fn describe(space: &Space) -> String { + describe_from(space, space.root, 10) +} + +/// Generate a description starting from a specific cell with depth limit. 
+pub fn describe_from(space: &Space, root: CellId, max_depth: usize) -> String { + let mut buf = String::new(); + describe_cells(space, root, max_depth, 0, &mut buf); + buf +} + +fn describe_cells(space: &Space, id: CellId, max_depth: usize, depth: usize, buf: &mut String) { + if depth > max_depth { + return; + } + + if !describe_cell(space, id, buf) { + return; + } + + buf.push_str(".\n"); + + let children = space.children(id); + if children.is_empty() { + return; + } + + let cell = space.cell(id); + if matches!(cell.cell_type, CellType::Room | CellType::Space) { + push_word(buf, "It contains"); + } + + // Recurse into first child (matches C behavior) + describe_cells(space, children[0], max_depth, depth + 1, buf); +} + +fn describe_cell(space: &Space, id: CellId, buf: &mut String) -> bool { + let cell = space.cell(id); + match &cell.cell_type { + CellType::Room => describe_area(space, id, "room", buf), + CellType::Space => describe_area(space, id, "space", buf), + CellType::Group => describe_group(space, id, buf), + CellType::Object(_) => false, // unimplemented in C reference + } +} + +fn describe_area(space: &Space, id: CellId, area_name: &str, buf: &mut String) -> bool { + buf.push_str("There is a(n)"); + + push_adjectives(space, id, buf); + push_shape(space, id, buf); + push_word(buf, area_name); + push_made_of(space, id, buf); + push_named(space, id, buf); + + true +} + +fn describe_group(space: &Space, id: CellId, buf: &mut String) -> bool { + let children = space.children(id); + let nobjs = children.len(); + + describe_amount(nobjs, buf); + push_word(buf, "object"); + + if nobjs > 1 { + buf.push_str("s:"); + } else { + buf.push(':'); + } + + push_word(buf, "a"); + + for (i, &child_id) in children.iter().enumerate() { + if i > 0 { + if i == nobjs - 1 { + push_word(buf, "and"); + } else { + buf.push(','); + } + } + describe_object_name(space, child_id, buf); + } + + true +} + +fn describe_object_name(space: &Space, id: CellId, buf: &mut String) { + if let 
Some(name) = space.name(id) { + push_word(buf, name); + } + + let cell = space.cell(id); + let type_str = match &cell.cell_type { + CellType::Object(obj) => obj.to_string(), + other => other.to_string(), + }; + push_word(buf, &type_str); +} + +fn describe_amount(n: usize, buf: &mut String) { + let word = match n { + 1 => "a single", + 2 => "a couple", + 3 => "three", + 4 => "four", + 5 => "five", + _ => "many", + }; + push_word(buf, word); +} + +// --- Helper functions --- + +/// Push a word with automatic space separation. +/// Adds a space before the word if the previous character is not whitespace. +fn push_word(buf: &mut String, word: &str) { + if let Some(last) = buf.as_bytes().last() { + if !last.is_ascii_whitespace() { + buf.push(' '); + } + } + buf.push_str(word); +} + +fn push_adjectives(space: &Space, id: CellId, buf: &mut String) { + let attrs = space.attrs(id); + let conditions: Vec<&str> = attrs + .iter() + .filter_map(|a| match a { + Attribute::Condition(s) => Some(s.as_str()), + _ => None, + }) + .collect(); + + let adj_count = conditions.len(); + + for (i, cond) in conditions.iter().enumerate() { + if i > 0 { + if i == adj_count - 1 { + push_word(buf, "and"); + } else { + buf.push(','); + } + } + push_word(buf, cond); + } +} + +fn push_shape(space: &Space, id: CellId, buf: &mut String) { + let shape = space.attrs(id).iter().find_map(|a| match a { + Attribute::Shape(s) => Some(s), + _ => None, + }); + + if let Some(shape) = shape { + let adj = match shape { + Shape::Rectangle => "rectangular", + Shape::Circle => "circular", + Shape::Square => "square", + }; + push_word(buf, adj); + } +} + +fn push_made_of(space: &Space, id: CellId, buf: &mut String) { + let material = space.attrs(id).iter().find_map(|a| match a { + Attribute::Material(s) => Some(s.as_str()), + _ => None, + }); + + if let Some(mat) = material { + push_word(buf, "made of"); + push_word(buf, mat); + } +} + +fn push_named(space: &Space, id: CellId, buf: &mut String) { + if let Some(name) 
= space.name(id) { + push_word(buf, "named"); + push_word(buf, name); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::parser::parse; + + #[test] + fn test_describe_simple_room() { + let space = + parse("(room (shape rectangle) (name \"Test Room\") (material \"wood\"))").unwrap(); + let desc = describe(&space); + assert!(desc.contains("There is a(n)")); + assert!(desc.contains("rectangular")); + assert!(desc.contains("room")); + assert!(desc.contains("made of wood")); + assert!(desc.contains("named Test Room")); + } +} diff --git a/crates/protoverse/src/lib.rs b/crates/protoverse/src/lib.rs @@ -0,0 +1,272 @@ +//! Protoverse: S-expression parser for spatial world descriptions +//! +//! Parses protoverse `.space` format — an s-expression language for +//! describing rooms, objects, and their attributes. Designed for +//! progressive LOD: text descriptions, 2D maps, and 3D rendering +//! can all be derived from the same source. +//! +//! # Example +//! +//! ``` +//! use protoverse::{parse, serialize, describe}; +//! +//! let input = r#"(room (name "My Room") (shape rectangle) (width 10) (depth 8) +//! (group +//! (table (name "desk") (material "wood")) +//! (chair (name "office chair"))))"#; +//! +//! let space = parse(input).unwrap(); +//! let description = describe(&space); +//! let roundtrip = serialize(&space); +//! 
``` + +pub mod ast; +pub mod describe; +pub mod parser; +pub mod serializer; +pub mod tokenizer; + +pub use ast::*; +pub use describe::{describe, describe_from}; +pub use parser::parse; +pub use serializer::{serialize, serialize_from}; + +#[cfg(test)] +mod tests { + use super::*; + + const SATOSHIS_CITADEL: &str = r#"(space (shape rectangle) + (condition "clean") + (condition "shiny") + (material "solid gold") + (name "Satoshi's Den") + (width 10) (depth 10) (height 100) + (group + (table (id welcome-desk) + (name "welcome desk") + (material "marble") + (condition "clean") + (condition "new") + (width 1) (depth 2) (height 1) + (location center) + (light (name "desk"))) + + (chair (id welcome-desk-chair) + (name "fancy")) + + (chair (name "throne") (material "invisible")) + + (light (location ceiling) + (name "ceiling") + (state off) + (shape circle))))"#; + + const EXAMPLE_ROOM: &str = r#"(room (shape rectangle) + (condition "clean") + (material "gold") + (name "Satoshi's Den") + (width 10) (depth 10) (height 100) + (group + (table (id welcome-desk) + (name "welcome desk") + (material "marble") + (condition "new") + (width 1) (depth 2) (height 1) + (light (name "desk"))) + + (chair (id welcome-desk-chair) + (name "fancy")) + + (light (location ceiling) + (name "ceiling") + (state off) + (shape circle))))"#; + + #[test] + fn test_parse_satoshis_citadel() { + let space = parse(SATOSHIS_CITADEL).unwrap(); + + // Root is a space cell + let root = space.cell(space.root); + assert_eq!(root.cell_type, CellType::Space); + assert_eq!(space.name(space.root), Some("Satoshi's Den")); + + // Root has 8 attributes + let attrs = space.attrs(space.root); + assert_eq!(attrs.len(), 8); + + // Root has one child (group) + let root_children = space.children(space.root); + assert_eq!(root_children.len(), 1); + let group_id = root_children[0]; + let group = space.cell(group_id); + assert_eq!(group.cell_type, CellType::Group); + + // Group has 4 children: table, chair, chair, light + let 
group_children = space.children(group_id); + assert_eq!(group_children.len(), 4); + + assert_eq!( + space.cell(group_children[0]).cell_type, + CellType::Object(ObjectType::Table) + ); + assert_eq!( + space.cell(group_children[1]).cell_type, + CellType::Object(ObjectType::Chair) + ); + assert_eq!( + space.cell(group_children[2]).cell_type, + CellType::Object(ObjectType::Chair) + ); + assert_eq!( + space.cell(group_children[3]).cell_type, + CellType::Object(ObjectType::Light) + ); + + // Table has a child light + let table_children = space.children(group_children[0]); + assert_eq!(table_children.len(), 1); + assert_eq!( + space.cell(table_children[0]).cell_type, + CellType::Object(ObjectType::Light) + ); + assert_eq!(space.name(table_children[0]), Some("desk")); + + // Check object names + assert_eq!(space.name(group_children[0]), Some("welcome desk")); + assert_eq!(space.name(group_children[1]), Some("fancy")); + assert_eq!(space.name(group_children[2]), Some("throne")); + assert_eq!(space.name(group_children[3]), Some("ceiling")); + } + + #[test] + fn test_parse_example_room() { + let space = parse(EXAMPLE_ROOM).unwrap(); + let root = space.cell(space.root); + assert_eq!(root.cell_type, CellType::Room); + assert_eq!(space.name(space.root), Some("Satoshi's Den")); + } + + #[test] + fn test_round_trip() { + let space1 = parse(SATOSHIS_CITADEL).unwrap(); + let serialized = serialize(&space1); + + // Re-parse the serialized output + let space2 = parse(&serialized).unwrap(); + + // Same structure + assert_eq!(space1.cells.len(), space2.cells.len()); + assert_eq!(space1.attributes.len(), space2.attributes.len()); + assert_eq!(space1.child_ids.len(), space2.child_ids.len()); + + // Same root type + assert_eq!( + space1.cell(space1.root).cell_type, + space2.cell(space2.root).cell_type + ); + + // Same name + assert_eq!(space1.name(space1.root), space2.name(space2.root)); + + // Same group children count + let g1 = space1.children(space1.root)[0]; + let g2 = 
space2.children(space2.root)[0]; + assert_eq!(space1.children(g1).len(), space2.children(g2).len()); + } + + #[test] + fn test_describe_satoshis_citadel() { + let space = parse(SATOSHIS_CITADEL).unwrap(); + let desc = describe(&space); + + // Check the area description + assert!(desc.contains("There is a(n)")); + assert!(desc.contains("clean")); + assert!(desc.contains("shiny")); + assert!(desc.contains("rectangular")); + assert!(desc.contains("space")); + assert!(desc.contains("made of solid gold")); + assert!(desc.contains("named Satoshi's Den")); + + // Check the group description + assert!(desc.contains("It contains")); + assert!(desc.contains("four")); + assert!(desc.contains("objects:")); + assert!(desc.contains("welcome desk table")); + assert!(desc.contains("fancy chair")); + assert!(desc.contains("throne chair")); + assert!(desc.contains("ceiling light")); + + // Exact match against C reference output + let expected = "There is a(n) clean and shiny rectangular space made of solid gold named Satoshi's Den.\nIt contains four objects: a welcome desk table, fancy chair, throne chair and ceiling light.\n"; + assert_eq!(desc, expected); + } + + #[test] + fn test_parse_real_space_file() { + // Parse the actual .space file from the protoverse repo + let path = "/home/jb55/src/c/protoverse/satoshis-citadel.space"; + if let Ok(content) = std::fs::read_to_string(path) { + let space = parse(&content).unwrap(); + assert_eq!(space.cell(space.root).cell_type, CellType::Space); + assert_eq!(space.name(space.root), Some("Satoshi's Den")); + + // Verify round-trip + let serialized = serialize(&space); + let space2 = parse(&serialized).unwrap(); + assert_eq!(space.cells.len(), space2.cells.len()); + } + } + + #[test] + fn test_parent_references() { + let space = parse(SATOSHIS_CITADEL).unwrap(); + + // Root has no parent + assert_eq!(space.cell(space.root).parent, None); + + // Group's parent is root + let group_id = space.children(space.root)[0]; + 
assert_eq!(space.cell(group_id).parent, Some(space.root)); + + // Table's parent is group + let table_id = space.children(group_id)[0]; + assert_eq!(space.cell(table_id).parent, Some(group_id)); + + // Desk light's parent is table + let light_id = space.children(table_id)[0]; + assert_eq!(space.cell(light_id).parent, Some(table_id)); + } + + #[test] + fn test_attribute_details() { + let space = parse(SATOSHIS_CITADEL).unwrap(); + + // Check root shape + let shape = space + .find_attr(space.root, |a| matches!(a, Attribute::Shape(_))) + .unwrap(); + assert_eq!(*shape, Attribute::Shape(Shape::Rectangle)); + + // Check root dimensions + let width = space + .find_attr(space.root, |a| matches!(a, Attribute::Width(_))) + .unwrap(); + assert_eq!(*width, Attribute::Width(10.0)); + + // Check table material + let table_id = space.children(space.children(space.root)[0])[0]; + let material = space + .find_attr(table_id, |a| matches!(a, Attribute::Material(_))) + .unwrap(); + assert_eq!(*material, Attribute::Material("marble".to_string())); + + // Check light state + let light_id = space.children(space.children(space.root)[0])[3]; + let state = space + .find_attr(light_id, |a| matches!(a, Attribute::State(_))) + .unwrap(); + assert_eq!(*state, Attribute::State(CellState::Off)); + } +} diff --git a/crates/protoverse/src/parser.rs b/crates/protoverse/src/parser.rs @@ -0,0 +1,445 @@ +use crate::ast::*; +use crate::tokenizer::{tokenize, Token}; +use std::fmt; + +#[derive(Debug)] +pub struct ParseError { + pub msg: String, +} + +impl fmt::Display for ParseError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "parse error: {}", self.msg) + } +} + +impl std::error::Error for ParseError {} + +/// Parse an s-expression string into a Space. 
+pub fn parse(input: &str) -> Result<Space, ParseError> { + let tokens = tokenize(input).map_err(|e| ParseError { + msg: format!("tokenization failed: {}", e), + })?; + + let mut parser = Parser { + tokens, + pos: 0, + cells: Vec::new(), + attributes: Vec::new(), + child_ids: Vec::new(), + }; + + let root = parser.parse_cell().ok_or_else(|| ParseError { + msg: "failed to parse root cell".into(), + })?; + + Ok(Space { + cells: parser.cells, + attributes: parser.attributes, + child_ids: parser.child_ids, + root, + }) +} + +struct Parser<'a> { + tokens: Vec<Token<'a>>, + pos: usize, + cells: Vec<Cell>, + attributes: Vec<Attribute>, + child_ids: Vec<CellId>, +} + +#[derive(Clone)] +struct Checkpoint { + pos: usize, + cells_len: usize, + attrs_len: usize, + child_ids_len: usize, +} + +impl<'a> Parser<'a> { + fn checkpoint(&self) -> Checkpoint { + Checkpoint { + pos: self.pos, + cells_len: self.cells.len(), + attrs_len: self.attributes.len(), + child_ids_len: self.child_ids.len(), + } + } + + fn restore(&mut self, cp: Checkpoint) { + self.pos = cp.pos; + self.cells.truncate(cp.cells_len); + self.attributes.truncate(cp.attrs_len); + self.child_ids.truncate(cp.child_ids_len); + } + + fn peek(&self) -> Option<&Token<'a>> { + self.tokens.get(self.pos) + } + + fn eat_open(&mut self) -> bool { + if matches!(self.peek(), Some(Token::Open)) { + self.pos += 1; + true + } else { + false + } + } + + fn eat_close(&mut self) -> bool { + if matches!(self.peek(), Some(Token::Close)) { + self.pos += 1; + true + } else { + false + } + } + + fn eat_symbol_match(&mut self, expected: &str) -> bool { + if let Some(Token::Symbol(s)) = self.peek() { + if *s == expected { + self.pos += 1; + return true; + } + } + false + } + + fn eat_symbol(&mut self) -> Option<&'a str> { + if let Some(Token::Symbol(s)) = self.peek() { + let s = *s; + self.pos += 1; + Some(s) + } else { + None + } + } + + fn eat_string(&mut self) -> Option<&'a str> { + if let Some(Token::Str(s)) = self.peek() { + let s = *s; + 
self.pos += 1; + Some(s) + } else { + None + } + } + + fn eat_number(&mut self) -> Option<f64> { + if let Some(Token::Number(s)) = self.peek() { + if let Ok(n) = s.parse::<f64>() { + self.pos += 1; + return Some(n); + } + } + None + } + + fn push_cell(&mut self, cell: Cell) -> CellId { + let id = CellId(self.cells.len() as u32); + self.cells.push(cell); + id + } + + // --- Attribute parsing --- + + fn try_parse_attribute(&mut self) -> Option<Attribute> { + let cp = self.checkpoint(); + + if !self.eat_open() { + return None; + } + + let sym = match self.eat_symbol() { + Some(s) => s, + None => { + self.restore(cp); + return None; + } + }; + + let result = match sym { + "shape" => self.eat_symbol().and_then(|s| { + let shape = match s { + "rectangle" => Shape::Rectangle, + "circle" => Shape::Circle, + "square" => Shape::Square, + _ => return None, + }; + Some(Attribute::Shape(shape)) + }), + "id" => self.eat_symbol().map(|s| Attribute::Id(s.to_string())), + "name" => self.eat_string().map(|s| Attribute::Name(s.to_string())), + "material" => self + .eat_string() + .map(|s| Attribute::Material(s.to_string())), + "condition" => self + .eat_string() + .map(|s| Attribute::Condition(s.to_string())), + "location" => self + .eat_symbol() + .map(|s| Attribute::Location(s.to_string())), + "state" => self.eat_symbol().and_then(|s| { + let state = match s { + "on" => CellState::On, + "off" => CellState::Off, + "sleeping" => CellState::Sleeping, + _ => return None, + }; + Some(Attribute::State(state)) + }), + "type" => self.eat_symbol().map(|s| Attribute::Type(s.to_string())), + "width" => self.eat_number().map(Attribute::Width), + "height" => self.eat_number().map(Attribute::Height), + "depth" => self.eat_number().map(Attribute::Depth), + _ => None, + }; + + match result { + Some(attr) => { + if self.eat_close() { + Some(attr) + } else { + self.restore(cp); + None + } + } + None => { + self.restore(cp); + None + } + } + } + + /// Parse zero or more attributes, returning the 
count. + /// Attributes are pushed contiguously into self.attributes. + fn parse_attributes(&mut self) -> u16 { + let mut count = 0u16; + while let Some(attr) = self.try_parse_attribute() { + self.attributes.push(attr); + count += 1; + } + count + } + + // --- Cell parsing --- + + /// Parse attributes and an optional child cell (for room/space/object). + fn parse_cell_attrs(&mut self, cell_type: CellType) -> Option<CellId> { + let first_attr = self.attributes.len() as u32; + let attr_count = self.parse_attributes(); + + // Parse optional child cell — recursion may push to child_ids + let opt_child = self.parse_cell(); + + // Capture first_child AFTER recursion so nested children don't interleave + let first_child = self.child_ids.len() as u32; + let child_count; + if let Some(child_id) = opt_child { + self.child_ids.push(child_id); + child_count = 1u16; + } else { + child_count = 0; + } + + let id = self.push_cell(Cell { + cell_type, + first_attr, + attr_count, + first_child, + child_count, + parent: None, + }); + + // Set parent on children + for i in 0..child_count { + let child_id = self.child_ids[(first_child + i as u32) as usize]; + self.cells[child_id.0 as usize].parent = Some(id); + } + + Some(id) + } + + fn try_parse_named_cell(&mut self, name: &str, cell_type: CellType) -> Option<CellId> { + let cp = self.checkpoint(); + + if !self.eat_symbol_match(name) { + self.restore(cp); + return None; + } + + match self.parse_cell_attrs(cell_type) { + Some(id) => Some(id), + None => { + self.restore(cp); + None + } + } + } + + fn try_parse_room(&mut self) -> Option<CellId> { + self.try_parse_named_cell("room", CellType::Room) + } + + fn try_parse_space(&mut self) -> Option<CellId> { + self.try_parse_named_cell("space", CellType::Space) + } + + fn try_parse_group(&mut self) -> Option<CellId> { + let cp = self.checkpoint(); + + if !self.eat_symbol_match("group") { + self.restore(cp); + return None; + } + + // Collect children — each parse_cell may recursively push to 
child_ids, + // so we collect CellIds first and append ours after recursion completes + let mut collected = Vec::new(); + while let Some(child_id) = self.parse_cell() { + collected.push(child_id); + } + + if collected.is_empty() { + self.restore(cp); + return None; + } + + // Now append our children contiguously + let first_child = self.child_ids.len() as u32; + let child_count = collected.len() as u16; + self.child_ids.extend_from_slice(&collected); + + let id = self.push_cell(Cell { + cell_type: CellType::Group, + first_attr: 0, + attr_count: 0, + first_child, + child_count, + parent: None, + }); + + // Set parent on children + for i in 0..child_count { + let child_id = self.child_ids[(first_child + i as u32) as usize]; + self.cells[child_id.0 as usize].parent = Some(id); + } + + Some(id) + } + + fn try_parse_object(&mut self) -> Option<CellId> { + let cp = self.checkpoint(); + + let sym = self.eat_symbol()?; + + let obj_type = match sym { + "table" => ObjectType::Table, + "chair" => ObjectType::Chair, + "door" => ObjectType::Door, + "light" => ObjectType::Light, + _ => { + self.restore(cp); + return None; + } + }; + + match self.parse_cell_attrs(CellType::Object(obj_type)) { + Some(id) => Some(id), + None => { + self.restore(cp); + None + } + } + } + + fn parse_cell(&mut self) -> Option<CellId> { + let cp = self.checkpoint(); + + if !self.eat_open() { + return None; + } + + // Try each cell type + let id = self + .try_parse_group() + .or_else(|| self.try_parse_room()) + .or_else(|| self.try_parse_space()) + .or_else(|| self.try_parse_object()); + + match id { + Some(id) => { + if self.eat_close() { + Some(id) + } else { + self.restore(cp); + None + } + } + None => { + self.restore(cp); + None + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_simple_room() { + let space = parse("(room (name \"Test Room\") (width 10))").unwrap(); + assert_eq!(space.cells.len(), 1); + let root = space.cell(space.root); + 
assert_eq!(root.cell_type, CellType::Room); + assert_eq!(root.attr_count, 2); + assert_eq!(space.name(space.root), Some("Test Room")); + } + + #[test] + fn test_parse_object_with_child() { + let input = "(table (name \"desk\") (light (name \"lamp\")))"; + let space = parse(input).unwrap(); + // light is cell 0, table is cell 1 + assert_eq!(space.cells.len(), 2); + let root = space.cell(space.root); + assert_eq!(root.cell_type, CellType::Object(ObjectType::Table)); + assert_eq!(root.child_count, 1); + + let children = space.children(space.root); + let child = space.cell(children[0]); + assert_eq!(child.cell_type, CellType::Object(ObjectType::Light)); + assert_eq!(space.name(children[0]), Some("lamp")); + } + + #[test] + fn test_parse_group() { + let input = "(room (group (table (name \"t1\")) (chair (name \"c1\"))))"; + let space = parse(input).unwrap(); + // table=0, chair=1, group=2, room=3 + assert_eq!(space.cells.len(), 4); + let root = space.cell(space.root); + assert_eq!(root.cell_type, CellType::Room); + + // room has one child (group) + let room_children = space.children(space.root); + assert_eq!(room_children.len(), 1); + let group = space.cell(room_children[0]); + assert_eq!(group.cell_type, CellType::Group); + + // group has two children + let group_children = space.children(room_children[0]); + assert_eq!(group_children.len(), 2); + assert_eq!( + space.cell(group_children[0]).cell_type, + CellType::Object(ObjectType::Table) + ); + assert_eq!( + space.cell(group_children[1]).cell_type, + CellType::Object(ObjectType::Chair) + ); + } +} diff --git a/crates/protoverse/src/serializer.rs b/crates/protoverse/src/serializer.rs @@ -0,0 +1,108 @@ +use crate::ast::*; +use std::fmt::Write; + +/// Serialize a Space back to s-expression format. +pub fn serialize(space: &Space) -> String { + serialize_from(space, space.root) +} + +/// Serialize a subtree starting from a specific cell. 
+pub fn serialize_from(space: &Space, root: CellId) -> String { + let mut out = String::new(); + write_cell(space, root, 0, &mut out); + out +} + +fn format_number(n: f64) -> String { + if n == n.floor() && n.abs() < i64::MAX as f64 { + format!("{}", n as i64) + } else { + format!("{}", n) + } +} + +fn write_cell(space: &Space, id: CellId, indent: usize, out: &mut String) { + let cell = space.cell(id); + let pad = " ".repeat(indent); + let inner_pad = " ".repeat(indent + 1); + + out.push('('); + out.push_str(&cell.cell_type.to_string()); + + // Attributes + let attrs = space.attrs(id); + for attr in attrs { + let _ = write!(out, "\n{}", inner_pad); + write_attr(attr, out); + } + + // Children + let children = space.children(id); + for &child_id in children { + let _ = write!(out, "\n{}", inner_pad); + write_cell(space, child_id, indent + 1, out); + } + + // Closing paren on same line if no attrs/children, else on new line + if !attrs.is_empty() || !children.is_empty() { + // For readability, close on the last line + out.push(')'); + } else { + out.push(')'); + } + + let _ = pad; // used above via inner_pad derivation +} + +fn write_attr(attr: &Attribute, out: &mut String) { + match attr { + Attribute::Shape(s) => { + let _ = write!(out, "(shape {})", s); + } + Attribute::Id(s) => { + let _ = write!(out, "(id {})", s); + } + Attribute::Name(s) => { + let _ = write!(out, "(name \"{}\")", s); + } + Attribute::Material(s) => { + let _ = write!(out, "(material \"{}\")", s); + } + Attribute::Condition(s) => { + let _ = write!(out, "(condition \"{}\")", s); + } + Attribute::Location(s) => { + let _ = write!(out, "(location {})", s); + } + Attribute::State(s) => { + let _ = write!(out, "(state {})", s); + } + Attribute::Type(s) => { + let _ = write!(out, "(type {})", s); + } + Attribute::Width(n) => { + let _ = write!(out, "(width {})", format_number(*n)); + } + Attribute::Height(n) => { + let _ = write!(out, "(height {})", format_number(*n)); + } + Attribute::Depth(n) => 
{ + let _ = write!(out, "(depth {})", format_number(*n)); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::parser::parse; + + #[test] + fn test_serialize_simple() { + let space = parse("(room (name \"Test\") (width 10))").unwrap(); + let output = serialize(&space); + assert!(output.contains("(room")); + assert!(output.contains("(name \"Test\")")); + assert!(output.contains("(width 10)")); + } +} diff --git a/crates/protoverse/src/tokenizer.rs b/crates/protoverse/src/tokenizer.rs @@ -0,0 +1,230 @@ +use std::fmt; + +/// A token from the s-expression tokenizer. +/// String references are zero-copy slices into the input. +#[derive(Debug, Clone, PartialEq)] +pub enum Token<'a> { + Open, + Close, + Symbol(&'a str), + Str(&'a str), + Number(&'a str), +} + +#[derive(Debug)] +pub struct TokenError { + pub msg: String, + pub pos: usize, +} + +impl fmt::Display for TokenError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "token error at position {}: {}", self.pos, self.msg) + } +} + +impl std::error::Error for TokenError {} + +fn is_symbol_start(c: u8) -> bool { + c.is_ascii_lowercase() +} + +fn is_symbol_char(c: u8) -> bool { + c.is_ascii_lowercase() || c.is_ascii_digit() || c == b'-' || c == b'_' +} + +fn scan_symbol(input: &[u8], start: usize) -> Result<usize, TokenError> { + if start >= input.len() || !is_symbol_start(input[start]) { + return Err(TokenError { + msg: "symbol must start with a-z".into(), + pos: start, + }); + } + let mut end = start + 1; + while end < input.len() { + let c = input[end]; + if c.is_ascii_whitespace() || c == b')' || c == b'(' { + break; + } + if !is_symbol_char(c) { + return Err(TokenError { + msg: format!("invalid symbol character '{}'", c as char), + pos: end, + }); + } + end += 1; + } + Ok(end) +} + +fn scan_number(input: &[u8], start: usize) -> Result<usize, TokenError> { + if start >= input.len() { + return Err(TokenError { + msg: "unexpected end of input in number".into(), + pos: start, + 
}); + } + let first = input[start]; + if !first.is_ascii_digit() && first != b'-' { + return Err(TokenError { + msg: "number must start with 0-9 or -".into(), + pos: start, + }); + } + let mut end = start + 1; + while end < input.len() { + let c = input[end]; + if c.is_ascii_whitespace() || c == b')' || c == b'(' { + break; + } + if !c.is_ascii_digit() && c != b'.' { + return Err(TokenError { + msg: format!("invalid number character '{}'", c as char), + pos: end, + }); + } + end += 1; + } + Ok(end) +} + +fn scan_string(input: &[u8], start: usize) -> Result<(usize, usize), TokenError> { + // start should point at the opening quote + if start >= input.len() || input[start] != b'"' { + return Err(TokenError { + msg: "string must start with '\"'".into(), + pos: start, + }); + } + let content_start = start + 1; + let mut i = content_start; + while i < input.len() { + if input[i] == b'\\' { + i += 2; // skip escaped char + continue; + } + if input[i] == b'"' { + return Ok((content_start, i)); // i points at closing quote + } + i += 1; + } + Err(TokenError { + msg: "unterminated string".into(), + pos: start, + }) +} + +/// Tokenize an s-expression input string into a sequence of tokens. +/// Token string/symbol/number values are zero-copy references into the input. 
+pub fn tokenize(input: &str) -> Result<Vec<Token<'_>>, TokenError> { + let mut tokens = Vec::new(); + let bytes = input.as_bytes(); + let mut i = 0; + + while i < bytes.len() { + let c = bytes[i]; + + if c.is_ascii_whitespace() { + i += 1; + continue; + } + + match c { + b'(' => { + tokens.push(Token::Open); + i += 1; + } + b')' => { + tokens.push(Token::Close); + i += 1; + } + b'"' => { + let (content_start, content_end) = scan_string(bytes, i)?; + tokens.push(Token::Str(&input[content_start..content_end])); + i = content_end + 1; // skip closing quote + } + b'a'..=b'z' => { + let end = scan_symbol(bytes, i)?; + tokens.push(Token::Symbol(&input[i..end])); + i = end; + } + b'0'..=b'9' | b'-' => { + let end = scan_number(bytes, i)?; + tokens.push(Token::Number(&input[i..end])); + i = end; + } + _ => { + return Err(TokenError { + msg: format!("unexpected character '{}'", c as char), + pos: i, + }); + } + } + } + + Ok(tokens) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_tokenize_simple() { + let tokens = tokenize("(room (name \"hello\"))").unwrap(); + assert_eq!( + tokens, + vec![ + Token::Open, + Token::Symbol("room"), + Token::Open, + Token::Symbol("name"), + Token::Str("hello"), + Token::Close, + Token::Close, + ] + ); + } + + #[test] + fn test_tokenize_number() { + let tokens = tokenize("(width 10)").unwrap(); + assert_eq!( + tokens, + vec![ + Token::Open, + Token::Symbol("width"), + Token::Number("10"), + Token::Close, + ] + ); + } + + #[test] + fn test_tokenize_symbol_with_dash() { + let tokens = tokenize("(id welcome-desk)").unwrap(); + assert_eq!( + tokens, + vec![ + Token::Open, + Token::Symbol("id"), + Token::Symbol("welcome-desk"), + Token::Close, + ] + ); + } + + #[test] + fn test_tokenize_negative_number() { + let tokens = tokenize("(height -5)").unwrap(); + assert_eq!( + tokens, + vec![ + Token::Open, + Token::Symbol("height"), + Token::Number("-5"), + Token::Close, + ] + ); + } +}