commit 03eac33213245aa33bfb3f8e43f6414d21fba48e
parent 5f040d5ff8be0b4aff4b7987c65e73039fb3f26e
Author: William Casarin <jb55@jb55.com>
Date: Sun, 22 Feb 2026 11:18:28 -0800
protoverse: add s-expression parser crate
Port the protoverse C parser to Rust as a standalone crate with zero
dependencies. Implements a flat arena AST (CellId-indexed), zero-copy
tokenizer, recursive descent parser with checkpoint/restore backtracking,
s-expression serializer (round-trips correctly), and natural language
describe engine matching the C reference output.
Supports room/space/group/object cell types with 11 attribute types
(id, name, shape, material, condition, width/height/depth, location,
state, type). Tested against satoshis-citadel.space.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Diffstat:
9 files changed, 1429 insertions(+), 0 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
@@ -5251,6 +5251,10 @@ dependencies = [
]
[[package]]
+name = "protoverse"
+version = "0.1.0"
+
+[[package]]
name = "puffin"
version = "0.19.1"
source = "git+https://github.com/jb55/puffin?rev=c6a6242adaf90b6292c0f462d2acd34d96d224d2#c6a6242adaf90b6292c0f462d2acd34d96d224d2"
diff --git a/Cargo.toml b/Cargo.toml
@@ -15,6 +15,7 @@ members = [
"crates/md-stream",
"crates/tokenator",
"crates/enostr",
+ "crates/protoverse",
]
[workspace.dependencies]
diff --git a/crates/protoverse/Cargo.toml b/crates/protoverse/Cargo.toml
@@ -0,0 +1,5 @@
+[package]
+name = "protoverse"
+version = "0.1.0"
+edition = "2021"
+description = "S-expression parser for protoverse spatial descriptions"
diff --git a/crates/protoverse/src/ast.rs b/crates/protoverse/src/ast.rs
@@ -0,0 +1,160 @@
+use std::fmt;
+
+/// Typed index of a cell inside `Space.cells`.
+///
+/// The wrapped `u32` is used directly as a position in that vector (see
+/// `Space::cell`), so an id is only meaningful for the `Space` it came from.
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+pub struct CellId(pub u32);
+
+/// The parsed space — a flat arena of cells and attributes.
+///
+/// Cells and attributes are stored contiguously. Each cell references
+/// its attributes via a range into `attributes`, and its children
+/// via a range into `child_ids` (which itself stores CellIds).
+///
+/// `Clone` and `Debug` are derived so a parsed space can be duplicated,
+/// printed with `{:?}`, and used with helpers that require `Debug` on the
+/// success type (for example `Result::unwrap_err`).
+#[derive(Clone, Debug)]
+pub struct Space {
+    /// All cells, indexed by CellId
+    pub cells: Vec<Cell>,
+    /// All attributes, contiguous per cell
+    pub attributes: Vec<Attribute>,
+    /// Flat child reference array — cells index into this
+    pub child_ids: Vec<CellId>,
+    /// Root cell of the space
+    pub root: CellId,
+}
+
+/// One node of the space tree.
+///
+/// A cell stores no attribute or child data itself; it only records the
+/// ranges `first_attr .. first_attr + attr_count` in `Space.attributes` and
+/// `first_child .. first_child + child_count` in `Space.child_ids`.
+#[derive(Clone, Debug)]
+pub struct Cell {
+    /// Kind of cell (room, space, group or a concrete object)
+    pub cell_type: CellType,
+    /// Index of first attribute in Space.attributes
+    pub first_attr: u32,
+    /// Number of attributes
+    pub attr_count: u16,
+    /// Index of first child reference in Space.child_ids
+    pub first_child: u32,
+    /// Number of children
+    pub child_count: u16,
+    /// Parent cell (`None` for a cell that has not been attached to a parent)
+    pub parent: Option<CellId>,
+}
+
+/// Kind of a cell in the space tree.
+///
+/// The enum carries no heap data, so it is `Copy` and `Eq` like the other
+/// small enums in this module (`Shape`, `CellState`).
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum CellType {
+    /// `(room …)`
+    Room,
+    /// `(space …)`
+    Space,
+    /// `(group …)` — a container of child cells
+    Group,
+    /// A concrete object such as `(table …)`
+    Object(ObjectType),
+}
+
+/// The object keywords understood by the parser.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum ObjectType {
+    /// `table`
+    Table,
+    /// `chair`
+    Chair,
+    /// `door`
+    Door,
+    /// `light`
+    Light,
+}
+
+/// A single `(keyword value)` attribute attached to a cell.
+///
+/// Each variant corresponds to one attribute keyword and carries its value.
+#[derive(Clone, Debug, PartialEq)]
+pub enum Attribute {
+ /// Value of an `(id …)` attribute.
+ Id(String),
+ /// Value of a `(type …)` attribute.
+ Type(String),
+ /// Value of a `(name "…")` attribute.
+ Name(String),
+ /// Value of a `(material "…")` attribute.
+ Material(String),
+ /// Value of a `(condition "…")` attribute; a cell may carry several.
+ Condition(String),
+ /// Value of a `(shape …)` attribute.
+ Shape(Shape),
+ /// Value of a `(width …)` attribute.
+ Width(f64),
+ /// Value of a `(depth …)` attribute.
+ Depth(f64),
+ /// Value of a `(height …)` attribute.
+ Height(f64),
+ /// Value of a `(location …)` attribute.
+ Location(String),
+ /// Value of a `(state …)` attribute.
+ State(CellState),
+}
+
+/// Shapes accepted by the `(shape …)` attribute.
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub enum Shape {
+ /// Written as `rectangle`.
+ Rectangle,
+ /// Written as `circle`.
+ Circle,
+ /// Written as `square`.
+ Square,
+}
+
+/// States accepted by the `(state …)` attribute.
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub enum CellState {
+ /// Written as `on`.
+ On,
+ /// Written as `off`.
+ Off,
+ /// Written as `sleeping`.
+ Sleeping,
+}
+
+// --- Display implementations ---
+
+/// Renders the object keyword exactly as it appears in the s-expression source.
+impl fmt::Display for ObjectType {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let keyword = match self {
+            ObjectType::Table => "table",
+            ObjectType::Chair => "chair",
+            ObjectType::Door => "door",
+            ObjectType::Light => "light",
+        };
+        f.write_str(keyword)
+    }
+}
+
+/// Renders the cell keyword; object cells print the keyword of their object type.
+impl fmt::Display for CellType {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let keyword = match self {
+            // Objects delegate to the ObjectType rendering.
+            CellType::Object(o) => return write!(f, "{}", o),
+            CellType::Room => "room",
+            CellType::Space => "space",
+            CellType::Group => "group",
+        };
+        f.write_str(keyword)
+    }
+}
+
+/// Renders the shape keyword used in `(shape …)`.
+impl fmt::Display for Shape {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let keyword = match self {
+            Shape::Rectangle => "rectangle",
+            Shape::Circle => "circle",
+            Shape::Square => "square",
+        };
+        f.write_str(keyword)
+    }
+}
+
+/// Renders the state keyword used in `(state …)`.
+impl fmt::Display for CellState {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let keyword = match self {
+            CellState::On => "on",
+            CellState::Off => "off",
+            CellState::Sleeping => "sleeping",
+        };
+        f.write_str(keyword)
+    }
+}
+
+// --- Space accessor methods ---
+
+impl Space {
+    /// Borrow the cell stored at `id`.
+    ///
+    /// Panics if `id` is out of range for `self.cells`.
+    pub fn cell(&self, id: CellId) -> &Cell {
+        let index = id.0 as usize;
+        &self.cells[index]
+    }
+
+    /// Child ids of `id`, in the order they are stored in `child_ids`.
+    pub fn children(&self, id: CellId) -> &[CellId] {
+        let owner = self.cell(id);
+        let first = owner.first_child as usize;
+        let range = first..first + owner.child_count as usize;
+        &self.child_ids[range]
+    }
+
+    /// Attributes of `id`, in the order they are stored in `attributes`.
+    pub fn attrs(&self, id: CellId) -> &[Attribute] {
+        let owner = self.cell(id);
+        let first = owner.first_attr as usize;
+        let range = first..first + owner.attr_count as usize;
+        &self.attributes[range]
+    }
+
+    /// Value of the first `Attribute::Name` on `id`, if there is one.
+    pub fn name(&self, id: CellId) -> Option<&str> {
+        for attr in self.attrs(id) {
+            if let Attribute::Name(text) = attr {
+                return Some(text.as_str());
+            }
+        }
+        None
+    }
+
+    /// First attribute of `id` for which `pred` returns true.
+    pub fn find_attr<F>(&self, id: CellId, pred: F) -> Option<&Attribute>
+    where
+        F: Fn(&Attribute) -> bool,
+    {
+        self.attrs(id).iter().find(|&attr| pred(attr))
+    }
+}
diff --git a/crates/protoverse/src/describe.rs b/crates/protoverse/src/describe.rs
@@ -0,0 +1,204 @@
+use crate::ast::*;
+
+/// Describe the whole space in natural language, starting at its root cell
+/// and descending at most ten levels.
+pub fn describe(space: &Space) -> String {
+    const DEFAULT_MAX_DEPTH: usize = 10;
+    describe_from(space, space.root, DEFAULT_MAX_DEPTH)
+}
+
+/// Describe the subtree rooted at `root`, descending at most `max_depth`
+/// levels below it, and return the collected text.
+pub fn describe_from(space: &Space, root: CellId, max_depth: usize) -> String {
+    let mut text = String::new();
+    let starting_depth = 0;
+    describe_cells(space, root, max_depth, starting_depth, &mut text);
+    text
+}
+
+/// Describe `id` and then walk down the chain of first children, writing one
+/// sentence per described cell into `buf`.
+///
+/// The walk stops when the depth exceeds `max_depth`, when a cell cannot be
+/// described, or when a cell has no children.
+fn describe_cells(space: &Space, id: CellId, max_depth: usize, depth: usize, buf: &mut String) {
+    let mut current = id;
+    let mut level = depth;
+
+    while level <= max_depth {
+        if !describe_cell(space, current, buf) {
+            return;
+        }
+        buf.push_str(".\n");
+
+        let first_child = match space.children(current).first() {
+            Some(&child) => child,
+            None => return,
+        };
+
+        // Only areas introduce their contents with a lead-in phrase.
+        let is_area = matches!(
+            space.cell(current).cell_type,
+            CellType::Room | CellType::Space
+        );
+        if is_area {
+            push_word(buf, "It contains");
+        }
+
+        // Continue with the first child only (matches C behavior)
+        current = first_child;
+        level += 1;
+    }
+}
+
+/// Write the description of a single cell into `buf`.
+///
+/// Returns `false` when nothing was written because the cell kind has no
+/// description.
+fn describe_cell(space: &Space, id: CellId, buf: &mut String) -> bool {
+    let area_name = match space.cell(id).cell_type {
+        CellType::Room => "room",
+        CellType::Space => "space",
+        CellType::Group => return describe_group(space, id, buf),
+        // unimplemented in C reference
+        CellType::Object(_) => return false,
+    };
+    describe_area(space, id, area_name, buf)
+}
+
+/// Write "There is a(n) <conditions> <shape> <area_name> made of <material>
+/// named <name>" into `buf`, skipping every part the cell has no attribute for.
+///
+/// Always returns `true`.
+fn describe_area(space: &Space, id: CellId, area_name: &str, buf: &mut String) -> bool {
+    buf.push_str("There is a(n)");
+
+    // Parts before the noun.
+    push_adjectives(space, id, buf);
+    push_shape(space, id, buf);
+
+    // The noun itself.
+    push_word(buf, area_name);
+
+    // Parts after the noun.
+    push_made_of(space, id, buf);
+    push_named(space, id, buf);
+
+    true
+}
+
+/// Write "<amount> object(s): a <child>, <child> and <child>" into `buf`,
+/// listing the group's direct children. Always returns `true`.
+fn describe_group(space: &Space, id: CellId, buf: &mut String) -> bool {
+    let members = space.children(id);
+    let total = members.len();
+
+    describe_amount(total, buf);
+    push_word(buf, "object");
+    buf.push_str(if total > 1 { "s:" } else { ":" });
+
+    push_word(buf, "a");
+
+    let last = total.saturating_sub(1);
+    for (pos, member) in members.iter().copied().enumerate() {
+        // Separator before every member except the first: "and" before the
+        // final one, a comma otherwise.
+        match pos {
+            0 => {}
+            p if p == last => push_word(buf, "and"),
+            _ => buf.push(','),
+        }
+        describe_object_name(space, member, buf);
+    }
+
+    true
+}
+
+/// Write the cell's name (when it has one) followed by its type keyword,
+/// e.g. "welcome desk table".
+fn describe_object_name(space: &Space, id: CellId, buf: &mut String) {
+    if let Some(name) = space.name(id) {
+        push_word(buf, name);
+    }
+
+    // CellType's Display already prints the object keyword for object cells.
+    let kind = space.cell(id).cell_type.to_string();
+    push_word(buf, &kind);
+}
+
+/// Write a quantity phrase for `n` into `buf`: a fixed wording for 1 through
+/// 5 and "many" for everything else (including 0).
+fn describe_amount(n: usize, buf: &mut String) {
+    const SMALL_AMOUNTS: [&str; 5] = ["a single", "a couple", "three", "four", "five"];
+
+    let word = n
+        .checked_sub(1)
+        .and_then(|index| SMALL_AMOUNTS.get(index))
+        .copied()
+        .unwrap_or("many");
+    push_word(buf, word);
+}
+
+// --- Helper functions ---
+
+/// Append `word` to `buf`, inserting one separating space first unless `buf`
+/// is empty or already ends in ASCII whitespace.
+fn push_word(buf: &mut String, word: &str) {
+    let needs_gap = buf
+        .as_bytes()
+        .last()
+        .map_or(false, |byte| !byte.is_ascii_whitespace());
+    if needs_gap {
+        buf.push(' ');
+    }
+    buf.push_str(word);
+}
+
+/// Write every `Condition` attribute of the cell as an adjective list:
+/// comma-separated, with "and" before the final entry.
+fn push_adjectives(space: &Space, id: CellId, buf: &mut String) {
+    let mut words: Vec<&str> = Vec::new();
+    for attr in space.attrs(id) {
+        if let Attribute::Condition(text) = attr {
+            words.push(text.as_str());
+        }
+    }
+
+    let last = words.len().saturating_sub(1);
+    for (pos, word) in words.into_iter().enumerate() {
+        if pos == 0 {
+            // first adjective: no separator
+        } else if pos == last {
+            push_word(buf, "and");
+        } else {
+            buf.push(',');
+        }
+        push_word(buf, word);
+    }
+}
+
+/// Write the adjective for the cell's first `Shape` attribute, if it has one.
+fn push_shape(space: &Space, id: CellId, buf: &mut String) {
+    for attr in space.attrs(id) {
+        if let Attribute::Shape(shape) = attr {
+            let adjective = match shape {
+                Shape::Rectangle => "rectangular",
+                Shape::Circle => "circular",
+                Shape::Square => "square",
+            };
+            push_word(buf, adjective);
+            // Only the first shape attribute is described.
+            return;
+        }
+    }
+}
+
+/// Write "made of <material>" for the cell's first `Material` attribute, if any.
+fn push_made_of(space: &Space, id: CellId, buf: &mut String) {
+    for attr in space.attrs(id) {
+        if let Attribute::Material(material) = attr {
+            push_word(buf, "made of");
+            push_word(buf, material.as_str());
+            // Only the first material attribute is described.
+            return;
+        }
+    }
+}
+
+/// Write "named <name>" when the cell has a name; otherwise write nothing.
+fn push_named(space: &Space, id: CellId, buf: &mut String) {
+    match space.name(id) {
+        Some(name) => {
+            push_word(buf, "named");
+            push_word(buf, name);
+        }
+        None => {}
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::parser::parse;
+
+    /// A room with shape, name and material yields every expected phrase.
+    #[test]
+    fn test_describe_simple_room() {
+        let input = "(room (shape rectangle) (name \"Test Room\") (material \"wood\"))";
+        let desc = describe(&parse(input).unwrap());
+
+        let expected_fragments = [
+            "There is a(n)",
+            "rectangular",
+            "room",
+            "made of wood",
+            "named Test Room",
+        ];
+        for fragment in expected_fragments.iter() {
+            assert!(desc.contains(fragment));
+        }
+    }
+}
diff --git a/crates/protoverse/src/lib.rs b/crates/protoverse/src/lib.rs
@@ -0,0 +1,272 @@
+//! Protoverse: S-expression parser for spatial world descriptions
+//!
+//! Parses protoverse `.space` format — an s-expression language for
+//! describing rooms, objects, and their attributes. Designed for
+//! progressive LOD: text descriptions, 2D maps, and 3D rendering
+//! can all be derived from the same source.
+//!
+//! # Example
+//!
+//! ```
+//! use protoverse::{parse, serialize, describe};
+//!
+//! let input = r#"(room (name "My Room") (shape rectangle) (width 10) (depth 8)
+//! (group
+//! (table (name "desk") (material "wood"))
+//! (chair (name "office chair"))))"#;
+//!
+//! let space = parse(input).unwrap();
+//! let description = describe(&space);
+//! let roundtrip = serialize(&space);
+//! ```
+
+pub mod ast;
+pub mod describe;
+pub mod parser;
+pub mod serializer;
+pub mod tokenizer;
+
+pub use ast::*;
+pub use describe::{describe, describe_from};
+pub use parser::parse;
+pub use serializer::{serialize, serialize_from};
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Reference space used by most tests: a space containing one group of
+    /// four objects, the first of which has a nested light.
+    const SATOSHIS_CITADEL: &str = r#"(space (shape rectangle)
+ (condition "clean")
+ (condition "shiny")
+ (material "solid gold")
+ (name "Satoshi's Den")
+ (width 10) (depth 10) (height 100)
+ (group
+ (table (id welcome-desk)
+ (name "welcome desk")
+ (material "marble")
+ (condition "clean")
+ (condition "new")
+ (width 1) (depth 2) (height 1)
+ (location center)
+ (light (name "desk")))
+
+ (chair (id welcome-desk-chair)
+ (name "fancy"))
+
+ (chair (name "throne") (material "invisible"))
+
+ (light (location ceiling)
+ (name "ceiling")
+ (state off)
+ (shape circle))))"#;
+
+    /// Smaller variant whose root is a room instead of a space.
+    const EXAMPLE_ROOM: &str = r#"(room (shape rectangle)
+ (condition "clean")
+ (material "gold")
+ (name "Satoshi's Den")
+ (width 10) (depth 10) (height 100)
+ (group
+ (table (id welcome-desk)
+ (name "welcome desk")
+ (material "marble")
+ (condition "new")
+ (width 1) (depth 2) (height 1)
+ (light (name "desk")))
+
+ (chair (id welcome-desk-chair)
+ (name "fancy"))
+
+ (light (location ceiling)
+ (name "ceiling")
+ (state off)
+ (shape circle))))"#;
+
+    /// Environment variable that overrides the location of the reference
+    /// `.space` file read by `test_parse_real_space_file`.
+    const SPACE_FILE_ENV: &str = "PROTOVERSE_SPACE_FILE";
+
+    /// Location used when the environment variable is not set.
+    const DEFAULT_SPACE_FILE: &str = "/home/jb55/src/c/protoverse/satoshis-citadel.space";
+
+    #[test]
+    fn test_parse_satoshis_citadel() {
+        let space = parse(SATOSHIS_CITADEL).unwrap();
+
+        // Root is a space cell
+        let root = space.cell(space.root);
+        assert_eq!(root.cell_type, CellType::Space);
+        assert_eq!(space.name(space.root), Some("Satoshi's Den"));
+
+        // Root has 8 attributes
+        let attrs = space.attrs(space.root);
+        assert_eq!(attrs.len(), 8);
+
+        // Root has one child (group)
+        let root_children = space.children(space.root);
+        assert_eq!(root_children.len(), 1);
+        let group_id = root_children[0];
+        let group = space.cell(group_id);
+        assert_eq!(group.cell_type, CellType::Group);
+
+        // Group has 4 children: table, chair, chair, light
+        let group_children = space.children(group_id);
+        assert_eq!(group_children.len(), 4);
+
+        assert_eq!(
+            space.cell(group_children[0]).cell_type,
+            CellType::Object(ObjectType::Table)
+        );
+        assert_eq!(
+            space.cell(group_children[1]).cell_type,
+            CellType::Object(ObjectType::Chair)
+        );
+        assert_eq!(
+            space.cell(group_children[2]).cell_type,
+            CellType::Object(ObjectType::Chair)
+        );
+        assert_eq!(
+            space.cell(group_children[3]).cell_type,
+            CellType::Object(ObjectType::Light)
+        );
+
+        // Table has a child light
+        let table_children = space.children(group_children[0]);
+        assert_eq!(table_children.len(), 1);
+        assert_eq!(
+            space.cell(table_children[0]).cell_type,
+            CellType::Object(ObjectType::Light)
+        );
+        assert_eq!(space.name(table_children[0]), Some("desk"));
+
+        // Check object names
+        assert_eq!(space.name(group_children[0]), Some("welcome desk"));
+        assert_eq!(space.name(group_children[1]), Some("fancy"));
+        assert_eq!(space.name(group_children[2]), Some("throne"));
+        assert_eq!(space.name(group_children[3]), Some("ceiling"));
+    }
+
+    #[test]
+    fn test_parse_example_room() {
+        let space = parse(EXAMPLE_ROOM).unwrap();
+        let root = space.cell(space.root);
+        assert_eq!(root.cell_type, CellType::Room);
+        assert_eq!(space.name(space.root), Some("Satoshi's Den"));
+    }
+
+    #[test]
+    fn test_round_trip() {
+        let space1 = parse(SATOSHIS_CITADEL).unwrap();
+        let serialized = serialize(&space1);
+
+        // Re-parse the serialized output
+        let space2 = parse(&serialized).unwrap();
+
+        // Same structure
+        assert_eq!(space1.cells.len(), space2.cells.len());
+        assert_eq!(space1.attributes.len(), space2.attributes.len());
+        assert_eq!(space1.child_ids.len(), space2.child_ids.len());
+
+        // Same root type
+        assert_eq!(
+            space1.cell(space1.root).cell_type,
+            space2.cell(space2.root).cell_type
+        );
+
+        // Same name
+        assert_eq!(space1.name(space1.root), space2.name(space2.root));
+
+        // Same group children count
+        let g1 = space1.children(space1.root)[0];
+        let g2 = space2.children(space2.root)[0];
+        assert_eq!(space1.children(g1).len(), space2.children(g2).len());
+    }
+
+    #[test]
+    fn test_describe_satoshis_citadel() {
+        let space = parse(SATOSHIS_CITADEL).unwrap();
+        let desc = describe(&space);
+
+        // Check the area description
+        assert!(desc.contains("There is a(n)"));
+        assert!(desc.contains("clean"));
+        assert!(desc.contains("shiny"));
+        assert!(desc.contains("rectangular"));
+        assert!(desc.contains("space"));
+        assert!(desc.contains("made of solid gold"));
+        assert!(desc.contains("named Satoshi's Den"));
+
+        // Check the group description
+        assert!(desc.contains("It contains"));
+        assert!(desc.contains("four"));
+        assert!(desc.contains("objects:"));
+        assert!(desc.contains("welcome desk table"));
+        assert!(desc.contains("fancy chair"));
+        assert!(desc.contains("throne chair"));
+        assert!(desc.contains("ceiling light"));
+
+        // Exact match against C reference output
+        let expected = "There is a(n) clean and shiny rectangular space made of solid gold named Satoshi's Den.\nIt contains four objects: a welcome desk table, fancy chair, throne chair and ceiling light.\n";
+        assert_eq!(desc, expected);
+    }
+
+    /// Parse the actual `.space` file from the protoverse repo.
+    ///
+    /// The file lives outside this crate, so its location is taken from the
+    /// `PROTOVERSE_SPACE_FILE` environment variable, falling back to the
+    /// original author's checkout. When the file cannot be read the test is
+    /// skipped, and the skip is reported on stderr instead of passing silently.
+    #[test]
+    fn test_parse_real_space_file() {
+        let path =
+            std::env::var(SPACE_FILE_ENV).unwrap_or_else(|_| DEFAULT_SPACE_FILE.to_string());
+
+        let content = match std::fs::read_to_string(&path) {
+            Ok(content) => content,
+            Err(err) => {
+                eprintln!(
+                    "skipping test_parse_real_space_file: cannot read {} ({}); set {} to run it",
+                    path, err, SPACE_FILE_ENV
+                );
+                return;
+            }
+        };
+
+        let space = parse(&content).unwrap();
+        assert_eq!(space.cell(space.root).cell_type, CellType::Space);
+        assert_eq!(space.name(space.root), Some("Satoshi's Den"));
+
+        // Verify round-trip
+        let serialized = serialize(&space);
+        let space2 = parse(&serialized).unwrap();
+        assert_eq!(space.cells.len(), space2.cells.len());
+    }
+
+    #[test]
+    fn test_parent_references() {
+        let space = parse(SATOSHIS_CITADEL).unwrap();
+
+        // Root has no parent
+        assert_eq!(space.cell(space.root).parent, None);
+
+        // Group's parent is root
+        let group_id = space.children(space.root)[0];
+        assert_eq!(space.cell(group_id).parent, Some(space.root));
+
+        // Table's parent is group
+        let table_id = space.children(group_id)[0];
+        assert_eq!(space.cell(table_id).parent, Some(group_id));
+
+        // Desk light's parent is table
+        let light_id = space.children(table_id)[0];
+        assert_eq!(space.cell(light_id).parent, Some(table_id));
+    }
+
+    #[test]
+    fn test_attribute_details() {
+        let space = parse(SATOSHIS_CITADEL).unwrap();
+
+        // Check root shape
+        let shape = space
+            .find_attr(space.root, |a| matches!(a, Attribute::Shape(_)))
+            .unwrap();
+        assert_eq!(*shape, Attribute::Shape(Shape::Rectangle));
+
+        // Check root dimensions
+        let width = space
+            .find_attr(space.root, |a| matches!(a, Attribute::Width(_)))
+            .unwrap();
+        assert_eq!(*width, Attribute::Width(10.0));
+
+        // Check table material
+        let table_id = space.children(space.children(space.root)[0])[0];
+        let material = space
+            .find_attr(table_id, |a| matches!(a, Attribute::Material(_)))
+            .unwrap();
+        assert_eq!(*material, Attribute::Material("marble".to_string()));
+
+        // Check light state
+        let light_id = space.children(space.children(space.root)[0])[3];
+        let state = space
+            .find_attr(light_id, |a| matches!(a, Attribute::State(_)))
+            .unwrap();
+        assert_eq!(*state, Attribute::State(CellState::Off));
+    }
+}
diff --git a/crates/protoverse/src/parser.rs b/crates/protoverse/src/parser.rs
@@ -0,0 +1,445 @@
+use crate::ast::*;
+use crate::tokenizer::{tokenize, Token};
+use std::fmt;
+
+/// Error returned by `parse` when the input cannot be turned into a `Space`.
+#[derive(Debug)]
+pub struct ParseError {
+ /// Human-readable description of what went wrong.
+ pub msg: String,
+}
+
+impl fmt::Display for ParseError {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ write!(f, "parse error: {}", self.msg)
+ }
+}
+
+impl std::error::Error for ParseError {}
+
+/// Parse an s-expression string into a `Space`.
+///
+/// The input is tokenized first; a tokenizer failure is reported as
+/// "tokenization failed: …". A single root cell is then parsed from the start
+/// of the token stream, and failure to do so yields "failed to parse root cell".
+pub fn parse(input: &str) -> Result<Space, ParseError> {
+    let tokens = match tokenize(input) {
+        Ok(tokens) => tokens,
+        Err(e) => {
+            return Err(ParseError {
+                msg: format!("tokenization failed: {}", e),
+            })
+        }
+    };
+
+    let mut parser = Parser {
+        tokens,
+        pos: 0,
+        cells: Vec::new(),
+        attributes: Vec::new(),
+        child_ids: Vec::new(),
+    };
+
+    match parser.parse_cell() {
+        Some(root) => {
+            // Move the arenas the parser filled into the resulting space.
+            let Parser {
+                cells,
+                attributes,
+                child_ids,
+                ..
+            } = parser;
+            Ok(Space {
+                cells,
+                attributes,
+                child_ids,
+                root,
+            })
+        }
+        None => Err(ParseError {
+            msg: "failed to parse root cell".into(),
+        }),
+    }
+}
+
+/// Recursive-descent parser state: the token stream, a cursor into it, and
+/// the arenas that end up in the resulting `Space`.
+struct Parser<'a> {
+ /// Tokens produced by the tokenizer; they borrow from the input string.
+ tokens: Vec<Token<'a>>,
+ /// Index of the next token to look at.
+ pos: usize,
+ /// Cells created so far; a cell's `CellId` is its index here.
+ cells: Vec<Cell>,
+ /// Attributes created so far.
+ attributes: Vec<Attribute>,
+ /// Child references created so far.
+ child_ids: Vec<CellId>,
+}
+
+/// Snapshot of the parser taken before a speculative parse: the token cursor
+/// plus the lengths of the three arenas, so `restore` can rewind all of them.
+#[derive(Clone)]
+struct Checkpoint {
+ /// Token cursor at the time of the snapshot.
+ pos: usize,
+ /// Length of `Parser::cells` at the time of the snapshot.
+ cells_len: usize,
+ /// Length of `Parser::attributes` at the time of the snapshot.
+ attrs_len: usize,
+ /// Length of `Parser::child_ids` at the time of the snapshot.
+ child_ids_len: usize,
+}
+
+impl<'a> Parser<'a> {
+ /// Record the current token cursor and arena lengths.
+ fn checkpoint(&self) -> Checkpoint {
+     let pos = self.pos;
+     let cells_len = self.cells.len();
+     let attrs_len = self.attributes.len();
+     let child_ids_len = self.child_ids.len();
+     Checkpoint {
+         pos,
+         cells_len,
+         attrs_len,
+         child_ids_len,
+     }
+ }
+
+ /// Rewind the parser to `cp`: reset the token cursor and drop everything
+ /// pushed to the arenas since the checkpoint was taken.
+ fn restore(&mut self, cp: Checkpoint) {
+     let Checkpoint {
+         pos,
+         cells_len,
+         attrs_len,
+         child_ids_len,
+     } = cp;
+
+     self.pos = pos;
+     self.cells.truncate(cells_len);
+     self.attributes.truncate(attrs_len);
+     self.child_ids.truncate(child_ids_len);
+ }
+
+ /// Look at the token under the cursor without consuming it.
+ /// Returns `None` once the cursor is past the last token.
+ fn peek(&self) -> Option<&Token<'a>> {
+ self.tokens.get(self.pos)
+ }
+
+ /// Consume an opening parenthesis if it is the next token.
+ /// Returns whether a token was consumed.
+ fn eat_open(&mut self) -> bool {
+     let hit = matches!(self.peek(), Some(Token::Open));
+     if hit {
+         self.pos += 1;
+     }
+     hit
+ }
+
+ /// Consume a closing parenthesis if it is the next token.
+ /// Returns whether a token was consumed.
+ fn eat_close(&mut self) -> bool {
+     let hit = matches!(self.peek(), Some(Token::Close));
+     if hit {
+         self.pos += 1;
+     }
+     hit
+ }
+
+ /// Consume the next token only if it is a symbol equal to `expected`.
+ /// Returns whether a token was consumed.
+ fn eat_symbol_match(&mut self, expected: &str) -> bool {
+     let hit = matches!(self.peek(), Some(Token::Symbol(found)) if *found == expected);
+     if hit {
+         self.pos += 1;
+     }
+     hit
+ }
+
+ /// Consume the next token if it is a symbol and return its text.
+ fn eat_symbol(&mut self) -> Option<&'a str> {
+     match self.peek() {
+         Some(&Token::Symbol(text)) => {
+             self.pos += 1;
+             Some(text)
+         }
+         _ => None,
+     }
+ }
+
+ /// Consume the next token if it is a string literal and return its contents.
+ fn eat_string(&mut self) -> Option<&'a str> {
+     match self.peek() {
+         Some(&Token::Str(text)) => {
+             self.pos += 1;
+             Some(text)
+         }
+         _ => None,
+     }
+ }
+
+ /// Consume the next token if it is a number token whose text parses as
+ /// `f64`, and return the value. The cursor does not move otherwise.
+ fn eat_number(&mut self) -> Option<f64> {
+     let parsed = match self.peek() {
+         Some(Token::Number(text)) => text.parse::<f64>().ok(),
+         _ => None,
+     };
+     if parsed.is_some() {
+         self.pos += 1;
+     }
+     parsed
+ }
+
+ /// Append `cell` to the arena and return the id it was stored under
+ /// (its index in `cells`).
+ fn push_cell(&mut self, cell: Cell) -> CellId {
+     let index = self.cells.len();
+     self.cells.push(cell);
+     CellId(index as u32)
+ }
+
+ // --- Attribute parsing ---
+
+ /// Try to parse one `(keyword value)` attribute at the cursor.
+ ///
+ /// On success the tokens are consumed and the attribute is returned (it is
+ /// not pushed to the arena here). On any failure the parser is rewound to
+ /// where it started and `None` is returned.
+ fn try_parse_attribute(&mut self) -> Option<Attribute> {
+     let start = self.checkpoint();
+
+     if !self.eat_open() {
+         return None;
+     }
+
+     let parsed = match self.eat_symbol() {
+         None => None,
+         Some(keyword) => match keyword {
+             "shape" => match self.eat_symbol() {
+                 Some("rectangle") => Some(Attribute::Shape(Shape::Rectangle)),
+                 Some("circle") => Some(Attribute::Shape(Shape::Circle)),
+                 Some("square") => Some(Attribute::Shape(Shape::Square)),
+                 _ => None,
+             },
+             "id" => self.eat_symbol().map(|v| Attribute::Id(v.to_string())),
+             "name" => self.eat_string().map(|v| Attribute::Name(v.to_string())),
+             "material" => self
+                 .eat_string()
+                 .map(|v| Attribute::Material(v.to_string())),
+             "condition" => self
+                 .eat_string()
+                 .map(|v| Attribute::Condition(v.to_string())),
+             "location" => self
+                 .eat_symbol()
+                 .map(|v| Attribute::Location(v.to_string())),
+             "state" => match self.eat_symbol() {
+                 Some("on") => Some(Attribute::State(CellState::On)),
+                 Some("off") => Some(Attribute::State(CellState::Off)),
+                 Some("sleeping") => Some(Attribute::State(CellState::Sleeping)),
+                 _ => None,
+             },
+             "type" => self.eat_symbol().map(|v| Attribute::Type(v.to_string())),
+             "width" => self.eat_number().map(Attribute::Width),
+             "height" => self.eat_number().map(Attribute::Height),
+             "depth" => self.eat_number().map(Attribute::Depth),
+             _ => None,
+         },
+     };
+
+     // The attribute only counts if its closing parenthesis follows.
+     match parsed {
+         Some(attr) if self.eat_close() => Some(attr),
+         _ => {
+             self.restore(start);
+             None
+         }
+     }
+ }
+
+ /// Parse zero or more attributes, returning the count.
+ /// Attributes are pushed contiguously into self.attributes.
+ ///
+ /// The count is stored in a `u16`, so parsing stops once `u16::MAX`
+ /// attributes have been collected instead of overflowing the counter
+ /// (a panic in debug builds, a silent wrap in release builds). Any further
+ /// attribute is left in the token stream for the caller to deal with.
+ fn parse_attributes(&mut self) -> u16 {
+     let mut count = 0u16;
+     while count < u16::MAX {
+         match self.try_parse_attribute() {
+             Some(attr) => {
+                 self.attributes.push(attr);
+                 count += 1;
+             }
+             None => break,
+         }
+     }
+     count
+ }
+
+ // --- Cell parsing ---
+
+ /// Parse the body of a room/space/object cell: its attributes followed by
+ /// at most one child cell. Pushes the new cell and returns its id.
+ fn parse_cell_attrs(&mut self, cell_type: CellType) -> Option<CellId> {
+     let first_attr = self.attributes.len() as u32;
+     let attr_count = self.parse_attributes();
+
+     // The nested parse may itself append to child_ids, so it has to run
+     // before this cell's own child range is claimed.
+     let child = self.parse_cell();
+
+     let first_child = self.child_ids.len() as u32;
+     let child_count = u16::from(child.is_some());
+     self.child_ids.extend(child);
+
+     let id = self.push_cell(Cell {
+         cell_type,
+         first_attr,
+         attr_count,
+         first_child,
+         child_count,
+         parent: None,
+     });
+
+     // Point the child (if any) back at the cell that now owns it.
+     if let Some(child_id) = child {
+         self.cells[child_id.0 as usize].parent = Some(id);
+     }
+
+     Some(id)
+ }
+
+ /// Parse a cell whose keyword is `name` into a cell of kind `cell_type`.
+ /// Rewinds the parser and returns `None` when the keyword does not match
+ /// or the body cannot be parsed.
+ fn try_parse_named_cell(&mut self, name: &str, cell_type: CellType) -> Option<CellId> {
+     let start = self.checkpoint();
+
+     if self.eat_symbol_match(name) {
+         if let Some(id) = self.parse_cell_attrs(cell_type) {
+             return Some(id);
+         }
+     }
+
+     self.restore(start);
+     None
+ }
+
+ /// Try to parse the body of a `room` cell (keyword, attributes, optional child).
+ fn try_parse_room(&mut self) -> Option<CellId> {
+ self.try_parse_named_cell("room", CellType::Room)
+ }
+
+ /// Try to parse the body of a `space` cell (keyword, attributes, optional child).
+ fn try_parse_space(&mut self) -> Option<CellId> {
+ self.try_parse_named_cell("space", CellType::Space)
+ }
+
+ /// Try to parse the body of a `group` cell: the keyword followed by one or
+ /// more child cells (a group has no attributes of its own).
+ ///
+ /// Returns `None` and rewinds the parser when the keyword does not match,
+ /// when the group is empty, or when it has more children than the `u16`
+ /// child counter can represent (previously the count was silently
+ /// truncated by an `as u16` cast).
+ fn try_parse_group(&mut self) -> Option<CellId> {
+     let cp = self.checkpoint();
+
+     if !self.eat_symbol_match("group") {
+         self.restore(cp);
+         return None;
+     }
+
+     // Collect children — each parse_cell may recursively push to child_ids,
+     // so we collect CellIds first and append ours after recursion completes
+     let mut collected = Vec::new();
+     while let Some(child_id) = self.parse_cell() {
+         collected.push(child_id);
+     }
+
+     // Reject empty groups and groups too large for the child counter.
+     let child_count = match u16::try_from(collected.len()) {
+         Ok(count) if count > 0 => count,
+         _ => {
+             self.restore(cp);
+             return None;
+         }
+     };
+
+     // Now append our children contiguously
+     let first_child = self.child_ids.len() as u32;
+     self.child_ids.extend_from_slice(&collected);
+
+     let id = self.push_cell(Cell {
+         cell_type: CellType::Group,
+         first_attr: 0,
+         attr_count: 0,
+         first_child,
+         child_count,
+         parent: None,
+     });
+
+     // Set parent on children
+     for child_id in collected {
+         self.cells[child_id.0 as usize].parent = Some(id);
+     }
+
+     Some(id)
+ }
+
+ /// Try to parse the body of an object cell (`table`, `chair`, `door` or
+ /// `light`). Rewinds the parser and returns `None` for any other symbol.
+ fn try_parse_object(&mut self) -> Option<CellId> {
+     let start = self.checkpoint();
+
+     let kind = match self.eat_symbol()? {
+         "table" => ObjectType::Table,
+         "chair" => ObjectType::Chair,
+         "door" => ObjectType::Door,
+         "light" => ObjectType::Light,
+         _ => {
+             self.restore(start);
+             return None;
+         }
+     };
+
+     let parsed = self.parse_cell_attrs(CellType::Object(kind));
+     if parsed.is_none() {
+         self.restore(start);
+     }
+     parsed
+ }
+
+ /// Parse one parenthesised cell at the cursor.
+ ///
+ /// The cell kinds are tried in the order group, room, space, object. If
+ /// none matches, or the closing parenthesis is missing, the parser is
+ /// rewound to where it started and `None` is returned.
+ fn parse_cell(&mut self) -> Option<CellId> {
+     let start = self.checkpoint();
+
+     if !self.eat_open() {
+         return None;
+     }
+
+     // Try each cell type in turn, stopping at the first that parses.
+     let mut found = self.try_parse_group();
+     if found.is_none() {
+         found = self.try_parse_room();
+     }
+     if found.is_none() {
+         found = self.try_parse_space();
+     }
+     if found.is_none() {
+         found = self.try_parse_object();
+     }
+
+     match found {
+         Some(id) if self.eat_close() => Some(id),
+         _ => {
+             self.restore(start);
+             None
+         }
+     }
+ }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// A lone room with two attributes becomes a single cell.
+    #[test]
+    fn test_parse_simple_room() {
+        let input = "(room (name \"Test Room\") (width 10))";
+        let space = parse(input).unwrap();
+
+        assert_eq!(space.cells.len(), 1);
+
+        let root_id = space.root;
+        let root = space.cell(root_id);
+        assert_eq!(root.cell_type, CellType::Room);
+        assert_eq!(root.attr_count, 2);
+        assert_eq!(space.name(root_id), Some("Test Room"));
+    }
+
+    /// An object may carry one nested child object.
+    #[test]
+    fn test_parse_object_with_child() {
+        let space = parse("(table (name \"desk\") (light (name \"lamp\")))").unwrap();
+
+        // light is cell 0, table is cell 1
+        assert_eq!(space.cells.len(), 2);
+
+        let root = space.cell(space.root);
+        assert_eq!(root.cell_type, CellType::Object(ObjectType::Table));
+        assert_eq!(root.child_count, 1);
+
+        let lamp_id = space.children(space.root)[0];
+        assert_eq!(
+            space.cell(lamp_id).cell_type,
+            CellType::Object(ObjectType::Light)
+        );
+        assert_eq!(space.name(lamp_id), Some("lamp"));
+    }
+
+    /// A group collects several children under a room.
+    #[test]
+    fn test_parse_group() {
+        let space =
+            parse("(room (group (table (name \"t1\")) (chair (name \"c1\"))))").unwrap();
+
+        // table=0, chair=1, group=2, room=3
+        assert_eq!(space.cells.len(), 4);
+        assert_eq!(space.cell(space.root).cell_type, CellType::Room);
+
+        // room has one child (group)
+        let room_children = space.children(space.root);
+        assert_eq!(room_children.len(), 1);
+        let group_id = room_children[0];
+        assert_eq!(space.cell(group_id).cell_type, CellType::Group);
+
+        // group has two children
+        let group_children = space.children(group_id);
+        assert_eq!(group_children.len(), 2);
+
+        let expected_kinds = [ObjectType::Table, ObjectType::Chair];
+        for (child_id, kind) in group_children.iter().zip(expected_kinds.iter()) {
+            assert_eq!(
+                space.cell(*child_id).cell_type,
+                CellType::Object(kind.clone())
+            );
+        }
+    }
+}
diff --git a/crates/protoverse/src/serializer.rs b/crates/protoverse/src/serializer.rs
@@ -0,0 +1,108 @@
+use crate::ast::*;
+use std::fmt::Write;
+
+/// Render the whole space, starting at its root cell, as s-expression text.
+pub fn serialize(space: &Space) -> String {
+    let root = space.root;
+    serialize_from(space, root)
+}
+
+/// Render the subtree rooted at `root` as s-expression text, starting at
+/// indentation level zero.
+pub fn serialize_from(space: &Space, root: CellId) -> String {
+    let mut rendered = String::new();
+    let top_level_indent = 0;
+    write_cell(space, root, top_level_indent, &mut rendered);
+    rendered
+}
+
+/// Format a numeric attribute value: whole numbers whose magnitude is below
+/// `i64::MAX` are printed through `i64`, everything else through `f64`'s
+/// own `Display`.
+fn format_number(n: f64) -> String {
+    let is_whole = n.floor() == n;
+    let fits_i64 = n.abs() < i64::MAX as f64;
+
+    match (is_whole, fits_i64) {
+        (true, true) => (n as i64).to_string(),
+        _ => n.to_string(),
+    }
+}
+
+/// Append the s-expression for cell `id` (and, recursively, its children)
+/// to `out`.
+///
+/// Output shape: `(` + type keyword, then every attribute and every child on
+/// its own line indented by `indent + 1` repetitions of the pad string, then
+/// `)` directly after the last item.
+fn write_cell(space: &Space, id: CellId, indent: usize, out: &mut String) {
+    let cell = space.cell(id);
+    let inner_pad = " ".repeat(indent + 1);
+
+    out.push('(');
+    // Writing into a String cannot fail, so the fmt::Result is ignored.
+    let _ = write!(out, "{}", cell.cell_type);
+
+    // Attributes
+    for attr in space.attrs(id) {
+        out.push('\n');
+        out.push_str(&inner_pad);
+        write_attr(attr, out);
+    }
+
+    // Children
+    for &child_id in space.children(id) {
+        out.push('\n');
+        out.push_str(&inner_pad);
+        write_cell(space, child_id, indent + 1, out);
+    }
+
+    // The closing paren always follows the last item on the same line.
+    out.push(')');
+}
+
+/// Append one attribute to `out` as `(keyword value)`. Name, material and
+/// condition values are wrapped in double quotes; all others are written bare.
+fn write_attr(attr: &Attribute, out: &mut String) {
+    let rendered = match attr {
+        Attribute::Shape(s) => format!("(shape {})", s),
+        Attribute::Id(s) => format!("(id {})", s),
+        Attribute::Name(s) => format!("(name \"{}\")", s),
+        Attribute::Material(s) => format!("(material \"{}\")", s),
+        Attribute::Condition(s) => format!("(condition \"{}\")", s),
+        Attribute::Location(s) => format!("(location {})", s),
+        Attribute::State(s) => format!("(state {})", s),
+        Attribute::Type(s) => format!("(type {})", s),
+        Attribute::Width(n) => format!("(width {})", format_number(*n)),
+        Attribute::Height(n) => format!("(height {})", format_number(*n)),
+        Attribute::Depth(n) => format!("(depth {})", format_number(*n)),
+    };
+    out.push_str(&rendered);
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::parser::parse;
+
+    /// The serialized form of a simple room contains the cell keyword and
+    /// each of its attributes.
+    #[test]
+    fn test_serialize_simple() {
+        let output = serialize(&parse("(room (name \"Test\") (width 10))").unwrap());
+
+        let expected_fragments = ["(room", "(name \"Test\")", "(width 10)"];
+        for fragment in expected_fragments.iter() {
+            assert!(output.contains(fragment));
+        }
+    }
+}
diff --git a/crates/protoverse/src/tokenizer.rs b/crates/protoverse/src/tokenizer.rs
@@ -0,0 +1,230 @@
+use std::fmt;
+
+/// A token from the s-expression tokenizer.
+/// String references are zero-copy slices into the input.
+#[derive(Debug, Clone, PartialEq)]
+pub enum Token<'a> {
+ /// An opening parenthesis `(`.
+ Open,
+ /// A closing parenthesis `)`.
+ Close,
+ /// A bare symbol such as `room` or `welcome-desk`.
+ Symbol(&'a str),
+ /// The contents of a double-quoted string, without the surrounding quotes.
+ Str(&'a str),
+ /// The raw text of a numeric literal; it is not converted to a number here.
+ Number(&'a str),
+}
+
+/// Error produced when the input cannot be split into tokens.
+#[derive(Debug)]
+pub struct TokenError {
+ /// Human-readable description of the problem.
+ pub msg: String,
+ /// Byte offset into the input that the error refers to.
+ pub pos: usize,
+}
+
+impl fmt::Display for TokenError {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ write!(f, "token error at position {}: {}", self.pos, self.msg)
+ }
+}
+
+impl std::error::Error for TokenError {}
+
+/// Whether `c` may begin a symbol: ASCII lowercase letters only.
+fn is_symbol_start(c: u8) -> bool {
+    matches!(c, b'a'..=b'z')
+}
+
+/// Whether `c` may appear inside a symbol: ASCII lowercase letters, ASCII
+/// digits, `-` and `_`.
+fn is_symbol_char(c: u8) -> bool {
+    matches!(c, b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_')
+}
+
+/// Scan a symbol beginning at `start` and return the index one past its
+/// last byte.
+///
+/// The symbol ends at ASCII whitespace, a parenthesis, or the end of input.
+/// Errors if `start` is not a valid symbol start or an invalid symbol
+/// character is met before the symbol ends.
+fn scan_symbol(input: &[u8], start: usize) -> Result<usize, TokenError> {
+    match input.get(start) {
+        Some(&first) if is_symbol_start(first) => {}
+        _ => {
+            return Err(TokenError {
+                msg: "symbol must start with a-z".into(),
+                pos: start,
+            })
+        }
+    }
+
+    let rest_start = start + 1;
+    for (offset, &byte) in input[rest_start..].iter().enumerate() {
+        let at = rest_start + offset;
+        if byte.is_ascii_whitespace() || matches!(byte, b'(' | b')') {
+            return Ok(at);
+        }
+        if !is_symbol_char(byte) {
+            return Err(TokenError {
+                msg: format!("invalid symbol character '{}'", byte as char),
+                pos: at,
+            });
+        }
+    }
+
+    Ok(input.len())
+}
+
+/// Scan a numeric literal beginning at `start` and return the index one past
+/// its last byte.
+///
+/// A literal is an optional leading `-`, followed by ASCII digits with at
+/// most one `.`, and must contain at least one digit. It ends at ASCII
+/// whitespace, a parenthesis, or the end of input.
+///
+/// Lexemes such as `-`, `-.` or `1.2.3` are rejected here with a positioned
+/// error instead of being handed on as `Number` tokens that can never be
+/// converted to a value.
+fn scan_number(input: &[u8], start: usize) -> Result<usize, TokenError> {
+    let first = match input.get(start) {
+        Some(&byte) => byte,
+        None => {
+            return Err(TokenError {
+                msg: "unexpected end of input in number".into(),
+                pos: start,
+            })
+        }
+    };
+    if !first.is_ascii_digit() && first != b'-' {
+        return Err(TokenError {
+            msg: "number must start with 0-9 or -".into(),
+            pos: start,
+        });
+    }
+
+    let mut seen_digit = first.is_ascii_digit();
+    let mut seen_dot = false;
+    let mut end = start + 1;
+    while end < input.len() {
+        let c = input[end];
+        if c.is_ascii_whitespace() || c == b')' || c == b'(' {
+            break;
+        }
+        match c {
+            b'0'..=b'9' => seen_digit = true,
+            b'.' if !seen_dot => seen_dot = true,
+            b'.' => {
+                return Err(TokenError {
+                    msg: "number has more than one decimal point".into(),
+                    pos: end,
+                })
+            }
+            _ => {
+                return Err(TokenError {
+                    msg: format!("invalid number character '{}'", c as char),
+                    pos: end,
+                })
+            }
+        }
+        end += 1;
+    }
+
+    if !seen_digit {
+        return Err(TokenError {
+            msg: "number must contain at least one digit".into(),
+            pos: start,
+        });
+    }
+
+    Ok(end)
+}
+
+/// Scan a double-quoted string whose opening quote is at `start`.
+///
+/// Returns `(content_start, content_end)`: the index just after the opening
+/// quote and the index of the closing quote. A backslash causes the byte
+/// after it to be skipped, so an escaped quote does not end the string.
+/// Errors if `start` is not a quote or no closing quote is found.
+fn scan_string(input: &[u8], start: usize) -> Result<(usize, usize), TokenError> {
+    if input.get(start) != Some(&b'"') {
+        return Err(TokenError {
+            msg: "string must start with '\"'".into(),
+            pos: start,
+        });
+    }
+
+    let content_start = start + 1;
+    let mut cursor = content_start;
+    while let Some(&byte) = input.get(cursor) {
+        match byte {
+            // skip the backslash and the escaped char
+            b'\\' => cursor += 2,
+            // cursor points at the closing quote
+            b'"' => return Ok((content_start, cursor)),
+            _ => cursor += 1,
+        }
+    }
+
+    Err(TokenError {
+        msg: "unterminated string".into(),
+        pos: start,
+    })
+}
+
+/// Tokenize an s-expression input string into a sequence of tokens.
+/// Token string/symbol/number values are zero-copy references into the input.
+///
+/// ASCII whitespace between tokens is skipped. Returns a `TokenError`
+/// carrying the byte offset of the problem when a symbol, number or string
+/// is malformed or a character that cannot start a token is found.
+pub fn tokenize(input: &str) -> Result<Vec<Token<'_>>, TokenError> {
+    let mut tokens = Vec::new();
+    let bytes = input.as_bytes();
+    let mut i = 0;
+
+    while i < bytes.len() {
+        let c = bytes[i];
+
+        if c.is_ascii_whitespace() {
+            i += 1;
+            continue;
+        }
+
+        match c {
+            b'(' => {
+                tokens.push(Token::Open);
+                i += 1;
+            }
+            b')' => {
+                tokens.push(Token::Close);
+                i += 1;
+            }
+            b'"' => {
+                let (content_start, content_end) = scan_string(bytes, i)?;
+                tokens.push(Token::Str(&input[content_start..content_end]));
+                i = content_end + 1; // skip closing quote
+            }
+            b'a'..=b'z' => {
+                let end = scan_symbol(bytes, i)?;
+                tokens.push(Token::Symbol(&input[i..end]));
+                i = end;
+            }
+            b'0'..=b'9' | b'-' => {
+                let end = scan_number(bytes, i)?;
+                tokens.push(Token::Number(&input[i..end]));
+                i = end;
+            }
+            _ => {
+                // Report the real character rather than its first UTF-8
+                // byte, which would print as an unrelated Latin-1 glyph for
+                // non-ASCII input. Falls back to the raw byte if `i` is
+                // somehow not on a character boundary.
+                let found = input
+                    .get(i..)
+                    .and_then(|rest| rest.chars().next())
+                    .unwrap_or(c as char);
+                return Err(TokenError {
+                    msg: format!("unexpected character '{}'", found),
+                    pos: i,
+                });
+            }
+        }
+    }
+
+    Ok(tokens)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Tokenize `input` and compare the result with `expected`.
+    fn assert_tokens(input: &str, expected: Vec<Token<'_>>) {
+        let tokens = tokenize(input).unwrap();
+        assert_eq!(tokens, expected);
+    }
+
+    #[test]
+    fn test_tokenize_simple() {
+        assert_tokens(
+            "(room (name \"hello\"))",
+            vec![
+                Token::Open,
+                Token::Symbol("room"),
+                Token::Open,
+                Token::Symbol("name"),
+                Token::Str("hello"),
+                Token::Close,
+                Token::Close,
+            ],
+        );
+    }
+
+    #[test]
+    fn test_tokenize_number() {
+        assert_tokens(
+            "(width 10)",
+            vec![
+                Token::Open,
+                Token::Symbol("width"),
+                Token::Number("10"),
+                Token::Close,
+            ],
+        );
+    }
+
+    #[test]
+    fn test_tokenize_symbol_with_dash() {
+        assert_tokens(
+            "(id welcome-desk)",
+            vec![
+                Token::Open,
+                Token::Symbol("id"),
+                Token::Symbol("welcome-desk"),
+                Token::Close,
+            ],
+        );
+    }
+
+    #[test]
+    fn test_tokenize_negative_number() {
+        assert_tokens(
+            "(height -5)",
+            vec![
+                Token::Open,
+                Token::Symbol("height"),
+                Token::Number("-5"),
+                Token::Close,
+            ],
+        );
+    }
+}