Compare commits

...
Sign in to create a new pull request.

10 commits

37 changed files with 1484 additions and 0 deletions

78
Cargo.lock generated
View file

@ -935,6 +935,15 @@ version = "0.7.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3e64b0cc0439b12df2fa678eae89a1c56a529fd067a9115f7827f1fffd22b32"
[[package]]
name = "clipboard-win"
version = "5.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bde03770d3df201d4fb868f2c9c59e66a3e4e2bd06692a0fe701e7103c7e84d4"
dependencies = [
"error-code",
]
[[package]]
name = "cmake"
version = "0.1.57"
@ -1209,6 +1218,7 @@ dependencies = [
"hickory-resolver",
"http",
"image",
"indexmap",
"ipaddress",
"itertools 0.14.0",
"ldap3",
@ -1819,6 +1829,12 @@ dependencies = [
"windows-sys 0.61.2",
]
[[package]]
name = "error-code"
version = "3.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dea2df4cf52843e0452895c455a1a2cfbb842a1e7329671acf418fdc53ed4c59"
[[package]]
name = "event-listener"
version = "5.3.1"
@ -3658,6 +3674,33 @@ dependencies = [
"syn",
]
[[package]]
name = "peg"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9928cfca101b36ec5163e70049ee5368a8a1c3c6efc9ca9c5f9cc2f816152477"
dependencies = [
"peg-macros",
"peg-runtime",
]
[[package]]
name = "peg-macros"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6298ab04c202fa5b5d52ba03269fb7b74550b150323038878fe6c372d8280f71"
dependencies = [
"peg-runtime",
"proc-macro2",
"quote",
]
[[package]]
name = "peg-runtime"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "132dca9b868d927b35b5dd728167b2dee150eb1ad686008fc71ccb298b776fca"
[[package]]
name = "percent-encoding"
version = "2.3.2"
@ -4585,6 +4628,25 @@ version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
[[package]]
name = "rustyline"
version = "17.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e902948a25149d50edc1a8e0141aad50f54e22ba83ff988cf8f7c9ef07f50564"
dependencies = [
"bitflags",
"cfg-if",
"clipboard-win",
"libc",
"log",
"memchr",
"nix",
"unicode-segmentation",
"unicode-width 0.2.2",
"utf8parse",
"windows-sys 0.60.2",
]
[[package]]
name = "rustyline-async"
version = "0.4.6"
@ -5107,6 +5169,16 @@ version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596"
[[package]]
name = "stitcher"
version = "0.5.3"
dependencies = [
"indexmap",
"itertools 0.14.0",
"peg",
"rustyline",
]
[[package]]
name = "strict"
version = "0.2.0"
@ -5868,6 +5940,12 @@ version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
[[package]]
name = "utf8parse"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]]
name = "uuid"
version = "1.19.0"

View file

@ -548,6 +548,10 @@ features = ["sync", "tls-rustls", "rustls-provider"]
[workspace.dependencies.resolv-conf]
version = "0.7.5"
# Used by stitched ordering
[workspace.dependencies.indexmap]
version = "2.13.0"
#
# Patches
#

View file

@ -118,6 +118,7 @@ webpage.optional = true
blurhash.workspace = true
blurhash.optional = true
recaptcha-verify = { version = "0.1.5", default-features = false }
indexmap.workspace = true
[target.'cfg(all(unix, target_os = "linux"))'.dependencies]
sd-notify.workspace = true

19
src/stitcher/Cargo.toml Normal file
View file

@ -0,0 +1,19 @@
[package]
name = "stitcher"
description = "An implementation of stitched ordering (https://codeberg.org/andybalaam/stitched-order)"
edition.workspace = true
license.workspace = true
readme.workspace = true
repository.workspace = true
version.workspace = true
[lib]
path = "mod.rs"
[dependencies]
indexmap.workspace = true
itertools.workspace = true
[dev-dependencies]
peg = "0.8.5"
rustyline = { version = "17.0.2", default-features = false }

141
src/stitcher/algorithm.rs Normal file
View file

@ -0,0 +1,141 @@
use std::collections::HashSet;
use indexmap::IndexSet;
use itertools::Itertools;
use super::{Batch, Gap, OrderKey, StitchedItem, StitcherBackend};
/// Updates to a gap in the stitched order.
///
/// Produced as part of [`OrderUpdates`]; it is the caller's job to apply
/// these changes to its stored order.
#[derive(Debug)]
pub struct GapUpdate<'id, K: OrderKey> {
    /// The opaque key of the gap to update.
    pub key: K,
    /// The new contents of the gap. If this is empty, the gap should be
    /// deleted.
    pub gap: Gap,
    /// New items to insert after the gap. These items _should not_ be
    /// synchronized to clients.
    pub inserted_items: Vec<StitchedItem<'id>>,
}
/// Updates to the stitched order.
#[derive(Debug)]
pub struct OrderUpdates<'id, K: OrderKey> {
    /// Updates to individual gaps. The items inserted by these updates _should
    /// not_ be synchronized to clients.
    pub gap_updates: Vec<GapUpdate<'id, K>>,
    /// New items to append to the end of the order. These items _should_ be
    /// synchronized to clients.
    pub new_items: Vec<StitchedItem<'id>>,
    /// The subset of events in the batch which got slotted into an existing gap. This is tracked
    /// for unit testing and may eventually be sent to clients.
    pub events_added_to_gaps: HashSet<&'id str>,
}
/// The stitcher, which implements the stitched ordering algorithm.
/// Its primary method is [`Stitcher::stitch`].
pub struct Stitcher<'backend, B: StitcherBackend> {
    /// Read-only view of the existing stitched order.
    backend: &'backend B,
}
impl<B: StitcherBackend> Stitcher<'_, B> {
    /// Create a new [`Stitcher`] given a [`StitcherBackend`].
    pub fn new(backend: &B) -> Stitcher<'_, B> { Stitcher { backend } }
    /// Given a [`Batch`], compute the [`OrderUpdates`] which should be made to
    /// the stitched order to incorporate that batch. It is the responsibility
    /// of the caller to apply the updates.
    pub fn stitch<'id>(&self, batch: &Batch<'id>) -> OrderUpdates<'id, B::Key> {
        let mut gap_updates = Vec::new();
        let mut events_added_to_gaps: HashSet<&'id str> = HashSet::new();
        // Events in the batch which haven't been fitted into a gap or appended to the
        // end yet.
        let mut remaining_events: IndexSet<_> = batch.events().collect();
        // 1: Find existing gaps which include IDs of events in `batch`
        let matching_gaps = self.backend.find_matching_gaps(batch.events());
        // Repeat steps 2-9 for each matching gap
        for (key, mut gap) in matching_gaps {
            // 2. Find events in `batch` which are mentioned in `gap`
            let matching_events = remaining_events.iter().filter(|id| gap.contains(**id));
            // Extend `events_added_to_gaps` with the matching events, which are destined to
            // be slotted into gaps.
            events_added_to_gaps.extend(matching_events.clone());
            // 3. Create the to-insert list from the predecessor sets of each matching event
            let events_to_insert: Vec<_> = matching_events
                .filter_map(|event| batch.predecessors(event))
                .flat_map(|predecessors| predecessors.predecessor_set.iter())
                .filter(|event| remaining_events.contains(*event))
                .copied()
                .collect();
            // 4. Remove the events in the to-insert list from `remaining_events` so they
            // aren't processed again
            remaining_events.retain(|event| !events_to_insert.contains(event));
            // 5 and 6
            let inserted_items = self.sort_events_and_create_gaps(batch, events_to_insert);
            // 8. Update gap
            gap.retain(|id| !batch.contains(id));
            // 7 and 9. Append to-insert list and delete gap if empty
            // The actual work of mutating the order is handled by the callee,
            // we just record an update to make.
            gap_updates.push(GapUpdate { key: key.clone(), gap, inserted_items });
        }
        // 10. Append remaining events and gaps
        let new_items = self.sort_events_and_create_gaps(batch, remaining_events);
        OrderUpdates {
            gap_updates,
            new_items,
            events_added_to_gaps,
        }
    }
    /// Sort `events_to_insert` into DAG;received order and interleave a
    /// [`StitchedItem::Gap`] before every event whose `prev_events` are not
    /// all known (present in `batch` or already present in the backend).
    fn sort_events_and_create_gaps<'id>(
        &self,
        batch: &Batch<'id>,
        events_to_insert: impl IntoIterator<Item = &'id str>,
    ) -> Vec<StitchedItem<'id>> {
        // 5. Sort the to-insert list with DAG;received order
        let events_to_insert = events_to_insert
            .into_iter()
            .sorted_by(batch.compare_by_dag_received())
            .collect_vec();
        // Allocate 1.5x the size of the to-insert list, leaving headroom for
        // interleaved gap items. Base the estimate on `len()`, not
        // `capacity()`: the collected `Vec`'s capacity is an allocator detail
        // and may overshoot the actual element count.
        let items_capacity = events_to_insert
            .len()
            .saturating_add(events_to_insert.len().div_euclid(2));
        let mut items = Vec::with_capacity(items_capacity);
        for event in events_to_insert {
            let missing_prev_events: HashSet<String> = batch
                .predecessors(event)
                .expect("events in to_insert should be in batch")
                .prev_events
                .iter()
                .filter(|prev_event| {
                    !(batch.contains(prev_event) || self.backend.event_exists(prev_event))
                })
                .map(|id| String::from(*id))
                .collect();
            // 6. Precede the event with a gap for any still-missing predecessors.
            if !missing_prev_events.is_empty() {
                items.push(StitchedItem::Gap(missing_prev_events));
            }
            items.push(StitchedItem::Event(event));
        }
        items
    }
}

View file

@ -0,0 +1,88 @@
use std::collections::HashSet;
use rustyline::{DefaultEditor, Result, error::ReadlineError};
use stitcher::{Batch, EventEdges, Stitcher, memory_backend::MemoryStitcherBackend};
/// Help text printed when the REPL starts.
const BANNER: &str = "
stitched ordering test repl
- append an event by typing its name: `A`
- to add prev events, type an arrow and then space-separated event names: `A --> B C D`
- to add multiple events at once, separate them with commas
- use `/reset` to clear the ordering
Ctrl-D to exit, Ctrl-C to clear input
"
.trim_ascii();
/// A line of input entered at the REPL prompt.
enum Command<'line> {
    /// Append the given events (and their prev-event edges) to the order.
    AppendEvents(EventEdges<'line>),
    /// Clear the in-memory ordering (`/reset`).
    ResetOrder,
}
peg::parser! {
    // partially copied from the test case parser
    grammar command_parser() for str {
        /// Parse whitespace.
        rule _ -> () = quiet! { $([' '])* {} }
        /// Parse an event ID.
        rule event_id() -> &'input str
            = quiet! { id:$([char if char.is_ascii_alphanumeric() || ['_', '-'].contains(&char)]+) { id } }
            / expected!("an event ID containing only [a-zA-Z0-9_-]")
        /// Parse an event and its prev events.
        rule event() -> (&'input str, HashSet<&'input str>)
            = id:event_id() prev_events:(_ "-->" _ id:(event_id() ++ _) { id })? {
                (id, prev_events.into_iter().flatten().collect())
            }
        /// Parse a whole input line: either `/reset` or a comma-separated
        /// list of events.
        pub rule command() -> Command<'input> =
            "/reset" { Command::ResetOrder }
            / events:event() ++ (_ "," _) { Command::AppendEvents(events.into_iter().collect()) }
    }
}
/// REPL entry point: read lines, parse each into a [`Command`], and run it
/// against an in-memory stitched ordering.
fn main() -> Result<()> {
    let mut backend = MemoryStitcherBackend::default();
    let mut reader = DefaultEditor::new()?;
    println!("{BANNER}");
    loop {
        match reader.readline("> ") {
            | Ok(line) => match command_parser::command(&line) {
                | Ok(Command::AppendEvents(events)) => {
                    // Stitch the new batch into the existing order and print
                    // every resulting update before applying it.
                    let batch = Batch::from_edges(&events);
                    let stitcher = Stitcher::new(&backend);
                    let updates = stitcher.stitch(&batch);
                    for update in &updates.gap_updates {
                        println!("update to gap {}:", update.key);
                        println!(" new gap contents: {:?}", update.gap);
                        println!(" inserted items: {:?}", update.inserted_items);
                    }
                    println!("events added to gaps: {:?}", &updates.events_added_to_gaps);
                    println!();
                    println!("items to sync: {:?}", &updates.new_items);
                    backend.extend(updates);
                    println!("order: {backend:?}");
                },
                | Ok(Command::ResetOrder) => {
                    backend.clear();
                    println!("order cleared.");
                },
                | Err(parse_error) => {
                    println!("parse error!! {parse_error}");
                },
            },
            // Ctrl-C discards the current input line and re-prompts.
            | Err(ReadlineError::Interrupted) => {
                println!("interrupt");
            },
            // Ctrl-D exits the loop cleanly.
            | Err(ReadlineError::Eof) => {
                println!("goodbye :3");
                break Ok(());
            },
            | Err(err) => break Err(err),
        }
    }
}

View file

@ -0,0 +1,130 @@
use std::{
fmt::Debug,
sync::atomic::{AtomicU64, Ordering},
};
use crate::{Gap, OrderUpdates, StitchedItem, StitcherBackend};
/// A version of [`StitchedItem`] which owns event IDs.
#[derive(Debug)]
enum MemoryStitcherItem {
    /// A single event, stored as an owned ID.
    Event(String),
    /// A gap representing one or more missing events.
    Gap(Gap),
}
/// Convert a borrowed [`StitchedItem`] into an owning item by copying the
/// event ID. Gaps already own their contents and are moved.
impl From<StitchedItem<'_>> for MemoryStitcherItem {
    fn from(value: StitchedItem) -> Self {
        match value {
            | StitchedItem::Event(id) => MemoryStitcherItem::Event(id.to_string()),
            | StitchedItem::Gap(gap) => MemoryStitcherItem::Gap(gap),
        }
    }
}
/// Borrow an owning item as a [`StitchedItem`]. Gaps are cloned because
/// [`StitchedItem::Gap`] holds an owned set.
impl<'id> From<&'id MemoryStitcherItem> for StitchedItem<'id> {
    fn from(value: &'id MemoryStitcherItem) -> Self {
        match value {
            | MemoryStitcherItem::Event(id) => StitchedItem::Event(id),
            | MemoryStitcherItem::Gap(gap) => StitchedItem::Gap(gap.clone()),
        }
    }
}
/// A stitcher backend which holds a stitched ordering in RAM.
///
/// Used by the test suite and the example REPL. Each item is identified by
/// a monotonically increasing `u64` key.
#[derive(Default)]
pub struct MemoryStitcherBackend {
    // The ordered items, each paired with the unique key it was assigned.
    items: Vec<(u64, MemoryStitcherItem)>,
    // Source of fresh item keys.
    counter: AtomicU64,
}
impl MemoryStitcherBackend {
    /// Allocate the next unique item key.
    fn next_id(&self) -> u64 { self.counter.fetch_add(1, Ordering::Relaxed) }
    /// Extend this ordering with new updates.
    ///
    /// # Panics
    /// Panics if a gap update refers to a key not present in this ordering,
    /// or if the item at that key is not a gap.
    pub fn extend(&mut self, results: OrderUpdates<'_, <Self as StitcherBackend>::Key>) {
        for update in results.gap_updates {
            let Some(gap_index) = self.items.iter().position(|(key, _)| *key == update.key)
            else {
                panic!("bad update key {}", update.key);
            };
            // An empty new gap means "delete the gap"; the inserted items then
            // take its place. Otherwise the gap is rewritten in place and the
            // inserted items go immediately after it.
            let insertion_index = if update.gap.is_empty() {
                self.items.remove(gap_index);
                gap_index
            } else {
                match self.items.get_mut(gap_index) {
                    | Some((_, MemoryStitcherItem::Gap(gap))) => {
                        *gap = update.gap;
                    },
                    | Some((key, other)) => {
                        panic!("expected item with key {key} to be a gap, it was {other:?}");
                    },
                    | None => unreachable!("we just checked that this index is valid"),
                }
                gap_index.checked_add(1).expect(
                    "should never allocate usize::MAX ids. what kind of test are you running",
                )
            };
            let to_insert: Vec<_> = update
                .inserted_items
                .into_iter()
                .map(|item| (self.next_id(), item.into()))
                .collect();
            // Empty splice range: pure insertion, nothing is replaced.
            self.items
                .splice(insertion_index..insertion_index, to_insert.into_iter())
                .for_each(drop);
        }
        // New items are simply appended at the end of the order.
        let new_items: Vec<_> = results
            .new_items
            .into_iter()
            .map(|item| (self.next_id(), item.into()))
            .collect();
        self.items.extend(new_items);
    }
    /// Iterate over the items in this ordering.
    pub fn iter(&self) -> impl Iterator<Item = StitchedItem<'_>> {
        self.items.iter().map(|(_, item)| item.into())
    }
    /// Clear this ordering.
    pub fn clear(&mut self) { self.items.clear(); }
}
impl StitcherBackend for MemoryStitcherBackend {
    type Key = u64;
    /// Return each gap which mentions at least one of `events`.
    ///
    /// Gaps are yielded in the order of the first event which matches them,
    /// and each gap is yielded at most once even when several events in
    /// `events` hit the same gap. (Yielding a gap twice would make the
    /// stitcher emit two updates with the same key; `extend` then panics on
    /// the second update if the first one deleted the gap.)
    fn find_matching_gaps<'a>(
        &'a self,
        events: impl Iterator<Item = &'a str>,
    ) -> impl Iterator<Item = (Self::Key, Gap)> {
        // nobody cares about test suite performance right
        let mut gaps: Vec<(u64, Gap)> = vec![];
        for event in events {
            for (key, item) in &self.items {
                if let MemoryStitcherItem::Gap(gap) = item
                    && gap.contains(event)
                    // Skip gaps already matched by an earlier event.
                    && !gaps.iter().any(|(seen_key, _)| seen_key == key)
                {
                    gaps.push((*key, gap.clone()));
                }
            }
        }
        gaps.into_iter()
    }
    /// Return whether `event` is stored as an event item in this ordering.
    /// IDs which only appear inside gaps do not count as existing.
    fn event_exists<'a>(&'a self, event: &'a str) -> bool {
        self.items
            .iter()
            .any(|item| matches!(&item.1, MemoryStitcherItem::Event(id) if event == id))
    }
}
/// Debug-formats the ordering as a list of its [`StitchedItem`]s, hiding the
/// internal item keys.
impl Debug for MemoryStitcherBackend {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_list().entries(self.iter()).finish()
    }
}

160
src/stitcher/mod.rs Normal file
View file

@ -0,0 +1,160 @@
use std::{cmp::Ordering, collections::HashSet};
use indexmap::IndexMap;
pub mod algorithm;
pub mod memory_backend;
#[cfg(test)]
mod test;
pub use algorithm::*;
/// A gap in the stitched order.
///
/// A gap is the set of event IDs known to be missing at that position in the
/// order.
pub type Gap = HashSet<String>;
/// An item in the stitched order.
#[derive(Debug)]
pub enum StitchedItem<'id> {
    /// A single event.
    Event(&'id str),
    /// A gap representing one or more missing events.
    Gap(Gap),
}
/// An opaque key returned by a [`StitcherBackend`] to identify an item in its
/// order.
pub trait OrderKey: Eq + Clone {}
impl<T: Eq + Clone> OrderKey for T {}
/// A trait providing read-only access to an existing stitched order.
pub trait StitcherBackend {
    /// The opaque key type used to identify items in the backend's order.
    type Key: OrderKey;
    /// Return all gaps containing one or more events listed in `events`.
    fn find_matching_gaps<'a>(
        &'a self,
        events: impl Iterator<Item = &'a str>,
    ) -> impl Iterator<Item = (Self::Key, Gap)>;
    /// Return whether an event exists in the stitched order.
    fn event_exists<'a>(&'a self, event: &'a str) -> bool;
}
/// An ordered map from an event ID to its `prev_events`.
pub type EventEdges<'id> = IndexMap<&'id str, HashSet<&'id str>>;
/// Information about the `prev_events` of an event.
/// This struct does not store the ID of the event itself.
#[derive(Debug)]
struct EventPredecessors<'id> {
    /// The `prev_events` of the event.
    pub prev_events: HashSet<&'id str>,
    /// The predecessor set of the event: the event itself plus its transitive
    /// `prev_events` which are present in the same batch. Note that
    /// `prev_events` missing from the batch are excluded, so this is _not_
    /// simply a superset of [`EventPredecessors::prev_events`]. See
    /// [`Batch::find_predecessor_set`] for details. It is cached in this
    /// struct for performance.
    pub predecessor_set: HashSet<&'id str>,
}
/// A batch of events to be inserted into the stitched order.
#[derive(Debug)]
pub struct Batch<'id> {
    /// Map from event ID to its predecessor info, kept in received order.
    events: IndexMap<&'id str, EventPredecessors<'id>>,
}
impl<'id> Batch<'id> {
    /// Create a new [`Batch`] from an [`EventEdges`].
    pub fn from_edges<'edges>(edges: &EventEdges<'edges>) -> Batch<'edges> {
        let mut events = IndexMap::new();
        for (event, prev_events) in edges {
            // Precompute each event's predecessor set so later comparisons in
            // `compare_by_dag_received` are cheap lookups.
            let predecessor_set = Self::find_predecessor_set(event, edges);
            events.insert(*event, EventPredecessors {
                prev_events: prev_events.clone(),
                predecessor_set,
            });
        }
        Batch { events }
    }
    /// Build the predecessor set of `event` using `edges`. The predecessor set
    /// is a subgraph of the room's DAG which may be thought of as a tree
    /// rooted at `event` containing _only_ events which are included in
    /// `edges`. It is represented as a set and not a proper tree structure for
    /// efficiency.
    fn find_predecessor_set<'a>(event: &'a str, edges: &EventEdges<'a>) -> HashSet<&'a str> {
        // The predecessor set which we are building.
        let mut predecessor_set = HashSet::new();
        // The queue of events to check for membership in `edges`.
        let mut events_to_check = vec![event];
        // Events which we have already checked and do not need to revisit.
        let mut events_already_checked = HashSet::new();
        while let Some(event) = events_to_check.pop() {
            // Don't add this event to the queue again.
            events_already_checked.insert(event);
            // If this event is in `edges`, add it to the predecessor set.
            // (`children` here is the event's `prev_events`, i.e. its parents
            // in the DAG walked towards the past.)
            if let Some(children) = edges.get(event) {
                predecessor_set.insert(event);
                // Also add all its `prev_events` to the queue. It's fine if some of them don't
                // exist in `edges` because they'll just be discarded when they're popped
                // off the queue.
                events_to_check.extend(
                    children
                        .iter()
                        .filter(|event| !events_already_checked.contains(*event)),
                );
            }
        }
        predecessor_set
    }
    /// Iterate over all the events contained in this batch.
    fn events(&self) -> impl Iterator<Item = &'id str> { self.events.keys().copied() }
    /// Check whether an event exists in this batch.
    fn contains(&self, event: &'id str) -> bool { self.events.contains_key(event) }
    /// Return the predecessors of an event, if it exists in this batch.
    fn predecessors(&self, event: &str) -> Option<&EventPredecessors<'id>> {
        self.events.get(event)
    }
    /// Compare two events by DAG;received order.
    ///
    /// If either event is in the other's predecessor set it comes first,
    /// otherwise they are sorted by which comes first in the batch.
    fn compare_by_dag_received(&self) -> impl FnMut(&&'id str, &&'id str) -> Ordering {
        |a, b| {
            // `b` is an ancestor of `a`, so `a` sorts after it.
            if self
                .predecessors(a)
                .is_some_and(|it| it.predecessor_set.contains(b))
            {
                Ordering::Greater
            // `a` is an ancestor of `b`, so `a` sorts before it.
            } else if self
                .predecessors(b)
                .is_some_and(|it| it.predecessor_set.contains(a))
            {
                Ordering::Less
            } else {
                // Unrelated events: fall back to received (batch) order.
                let a_index = self
                    .events
                    .get_index_of(a)
                    .expect("a should be in this batch");
                let b_index = self
                    .events
                    .get_index_of(b)
                    .expect("b should be in this batch");
                a_index.cmp(&b_index)
            }
        }
    }
}

102
src/stitcher/test/mod.rs Normal file
View file

@ -0,0 +1,102 @@
use itertools::Itertools;
use super::{algorithm::*, *};
use crate::memory_backend::MemoryStitcherBackend;
mod parser;
/// Run a parsed test case: feed each phase's batch through the stitcher,
/// apply the updates to an in-memory backend, and assert that the resulting
/// order (and, when present, the set of gap-filling events) matches the
/// expectations of the phase.
fn run_testcase(testcase: parser::TestCase<'_>) {
    let mut backend = MemoryStitcherBackend::default();
    for (index, phase) in testcase.into_iter().enumerate() {
        let stitcher = Stitcher::new(&backend);
        let batch = Batch::from_edges(&phase.batch);
        let updates = stitcher.stitch(&batch);
        // Log everything so a failing test shows the full story.
        println!();
        println!("===== phase {index}");
        for update in &updates.gap_updates {
            println!("update to gap {}:", update.key);
            println!(" new gap contents: {:?}", update.gap);
            println!(" inserted items: {:?}", update.inserted_items);
        }
        println!("expected new items: {:?}", &phase.order.new_items);
        println!(" actual new items: {:?}", &updates.new_items);
        // `zip_eq` panics on a length mismatch, so item counts are checked too.
        for (expected, actual) in phase
            .order
            .new_items
            .iter()
            .zip_eq(updates.new_items.iter())
        {
            assert_eq!(
                expected, actual,
                "bad new item, expected {expected:?} but got {actual:?}"
            );
        }
        // The "and we notify about these gaps" section is optional per phase.
        if let Some(updated_gaps) = phase.updated_gaps {
            println!("expected events added to gaps: {updated_gaps:?}");
            println!(" actual events added to gaps: {:?}", updates.events_added_to_gaps);
            assert_eq!(
                updated_gaps, updates.events_added_to_gaps,
                "incorrect events added to gaps"
            );
        }
        backend.extend(updates);
        println!("extended ordering: {:?}", backend);
        // Finally compare the entire resulting order against expectations.
        for (expected, ref actual) in phase.order.iter().zip_eq(backend.iter()) {
            assert_eq!(
                expected, actual,
                "bad item in order, expected {expected:?} but got {actual:?}",
            );
        }
    }
}
/// Generate a `#[test]` named `$id` which parses
/// `./testcases/<index>-<id>.stitched` at compile time and runs it through
/// [`run_testcase`].
macro_rules! testcase {
    ($index:literal : $id:ident) => {
        #[test]
        fn $id() {
            let testcase = parser::parse(include_str!(concat!(
                "./testcases/",
                $index,
                "-",
                stringify!($id),
                ".stitched"
            )));
            run_testcase(testcase);
        }
    };
}
testcase!("001": receiving_new_events);
testcase!("002": recovering_after_netsplit);
testcase!("zzz": being_before_a_gap_item_beats_being_after_an_existing_item_multiple);
testcase!("zzz": being_before_a_gap_item_beats_being_after_an_existing_item);
testcase!("zzz": chains_are_reordered_using_prev_events);
testcase!("zzz": empty_then_simple_chain);
testcase!("zzz": empty_then_two_chains_interleaved);
testcase!("zzz": empty_then_two_chains);
testcase!("zzz": filling_in_a_gap_with_a_batch_containing_gaps);
testcase!("zzz": gaps_appear_before_events_referring_to_them_received_order);
testcase!("zzz": gaps_appear_before_events_referring_to_them);
testcase!("zzz": if_prev_events_determine_order_they_override_received);
testcase!("zzz": insert_into_first_of_several_gaps);
testcase!("zzz": insert_into_last_of_several_gaps);
testcase!("zzz": insert_into_middle_of_several_gaps);
testcase!("zzz": linked_events_are_split_across_gaps);
testcase!("zzz": linked_events_in_a_diamond_are_split_across_gaps);
testcase!("zzz": middle_of_batch_matches_gap_and_end_of_batch_matches_end);
testcase!("zzz": middle_of_batch_matches_gap);
testcase!("zzz": multiple_events_referring_to_the_same_missing_event_first_has_more);
testcase!("zzz": multiple_events_referring_to_the_same_missing_event);
testcase!("zzz": multiple_events_referring_to_the_same_missing_event_with_more);
testcase!("zzz": multiple_missing_prev_events_turn_into_a_single_gap);
testcase!("zzz": partially_filling_a_gap_leaves_it_before_new_nodes);
testcase!("zzz": partially_filling_a_gap_with_two_events);
testcase!("zzz": received_order_wins_within_a_subgroup_if_no_prev_event_chain);
testcase!("zzz": subgroups_are_processed_in_first_received_order);

140
src/stitcher/test/parser.rs Normal file
View file

@ -0,0 +1,140 @@
use std::collections::HashSet;
use indexmap::IndexMap;
use super::StitchedItem;
/// An event ID as written in a test case file.
pub(super) type TestEventId<'id> = &'id str;
/// A gap (set of missing event IDs) as written in a test case file.
pub(super) type TestGap<'id> = HashSet<TestEventId<'id>>;
/// An expected item in the order section of a test case.
#[derive(Debug)]
pub(super) enum TestStitchedItem<'id> {
    Event(TestEventId<'id>),
    Gap(TestGap<'id>),
}
/// Compare an expected test item against an actual [`StitchedItem`].
///
/// NOTE(review): for gaps this is a one-way subset check — every expected ID
/// must be in the actual gap, but an actual gap with extra IDs still compares
/// equal. Confirm this leniency is intended.
impl PartialEq<StitchedItem<'_>> for TestStitchedItem<'_> {
    fn eq(&self, other: &StitchedItem<'_>) -> bool {
        match (self, other) {
            | (TestStitchedItem::Event(lhs), StitchedItem::Event(rhs)) => lhs == rhs,
            | (TestStitchedItem::Gap(lhs), StitchedItem::Gap(rhs)) =>
                lhs.iter().all(|id| rhs.contains(*id)),
            | _ => false,
        }
    }
}
/// A whole test case: a sequence of phases run against one ordering.
pub(super) type TestCase<'id> = Vec<Phase<'id>>;
/// One phase of a test case: a batch to stitch plus the expected outcome.
pub(super) struct Phase<'id> {
    /// The batch of events received in this phase.
    pub batch: Batch<'id>,
    /// The expected order after applying this phase's updates.
    pub order: Order<'id>,
    /// The expected set of events slotted into gaps, if the test declares one.
    pub updated_gaps: Option<HashSet<TestEventId<'id>>>,
}
/// A batch as written in a test case: event ID -> its `prev_events`.
pub(super) type Batch<'id> = IndexMap<TestEventId<'id>, HashSet<TestEventId<'id>>>;
/// The expected order, split into gap-inserted items and newly appended items.
pub(super) struct Order<'id> {
    pub inserted_items: Vec<TestStitchedItem<'id>>,
    pub new_items: Vec<TestStitchedItem<'id>>,
}
impl<'id> Order<'id> {
    /// Iterate over all expected items: inserted items, then new items.
    pub(super) fn iter(&self) -> impl Iterator<Item = &TestStitchedItem<'id>> {
        self.inserted_items.iter().chain(self.new_items.iter())
    }
}
peg::parser! {
    // Grammar for `.stitched` test case files.
    grammar testcase() for str {
        /// Parse whitespace.
        rule _ -> () = quiet! { $([' '])* {} }
        /// Parse empty lines and comments.
        rule newline() -> () = quiet! { (("#" [^'\n']*)? "\n")+ {} }
        /// Parse an "event ID" in a test case, which may only consist of ASCII letters and numbers.
        rule event_id() -> TestEventId<'input>
            = quiet! { id:$([char if char.is_ascii_alphanumeric()]+) { id } }
            / expected!("event id")
        /// Parse a gap in the order section.
        rule gap() -> TestGap<'input>
            = "-" events:event_id() ++ "," { events.into_iter().collect() }
        /// Parse either an event id or a gap.
        rule stitched_item() -> TestStitchedItem<'input> =
            id:event_id() { TestStitchedItem::Event(id) }
            / gap:gap() { TestStitchedItem::Gap(gap) }
        /// Parse an event line in the batch section, mapping an event name to zero or one prev events.
        /// The prev events are merged together by [`batch()`].
        rule batch_event() -> (TestEventId<'input>, Option<TestEventId<'input>>)
            = id:event_id() prev:(_ "-->" _ prev:event_id() { prev })? { (id, prev) }
        /// Parse the batch section of a phase.
        rule batch() -> Batch<'input>
            = events:batch_event() ++ newline() {
                /*
                Repeated event lines need to be merged together. For example,
                    A --> B
                    A --> C
                represents a _single_ event `A` with two prev events, `B` and `C`.
                */
                events.into_iter()
                    .fold(IndexMap::new(), |mut batch: Batch<'_>, (id, prev_event)| {
                        // Find the prev events set of this event in the batch.
                        // If it doesn't exist, make a new empty one.
                        let mut prev_events = batch.entry(id).or_default();
                        // If this event line defines a prev event to add, insert it into the set.
                        if let Some(prev_event) = prev_event {
                            prev_events.insert(prev_event);
                        }
                        batch
                    })
            }
        /// Parse the order section: one stitched item per line. A trailing `*`
        /// marks the item as newly appended rather than inserted into a gap.
        rule order() -> Order<'input> =
            items:(item:stitched_item() new:"*"? { (item, new.is_some()) }) ** newline()
            {
                let (mut inserted_items, mut new_items) = (vec![], vec![]);
                for (item, new) in items {
                    if new {
                        new_items.push(item);
                    } else {
                        inserted_items.push(item);
                    }
                }
                Order {
                    inserted_items,
                    new_items,
                }
            }
        /// Parse the optional list of events expected to be slotted into gaps.
        rule updated_gaps() -> HashSet<TestEventId<'input>> =
            events:event_id() ++ newline() { events.into_iter().collect() }
        /// Parse a single phase: batch, expected order, optional gap notifications.
        rule phase() -> Phase<'input> =
            "=== when we receive these events ==="
            newline() batch:batch()
            newline() "=== then we arrange into this order ==="
            newline() order:order()
            updated_gaps:(
                newline() "=== and we notify about these gaps ==="
                newline() updated_gaps:updated_gaps() { updated_gaps }
            )?
            { Phase { batch, order, updated_gaps } }
        /// Parse a whole test case file as a sequence of phases.
        pub rule testcase() -> TestCase<'input> = phase() ++ newline()
    }
}
/// Parse a complete `.stitched` test case, panicking on malformed input.
pub(super) fn parse<'input>(input: &'input str) -> TestCase<'input> {
    // Trailing blank lines are not part of the grammar, so strip them first.
    let trimmed = input.trim_ascii_end();
    let parsed = testcase::testcase(trimmed);
    parsed.expect("parse error")
}

View file

@ -0,0 +1,22 @@
=== when we receive these events ===
A
B --> A
C --> B
=== then we arrange into this order ===
# Given the server has some existing events in this order:
A*
B*
C*
=== when we receive these events ===
# When it receives new ones:
D --> C
E --> D
=== then we arrange into this order ===
# Then it simply appends them at the end of the order:
A
B
C
D*
E*

View file

@ -0,0 +1,46 @@
=== when we receive these events ===
A1
A2 --> A1
A3 --> A2
=== then we arrange into this order ===
# Given the server has some existing events in this order:
A1*
A2*
A3*
=== when we receive these events ===
# And after a netsplit the server receives some unrelated events, which refer to
# some unknown event, because the server didn't receive all of them:
B7 --> B6
B8 --> B7
B9 --> B8
=== then we arrange into this order ===
# Then these events are new, and we add a gap to show something is missing:
A1
A2
A3
-B6*
B7*
B8*
B9*
=== when we receive these events ===
# Then if we backfill and receive more of those events later:
B4 --> B3
B5 --> B4
B6 --> B5
=== then we arrange into this order ===
# They are slotted into the gap, and a new gap is created to represent the
# still-missing events:
A1
A2
A3
-B3
B4
B5
B6
B7
B8
B9
=== and we notify about these gaps ===
B6

View file

@ -0,0 +1,30 @@
=== when we receive these events ===
D --> C
=== then we arrange into this order ===
# We may see situations that are ambiguous about whether an event is new or
# belongs in a gap, because it is a predecessor of a gap event and also has a
# new event as its predecessor. This is a rare case where either outcome could be
# valid. If the initial order is this:
-C*
D*
=== when we receive these events ===
# And then we receive B
B --> A
=== then we arrange into this order ===
# Which is new because it's unrelated to everything else
-C
D
-A*
B*
=== when we receive these events ===
# And later it turns out that C refers back to B
C --> B
=== then we arrange into this order ===
# Then we place C into the early gap even though it is after B, so arguably
# should be the newest
C
D
-A
B
=== and we notify about these gaps ===
C

View file

@ -0,0 +1,28 @@
=== when we receive these events ===
# An ambiguous situation can occur when we have multiple gaps that both might
# accept an event. This should be relatively rare.
A --> G1
B --> A
C --> G2
=== then we arrange into this order ===
-G1*
A*
B*
-G2*
C*
=== when we receive these events ===
# When we receive F, which is a predecessor of both G1 and G2
F
G1 --> F
G2 --> F
=== then we arrange into this order ===
# Then F appears in the earlier gap, but arguably it should appear later.
F
G1
A
B
G2
C
=== and we notify about these gaps ===
G1
G2

View file

@ -0,0 +1,10 @@
=== when we receive these events ===
# Even though we see C first, it is re-ordered because we must obey prev_events
# so A comes first.
C --> A
A
B --> A
=== then we arrange into this order ===
A*
C*
B*

View file

@ -0,0 +1,8 @@
=== when we receive these events ===
A
B --> A
C --> B
=== then we arrange into this order ===
A*
B*
C*

View file

@ -0,0 +1,18 @@
=== when we receive these events ===
# A chain ABC
A
B --> A
C --> B
# And a separate chain XYZ
X --> W
Y --> X
Z --> Y
=== then we arrange into this order ===
# Should produce them in order with a gap
A*
B*
C*
-W*
X*
Y*
Z*

View file

@ -0,0 +1,18 @@
=== when we receive these events ===
# Same as empty_then_two_chains except for received order
# A chain ABC, and a separate chain XYZ, but interleaved
A
X --> W
B --> A
Y --> X
C --> B
Z --> Y
=== then we arrange into this order ===
# Should produce them in order with a gap
A*
-W*
X*
B*
Y*
C*
Z*

View file

@ -0,0 +1,33 @@
=== when we receive these events ===
# Given 3 gaps exist
B --> A
D --> C
F --> E
=== then we arrange into this order ===
-A*
B*
-C*
D*
-E*
F*
=== when we receive these events ===
# When we fill one with something that also refers to non-existent events
C --> X
C --> Y
G --> C
G --> Z
=== then we arrange into this order ===
# Then we fill in the gap (C) and make new gaps too (X+Y and Z)
-A
B
-X,Y
C
D
-E
F
-Z*
G*
=== and we notify about these gaps ===
# And we notify about the gap that was updated
C

View file

@ -0,0 +1,13 @@
=== when we receive these events ===
# Several events refer to missing events and the events are unrelated
C --> Y
C --> Z
A --> X
B
=== then we arrange into this order ===
# The gaps appear immediately before the events referring to them
-Y,Z*
C*
-X*
A*
B*

View file

@ -0,0 +1,14 @@
=== when we receive these events ===
# Several events refer to missing events and the events are related
C --> Y
C --> Z
C --> B
A --> X
B --> A
=== then we arrange into this order ===
# The gaps appear immediately before the events referring to them
-X*
A*
B*
-Y,Z*
C*

View file

@ -0,0 +1,15 @@
=== when we receive these events ===
# The relationships determine the order here, so they override received order
F --> E
C --> B
D --> C
E --> D
B --> A
A
=== then we arrange into this order ===
A*
B*
C*
D*
E*
F*

View file

@ -0,0 +1,27 @@
=== when we receive these events ===
# Given 3 gaps exist
B --> A
D --> C
F --> E
=== then we arrange into this order ===
-A*
B*
-C*
D*
-E*
F*
=== when we receive these events ===
# When the first of them is filled in
A
=== then we arrange into this order ===
# Then we slot it into the gap, not at the end
A
B
-C
D
-E
F
=== and we notify about these gaps ===
# And we notify about the gap being filled in
A

View file

@ -0,0 +1,27 @@
=== when we receive these events ===
# Given 3 gaps exist
B --> A
D --> C
F --> E
=== then we arrange into this order ===
-A*
B*
-C*
D*
-E*
F*
=== when we receive these events ===
# When the last gap is filled in
E
=== then we arrange into this order ===
# Then we slot it into the gap, not at the end
-A
B
-C
D
E
F
=== and we notify about these gaps ===
# And we notify about the gap being filled in
E

View file

@ -0,0 +1,27 @@
=== when we receive these events ===
# Given 3 gaps exist
B --> A
D --> C
F --> E
=== then we arrange into this order ===
-A*
B*
-C*
D*
-E*
F*
=== when we receive these events ===
# When a middle one is filled in
C
=== then we arrange into this order ===
# Then we slot it into the gap, not at the end
-A
B
C
D
-E
F
=== and we notify about these gaps ===
# And we notify about the gap being filled in
C

View file

@ -0,0 +1,29 @@
=== when we receive these events ===
# Given a couple of gaps
B --> X2
D --> X4
=== then we arrange into this order ===
-X2*
B*
-X4*
D*
=== when we receive these events ===
# And linked events that fill those in and are newer
X1
X2 --> X1
X3 --> X2
X4 --> X3
X5 --> X4
=== then we arrange into this order ===
# Then the gaps are filled and new events appear at the front
X1
X2
B
X3
X4
D
X5*
=== and we notify about these gaps ===
X2
X4

View file

@ -0,0 +1,31 @@
=== when we receive these events ===
# Given a couple of gaps
B --> X2a
D --> X3
=== then we arrange into this order ===
-X2a*
B*
-X3*
D*
=== when we receive these events ===
# When we receive a diamond that touches gaps and some new events
X1
X2a --> X1
X2b --> X1
X3 --> X2a
X3 --> X2b
X4 --> X3
=== then we arrange into this order ===
# Then matching events and direct predecessors fit into the gaps
# and the remaining events are treated as new
X1
X2a
B
X2b
X3
D
X4*
=== and we notify about these gaps ===
X2a
X3

View file

@ -0,0 +1,25 @@
=== when we receive these events ===
# Given a gap before all the Bs
B1 --> C2
B2 --> B1
=== then we arrange into this order ===
-C2*
B1*
B2*
=== when we receive these events ===
# When a batch arrives with a not-last event matching the gap
C1
C2 --> C1
C3 --> C2
=== then we arrange into this order ===
# Then we slot the matching events into the gap
# and the later events are new
C1
C2
B1
B2
C3*
=== and we notify about these gaps ===
# And we notify about the gap being filled in
C2

View file

@ -0,0 +1,26 @@
=== when we receive these events ===
# Given a gap before all the Bs
B1 --> C2
B2 --> B1
=== then we arrange into this order ===
-C2*
B1*
B2*
=== when we receive these events ===
# When a batch arrives with a not-last event matching the gap, and the last
# event is linked to a recent event
C1
C2 --> C1
C3 --> C2
C3 --> B2
=== then we arrange into this order ===
# Then we slot the entire batch into the gap
C1
C2
B1
B2
C3*
=== and we notify about these gaps ===
# And we notify about the gap being filled in
C2

View file

@ -0,0 +1,26 @@
=== when we receive these events ===
# If multiple events all refer to the same missing event:
A --> X
B --> X
C --> X
=== then we arrange into this order ===
# Then we insert gaps before all of them. This avoids the need to search the
# entire existing order whenever we create a new gap.
-X*
A*
-X*
B*
-X*
C*
=== when we receive these events ===
# The ambiguity is resolved when the missing event arrives:
X
=== then we arrange into this order ===
# We choose the earliest gap, and all the relevant gaps are removed (which does
# mean we need to search the existing order).
X
A
B
C
=== and we notify about these gaps ===
X

View file

@ -0,0 +1,29 @@
=== when we receive these events ===
# Several events refer to the same missing event, but the first refers to
# others too
A --> X
A --> Y
A --> Z
B --> X
C --> X
=== then we arrange into this order ===
# We end up with multiple gaps
-X,Y,Z*
A*
-X*
B*
-X*
C*
=== when we receive these events ===
# When we receive the missing item
X
=== then we arrange into this order ===
# It goes into the earliest slot, and the non-empty gap remains
-Y,Z
X
A
B
C
=== and we notify about these gaps ===
X

View file

@ -0,0 +1,28 @@
=== when we receive these events ===
# Several events refer to the same missing event, but one refers to others too
A --> X
B --> X
B --> Y
B --> Z
C --> X
=== then we arrange into this order ===
# We end up with multiple gaps
-X*
A*
-X,Y,Z*
B*
-X*
C*
=== when we receive these events ===
# When we receive the missing item
X
=== then we arrange into this order ===
# It goes into the earliest slot, and the non-empty gap remains
X
A
-Y,Z
B
C
=== and we notify about these gaps ===
X

View file

@ -0,0 +1,9 @@
=== when we receive these events ===
# A refers to multiple missing things
A --> X
A --> Y
A --> Z
=== then we arrange into this order ===
# But we only make one gap, with multiple IDs in it
-X,Y,Z*
A*

View file

@ -0,0 +1,23 @@
=== when we receive these events ===
A
F --> B
F --> C
F --> D
F --> E
=== then we arrange into this order ===
# Given a gap that lists several nodes:
A*
-B,C,D,E*
F*
=== when we receive these events ===
# When we provide one of the missing events:
C
=== then we arrange into this order ===
# Then it is inserted after the gap, and the gap is shrunk:
A
-B,D,E
C
F
=== and we notify about these gaps ===
# And we notify about the gap that was updated
C

View file

@ -0,0 +1,27 @@
=== when we receive these events ===
# Given an event references multiple missing events
A
F --> B
F --> C
F --> D
F --> E
=== then we arrange into this order ===
A*
-B,C,D,E*
F*
=== when we receive these events ===
# When we provide some of the missing events
C
E
=== then we arrange into this order ===
# Then we insert them after the gap and shrink the list of events in the gap
A
-B,D
C
E
F
=== and we notify about these gaps ===
# And we notify about the gap that was updated
C
E

View file

@ -0,0 +1,16 @@
=== when we receive these events ===
# Everything is after A, but there is no prev_event chain between the others, so
# we use received order.
A
F --> A
C --> A
D --> A
E --> A
B --> A
=== then we arrange into this order ===
A*
F*
C*
D*
E*
B*

View file

@ -0,0 +1,16 @@
=== when we receive these events ===
# We preserve the received order where it does not conflict with the prev_events
A
X --> W
Y --> X
Z --> Y
B --> A
C --> B
=== then we arrange into this order ===
A*
-W*
X*
Y*
Z*
B*
C*