diff --git a/Cargo.toml b/Cargo.toml index b1a646c..ecdcc2b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,9 @@ edition = "2024" authors = ["Manuel Penschuck"] description = "Utilities to read PACE26 instances and write answers" license = "GPL-3.0-or-later" +repository = "https://github.com/manpen/pace26io" +homepage = "https://pacechallenge.org/2026/" +exclude = ["/.github"] [dependencies] serde = "1.0.228" diff --git a/README.md b/README.md new file mode 100644 index 0000000..5bc4a18 --- /dev/null +++ b/README.md @@ -0,0 +1,48 @@ +# PACE 2026 I/O Crate + +This crate implements parsers and writers for the PACE 2026 file format. It was originally +developed for the official PACE tools (e.g., verifier and stride). As such, it offers a +great deal of flexibility including quite pedantic parsing modes. Most users should stay +away from this mess and rather use the simplified reader interface: + +## Simplified reader interface + +We offer a simplified interface in [`pace::simplified::Instance`] intended to be used by solver +implementers. To read an instance, you may use: + +```rust +use std::{fs::File, io::BufReader}; +use pace26io::{binary_tree::*, newick::NewickWriter, pace::simplified::*}; + +type Builder = IndexedBinTreeBuilder; // If you do not care about inner node indices, use BinTreeBuilder +type Node = <Builder as TreeBuilder>::Node; + +// A solver would typically use `std::io::stdin().lock()` instead of reading a file +let mut input = BufReader::new(File::open("examples/tiny01.nw").unwrap()); + +// Parse instance +let mut tree_builder = Builder::default(); +let instance = Instance::try_read(&mut input, &mut tree_builder) + .expect("Valid PACE26 Instance"); + +println!("# Found {} trees", instance.trees.len()); +``` + +This interface will ignore most parser warnings and only raise errors if parsing cannot continue. We recommend the `stride` tool to debug broken instances. 
+ +## Tree representation + +We offer only rudimentary tree representations, more specifically +[`binary_tree::BinTree`] and [`binary_tree::IndexedBinTree`]. The latter also stores +node ids of internal nodes, which are for instance used by graph parameters. + +We expect that solvers will typically need more control over their data structures. +For this reason, the crate is designed to make implementing your own tree structures straightforward. +You need to provide: + - A node type which represents both inner nodes and leaves. It needs to implement [`binary_tree::TopDownCursor`] and --if applicable-- [`binary_tree::TreeWithNodeIdx`]. + - A struct implementing [`binary_tree::TreeBuilder`]. + +## Writing Newick strings + +A Newick string writer is provided for each data structure implementing [`binary_tree::TopDownCursor`]. +For further details see [`newick::NewickWriter`]. diff --git a/examples/normalize.rs b/examples/normalize.rs new file mode 100644 index 0000000..22ef112 --- /dev/null +++ b/examples/normalize.rs @@ -0,0 +1,76 @@ +/// This example reads in an instance and reorders, in every tree, the children of each +/// inner node such that the left child always contains the smallest leaf label. 
+/// +/// To execute it, run `cat examples/tiny01.nw | cargo run --example normalize` +use pace26io::{binary_tree::*, newick::NewickWriter, pace::simplified::*}; + +type Builder = IndexedBinTreeBuilder; // If you do not care about inner node indices, use BinTreeBuilder +type Node = <Builder as TreeBuilder>::Node; + +fn main() { + let mut tree_builder = Builder::default(); + let instance = Instance::try_read(&mut std::io::stdin().lock(), &mut tree_builder) + .expect("Valid PACE26 Instance"); + + println!("# Found {} trees", instance.trees.len()); + if let Some(td) = instance.tree_decomposition.as_ref() { + println!( + "# Found tree decomposition with treewidth {}, {} bags, and {} edges", + td.treewidth, + td.bags.len(), + td.edges.len() + ); + } + + for (tree_id, tree) in instance.trees.iter().enumerate() { + let root_id = (tree_id + 1) * (instance.num_leaves - 1) + 2; + let normalized_tree = + build_normalized_tree(&mut tree_builder, tree, NodeIdx(root_id as u32)); + + println!("{}", normalized_tree.top_down().to_newick_string()); + } +} + +fn build_normalized_tree( + builder: &mut Builder, + node: impl TopDownCursor, + node_id: NodeIdx, +) -> Node { + let root = build_normalized_tree_rec(builder, node, node_id).0; + builder.make_root(root) +} + +fn build_normalized_tree_rec( + builder: &mut Builder, + node: impl TopDownCursor, + node_id: NodeIdx, +) -> (Node, Label, NodeIdx) { + match node.visit() { + // Base case: for a leaf we simply copy the label and build a new leaf node; no inner node id is consumed + NodeType::Leaf(label) => (builder.new_leaf(label), label, node_id), + + // Recursion into subtrees: + NodeType::Inner(left, right) => { + // recursively descend into both subtrees; each call returns the next unused inner node id + let (child0, label0, next_node_id) = + build_normalized_tree_rec(builder, left, node_id.incremented()); + let (child1, label1, next_node_id) = + build_normalized_tree_rec(builder, right, next_node_id); + + // construct a new inner node; the subtree containing the smaller leaf label becomes the left child + if label0 < label1 { + ( + builder.new_inner(node_id, child0, 
child1), + label0, + next_node_id, + ) + } else { + ( + builder.new_inner(node_id, child1, child0), + label1, + next_node_id, + ) + } + } + } +} diff --git a/examples/tiny01.nw b/examples/tiny01.nw new file mode 100644 index 0000000..f2fa395 --- /dev/null +++ b/examples/tiny01.nw @@ -0,0 +1,7 @@ +#s hash "0x118494c254c36869026cfc" +#s name "tiny01" +#s desc "Example shown on https://pacechallenge.org" +#p 2 6 +(((5,6),(3,4)),(1,2)); +(((((4,2),1),5),3),6); +#x treedecomp [2,[[8,16],[8,11,16],[1,11,15],[2,11,16],[7,8,11],[8,10,16],[3,10,13],[4,10,16],[8,9],[5,9,14],[6,9,12]],[[1,2],[1,6],[1,9],[2,3],[2,4],[2,5],[6,7],[6,8],[9,10],[9,11]]] diff --git a/src/binary_tree/mod.rs b/src/binary_tree/mod.rs index 33a70bf..21eb244 100644 --- a/src/binary_tree/mod.rs +++ b/src/binary_tree/mod.rs @@ -6,7 +6,7 @@ pub use indexed_bin_tree::*; pub mod depth_first_search; pub use depth_first_search::DepthFirstSearch; -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] pub struct NodeIdx(pub u32); impl NodeIdx { @@ -28,11 +28,13 @@ impl From