implement cleaned up and well documented version of bot engine

2023-04-11 18:50:24 -04:00 · 2023-04-11 18:50:24 -04:00 · 2a53e992ae
parent 3fb3743cd1
commit 2a53e992ae
4 changed files with 455 additions and 17 deletions
--- a/fish/src/bot.rs
+++ b/fish/src/bot.rs
@ -0,0 +1,183 @@
+//! Primary interface to working with the Blockfish engine. The [`Bot`] type controls an
+//! anytime algorithm that will provide a suggestion for the next move. It may be
+//! repeatedly polled by the `think` method in order to attempt to improve the suggestion.
+
+use alloc::collections::BinaryHeap;
+use alloc::vec::Vec;
+use mino::matrix::Mat;
+use mino::srs::{Piece, Queue};
+
+mod node;
+
+use self::node::{Node, RawNodePtr};
+
+pub(crate) use bumpalo::Bump as Arena;
+
+/// Encompasses an instance of the algorithm.
+///
+/// Owns both the search state and the arena that the search nodes are allocated into.
+pub struct Bot {
+    // Search state. Holds raw pointers to nodes that were allocated in `arena`.
+    algorithm: SegmentedAStar,
+    // IMPORTANT: `arena` must occur after `algorithm` so that it is dropped last.
+    // (Struct fields are dropped in declaration order.)
+    arena: Arena,
+}
+
+impl Bot {
+    /// Builds a bot whose search starts from the given initial state (matrix and queue).
+    ///
+    /// The root node is allocated into an arena owned by the returned bot.
+    // TODO: specify weights
+    pub fn new(matrix: &Mat, queue: Queue<'_>) -> Self {
+        let arena = Arena::new();
+        let algorithm = SegmentedAStar::new(Node::alloc_root(&arena, matrix, queue));
+        Self { algorithm, arena }
+    }
+
+    /// Performs one "iteration" of work, which may end up improving the suggestion.
+    ///
+    /// What counts as an iteration is deliberately vague, but similar versions of the
+    /// engine should be deterministic: running the same number of iterations should
+    /// yield the same suggestion.
+    pub fn think(&mut self) {
+        let arena = &self.arena;
+        self.algorithm.step(arena);
+    }
+
+    /// Returns the current best suggested placement.
+    ///
+    /// `None` is returned in two situations:
+    /// - `think` has not been called enough times to produce an initial suggestion.
+    /// - there are no valid placements for the initial state
+    pub fn suggest(&self) -> Option<Piece> {
+        let terminal = self.algorithm.best()?;
+        terminal.root_placement()
+    }
+}
+
+// This implements an algorithm that is very similar to A* but has a slight
+// modification. Rather than one big open set, there are separate sets at each depth of
+// the search. After picking a node from one open set and expanding its children into the
+// successor set, we next pick a node from that successor set. This process continues
+// until a terminal node is reached. In order to select which open set to start picking
+// from next, we look globally at all the open sets and find the node with the best
+// rating; this part works just as it would if there were only one open set.
+//
+// Only terminal nodes are compared in order to pick a suggestion. An interesting
+// consequence of this design is that on the first run of the algorithm we end up
+// performing a best-first-search, and the first terminal node found ends up being our
+// initial suggestion. This best-first search terminates very quickly, so it is nice from
+// the perspective of an anytime algorithm.
+//
+// The problem with directly applying A* for an anytime downstacking algorithm is that
+// simply looking for the best heuristic measurement (f) can lead you into a situation
+// where a node that only made 2 placements has a better score than all of the nodes with
+// 3+ placements, and thus it is considered the best. This is definitely not correct,
+// since that 2-placement node only leads to worse board states as you continue to place
+// pieces on the board. In downstacking you have to place all of your pieces, you can't
+// just stop after placing a few and arriving at a good board state! So before actually
+// considering a node to be a suggestion we have to make sure we run out all of the queue
+// first (i.e. it is a terminal node), and only then should we check its rating.
+
+struct SegmentedAStar {
+    // One open set per search depth, indexed by depth. Each heap pops its lowest-rated
+    // node first (see the `Ord` impl of `AStarNode`).
+    open: Vec<BinaryHeap<AStarNode>>,
+    // Index into `open` of the set that `expand` will pop from next.
+    depth: usize,
+    // Best (lowest-rated) terminal node recorded by `backup` so far, if any.
+    best: Option<RawNodePtr>,
+}
+
+// Marker error returned by `SegmentedAStar::expand` when it did not expand a node and
+// `select` should be run to pick the depth to continue from.
+#[derive(Debug)]
+struct ShouldSelect;
+
+impl SegmentedAStar {
+    // Creates the search state with `root` as the only open node, at depth 0.
+    fn new(root: &Node) -> Self {
+        let mut frontier = BinaryHeap::new();
+        frontier.push(AStarNode::from(root));
+
+        let mut open = Vec::with_capacity(root.queue().len());
+        open.push(frontier);
+
+        Self {
+            open,
+            depth: 0,
+            best: None,
+        }
+    }
+
+    // Returns the best terminal node found so far, if any.
+    fn best(&self) -> Option<&Node> {
+        // SAFETY: `best` is only ever set from a `&Node` in `backup`. This relies on the
+        // arena holding that node outliving `self` (see the field order of `Bot`).
+        self.best.map(|node| unsafe { node.as_node() })
+    }
+
+    // Performs one unit of work: expand a node at the current depth, or, if that is not
+    // possible, select the depth to continue from on the next step.
+    fn step(&mut self, arena: &Arena) {
+        if self.expand(arena).is_err() {
+            self.select();
+        }
+    }
+
+    // Pops the best node of the open set at the current depth and pushes its successors
+    // into the open set one level deeper, which becomes the current depth.
+    //
+    // Returns the expanded node. Returns `Err(ShouldSelect)` if there is no open set at
+    // the current depth, if that set is empty, or if the popped node was terminal (in
+    // which case it is first offered to `backup` as a suggestion candidate).
+    fn expand(&mut self, arena: &Arena) -> Result<&Node, ShouldSelect> {
+        let cand = self
+            .open
+            .get_mut(self.depth)
+            .and_then(|set| set.pop())
+            .ok_or(ShouldSelect)?;
+        // SAFETY: every `AStarNode` in `open` was created from a `&Node`; this relies on
+        // those nodes living in the arena, which must outlive `self`.
+        let cand = unsafe { cand.0.as_node() };
+
+        if cand.is_terminal() {
+            self.depth = self.open.len(); // makes expand() fail immediately
+            self.backup(cand);
+            return Err(ShouldSelect);
+        }
+
+        self.depth += 1;
+        if self.open.len() <= self.depth {
+            self.open.resize_with(self.depth + 1, BinaryHeap::new);
+        }
+
+        // Push one at a time (rather than `extend`) so the heap is built in exactly the
+        // order the successors are generated.
+        let successors = &mut self.open[self.depth];
+        for suc in cand.expand(arena) {
+            successors.push(suc.into());
+        }
+
+        Ok(cand)
+    }
+
+    // Records `cand` as the new suggestion if its rating is strictly lower than that of
+    // the current best, or if there is no current best.
+    fn backup(&mut self, cand: &Node) {
+        let rating = cand.rating();
+        let improved = match self.best() {
+            None => true,
+            Some(prev) => rating < prev.rating(),
+        };
+        if improved {
+            tracing::debug!(
+                "update suggestion ({}): {rating}",
+                self.best.map_or("1st", |_| "new")
+            );
+            self.best = Some(cand.into());
+        }
+    }
+
+    // Looks at the top node of every non-empty open set and moves `depth` to the set whose
+    // top node has the lowest rating (the shallowest such set on ties). Leaves `depth`
+    // untouched when every open set is empty.
+    fn select(&mut self) {
+        let best_depth = self
+            .open
+            .iter()
+            .enumerate()
+            .filter_map(|(depth, set)| {
+                let top = set.peek()?;
+                // SAFETY: same invariant as in `expand`: nodes referenced from `open`
+                // must still be alive in the arena.
+                let node = unsafe { top.0.as_node() };
+                Some((depth, node.rating()))
+            })
+            .min_by_key(|&(_, rating)| rating)
+            .map(|(depth, _)| depth);
+
+        if let Some(depth) = best_depth {
+            self.depth = depth;
+        }
+    }
+}
+
+// Wraps a `Node` pointer but implements `cmp::Ord` in order to compare by rating.
+// The ordering is reversed, so a `BinaryHeap` of these pops the lowest-rated node first.
+#[derive(Copy, Clone)]
+struct AStarNode(RawNodePtr);
+
+impl From<&Node> for AStarNode {
+    // Stores only a raw pointer to `node`; the borrow's lifetime is not tracked.
+    fn from(node: &Node) -> Self {
+        let ptr = RawNodePtr::from(node);
+        Self(ptr)
+    }
+}
+
+impl core::cmp::Ord for AStarNode {
+    // Compares by rating in reverse: the node with the *lower* rating is the "greater"
+    // element.
+    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
+        // SAFETY: relies on both wrapped pointers referring to nodes that are still alive.
+        let (this, that) = unsafe { (self.0.as_node(), other.0.as_node()) };
+        that.rating().cmp(&this.rating())
+    }
+}
+
+impl core::cmp::PartialOrd for AStarNode {
+    // Always `Some`: delegates to the total order defined by `Ord::cmp`.
+    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
+        Some(Ord::cmp(self, other))
+    }
+}
+
+// Equality is defined through `cmp` (see `PartialEq` below), i.e. by rating.
+impl core::cmp::Eq for AStarNode {}
+
+impl core::cmp::PartialEq for AStarNode {
+    // Two wrappers are equal when `cmp` says so, i.e. when the ratings are equal, even if
+    // they point at different nodes.
+    fn eq(&self, other: &Self) -> bool {
+        self.cmp(other) == core::cmp::Ordering::Equal
+    }
+}
--- a/fish/src/bot/node.rs
+++ b/fish/src/bot/node.rs
@ -0,0 +1,253 @@
+//! Graph data structures used by `Bot` in its search algorithm.
+
+use mino::matrix::{Mat, MatBuf};
+use mino::srs::{Piece, PieceType, Queue};
+
+use crate::bot::Arena;
+use crate::eval::evaluate;
+use crate::find::find_locations;
+
+/// Represents a node in the search tree. A node basically just consists of a board state
+/// (incl. queue) and some extra metadata relating it to previous nodes in the tree.
+pub(crate) struct Node {
+    // Board state at this node, stored as a raw pointer (no lifetime is tracked).
+    matrix: *const Mat,
+    // Queue at this node, stored as raw components (no lifetime is tracked).
+    queue: RawQueue,
+    // Placement and parent that lead to this node; `None` for a root node.
+    edge: Option<Edge>,
+    // Number of edges between this node and the root (0 for the root).
+    pcnt: u32,
+    // Value returned by `evaluate` for this node's matrix and `pcnt`.
+    rating: i32,
+    // currently there is no need to store a node's children, but maybe this could change
+    // in the future.
+}
+
+// Copies the contents of `matrix` into `arena` and returns a matrix view over that copy.
+fn copy_matrix<'a>(arena: &'a Arena, matrix: &Mat) -> &'a Mat {
+    let data = arena.alloc_slice_copy(&matrix[..]);
+    Mat::new(data)
+}
+
+// Copies the `next` pieces of `queue` into `arena`; the hold piece is carried over as is.
+fn copy_queue<'a>(arena: &'a Arena, queue: Queue<'_>) -> Queue<'a> {
+    let Queue { hold, next } = queue;
+    let next = arena.alloc_slice_copy(next);
+    Queue { hold, next }
+}
+
+impl Node {
+    /// Allocate a root node using the given arena and initial configuration. The initial
+    /// matrix and queue are also allocated onto the arena, so you do not need to worry
+    /// about their lifetimes when managing the lifetime of the root.
+    pub fn alloc_root<'a>(arena: &'a Arena, matrix: &Mat, queue: Queue<'_>) -> &'a Self {
+        let matrix = copy_matrix(arena, matrix);
+        let queue = copy_queue(arena, queue);
+        Node::alloc(arena, matrix, queue, None)
+    }
+
+    // Allocates a node into `arena`, computing its piece count from `edge` and its rating
+    // from `evaluate`.
+    // `matrix` and `queue` must be allocated inside `arena`
+    fn alloc<'a>(
+        arena: &'a Arena,
+        matrix: &'a Mat,
+        queue: Queue<'a>,
+        edge: Option<Edge>,
+    ) -> &'a Self {
+        // The root has made no placements; every other node has one more than its parent.
+        let pcnt = match &edge {
+            None => 0,
+            Some(e) => e.parent().pcnt + 1,
+        };
+        let queue = RawQueue::from(queue);
+
+        // FIXME: the old blockfish has two special edge cases for rating nodes that are
+        // not handled here.
+        //
+        // 1. nodes that reach the bottom of the board early ("solutions") are highly
+        // prioritized. this is done by using the piece count *as the rating* in order to
+        // force it to be extremely low, as well as sorting solutions by # of pieces in
+        // case there are multiple. according to frey, this probably causes blockfish to
+        // greed out in various scenarios where it sees a path to the bottom but it is not
+        // actually the end of the race. part of the issue is of course that it isn't
+        // communicated to blockfish whether or not the bottom of the board is actually
+        // the end of the race, but also that the intermediate steps to get to the bottom
+        // may be suboptimal placements when it isn't.
+        //
+        // 2. blockfish would actually average the last two evaluations and use that as
+        // the final rating. this is meant as a concession for the fact that the last
+        // placement made by the bot is not actually a placement we are required to make,
+        // since in reality there is going to be the opportunity to hold the final piece
+        // and use something else instead. so the 2nd to last rating is important in cases
+        // where the last piece leads to suboptimal board states which may be able to be
+        // avoided by holding the last piece. i think this improves the performance only
+        // slightly, but it is also a bit of a hack that deserves further consideration.
+
+        let rating = evaluate(matrix, pcnt as usize); // FIXME: pass weights to evaluation function
+
+        arena.alloc_with(|| Self {
+            matrix,
+            queue,
+            edge,
+            pcnt,
+            rating,
+        })
+    }
+
+    /// Returns the board state of this node.
+    pub fn matrix(&self) -> &Mat {
+        // SAFETY: `matrix` was stored from a reference in `alloc`; this relies on the
+        // arena backing that reference still being alive.
+        unsafe { &*self.matrix }
+    }
+
+    /// Returns the queue of this node.
+    pub fn queue(&self) -> Queue<'_> {
+        // SAFETY: `queue` was built from a `Queue` in `alloc`; this relies on the arena
+        // backing its `next` slice still being alive.
+        unsafe { self.queue.as_queue() }
+    }
+
+    /// Returns the rating computed for this node when it was allocated.
+    pub fn rating(&self) -> i32 {
+        self.rating
+    }
+
+    /// Returns `true` if this node's queue is empty.
+    pub fn is_terminal(&self) -> bool {
+        // TODO: additional terminal-node conditions e.g. clears last row of garbage
+        self.queue().is_empty()
+    }
+
+    /// Get the initial placement made after the root node which eventually arrives at
+    /// this node. Returns `None` if this node has no edge (i.e. it is a root).
+    pub fn root_placement(&self) -> Option<Piece> {
+        let mut root_placement = None;
+        let mut parent = Some(self);
+        // Walk up the parent chain; each edge visited overwrites `root_placement`, so the
+        // value left at the end is the placement of the edge closest to the root.
+        while let Some(node) = parent.take() {
+            parent = node.edge.as_ref().map(|e| {
+                root_placement = Some(e.placement);
+                e.parent()
+            });
+        }
+        root_placement
+    }
+
+    /// Expands this node, allocating the children into the given arena.
+    ///
+    /// The returned iterator is lazy: each child is allocated when it is yielded.
+    // `self` must be allocated inside `arena`
+    pub fn expand<'a>(&'a self, arena: &'a Arena) -> impl Iterator<Item = &'a Node> + 'a {
+        // Every location found by `find_locations` for every piece type the queue reports
+        // as reachable.
+        let placements = self.queue().reachable().flat_map(|ty| {
+            let locs = find_locations(self.matrix(), ty);
+            locs.map(move |loc| Piece { ty, loc })
+        });
+
+        // Scratch buffer reused for every successor before it is copied into the arena.
+        let mut matrix = MatBuf::new();
+
+        placements.map(move |placement| {
+            matrix.copy_from(self.matrix());
+            placement.cells().fill(&mut matrix);
+            matrix.clear_lines();
+            // TODO: the above call returns useful information about if this placement is
+            // a combo, does it clear the bottom row of garbage. this should be used for
+            // prioritizing nodes
+
+            let parent = RawNodePtr::from(self);
+            let edge = Edge { placement, parent };
+            let suc_matrix = copy_matrix(arena, &matrix);
+            let suc_queue = self.queue().remove(placement.ty);
+
+            // TODO: transposition table lookup
+            Node::alloc(arena, suc_matrix, suc_queue, Some(edge))
+        })
+    }
+}
+
+/// Represents an edge in the graph, pointing from a node to its parent. Particularly,
+/// contains the placement made in order to arrive at the child from the parent.
+struct Edge {
+    // Piece placement that turns the parent's state into the child's state.
+    placement: Piece,
+    // Raw pointer to the parent node (no lifetime is tracked).
+    parent: RawNodePtr,
+}
+
+impl Edge {
+    // Returns the parent node this edge points to.
+    fn parent(&self) -> &Node {
+        // SAFETY: relies on the parent node still being alive; NOTE(review): presumably
+        // guaranteed because parents and children share one arena. Confirm.
+        unsafe { self.parent.as_node() }
+    }
+}
+
+/// Wraps a raw pointer to a `Node`, requiring you to manage the lifetime yourself.
+///
+/// Equality and hashing are derived, so they compare the pointer, not the node contents.
+#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
+#[repr(transparent)]
+pub(crate) struct RawNodePtr(*const Node);
+
+impl RawNodePtr {
+    /// Reborrows the pointed-to node with a caller-chosen lifetime.
+    ///
+    /// # Safety
+    ///
+    /// The pointer must refer to a `Node` that is valid for the whole of `'a`.
+    pub unsafe fn as_node<'a>(self) -> &'a Node {
+        &*self.0
+    }
+}
+
+impl From<&Node> for RawNodePtr {
+    // Erases the lifetime of `node`, keeping only its address.
+    fn from(node: &Node) -> Self {
+        let ptr: *const Node = node;
+        Self(ptr)
+    }
+}
+
+/// Wraps the raw components of a `Queue`, requiring you to manage the lifetime yourself.
+#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
+struct RawQueue {
+    // The queue's hold piece, if any.
+    hold: Option<PieceType>,
+    // Number of elements in the `next` slice.
+    len: u16, // u16 to save space esp. considering padding
+    // Pointer to the first element of the `next` slice.
+    next: *const PieceType,
+}
+
+impl RawQueue {
+    /// Rebuilds a `Queue` from the stored components with a caller-chosen lifetime.
+    ///
+    /// # Safety
+    ///
+    /// `next` must point to `len` valid `PieceType` values that stay alive for `'a`.
+    pub unsafe fn as_queue<'a>(self) -> Queue<'a> {
+        let count = usize::from(self.len);
+        Queue {
+            hold: self.hold,
+            next: core::slice::from_raw_parts(self.next, count),
+        }
+    }
+}
+
+impl From<Queue<'_>> for RawQueue {
+    /// Splits `queue` into its raw components, dropping the lifetime of the `next` slice.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `queue.next` holds more than `u16::MAX` pieces. A plain `as u16` cast
+    /// would silently wrap the length and make `as_queue` return a shorter queue.
+    fn from(queue: Queue<'_>) -> Self {
+        let len = <u16 as core::convert::TryFrom<usize>>::try_from(queue.next.len())
+            .expect("queue length must fit in u16");
+        Self {
+            hold: queue.hold,
+            len,
+            next: queue.next.as_ptr(),
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use mino::mat;
+
+    // A matrix copied into the arena compares equal to the original.
+    #[test]
+    fn test_copy_matrix() {
+        let arena = Arena::new();
+        let mat0 = mat! {
+            "..xxx..x.x";
+            "xxxxxx.xxx";
+        };
+        let mat1 = copy_matrix(&arena, mat0);
+        assert_eq!(mat0, mat1);
+    }
+
+    // A queue copied into the arena compares equal to the original.
+    #[test]
+    fn test_copy_queue() {
+        use PieceType::*;
+        let arena = Arena::new();
+        let q0 = Queue::new(None, &[I, L, J, O]);
+        let q1 = copy_queue(&arena, q0);
+        assert_eq!(q0, q1);
+    }
+
+    // Guards the layout of `RawQueue`: it must be exactly as large as two `u16`s plus a
+    // pointer.
+    #[test]
+    fn test_sizeof_raw_queue() {
+        assert_eq!(
+            core::mem::size_of::<RawQueue>(),
+            core::mem::size_of::<(u16, u16, *const ())>(),
+        );
+    }
+
+    // Converting to `RawQueue` and back yields an equal queue, for both a non-empty and
+    // an empty `next` slice.
+    #[test]
+    fn test_raw_queue_roundtrip() {
+        use PieceType::*;
+        let q0 = Queue::new(None, &[I, L, J, O]);
+        let rq0 = RawQueue::from(q0);
+        let q1 = unsafe { rq0.as_queue() };
+        assert_eq!(q1, q0);
+
+        let q0 = Queue::new(None, &[]);
+        let rq0 = RawQueue::from(q0);
+        let q1 = unsafe { rq0.as_queue() };
+        assert_eq!(q1, q0);
+    }
+}
--- a/fish/src/lib.rs
+++ b/fish/src/lib.rs
@ -6,8 +6,11 @@ pub mod ai;
 pub mod eval;
 pub mod find;

+pub mod bot;
+
 #[cfg(feature = "io")]
 pub mod io;

 pub use ai::Ai;
+pub use bot::Bot;
 pub use find::find_locations;
--- a/tidepool/src/main.rs
+++ b/tidepool/src/main.rs
@ -1,4 +1,5 @@
-use fish::ai;
+use fish::bot::Bot;
+use mino::srs::Queue;
 use rand::Rng as _;
 use tidepool::sim;

@ -17,8 +18,8 @@ impl std::fmt::Display for RngSeed {
 }

 pub fn main() -> std::io::Result<()> {
-    const AI_CYCLES: usize = 5_000;
-    const GOAL: usize = 50;
+    const THINK_CYCLES: usize = 5_000;
+    const GOAL: usize = 100;

    tracing_subscriber::fmt::fmt()
        .with_writer(std::io::stderr)
@ -48,28 +49,26 @@ pub fn main() -> std::io::Result<()> {
            println!();
        }

-        let ll = sim.lines_left();
+        let queue = Queue::new(hold, &next);
+        let mut bot = Bot::new(mat, queue);

-        let mut ai = ai::Ai::new(mat, &next, hold);
-        for i in 0..AI_CYCLES {
+        for i in 0..THINK_CYCLES {
            if i > 0 && i % 1000 == 0 {
                tracing::debug!("iteration {i}");
            }
+            bot.think();
+        }

-            if ai.think().is_err() {
+        let best = match bot.suggest() {
+            Some(pc) => pc,
+            None => {
+                println!("no suggestion!");
                break;
            }
-        }
+        };

-        let mut best = ai.suggestion();
-        let best = best.nth(0);
-
-        if let Some(pc) = best {
-            sim.play(pc);
-        } else {
-            println!("no suggestion!");
-            break;
-        }
+        let ll = sim.lines_left();
+        sim.play(best);

        ds += ll - sim.lines_left();
        ps += 1;