shark/fish/src/bot.rs

//! Primary interface for working with the Blockfish engine. The [`Bot`] type controls an
//! anytime algorithm that provides a suggestion for the next move. It may be polled
//! repeatedly via the `think` method in order to attempt to improve the suggestion.

use alloc::collections::BinaryHeap;
use alloc::vec::Vec;
use mino::matrix::Mat;
use mino::srs::{Piece, Queue};

mod node;

use self::node::{Node, RawNodePtr};

pub(crate) use bumpalo::Bump as Arena;

/// Encompasses an instance of the algorithm.
///
/// Holds the search state together with the arena that is handed to the search on every
/// call to `think` and from which the root node is allocated in `new`.
pub struct Bot {
    // Search state. It keeps raw pointers to nodes rather than borrowed references.
    algorithm: SegmentedAStar,
    // IMPORTANT: `arena` must occur after `algorithm` so that it is dropped last.
    // (Struct fields are dropped in declaration order, so reordering these two fields
    // would change which one is destroyed first.)
    arena: Arena,
}

impl Bot {
    /// Builds a bot whose search starts from the given initial state (matrix and queue).
    ///
    /// A fresh arena is created, the root node is allocated inside it, and the search
    /// is seeded with that root.
    // TODO: specify weights
    pub fn new(matrix: &Mat, queue: Queue<'_>) -> Self {
        let arena = bumpalo::Bump::new();
        let algorithm = SegmentedAStar::new(Node::alloc_root(&arena, matrix, queue));
        Self { algorithm, arena }
    }

    /// Runs one "iteration" of the search, which may or may not improve the suggestion.
    ///
    /// What exactly counts as an iteration is intentionally left vague, but similar
    /// versions of the engine should be deterministic: performing the same number of
    /// iterations is expected to yield the same suggestion.
    pub fn think(&mut self) {
        // New nodes created during this step are allocated from the bot's own arena.
        let arena = &self.arena;
        self.algorithm.step(arena);
    }

    /// Returns the placement currently considered best, if there is one.
    ///
    /// `None` is returned in two situations:
    /// - `think` has not yet been called often enough to produce a first suggestion.
    /// - the initial state admits no valid placement.
    pub fn suggest(&self) -> Option<Piece> {
        let node = self.algorithm.best()?;
        node.root_placement()
    }
}

// This implements an algorithm that is very similar to A* but has a slight
// modification. Rather than one big open set, there are separate sets at each depth of
// the search. After picking a node from one open set and expanding its children into the
// successor set, we next pick a node from that successor set. This process continues
// until a terminal node is reached. In order to select which open set to start picking
// from next, we look globally at all the open sets and find the node with the best
// rating; this part works as if there were only one open set.
//
// Only terminal nodes are compared in order to pick a suggestion. An interesting
// consequence of this design is that on the first run of the algorithm we end up
// performing a best-first search, and the first terminal node found ends up being our
// initial suggestion. This best-first search terminates very quickly, which is nice from
// the perspective of an anytime algorithm.
//
// The problem with directly applying A* for an anytime downstacking algorithm is that
// simply looking for the best heuristic measurement (f) can lead you into a situation
// where a node that only made 2 placements has a better score than all of the nodes with
// 3+ placements, and thus it is considered the best. This is definitely not correct,
// since that 2-placement node only leads to worse board states as you continue to place
// pieces on the board. In downstacking you have to place all of your pieces, you can't
// just stop after placing a few and arriving at a good board state! So before actually
// considering a node to be a suggestion we have to make sure we run out all of the queue
// first (i.e. it's a terminal node), and only then should we check its rating.

// State of the segmented A*-like search: one open set per depth, a cursor selecting the
// open set to pop from next, and the best terminal node seen so far.
struct SegmentedAStar {
    // Open sets indexed by depth. The root is placed in `open[0]`, and the successors of
    // a node popped from `open[d]` are pushed into `open[d + 1]`.
    open: Vec<BinaryHeap<AStarNode>>,
    // Index of the open set that the next `expand` pops from. It may be out of range
    // (it is set to `open.len()` after a terminal node is reached), in which case
    // `expand` fails immediately and `select` picks a new depth.
    depth: usize,
    // Pointer to the terminal node with the lowest rating found so far, if any.
    best: Option<RawNodePtr>,
}

// Signal returned (as the `Err` case) by `SegmentedAStar::expand` when no expansion took
// place at the current depth: either there was nothing to pop there, or the popped node
// was terminal. `SegmentedAStar::step` reacts to it by calling `select`.
#[derive(Debug)]
struct ShouldSelect;

impl SegmentedAStar {
    // Creates the search state with `root` as the only entry of the depth-0 open set.
    fn new(root: &Node) -> Self {
        // The queue length is only used as a capacity hint; `expand` grows `open` on
        // demand whenever a deeper open set is needed.
        let mut open = Vec::with_capacity(root.queue().len());
        open.push(BinaryHeap::new());
        open[0].push(root.into());
        Self {
            open,
            depth: 0,
            best: None,
        }
    }

    // Returns the best terminal node recorded by `backup`, or `None` if no terminal node
    // has been reached yet.
    fn best(&self) -> Option<&Node> {
        // SAFETY: `self.best` is only ever assigned in `backup`, from a live `&Node`.
        // NOTE(review): soundness relies on that node still being alive here (presumably
        // it lives in the arena owned by `Bot`) — confirm against the contract of
        // `RawNodePtr::as_node`.
        self.best.map(|node| unsafe { node.as_node() })
    }

    // Performs one unit of work: try to expand a node at the current depth, and if that
    // is not possible, pick the depth to continue from.
    fn step(&mut self, arena: &Arena) {
        if let Err(ShouldSelect) = self.expand(arena) {
            self.select();
        }
    }

    // Pops the best node of the current open set and pushes its successors into the open
    // set one level deeper, advancing `self.depth` to that level.
    //
    // Returns the expanded node on success. Returns `Err(ShouldSelect)` without expanding
    // when `self.depth` is out of range, when the current open set is empty, or when the
    // popped node is terminal (in which case it is first offered to `backup`).
    fn expand<'a>(&mut self, arena: &'a Arena) -> Result<&'a Node, ShouldSelect> {
        let popped = self
            .open
            .get_mut(self.depth)
            .and_then(|set| set.pop())
            .ok_or(ShouldSelect)?;
        // SAFETY: every pointer in an open set was created from a live `&Node` (the root
        // in `new`, or a successor below).
        // NOTE(review): relies on those nodes outliving the search state — confirm
        // against the contract of `RawNodePtr::as_node`.
        let cand = unsafe { popped.0.as_node() };

        if cand.is_terminal() {
            self.depth = self.open.len(); // makes expand() fail immediately
            self.backup(cand);
            return Err(ShouldSelect);
        }

        self.depth += 1;
        if self.open.len() <= self.depth {
            self.open.resize_with(self.depth + 1, BinaryHeap::new);
        }

        // Successors are pushed one at a time (rather than via `extend`) so that the
        // heap's internal layout, and therefore the order in which equally rated nodes
        // are popped, stays exactly as it was.
        let successors = &mut self.open[self.depth];
        for suc in cand.expand(arena) {
            successors.push(suc.into());
        }

        Ok(cand)
    }

    // Records `cand` as the new suggestion if there is no suggestion yet or if its rating
    // is strictly lower than the current one's. Only called with terminal nodes.
    fn backup(&mut self, cand: &Node) {
        let rating = cand.rating();
        if self.best().map_or(true, |n| rating < n.rating()) {
            tracing::debug!(
                "update suggestion ({}): {cand:?}",
                self.best.map_or("1st", |_| "new")
            );
            self.best = Some(cand.into());
        }
    }

    // Looks at the top node of every non-empty open set and moves `self.depth` to the
    // set whose top node has the lowest rating. On ties the shallowest depth wins, since
    // `min_by_key` returns the first minimum. If every open set is empty, `self.depth`
    // is left unchanged.
    fn select(&mut self) {
        let best_depth = self
            .open
            .iter()
            .enumerate()
            .filter_map(|(depth, set)| {
                // SAFETY: see `expand`; pointers in the open sets come from live `&Node`s.
                // NOTE(review): confirm against the contract of `RawNodePtr::as_node`.
                set.peek().map(|top| (depth, unsafe { top.0.as_node() }))
            })
            .min_by_key(|(_, node)| node.rating())
            .map(|(depth, _)| depth);

        if let Some(depth) = best_depth {
            self.depth = depth;
        }
    }
}

// Wraps a `Node` pointer but implements `cmp::Ord` in order to compare by rating.
// The ordering is reversed, so that a max-heap (`BinaryHeap`) of `AStarNode`s yields the
// node with the lowest rating first.
#[derive(Copy, Clone)]
struct AStarNode(RawNodePtr);

impl From<&Node> for AStarNode {
    fn from(node: &Node) -> Self {
        Self(node.into())
    }
}

// Orders nodes by rating, inverted: the node with the lower rating compares as greater.
impl core::cmp::Ord for AStarNode {
    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
        // SAFETY: both pointers were created from live `&Node`s when the wrappers were
        // built. NOTE(review): confirm against the contract of `RawNodePtr::as_node`.
        let (lhs, rhs) = unsafe { (self.0.as_node(), other.0.as_node()) };
        let mine = lhs.rating();
        let theirs = rhs.rating();
        // FIXME: add a deterministic tiebreaker
        // Operands are swapped on purpose: this is the reverse of `mine.cmp(&theirs)`.
        theirs.cmp(&mine)
    }
}

// The partial order is simply the total order defined by the `Ord` impl.
impl core::cmp::PartialOrd for AStarNode {
    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
        Some(Ord::cmp(self, other))
    }
}

// Marker impl required by `Ord`; equality itself is defined by the `PartialEq` impl for
// this type, which compares by rating via `cmp`.
impl core::cmp::Eq for AStarNode {}

// Two wrappers are equal exactly when `cmp` reports them as equal, i.e. when their nodes
// have equal ratings; this holds even if they point at different nodes.
impl core::cmp::PartialEq for AStarNode {
    fn eq(&self, other: &Self) -> bool {
        matches!(self.cmp(other), core::cmp::Ordering::Equal)
    }
}