//! Primary interface to working with the Blockfish engine. The [`Bot`] type controls an //! anytime algorithm that will provide a suggestion for the next move. It may be //! repeatedly polled by the `think` method in order to attempt to improve the suggestion. use alloc::collections::BinaryHeap; use alloc::vec::Vec; use mino::matrix::Mat; use mino::srs::{Piece, Queue}; mod node; mod trans; use crate::bot::node::{Node, RawNodePtr}; use crate::bot::trans::TransTable; use crate::eval::{features, Weights}; pub(crate) use bumpalo::Bump as Arena; /// Encompasses an instance of the algorithm. pub struct Bot { iters: u32, evaluator: Evaluator, trans: TransTable, algorithm: SegmentedAStar, // IMPORTANT: `arena` must occur after `algorithm` so that it is dropped last. arena: Arena, } impl Bot { /// Constructs a new bot from the given initial state (matrix and queue). // TODO: specify weights pub fn new(weights: &Weights, matrix: &Mat, queue: Queue<'_>) -> Self { let arena = bumpalo::Bump::new(); let root = Node::alloc_root(&arena, matrix, queue); let evaluator = Evaluator::new(weights, root); let trans = TransTable::new(); let algorithm = SegmentedAStar::new(root); Self { iters: 0, evaluator, trans, algorithm, arena, } } /// Runs the bot for up to `gas` more iterations. An "iteration" is a unit of work /// that is intentionally kept vague, but should be proportional to the amount CPU /// time. Iterations are deterministic, so similar versions of the engine will produce /// the same suggestions if run the for the same number of iterations. pub fn think_for(&mut self, gas: u32) { // NOTICE: The actual number of iterations may slightly exceed the provided gas due to // how the bot is currently structured. This shouldn't have a substantial impact over // the long run since the overshoot will be very small in terms of CPU // time. // // Runs will be deterministic as long as two runs end on the same *target* // iterations on the last call to `think_for`, e.g. "bot.think_for(5000)" is the // same as "bot.think_for(2500); bot.think_for(5000 - bot.iterations());" let max_iters = self.iters + gas; while self.iters < max_iters { let did_update = self.algorithm.step( &self.arena, &mut self.trans, &self.evaluator, &mut self.iters, ); if did_update { tracing::debug!( "new suggestion @ {}: {:?}", self.iters, self.algorithm.best().unwrap(), ); } } } /// Returns the number of iterations done so far. pub fn iterations(&self) -> u32 { self.iters } /// Return the current best suggested placement. Returns `None` under two possible /// conditions: /// - `think` has not been called enough times to provide an initial suggestion. /// - there are no valid placements for the initial state pub fn suggest(&self) -> Option { self.algorithm.best().and_then(|node| node.root_placement()) } } struct Evaluator { weights: Weights, root_score: i32, root_queue_len: usize, } impl Evaluator { fn new(weights: &Weights, root: &Node) -> Self { Self { weights: *weights, root_score: features(root.matrix(), 0).evaluate(weights), root_queue_len: root.queue().len(), } } fn evaluate(&self, mat: &Mat, queue: Queue<'_>) -> i32 { // FIXME: the old blockfish has two special edge cases for rating nodes that is // not done here. // // 1. nodes that reach the bottom of the board early ("solutions") are highly // prioritized. this is done by using the piece count *as the rating* in order to // force it to be extremely low, as well as sorting solutions by # of pieces in // case there are multiple. according to frey, this probably causes blockfish to // greed out in various scenarios where it sees a path to the bottom but it is not // actually the end of the race. part of the issue is of course that it isn't // communicated to blockfish whether or not the bottom of the board is actually // the end of the race, but also that the intermediate steps to get to the bottom // may be suboptimal placements when it isn't. // // 2. blockfish would actually average the last two evaluations and use that as // the final rating. this is meant as a concession for the fact that the last // placement made by the bot is not actually a placement we are required to make, // since in reality there is going to be the opportunity to hold the final piece // and use something else instead. so the 2nd to last rating is important in cases // where the last piece leads to suboptimal board states which may be able to be // avoided by holding the last piece. i think this improves the performance only // slightly, but it is also a bit of a hack that deserves further consideration. let pcnt = self.root_queue_len.saturating_sub(queue.len()); let score = features(mat, pcnt).evaluate(&self.weights); // larger (i.e., further below the root score) is better self.root_score - score } } // This implements an algorithm that is very similar to A* but has a slight // modification. Rather than one big open set, there are separate sets at each depth of // the search. After picking a node from one open set and expanding its children into the // successor set, we next pick a node from that successor set. This process continues // until a terminal node is reached. In order to select which open set to start picking // from next, we look globally at all the open sets and find the node with the best // rating; this part works similarly to as if there was only one open set. // // Only terminal nodes are compared in order to pick a suggestion. An interesting // consequence of this design is that on the first run of the algorithm we end up // performing a best-first-search, and the first terminal node found ends up being our // initial suggestion. This BFS terminates very quickly so it is nice from the perspective // of an anytime algorithm. // // The problem with directly applying A* for an anytime downstacking algorithm is that // simply looking for the best heuristic measurement (f) can lead you into a situation // where a node that only made 2 placements has a better score than all of the nodes with // 3+ placements, and thus it is considered the best. This is definitely not correct, // since that 2-placement node only leads to worse board states as you continue to place // pieces on the board. In downstacking you have to place all of your pieces, you can't // just stop after placing a few and arriving at a good board state! So before actually // considering a node to be a suggestion we have to make sure we run out all of the queue // first (i.e. its a terminal node), and only then should we check its rating. struct SegmentedAStar { open: Vec>, depth: usize, best: Option, } impl SegmentedAStar { fn new(root: &Node) -> Self { let mut open = Vec::with_capacity(root.queue().len()); open.push(BinaryHeap::new()); open[0].push(root.into()); Self { open, depth: 0, best: None, } } fn best(&self) -> Option<&Node> { self.best.map(|node| unsafe { node.as_node() }) } fn step( &mut self, arena: &Arena, trans: &mut TransTable, eval: &Evaluator, iters: &mut u32, ) -> bool { *iters += 1; match self.expand(arena, trans, eval) { Ok(work) => { *iters += work; false } Err(maybe_cand) => { self.select(); if let Some(cand) = maybe_cand { self.backup(cand) } else { false } } } } fn expand<'a>( &mut self, arena: &'a Arena, trans: &mut TransTable, eval: &Evaluator, ) -> Result> { let open_set = self.open.get_mut(self.depth); let cand = open_set.map_or(None, |set| set.pop()).ok_or(None)?; let cand = unsafe { cand.0.as_node() }; if cand.is_terminal() { return Err(Some(cand)); } self.depth += 1; if self.open.len() <= self.depth { self.open.resize_with(self.depth + 1, BinaryHeap::new); } let mut work = 0; let evaluate = |mat: &Mat, queue: Queue<'_>| { // each evaluated board state = +1 unit work work += 1; eval.evaluate(mat, queue) }; for suc in cand.expand(arena, trans, evaluate) { self.open[self.depth].push(suc.into()); } Ok(work) } fn backup(&mut self, cand: &Node) -> bool { if self.best().map_or(true, |best| cand.is_better(best)) { self.best = Some(cand.into()); true } else { false } } fn select(&mut self) { let mut best = None; self.depth = 0; for (depth, set) in self.open.iter().enumerate() { let Some(cand) = set.peek() else { continue }; let cand = unsafe { cand.0.as_node() }; if best.map_or(true, |best| cand.is_better(best)) { best = Some(cand); self.depth = depth; } } } } // Wraps a `Node` pointer but implements `cmp::Ord` in order to compare by rating. #[derive(Copy, Clone)] struct AStarNode(RawNodePtr); impl From<&Node> for AStarNode { fn from(node: &Node) -> Self { Self(node.into()) } } impl core::cmp::Ord for AStarNode { fn cmp(&self, other: &Self) -> core::cmp::Ordering { let lhs = unsafe { self.0.as_node() }; let rhs = unsafe { other.0.as_node() }; if lhs.is_better(rhs) { core::cmp::Ordering::Greater } else { core::cmp::Ordering::Less } } } impl core::cmp::PartialOrd for AStarNode { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } impl core::cmp::Eq for AStarNode {} impl core::cmp::PartialEq for AStarNode { fn eq(&self, other: &Self) -> bool { self.cmp(other).is_eq() } }