//! Primary interface for working with the Blockfish engine. The [`Bot`] type controls
//! an anytime algorithm that provides a suggestion for the next move. It may be polled
//! repeatedly via [`Bot::think_for`] in order to try to improve the suggestion.
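//!
//! A minimal usage sketch; how the `weights`, `matrix`, and `queue` values are obtained
//! is assumed here and depends on the surrounding crates:
//!
//! ```ignore
//! let mut bot = Bot::new(&weights, &matrix, queue);
//! bot.think_for(10_000); // spend some iterations improving the suggestion
//! if let Some(placement) = bot.suggest() {
//!     // apply `placement` to the game state
//! }
//! ```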
use alloc::collections::BinaryHeap;
use alloc::vec::Vec;
use mino::matrix::Mat;
use mino::srs::{Piece, Queue};

mod node;
mod trans;

use crate::bot::node::{Node, RawNodePtr};
use crate::bot::trans::TransTable;
use crate::eval::{features, Weights};

pub(crate) use bumpalo::Bump as Arena;

/// Encompasses an instance of the algorithm.
pub struct Bot {
    iters: u32,
    evaluator: Evaluator,
    trans: TransTable,
    algorithm: SegmentedAStar,
    // IMPORTANT: `arena` must be declared after `algorithm` so that it is dropped last
    // (struct fields drop in declaration order); `algorithm` holds raw pointers to nodes
    // allocated in the arena.
    arena: Arena,
}

impl Bot {
    /// Constructs a new bot from the given initial state (matrix and queue).
    // TODO: specify weights
    pub fn new(weights: &Weights, matrix: &Mat, queue: Queue<'_>) -> Self {
        let arena = bumpalo::Bump::new();
        let root = Node::alloc_root(&arena, matrix, queue);
        let evaluator = Evaluator::new(weights, root);
        let trans = TransTable::new();
        let algorithm = SegmentedAStar::new(root);
        Self {
            iters: 0,
            evaluator,
            trans,
            algorithm,
            arena,
        }
    }

    /// Runs the bot for up to `gas` more iterations. An "iteration" is a unit of work
    /// that is intentionally kept vague, but should be proportional to the amount of
    /// CPU time spent. Iterations are deterministic, so similar versions of the engine
    /// will produce the same suggestions if run for the same number of iterations.
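    ///
    /// For example, splitting the gas across calls does not change the outcome. A
    /// sketch, assuming both bots are constructed from the same initial state:
    ///
    /// ```ignore
    /// let mut a = Bot::new(&weights, &matrix, queue);
    /// let mut b = Bot::new(&weights, &matrix, queue);
    /// a.think_for(5000);
    /// b.think_for(2500);
    /// b.think_for(5000 - b.iterations());
    /// // `a` and `b` now yield the same suggestion
    /// ```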
    pub fn think_for(&mut self, gas: u32) {
        // NOTICE: The actual number of iterations may slightly exceed the provided gas
        // due to how the bot is currently structured. This shouldn't have a substantial
        // impact over the long run, since the overshoot is very small in terms of CPU
        // time.
        //
        // Runs are deterministic as long as they end on the same *target* iteration
        // count on the last call to `think_for`, e.g. `bot.think_for(5000)` is the same
        // as `bot.think_for(2500); bot.think_for(5000 - bot.iterations());`
        let max_iters = self.iters + gas;
        while self.iters < max_iters {
            let did_update = self.algorithm.step(
                &self.arena,
                &mut self.trans,
                &self.evaluator,
                &mut self.iters,
            );
            if did_update {
                tracing::debug!(
                    "new suggestion @ {}: {:?}",
                    self.iters,
                    self.algorithm.best().unwrap(),
                );
            }
        }
    }

    /// Returns the number of iterations done so far.
    pub fn iterations(&self) -> u32 {
        self.iters
    }

    /// Returns the current best suggested placement. Returns `None` under two possible
    /// conditions:
    /// - `think_for` has not been called enough times to arrive at an initial suggestion.
    /// - there are no valid placements for the initial state.
    pub fn suggest(&self) -> Option<Piece> {
        self.algorithm.best().and_then(|node| node.root_placement())
    }
}

struct Evaluator {
    weights: Weights,
    root_score: i32,
    root_queue_len: usize,
}

impl Evaluator {
    fn new(weights: &Weights, root: &Node) -> Self {
        Self {
            weights: *weights,
            root_score: features(root.matrix(), 0).evaluate(weights),
            root_queue_len: root.queue().len(),
        }
    }

    fn evaluate(&self, mat: &Mat, queue: Queue<'_>) -> i32 {
        // FIXME: the old blockfish has two special edge cases for rating nodes that are
        // not handled here.
        //
        // 1. nodes that reach the bottom of the board early ("solutions") are highly
        // prioritized. this is done by using the piece count *as the rating* in order to
        // force it to be extremely low, as well as sorting solutions by # of pieces in
        // case there are multiple. according to frey, this probably causes blockfish to
        // greed out in various scenarios where it sees a path to the bottom but it is
        // not actually the end of the race. part of the issue is of course that it isn't
        // communicated to blockfish whether or not the bottom of the board is actually
        // the end of the race, but also that the intermediate steps to get to the bottom
        // may be suboptimal placements when it isn't.
        //
        // 2. blockfish would actually average the last two evaluations and use that as
        // the final rating. this is meant as a concession to the fact that the last
        // placement made by the bot is not actually a placement we are required to make,
        // since in reality there will be an opportunity to hold the final piece and use
        // something else instead. so the 2nd-to-last rating matters in cases where the
        // last piece leads to suboptimal board states that might be avoided by holding
        // it. i think this improves the performance only slightly, but it is also a bit
        // of a hack that deserves further consideration.

        let pcnt = self.root_queue_len.saturating_sub(queue.len());
        let score = features(mat, pcnt).evaluate(&self.weights);

        // larger (i.e., further below the root score) is better
        self.root_score - score
    }
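
    // A worked illustration of the convention above (the numbers are purely
    // hypothetical): if the root board scores 120 and a board reached after a few
    // placements scores 90, its rating is 120 - 90 = 30; a board scoring 130 rates
    // 120 - 130 = -10. The higher rating (30) is the better one.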
}

// This implements an algorithm that is very similar to A* but with a slight
// modification. Rather than one big open set, there is a separate open set at each depth
// of the search. After picking a node from one open set and expanding its children into
// the successor set, we next pick a node from that successor set. This process continues
// until a terminal node is reached. To select which open set to start picking from next,
// we look globally at all the open sets and find the node with the best rating; this
// part works much as it would if there were only one open set.
//
// Only terminal nodes are compared in order to pick a suggestion. An interesting
// consequence of this design is that on the first run of the algorithm we end up
// performing a best-first search, and the first terminal node found becomes our initial
// suggestion. This initial search terminates very quickly, which is nice from the
// perspective of an anytime algorithm.
//
// The problem with directly applying A* to an anytime downstacking algorithm is that
// simply looking for the best heuristic measurement (f) can lead to a situation where a
// node that only made 2 placements has a better score than all of the nodes with 3+
// placements, and is therefore considered the best. This is not correct, since that
// 2-placement node only leads to worse board states as you continue to place pieces on
// the board. In downstacking you have to place all of your pieces; you can't just stop
// after placing a few and arriving at a good board state! So before actually considering
// a node as a suggestion we have to make sure it runs out the entire queue first (i.e.
// it is a terminal node), and only then should we check its rating.
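//
// A condensed sketch of one descent, in hypothetical simplified Rust (the helpers
// `max_by_rating` and `depth_of_best_open_node` are made up for illustration; the real
// implementation below also tracks work units and a transposition table):
//
//     while let Some(node) = open[depth].pop() {
//         if node.is_terminal() {
//             best = max_by_rating(best, node); // only terminal nodes become suggestions
//             depth = depth_of_best_open_node(&open); // re-select where to resume
//             break;
//         }
//         depth += 1;
//         for child in node.expand() {
//             open[depth].push(child);
//         }
//     }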
struct SegmentedAStar {
    open: Vec<BinaryHeap<AStarNode>>,
    depth: usize,
    best: Option<RawNodePtr>,
}

impl SegmentedAStar {
    fn new(root: &Node) -> Self {
        let mut open = Vec::with_capacity(root.queue().len());
        open.push(BinaryHeap::new());
        open[0].push(root.into());
        Self {
            open,
            depth: 0,
            best: None,
        }
    }

    fn best(&self) -> Option<&Node> {
        self.best.map(|node| unsafe { node.as_node() })
    }

    // Performs one step of the search. Returns true if the best suggestion was updated
    // as a result.
    fn step(
        &mut self,
        arena: &Arena,
        trans: &mut TransTable,
        eval: &Evaluator,
        iters: &mut u32,
    ) -> bool {
        *iters += 1;
        match self.expand(arena, trans, eval) {
            Ok(work) => {
                *iters += work;
                false
            }
            Err(maybe_cand) => {
                self.select();
                if let Some(cand) = maybe_cand {
                    self.backup(cand)
                } else {
                    false
                }
            }
        }
    }
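
    // Pops the best node at the current depth and expands it. Returns `Ok(work)` with
    // the number of board evaluations performed, `Err(Some(node))` if the popped node is
    // terminal (a candidate suggestion), or `Err(None)` if the open set at the current
    // depth is empty.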
    fn expand<'a>(
        &mut self,
        arena: &'a Arena,
        trans: &mut TransTable,
        eval: &Evaluator,
    ) -> Result<u32, Option<&'a Node>> {
        let open_set = self.open.get_mut(self.depth);
        let cand = open_set.and_then(|set| set.pop()).ok_or(None)?;
        let cand = unsafe { cand.0.as_node() };

        if cand.is_terminal() {
            return Err(Some(cand));
        }

        self.depth += 1;
        if self.open.len() <= self.depth {
            self.open.resize_with(self.depth + 1, BinaryHeap::new);
        }

        let mut work = 0;
        let evaluate = |mat: &Mat, queue: Queue<'_>| {
            // each evaluated board state = +1 unit of work
            work += 1;
            eval.evaluate(mat, queue)
        };

        for suc in cand.expand(arena, trans, evaluate) {
            self.open[self.depth].push(suc.into());
        }

        Ok(work)
    }

    // Records `cand` as the new best suggestion if it is better than the current one.
    // Returns true if the suggestion was updated.
    fn backup(&mut self, cand: &Node) -> bool {
        if self.best().map_or(true, |best| cand.is_better(best)) {
            self.best = Some(cand.into());
            true
        } else {
            false
        }
    }
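
    // Re-selects which depth to pick from next by scanning every open set for the
    // globally best-rated node; if all open sets are empty, `depth` resets to 0.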
    fn select(&mut self) {
        let mut best = None;
        self.depth = 0;

        for (depth, set) in self.open.iter().enumerate() {
            let Some(cand) = set.peek() else { continue };
            let cand = unsafe { cand.0.as_node() };
            if best.map_or(true, |best| cand.is_better(best)) {
                best = Some(cand);
                self.depth = depth;
            }
        }
    }
}

// Wraps a `Node` pointer but implements `cmp::Ord` in order to compare by rating. Note
// that `cmp` never returns `Equal` (so `eq` below is always false); this is sufficient
// for ordering inside a `BinaryHeap`.
#[derive(Copy, Clone)]
struct AStarNode(RawNodePtr);

impl From<&Node> for AStarNode {
    fn from(node: &Node) -> Self {
        Self(node.into())
    }
}

impl core::cmp::Ord for AStarNode {
    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
        let lhs = unsafe { self.0.as_node() };
        let rhs = unsafe { other.0.as_node() };
        if lhs.is_better(rhs) {
            core::cmp::Ordering::Greater
        } else {
            core::cmp::Ordering::Less
        }
    }
}

impl core::cmp::PartialOrd for AStarNode {
    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl core::cmp::Eq for AStarNode {}

impl core::cmp::PartialEq for AStarNode {
    fn eq(&self, other: &Self) -> bool {
        self.cmp(other).is_eq()
    }
}