diff --git a/fish/src/bot.rs b/fish/src/bot.rs
index b5709f9..81f1704 100644
--- a/fish/src/bot.rs
+++ b/fish/src/bot.rs
@@ -10,11 +10,13 @@ use mino::srs::{Piece, Queue};
 
 mod node;
 use self::node::{Node, RawNodePtr};
+use crate::eval::evaluate;
 
 pub(crate) use bumpalo::Bump as Arena;
 
 /// Encompasses an instance of the algorithm.
 pub struct Bot {
+    evaluator: Evaluator,
     algorithm: SegmentedAStar,
     // IMPORTANT: `arena` must occur after `algorithm` so that it is dropped last.
     arena: Arena,
@@ -26,8 +28,13 @@ impl Bot {
     pub fn new(matrix: &Mat, queue: Queue<'_>) -> Self {
         let arena = bumpalo::Bump::new();
         let root = Node::alloc_root(&arena, matrix, queue);
+        let evaluator = Evaluator::new(root);
         let algorithm = SegmentedAStar::new(root);
-        Self { algorithm, arena }
+        Self {
+            evaluator,
+            algorithm,
+            arena,
+        }
     }
 
     /// Perform a single "iteration" of work, which may end up improving the suggestion.
@@ -35,7 +42,7 @@ impl Bot {
     /// deterministic, such that performing the same number of iterations gives the same
     /// resulting suggestion.
     pub fn think(&mut self) {
-        self.algorithm.step(&self.arena);
+        self.algorithm.step(&self.arena, &self.evaluator);
     }
 
     /// Return the current best suggested placement. Returns `None` under two possible
@@ -47,6 +54,50 @@ impl Bot {
     }
 }
 
+struct Evaluator {
+    // TODO: weights
+    root_score: i32,
+    root_queue_len: usize,
+}
+
+impl Evaluator {
+    fn new(root: &Node) -> Self {
+        Self {
+            root_score: evaluate(root.matrix(), 0),
+            root_queue_len: root.queue().len(),
+        }
+    }
+
+    fn evaluate(&self, mat: &Mat, queue: Queue<'_>) -> i32 {
+        let pcnt = self.root_queue_len.saturating_sub(queue.len());
+
+        // FIXME: the old blockfish has two special edge cases for rating nodes that is
+        // not done here.
+        //
+        // 1. nodes that reach the bottom of the board early ("solutions") are highly
+        // prioritized. this is done by using the piece count *as the rating* in order to
+        // force it to be extremely low, as well as sorting solutions by # of pieces in
+        // case there are multiple. according to frey, this probably causes blockfish to
+        // greed out in various scenarios where it sees a path to the bottom but it is not
+        // actually the end of the race. part of the issue is of course that it isn't
+        // communicated to blockfish whether or not the bottom of the board is actually
+        // the end of the race, but also that the intermediate steps to get to the bottom
+        // may be suboptimal placements when it isn't.
+        //
+        // 2. blockfish would actually average the last two evaluations and use that as
+        // the final rating. this is meant as a concession for the fact that the last
+        // placement made by the bot is not actually a placement we are required to make,
+        // since in reality there is going to be the opportunity to hold the final piece
+        // and use something else instead. so the 2nd to last rating is important in cases
+        // where the last piece leads to suboptimal board states which may be able to be
+        // avoided by holding the last piece. i think this improves the performance only
+        // slightly, but it is also a bit of a hack that deserves further consideration.
+
+        // larger (i.e., further below the root score) is better
+        self.root_score - evaluate(mat, pcnt)
+    }
+}
+
 // This implements an algorithm that is very similar to A* but has a slight
 // modification. Rather than one big open set, there are separate sets at each depth of
 // the search. After picking a node from one open set and expanding its children into the
@@ -96,14 +147,14 @@ impl SegmentedAStar {
         self.best.map(|node| unsafe { node.as_node() })
     }
 
-    fn step(&mut self, arena: &Arena) {
-        match self.expand(arena) {
+    fn step(&mut self, arena: &Arena, eval: &Evaluator) {
+        match self.expand(arena, eval) {
             Ok(_) => {}
             Err(ShouldSelect) => self.select(),
         }
     }
 
-    fn expand<'a>(&mut self, arena: &'a Arena) -> Result<&'a Node, ShouldSelect> {
+    fn expand<'a>(&mut self, arena: &'a Arena, eval: &Evaluator) -> Result<&'a Node, ShouldSelect> {
         let open_set = self.open.get_mut(self.depth);
         let cand = open_set.map_or(None, |set| set.pop()).ok_or(ShouldSelect)?;
         let cand = unsafe { cand.0.as_node() };
@@ -119,7 +170,7 @@ impl SegmentedAStar {
             self.open.resize_with(self.depth + 1, BinaryHeap::new);
         }
 
-        for suc in cand.expand(arena) {
+        for suc in cand.expand(arena, |m, q| eval.evaluate(m, q)) {
             self.open[self.depth].push(suc.into());
         }
 
@@ -127,10 +178,9 @@ impl SegmentedAStar {
     }
 
     fn backup(&mut self, cand: &Node) {
-        let rating = cand.rating();
-        if self.best().map_or(true, |n| rating < n.rating()) {
+        if self.best().map_or(true, |best| cand.is_better(best)) {
             tracing::debug!(
-                "update suggestion ({}): {cand:?}",
+                "{} suggestion: {cand:?}",
                 self.best.map_or("1st", |_| "new")
             );
             self.best = Some(cand.into());
@@ -138,15 +188,17 @@ impl SegmentedAStar {
     }
 
     fn select(&mut self) {
-        self.open
-            .iter()
-            .map(|set| set.peek().map(|node| unsafe { node.0.as_node() }))
-            .enumerate()
-            .filter(|(_, best)| best.is_some())
-            .min_by_key(|(_, best)| best.unwrap().rating())
-            .map(|(depth, _)| {
+        let mut best = None;
+        self.depth = 0;
+
+        for (depth, set) in self.open.iter().enumerate() {
+            let Some(cand) = set.peek() else { continue };
+            let cand = unsafe { cand.0.as_node() };
+            if best.map_or(true, |best| cand.is_better(best)) {
+                best = Some(cand);
                 self.depth = depth;
-            });
+            }
+        }
     }
 }
 
@@ -164,8 +216,13 @@ impl core::cmp::Ord for AStarNode {
     fn cmp(&self, other: &Self) -> core::cmp::Ordering {
         let lhs = unsafe { self.0.as_node() };
         let rhs = unsafe { other.0.as_node() };
-        // FIXME: add a deterministic tiebreaker
-        lhs.rating().cmp(&rhs.rating()).reverse()
+        // `is_better` is a strict comparison, so when neither node beats the other they
+        // are tied and must compare `Equal`; otherwise `Ord` would not be a total order.
+        match (lhs.is_better(rhs), rhs.is_better(lhs)) {
+            (true, _) => core::cmp::Ordering::Greater,
+            (_, true) => core::cmp::Ordering::Less,
+            _ => core::cmp::Ordering::Equal,
+        }
     }
 }
 
diff --git a/fish/src/bot/node.rs b/fish/src/bot/node.rs
index 462404d..93b649c 100644
--- a/fish/src/bot/node.rs
+++ b/fish/src/bot/node.rs
@@ -4,7 +4,6 @@ use mino::matrix::{Mat, MatBuf};
 use mino::srs::{Piece, PieceType, Queue};
 
 use crate::bot::Arena;
-use crate::eval::evaluate;
 use crate::find::find_locations;
 
 /// Represents a node in the search tree. A node basically just consists of a board state
@@ -13,7 +12,6 @@ pub(crate) struct Node {
     matrix: *const Mat,
     queue: RawQueue,
     edge: Option,
-    pcnt: u32,
     rating: i32,
     // currently there is no need to store a node's children, but maybe this could change
     // in the future.
@@ -39,7 +37,7 @@ impl Node {
     pub fn alloc_root<'a>(arena: &'a Arena, matrix: &Mat, queue: Queue<'_>) -> &'a Self {
         let matrix = copy_matrix(arena, matrix);
         let queue = copy_queue(arena, queue);
-        Node::alloc(arena, matrix, queue, None)
+        Node::alloc(arena, matrix, queue, i32::MIN, None)
     }
 
     // `matrix` and `queue` must be allocated inside `arena`
@@ -47,43 +45,15 @@ impl Node {
         arena: &'a Arena,
         matrix: &'a Mat,
         queue: Queue<'a>,
+        rating: i32,
         edge: Option,
     ) -> &'a Self {
-        let pcnt = match &edge {
-            None => 0,
-            Some(e) => e.parent().pcnt + 1,
-        };
+        let matrix = matrix as *const Mat;
         let queue = RawQueue::from(queue);
-
-        // FIXME: the old blockfish has two special edge cases for rating nodes that is
-        // not done here.
-        //
-        // 1. nodes that reach the bottom of the board early ("solutions") are highly
-        // prioritized. this is done by using the piece count *as the rating* in order to
-        // force it to be extremely low, as well as sorting solutions by # of pieces in
-        // case there are multiple. according to frey, this probably causes blockfish to
-        // greed out in various scenarios where it sees a path to the bottom but it is not
-        // actually the end of the race. part of the issue is of course that it isn't
-        // communicated to blockfish whether or not the bottom of the board is actually
-        // the end of the race, but also that the intermediate steps to get to the bottom
-        // may be suboptimal placements when it isn't.
-        //
-        // 2. blockfish would actually average the last two evaluations and use that as
-        // the final rating. this is meant as a concession for the fact that the last
-        // placement made by the bot is not actually a placement we are required to make,
-        // since in reality there is going to be the opportunity to hold the final piece
-        // and use something else instead. so the 2nd to last rating is important in cases
-        // where the last piece leads to suboptimal board states which may be able to be
-        // avoided by holding the last piece. i think this improves the performance only
-        // slightly, but it is also a bit of a hack that deserves further consideration.
-
-        let rating = evaluate(matrix, pcnt as usize); // FIXME: pass weights to evaluation function
-
         arena.alloc_with(|| Self {
             matrix,
             queue,
             edge,
-            pcnt,
             rating,
         })
     }
@@ -96,8 +66,8 @@ impl Node {
         unsafe { self.queue.as_queue() }
     }
 
-    pub fn rating(&self) -> i32 {
-        self.rating
+    pub fn is_better(&self, other: &Node) -> bool {
+        self.rating > other.rating
     }
 
     pub fn is_terminal(&self) -> bool {
@@ -121,7 +91,14 @@ impl Node {
 
     /// Expands this node, allocating the children into the given arena.
     // `self` must be allocated inside `arena`
-    pub fn expand<'a>(&'a self, arena: &'a Arena) -> impl Iterator + 'a {
+    pub fn expand<'a, E>(
+        &'a self,
+        arena: &'a Arena,
+        evaluate: E,
+    ) -> impl Iterator + 'a
+    where
+        E: Fn(&Mat, Queue<'_>) -> i32 + 'a,
+    {
         let placements = self.queue().reachable().flat_map(|ty| {
             let locs = find_locations(self.matrix(), ty);
             locs.map(move |loc| Piece { ty, loc })
@@ -142,8 +119,11 @@ impl Node {
             let suc_matrix = copy_matrix(arena, &matrix);
             let suc_queue = self.queue().remove(placement.ty);
 
-            // TODO: transposition table lookup
-            Node::alloc(arena, suc_matrix, suc_queue, Some(edge))
+            // TODO: transposition table
+
+            let rating = evaluate(suc_matrix, suc_queue);
+
+            Node::alloc(arena, suc_matrix, suc_queue, rating, Some(edge))
         })
     }
 }