create Evaluator type that manages ratings; compute score relative to root

2023-04-15 18:24:48 -04:00 · 2023-04-15 18:24:48 -04:00 · 32f66bb423
parent df1913d8f3
commit 32f66bb423
2 changed files with 92 additions and 57 deletions
--- a/fish/src/bot.rs
+++ b/fish/src/bot.rs
@ -10,11 +10,13 @@ use mino::srs::{Piece, Queue};
 mod node;
 use self::node::{Node, RawNodePtr};
 use crate::eval::evaluate;
 pub(crate) use bumpalo::Bump as Arena;
 /// Encompasses an instance of the algorithm.
 pub struct Bot {
    evaluator: Evaluator,
    algorithm: SegmentedAStar,
    // IMPORTANT: `arena` must occur after `algorithm` so that it is dropped last.
    arena: Arena,
@ -26,8 +28,13 @@ impl Bot {
    pub fn new(matrix: &Mat, queue: Queue<'_>) -> Self {
        let arena = bumpalo::Bump::new();
        let root = Node::alloc_root(&arena, matrix, queue);
        let evaluator = Evaluator::new(root);
        let algorithm = SegmentedAStar::new(root);
-        Self { algorithm, arena }
+        Self {
            evaluator,
            algorithm,
            arena,
        }
    }
    /// Perform a single "iteration" of work, which may end up improving the suggestion.
@ -35,7 +42,7 @@ impl Bot {
    /// deterministic, such that performing the same number of iterations gives the same
    /// resulting suggestion.
    pub fn think(&mut self) {
-        self.algorithm.step(&self.arena);
+        self.algorithm.step(&self.arena, &self.evaluator);
    }
    /// Return the current best suggested placement. Returns `None` under two possible
@ -47,6 +54,50 @@ impl Bot {
    }
 }
 struct Evaluator {
    // TODO: weights
    root_score: i32,
    root_queue_len: usize,
 }
 impl Evaluator {
    fn new(root: &Node) -> Self {
        Self {
            root_score: evaluate(root.matrix(), 0),
            root_queue_len: root.queue().len(),
        }
    }
    fn evaluate(&self, mat: &Mat, queue: Queue<'_>) -> i32 {
        let pcnt = self.root_queue_len.saturating_sub(queue.len());
        // FIXME: the old blockfish has two special edge cases for rating nodes that are
        // not handled here.
        //
        // 1. nodes that reach the bottom of the board early ("solutions") are highly
        // prioritized. this is done by using the piece count *as the rating* in order to
        // force it to be extremely low, as well as sorting solutions by # of pieces in
        // case there are multiple. according to frey, this probably causes blockfish to
        // greed out in various scenarios where it sees a path to the bottom but it is not
        // actually the end of the race. part of the issue is of course that it isn't
        // communicated to blockfish whether or not the bottom of the board is actually
        // the end of the race, but also that the intermediate steps to get to the bottom
        // may be suboptimal placements when it isn't.
        //
        // 2. blockfish would actually average the last two evaluations and use that as
        // the final rating. this is meant as a concession for the fact that the last
        // placement made by the bot is not actually a placement we are required to make,
        // since in reality there is going to be the opportunity to hold the final piece
        // and use something else instead. so the 2nd to last rating is important in cases
        // where the last piece leads to suboptimal board states which may be able to be
        // avoided by holding the last piece. i think this improves the performance only
        // slightly, but it is also a bit of a hack that deserves further consideration.
        // a larger rating (i.e., an evaluation further below the root score) is better
        self.root_score - evaluate(mat, pcnt)
    }
 }
 // This implements an algorithm that is very similar to A* but has a slight
 // modification. Rather than one big open set, there are separate sets at each depth of
 // the search. After picking a node from one open set and expanding its children into the
@ -96,14 +147,14 @@ impl SegmentedAStar {
        self.best.map(|node| unsafe { node.as_node() })
    }
-    fn step(&mut self, arena: &Arena) {
+    fn step(&mut self, arena: &Arena, eval: &Evaluator) {
-        match self.expand(arena) {
+        match self.expand(arena, eval) {
            Ok(_) => {}
            Err(ShouldSelect) => self.select(),
        }
    }
-    fn expand<'a>(&mut self, arena: &'a Arena) -> Result<&'a Node, ShouldSelect> {
+    fn expand<'a>(&mut self, arena: &'a Arena, eval: &Evaluator) -> Result<&'a Node, ShouldSelect> {
        let open_set = self.open.get_mut(self.depth);
        let cand = open_set.map_or(None, |set| set.pop()).ok_or(ShouldSelect)?;
        let cand = unsafe { cand.0.as_node() };
@ -119,7 +170,7 @@ impl SegmentedAStar {
            self.open.resize_with(self.depth + 1, BinaryHeap::new);
        }
-        for suc in cand.expand(arena) {
+        for suc in cand.expand(arena, |m, q| eval.evaluate(m, q)) {
            self.open[self.depth].push(suc.into());
        }
@ -127,10 +178,9 @@ impl SegmentedAStar {
    }
    fn backup(&mut self, cand: &Node) {
-        let rating = cand.rating();
+        if self.best().map_or(true, |best| cand.is_better(best)) {
        if self.best().map_or(true, |n| rating < n.rating()) {
            tracing::debug!(
-                "update suggestion ({}): {cand:?}",
+                "{} suggestion: {cand:?}",
                self.best.map_or("1st", |_| "new")
            );
            self.best = Some(cand.into());
@ -138,15 +188,17 @@ impl SegmentedAStar {
    }
    fn select(&mut self) {
-        self.open
+        let mut best = None;
-            .iter()
+        self.depth = 0;
-            .map(|set| set.peek().map(|node| unsafe { node.0.as_node() }))
+
-            .enumerate()
+        for (depth, set) in self.open.iter().enumerate() {
-            .filter(|(_, best)| best.is_some())
+            let Some(cand) = set.peek() else { continue };
-            .min_by_key(|(_, best)| best.unwrap().rating())
+            let cand = unsafe { cand.0.as_node() };
-            .map(|(depth, _)| {
+            if best.map_or(true, |best| cand.is_better(best)) {
                best = Some(cand);
                self.depth = depth;
-            });
+            }
        }
    }
 }
@ -164,8 +216,11 @@ impl core::cmp::Ord for AStarNode {
    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
        let lhs = unsafe { self.0.as_node() };
        let rhs = unsafe { other.0.as_node() };
-        // FIXME: add a deterministic tiebreaker
+        if lhs.is_better(rhs) {
-        lhs.rating().cmp(&rhs.rating()).reverse()
+            core::cmp::Ordering::Greater
        } else {
            core::cmp::Ordering::Less
        }
    }
 }
--- a/fish/src/bot/node.rs
+++ b/fish/src/bot/node.rs
@ -4,7 +4,6 @@ use mino::matrix::{Mat, MatBuf};
 use mino::srs::{Piece, PieceType, Queue};
 use crate::bot::Arena;
 use crate::eval::evaluate;
 use crate::find::find_locations;
 /// Represents a node in the search tree. A node basically just consists of a board state
@ -13,7 +12,6 @@ pub(crate) struct Node {
    matrix: *const Mat,
    queue: RawQueue,
    edge: Option<Edge>,
    pcnt: u32,
    rating: i32,
    // currently there is no need to store a node's children, but maybe this could change
    // in the future.
@ -39,7 +37,7 @@ impl Node {
    pub fn alloc_root<'a>(arena: &'a Arena, matrix: &Mat, queue: Queue<'_>) -> &'a Self {
        let matrix = copy_matrix(arena, matrix);
        let queue = copy_queue(arena, queue);
-        Node::alloc(arena, matrix, queue, None)
+        Node::alloc(arena, matrix, queue, i32::MIN, None)
    }
    // `matrix` and `queue` must be allocated inside `arena`
@ -47,43 +45,15 @@ impl Node {
        arena: &'a Arena,
        matrix: &'a Mat,
        queue: Queue<'a>,
        rating: i32,
        edge: Option<Edge>,
    ) -> &'a Self {
-        let pcnt = match &edge {
+        let matrix = matrix as *const Mat;
            None => 0,
            Some(e) => e.parent().pcnt + 1,
        };
        let queue = RawQueue::from(queue);
        // FIXME: the old blockfish has two special edge cases for rating nodes that is
        // not done here.
        //
        // 1. nodes that reach the bottom of the board early ("solutions") are highly
        // prioritized. this is done by using the piece count *as the rating* in order to
        // force it to be extremely low, as well as sorting solutions by # of pieces in
        // case there are multiple. according to frey, this probably causes blockfish to
        // greed out in various scenarios where it sees a path to the bottom but it is not
        // actually the end of the race. part of the issue is of course that it isn't
        // communicated to blockfish whether or not the bottom of the board is actually
        // the end of the race, but also that the intermediate steps to get to the bottom
        // may be suboptimal placements when it isn't.
        //
        // 2. blockfish would actually average the last two evaluations and use that as
        // the final rating. this is meant as a concession for the fact that the last
        // placement made by the bot is not actually a placement we are required to make,
        // since in reality there is going to be the opportunity to hold the final piece
        // and use something else instead. so the 2nd to last rating is important in cases
        // where the last piece leads to suboptimal board states which may be able to be
        // avoided by holding the last piece. i think this improves the performance only
        // slightly, but it is also a bit of a hack that deserves further consideration.
        let rating = evaluate(matrix, pcnt as usize); // FIXME: pass weights to evaluation function
        arena.alloc_with(|| Self {
            matrix,
            queue,
            edge,
            pcnt,
            rating,
        })
    }
@ -96,8 +66,8 @@ impl Node {
        unsafe { self.queue.as_queue() }
    }
-    pub fn rating(&self) -> i32 {
+    pub fn is_better(&self, other: &Node) -> bool {
-        self.rating
+        self.rating > other.rating
    }
    pub fn is_terminal(&self) -> bool {
@ -121,7 +91,14 @@ impl Node {
    /// Expands this node, allocating the children into the given arena.
    // `self` must be allocated inside `arena`
-    pub fn expand<'a>(&'a self, arena: &'a Arena) -> impl Iterator<Item = &'a Node> + 'a {
+    pub fn expand<'a, E>(
        &'a self,
        arena: &'a Arena,
        evaluate: E,
    ) -> impl Iterator<Item = &'a Node> + 'a
    where
        E: Fn(&Mat, Queue<'_>) -> i32 + 'a,
    {
        let placements = self.queue().reachable().flat_map(|ty| {
            let locs = find_locations(self.matrix(), ty);
            locs.map(move |loc| Piece { ty, loc })
@ -142,8 +119,11 @@ impl Node {
            let suc_matrix = copy_matrix(arena, &matrix);
            let suc_queue = self.queue().remove(placement.ty);
-            // TODO: transposition table lookup
+            // TODO: transposition table
-            Node::alloc(arena, suc_matrix, suc_queue, Some(edge))
+
            let rating = evaluate(suc_matrix, suc_queue);
            Node::alloc(arena, suc_matrix, suc_queue, rating, Some(edge))
        })
    }
 }