create Evaluator type that manages ratings; compute score relative to root

2023-04-15 18:24:48 -04:00 · 2023-04-15 18:24:48 -04:00 · 32f66bb423
parent df1913d8f3
commit 32f66bb423
2 changed files with 92 additions and 57 deletions
--- a/fish/src/bot.rs
+++ b/fish/src/bot.rs
@ -10,11 +10,13 @@ use mino::srs::{Piece, Queue};
 mod node;

 use self::node::{Node, RawNodePtr};
+use crate::eval::evaluate;

 pub(crate) use bumpalo::Bump as Arena;

 /// Encompasses an instance of the algorithm.
 pub struct Bot {
+    evaluator: Evaluator,
    algorithm: SegmentedAStar,
    // IMPORTANT: `arena` must occur after `algorithm` so that it is dropped last.
    arena: Arena,
@ -26,8 +28,13 @@ impl Bot {
    pub fn new(matrix: &Mat, queue: Queue<'_>) -> Self {
        let arena = bumpalo::Bump::new();
        let root = Node::alloc_root(&arena, matrix, queue);
+        let evaluator = Evaluator::new(root);
        let algorithm = SegmentedAStar::new(root);
-        Self { algorithm, arena }
+        Self {
+            evaluator,
+            algorithm,
+            arena,
+        }
    }

    /// Perform a single "iteration" of work, which may end up improving the suggestion.
@ -35,7 +42,7 @@ impl Bot {
    /// deterministic, such that performing the same number of iterations gives the same
    /// resulting suggestion.
    pub fn think(&mut self) {
-        self.algorithm.step(&self.arena);
+        self.algorithm.step(&self.arena, &self.evaluator);
    }

    /// Return the current best suggested placement. Returns `None` under two possible
@ -47,6 +54,50 @@ impl Bot {
    }
 }

+struct Evaluator {
+    // TODO: weights
+    root_score: i32,
+    root_queue_len: usize,
+}
+
+impl Evaluator {
+    fn new(root: &Node) -> Self {
+        Self {
+            root_score: evaluate(root.matrix(), 0),
+            root_queue_len: root.queue().len(),
+        }
+    }
+
+    fn evaluate(&self, mat: &Mat, queue: Queue<'_>) -> i32 {
+        let pcnt = self.root_queue_len.saturating_sub(queue.len());
+
+        // FIXME: the old blockfish has two special edge cases for rating nodes that are
+        // not handled here.
+        //
+        // 1. nodes that reach the bottom of the board early ("solutions") are highly
+        // prioritized. this is done by using the piece count *as the rating* in order to
+        // force it to be extremely low, as well as sorting solutions by # of pieces in
+        // case there are multiple. according to frey, this probably causes blockfish to
+        // greed out in various scenarios where it sees a path to the bottom but it is not
+        // actually the end of the race. part of the issue is of course that it isn't
+        // communicated to blockfish whether or not the bottom of the board is actually
+        // the end of the race, but also that the intermediate steps to get to the bottom
+        // may be suboptimal placements when it isn't.
+        //
+        // 2. blockfish would actually average the last two evaluations and use that as
+        // the final rating. this is meant as a concession for the fact that the last
+        // placement made by the bot is not actually a placement we are required to make,
+        // since in reality there is going to be the opportunity to hold the final piece
+        // and use something else instead. so the 2nd to last rating is important in cases
+        // where the last piece leads to suboptimal board states which may be able to be
+        // avoided by holding the last piece. i think this improves the performance only
+        // slightly, but it is also a bit of a hack that deserves further consideration.
+
+        // larger (i.e., further below the root score) is better
+        self.root_score - evaluate(mat, pcnt)
+    }
+}
+
 // This implements an algorithm that is very similar to A* but has a slight
 // modification. Rather than one big open set, there are separate sets at each depth of
 // the search. After picking a node from one open set and expanding its children into the
@ -96,14 +147,14 @@ impl SegmentedAStar {
        self.best.map(|node| unsafe { node.as_node() })
    }

-    fn step(&mut self, arena: &Arena) {
-        match self.expand(arena) {
+    fn step(&mut self, arena: &Arena, eval: &Evaluator) {
+        match self.expand(arena, eval) {
            Ok(_) => {}
            Err(ShouldSelect) => self.select(),
        }
    }

-    fn expand<'a>(&mut self, arena: &'a Arena) -> Result<&'a Node, ShouldSelect> {
+    fn expand<'a>(&mut self, arena: &'a Arena, eval: &Evaluator) -> Result<&'a Node, ShouldSelect> {
        let open_set = self.open.get_mut(self.depth);
        let cand = open_set.map_or(None, |set| set.pop()).ok_or(ShouldSelect)?;
        let cand = unsafe { cand.0.as_node() };
@ -119,7 +170,7 @@ impl SegmentedAStar {
            self.open.resize_with(self.depth + 1, BinaryHeap::new);
        }

-        for suc in cand.expand(arena) {
+        for suc in cand.expand(arena, |m, q| eval.evaluate(m, q)) {
            self.open[self.depth].push(suc.into());
        }

@ -127,10 +178,9 @@ impl SegmentedAStar {
    }

    fn backup(&mut self, cand: &Node) {
-        let rating = cand.rating();
-        if self.best().map_or(true, |n| rating < n.rating()) {
+        if self.best().map_or(true, |best| cand.is_better(best)) {
            tracing::debug!(
-                "update suggestion ({}): {cand:?}",
+                "{} suggestion: {cand:?}",
                self.best.map_or("1st", |_| "new")
            );
            self.best = Some(cand.into());
@ -138,15 +188,17 @@ impl SegmentedAStar {
    }

    fn select(&mut self) {
-        self.open
-            .iter()
-            .map(|set| set.peek().map(|node| unsafe { node.0.as_node() }))
-            .enumerate()
-            .filter(|(_, best)| best.is_some())
-            .min_by_key(|(_, best)| best.unwrap().rating())
-            .map(|(depth, _)| {
+        let mut best = None;
+        self.depth = 0;
+
+        for (depth, set) in self.open.iter().enumerate() {
+            let Some(cand) = set.peek() else { continue };
+            let cand = unsafe { cand.0.as_node() };
+            if best.map_or(true, |best| cand.is_better(best)) {
+                best = Some(cand);
                self.depth = depth;
-            });
+            }
+        }
    }
 }

@ -164,8 +216,11 @@ impl core::cmp::Ord for AStarNode {
    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
        let lhs = unsafe { self.0.as_node() };
        let rhs = unsafe { other.0.as_node() };
-        // FIXME: add a deterministic tiebreaker
-        lhs.rating().cmp(&rhs.rating()).reverse()
+        if lhs.is_better(rhs) {
+            core::cmp::Ordering::Greater
+        } else {
+            core::cmp::Ordering::Less
+        }
    }
 }

--- a/fish/src/bot/node.rs
+++ b/fish/src/bot/node.rs
@ -4,7 +4,6 @@ use mino::matrix::{Mat, MatBuf};
 use mino::srs::{Piece, PieceType, Queue};

 use crate::bot::Arena;
-use crate::eval::evaluate;
 use crate::find::find_locations;

 /// Represents a node in the search tree. A node basically just consists of a board state
@ -13,7 +12,6 @@ pub(crate) struct Node {
    matrix: *const Mat,
    queue: RawQueue,
    edge: Option<Edge>,
-    pcnt: u32,
    rating: i32,
    // currently there is no need to store a node's children, but maybe this could change
    // in the future.
@ -39,7 +37,7 @@ impl Node {
    pub fn alloc_root<'a>(arena: &'a Arena, matrix: &Mat, queue: Queue<'_>) -> &'a Self {
        let matrix = copy_matrix(arena, matrix);
        let queue = copy_queue(arena, queue);
-        Node::alloc(arena, matrix, queue, None)
+        Node::alloc(arena, matrix, queue, i32::MIN, None)
    }

    // `matrix` and `queue` must be allocated inside `arena`
@ -47,43 +45,15 @@ impl Node {
        arena: &'a Arena,
        matrix: &'a Mat,
        queue: Queue<'a>,
+        rating: i32,
        edge: Option<Edge>,
    ) -> &'a Self {
-        let pcnt = match &edge {
-            None => 0,
-            Some(e) => e.parent().pcnt + 1,
-        };
+        let matrix = matrix as *const Mat;
        let queue = RawQueue::from(queue);
-
-        // FIXME: the old blockfish has two special edge cases for rating nodes that is
-        // not done here.
-        //
-        // 1. nodes that reach the bottom of the board early ("solutions") are highly
-        // prioritized. this is done by using the piece count *as the rating* in order to
-        // force it to be extremely low, as well as sorting solutions by # of pieces in
-        // case there are multiple. according to frey, this probably causes blockfish to
-        // greed out in various scenarios where it sees a path to the bottom but it is not
-        // actually the end of the race. part of the issue is of course that it isn't
-        // communicated to blockfish whether or not the bottom of the board is actually
-        // the end of the race, but also that the intermediate steps to get to the bottom
-        // may be suboptimal placements when it isn't.
-        //
-        // 2. blockfish would actually average the last two evaluations and use that as
-        // the final rating. this is meant as a concession for the fact that the last
-        // placement made by the bot is not actually a placement we are required to make,
-        // since in reality there is going to be the opportunity to hold the final piece
-        // and use something else instead. so the 2nd to last rating is important in cases
-        // where the last piece leads to suboptimal board states which may be able to be
-        // avoided by holding the last piece. i think this improves the performance only
-        // slightly, but it is also a bit of a hack that deserves further consideration.
-
-        let rating = evaluate(matrix, pcnt as usize); // FIXME: pass weights to evaluation function
-
        arena.alloc_with(|| Self {
            matrix,
            queue,
            edge,
-            pcnt,
            rating,
        })
    }
@ -96,8 +66,8 @@ impl Node {
        unsafe { self.queue.as_queue() }
    }

-    pub fn rating(&self) -> i32 {
-        self.rating
+    pub fn is_better(&self, other: &Node) -> bool {
+        self.rating > other.rating
    }

    pub fn is_terminal(&self) -> bool {
@ -121,7 +91,14 @@ impl Node {

    /// Expands this node, allocating the children into the given arena.
    // `self` must be allocated inside `arena`
-    pub fn expand<'a>(&'a self, arena: &'a Arena) -> impl Iterator<Item = &'a Node> + 'a {
+    pub fn expand<'a, E>(
+        &'a self,
+        arena: &'a Arena,
+        evaluate: E,
+    ) -> impl Iterator<Item = &'a Node> + 'a
+    where
+        E: Fn(&Mat, Queue<'_>) -> i32 + 'a,
+    {
        let placements = self.queue().reachable().flat_map(|ty| {
            let locs = find_locations(self.matrix(), ty);
            locs.map(move |loc| Piece { ty, loc })
@ -142,8 +119,11 @@ impl Node {
            let suc_matrix = copy_matrix(arena, &matrix);
            let suc_queue = self.queue().remove(placement.ty);

-            // TODO: transposition table lookup
-            Node::alloc(arena, suc_matrix, suc_queue, Some(edge))
+            // TODO: transposition table
+
+            let rating = evaluate(suc_matrix, suc_queue);
+
+            Node::alloc(arena, suc_matrix, suc_queue, rating, Some(edge))
        })
    }
 }