create Evaluator type that manages ratings; compute score relative to root
This commit is contained in:
parent
df1913d8f3
commit
32f66bb423
|
@ -10,11 +10,13 @@ use mino::srs::{Piece, Queue};
|
|||
mod node;
|
||||
|
||||
use self::node::{Node, RawNodePtr};
|
||||
use crate::eval::evaluate;
|
||||
|
||||
pub(crate) use bumpalo::Bump as Arena;
|
||||
|
||||
/// Encompasses an instance of the algorithm.
|
||||
pub struct Bot {
|
||||
evaluator: Evaluator,
|
||||
algorithm: SegmentedAStar,
|
||||
// IMPORTANT: `arena` must occur after `algorithm` so that it is dropped last.
|
||||
arena: Arena,
|
||||
|
@ -26,8 +28,13 @@ impl Bot {
|
|||
pub fn new(matrix: &Mat, queue: Queue<'_>) -> Self {
|
||||
let arena = bumpalo::Bump::new();
|
||||
let root = Node::alloc_root(&arena, matrix, queue);
|
||||
let evaluator = Evaluator::new(root);
|
||||
let algorithm = SegmentedAStar::new(root);
|
||||
Self { algorithm, arena }
|
||||
Self {
|
||||
evaluator,
|
||||
algorithm,
|
||||
arena,
|
||||
}
|
||||
}
|
||||
|
||||
/// Perform a single "iteration" of work, which may end up improving the suggestion.
|
||||
|
@ -35,7 +42,7 @@ impl Bot {
|
|||
/// deterministic, such that performing the same number of iterations gives the same
|
||||
/// resulting suggestion.
|
||||
pub fn think(&mut self) {
|
||||
self.algorithm.step(&self.arena);
|
||||
self.algorithm.step(&self.arena, &self.evaluator);
|
||||
}
|
||||
|
||||
/// Return the current best suggested placement. Returns `None` under two possible
|
||||
|
@ -47,6 +54,50 @@ impl Bot {
|
|||
}
|
||||
}
|
||||
|
||||
/// Produces ratings for search nodes, expressed relative to the root of the search.
struct Evaluator {
    // TODO: weights
    /// Result of the evaluation function on the root matrix with a piece count of 0.
    root_score: i32,
    /// Length of the root's queue; the number of pieces placed so far is derived from
    /// how much shorter a given queue is than this.
    root_queue_len: usize,
}
|
||||
|
||||
impl Evaluator {
|
||||
fn new(root: &Node) -> Self {
|
||||
Self {
|
||||
root_score: evaluate(root.matrix(), 0),
|
||||
root_queue_len: root.queue().len(),
|
||||
}
|
||||
}
|
||||
|
||||
fn evaluate(&self, mat: &Mat, queue: Queue<'_>) -> i32 {
|
||||
let pcnt = self.root_queue_len.saturating_sub(queue.len());
|
||||
|
||||
// FIXME: the old blockfish has two special edge cases for rating nodes that is
|
||||
// not done here.
|
||||
//
|
||||
// 1. nodes that reach the bottom of the board early ("solutions") are highly
|
||||
// prioritized. this is done by using the piece count *as the rating* in order to
|
||||
// force it to be extremely low, as well as sorting solutions by # of pieces in
|
||||
// case there are multiple. according to frey, this probably causes blockfish to
|
||||
// greed out in various scenarios where it sees a path to the bottom but it is not
|
||||
// actually the end of the race. part of the issue is of course that it isn't
|
||||
// communicated to blockfish whether or not the bottom of the board is actually
|
||||
// the end of the race, but also that the intermediate steps to get to the bottom
|
||||
// may be suboptimal placements when it isn't.
|
||||
//
|
||||
// 2. blockfish would actually average the last two evaluations and use that as
|
||||
// the final rating. this is meant as a concession for the fact that the last
|
||||
// placement made by the bot is not actually a placement we are required to make,
|
||||
// since in reality there is going to be the opportunity to hold the final piece
|
||||
// and use something else instead. so the 2nd to last rating is important in cases
|
||||
// where the last piece leads to suboptimal board states which may be able to be
|
||||
// avoided by holding the last piece. i think this improves the performance only
|
||||
// slightly, but it is also a bit of a hack that deserves further consideration.
|
||||
|
||||
// larger (i.e., further below the root score) is better
|
||||
self.root_score - evaluate(mat, pcnt)
|
||||
}
|
||||
}
|
||||
|
||||
// This implements an algorithm that is very similar to A* but has a slight
|
||||
// modification. Rather than one big open set, there are separate sets at each depth of
|
||||
// the search. After picking a node from one open set and expanding its children into the
|
||||
|
@ -96,14 +147,14 @@ impl SegmentedAStar {
|
|||
self.best.map(|node| unsafe { node.as_node() })
|
||||
}
|
||||
|
||||
fn step(&mut self, arena: &Arena) {
|
||||
match self.expand(arena) {
|
||||
fn step(&mut self, arena: &Arena, eval: &Evaluator) {
|
||||
match self.expand(arena, eval) {
|
||||
Ok(_) => {}
|
||||
Err(ShouldSelect) => self.select(),
|
||||
}
|
||||
}
|
||||
|
||||
fn expand<'a>(&mut self, arena: &'a Arena) -> Result<&'a Node, ShouldSelect> {
|
||||
fn expand<'a>(&mut self, arena: &'a Arena, eval: &Evaluator) -> Result<&'a Node, ShouldSelect> {
|
||||
let open_set = self.open.get_mut(self.depth);
|
||||
let cand = open_set.map_or(None, |set| set.pop()).ok_or(ShouldSelect)?;
|
||||
let cand = unsafe { cand.0.as_node() };
|
||||
|
@ -119,7 +170,7 @@ impl SegmentedAStar {
|
|||
self.open.resize_with(self.depth + 1, BinaryHeap::new);
|
||||
}
|
||||
|
||||
for suc in cand.expand(arena) {
|
||||
for suc in cand.expand(arena, |m, q| eval.evaluate(m, q)) {
|
||||
self.open[self.depth].push(suc.into());
|
||||
}
|
||||
|
||||
|
@ -127,10 +178,9 @@ impl SegmentedAStar {
|
|||
}
|
||||
|
||||
fn backup(&mut self, cand: &Node) {
|
||||
let rating = cand.rating();
|
||||
if self.best().map_or(true, |n| rating < n.rating()) {
|
||||
if self.best().map_or(true, |best| cand.is_better(best)) {
|
||||
tracing::debug!(
|
||||
"update suggestion ({}): {cand:?}",
|
||||
"{} suggestion: {cand:?}",
|
||||
self.best.map_or("1st", |_| "new")
|
||||
);
|
||||
self.best = Some(cand.into());
|
||||
|
@ -138,15 +188,17 @@ impl SegmentedAStar {
|
|||
}
|
||||
|
||||
fn select(&mut self) {
|
||||
self.open
|
||||
.iter()
|
||||
.map(|set| set.peek().map(|node| unsafe { node.0.as_node() }))
|
||||
.enumerate()
|
||||
.filter(|(_, best)| best.is_some())
|
||||
.min_by_key(|(_, best)| best.unwrap().rating())
|
||||
.map(|(depth, _)| {
|
||||
let mut best = None;
|
||||
self.depth = 0;
|
||||
|
||||
for (depth, set) in self.open.iter().enumerate() {
|
||||
let Some(cand) = set.peek() else { continue };
|
||||
let cand = unsafe { cand.0.as_node() };
|
||||
if best.map_or(true, |best| cand.is_better(best)) {
|
||||
best = Some(cand);
|
||||
self.depth = depth;
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -164,8 +216,11 @@ impl core::cmp::Ord for AStarNode {
|
|||
fn cmp(&self, other: &Self) -> core::cmp::Ordering {
    // NOTE(review): presumably sound because both pointers refer to nodes that are
    // still alive in the bot's arena — confirm against `as_node`'s contract.
    let lhs = unsafe { self.0.as_node() };
    let rhs = unsafe { other.0.as_node() };

    // A better node compares as greater. The comparison must report `Equal` when
    // neither node is better than the other: answering `Less` in both directions
    // (and for a node compared with itself) is not a total order, which `Ord`
    // requires.
    //
    // FIXME: equally rated nodes still have no deterministic tiebreaker.
    if lhs.is_better(rhs) {
        core::cmp::Ordering::Greater
    } else if rhs.is_better(lhs) {
        core::cmp::Ordering::Less
    } else {
        core::cmp::Ordering::Equal
    }
}
|
||||
}
|
||||
|
||||
|
|
|
@ -4,7 +4,6 @@ use mino::matrix::{Mat, MatBuf};
|
|||
use mino::srs::{Piece, PieceType, Queue};
|
||||
|
||||
use crate::bot::Arena;
|
||||
use crate::eval::evaluate;
|
||||
use crate::find::find_locations;
|
||||
|
||||
/// Represents a node in the search tree. A node basically just consists of a board state
|
||||
|
@ -13,7 +12,6 @@ pub(crate) struct Node {
|
|||
matrix: *const Mat,
|
||||
queue: RawQueue,
|
||||
edge: Option<Edge>,
|
||||
pcnt: u32,
|
||||
rating: i32,
|
||||
// currently there is no need to store a node's children, but maybe this could change
|
||||
// in the future.
|
||||
|
@ -39,7 +37,7 @@ impl Node {
|
|||
pub fn alloc_root<'a>(arena: &'a Arena, matrix: &Mat, queue: Queue<'_>) -> &'a Self {
|
||||
let matrix = copy_matrix(arena, matrix);
|
||||
let queue = copy_queue(arena, queue);
|
||||
Node::alloc(arena, matrix, queue, None)
|
||||
Node::alloc(arena, matrix, queue, i32::MIN, None)
|
||||
}
|
||||
|
||||
// `matrix` and `queue` must be allocated inside `arena`
|
||||
|
@ -47,43 +45,15 @@ impl Node {
|
|||
arena: &'a Arena,
|
||||
matrix: &'a Mat,
|
||||
queue: Queue<'a>,
|
||||
rating: i32,
|
||||
edge: Option<Edge>,
|
||||
) -> &'a Self {
|
||||
let pcnt = match &edge {
|
||||
None => 0,
|
||||
Some(e) => e.parent().pcnt + 1,
|
||||
};
|
||||
let matrix = matrix as *const Mat;
|
||||
let queue = RawQueue::from(queue);
|
||||
|
||||
// FIXME: the old blockfish has two special edge cases for rating nodes that is
|
||||
// not done here.
|
||||
//
|
||||
// 1. nodes that reach the bottom of the board early ("solutions") are highly
|
||||
// prioritized. this is done by using the piece count *as the rating* in order to
|
||||
// force it to be extremely low, as well as sorting solutions by # of pieces in
|
||||
// case there are multiple. according to frey, this probably causes blockfish to
|
||||
// greed out in various scenarios where it sees a path to the bottom but it is not
|
||||
// actually the end of the race. part of the issue is of course that it isn't
|
||||
// communicated to blockfish whether or not the bottom of the board is actually
|
||||
// the end of the race, but also that the intermediate steps to get to the bottom
|
||||
// may be suboptimal placements when it isn't.
|
||||
//
|
||||
// 2. blockfish would actually average the last two evaluations and use that as
|
||||
// the final rating. this is meant as a concession for the fact that the last
|
||||
// placement made by the bot is not actually a placement we are required to make,
|
||||
// since in reality there is going to be the opportunity to hold the final piece
|
||||
// and use something else instead. so the 2nd to last rating is important in cases
|
||||
// where the last piece leads to suboptimal board states which may be able to be
|
||||
// avoided by holding the last piece. i think this improves the performance only
|
||||
// slightly, but it is also a bit of a hack that deserves further consideration.
|
||||
|
||||
let rating = evaluate(matrix, pcnt as usize); // FIXME: pass weights to evaluation function
|
||||
|
||||
arena.alloc_with(|| Self {
|
||||
matrix,
|
||||
queue,
|
||||
edge,
|
||||
pcnt,
|
||||
rating,
|
||||
})
|
||||
}
|
||||
|
@ -96,8 +66,8 @@ impl Node {
|
|||
unsafe { self.queue.as_queue() }
|
||||
}
|
||||
|
||||
pub fn rating(&self) -> i32 {
|
||||
self.rating
|
||||
pub fn is_better(&self, other: &Node) -> bool {
|
||||
self.rating > other.rating
|
||||
}
|
||||
|
||||
pub fn is_terminal(&self) -> bool {
|
||||
|
@ -121,7 +91,14 @@ impl Node {
|
|||
|
||||
/// Expands this node, allocating the children into the given arena.
|
||||
// `self` must be allocated inside `arena`
|
||||
pub fn expand<'a>(&'a self, arena: &'a Arena) -> impl Iterator<Item = &'a Node> + 'a {
|
||||
pub fn expand<'a, E>(
|
||||
&'a self,
|
||||
arena: &'a Arena,
|
||||
evaluate: E,
|
||||
) -> impl Iterator<Item = &'a Node> + 'a
|
||||
where
|
||||
E: Fn(&Mat, Queue<'_>) -> i32 + 'a,
|
||||
{
|
||||
let placements = self.queue().reachable().flat_map(|ty| {
|
||||
let locs = find_locations(self.matrix(), ty);
|
||||
locs.map(move |loc| Piece { ty, loc })
|
||||
|
@ -142,8 +119,11 @@ impl Node {
|
|||
let suc_matrix = copy_matrix(arena, &matrix);
|
||||
let suc_queue = self.queue().remove(placement.ty);
|
||||
|
||||
// TODO: transposition table lookup
|
||||
Node::alloc(arena, suc_matrix, suc_queue, Some(edge))
|
||||
// TODO: transposition table
|
||||
|
||||
let rating = evaluate(suc_matrix, suc_queue);
|
||||
|
||||
Node::alloc(arena, suc_matrix, suc_queue, rating, Some(edge))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue