From bbdc8b288a8310c818ca852adec090b78d3ecf5a Mon Sep 17 00:00:00 2001 From: Daniel Dolezal Date: Sat, 4 Apr 2026 12:00:02 +0200 Subject: [PATCH] create better future planning into BestBattleSnake --- snakes/BestBattleSnake.py | 290 +++++++++++++++++++++++++++++----- tests/test_BestBattleSnake.py | 192 ++++++++++++++++++++++ 2 files changed, 443 insertions(+), 39 deletions(-) diff --git a/snakes/BestBattleSnake.py b/snakes/BestBattleSnake.py index 3652ebb..c3e1390 100644 --- a/snakes/BestBattleSnake.py +++ b/snakes/BestBattleSnake.py @@ -45,6 +45,9 @@ class BestBattleSnake(TemplateSnake): self.rl_base_dataset_path = Path(os.getenv("RL_BASE_DATASET", "data/dataset/best_moves.jsonl")) self.rl_bootstrap_path = Path(os.getenv("RL_BOOTSTRAP_OUTPUT", "data/dataset/rl_bootstrap.jsonl")) self.rl_needs_more_data = False + self.future_planning_depth = max(1, min(4, self._env_int("BATTLE_FUTURE_PLANNING_DEPTH", default=2))) + self.future_planning_branch = max(1, min(3, self._env_int("BATTLE_FUTURE_PLANNING_BRANCH", default=2))) + self.future_planning_min_time_ms = max(25, self._env_int("BATTLE_FUTURE_PLANNING_MIN_MS", default=70)) def _get_duel_style(self) -> str: """Resolve duel tuning style from `BATTLE_SNAKE_DUEL_STYLE` or `DUEL_STYLE`.""" @@ -89,7 +92,7 @@ class BestBattleSnake(TemplateSnake): value = os.getenv(name) if value is None: return default - return value.lower() in {'1', 'true', 'yes', 'on'} + return value.lower() in {"1", "true", "yes", "on"} def _env_int(self, name:str, default:int) -> int: value = os.getenv(name) @@ -442,11 +445,20 @@ class BestBattleSnake(TemplateSnake): else: considered_moves = list(scores.keys()) - best_score = max(scores[move] for move in considered_moves) - top_moves = [ - move for move in considered_moves if best_score - scores[move] <= 1.5 - ] - best_move = random.choice(top_moves) + best_move = self._pick_best_with_future_planning( + considered_moves=considered_moves, + scores=scores, + safe_moves=safe_moves, + my_body=my_body, + other_snakes=other_snakes, + food_set=food_set, + is_constrictor=is_constrictor, + width=width, + height=height, + enemy_can_grow_cache=enemy_can_grow_cache, + deadline=deadline, + tie_window=1.5, + ) self.recent_heads.append(current_head_point) self.last_move = best_move self.add_to_history({"turn": turn, "move": best_move, "scores": scores}) @@ -643,11 +655,21 @@ class BestBattleSnake(TemplateSnake): if not scores: return random.choice(list(safe_moves.keys())), {} - best_score = max(scores[move] for move in considered_moves) - top_moves = [ - move for move in considered_moves if best_score - scores[move] <= 1.5 - ] - return random.choice(top_moves), scores + best_move = self._pick_best_with_future_planning( + considered_moves=considered_moves, + scores=scores, + safe_moves=safe_moves, + my_body=my_body, + other_snakes=other_snakes, + food_set=food_set, + is_constrictor=False, + width=width, + height=height, + enemy_can_grow_cache=enemy_can_grow_cache, + deadline=deadline, + tie_window=1.5, + ) + return best_move, scores def _choose_constrictor_move(self, safe_moves:MoveMap, my_body:list[Coord], my_len:int, other_snakes:list[SnakeState], food_set:set[Point], enemy_attack_map:AttackMap, enemy_heads:list[Point], enemy_can_grow_cache:dict[Any, bool], width:int, height:int, deadline:float|None=None) -> tuple[str, dict[str, float]]: """Score and select a move for constrictor games.""" @@ -773,11 +795,21 @@ class BestBattleSnake(TemplateSnake): if not scores: return random.choice(list(safe_moves.keys())), {} - best_score = max(scores[move] for move in considered_moves) - top_moves = [ - move for move in considered_moves if best_score - scores[move] <= 2.0 - ] - return random.choice(top_moves), scores + best_move = self._pick_best_with_future_planning( + considered_moves=considered_moves, + scores=scores, + safe_moves=safe_moves, + my_body=my_body, + other_snakes=other_snakes, + food_set=food_set, + is_constrictor=True, + width=width, + height=height, + enemy_can_grow_cache=enemy_can_grow_cache, + deadline=deadline, + tie_window=2.0, + ) + return best_move, scores def _legal_moves(self, my_head:Coord, my_body:list[Coord], other_snakes:list[SnakeState], food_set:set[Point], is_constrictor:bool, width:int, height:int) -> MoveMap: """Return legal immediate moves after body, wall, and tail checks.""" @@ -829,15 +861,6 @@ class BestBattleSnake(TemplateSnake): if is_constrictor: continue - snake_id = snake.get("id") - enemy_can_grow = ( - enemy_can_grow_cache.get(snake_id) - if enemy_can_grow_cache and snake_id is not None - else self._enemy_can_grow_this_turn(snake, food_set) - ) - if enemy_can_grow: - continue - if self._is_tail_stacked(snake["body"]): continue @@ -850,19 +873,14 @@ class BestBattleSnake(TemplateSnake): """Map cells enemies can contest next turn to their effective length.""" occupied = self._occupied_cells(my_snake["body"], other_snakes) my_body_points = {(segment["x"], segment["y"]) for segment in my_snake["body"]} + my_tail = (my_snake["body"][-1]["x"], my_snake["body"][-1]["y"]) + my_tail_stacked = self._is_tail_stacked(my_snake["body"]) attack_map = {} for enemy in other_snakes: enemy_len = enemy.get("length", len(enemy["body"])) enemy_tail = (enemy["body"][-1]["x"], enemy["body"][-1]["y"]) enemy_tail_stacked = self._is_tail_stacked(enemy["body"]) - snake_id = enemy.get("id") - enemy_can_grow = ( - enemy_can_grow_cache.get(snake_id) - if enemy_can_grow_cache and snake_id is not None - else self._enemy_can_grow_this_turn(enemy, food_set) - ) - enemy_head = enemy["head"] for dx, dy in self.DIRECTIONS.values(): point = (enemy_head["x"] + dx, enemy_head["y"] + dy) @@ -873,15 +891,17 @@ class BestBattleSnake(TemplateSnake): not is_constrictor and point == enemy_tail and not enemy_tail_stacked - and not enemy_can_grow ) + can_contest_my_tail = (not is_constrictor and point == my_tail and not my_tail_stacked) - if point in occupied and not can_step_on_enemy_tail: + if point in occupied and not can_step_on_enemy_tail and not can_contest_my_tail: continue - # Do not consider impossible overlap directly into my own occupied body except head swap possibilities. + # Ignore impossible overlap into our occupied body, but keep our vacatable tail + # so we can detect dangerous head-to-head contests when tail-chasing. if point in my_body_points: - continue + if is_constrictor or my_tail_stacked or point != my_tail: + continue previous = attack_map.get(point) if previous is None or enemy_len > previous: @@ -954,6 +974,199 @@ class BestBattleSnake(TemplateSnake): return False return perf_counter() >= deadline + def _remaining_ms(self, deadline:float|None) -> float: + if deadline is None: + return 10_000.0 + return max(0.0, (deadline - perf_counter()) * 1000.0) + + def _pick_best_with_future_planning(self, considered_moves:list[str], scores:dict[str, float], safe_moves:MoveMap, my_body:list[Coord], other_snakes:list[SnakeState], food_set:set[Point], is_constrictor:bool, width:int, height:int, enemy_can_grow_cache:dict[Any, bool]|None, deadline:float|None, tie_window:float) -> str: + best_score = max(scores[move] for move in considered_moves) + top_moves = [move for move in considered_moves if best_score - scores[move] <= tie_window] + if len(top_moves) <= 1: + return top_moves[0] + + if self._time_exceeded(deadline) or self._remaining_ms(deadline) < self.future_planning_min_time_ms: + return random.choice(top_moves) + + candidate_moves = sorted(top_moves, key=lambda move: scores[move], reverse=True)[:3] + lookahead_bonus:dict[str, float] = {} + for move in candidate_moves: + if self._time_exceeded(deadline): + break + bonus = self._future_rollout_bonus_for_move( + move=move, + safe_moves=safe_moves, + my_body=my_body, + other_snakes=other_snakes, + food_set=food_set, + is_constrictor=is_constrictor, + width=width, + height=height, + enemy_can_grow_cache=enemy_can_grow_cache, + depth=self.future_planning_depth, + branch_limit=self.future_planning_branch, + deadline=deadline, + ) + lookahead_bonus[move] = bonus + + if not lookahead_bonus: + return random.choice(top_moves) + + for move, bonus in lookahead_bonus.items(): + scores[move] += bonus + + refined_best = max(scores[move] for move in top_moves) + refined_top = [ + move + for move in top_moves + if refined_best - scores[move] <= max(0.5, tie_window / 2) + ] + return random.choice(refined_top) + + def _future_rollout_bonus_for_move(self, move:str, safe_moves:MoveMap, my_body:list[Coord], other_snakes:list[SnakeState], food_set:set[Point], is_constrictor:bool, width:int, height:int, enemy_can_grow_cache:dict[Any, bool]|None, depth:int, branch_limit:int, deadline:float|None) -> float: + pos = safe_moves.get(move) + if pos is None: + return -250.0 + point = (pos["x"], pos["y"]) + ate_food = point in food_set + future_body = self._future_body( + current_body=my_body, + next_head=pos, + ate_food=ate_food, + is_constrictor=is_constrictor, + ) + raw_score = self._future_survival_tree_score( + my_body=future_body, + other_snakes=other_snakes, + food_set=food_set, + is_constrictor=is_constrictor, + width=width, + height=height, + enemy_can_grow_cache=enemy_can_grow_cache, + depth=max(1, depth), + branch_limit=max(1, branch_limit), + deadline=deadline, + ) + return raw_score * 0.06 + + def _future_survival_tree_score(self, my_body:list[Coord], other_snakes:list[SnakeState], food_set:set[Point], is_constrictor:bool, width:int, height:int, enemy_can_grow_cache:dict[Any, bool]|None, depth:int, branch_limit:int, deadline:float|None, + ) -> float: + if depth <= 0 or self._time_exceeded(deadline): + return 0.0 + + my_head = my_body[0] + safe_moves = self._legal_moves( + my_head=my_head, + my_body=my_body, + other_snakes=other_snakes, + food_set=food_set, + is_constrictor=is_constrictor, + width=width, + height=height, + ) + if not safe_moves: + return -2400.0 + + scored_children:list[tuple[float, list[dict[str, int]]]] = [] + for move, pos in safe_moves.items(): + if self._time_exceeded(deadline): + break + point = (pos["x"], pos["y"]) + ate_food = point in food_set + future_body = self._future_body( + current_body=my_body, + next_head=pos, + ate_food=ate_food, + is_constrictor=is_constrictor, + ) + immediate_score = self._future_position_score( + my_body=future_body, + other_snakes=other_snakes, + food_set=food_set, + is_constrictor=is_constrictor, + width=width, + height=height, + enemy_can_grow_cache=enemy_can_grow_cache, + deadline=deadline, + ) + scored_children.append((immediate_score, future_body)) + + if not scored_children: + return -2200.0 + + scored_children.sort(key=lambda item: item[0], reverse=True) + if depth == 1: + return scored_children[0][0] + + best_total = scored_children[0][0] + for immediate_score, future_body in scored_children[:branch_limit]: + if self._time_exceeded(deadline): + break + continuation = self._future_survival_tree_score( + my_body=future_body, + other_snakes=other_snakes, + food_set=food_set, + is_constrictor=is_constrictor, + width=width, + height=height, + enemy_can_grow_cache=enemy_can_grow_cache, + depth=depth - 1, + branch_limit=branch_limit, + deadline=deadline, + ) + total = immediate_score + continuation * 0.72 + if total > best_total: + best_total = total + + return best_total + + def _future_position_score(self, my_body:list[Coord], other_snakes:list[SnakeState], food_set:set[Point], is_constrictor:bool, width:int, height:int, enemy_can_grow_cache:dict[Any, bool]|None, deadline:float|None) -> float: + if self._time_exceeded(deadline): + return 0.0 + + head_point = (my_body[0]["x"], my_body[0]["y"]) + blocked = self._simulation_blocked( + future_body=my_body, + other_snakes=other_snakes, + food_set=food_set, + is_constrictor=is_constrictor, + enemy_can_grow_cache=enemy_can_grow_cache, + ) + blocked.discard(head_point) + + reachable_space = self._flood_fill_count(head_point, blocked, width, height) + liberties = self._open_neighbor_count(head_point, blocked, width, height) + next_options = self._next_turn_option_count(my_body, blocked, width, height) + enemy_safe_options = self._safe_next_turn_option_count( + future_body=my_body, + other_snakes=other_snakes, + food_set=food_set, + is_constrictor=is_constrictor, + enemy_can_grow_cache=enemy_can_grow_cache, + width=width, + height=height, + ) + required_space = len(my_body) + (1 if is_constrictor else 0) + + score = 0.0 + score += reachable_space * 1.9 + score += liberties * 14.0 + score += next_options * 11.0 + score += enemy_safe_options * 26.0 + + if reachable_space < required_space: + score -= 1500.0 + if liberties == 0: + score -= 1000.0 + if next_options == 0: + score -= 1200.0 + if enemy_safe_options == 0: + score -= 1900.0 + elif enemy_safe_options == 1: + score -= 420.0 + + return score + def _nearest_food_distance(self, start:Point, food_set:set[Point], blocked:set[Point], width:int, height:int) -> int|None: """Compute shortest reachable distance to any food using BFS.""" if not food_set: @@ -978,7 +1191,7 @@ class BestBattleSnake(TemplateSnake): return None - def _path_distance( self, start:Point, goal:Point, blocked:set[Point], width:int, height:int) -> int|None: + def _path_distance(self, start:Point, goal:Point, blocked:set[Point], width:int, height:int) -> int|None: """Compute shortest path distance between two cells.""" queue = deque([(start, 0)]) seen = {start} @@ -1046,7 +1259,6 @@ class BestBattleSnake(TemplateSnake): count += 1 return count - def _safe_next_turn_option_count(self, future_body:list[Coord], other_snakes:list[SnakeState], food_set:set[Point], is_constrictor:bool, enemy_can_grow_cache:dict[Any, bool]|None, width:int, height:int) -> int: """Count next-turn moves that stay safe from enemy head contests.""" if not future_body: @@ -1180,7 +1392,7 @@ class BestBattleSnake(TemplateSnake): ) return enemy_space, enemy_options - def _enemy_constrictor_projection(self, other_snakes:list[SnakeState], blocked:set[Point], width:int, height: int) -> tuple[int, int]: + def _enemy_constrictor_projection(self, other_snakes:list[SnakeState], blocked:set[Point], width:int, height:int) -> tuple[int, int]: """Estimate enemy best-space and total options after our candidate move.""" best_enemy_space = 0 total_enemy_options = 0 diff --git a/tests/test_BestBattleSnake.py b/tests/test_BestBattleSnake.py index 08733be..3523efe 100644 --- a/tests/test_BestBattleSnake.py +++ b/tests/test_BestBattleSnake.py @@ -829,5 +829,197 @@ class TestBestBattleSnake(unittest.TestCase): move = make_board(game_state).snake_neat_make_a_move() self.assertEqual(move, "right") + def test_enemy_attack_map_marks_own_tail_contest(self): + snake = BestBattleSnake() + + my_snake = { + "id": "me", + "name": "me", + "health": 90, + "length": 4, + "head": {"x": 3, "y": 3}, + "body": [ + {"x": 3, "y": 3}, + {"x": 3, "y": 2}, + {"x": 2, "y": 2}, + {"x": 2, "y": 3}, + ], + } + + enemy = { + "id": "enemy", + "name": "enemy", + "health": 90, + "length": 6, + "head": {"x": 1, "y": 3}, + "body": [ + {"x": 1, "y": 3}, + {"x": 1, "y": 2}, + {"x": 1, "y": 1}, + {"x": 0, "y": 1}, + {"x": 0, "y": 2}, + {"x": 0, "y": 3}, + ], + } + + attack_map = snake._build_enemy_attack_map( + my_snake=my_snake, + other_snakes=[enemy], + food_set=set(), + is_constrictor=False, + width=7, + height=7, + enemy_can_grow_cache={"enemy": False}, + ) + + self.assertEqual(attack_map.get((2, 3)), 6) + + def test_simulation_frees_enemy_tail_even_if_enemy_can_grow(self): + snake = BestBattleSnake() + + future_body = [ + {"x": 4, "y": 4}, + {"x": 4, "y": 3}, + {"x": 3, "y": 3}, + {"x": 3, "y": 4}, + ] + enemy = { + "id": "enemy", + "name": "enemy", + "health": 90, + "length": 4, + "head": {"x": 1, "y": 1}, + "body": [ + {"x": 1, "y": 1}, + {"x": 1, "y": 0}, + {"x": 0, "y": 0}, + {"x": 0, "y": 1}, + ], + } + + blocked = snake._simulation_blocked( + future_body=future_body, + other_snakes=[enemy], + food_set={(2, 1)}, + is_constrictor=False, + enemy_can_grow_cache={"enemy": True}, + ) + + self.assertNotIn((0, 1), blocked) + + def test_enemy_attack_map_allows_enemy_tail_move_when_enemy_can_grow(self): + snake = BestBattleSnake() + + my_snake = { + "id": "me", + "name": "me", + "health": 90, + "length": 4, + "head": {"x": 6, "y": 6}, + "body": [ + {"x": 6, "y": 6}, + {"x": 6, "y": 5}, + {"x": 5, "y": 5}, + {"x": 5, "y": 6}, + ], + } + + enemy = { + "id": "enemy", + "name": "enemy", + "health": 90, + "length": 4, + "head": {"x": 3, "y": 3}, + "body": [ + {"x": 3, "y": 3}, + {"x": 3, "y": 2}, + {"x": 2, "y": 2}, + {"x": 2, "y": 3}, + ], + } + + attack_map = snake._build_enemy_attack_map( + my_snake=my_snake, + other_snakes=[enemy], + food_set={(4, 3)}, + is_constrictor=False, + width=11, + height=11, + enemy_can_grow_cache={"enemy": True}, + ) + + self.assertEqual(attack_map.get((2, 3)), 4) + + def test_future_planning_prefers_non_trap_path(self): + snake = BestBattleSnake() + + my_body = [ + {"x": 3, "y": 3}, + {"x": 3, "y": 2}, + {"x": 2, "y": 2}, + {"x": 2, "y": 3}, + ] + enemy = { + "id": "enemy", + "name": "enemy", + "health": 90, + "length": 8, + "head": {"x": 0, "y": 0}, + "body": [ + {"x": 0, "y": 0}, + {"x": 4, "y": 4}, + {"x": 4, "y": 2}, + {"x": 5, "y": 4}, + {"x": 5, "y": 2}, + {"x": 6, "y": 4}, + {"x": 6, "y": 3}, + {"x": 6, "y": 2}, + ], + } + + safe_moves = snake._legal_moves( + my_head=my_body[0], + my_body=my_body, + other_snakes=[enemy], + food_set=set(), + is_constrictor=False, + width=7, + height=7, + ) + + self.assertIn("left", safe_moves) + self.assertIn("right", safe_moves) + + right_bonus = snake._future_rollout_bonus_for_move( + move="right", + safe_moves=safe_moves, + my_body=my_body, + other_snakes=[enemy], + food_set=set(), + is_constrictor=False, + width=7, + height=7, + enemy_can_grow_cache={"enemy": False}, + depth=3, + branch_limit=2, + deadline=None, + ) + left_bonus = snake._future_rollout_bonus_for_move( + move="left", + safe_moves=safe_moves, + my_body=my_body, + other_snakes=[enemy], + food_set=set(), + is_constrictor=False, + width=7, + height=7, + enemy_can_grow_cache={"enemy": False}, + depth=3, + branch_limit=2, + deadline=None, + ) + + self.assertGreater(left_bonus, right_bonus) + if __name__ == "__main__": unittest.main()