Add better future planning to BestBattleSnake

This commit is contained in:
2026-04-04 12:00:02 +02:00
parent c6ebb5834b
commit bbdc8b288a
2 changed files with 443 additions and 39 deletions
+251 -39
View File
@@ -45,6 +45,9 @@ class BestBattleSnake(TemplateSnake):
self.rl_base_dataset_path = Path(os.getenv("RL_BASE_DATASET", "data/dataset/best_moves.jsonl")) self.rl_base_dataset_path = Path(os.getenv("RL_BASE_DATASET", "data/dataset/best_moves.jsonl"))
self.rl_bootstrap_path = Path(os.getenv("RL_BOOTSTRAP_OUTPUT", "data/dataset/rl_bootstrap.jsonl")) self.rl_bootstrap_path = Path(os.getenv("RL_BOOTSTRAP_OUTPUT", "data/dataset/rl_bootstrap.jsonl"))
self.rl_needs_more_data = False self.rl_needs_more_data = False
self.future_planning_depth = max(1, min(4, self._env_int("BATTLE_FUTURE_PLANNING_DEPTH", default=2)))
self.future_planning_branch = max(1, min(3, self._env_int("BATTLE_FUTURE_PLANNING_BRANCH", default=2)))
self.future_planning_min_time_ms = max(25, self._env_int("BATTLE_FUTURE_PLANNING_MIN_MS", default=70))
def _get_duel_style(self) -> str: def _get_duel_style(self) -> str:
"""Resolve duel tuning style from `BATTLE_SNAKE_DUEL_STYLE` or `DUEL_STYLE`.""" """Resolve duel tuning style from `BATTLE_SNAKE_DUEL_STYLE` or `DUEL_STYLE`."""
@@ -89,7 +92,7 @@ class BestBattleSnake(TemplateSnake):
value = os.getenv(name) value = os.getenv(name)
if value is None: if value is None:
return default return default
return value.lower() in {'1', 'true', 'yes', 'on'} return value.lower() in {"1", "true", "yes", "on"}
def _env_int(self, name:str, default:int) -> int: def _env_int(self, name:str, default:int) -> int:
value = os.getenv(name) value = os.getenv(name)
@@ -442,11 +445,20 @@ class BestBattleSnake(TemplateSnake):
else: else:
considered_moves = list(scores.keys()) considered_moves = list(scores.keys())
best_score = max(scores[move] for move in considered_moves) best_move = self._pick_best_with_future_planning(
top_moves = [ considered_moves=considered_moves,
move for move in considered_moves if best_score - scores[move] <= 1.5 scores=scores,
] safe_moves=safe_moves,
best_move = random.choice(top_moves) my_body=my_body,
other_snakes=other_snakes,
food_set=food_set,
is_constrictor=is_constrictor,
width=width,
height=height,
enemy_can_grow_cache=enemy_can_grow_cache,
deadline=deadline,
tie_window=1.5,
)
self.recent_heads.append(current_head_point) self.recent_heads.append(current_head_point)
self.last_move = best_move self.last_move = best_move
self.add_to_history({"turn": turn, "move": best_move, "scores": scores}) self.add_to_history({"turn": turn, "move": best_move, "scores": scores})
@@ -643,11 +655,21 @@ class BestBattleSnake(TemplateSnake):
if not scores: if not scores:
return random.choice(list(safe_moves.keys())), {} return random.choice(list(safe_moves.keys())), {}
best_score = max(scores[move] for move in considered_moves) best_move = self._pick_best_with_future_planning(
top_moves = [ considered_moves=considered_moves,
move for move in considered_moves if best_score - scores[move] <= 1.5 scores=scores,
] safe_moves=safe_moves,
return random.choice(top_moves), scores my_body=my_body,
other_snakes=other_snakes,
food_set=food_set,
is_constrictor=False,
width=width,
height=height,
enemy_can_grow_cache=enemy_can_grow_cache,
deadline=deadline,
tie_window=1.5,
)
return best_move, scores
def _choose_constrictor_move(self, safe_moves:MoveMap, my_body:list[Coord], my_len:int, other_snakes:list[SnakeState], food_set:set[Point], enemy_attack_map:AttackMap, enemy_heads:list[Point], enemy_can_grow_cache:dict[Any, bool], width:int, height:int, deadline:float|None=None) -> tuple[str, dict[str, float]]: def _choose_constrictor_move(self, safe_moves:MoveMap, my_body:list[Coord], my_len:int, other_snakes:list[SnakeState], food_set:set[Point], enemy_attack_map:AttackMap, enemy_heads:list[Point], enemy_can_grow_cache:dict[Any, bool], width:int, height:int, deadline:float|None=None) -> tuple[str, dict[str, float]]:
"""Score and select a move for constrictor games.""" """Score and select a move for constrictor games."""
@@ -773,11 +795,21 @@ class BestBattleSnake(TemplateSnake):
if not scores: if not scores:
return random.choice(list(safe_moves.keys())), {} return random.choice(list(safe_moves.keys())), {}
best_score = max(scores[move] for move in considered_moves) best_move = self._pick_best_with_future_planning(
top_moves = [ considered_moves=considered_moves,
move for move in considered_moves if best_score - scores[move] <= 2.0 scores=scores,
] safe_moves=safe_moves,
return random.choice(top_moves), scores my_body=my_body,
other_snakes=other_snakes,
food_set=food_set,
is_constrictor=True,
width=width,
height=height,
enemy_can_grow_cache=enemy_can_grow_cache,
deadline=deadline,
tie_window=2.0,
)
return best_move, scores
def _legal_moves(self, my_head:Coord, my_body:list[Coord], other_snakes:list[SnakeState], food_set:set[Point], is_constrictor:bool, width:int, height:int) -> MoveMap: def _legal_moves(self, my_head:Coord, my_body:list[Coord], other_snakes:list[SnakeState], food_set:set[Point], is_constrictor:bool, width:int, height:int) -> MoveMap:
"""Return legal immediate moves after body, wall, and tail checks.""" """Return legal immediate moves after body, wall, and tail checks."""
@@ -829,15 +861,6 @@ class BestBattleSnake(TemplateSnake):
if is_constrictor: if is_constrictor:
continue continue
snake_id = snake.get("id")
enemy_can_grow = (
enemy_can_grow_cache.get(snake_id)
if enemy_can_grow_cache and snake_id is not None
else self._enemy_can_grow_this_turn(snake, food_set)
)
if enemy_can_grow:
continue
if self._is_tail_stacked(snake["body"]): if self._is_tail_stacked(snake["body"]):
continue continue
@@ -850,19 +873,14 @@ class BestBattleSnake(TemplateSnake):
"""Map cells enemies can contest next turn to their effective length.""" """Map cells enemies can contest next turn to their effective length."""
occupied = self._occupied_cells(my_snake["body"], other_snakes) occupied = self._occupied_cells(my_snake["body"], other_snakes)
my_body_points = {(segment["x"], segment["y"]) for segment in my_snake["body"]} my_body_points = {(segment["x"], segment["y"]) for segment in my_snake["body"]}
my_tail = (my_snake["body"][-1]["x"], my_snake["body"][-1]["y"])
my_tail_stacked = self._is_tail_stacked(my_snake["body"])
attack_map = {} attack_map = {}
for enemy in other_snakes: for enemy in other_snakes:
enemy_len = enemy.get("length", len(enemy["body"])) enemy_len = enemy.get("length", len(enemy["body"]))
enemy_tail = (enemy["body"][-1]["x"], enemy["body"][-1]["y"]) enemy_tail = (enemy["body"][-1]["x"], enemy["body"][-1]["y"])
enemy_tail_stacked = self._is_tail_stacked(enemy["body"]) enemy_tail_stacked = self._is_tail_stacked(enemy["body"])
snake_id = enemy.get("id")
enemy_can_grow = (
enemy_can_grow_cache.get(snake_id)
if enemy_can_grow_cache and snake_id is not None
else self._enemy_can_grow_this_turn(enemy, food_set)
)
enemy_head = enemy["head"] enemy_head = enemy["head"]
for dx, dy in self.DIRECTIONS.values(): for dx, dy in self.DIRECTIONS.values():
point = (enemy_head["x"] + dx, enemy_head["y"] + dy) point = (enemy_head["x"] + dx, enemy_head["y"] + dy)
@@ -873,15 +891,17 @@ class BestBattleSnake(TemplateSnake):
not is_constrictor not is_constrictor
and point == enemy_tail and point == enemy_tail
and not enemy_tail_stacked and not enemy_tail_stacked
and not enemy_can_grow
) )
can_contest_my_tail = (not is_constrictor and point == my_tail and not my_tail_stacked)
if point in occupied and not can_step_on_enemy_tail: if point in occupied and not can_step_on_enemy_tail and not can_contest_my_tail:
continue continue
# Do not consider impossible overlap directly into my own occupied body except head swap possibilities. # Ignore impossible overlap into our occupied body, but keep our vacatable tail
# so we can detect dangerous head-to-head contests when tail-chasing.
if point in my_body_points: if point in my_body_points:
continue if is_constrictor or my_tail_stacked or point != my_tail:
continue
previous = attack_map.get(point) previous = attack_map.get(point)
if previous is None or enemy_len > previous: if previous is None or enemy_len > previous:
@@ -954,6 +974,199 @@ class BestBattleSnake(TemplateSnake):
return False return False
return perf_counter() >= deadline return perf_counter() >= deadline
def _remaining_ms(self, deadline:float|None) -> float:
if deadline is None:
return 10_000.0
return max(0.0, (deadline - perf_counter()) * 1000.0)
def _pick_best_with_future_planning(self, considered_moves:list[str], scores:dict[str, float], safe_moves:MoveMap, my_body:list[Coord], other_snakes:list[SnakeState], food_set:set[Point], is_constrictor:bool, width:int, height:int, enemy_can_grow_cache:dict[Any, bool]|None, deadline:float|None, tie_window:float) -> str:
best_score = max(scores[move] for move in considered_moves)
top_moves = [move for move in considered_moves if best_score - scores[move] <= tie_window]
if len(top_moves) <= 1:
return top_moves[0]
if self._time_exceeded(deadline) or self._remaining_ms(deadline) < self.future_planning_min_time_ms:
return random.choice(top_moves)
candidate_moves = sorted(top_moves, key=lambda move: scores[move], reverse=True)[:3]
lookahead_bonus:dict[str, float] = {}
for move in candidate_moves:
if self._time_exceeded(deadline):
break
bonus = self._future_rollout_bonus_for_move(
move=move,
safe_moves=safe_moves,
my_body=my_body,
other_snakes=other_snakes,
food_set=food_set,
is_constrictor=is_constrictor,
width=width,
height=height,
enemy_can_grow_cache=enemy_can_grow_cache,
depth=self.future_planning_depth,
branch_limit=self.future_planning_branch,
deadline=deadline,
)
lookahead_bonus[move] = bonus
if not lookahead_bonus:
return random.choice(top_moves)
for move, bonus in lookahead_bonus.items():
scores[move] += bonus
refined_best = max(scores[move] for move in top_moves)
refined_top = [
move
for move in top_moves
if refined_best - scores[move] <= max(0.5, tie_window / 2)
]
return random.choice(refined_top)
def _future_rollout_bonus_for_move(self, move:str, safe_moves:MoveMap, my_body:list[Coord], other_snakes:list[SnakeState], food_set:set[Point], is_constrictor:bool, width:int, height:int, enemy_can_grow_cache:dict[Any, bool]|None, depth:int, branch_limit:int, deadline:float|None) -> float:
pos = safe_moves.get(move)
if pos is None:
return -250.0
point = (pos["x"], pos["y"])
ate_food = point in food_set
future_body = self._future_body(
current_body=my_body,
next_head=pos,
ate_food=ate_food,
is_constrictor=is_constrictor,
)
raw_score = self._future_survival_tree_score(
my_body=future_body,
other_snakes=other_snakes,
food_set=food_set,
is_constrictor=is_constrictor,
width=width,
height=height,
enemy_can_grow_cache=enemy_can_grow_cache,
depth=max(1, depth),
branch_limit=max(1, branch_limit),
deadline=deadline,
)
return raw_score * 0.06
def _future_survival_tree_score(self, my_body:list[Coord], other_snakes:list[SnakeState], food_set:set[Point], is_constrictor:bool, width:int, height:int, enemy_can_grow_cache:dict[Any, bool]|None, depth:int, branch_limit:int, deadline:float|None,
) -> float:
if depth <= 0 or self._time_exceeded(deadline):
return 0.0
my_head = my_body[0]
safe_moves = self._legal_moves(
my_head=my_head,
my_body=my_body,
other_snakes=other_snakes,
food_set=food_set,
is_constrictor=is_constrictor,
width=width,
height=height,
)
if not safe_moves:
return -2400.0
scored_children:list[tuple[float, list[dict[str, int]]]] = []
for move, pos in safe_moves.items():
if self._time_exceeded(deadline):
break
point = (pos["x"], pos["y"])
ate_food = point in food_set
future_body = self._future_body(
current_body=my_body,
next_head=pos,
ate_food=ate_food,
is_constrictor=is_constrictor,
)
immediate_score = self._future_position_score(
my_body=future_body,
other_snakes=other_snakes,
food_set=food_set,
is_constrictor=is_constrictor,
width=width,
height=height,
enemy_can_grow_cache=enemy_can_grow_cache,
deadline=deadline,
)
scored_children.append((immediate_score, future_body))
if not scored_children:
return -2200.0
scored_children.sort(key=lambda item: item[0], reverse=True)
if depth == 1:
return scored_children[0][0]
best_total = scored_children[0][0]
for immediate_score, future_body in scored_children[:branch_limit]:
if self._time_exceeded(deadline):
break
continuation = self._future_survival_tree_score(
my_body=future_body,
other_snakes=other_snakes,
food_set=food_set,
is_constrictor=is_constrictor,
width=width,
height=height,
enemy_can_grow_cache=enemy_can_grow_cache,
depth=depth - 1,
branch_limit=branch_limit,
deadline=deadline,
)
total = immediate_score + continuation * 0.72
if total > best_total:
best_total = total
return best_total
def _future_position_score(self, my_body:list[Coord], other_snakes:list[SnakeState], food_set:set[Point], is_constrictor:bool, width:int, height:int, enemy_can_grow_cache:dict[Any, bool]|None, deadline:float|None) -> float:
if self._time_exceeded(deadline):
return 0.0
head_point = (my_body[0]["x"], my_body[0]["y"])
blocked = self._simulation_blocked(
future_body=my_body,
other_snakes=other_snakes,
food_set=food_set,
is_constrictor=is_constrictor,
enemy_can_grow_cache=enemy_can_grow_cache,
)
blocked.discard(head_point)
reachable_space = self._flood_fill_count(head_point, blocked, width, height)
liberties = self._open_neighbor_count(head_point, blocked, width, height)
next_options = self._next_turn_option_count(my_body, blocked, width, height)
enemy_safe_options = self._safe_next_turn_option_count(
future_body=my_body,
other_snakes=other_snakes,
food_set=food_set,
is_constrictor=is_constrictor,
enemy_can_grow_cache=enemy_can_grow_cache,
width=width,
height=height,
)
required_space = len(my_body) + (1 if is_constrictor else 0)
score = 0.0
score += reachable_space * 1.9
score += liberties * 14.0
score += next_options * 11.0
score += enemy_safe_options * 26.0
if reachable_space < required_space:
score -= 1500.0
if liberties == 0:
score -= 1000.0
if next_options == 0:
score -= 1200.0
if enemy_safe_options == 0:
score -= 1900.0
elif enemy_safe_options == 1:
score -= 420.0
return score
def _nearest_food_distance(self, start:Point, food_set:set[Point], blocked:set[Point], width:int, height:int) -> int|None: def _nearest_food_distance(self, start:Point, food_set:set[Point], blocked:set[Point], width:int, height:int) -> int|None:
"""Compute shortest reachable distance to any food using BFS.""" """Compute shortest reachable distance to any food using BFS."""
if not food_set: if not food_set:
@@ -978,7 +1191,7 @@ class BestBattleSnake(TemplateSnake):
return None return None
def _path_distance( self, start:Point, goal:Point, blocked:set[Point], width:int, height:int) -> int|None: def _path_distance(self, start:Point, goal:Point, blocked:set[Point], width:int, height:int) -> int|None:
"""Compute shortest path distance between two cells.""" """Compute shortest path distance between two cells."""
queue = deque([(start, 0)]) queue = deque([(start, 0)])
seen = {start} seen = {start}
@@ -1046,7 +1259,6 @@ class BestBattleSnake(TemplateSnake):
count += 1 count += 1
return count return count
def _safe_next_turn_option_count(self, future_body:list[Coord], other_snakes:list[SnakeState], food_set:set[Point], is_constrictor:bool, enemy_can_grow_cache:dict[Any, bool]|None, width:int, height:int) -> int: def _safe_next_turn_option_count(self, future_body:list[Coord], other_snakes:list[SnakeState], food_set:set[Point], is_constrictor:bool, enemy_can_grow_cache:dict[Any, bool]|None, width:int, height:int) -> int:
"""Count next-turn moves that stay safe from enemy head contests.""" """Count next-turn moves that stay safe from enemy head contests."""
if not future_body: if not future_body:
@@ -1180,7 +1392,7 @@ class BestBattleSnake(TemplateSnake):
) )
return enemy_space, enemy_options return enemy_space, enemy_options
def _enemy_constrictor_projection(self, other_snakes:list[SnakeState], blocked:set[Point], width:int, height: int) -> tuple[int, int]: def _enemy_constrictor_projection(self, other_snakes:list[SnakeState], blocked:set[Point], width:int, height:int) -> tuple[int, int]:
"""Estimate enemy best-space and total options after our candidate move.""" """Estimate enemy best-space and total options after our candidate move."""
best_enemy_space = 0 best_enemy_space = 0
total_enemy_options = 0 total_enemy_options = 0
+192
View File
@@ -829,5 +829,197 @@ class TestBestBattleSnake(unittest.TestCase):
move = make_board(game_state).snake_neat_make_a_move() move = make_board(game_state).snake_neat_make_a_move()
self.assertEqual(move, "right") self.assertEqual(move, "right")
def test_enemy_attack_map_marks_own_tail_contest(self):
    """An enemy head next to our non-stacked tail marks that cell with the enemy length."""
    def cells(*pairs):
        return [{"x": x, "y": y} for x, y in pairs]

    battle_snake = BestBattleSnake()
    me = {
        "id": "me",
        "name": "me",
        "health": 90,
        "length": 4,
        "head": {"x": 3, "y": 3},
        # Our tail sits at (2, 3), directly right of the enemy head.
        "body": cells((3, 3), (3, 2), (2, 2), (2, 3)),
    }
    rival = {
        "id": "enemy",
        "name": "enemy",
        "health": 90,
        "length": 6,
        "head": {"x": 1, "y": 3},
        "body": cells((1, 3), (1, 2), (1, 1), (0, 1), (0, 2), (0, 3)),
    }

    contested = battle_snake._build_enemy_attack_map(
        my_snake=me,
        other_snakes=[rival],
        food_set=set(),
        is_constrictor=False,
        width=7,
        height=7,
        enemy_can_grow_cache={"enemy": False},
    )

    self.assertEqual(contested.get((2, 3)), 6)
def test_simulation_frees_enemy_tail_even_if_enemy_can_grow(self):
    """The enemy tail cell is not blocked even when the cache says the enemy can grow."""
    def cells(*pairs):
        return [{"x": x, "y": y} for x, y in pairs]

    battle_snake = BestBattleSnake()
    our_future_body = cells((4, 4), (4, 3), (3, 3), (3, 4))
    rival = {
        "id": "enemy",
        "name": "enemy",
        "health": 90,
        "length": 4,
        "head": {"x": 1, "y": 1},
        # The enemy tail is at (0, 1).
        "body": cells((1, 1), (1, 0), (0, 0), (0, 1)),
    }

    obstacles = battle_snake._simulation_blocked(
        future_body=our_future_body,
        other_snakes=[rival],
        food_set={(2, 1)},
        is_constrictor=False,
        enemy_can_grow_cache={"enemy": True},
    )

    self.assertNotIn((0, 1), obstacles)
def test_enemy_attack_map_allows_enemy_tail_move_when_enemy_can_grow(self):
    """The enemy's own tail cell stays in the attack map even if the enemy can grow."""
    def cells(*pairs):
        return [{"x": x, "y": y} for x, y in pairs]

    battle_snake = BestBattleSnake()
    me = {
        "id": "me",
        "name": "me",
        "health": 90,
        "length": 4,
        "head": {"x": 6, "y": 6},
        "body": cells((6, 6), (6, 5), (5, 5), (5, 6)),
    }
    rival = {
        "id": "enemy",
        "name": "enemy",
        "health": 90,
        "length": 4,
        "head": {"x": 3, "y": 3},
        # The enemy tail at (2, 3) is adjacent to its own head.
        "body": cells((3, 3), (3, 2), (2, 2), (2, 3)),
    }

    contested = battle_snake._build_enemy_attack_map(
        my_snake=me,
        other_snakes=[rival],
        food_set={(4, 3)},
        is_constrictor=False,
        width=11,
        height=11,
        enemy_can_grow_cache={"enemy": True},
    )

    self.assertEqual(contested.get((2, 3)), 4)
def test_future_planning_prefers_non_trap_path(self):
    """With both moves legal, the rollout bonus for "left" must exceed the one for "right"."""
    def cells(*pairs):
        return [{"x": x, "y": y} for x, y in pairs]

    battle_snake = BestBattleSnake()
    our_body = cells((3, 3), (3, 2), (2, 2), (2, 3))
    rival = {
        "id": "enemy",
        "name": "enemy",
        "health": 90,
        "length": 8,
        "head": {"x": 0, "y": 0},
        "body": cells(
            (0, 0), (4, 4), (4, 2), (5, 4), (5, 2), (6, 4), (6, 3), (6, 2),
        ),
    }

    legal = battle_snake._legal_moves(
        my_head=our_body[0],
        my_body=our_body,
        other_snakes=[rival],
        food_set=set(),
        is_constrictor=False,
        width=7,
        height=7,
    )
    for direction in ("left", "right"):
        self.assertIn(direction, legal)

    # "right" is evaluated before "left"; both share the same position.
    bonus_by_move = {}
    for direction in ("right", "left"):
        bonus_by_move[direction] = battle_snake._future_rollout_bonus_for_move(
            move=direction,
            safe_moves=legal,
            my_body=our_body,
            other_snakes=[rival],
            food_set=set(),
            is_constrictor=False,
            width=7,
            height=7,
            enemy_can_grow_cache={"enemy": False},
            depth=3,
            branch_limit=2,
            deadline=None,
        )

    self.assertGreater(bonus_by_move["left"], bonus_by_move["right"])
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()