Add better future planning to BestBattleSnake
This commit is contained in:
+251
-39
@@ -45,6 +45,9 @@ class BestBattleSnake(TemplateSnake):
|
||||
self.rl_base_dataset_path = Path(os.getenv("RL_BASE_DATASET", "data/dataset/best_moves.jsonl"))
|
||||
self.rl_bootstrap_path = Path(os.getenv("RL_BOOTSTRAP_OUTPUT", "data/dataset/rl_bootstrap.jsonl"))
|
||||
self.rl_needs_more_data = False
|
||||
self.future_planning_depth = max(1, min(4, self._env_int("BATTLE_FUTURE_PLANNING_DEPTH", default=2)))
|
||||
self.future_planning_branch = max(1, min(3, self._env_int("BATTLE_FUTURE_PLANNING_BRANCH", default=2)))
|
||||
self.future_planning_min_time_ms = max(25, self._env_int("BATTLE_FUTURE_PLANNING_MIN_MS", default=70))
|
||||
|
||||
def _get_duel_style(self) -> str:
|
||||
"""Resolve duel tuning style from `BATTLE_SNAKE_DUEL_STYLE` or `DUEL_STYLE`."""
|
||||
@@ -89,7 +92,7 @@ class BestBattleSnake(TemplateSnake):
|
||||
value = os.getenv(name)
|
||||
if value is None:
|
||||
return default
|
||||
return value.lower() in {'1', 'true', 'yes', 'on'}
|
||||
return value.lower() in {"1", "true", "yes", "on"}
|
||||
|
||||
def _env_int(self, name:str, default:int) -> int:
|
||||
value = os.getenv(name)
|
||||
@@ -442,11 +445,20 @@ class BestBattleSnake(TemplateSnake):
|
||||
else:
|
||||
considered_moves = list(scores.keys())
|
||||
|
||||
best_score = max(scores[move] for move in considered_moves)
|
||||
top_moves = [
|
||||
move for move in considered_moves if best_score - scores[move] <= 1.5
|
||||
]
|
||||
best_move = random.choice(top_moves)
|
||||
best_move = self._pick_best_with_future_planning(
|
||||
considered_moves=considered_moves,
|
||||
scores=scores,
|
||||
safe_moves=safe_moves,
|
||||
my_body=my_body,
|
||||
other_snakes=other_snakes,
|
||||
food_set=food_set,
|
||||
is_constrictor=is_constrictor,
|
||||
width=width,
|
||||
height=height,
|
||||
enemy_can_grow_cache=enemy_can_grow_cache,
|
||||
deadline=deadline,
|
||||
tie_window=1.5,
|
||||
)
|
||||
self.recent_heads.append(current_head_point)
|
||||
self.last_move = best_move
|
||||
self.add_to_history({"turn": turn, "move": best_move, "scores": scores})
|
||||
@@ -643,11 +655,21 @@ class BestBattleSnake(TemplateSnake):
|
||||
if not scores:
|
||||
return random.choice(list(safe_moves.keys())), {}
|
||||
|
||||
best_score = max(scores[move] for move in considered_moves)
|
||||
top_moves = [
|
||||
move for move in considered_moves if best_score - scores[move] <= 1.5
|
||||
]
|
||||
return random.choice(top_moves), scores
|
||||
best_move = self._pick_best_with_future_planning(
|
||||
considered_moves=considered_moves,
|
||||
scores=scores,
|
||||
safe_moves=safe_moves,
|
||||
my_body=my_body,
|
||||
other_snakes=other_snakes,
|
||||
food_set=food_set,
|
||||
is_constrictor=False,
|
||||
width=width,
|
||||
height=height,
|
||||
enemy_can_grow_cache=enemy_can_grow_cache,
|
||||
deadline=deadline,
|
||||
tie_window=1.5,
|
||||
)
|
||||
return best_move, scores
|
||||
|
||||
def _choose_constrictor_move(self, safe_moves:MoveMap, my_body:list[Coord], my_len:int, other_snakes:list[SnakeState], food_set:set[Point], enemy_attack_map:AttackMap, enemy_heads:list[Point], enemy_can_grow_cache:dict[Any, bool], width:int, height:int, deadline:float|None=None) -> tuple[str, dict[str, float]]:
|
||||
"""Score and select a move for constrictor games."""
|
||||
@@ -773,11 +795,21 @@ class BestBattleSnake(TemplateSnake):
|
||||
if not scores:
|
||||
return random.choice(list(safe_moves.keys())), {}
|
||||
|
||||
best_score = max(scores[move] for move in considered_moves)
|
||||
top_moves = [
|
||||
move for move in considered_moves if best_score - scores[move] <= 2.0
|
||||
]
|
||||
return random.choice(top_moves), scores
|
||||
best_move = self._pick_best_with_future_planning(
|
||||
considered_moves=considered_moves,
|
||||
scores=scores,
|
||||
safe_moves=safe_moves,
|
||||
my_body=my_body,
|
||||
other_snakes=other_snakes,
|
||||
food_set=food_set,
|
||||
is_constrictor=True,
|
||||
width=width,
|
||||
height=height,
|
||||
enemy_can_grow_cache=enemy_can_grow_cache,
|
||||
deadline=deadline,
|
||||
tie_window=2.0,
|
||||
)
|
||||
return best_move, scores
|
||||
|
||||
def _legal_moves(self, my_head:Coord, my_body:list[Coord], other_snakes:list[SnakeState], food_set:set[Point], is_constrictor:bool, width:int, height:int) -> MoveMap:
|
||||
"""Return legal immediate moves after body, wall, and tail checks."""
|
||||
@@ -829,15 +861,6 @@ class BestBattleSnake(TemplateSnake):
|
||||
if is_constrictor:
|
||||
continue
|
||||
|
||||
snake_id = snake.get("id")
|
||||
enemy_can_grow = (
|
||||
enemy_can_grow_cache.get(snake_id)
|
||||
if enemy_can_grow_cache and snake_id is not None
|
||||
else self._enemy_can_grow_this_turn(snake, food_set)
|
||||
)
|
||||
if enemy_can_grow:
|
||||
continue
|
||||
|
||||
if self._is_tail_stacked(snake["body"]):
|
||||
continue
|
||||
|
||||
@@ -850,19 +873,14 @@ class BestBattleSnake(TemplateSnake):
|
||||
"""Map cells enemies can contest next turn to their effective length."""
|
||||
occupied = self._occupied_cells(my_snake["body"], other_snakes)
|
||||
my_body_points = {(segment["x"], segment["y"]) for segment in my_snake["body"]}
|
||||
my_tail = (my_snake["body"][-1]["x"], my_snake["body"][-1]["y"])
|
||||
my_tail_stacked = self._is_tail_stacked(my_snake["body"])
|
||||
attack_map = {}
|
||||
|
||||
for enemy in other_snakes:
|
||||
enemy_len = enemy.get("length", len(enemy["body"]))
|
||||
enemy_tail = (enemy["body"][-1]["x"], enemy["body"][-1]["y"])
|
||||
enemy_tail_stacked = self._is_tail_stacked(enemy["body"])
|
||||
snake_id = enemy.get("id")
|
||||
enemy_can_grow = (
|
||||
enemy_can_grow_cache.get(snake_id)
|
||||
if enemy_can_grow_cache and snake_id is not None
|
||||
else self._enemy_can_grow_this_turn(enemy, food_set)
|
||||
)
|
||||
|
||||
enemy_head = enemy["head"]
|
||||
for dx, dy in self.DIRECTIONS.values():
|
||||
point = (enemy_head["x"] + dx, enemy_head["y"] + dy)
|
||||
@@ -873,15 +891,17 @@ class BestBattleSnake(TemplateSnake):
|
||||
not is_constrictor
|
||||
and point == enemy_tail
|
||||
and not enemy_tail_stacked
|
||||
and not enemy_can_grow
|
||||
)
|
||||
can_contest_my_tail = (not is_constrictor and point == my_tail and not my_tail_stacked)
|
||||
|
||||
if point in occupied and not can_step_on_enemy_tail:
|
||||
if point in occupied and not can_step_on_enemy_tail and not can_contest_my_tail:
|
||||
continue
|
||||
|
||||
# Do not consider impossible overlap directly into my own occupied body except head swap possibilities.
|
||||
# Ignore impossible overlap into our occupied body, but keep our vacatable tail
|
||||
# so we can detect dangerous head-to-head contests when tail-chasing.
|
||||
if point in my_body_points:
|
||||
continue
|
||||
if is_constrictor or my_tail_stacked or point != my_tail:
|
||||
continue
|
||||
|
||||
previous = attack_map.get(point)
|
||||
if previous is None or enemy_len > previous:
|
||||
@@ -954,6 +974,199 @@ class BestBattleSnake(TemplateSnake):
|
||||
return False
|
||||
return perf_counter() >= deadline
|
||||
|
||||
def _remaining_ms(self, deadline:float|None) -> float:
|
||||
if deadline is None:
|
||||
return 10_000.0
|
||||
return max(0.0, (deadline - perf_counter()) * 1000.0)
|
||||
|
||||
def _pick_best_with_future_planning(self, considered_moves:list[str], scores:dict[str, float], safe_moves:MoveMap, my_body:list[Coord], other_snakes:list[SnakeState], food_set:set[Point], is_constrictor:bool, width:int, height:int, enemy_can_grow_cache:dict[Any, bool]|None, deadline:float|None, tie_window:float) -> str:
|
||||
best_score = max(scores[move] for move in considered_moves)
|
||||
top_moves = [move for move in considered_moves if best_score - scores[move] <= tie_window]
|
||||
if len(top_moves) <= 1:
|
||||
return top_moves[0]
|
||||
|
||||
if self._time_exceeded(deadline) or self._remaining_ms(deadline) < self.future_planning_min_time_ms:
|
||||
return random.choice(top_moves)
|
||||
|
||||
candidate_moves = sorted(top_moves, key=lambda move: scores[move], reverse=True)[:3]
|
||||
lookahead_bonus:dict[str, float] = {}
|
||||
for move in candidate_moves:
|
||||
if self._time_exceeded(deadline):
|
||||
break
|
||||
bonus = self._future_rollout_bonus_for_move(
|
||||
move=move,
|
||||
safe_moves=safe_moves,
|
||||
my_body=my_body,
|
||||
other_snakes=other_snakes,
|
||||
food_set=food_set,
|
||||
is_constrictor=is_constrictor,
|
||||
width=width,
|
||||
height=height,
|
||||
enemy_can_grow_cache=enemy_can_grow_cache,
|
||||
depth=self.future_planning_depth,
|
||||
branch_limit=self.future_planning_branch,
|
||||
deadline=deadline,
|
||||
)
|
||||
lookahead_bonus[move] = bonus
|
||||
|
||||
if not lookahead_bonus:
|
||||
return random.choice(top_moves)
|
||||
|
||||
for move, bonus in lookahead_bonus.items():
|
||||
scores[move] += bonus
|
||||
|
||||
refined_best = max(scores[move] for move in top_moves)
|
||||
refined_top = [
|
||||
move
|
||||
for move in top_moves
|
||||
if refined_best - scores[move] <= max(0.5, tie_window / 2)
|
||||
]
|
||||
return random.choice(refined_top)
|
||||
|
||||
def _future_rollout_bonus_for_move(self, move:str, safe_moves:MoveMap, my_body:list[Coord], other_snakes:list[SnakeState], food_set:set[Point], is_constrictor:bool, width:int, height:int, enemy_can_grow_cache:dict[Any, bool]|None, depth:int, branch_limit:int, deadline:float|None) -> float:
|
||||
pos = safe_moves.get(move)
|
||||
if pos is None:
|
||||
return -250.0
|
||||
point = (pos["x"], pos["y"])
|
||||
ate_food = point in food_set
|
||||
future_body = self._future_body(
|
||||
current_body=my_body,
|
||||
next_head=pos,
|
||||
ate_food=ate_food,
|
||||
is_constrictor=is_constrictor,
|
||||
)
|
||||
raw_score = self._future_survival_tree_score(
|
||||
my_body=future_body,
|
||||
other_snakes=other_snakes,
|
||||
food_set=food_set,
|
||||
is_constrictor=is_constrictor,
|
||||
width=width,
|
||||
height=height,
|
||||
enemy_can_grow_cache=enemy_can_grow_cache,
|
||||
depth=max(1, depth),
|
||||
branch_limit=max(1, branch_limit),
|
||||
deadline=deadline,
|
||||
)
|
||||
return raw_score * 0.06
|
||||
|
||||
def _future_survival_tree_score(self, my_body:list[Coord], other_snakes:list[SnakeState], food_set:set[Point], is_constrictor:bool, width:int, height:int, enemy_can_grow_cache:dict[Any, bool]|None, depth:int, branch_limit:int, deadline:float|None,
|
||||
) -> float:
|
||||
if depth <= 0 or self._time_exceeded(deadline):
|
||||
return 0.0
|
||||
|
||||
my_head = my_body[0]
|
||||
safe_moves = self._legal_moves(
|
||||
my_head=my_head,
|
||||
my_body=my_body,
|
||||
other_snakes=other_snakes,
|
||||
food_set=food_set,
|
||||
is_constrictor=is_constrictor,
|
||||
width=width,
|
||||
height=height,
|
||||
)
|
||||
if not safe_moves:
|
||||
return -2400.0
|
||||
|
||||
scored_children:list[tuple[float, list[dict[str, int]]]] = []
|
||||
for move, pos in safe_moves.items():
|
||||
if self._time_exceeded(deadline):
|
||||
break
|
||||
point = (pos["x"], pos["y"])
|
||||
ate_food = point in food_set
|
||||
future_body = self._future_body(
|
||||
current_body=my_body,
|
||||
next_head=pos,
|
||||
ate_food=ate_food,
|
||||
is_constrictor=is_constrictor,
|
||||
)
|
||||
immediate_score = self._future_position_score(
|
||||
my_body=future_body,
|
||||
other_snakes=other_snakes,
|
||||
food_set=food_set,
|
||||
is_constrictor=is_constrictor,
|
||||
width=width,
|
||||
height=height,
|
||||
enemy_can_grow_cache=enemy_can_grow_cache,
|
||||
deadline=deadline,
|
||||
)
|
||||
scored_children.append((immediate_score, future_body))
|
||||
|
||||
if not scored_children:
|
||||
return -2200.0
|
||||
|
||||
scored_children.sort(key=lambda item: item[0], reverse=True)
|
||||
if depth == 1:
|
||||
return scored_children[0][0]
|
||||
|
||||
best_total = scored_children[0][0]
|
||||
for immediate_score, future_body in scored_children[:branch_limit]:
|
||||
if self._time_exceeded(deadline):
|
||||
break
|
||||
continuation = self._future_survival_tree_score(
|
||||
my_body=future_body,
|
||||
other_snakes=other_snakes,
|
||||
food_set=food_set,
|
||||
is_constrictor=is_constrictor,
|
||||
width=width,
|
||||
height=height,
|
||||
enemy_can_grow_cache=enemy_can_grow_cache,
|
||||
depth=depth - 1,
|
||||
branch_limit=branch_limit,
|
||||
deadline=deadline,
|
||||
)
|
||||
total = immediate_score + continuation * 0.72
|
||||
if total > best_total:
|
||||
best_total = total
|
||||
|
||||
return best_total
|
||||
|
||||
def _future_position_score(self, my_body:list[Coord], other_snakes:list[SnakeState], food_set:set[Point], is_constrictor:bool, width:int, height:int, enemy_can_grow_cache:dict[Any, bool]|None, deadline:float|None) -> float:
|
||||
if self._time_exceeded(deadline):
|
||||
return 0.0
|
||||
|
||||
head_point = (my_body[0]["x"], my_body[0]["y"])
|
||||
blocked = self._simulation_blocked(
|
||||
future_body=my_body,
|
||||
other_snakes=other_snakes,
|
||||
food_set=food_set,
|
||||
is_constrictor=is_constrictor,
|
||||
enemy_can_grow_cache=enemy_can_grow_cache,
|
||||
)
|
||||
blocked.discard(head_point)
|
||||
|
||||
reachable_space = self._flood_fill_count(head_point, blocked, width, height)
|
||||
liberties = self._open_neighbor_count(head_point, blocked, width, height)
|
||||
next_options = self._next_turn_option_count(my_body, blocked, width, height)
|
||||
enemy_safe_options = self._safe_next_turn_option_count(
|
||||
future_body=my_body,
|
||||
other_snakes=other_snakes,
|
||||
food_set=food_set,
|
||||
is_constrictor=is_constrictor,
|
||||
enemy_can_grow_cache=enemy_can_grow_cache,
|
||||
width=width,
|
||||
height=height,
|
||||
)
|
||||
required_space = len(my_body) + (1 if is_constrictor else 0)
|
||||
|
||||
score = 0.0
|
||||
score += reachable_space * 1.9
|
||||
score += liberties * 14.0
|
||||
score += next_options * 11.0
|
||||
score += enemy_safe_options * 26.0
|
||||
|
||||
if reachable_space < required_space:
|
||||
score -= 1500.0
|
||||
if liberties == 0:
|
||||
score -= 1000.0
|
||||
if next_options == 0:
|
||||
score -= 1200.0
|
||||
if enemy_safe_options == 0:
|
||||
score -= 1900.0
|
||||
elif enemy_safe_options == 1:
|
||||
score -= 420.0
|
||||
|
||||
return score
|
||||
|
||||
def _nearest_food_distance(self, start:Point, food_set:set[Point], blocked:set[Point], width:int, height:int) -> int|None:
|
||||
"""Compute shortest reachable distance to any food using BFS."""
|
||||
if not food_set:
|
||||
@@ -978,7 +1191,7 @@ class BestBattleSnake(TemplateSnake):
|
||||
|
||||
return None
|
||||
|
||||
def _path_distance( self, start:Point, goal:Point, blocked:set[Point], width:int, height:int) -> int|None:
|
||||
def _path_distance(self, start:Point, goal:Point, blocked:set[Point], width:int, height:int) -> int|None:
|
||||
"""Compute shortest path distance between two cells."""
|
||||
queue = deque([(start, 0)])
|
||||
seen = {start}
|
||||
@@ -1046,7 +1259,6 @@ class BestBattleSnake(TemplateSnake):
|
||||
count += 1
|
||||
return count
|
||||
|
||||
|
||||
def _safe_next_turn_option_count(self, future_body:list[Coord], other_snakes:list[SnakeState], food_set:set[Point], is_constrictor:bool, enemy_can_grow_cache:dict[Any, bool]|None, width:int, height:int) -> int:
|
||||
"""Count next-turn moves that stay safe from enemy head contests."""
|
||||
if not future_body:
|
||||
@@ -1180,7 +1392,7 @@ class BestBattleSnake(TemplateSnake):
|
||||
)
|
||||
return enemy_space, enemy_options
|
||||
|
||||
def _enemy_constrictor_projection(self, other_snakes:list[SnakeState], blocked:set[Point], width:int, height: int) -> tuple[int, int]:
|
||||
def _enemy_constrictor_projection(self, other_snakes:list[SnakeState], blocked:set[Point], width:int, height:int) -> tuple[int, int]:
|
||||
"""Estimate enemy best-space and total options after our candidate move."""
|
||||
best_enemy_space = 0
|
||||
total_enemy_options = 0
|
||||
|
||||
Reference in New Issue
Block a user