This commit is contained in:
+62
-8
@@ -33,6 +33,8 @@ class Server:
|
||||
|
||||
self.running_games:dict[str, GameBoard] = {}
|
||||
self.game_move_counts:dict[str, int] = {}
|
||||
self.game_last_seen_unix:dict[str, int] = {}
|
||||
self.stale_game_timeout_sec = self._get_stale_game_timeout_sec()
|
||||
self.metrics = {
|
||||
'games_started': 0,
|
||||
'games_ended': 0,
|
||||
@@ -49,9 +51,6 @@ class Server:
|
||||
'start': 0,
|
||||
'move': 0,
|
||||
'end': 0,
|
||||
'cleanup': 0,
|
||||
'metrics': 0,
|
||||
'metrics_prometheus': 0,
|
||||
},
|
||||
'move_direction_counts': {
|
||||
'up': 0,
|
||||
@@ -65,6 +64,7 @@ class Server:
|
||||
'last_game_start_unix': 0,
|
||||
'last_game_end_unix': 0,
|
||||
'last_move_unix': 0,
|
||||
'games_stuck_removed': 0,
|
||||
}
|
||||
self.logger = build_logger('Battlesnake', debug_env_var='DEBUG_SERVER')
|
||||
self.snake_version = self._get_snake_version()
|
||||
@@ -86,6 +86,7 @@ class Server:
|
||||
@self.app.post('/start')
|
||||
async def on_start():
|
||||
self._record_http_request('start')
|
||||
self._prune_stale_games()
|
||||
game_state = await request.get_json()
|
||||
await self._create_game_board(game_state)
|
||||
await await_log(self.logger.info(f'GAME START: {game_state['game']}'))
|
||||
@@ -122,6 +123,7 @@ class Server:
|
||||
@self.app.post('/end')
|
||||
async def on_end():
|
||||
self._record_http_request('end')
|
||||
self._prune_stale_games()
|
||||
game_state = await request.get_json()
|
||||
if self.store_game_state:
|
||||
game_board = await self._get_game_board(game_state, end=True)
|
||||
@@ -130,7 +132,7 @@ class Server:
|
||||
StorageLoader.build(self.storage_type),
|
||||
file_path=os.path.join(self.data_path, 'data'),
|
||||
database=os.getenv('EDGEDB_DATABASE', None),
|
||||
tls_security=None
|
||||
tls_security=None,
|
||||
)
|
||||
else:
|
||||
await game_board.save(
|
||||
@@ -208,7 +210,14 @@ class Server:
|
||||
return self.default_snake_config['version']
|
||||
return str(version)
|
||||
|
||||
async def _create_game_board(self, game_state:dict):
|
||||
def _get_stale_game_timeout_sec(self) -> int:
|
||||
value = os.getenv('SNAKE_STUCK_GAME_TIMEOUT_SEC', '180')
|
||||
try:
|
||||
return max(30, int(value))
|
||||
except ValueError:
|
||||
return 180
|
||||
|
||||
async def _create_game_board(self, game_state: dict):
|
||||
game_id = game_state['game']['id']
|
||||
new_game_board = GameBoard(
|
||||
game_id=game_id,
|
||||
@@ -223,6 +232,7 @@ class Server:
|
||||
|
||||
self.running_games[game_id] = new_game_board
|
||||
self.game_move_counts[game_id] = 0
|
||||
self.game_last_seen_unix[game_id] = int(time.time())
|
||||
self.metrics['games_started'] += 1
|
||||
self.metrics['active_games_peak'] = max(
|
||||
self.metrics['active_games_peak'],
|
||||
@@ -233,8 +243,9 @@ class Server:
|
||||
|
||||
def _delete_game_board(self, game_state:dict):
|
||||
game_id = game_state['game']['id']
|
||||
del self.running_games[game_id]
|
||||
self.running_games.pop(game_id, None)
|
||||
self.game_move_counts.pop(game_id, None)
|
||||
self.game_last_seen_unix.pop(game_id, None)
|
||||
|
||||
async def _get_game_board(self, game_state:dict, end:bool=False):
|
||||
game_id = game_state['game']['id']
|
||||
@@ -248,6 +259,8 @@ class Server:
|
||||
self.metrics['total_moves'] += 1
|
||||
self.game_move_counts[game_id] = self.game_move_counts.get(game_id, 0) + 1
|
||||
|
||||
self.game_last_seen_unix[game_id] = int(time.time())
|
||||
|
||||
game_board.read_game_data(game_state)
|
||||
if end:
|
||||
self._record_game_end(game_state)
|
||||
@@ -262,6 +275,22 @@ class Server:
|
||||
storage = StorageLoader.build(self.storage_type)()
|
||||
return storage.cleanup()
|
||||
|
||||
def _prune_stale_games(self):
|
||||
if not self.running_games:
|
||||
return
|
||||
|
||||
now = int(time.time())
|
||||
stale_ids = [
|
||||
game_id
|
||||
for game_id, last_seen in self.game_last_seen_unix.items()
|
||||
if now - last_seen >= self.stale_game_timeout_sec
|
||||
]
|
||||
for game_id in stale_ids:
|
||||
self.running_games.pop(game_id, None)
|
||||
self.game_move_counts.pop(game_id, None)
|
||||
self.game_last_seen_unix.pop(game_id, None)
|
||||
self.metrics['games_stuck_removed'] += 1
|
||||
|
||||
def _record_game_end(self, game_state: dict):
|
||||
self.metrics['games_ended'] += 1
|
||||
self.metrics['last_game_end_unix'] = int(time.time())
|
||||
@@ -286,6 +315,16 @@ class Server:
|
||||
win_rate = self.metrics['wins'] / games_ended if games_ended else 0.0
|
||||
avg_move_ms = self.metrics['move_response_time_ms_total'] / total_moves if total_moves else 0.0
|
||||
|
||||
now = int(time.time())
|
||||
oldest_active_age = 0
|
||||
if self.game_last_seen_unix:
|
||||
oldest_active_age = max(0, now - min(self.game_last_seen_unix.values()))
|
||||
stale_candidates = sum(
|
||||
1
|
||||
for last_seen in self.game_last_seen_unix.values()
|
||||
if now - last_seen >= self.stale_game_timeout_sec
|
||||
)
|
||||
|
||||
return {
|
||||
**self.metrics,
|
||||
'active_games': len(self.running_games),
|
||||
@@ -295,6 +334,9 @@ class Server:
|
||||
'avg_move_response_ms': round(avg_move_ms, 2),
|
||||
'http_requests_by_endpoint': dict(self.metrics['http_requests_by_endpoint']),
|
||||
'move_direction_counts': dict(self.metrics['move_direction_counts']),
|
||||
'oldest_active_game_age_sec': oldest_active_age,
|
||||
'stale_game_timeout_sec': self.stale_game_timeout_sec,
|
||||
'active_games_stale': stale_candidates,
|
||||
}
|
||||
|
||||
def _record_http_request(self, endpoint:str):
|
||||
@@ -343,10 +385,10 @@ class Server:
|
||||
f'snake_http_requests_total {snapshot['http_requests_total']}',
|
||||
'# HELP snake_move_response_ms_total Total move endpoint compute time in milliseconds.',
|
||||
'# TYPE snake_move_response_ms_total counter',
|
||||
f"snake_move_response_ms_total {round(snapshot['move_response_time_ms_total'], 3)}",
|
||||
f'snake_move_response_ms_total {round(snapshot['move_response_time_ms_total'], 3)}',
|
||||
'# HELP snake_move_response_ms_max Maximum move endpoint compute time in milliseconds.',
|
||||
'# TYPE snake_move_response_ms_max gauge',
|
||||
f"snake_move_response_ms_max {round(snapshot['move_response_time_ms_max'], 3)}",
|
||||
f'snake_move_response_ms_max {round(snapshot['move_response_time_ms_max'], 3)}',
|
||||
'# HELP snake_avg_turns_per_game Average final turn per ended game.',
|
||||
'# TYPE snake_avg_turns_per_game gauge',
|
||||
f'snake_avg_turns_per_game {snapshot['avg_turns_per_game']}',
|
||||
@@ -365,6 +407,18 @@ class Server:
|
||||
'# HELP snake_last_move_unix Unix timestamp of most recent /move response.',
|
||||
'# TYPE snake_last_move_unix gauge',
|
||||
f'snake_last_move_unix {snapshot['last_move_unix']}',
|
||||
'# HELP snake_games_stuck_removed_total Active games auto-removed due to inactivity timeout.',
|
||||
'# TYPE snake_games_stuck_removed_total counter',
|
||||
f'snake_games_stuck_removed_total {snapshot['games_stuck_removed']}',
|
||||
'# HELP snake_oldest_active_game_age_sec Age in seconds of the oldest active game.',
|
||||
'# TYPE snake_oldest_active_game_age_sec gauge',
|
||||
f'snake_oldest_active_game_age_sec {snapshot['oldest_active_game_age_sec']}',
|
||||
'# HELP snake_stale_game_timeout_sec Configured inactivity timeout for stale games.',
|
||||
'# TYPE snake_stale_game_timeout_sec gauge',
|
||||
f'snake_stale_game_timeout_sec {snapshot['stale_game_timeout_sec']}',
|
||||
'# HELP snake_active_games_stale Active games currently beyond stale timeout.',
|
||||
'# TYPE snake_active_games_stale gauge',
|
||||
f'snake_active_games_stale {snapshot['active_games_stale']}',
|
||||
]
|
||||
|
||||
lines.extend([
|
||||
|
||||
Reference in New Issue
Block a user