import time

from server.metrics.MetricsManager import MetricsManager


class ServerMetricsCollector:
    """Accumulate in-process counters for the snake server and render them.

    The collector keeps a flat dict of counters/gauges in ``self._metrics``,
    updates it through the ``record_*`` helpers, and can turn it into a
    snapshot dict (``build_local_snapshot`` / ``build_snapshot``) or into
    ``# HELP`` / ``# TYPE`` / sample lines (``build_prometheus_metrics``).

    Most ``record_*`` coroutines finish by publishing a fresh local snapshot
    through the metrics manager (see ``_auto_publish``).
    """

    def __init__(
        self,
        metrics_manager: MetricsManager,
        game_state_local_cache: bool,
        metrics_backend: str,
        game_state_backend: str,
        stale_game_timeout_sec: int,
        game_last_seen_unix: dict,
        game_move_counts: dict,
    ) -> None:
        """Store collaborators and initialise every counter to zero.

        Args:
            metrics_manager: Object used for ``publish_only``, ``snapshot``
                and ``close``.
            game_state_local_cache: Reported (as a bool) in every snapshot
                under ``game_state_local_cache_enabled``.
            metrics_backend: Reported verbatim in every snapshot.
            game_state_backend: Compared (stripped, case-insensitive) with
                ``'redis'`` to switch on the Redis-specific reporting rules.
            stale_game_timeout_sec: Inactivity threshold, in seconds, after
                which a game counts as stale.
            game_last_seen_unix: Mapping whose values are unix timestamps of
                the last activity per game (keys presumably game IDs - verify
                against the caller). Kept by reference, not copied.
            game_move_counts: Mapping of tracked games; only its length is
                used here. Kept by reference, not copied.
        """
        self._manager = metrics_manager
        self._stale_game_timeout_sec = stale_game_timeout_sec
        self._game_last_seen_unix = game_last_seen_unix
        self._game_move_counts = game_move_counts
        self._game_state_backend_is_redis = game_state_backend.strip().lower() == 'redis'
        # Key order matters: snapshots spread this dict first, so it defines
        # the leading key order of every snapshot.
        self._metrics = {
            'games_started': 0,
            'games_ended': 0,
            'wins': 0,
            'losses': 0,
            'total_moves': 0,
            'total_turns': 0,
            'max_turn': 0,
            'active_games_peak': 0,
            'games_autocreated': 0,
            'http_requests_total': 0,
            # 'http_requests_by_endpoint': {
            #     'info': 0,
            #     'start': 0,
            #     'move': 0,
            #     'end': 0,
            # },
            # 'move_direction_counts': {
            #     'up': 0,
            #     'down': 0,
            #     'left': 0,
            #     'right': 0,
            #     'unknown': 0,
            # },
            'move_response_time_ms_total': 0.0,
            'move_response_time_ms_max': 0.0,
            'last_game_start_unix': 0,
            'last_game_end_unix': 0,
            'last_move_unix': 0,
            'games_stuck_removed': 0,
            'game_state_local_cache_enabled': bool(game_state_local_cache),
            'metrics_backend': metrics_backend,
        }

    # ── internal ──────────────────────────────────────────────────────────────

    async def _auto_publish(self) -> None:
        """Build a local snapshot from the stored game dicts and publish it."""
        snapshot = self.build_local_snapshot(
            self._game_last_seen_unix,
            self._game_move_counts,
        )
        await self._manager.publish_only(snapshot)

    # ── record helpers ────────────────────────────────────────────────────────

    def record_http_request(self, endpoint: str) -> None:
        """Count one handled HTTP request.

        ``endpoint`` is currently unused because the per-endpoint breakdown
        below is commented out. This helper does not publish.
        """
        self._metrics['http_requests_total'] += 1
        # endpoint_counts = self._metrics['http_requests_by_endpoint']
        # endpoint_counts[endpoint] = endpoint_counts.get(endpoint, 0) + 1

    async def record_game_started(self, active_count: int) -> None:
        """Count a started game, track the peak of ``active_count``, publish."""
        self._metrics['games_started'] += 1
        self._metrics['active_games_peak'] = max(
            self._metrics['active_games_peak'],
            active_count,
        )
        self._metrics['last_game_start_unix'] = int(time.time())
        await self._auto_publish()

    async def record_game_autocreated(self) -> None:
        """Count a game that was auto-created, then publish."""
        self._metrics['games_autocreated'] += 1
        await self._auto_publish()

    async def record_move(self, direction: str, elapsed_ms: float) -> None:
        """Count one served move and its response time, then publish.

        Args:
            direction: Chosen move; currently unused because the
                per-direction breakdown below is commented out.
            elapsed_ms: Time spent computing the move, in milliseconds.
        """
        self._metrics['total_moves'] += 1
        self._metrics['move_response_time_ms_total'] += elapsed_ms
        self._metrics['move_response_time_ms_max'] = max(
            self._metrics['move_response_time_ms_max'],
            elapsed_ms,
        )
        # move_counts = self._metrics['move_direction_counts']
        # if direction in move_counts:
        #     move_counts[direction] += 1
        # else:
        #     move_counts['unknown'] += 1

        # Always stamp the move time: 'last_move_unix' is part of every
        # snapshot and is exported as snake_last_move_unix, independently of
        # whether per-direction counting is enabled.
        self._metrics['last_move_unix'] = int(time.time())
        await self._auto_publish()

    async def record_game_end(self, game_state: dict) -> None:
        """Count an ended game as a win or a loss, then publish.

        The game counts as a win when ``game_state['you']['id']`` is truthy
        and is among the ids in ``game_state['board']['snakes']``; otherwise
        it counts as a loss. Missing or ``None`` values for ``turn``, ``you``,
        ``board`` and ``snakes`` are treated as empty.

        Args:
            game_state: Final game payload; read via ``turn``, ``you.id`` and
                ``board.snakes[*].id``.
        """
        # Parse everything before touching any counter, so a payload that
        # still fails to parse raises without leaving games_ended out of sync
        # with wins + losses.
        final_turn = int(game_state.get('turn') or 0)
        you_id = (game_state.get('you') or {}).get('id')
        board = game_state.get('board') or {}
        alive_ids = {snake.get('id') for snake in (board.get('snakes') or [])}

        self._metrics['games_ended'] += 1
        self._metrics['last_game_end_unix'] = int(time.time())
        self._metrics['total_turns'] += final_turn
        self._metrics['max_turn'] = max(self._metrics['max_turn'], final_turn)

        if you_id and you_id in alive_ids:
            self._metrics['wins'] += 1
        else:
            self._metrics['losses'] += 1
        await self._auto_publish()

    async def record_stuck_removed(self) -> None:
        """Count a removed stuck game, then publish.

        Does nothing (no count, no publish) when the game-state backend is
        Redis.
        """
        if self._game_state_backend_is_redis:
            return
        self._metrics['games_stuck_removed'] += 1
        await self._auto_publish()

    # ── snapshot builders ─────────────────────────────────────────────────────

    def _calc_active_game_stats(self, game_last_seen_unix: dict) -> tuple[int, int, int]:
        """Returns (report_active_games, report_stale_candidates, oldest_active_age_sec)."""
        now = int(time.time())
        timeout = self._stale_game_timeout_sec
        stale_candidates = sum(
            1
            for last_seen in game_last_seen_unix.values()
            if now - last_seen >= timeout
        )

        if self._game_state_backend_is_redis:
            # Redis auto-expires stale keys via TTL, so stale games are already gone from the
            # server's perspective. We exclude them from all metrics so we only report games
            # that are actually still alive in Redis.
            report_active_games = len(game_last_seen_unix) - stale_candidates
            report_stale_candidates = 0
            # Only include non-stale timestamps when calculating the oldest active game age,
            # so a game that Redis already deleted doesn't inflate the age metric.
            active_last_seen = [
                last_seen
                for last_seen in game_last_seen_unix.values()
                if now - last_seen < timeout
            ]
        else:
            report_active_games = len(game_last_seen_unix)
            report_stale_candidates = stale_candidates
            active_last_seen = list(game_last_seen_unix.values())

        # Clamp at 0 so a timestamp slightly in the future never yields a
        # negative age; with no active games the age is 0.
        oldest_active_age = max(0, now - min(active_last_seen)) if active_last_seen else 0
        return report_active_games, report_stale_candidates, oldest_active_age

    def build_local_snapshot(self, game_last_seen_unix: dict, game_move_counts: dict) -> dict:
        """Return the raw counters plus derived values as a new dict.

        Derived values: active/stale game counts and oldest active age (from
        ``game_last_seen_unix``), ``tracked_games`` (length of
        ``game_move_counts``), and rounded averages / win rate, which are 0.0
        when there is nothing to divide by.
        """
        games_ended = self._metrics['games_ended']
        total_moves = self._metrics['total_moves']

        avg_turns = self._metrics['total_turns'] / games_ended if games_ended else 0.0
        win_rate = self._metrics['wins'] / games_ended if games_ended else 0.0
        avg_move_ms = (
            self._metrics['move_response_time_ms_total'] / total_moves
            if total_moves
            else 0.0
        )

        report_active_games, report_stale_candidates, oldest_active_age = (
            self._calc_active_game_stats(game_last_seen_unix)
        )

        return {
            **self._metrics,
            'active_games': report_active_games,
            'tracked_games': len(game_move_counts),
            'avg_turns_per_game': round(avg_turns, 2),
            'win_rate': round(win_rate, 4),
            'avg_move_response_ms': round(avg_move_ms, 2),
            # 'http_requests_by_endpoint': dict(self._metrics['http_requests_by_endpoint']),
            # 'move_direction_counts': dict(self._metrics['move_direction_counts']),
            'oldest_active_game_age_sec': oldest_active_age,
            'stale_game_timeout_sec': self._stale_game_timeout_sec,
            'active_games_stale': report_stale_candidates,
        }

    async def build_snapshot(self, game_last_seen_unix: dict, game_move_counts: dict) -> dict:
        """Build the local snapshot and return what the manager's ``snapshot`` yields for it."""
        local_snapshot = self.build_local_snapshot(game_last_seen_unix, game_move_counts)
        return await self._manager.snapshot(local_snapshot)

    def build_prometheus_metrics(self, snapshot: dict) -> str:
        """Render ``snapshot`` as ``# HELP`` / ``# TYPE`` / sample lines.

        Args:
            snapshot: Dict with the keys produced by ``build_local_snapshot``.

        Returns:
            Newline-joined text ending with a trailing newline.

        Raises:
            KeyError: If ``snapshot`` lacks one of the keys read below.
        """
        lines = [
            '# HELP snake_games_started_total Total games started by snake server.',
            '# TYPE snake_games_started_total counter',
            f'snake_games_started_total {snapshot["games_started"]}',
            '# HELP snake_games_ended_total Total games ended by snake server.',
            '# TYPE snake_games_ended_total counter',
            f'snake_games_ended_total {snapshot["games_ended"]}',
            '# HELP snake_wins_total Total games won by this snake.',
            '# TYPE snake_wins_total counter',
            f'snake_wins_total {snapshot["wins"]}',
            '# HELP snake_losses_total Total games lost by this snake.',
            '# TYPE snake_losses_total counter',
            f'snake_losses_total {snapshot["losses"]}',
            '# HELP snake_moves_total Total move decisions served by /move.',
            '# TYPE snake_moves_total counter',
            f'snake_moves_total {snapshot["total_moves"]}',
            '# HELP snake_turns_total Total turns across all ended games.',
            '# TYPE snake_turns_total counter',
            f'snake_turns_total {snapshot["total_turns"]}',
            '# HELP snake_active_games Currently active games in memory.',
            '# TYPE snake_active_games gauge',
            f'snake_active_games {snapshot["active_games"]}',
            '# HELP snake_tracked_games Currently tracked game IDs for move counters.',
            '# TYPE snake_tracked_games gauge',
            f'snake_tracked_games {snapshot["tracked_games"]}',
            '# HELP snake_max_turn Highest final turn seen in an ended game.',
            '# TYPE snake_max_turn gauge',
            f'snake_max_turn {snapshot["max_turn"]}',
            '# HELP snake_active_games_peak Highest active game count observed.',
            '# TYPE snake_active_games_peak gauge',
            f'snake_active_games_peak {snapshot["active_games_peak"]}',
            '# HELP snake_games_autocreated_total Games created on /move or /end due to missing /start.',
            '# TYPE snake_games_autocreated_total counter',
            f'snake_games_autocreated_total {snapshot["games_autocreated"]}',
            '# HELP snake_http_requests_total Total HTTP requests handled by this process.',
            '# TYPE snake_http_requests_total counter',
            f'snake_http_requests_total {snapshot["http_requests_total"]}',
            '# HELP snake_move_response_ms_total Total move endpoint compute time in milliseconds.',
            '# TYPE snake_move_response_ms_total counter',
            f'snake_move_response_ms_total {round(snapshot["move_response_time_ms_total"], 3)}',
            '# HELP snake_move_response_ms_max Maximum move endpoint compute time in milliseconds.',
            '# TYPE snake_move_response_ms_max gauge',
            f'snake_move_response_ms_max {round(snapshot["move_response_time_ms_max"], 3)}',
            '# HELP snake_avg_turns_per_game Average final turn per ended game.',
            '# TYPE snake_avg_turns_per_game gauge',
            f'snake_avg_turns_per_game {snapshot["avg_turns_per_game"]}',
            '# HELP snake_avg_move_response_ms Average move endpoint compute time in milliseconds.',
            '# TYPE snake_avg_move_response_ms gauge',
            f'snake_avg_move_response_ms {snapshot["avg_move_response_ms"]}',
            '# HELP snake_win_rate Win ratio from ended games (0.0 - 1.0).',
            '# TYPE snake_win_rate gauge',
            f'snake_win_rate {snapshot["win_rate"]}',
            '# HELP snake_last_game_start_unix Unix timestamp of most recent /start request.',
            '# TYPE snake_last_game_start_unix gauge',
            f'snake_last_game_start_unix {snapshot["last_game_start_unix"]}',
            '# HELP snake_last_game_end_unix Unix timestamp of most recent /end request.',
            '# TYPE snake_last_game_end_unix gauge',
            f'snake_last_game_end_unix {snapshot["last_game_end_unix"]}',
            '# HELP snake_last_move_unix Unix timestamp of most recent /move response.',
            '# TYPE snake_last_move_unix gauge',
            f'snake_last_move_unix {snapshot["last_move_unix"]}',
            '# HELP snake_games_stuck_removed_total Active games auto-removed due to inactivity timeout.',
            '# TYPE snake_games_stuck_removed_total counter',
            f'snake_games_stuck_removed_total {snapshot["games_stuck_removed"]}',
            '# HELP snake_oldest_active_game_age_sec Age in seconds of the oldest active game.',
            '# TYPE snake_oldest_active_game_age_sec gauge',
            f'snake_oldest_active_game_age_sec {snapshot["oldest_active_game_age_sec"]}',
            '# HELP snake_stale_game_timeout_sec Configured inactivity timeout for stale games.',
            '# TYPE snake_stale_game_timeout_sec gauge',
            f'snake_stale_game_timeout_sec {snapshot["stale_game_timeout_sec"]}',
            '# HELP snake_active_games_stale Active games currently beyond stale timeout.',
            '# TYPE snake_active_games_stale gauge',
            f'snake_active_games_stale {snapshot["active_games_stale"]}',
        ]

        # lines.extend([
        #     '# HELP snake_http_requests_by_endpoint_total Requests served grouped by endpoint.',
        #     '# TYPE snake_http_requests_by_endpoint_total counter',
        # ])
        # for endpoint, count in snapshot['http_requests_by_endpoint'].items():
        #     lines.append(f'snake_http_requests_by_endpoint_total{{endpoint="{endpoint}"}} {count}')

        # lines.extend([
        #     '# HELP snake_moves_by_direction_total Move responses grouped by direction.',
        #     '# TYPE snake_moves_by_direction_total counter',
        # ])
        # for direction, count in snapshot['move_direction_counts'].items():
        #     lines.append(f'snake_moves_by_direction_total{{direction="{direction}"}} {count}')

        return '\n'.join(lines) + '\n'

    async def close(self) -> None:
        """Close the underlying metrics manager."""
        await self._manager.close()