This commit is contained in:
@@ -473,6 +473,14 @@
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "yellow",
|
||||
"value": 120
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 250
|
||||
}
|
||||
]
|
||||
},
|
||||
@@ -488,7 +496,7 @@
|
||||
},
|
||||
"id": 7,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"colorMode": "background",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
@@ -531,6 +539,10 @@
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -545,7 +557,7 @@
|
||||
},
|
||||
"id": 8,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"colorMode": "background",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
@@ -825,7 +837,42 @@
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "avg move ms"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"mode": "fixed",
|
||||
"fixedColor": "orange"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "max move ms"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "color",
|
||||
"value": {
|
||||
"mode": "fixed",
|
||||
"fixedColor": "red"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "custom.lineWidth",
|
||||
"value": 3
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
@@ -872,6 +919,260 @@
|
||||
],
|
||||
"title": "Move Directions + Move Latency",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "yellow",
|
||||
"value": 1
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 2
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 0,
|
||||
"y": 24
|
||||
},
|
||||
"id": 13,
|
||||
"options": {
|
||||
"colorMode": "background",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "11.0.0",
|
||||
"targets": [
|
||||
{
|
||||
"editorMode": "code",
|
||||
"expr": "snake_active_games_stale",
|
||||
"legendFormat": "Stale Active Games",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Stale Active Games",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "yellow",
|
||||
"value": 1
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 5
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 6,
|
||||
"y": 24
|
||||
},
|
||||
"id": 14,
|
||||
"options": {
|
||||
"colorMode": "background",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "11.0.0",
|
||||
"targets": [
|
||||
{
|
||||
"editorMode": "code",
|
||||
"expr": "increase(snake_games_stuck_removed_total[$__range])",
|
||||
"legendFormat": "Stuck Games Removed",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Stuck Games Removed",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "yellow",
|
||||
"value": 90
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 150
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "s"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 12,
|
||||
"y": 24
|
||||
},
|
||||
"id": 15,
|
||||
"options": {
|
||||
"colorMode": "background",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "11.0.0",
|
||||
"targets": [
|
||||
{
|
||||
"editorMode": "code",
|
||||
"expr": "snake_oldest_active_game_age_sec",
|
||||
"legendFormat": "Oldest Active Game Age",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Oldest Active Game Age",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "s"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 18,
|
||||
"y": 24
|
||||
},
|
||||
"id": 16,
|
||||
"options": {
|
||||
"colorMode": "background",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "11.0.0",
|
||||
"targets": [
|
||||
{
|
||||
"editorMode": "code",
|
||||
"expr": "snake_stale_game_timeout_sec",
|
||||
"legendFormat": "Stale Timeout (Sec)",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Stale Timeout (Sec)",
|
||||
"type": "stat"
|
||||
}
|
||||
],
|
||||
"refresh": "10s",
|
||||
@@ -912,6 +1213,6 @@
|
||||
"timezone": "",
|
||||
"title": "Snake Performance",
|
||||
"uid": "snake-performance",
|
||||
"version": 2,
|
||||
"version": 3,
|
||||
"weekStart": ""
|
||||
}
|
||||
|
||||
+62
-8
@@ -33,6 +33,8 @@ class Server:
|
||||
|
||||
self.running_games:dict[str, GameBoard] = {}
|
||||
self.game_move_counts:dict[str, int] = {}
|
||||
self.game_last_seen_unix:dict[str, int] = {}
|
||||
self.stale_game_timeout_sec = self._get_stale_game_timeout_sec()
|
||||
self.metrics = {
|
||||
'games_started': 0,
|
||||
'games_ended': 0,
|
||||
@@ -49,9 +51,6 @@ class Server:
|
||||
'start': 0,
|
||||
'move': 0,
|
||||
'end': 0,
|
||||
'cleanup': 0,
|
||||
'metrics': 0,
|
||||
'metrics_prometheus': 0,
|
||||
},
|
||||
'move_direction_counts': {
|
||||
'up': 0,
|
||||
@@ -65,6 +64,7 @@ class Server:
|
||||
'last_game_start_unix': 0,
|
||||
'last_game_end_unix': 0,
|
||||
'last_move_unix': 0,
|
||||
'games_stuck_removed': 0,
|
||||
}
|
||||
self.logger = build_logger('Battlesnake', debug_env_var='DEBUG_SERVER')
|
||||
self.snake_version = self._get_snake_version()
|
||||
@@ -86,6 +86,7 @@ class Server:
|
||||
@self.app.post('/start')
|
||||
async def on_start():
|
||||
self._record_http_request('start')
|
||||
self._prune_stale_games()
|
||||
game_state = await request.get_json()
|
||||
await self._create_game_board(game_state)
|
||||
await await_log(self.logger.info(f'GAME START: {game_state['game']}'))
|
||||
@@ -122,6 +123,7 @@ class Server:
|
||||
@self.app.post('/end')
|
||||
async def on_end():
|
||||
self._record_http_request('end')
|
||||
self._prune_stale_games()
|
||||
game_state = await request.get_json()
|
||||
if self.store_game_state:
|
||||
game_board = await self._get_game_board(game_state, end=True)
|
||||
@@ -130,7 +132,7 @@ class Server:
|
||||
StorageLoader.build(self.storage_type),
|
||||
file_path=os.path.join(self.data_path, 'data'),
|
||||
database=os.getenv('EDGEDB_DATABASE', None),
|
||||
tls_security=None
|
||||
tls_security=None,
|
||||
)
|
||||
else:
|
||||
await game_board.save(
|
||||
@@ -208,7 +210,14 @@ class Server:
|
||||
return self.default_snake_config['version']
|
||||
return str(version)
|
||||
|
||||
async def _create_game_board(self, game_state:dict):
|
||||
def _get_stale_game_timeout_sec(self) -> int:
|
||||
value = os.getenv('SNAKE_STUCK_GAME_TIMEOUT_SEC', '180')
|
||||
try:
|
||||
return max(30, int(value))
|
||||
except ValueError:
|
||||
return 180
|
||||
|
||||
async def _create_game_board(self, game_state: dict):
|
||||
game_id = game_state['game']['id']
|
||||
new_game_board = GameBoard(
|
||||
game_id=game_id,
|
||||
@@ -223,6 +232,7 @@ class Server:
|
||||
|
||||
self.running_games[game_id] = new_game_board
|
||||
self.game_move_counts[game_id] = 0
|
||||
self.game_last_seen_unix[game_id] = int(time.time())
|
||||
self.metrics['games_started'] += 1
|
||||
self.metrics['active_games_peak'] = max(
|
||||
self.metrics['active_games_peak'],
|
||||
@@ -233,8 +243,9 @@ class Server:
|
||||
|
||||
def _delete_game_board(self, game_state:dict):
|
||||
game_id = game_state['game']['id']
|
||||
del self.running_games[game_id]
|
||||
self.running_games.pop(game_id, None)
|
||||
self.game_move_counts.pop(game_id, None)
|
||||
self.game_last_seen_unix.pop(game_id, None)
|
||||
|
||||
async def _get_game_board(self, game_state:dict, end:bool=False):
|
||||
game_id = game_state['game']['id']
|
||||
@@ -248,6 +259,8 @@ class Server:
|
||||
self.metrics['total_moves'] += 1
|
||||
self.game_move_counts[game_id] = self.game_move_counts.get(game_id, 0) + 1
|
||||
|
||||
self.game_last_seen_unix[game_id] = int(time.time())
|
||||
|
||||
game_board.read_game_data(game_state)
|
||||
if end:
|
||||
self._record_game_end(game_state)
|
||||
@@ -262,6 +275,22 @@ class Server:
|
||||
storage = StorageLoader.build(self.storage_type)()
|
||||
return storage.cleanup()
|
||||
|
||||
def _prune_stale_games(self):
|
||||
if not self.running_games:
|
||||
return
|
||||
|
||||
now = int(time.time())
|
||||
stale_ids = [
|
||||
game_id
|
||||
for game_id, last_seen in self.game_last_seen_unix.items()
|
||||
if now - last_seen >= self.stale_game_timeout_sec
|
||||
]
|
||||
for game_id in stale_ids:
|
||||
self.running_games.pop(game_id, None)
|
||||
self.game_move_counts.pop(game_id, None)
|
||||
self.game_last_seen_unix.pop(game_id, None)
|
||||
self.metrics['games_stuck_removed'] += 1
|
||||
|
||||
def _record_game_end(self, game_state: dict):
|
||||
self.metrics['games_ended'] += 1
|
||||
self.metrics['last_game_end_unix'] = int(time.time())
|
||||
@@ -286,6 +315,16 @@ class Server:
|
||||
win_rate = self.metrics['wins'] / games_ended if games_ended else 0.0
|
||||
avg_move_ms = self.metrics['move_response_time_ms_total'] / total_moves if total_moves else 0.0
|
||||
|
||||
now = int(time.time())
|
||||
oldest_active_age = 0
|
||||
if self.game_last_seen_unix:
|
||||
oldest_active_age = max(0, now - min(self.game_last_seen_unix.values()))
|
||||
stale_candidates = sum(
|
||||
1
|
||||
for last_seen in self.game_last_seen_unix.values()
|
||||
if now - last_seen >= self.stale_game_timeout_sec
|
||||
)
|
||||
|
||||
return {
|
||||
**self.metrics,
|
||||
'active_games': len(self.running_games),
|
||||
@@ -295,6 +334,9 @@ class Server:
|
||||
'avg_move_response_ms': round(avg_move_ms, 2),
|
||||
'http_requests_by_endpoint': dict(self.metrics['http_requests_by_endpoint']),
|
||||
'move_direction_counts': dict(self.metrics['move_direction_counts']),
|
||||
'oldest_active_game_age_sec': oldest_active_age,
|
||||
'stale_game_timeout_sec': self.stale_game_timeout_sec,
|
||||
'active_games_stale': stale_candidates,
|
||||
}
|
||||
|
||||
def _record_http_request(self, endpoint:str):
|
||||
@@ -343,10 +385,10 @@ class Server:
|
||||
f'snake_http_requests_total {snapshot['http_requests_total']}',
|
||||
'# HELP snake_move_response_ms_total Total move endpoint compute time in milliseconds.',
|
||||
'# TYPE snake_move_response_ms_total counter',
|
||||
f"snake_move_response_ms_total {round(snapshot['move_response_time_ms_total'], 3)}",
|
||||
f'snake_move_response_ms_total {round(snapshot['move_response_time_ms_total'], 3)}',
|
||||
'# HELP snake_move_response_ms_max Maximum move endpoint compute time in milliseconds.',
|
||||
'# TYPE snake_move_response_ms_max gauge',
|
||||
f"snake_move_response_ms_max {round(snapshot['move_response_time_ms_max'], 3)}",
|
||||
f'snake_move_response_ms_max {round(snapshot['move_response_time_ms_max'], 3)}',
|
||||
'# HELP snake_avg_turns_per_game Average final turn per ended game.',
|
||||
'# TYPE snake_avg_turns_per_game gauge',
|
||||
f'snake_avg_turns_per_game {snapshot['avg_turns_per_game']}',
|
||||
@@ -365,6 +407,18 @@ class Server:
|
||||
'# HELP snake_last_move_unix Unix timestamp of most recent /move response.',
|
||||
'# TYPE snake_last_move_unix gauge',
|
||||
f'snake_last_move_unix {snapshot['last_move_unix']}',
|
||||
'# HELP snake_games_stuck_removed_total Active games auto-removed due to inactivity timeout.',
|
||||
'# TYPE snake_games_stuck_removed_total counter',
|
||||
f'snake_games_stuck_removed_total {snapshot['games_stuck_removed']}',
|
||||
'# HELP snake_oldest_active_game_age_sec Age in seconds of the oldest active game.',
|
||||
'# TYPE snake_oldest_active_game_age_sec gauge',
|
||||
f'snake_oldest_active_game_age_sec {snapshot['oldest_active_game_age_sec']}',
|
||||
'# HELP snake_stale_game_timeout_sec Configured inactivity timeout for stale games.',
|
||||
'# TYPE snake_stale_game_timeout_sec gauge',
|
||||
f'snake_stale_game_timeout_sec {snapshot['stale_game_timeout_sec']}',
|
||||
'# HELP snake_active_games_stale Active games currently beyond stale timeout.',
|
||||
'# TYPE snake_active_games_stale gauge',
|
||||
f'snake_active_games_stale {snapshot['active_games_stale']}',
|
||||
]
|
||||
|
||||
lines.extend([
|
||||
|
||||
Reference in New Issue
Block a user