rework dataset function and class structure

This commit is contained in:
2026-04-05 02:21:15 +02:00
parent 066a93f755
commit 332e86e3cc
9 changed files with 318 additions and 248 deletions
+38 -2
View File
@@ -1,6 +1,6 @@
import json
import tempfile
import unittest
import tempfile, json, gzip
from pathlib import Path
from server.dataset.DatasetExporter import DatasetExporter
@@ -43,5 +43,41 @@ class TestDatasetExporter(unittest.TestCase):
self.assertEqual(first["move"], "up")
self.assertTrue(first["is_good_move"])
def test_export_jsonl_gz(self):
    """Export one single-sample game to a ``.jsonl.gz`` path and verify it.

    Writes a minimal game JSON file into a temporary input directory,
    runs ``DatasetExporter.export_jsonl`` against an output path ending
    in ``.jsonl.gz``, then checks the returned report counters and that
    the output, read back through ``gzip``, holds exactly one JSON line
    for game ``g-1``.
    """
    with tempfile.TemporaryDirectory() as tmp:
        root = Path(tmp)
        source_dir = root / "data"
        target_path = root / "out" / "dataset.jsonl.gz"

        # Only the input directory is created here; the "out" directory
        # is deliberately left for the exporter to deal with.
        source_dir.mkdir(parents=True, exist_ok=True)

        sample = {
            "turn": 1,
            "move": "up",
            "is_good_move": True,
            "game_board": {"width": 11, "height": 11},
            "history": {"data": []},
        }
        payload = {
            "dataset": {
                "game": {
                    "id": "g-1",
                    "map": "standard",
                    "type": {"name": "duel"},
                },
                "snake": {"type": "BestBattleSnake"},
                "samples": [sample],
            }
        }
        (source_dir / "game-1.json").write_text(
            json.dumps(payload), encoding="utf-8"
        )

        exporter = DatasetExporter(str(source_dir), str(target_path))
        summary = exporter.export_jsonl()

        self.assertEqual(summary["games_scanned"], 1)
        self.assertEqual(summary["samples_exported"], 1)
        self.assertTrue(target_path.exists())

        # Reading in text mode through gzip fails if the file is not
        # actually gzip-compressed, so this also checks the encoding.
        with gzip.open(target_path, "rt", encoding="utf-8") as stream:
            content = stream.read()
        records = content.strip().splitlines()

        self.assertEqual(len(records), 1)
        exported = json.loads(records[0])
        self.assertEqual(exported["game_id"], "g-1")
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()