rework dataset function and class structure

This commit is contained in:
2026-04-05 02:21:15 +02:00
parent 066a93f755
commit 332e86e3cc
9 changed files with 318 additions and 248 deletions
+4 -4
View File
@@ -1,15 +1,15 @@
from pathlib import Path
import argparse, json
from server.dataset.DatasetIO import DatasetIO
class DatasetExporter:
def __init__(self, input_dir:str, output_file:str):
self.input_dir = Path(input_dir)
self.output_file = Path(output_file)
def _iter_game_files(self):
if not self.input_dir.exists():
return []
return sorted(self.input_dir.rglob("*.json"))
return DatasetIO.list_directory_files(self.input_dir, directory_pattern="*.json")
def _extract_samples(self, payload:dict, source_file:Path):
dataset = payload.get("dataset", {})
@@ -37,7 +37,7 @@ class DatasetExporter:
self.output_file.parent.mkdir(parents=True, exist_ok=True)
sample_count = 0
with self.output_file.open("w", encoding="utf-8") as output:
with DatasetIO.open_text(self.output_file, "w") as output:
for game_file in game_files:
with game_file.open("r", encoding="utf-8") as source:
payload = json.load(source)