67 lines
2.2 KiB
Python
67 lines
2.2 KiB
Python
from pathlib import Path
|
|
import argparse, json
|
|
|
|
class DatasetExporter:
    """Flatten stored game JSON files into a single JSON Lines dataset.

    Every ``*.json`` file found recursively under ``input_dir`` is parsed,
    each entry of its ``dataset.samples`` list becomes one flat record, and
    the records are written to ``output_file`` as one JSON object per line.
    """

    def __init__(self, input_dir: str, output_file: str):
        """Remember where to read game files from and where to write to.

        Args:
            input_dir: Directory that is scanned recursively for ``*.json``.
            output_file: Path of the JSON Lines file to create or overwrite.
        """
        self.input_dir = Path(input_dir)
        self.output_file = Path(output_file)

    @staticmethod
    def _as_dict(value):
        """Return *value* if it is a dict, otherwise an empty dict."""
        return value if isinstance(value, dict) else {}

    def _iter_game_files(self):
        """Return the sorted list of JSON files below ``input_dir``.

        Returns an empty list when ``input_dir`` does not exist. Directories
        that merely carry a ``.json`` suffix are ignored, and so is the
        exporter's own output file: it is truncated before any input is read,
        so treating it as a game file could only fail.
        """
        if not self.input_dir.exists():
            return []

        output_path = self.output_file.resolve()
        return sorted(
            path
            for path in self.input_dir.rglob("*.json")
            if path.is_file() and path.resolve() != output_path
        )

    def _extract_samples(self, payload: dict, source_file: Path):
        """Flatten one parsed game file into a list of sample records.

        Game and snake metadata are taken from ``payload["dataset"]`` when the
        key is present there, otherwise from the top level of ``payload``.
        Anything that does not have the expected shape (a non-dict payload, a
        null ``dataset``/``game``/``snake`` section, a ``samples`` value that
        is not a list, a sample that is not a dict) is treated as absent
        instead of raising, because the recursive scan may pick up JSON files
        that are not game recordings.

        Args:
            payload: Parsed JSON content of one file.
            source_file: Path the payload was read from; stored in each record.

        Returns:
            A list of flat dicts, one per usable sample (possibly empty).
        """
        payload = self._as_dict(payload)
        dataset = self._as_dict(payload.get("dataset"))

        def section(key):
            # Same precedence as a plain ``dataset.get(key, payload.get(key))``:
            # a key present in ``dataset`` wins even when its value is empty.
            raw = dataset[key] if key in dataset else payload.get(key)
            return self._as_dict(raw)

        game_info = section("game")
        snake_info = section("snake")

        raw_samples = dataset.get("samples")
        if not isinstance(raw_samples, (list, tuple)):
            raw_samples = []

        return [
            {
                "game_id": game_info.get("id"),
                "game_map": game_info.get("map"),
                "game_type": game_info.get("type"),
                "snake_type": snake_info.get("type"),
                "turn": sample.get("turn"),
                "move": sample.get("move"),
                "is_good_move": sample.get("is_good_move", False),
                "game_board": sample.get("game_board"),
                "history": sample.get("history"),
                "source_file": str(source_file),
            }
            for sample in raw_samples
            if isinstance(sample, dict)
        ]

    def export_jsonl(self):
        """Write every sample of every game file to ``output_file``.

        The parent directory of the output file is created if needed and the
        output file is overwritten. Records are written as UTF-8 JSON, one per
        line, without ASCII escaping.

        Returns:
            A dict with ``games_scanned`` (number of input files read),
            ``samples_exported`` (number of lines written) and ``output_file``
            (the output path as a string).

        Raises:
            json.JSONDecodeError: If an input file does not contain valid JSON.
            OSError: If an input file cannot be read or the output cannot be
                written.
        """
        game_files = self._iter_game_files()
        self.output_file.parent.mkdir(parents=True, exist_ok=True)

        sample_count = 0
        with self.output_file.open("w", encoding="utf-8") as output:
            for game_file in game_files:
                with game_file.open("r", encoding="utf-8") as source:
                    payload = json.load(source)

                for sample in self._extract_samples(payload, game_file):
                    output.write(json.dumps(sample, ensure_ascii=False) + "\n")
                    sample_count += 1

        return {
            "games_scanned": len(game_files),
            "samples_exported": sample_count,
            "output_file": str(self.output_file),
        }
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: parse the two optional paths, run the export
    # and print the resulting report as indented JSON.
    cli = argparse.ArgumentParser(description="Export Battlesnake dataset to JSONL")

    # (flag, default, help) for every supported option.
    option_specs = (
        ("--input", "data", "Input directory with stored game JSON files"),
        ("--output", "data/dataset/good_moves.jsonl", "Output JSONL file"),
    )
    for flag, default_value, help_text in option_specs:
        cli.add_argument(flag, default=default_value, help=help_text)

    options = cli.parse_args()

    exporter = DatasetExporter(options.input, options.output)
    summary = exporter.export_jsonl()
    print(json.dumps(summary, indent=2))
|