"""Export stored Battlesnake game JSON files into a single JSONL dataset."""

import argparse
import json
from pathlib import Path

from server.dataset.DatasetIO import DatasetIO


class DatasetExporter:
    """Flatten per-game JSON files into one JSONL file of move samples."""

    def __init__(self, input_dir: str, output_file: str):
        """Store the input directory and output file as ``Path`` objects.

        Args:
            input_dir: Directory that is scanned for game JSON files.
            output_file: Destination path of the JSONL export.
        """
        self.input_dir = Path(input_dir)
        self.output_file = Path(output_file)

    def _iter_game_files(self):
        """Return the ``*.json`` files found by ``DatasetIO.list_directory_files``.

        The return type is whatever the DatasetIO helper yields; it is not
        defined in this file.
        """
        return DatasetIO.list_directory_files(
            self.input_dir, directory_pattern="*.json"
        )

    def _extract_samples(self, payload: dict, source_file: Path) -> list:
        """Build one flat record per sample stored in a game payload.

        Game and snake metadata are read from ``payload["dataset"]`` when the
        key exists there, otherwise from the top level of ``payload``.
        Sections that are missing or JSON ``null`` are treated as empty
        instead of raising.

        Args:
            payload: Parsed content of one game JSON file.
            source_file: Path of the file the payload came from; recorded in
                every record as a string.

        Returns:
            A list of dicts, one per entry of ``dataset["samples"]``.
        """
        # ``dict.get(key, default)`` only falls back when the key is absent;
        # ``or`` additionally covers keys that are present but null.
        dataset = payload.get("dataset") or {}
        game_info = dataset.get("game", payload.get("game")) or {}
        snake_info = dataset.get("snake", payload.get("snake")) or {}

        return [
            {
                "game_id": game_info.get("id"),
                "game_map": game_info.get("map"),
                "game_type": game_info.get("type"),
                "snake_type": snake_info.get("type"),
                "turn": sample.get("turn"),
                "move": sample.get("move"),
                "is_good_move": sample.get("is_good_move", False),
                "game_board": sample.get("game_board"),
                "history": sample.get("history"),
                "source_file": str(source_file),
            }
            for sample in dataset.get("samples") or []
        ]

    def export_jsonl(self) -> dict:
        """Write every sample of every game file as one JSON object per line.

        The parent directory of the output file is created if needed. A game
        file that cannot be parsed makes ``json.load`` raise, and that error
        propagates to the caller.

        Returns:
            A report dict with the keys ``games_scanned``,
            ``samples_exported`` and ``output_file``.
        """
        # Materialize once: the result is iterated and then counted with
        # ``len``, which would fail if the helper returned a lazy iterable.
        game_files = list(self._iter_game_files())
        self.output_file.parent.mkdir(parents=True, exist_ok=True)

        sample_count = 0
        with DatasetIO.open_text(self.output_file, "w") as output:
            for game_file in game_files:
                with game_file.open("r", encoding="utf-8") as source:
                    payload = json.load(source)
                for sample in self._extract_samples(payload, game_file):
                    output.write(json.dumps(sample, ensure_ascii=False) + "\n")
                    sample_count += 1

        return {
            "games_scanned": len(game_files),
            "samples_exported": sample_count,
            "output_file": str(self.output_file),
        }


def main() -> None:
    """Parse command-line arguments, run the export and print the report."""
    parser = argparse.ArgumentParser(description="Export Battlesnake dataset to JSONL")
    parser.add_argument(
        "--input",
        default="data",
        help="Input directory with stored game JSON files",
    )
    parser.add_argument(
        "--output",
        default="data/dataset/good_moves.jsonl",
        help="Output JSONL file",
    )
    args = parser.parse_args()

    report = DatasetExporter(args.input, args.output).export_jsonl()
    print(json.dumps(report, indent=2))


if __name__ == "__main__":
    main()