rework dataset function and class structure
This commit is contained in:
@@ -5,6 +5,7 @@ from pathlib import Path
|
||||
import tempfile, gzip
|
||||
|
||||
from server.dataset.RLBootstrapDataset import RLBootstrapDataset
|
||||
from server.dataset.DatasetIO import DatasetIO
|
||||
|
||||
class TestRLBootstrapDataset(unittest.TestCase):
|
||||
def test_count_jsonl_rows_reads_gzip_dataset(self):
|
||||
@@ -15,17 +16,15 @@ class TestRLBootstrapDataset(unittest.TestCase):
|
||||
handle.write("\n")
|
||||
handle.write('{"turn":2}\n')
|
||||
|
||||
self.assertEqual(RLBootstrapDataset.count_jsonl_rows(dataset_path), 2)
|
||||
self.assertEqual(RLBootstrapDataset.count_jsonl_rows(Path(tmp) / "base.jsonl"), 2)
|
||||
self.assertEqual(DatasetIO.count_jsonl_rows(dataset_path), 2)
|
||||
self.assertEqual(DatasetIO.count_jsonl_rows(Path(tmp) / "base.jsonl"), 2)
|
||||
|
||||
def test_rotate_and_gzip_if_size_reached_rotates_jsonl(self):
|
||||
with tempfile.TemporaryDirectory() as tmp:
|
||||
path = Path(tmp) / "rl_bootstrap.jsonl"
|
||||
path.write_text("x" * 200, encoding="utf-8")
|
||||
|
||||
rotated = RLBootstrapDataset.rotate_and_gzip_if_size_reached(
|
||||
path, max_bytes=50
|
||||
)
|
||||
rotated = DatasetIO.rotate_and_gzip_if_size_reached(path, max_bytes=50)
|
||||
|
||||
self.assertTrue(rotated)
|
||||
self.assertFalse(path.exists())
|
||||
|
||||
Reference in New Issue
Block a user