Coverage for tests/test_folder_metadata_builder.py: 96%
102 statements
« prev ^ index » next coverage.py v7.12.0, created at 2026-03-21 12:19 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2026-03-21 12:19 +0000
1# SPDX-FileCopyrightText: 2025-2026 Arcangelo Massari <arcangelomas@gmail.com>
2#
3# SPDX-License-Identifier: ISC
5import shutil
6import tempfile
8import pytest
9from pathlib import Path
10from rdflib import Dataset, Graph, Namespace
12from changes_metadata_manager.folder_metadata_builder import (
13 extract_metadata_for_stage,
14 extract_id_from_folder_name,
15 load_kg,
16 merge_provenance_files,
17 process_all_folders,
18 scan_folder_structure,
19)
# "data" directory located one level above the directory containing this file.
DATA_DIR = Path(__file__).parent.parent.joinpath("data")
# Expected-output fixtures stored next to this file.
FIXTURE_DIR = Path(__file__).parent.joinpath("fixtures", "folder_metadata")
# Knowledge graph file handed to load_kg / process_all_folders by the tests.
REAL_KG_PATH = DATA_DIR.joinpath("kg.ttl")

# Each entry is (sala directory, object folder, third value). The loops in this
# module only use the first two; the third presumably is the object ID encoded
# in the folder name -- TODO confirm.
TEST_ITEMS = [
    ("Sala1", "S1-01-CNR_CartaNautica", "1"),
    ("Sala2", "S2-24-CNR_MappaOrtoBotanicoPADOVA", "24"),
    ("Sala5", "S5-57-FICLIT_VolumePolpo", "57"),
]
def load_fixture(relative_path: str) -> Graph:
    """Parse a Turtle fixture into a fresh graph.

    Args:
        relative_path: Location of the fixture file relative to ``FIXTURE_DIR``.

    Returns:
        A new ``Graph`` holding the triples parsed from that file.
    """
    fixture_file = FIXTURE_DIR / relative_path
    fixture_graph = Graph()
    fixture_graph.parse(source=fixture_file, format="turtle")
    return fixture_graph
def assert_graphs_equal(actual: Graph, expected: Graph):
    """Assert that ``actual`` and ``expected`` contain exactly the same triples.

    Both arguments are only iterated: each one is collected into a set of
    ``(s, p, o)`` triples and the two sets are compared.

    Args:
        actual: Graph produced by the code under test.
        expected: Graph holding the triples that should be present.

    Raises:
        AssertionError: If triples are missing from ``actual``, if it holds
            triples absent from ``expected``, or both. The message lists every
            offending triple, sorted by its string form, with the "Missing"
            section before the "Extra" section.
    """
    actual_triples = set(actual)
    expected_triples = set(expected)

    # Collect both kinds of difference before raising, so a single failure
    # shows the complete picture instead of hiding the extras behind the
    # missing triples.
    differences = (
        ("Missing", expected_triples - actual_triples),
        ("Extra", actual_triples - expected_triples),
    )

    sections = []
    for label, triples in differences:
        if not triples:
            continue
        listing = "\n".join(f" {s} {p} {o}" for s, p, o in sorted(triples, key=str))
        sections.append(f"{label} {len(triples)} triples:\n{listing}")

    if sections:
        raise AssertionError("\n".join(sections))
@pytest.fixture(scope="module")
def real_kg():
    """Provide the result of ``load_kg(REAL_KG_PATH)``, loaded once per module."""
    knowledge_graph = load_kg(REAL_KG_PATH)
    return knowledge_graph
# Names of the stage sub-directories that the test_folder_structure fixture
# creates inside every object folder.
STAGES = [
    "raw",
    "rawp",
    "dcho",
    "dchoo",
]
@pytest.fixture
def test_folder_structure():
    """Yield the root of a temporary ``root/<sala>/<folder>/<stage>`` tree.

    One empty directory is created for every ``TEST_ITEMS`` entry combined
    with every name in ``STAGES``.

    Yields:
        Path of the ``root`` directory inside a fresh temporary directory.
    """
    # The context manager removes the temporary directory even when building
    # the tree fails before the yield; a trailing rmtree call after the yield
    # would never run in that case and the directory would be leaked.
    with tempfile.TemporaryDirectory() as tmpdir:
        root_path = Path(tmpdir) / "root"

        for sala, folder, _ in TEST_ITEMS:
            for stage in STAGES:
                (root_path / sala / folder / stage).mkdir(parents=True)

        yield root_path
class TestExtractMetadataForStageExact:
    """Compare ``extract_metadata_for_stage`` output with stored Turtle fixtures."""

    @pytest.mark.parametrize(
        "nr,stage",
        [
            (nr, stage)
            for nr in ("1", "24", "57")
            for stage in ("raw", "rawp", "dcho", "dchoo")
        ],
    )
    def test_stage_output_matches_fixture(self, real_kg, nr, stage):
        """The graph extracted for (nr, stage) equals ``nr_<nr>/<stage>.ttl``."""
        produced_graph = extract_metadata_for_stage(real_kg, nr, stage)
        fixture_graph = load_fixture(f"nr_{nr}/{stage}.ttl")
        assert_graphs_equal(produced_graph, fixture_graph)
class TestExtractIdFromFolderName:
    """Exercise ``extract_id_from_folder_name`` on accepted and rejected names."""

    @pytest.mark.parametrize(
        "folder_name,expected",
        [
            ("S1-5-nome_oggetto", "5"),
            ("S2-42-altro_nome", "42"),
            ("S6-123-oggetto_complesso", "123"),
            ("S1-7-nome con spazi", "7"),
            # Leading zero in the folder name is not part of the expected ID.
            ("S1-01-CNR_CartaNautica", "1"),
            # A trailing letter is kept as part of the expected ID.
            ("S2-27a-FICLIT_DelphiniumStaphisagria", "27a"),
            ("S6-74b-ISPC-Orchis_morio_L", "74b"),
        ],
    )
    def test_valid_folder_names(self, folder_name, expected):
        """Each well-formed folder name yields the expected ID string."""
        extracted = extract_id_from_folder_name(folder_name)
        assert extracted == expected

    @pytest.mark.parametrize(
        "folder_name",
        [
            "1-5-nome",
            "Sala1-5-nome",
            "S1_5_nome",
        ],
    )
    def test_invalid_folder_names(self, folder_name):
        """Each malformed folder name raises ``ValueError`` with the expected text."""
        with pytest.raises(ValueError, match="Cannot extract ID"):
            extract_id_from_folder_name(folder_name)
class TestProcessAllFolders:
    """Check the files ``process_all_folders`` leaves inside the folder tree."""

    def test_creates_files_in_place(self, test_folder_structure):
        """Every stage directory ends up with both ``meta.ttl`` and ``prov.trig``."""
        root = test_folder_structure
        process_all_folders(root=root, kg_path=REAL_KG_PATH)

        for sala, folder, _ in TEST_ITEMS:
            stage_dirs = [
                entry for entry in (root / sala / folder).iterdir() if entry.is_dir()
            ]
            assert len(stage_dirs) == 4, f"Expected 4 stages for {folder}, got {len(stage_dirs)}"

            for stage_dir in stage_dirs:
                assert (stage_dir / "meta.ttl").exists(), f"meta.ttl not created for {folder}/{stage_dir.name}"
                assert (stage_dir / "prov.trig").exists(), f"prov.trig not created for {folder}/{stage_dir.name}"
class TestMergeProvenanceFiles:
    """Check that ``merge_provenance_files`` writes a merged TriG file."""

    def test_merges_all_prov_trig_files(self, test_folder_structure):
        """The merged file exists and holds the expected provenance quads."""
        root = test_folder_structure
        process_all_folders(root=root, kg_path=REAL_KG_PATH)

        output_path = root / "all_provenance.trig"
        merge_provenance_files(root, output_path)
        assert output_path.exists()

        merged = Dataset()
        merged.parse(str(output_path), format="trig")

        prov_ns = Namespace("http://www.w3.org/ns/prov#")
        specialization_quads = set(
            merged.quads((None, prov_ns.specializationOf, None, None))
        )
        # Count the per-stage prov.trig files found under root.
        prov_file_count = len(list(root.rglob("prov.trig")))

        assert prov_file_count == 12
        assert len(specialization_quads) == 200
class TestScanFolderStructure:
    """Check the nested mapping returned by ``scan_folder_structure``."""

    def test_scans_folder_structure(self):
        """A small sala/folder/stage tree is reported with its file names."""
        stage_files = (
            ("raw", ("file1.jpg", "file2.jpg")),
            ("dcho", ("model.obj",)),
        )

        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            folder_dir = root / "Sala1" / "S1-01-TestFolder"

            # Build each stage directory and drop its empty files into it.
            for stage, filenames in stage_files:
                stage_dir = folder_dir / stage
                stage_dir.mkdir(parents=True)
                for filename in filenames:
                    (stage_dir / filename).touch()

            result = scan_folder_structure(root)

            assert "structure" in result
            structure = result["structure"]
            assert "Sala1" in structure
            assert "S1-01-TestFolder" in structure["Sala1"]

            folder_data = structure["Sala1"]["S1-01-TestFolder"]
            assert "raw" in folder_data
            assert "dcho" in folder_data
            # Listing order of the raw files is not asserted, hence the set.
            assert set(folder_data["raw"]["_files"]) == {"file1.jpg", "file2.jpg"}
            assert folder_data["dcho"]["_files"] == ["model.obj"]