Coverage for tests/test_folder_metadata_builder.py: 96%

102 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2026-03-21 12:19 +0000

1# SPDX-FileCopyrightText: 2025-2026 Arcangelo Massari <arcangelomas@gmail.com> 

2# 

3# SPDX-License-Identifier: ISC 

4 

5import shutil 

6import tempfile 

7 

8import pytest 

9from pathlib import Path 

10from rdflib import Dataset, Graph, Namespace 

11 

12from changes_metadata_manager.folder_metadata_builder import ( 

13 extract_metadata_for_stage, 

14 extract_id_from_folder_name, 

15 load_kg, 

16 merge_provenance_files, 

17 process_all_folders, 

18 scan_folder_structure, 

19) 

20 

21 

# Locations are resolved relative to this test module: real data lives in a
# "data" directory next to the directory holding this file, expected outputs
# in "fixtures/folder_metadata" beside this file.
_TESTS_DIR = Path(__file__).parent
DATA_DIR = _TESTS_DIR.parent.joinpath("data")
FIXTURE_DIR = _TESTS_DIR.joinpath("fixtures", "folder_metadata")
REAL_KG_PATH = DATA_DIR.joinpath("kg.ttl")

# Each entry is (sala directory, item folder name, item number).
# NOTE(review): the third element presumably is the id that
# extract_id_from_folder_name derives from the folder name -- confirm.
TEST_ITEMS = [
    ("Sala1", "S1-01-CNR_CartaNautica", "1"),
    ("Sala2", "S2-24-CNR_MappaOrtoBotanicoPADOVA", "24"),
    ("Sala5", "S5-57-FICLIT_VolumePolpo", "57"),
]

31 

32 

def load_fixture(relative_path: str) -> Graph:
    """Parse a Turtle fixture file into a new graph.

    Args:
        relative_path: Location of the fixture, relative to ``FIXTURE_DIR``.

    Returns:
        A fresh ``Graph`` holding the triples read from the fixture.
    """
    fixture_path = FIXTURE_DIR / relative_path
    graph = Graph()
    graph.parse(source=fixture_path, format="turtle")
    return graph

37 

38 

def assert_graphs_equal(actual: Graph, expected: Graph):
    """Assert that two graphs contain exactly the same triples.

    Both arguments are turned into sets of triples and compared. Every
    difference is reported in a single error: triples present only in
    ``expected`` are listed as "Missing", triples present only in ``actual``
    as "Extra", so one failing run shows the complete diff.

    Args:
        actual: Graph produced by the code under test.
        expected: Graph holding the reference triples.

    Raises:
        AssertionError: If the two sets of triples differ.
    """
    actual_triples = set(actual)
    expected_triples = set(expected)

    def describe(label, triples):
        # Sort by string form so the listing is stable between runs.
        listing = "\n".join(
            f" {s} {p} {o}" for s, p, o in sorted(triples, key=str)
        )
        return f"{label} {len(triples)} triples:\n{listing}"

    differences = (
        ("Missing", expected_triples - actual_triples),
        ("Extra", actual_triples - expected_triples),
    )
    problems = [describe(label, triples) for label, triples in differences if triples]

    if problems:
        raise AssertionError("\n\n".join(problems))

53 

54 

@pytest.fixture(scope="module")
def real_kg():
    """Load the knowledge graph at ``REAL_KG_PATH`` once per test module."""
    knowledge_graph = load_kg(REAL_KG_PATH)
    return knowledge_graph

58 

59 

# Names of the stage sub-directories that the test_folder_structure fixture
# creates inside every item folder.
STAGES = [
    "raw",
    "rawp",
    "dcho",
    "dchoo",
]

61 

62 

@pytest.fixture
def test_folder_structure():
    """Yield the root of a temporary ``sala/folder/stage`` directory tree.

    For every entry of ``TEST_ITEMS`` one empty directory per name in
    ``STAGES`` is created under ``<tmp>/root``.

    Yields:
        Path of the ``root`` directory inside the temporary directory.
    """
    # The context manager removes the temporary directory on every exit path,
    # including a failure while the tree below is still being built; a
    # trailing shutil.rmtree() after the yield would be skipped in that case.
    with tempfile.TemporaryDirectory() as tmpdir:
        root_path = Path(tmpdir) / "root"

        for sala, folder, _ in TEST_ITEMS:
            for stage in STAGES:
                stage_dir = root_path / sala / folder / stage
                stage_dir.mkdir(parents=True)

        yield root_path

76 

77 

class TestExtractMetadataForStageExact:
    """Compare ``extract_metadata_for_stage`` output with stored fixtures."""

    # Every test item number is paired with every stage, item number varying
    # slowest, which yields the twelve (nr, stage) combinations.
    @pytest.mark.parametrize(
        ("nr", "stage"),
        [(nr, stage) for _, _, nr in TEST_ITEMS for stage in STAGES],
    )
    def test_stage_output_matches_fixture(self, real_kg, nr, stage):
        """The extracted graph must equal ``nr_<nr>/<stage>.ttl`` triple for triple."""
        expected = load_fixture(f"nr_{nr}/{stage}.ttl")
        result = extract_metadata_for_stage(real_kg, nr, stage)
        assert_graphs_equal(result, expected)

88 

89 

class TestExtractIdFromFolderName:
    """Covers ``extract_id_from_folder_name`` for accepted and rejected names."""

    @pytest.mark.parametrize(
        ("folder_name", "expected"),
        [
            ("S1-5-nome_oggetto", "5"),
            ("S2-42-altro_nome", "42"),
            ("S6-123-oggetto_complesso", "123"),
            # Spaces in the trailing name part are accepted.
            ("S1-7-nome con spazi", "7"),
            # A zero-padded number is expected back without the padding.
            ("S1-01-CNR_CartaNautica", "1"),
            # A letter suffix on the number is kept.
            ("S2-27a-FICLIT_DelphiniumStaphisagria", "27a"),
            ("S6-74b-ISPC-Orchis_morio_L", "74b"),
        ],
    )
    def test_valid_folder_names(self, folder_name, expected):
        """Well-formed folder names yield the expected id string."""
        extracted = extract_id_from_folder_name(folder_name)
        assert extracted == expected

    @pytest.mark.parametrize(
        "folder_name",
        [
            "1-5-nome",
            "Sala1-5-nome",
            "S1_5_nome",
        ],
    )
    def test_invalid_folder_names(self, folder_name):
        """Names that do not fit the expected pattern raise ``ValueError``."""
        with pytest.raises(ValueError, match="Cannot extract ID"):
            extract_id_from_folder_name(folder_name)

111 

112 

class TestProcessAllFolders:
    """Checks the files ``process_all_folders`` writes into the folder tree."""

    def test_creates_files_in_place(self, test_folder_structure):
        """Every stage directory of every test item gets meta.ttl and prov.trig."""
        root = test_folder_structure
        process_all_folders(root=root, kg_path=REAL_KG_PATH)

        for sala, folder, _ in TEST_ITEMS:
            item_dir = root / sala / folder
            stage_dirs = [entry for entry in item_dir.iterdir() if entry.is_dir()]
            assert len(stage_dirs) == 4, f"Expected 4 stages for {folder}, got {len(stage_dirs)}"

            for stage_dir in stage_dirs:
                # Same order as the files are named in the messages: metadata
                # first, then provenance.
                for filename in ("meta.ttl", "prov.trig"):
                    created = (stage_dir / filename).exists()
                    assert created, f"{filename} not created for {folder}/{stage_dir.name}"

131 

132 

class TestMergeProvenanceFiles:
    """Checks the single TriG file written by ``merge_provenance_files``."""

    def test_merges_all_prov_trig_files(self, test_folder_structure):
        """The merged file exists, parses as TriG and has the expected quads."""
        root = test_folder_structure
        process_all_folders(root=root, kg_path=REAL_KG_PATH)

        output_path = root / "all_provenance.trig"
        merge_provenance_files(root, output_path)
        assert output_path.exists()

        merged = Dataset()
        merged.parse(str(output_path), format="trig")

        prov_ns = Namespace("http://www.w3.org/ns/prov#")
        specialization_quads = set(
            merged.quads((None, prov_ns.specializationOf, None, None))
        )
        per_stage_files = list(root.rglob("prov.trig"))

        # One prov.trig per stage directory: 3 test items x 4 stages.
        assert len(per_stage_files) == 12
        # NOTE(review): 200 presumably follows from the content of the real
        # knowledge graph for the three test items -- confirm when it changes.
        assert len(specialization_quads) == 200

157 

158 

class TestScanFolderStructure:
    """Checks the nested mapping returned by ``scan_folder_structure``."""

    def test_scans_folder_structure(self):
        """Stage directories and their files are listed under sala and folder."""
        files_by_stage = {
            "raw": ("file1.jpg", "file2.jpg"),
            "dcho": ("model.obj",),
        }

        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            item_dir = root / "Sala1" / "S1-01-TestFolder"
            for stage, filenames in files_by_stage.items():
                stage_dir = item_dir / stage
                stage_dir.mkdir(parents=True)
                for filename in filenames:
                    (stage_dir / filename).touch()

            result = scan_folder_structure(root)

            assert "structure" in result
            structure = result["structure"]
            assert "Sala1" in structure
            assert "S1-01-TestFolder" in structure["Sala1"]

            folder_data = structure["Sala1"]["S1-01-TestFolder"]
            assert "raw" in folder_data
            assert "dcho" in folder_data
            # The raw listing is compared as a set, so its order is not checked.
            assert set(folder_data["raw"]["_files"]) == {"file1.jpg", "file2.jpg"}
            assert folder_data["dcho"]["_files"] == ["model.obj"]

183 

184