Coverage for tests / test_license_metadata.py: 100%

209 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2026-05-29 18:29 +0000

1# SPDX-FileCopyrightText: 2025-2026 Arcangelo Massari <arcangelomas@gmail.com> 

2# 

3# SPDX-License-Identifier: ISC 

4 

5import json 

6from unittest.mock import MagicMock, patch 

7 

8import pytest 

9import requests 

10 

11from changes_metadata_manager.patch.license_metadata import ( 

12 MAX_RETRIES, 

13 _create_edit_draft, 

14 _current_content_license, 

15 _extract_entity_id_from_config, 

16 _extract_stage_from_config_path, 

17 _fetch_record_metadata, 

18 _has_cc0_disclaimer, 

19 _rebuild_additional_descriptions, 

20 _request_with_retry, 

21 patch_drafts, 

22) 

23 

24 

class TestExtractStageFromConfigPath:
    """Tests for ``_extract_stage_from_config_path``."""

    def test_extracts_raw(self):
        """The sample ``...-raw.yaml`` path yields the stage ``"raw"``."""
        stage = _extract_stage_from_config_path("configs/sala1-obj-42-raw.yaml")
        assert stage == "raw"

    def test_extracts_dchoo(self):
        """The sample ``...-dchoo.yaml`` path yields the stage ``"dchoo"``."""
        stage = _extract_stage_from_config_path("configs/sala1-obj-42-dchoo.yaml")
        assert stage == "dchoo"

    def test_raises_on_invalid(self):
        """A path without a stage suffix makes the helper raise ``AssertionError``."""
        path_without_stage = "configs/sala1-obj-42.yaml"
        with pytest.raises(AssertionError):
            _extract_stage_from_config_path(path_without_stage)

35 

36 

class TestExtractEntityIdFromConfig:
    """Tests for ``_extract_entity_id_from_config``."""

    def test_extracts_entity(self):
        """The entity id ``"42"`` is pulled out of the sample w3id identifier."""
        w3id_entry = {"identifier": "https://w3id.org/changes/4/aldrovandi/itm/42/ob1/1"}
        entity_id = _extract_entity_id_from_config({"identifiers": [w3id_entry]})
        assert entity_id == "42"

    def test_raises_when_no_match(self):
        """A config whose only identifier is unrelated raises ``ValueError``."""
        unrelated_config = {"identifiers": [{"identifier": "https://example.com/other"}]}
        with pytest.raises(ValueError):
            _extract_entity_id_from_config(unrelated_config)

48 

49 

class TestCurrentContentLicense:
    """Tests for ``_current_content_license``."""

    @staticmethod
    def _metadata_with_right(title, link):
        """Build record metadata holding a single rights entry with an English title."""
        return {"rights": [{"title": {"en": title}, "link": link}]}

    def test_detects_cc0(self):
        """A CC0 content-license entry is reported as ``"cc0-1.0"``."""
        record = self._metadata_with_right(
            "CC0 (Content license)",
            "https://creativecommons.org/publicdomain/zero/1.0/",
        )
        assert _current_content_license(record) == "cc0-1.0"

    def test_detects_cc_by_nc(self):
        """A CC BY-NC 4.0 content-license entry is reported as ``"cc-by-nc-4.0"``."""
        record = self._metadata_with_right(
            "CC BY-NC 4.0 (Content license)",
            "https://creativecommons.org/licenses/by-nc/4.0/",
        )
        assert _current_content_license(record) == "cc-by-nc-4.0"

    def test_returns_none_without_content_license(self):
        """A rights list holding only a code-license entry yields ``None``."""
        record = self._metadata_with_right("ISC (Code license)", "https://example.com")
        assert _current_content_license(record) is None

    def test_returns_none_on_empty(self):
        """Empty metadata yields ``None``."""
        empty_record = {}
        assert _current_content_license(empty_record) is None

65 

66 

class TestHasCc0Disclaimer:
    """Tests for ``_has_cc0_disclaimer``."""

    def test_true_with_disclaimer(self):
        """Metadata carrying the sample disclaimer description is detected."""
        disclaimer_entry = {"description": "Ai sensi del D. Lgs. 42/2004..."}
        detected = _has_cc0_disclaimer({"additional_descriptions": [disclaimer_entry]})
        assert detected is True

    def test_false_without(self):
        """An empty ``additional_descriptions`` list is reported as no disclaimer."""
        detected = _has_cc0_disclaimer({"additional_descriptions": []})
        assert detected is False

74 

75 

class TestRebuildAdditionalDescriptions:
    """Tests for ``_rebuild_additional_descriptions``."""

    def test_adds_disclaimer_for_cc0(self):
        """Starting from no descriptions, a CC0 license produces one disclaimer entry."""
        rebuilt = _rebuild_additional_descriptions([], "cc0-1.0")
        assert len(rebuilt) == 1
        disclaimer_text = rebuilt[0]["description"]
        assert "D. Lgs. 42/2004" in disclaimer_text

    def test_removes_old_disclaimer_when_not_cc0(self):
        """An existing disclaimer is dropped when the license is not CC0."""
        existing = [{"description": "Ai sensi del D. Lgs. 42/2004..."}]
        rebuilt = _rebuild_additional_descriptions(existing, "cc-by-nc-4.0")
        assert rebuilt == []

    def test_preserves_other_descriptions(self):
        """Descriptions other than the disclaimer are kept."""
        unrelated_note = {"description": "Some other note"}
        stale_disclaimer = {"description": "Ai sensi del D. Lgs. 42/2004..."}
        rebuilt = _rebuild_additional_descriptions(
            [unrelated_note, stale_disclaimer], "cc-by-nc-4.0"
        )
        assert rebuilt == [{"description": "Some other note"}]

94 

95 

class TestRequestWithRetry:
    """Tests for ``_request_with_retry`` with ``requests.request`` and ``time.sleep`` patched."""

    @patch("changes_metadata_manager.patch.license_metadata.requests.request")
    def test_returns_immediately_on_success(self, mock_request):
        """A 200 response is returned after a single request."""
        mock_request.return_value = MagicMock(status_code=200)
        response = _request_with_retry("GET", "https://example.com")
        assert response.status_code == 200
        assert mock_request.call_count == 1

    @patch("changes_metadata_manager.patch.license_metadata.time.sleep")
    @patch("changes_metadata_manager.patch.license_metadata.requests.request")
    def test_retries_on_429(self, mock_request, mock_sleep):
        """Two 429 responses followed by a 200 result in three requests in total."""
        too_many = MagicMock(status_code=429)
        ok = MagicMock(status_code=200)
        mock_request.side_effect = [too_many, too_many, ok]
        response = _request_with_retry("GET", "https://example.com")
        assert response.status_code == 200
        assert mock_request.call_count == 3

    @patch("changes_metadata_manager.patch.license_metadata.time.sleep")
    @patch("changes_metadata_manager.patch.license_metadata.requests.request")
    def test_returns_429_after_all_retries_exhausted(self, mock_request, mock_sleep):
        """When every attempt is rate limited, the 429 is returned after ``MAX_RETRIES`` calls."""
        mock_request.return_value = MagicMock(status_code=429)
        response = _request_with_retry("GET", "https://example.com")
        assert response.status_code == 429
        assert mock_request.call_count == MAX_RETRIES

123 

124 

class TestCreateEditDraft:
    """Tests for ``_create_edit_draft`` with ``_request_with_retry`` patched out."""

    @patch("changes_metadata_manager.patch.license_metadata._request_with_retry")
    def test_creates_draft(self, mock_retry):
        """A 201 response leads to exactly one request against the record's draft URL."""
        mock_retry.return_value = MagicMock(status_code=201)
        _create_edit_draft("https://zenodo.org/api", "123", "token", "agent")
        mock_retry.assert_called_once()
        assert "/records/123/draft" in mock_retry.call_args[0][1]

    @patch("changes_metadata_manager.patch.license_metadata._request_with_retry")
    def test_ignores_403_already_exists(self, mock_retry):
        """A 403 "Draft already exists" response must not surface as an error."""
        resp = MagicMock(status_code=403)
        resp.text = "Draft already exists"
        # On a plain MagicMock, raise_for_status() silently returns another mock,
        # so without this side effect the test would pass even if the 403 were
        # NOT ignored. Making it raise turns the "no exception" outcome into a
        # real check.
        resp.raise_for_status.side_effect = requests.HTTPError("403")
        mock_retry.return_value = resp
        _create_edit_draft("https://zenodo.org/api", "123", "token", "agent")

    @patch("changes_metadata_manager.patch.license_metadata._request_with_retry")
    def test_raises_on_other_error(self, mock_retry):
        """A 500 response whose ``raise_for_status`` raises propagates ``HTTPError``."""
        resp = MagicMock(status_code=500)
        resp.text = "Internal error"
        resp.raise_for_status.side_effect = requests.HTTPError("500")
        mock_retry.return_value = resp
        with pytest.raises(requests.HTTPError):
            _create_edit_draft("https://zenodo.org/api", "123", "token", "agent")

148 

149 

class TestFetchRecordMetadata:
    """Tests for ``_fetch_record_metadata`` with ``_request_with_retry`` patched out."""

    @patch("changes_metadata_manager.patch.license_metadata._request_with_retry")
    def test_tries_draft_first(self, mock_retry):
        """A 200 on the first request returns its metadata; that request targets the draft URL."""
        draft_response = MagicMock(status_code=200)
        draft_response.json.return_value = {"metadata": {"title": "test"}}
        mock_retry.return_value = draft_response

        metadata = _fetch_record_metadata("https://zenodo.org/api", "123", "token", "agent")

        assert metadata == {"title": "test"}
        assert mock_retry.call_count == 1
        requested_url = mock_retry.call_args[0][1]
        assert "/records/123/draft" in requested_url

    @patch("changes_metadata_manager.patch.license_metadata._request_with_retry")
    def test_falls_back_to_published_on_404(self, mock_retry):
        """A 404 on the first request triggers a second request whose metadata is returned."""
        not_found = MagicMock(status_code=404)
        published = MagicMock(status_code=200)
        published.json.return_value = {"metadata": {"title": "published"}}
        mock_retry.side_effect = [not_found, published]

        metadata = _fetch_record_metadata("https://zenodo.org/api", "123", "token", "agent")

        assert metadata == {"title": "published"}
        assert mock_retry.call_count == 2

170 

171 

class TestPatchDrafts:
    """Tests for ``patch_drafts`` with its ``license_metadata`` collaborators patched out.

    Each test writes a ``drafts.json`` and an empty ``kg.ttl`` under ``tmp_path``.
    Tests that inspect the outcome read ``patch_license_log.json`` from that
    same directory.
    """

    def _make_drafts_json(self, tmp_path, entries):
        """Serialize *entries* to ``drafts.json`` under *tmp_path* and return its path."""
        path = tmp_path / "drafts.json"
        path.write_text(json.dumps(entries))
        return path

    def _make_config(self, tmp_path, filename, rights=None):
        """Write a minimal YAML config named *filename* and return its path as ``str``."""
        config = {
            "access": {"record": "public", "files": "public"},
            "rights": rights or [],
            "additional_descriptions": [],
        }
        config_path = tmp_path / filename
        # Imported locally, as in the original helper, so that yaml is only
        # required by the tests that actually write a config file.
        import yaml
        config_path.write_text(yaml.dump(config))
        return str(config_path)

    def _make_kg(self, tmp_path):
        """Create an empty ``kg.ttl`` under *tmp_path* and return its path."""
        kg_path = tmp_path / "kg.ttl"
        kg_path.write_text("")
        return kg_path

    def _draft_entry(self, draft_id, config_file, status):
        """Return one ``drafts.json`` entry using the shared Zenodo URL and token."""
        return {
            "draft_id": draft_id,
            "config_file": config_file,
            "zenodo_url": "https://zenodo.org/api",
            "access_token": "tok",
            "status": status,
        }

    def _cc_by_nc_record(self):
        """Return fresh record metadata for entity 42 licensed CC BY-NC 4.0, no disclaimer."""
        return {
            "identifiers": [{"identifier": "https://w3id.org/changes/4/aldrovandi/itm/42/ob1/1"}],
            "rights": [{"title": {"en": "CC BY-NC 4.0 (Content license)"}, "link": "https://creativecommons.org/licenses/by-nc/4.0/"}],
            "additional_descriptions": [],
        }

    def _read_log(self, tmp_path):
        """Load the ``patch_license_log.json`` found under *tmp_path*."""
        return json.loads((tmp_path / "patch_license_log.json").read_text())

    @patch("changes_metadata_manager.patch.license_metadata.time.sleep")
    @patch("changes_metadata_manager.patch.license_metadata.extract_license_for_entity_stage")
    @patch("changes_metadata_manager.patch.license_metadata._fetch_record_metadata")
    def test_dry_run_logs_changes(self, mock_fetch, mock_extract, mock_sleep, tmp_path):
        """A dry run over a record whose license differs logs one ``dry_run`` entry."""
        mock_fetch.return_value = self._cc_by_nc_record()
        mock_extract.return_value = "cc0-1.0"

        config_file = self._make_config(tmp_path, "entity-42-dcho.yaml")
        drafts_path = self._make_drafts_json(
            tmp_path, [self._draft_entry(123, config_file, "published")]
        )
        kg_path = self._make_kg(tmp_path)

        with patch("changes_metadata_manager.patch.license_metadata.load_kg"):
            patch_drafts(drafts_path, kg_path, dry_run=True)

        log = self._read_log(tmp_path)
        assert len(log) == 1
        assert log[0]["status"] == "dry_run"
        assert log[0]["old_license"] == "cc-by-nc-4.0"
        assert log[0]["new_license"] == "cc0-1.0"

    @patch("changes_metadata_manager.patch.license_metadata.time.sleep")
    @patch("changes_metadata_manager.patch.license_metadata.publish_draft")
    @patch("changes_metadata_manager.patch.license_metadata.update_draft_metadata")
    @patch("changes_metadata_manager.patch.license_metadata.build_inveniordm_payload")
    @patch("changes_metadata_manager.patch.license_metadata._create_edit_draft")
    @patch("changes_metadata_manager.patch.license_metadata.extract_license_for_entity_stage")
    @patch("changes_metadata_manager.patch.license_metadata._fetch_record_metadata")
    def test_published_record_creates_edit_draft_and_publishes(
        self, mock_fetch, mock_extract, mock_create_edit, mock_build, mock_update, mock_publish, mock_sleep, tmp_path
    ):
        """For a ``published`` entry an edit draft is created and then published."""
        mock_fetch.return_value = self._cc_by_nc_record()
        mock_extract.return_value = "cc0-1.0"
        mock_build.return_value = {"metadata": {}}

        config_file = self._make_config(tmp_path, "entity-42-dcho.yaml")
        drafts_path = self._make_drafts_json(
            tmp_path, [self._draft_entry(456, config_file, "published")]
        )
        kg_path = self._make_kg(tmp_path)

        with patch("changes_metadata_manager.patch.license_metadata.load_kg"):
            patch_drafts(drafts_path, kg_path, dry_run=False)

        mock_create_edit.assert_called_once_with("https://zenodo.org/api", "456", "tok", "changes-metadata-manager/1.0.0")
        mock_publish.assert_called_once_with("https://zenodo.org/api", "tok", "456", "changes-metadata-manager/1.0.0")

    @patch("changes_metadata_manager.patch.license_metadata.time.sleep")
    @patch("changes_metadata_manager.patch.license_metadata.publish_draft")
    @patch("changes_metadata_manager.patch.license_metadata.update_draft_metadata")
    @patch("changes_metadata_manager.patch.license_metadata.build_inveniordm_payload")
    @patch("changes_metadata_manager.patch.license_metadata._create_edit_draft")
    @patch("changes_metadata_manager.patch.license_metadata.extract_license_for_entity_stage")
    @patch("changes_metadata_manager.patch.license_metadata._fetch_record_metadata")
    def test_unpublished_record_skips_edit_draft_and_publish(
        self, mock_fetch, mock_extract, mock_create_edit, mock_build, mock_update, mock_publish, mock_sleep, tmp_path
    ):
        """For an ``uploaded`` entry neither an edit draft nor a publish call is made."""
        mock_fetch.return_value = self._cc_by_nc_record()
        mock_extract.return_value = "cc0-1.0"
        mock_build.return_value = {"metadata": {}}

        config_file = self._make_config(tmp_path, "entity-42-dcho.yaml")
        drafts_path = self._make_drafts_json(
            tmp_path, [self._draft_entry(789, config_file, "uploaded")]
        )
        kg_path = self._make_kg(tmp_path)

        with patch("changes_metadata_manager.patch.license_metadata.load_kg"):
            patch_drafts(drafts_path, kg_path, dry_run=False)

        mock_create_edit.assert_not_called()
        mock_publish.assert_not_called()

    @patch("changes_metadata_manager.patch.license_metadata.time.sleep")
    @patch("changes_metadata_manager.patch.license_metadata._fetch_record_metadata")
    def test_errors_are_logged(self, mock_fetch, mock_sleep, tmp_path):
        """An ``HTTPError`` from the metadata fetch is recorded as an ``error`` log entry."""
        mock_fetch.side_effect = requests.HTTPError("500 Server Error")

        # No config file is written here: as in the original test, only the
        # path string is recorded in drafts.json.
        config_file = str(tmp_path / "entity-42-dcho.yaml")
        drafts_path = self._make_drafts_json(
            tmp_path, [self._draft_entry(999, config_file, "published")]
        )
        kg_path = self._make_kg(tmp_path)

        with patch("changes_metadata_manager.patch.license_metadata.load_kg"):
            patch_drafts(drafts_path, kg_path, dry_run=False)

        log = self._read_log(tmp_path)
        assert len(log) == 1
        assert log[0]["status"] == "error"
        assert "500 Server Error" in log[0]["error"]
        assert log[0]["record_id"] == 999

    @patch("changes_metadata_manager.patch.license_metadata.time.sleep")
    @patch("changes_metadata_manager.patch.license_metadata.extract_license_for_entity_stage")
    @patch("changes_metadata_manager.patch.license_metadata._fetch_record_metadata")
    def test_skips_already_correct(self, mock_fetch, mock_extract, mock_sleep, tmp_path):
        """A record already at CC0 with the disclaimer present produces an empty log."""
        mock_fetch.return_value = {
            "identifiers": [{"identifier": "https://w3id.org/changes/4/aldrovandi/itm/42/ob1/1"}],
            "rights": [{"title": {"en": "CC0 (Content license)"}, "link": "https://creativecommons.org/publicdomain/zero/1.0/"}],
            "additional_descriptions": [{"description": "Ai sensi del D. Lgs. 42/2004..."}],
        }
        mock_extract.return_value = "cc0-1.0"

        # No config file is written here: as in the original test, only the
        # path string is recorded in drafts.json.
        config_file = str(tmp_path / "entity-42-dcho.yaml")
        drafts_path = self._make_drafts_json(
            tmp_path, [self._draft_entry(111, config_file, "published")]
        )
        kg_path = self._make_kg(tmp_path)

        with patch("changes_metadata_manager.patch.license_metadata.load_kg"):
            patch_drafts(drafts_path, kg_path, dry_run=True)

        log = self._read_log(tmp_path)
        assert log == []
344 assert log == []