Coverage for tests/test_license_metadata.py: 100%
209 statements
« prev ^ index » next coverage.py v7.12.0, created at 2026-05-29 18:29 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2026-05-29 18:29 +0000
1# SPDX-FileCopyrightText: 2025-2026 Arcangelo Massari <arcangelomas@gmail.com>
2#
3# SPDX-License-Identifier: ISC
5import json
6from unittest.mock import MagicMock, patch
8import pytest
9import requests
11from changes_metadata_manager.patch.license_metadata import (
12 MAX_RETRIES,
13 _create_edit_draft,
14 _current_content_license,
15 _extract_entity_id_from_config,
16 _extract_stage_from_config_path,
17 _fetch_record_metadata,
18 _has_cc0_disclaimer,
19 _rebuild_additional_descriptions,
20 _request_with_retry,
21 patch_drafts,
22)
class TestExtractStageFromConfigPath:
    """Checks for ``_extract_stage_from_config_path``."""

    def test_extracts_raw(self):
        """A file name ending in ``-raw.yaml`` gives the ``raw`` stage."""
        stage = _extract_stage_from_config_path("configs/sala1-obj-42-raw.yaml")
        assert stage == "raw"

    def test_extracts_dchoo(self):
        """A file name ending in ``-dchoo.yaml`` gives the ``dchoo`` stage."""
        stage = _extract_stage_from_config_path("configs/sala1-obj-42-dchoo.yaml")
        assert stage == "dchoo"

    def test_raises_on_invalid(self):
        """A file name without a stage suffix is rejected with AssertionError."""
        path_without_stage = "configs/sala1-obj-42.yaml"
        with pytest.raises(AssertionError):
            _extract_stage_from_config_path(path_without_stage)
class TestExtractEntityIdFromConfig:
    """Checks for ``_extract_entity_id_from_config``."""

    def test_extracts_entity(self):
        """The changes w3id identifier below resolves to entity id ``"42"``."""
        identifier = {"identifier": "https://w3id.org/changes/4/aldrovandi/itm/42/ob1/1"}
        entity_id = _extract_entity_id_from_config({"identifiers": [identifier]})
        assert entity_id == "42"

    def test_raises_when_no_match(self):
        """A config whose only identifier is unrelated raises ValueError."""
        unrelated = {"identifier": "https://example.com/other"}
        config = {"identifiers": [unrelated]}
        with pytest.raises(ValueError):
            _extract_entity_id_from_config(config)
class TestCurrentContentLicense:
    """Checks for ``_current_content_license``."""

    def test_detects_cc0(self):
        """A CC0 content-license right is reported as ``cc0-1.0``."""
        cc0_right = {
            "title": {"en": "CC0 (Content license)"},
            "link": "https://creativecommons.org/publicdomain/zero/1.0/",
        }
        detected = _current_content_license({"rights": [cc0_right]})
        assert detected == "cc0-1.0"

    def test_detects_cc_by_nc(self):
        """A CC BY-NC 4.0 content-license right is reported as ``cc-by-nc-4.0``."""
        by_nc_right = {
            "title": {"en": "CC BY-NC 4.0 (Content license)"},
            "link": "https://creativecommons.org/licenses/by-nc/4.0/",
        }
        detected = _current_content_license({"rights": [by_nc_right]})
        assert detected == "cc-by-nc-4.0"

    def test_returns_none_without_content_license(self):
        """Rights that only contain a code license yield ``None``."""
        code_right = {
            "title": {"en": "ISC (Code license)"},
            "link": "https://example.com",
        }
        detected = _current_content_license({"rights": [code_right]})
        assert detected is None

    def test_returns_none_on_empty(self):
        """Metadata with no ``rights`` key at all yields ``None``."""
        empty_metadata = {}
        assert _current_content_license(empty_metadata) is None
class TestHasCc0Disclaimer:
    """Checks for ``_has_cc0_disclaimer``."""

    def test_true_with_disclaimer(self):
        """A description carrying the D. Lgs. 42/2004 text counts as a disclaimer."""
        disclaimer = {"description": "Ai sensi del D. Lgs. 42/2004..."}
        metadata = {"additional_descriptions": [disclaimer]}
        found = _has_cc0_disclaimer(metadata)
        assert found is True

    def test_false_without(self):
        """An empty ``additional_descriptions`` list has no disclaimer."""
        metadata = {"additional_descriptions": []}
        found = _has_cc0_disclaimer(metadata)
        assert found is False
class TestRebuildAdditionalDescriptions:
    """Checks for ``_rebuild_additional_descriptions``."""

    def test_adds_disclaimer_for_cc0(self):
        """Rebuilding an empty list for CC0 produces exactly one disclaimer entry."""
        rebuilt = _rebuild_additional_descriptions([], "cc0-1.0")
        assert len(rebuilt) == 1
        first_text = rebuilt[0]["description"]
        assert "D. Lgs. 42/2004" in first_text

    def test_removes_old_disclaimer_when_not_cc0(self):
        """An existing disclaimer is dropped when the license is not CC0."""
        existing = [{"description": "Ai sensi del D. Lgs. 42/2004..."}]
        rebuilt = _rebuild_additional_descriptions(existing, "cc-by-nc-4.0")
        assert rebuilt == []

    def test_preserves_other_descriptions(self):
        """Descriptions other than the disclaimer survive the rebuild."""
        unrelated_note = {"description": "Some other note"}
        disclaimer = {"description": "Ai sensi del D. Lgs. 42/2004..."}
        rebuilt = _rebuild_additional_descriptions(
            [unrelated_note, disclaimer], "cc-by-nc-4.0"
        )
        assert rebuilt == [{"description": "Some other note"}]
class TestRequestWithRetry:
    """Checks for ``_request_with_retry`` with ``requests.request`` mocked."""

    @patch("changes_metadata_manager.patch.license_metadata.requests.request")
    def test_returns_immediately_on_success(self, mock_request):
        """A 200 response is returned after a single HTTP call."""
        mock_request.return_value = MagicMock(status_code=200)

        response = _request_with_retry("GET", "https://example.com")

        assert response.status_code == 200
        assert mock_request.call_count == 1

    @patch("changes_metadata_manager.patch.license_metadata.time.sleep")
    @patch("changes_metadata_manager.patch.license_metadata.requests.request")
    def test_retries_on_429(self, mock_request, mock_sleep):
        """Two 429 responses followed by a 200 result in three HTTP calls."""
        too_many = MagicMock(status_code=429)
        ok = MagicMock(status_code=200)
        # time.sleep is patched so the retry back-off does not slow the test.
        mock_request.side_effect = [too_many] * 2 + [ok]

        response = _request_with_retry("GET", "https://example.com")

        assert response.status_code == 200
        assert mock_request.call_count == 3

    @patch("changes_metadata_manager.patch.license_metadata.time.sleep")
    @patch("changes_metadata_manager.patch.license_metadata.requests.request")
    def test_returns_429_after_all_retries_exhausted(self, mock_request, mock_sleep):
        """When every attempt is rate limited the last 429 is handed back."""
        mock_request.return_value = MagicMock(status_code=429)

        response = _request_with_retry("GET", "https://example.com")

        assert response.status_code == 429
        assert mock_request.call_count == MAX_RETRIES
class TestCreateEditDraft:
    """Checks for ``_create_edit_draft`` with ``_request_with_retry`` mocked."""

    @patch("changes_metadata_manager.patch.license_metadata._request_with_retry")
    def test_creates_draft(self, mock_retry):
        """A 201 response is accepted and the draft endpoint of the record is hit."""
        mock_retry.return_value = MagicMock(status_code=201)
        _create_edit_draft("https://zenodo.org/api", "123", "token", "agent")
        mock_retry.assert_called_once()
        # The URL is the second positional argument (after the HTTP method).
        assert "/records/123/draft" in mock_retry.call_args[0][1]

    @patch("changes_metadata_manager.patch.license_metadata._request_with_retry")
    def test_ignores_403_already_exists(self, mock_retry):
        """A 403 "Draft already exists" response must not surface as an error."""
        resp = MagicMock(status_code=403)
        resp.text = "Draft already exists"
        # A real 403 response raises from raise_for_status(). Without this the
        # MagicMock default is a silent no-op and the test would pass even if
        # the response were not ignored at all.
        resp.raise_for_status.side_effect = requests.HTTPError("403")
        mock_retry.return_value = resp
        # Passing means no exception escaped.
        _create_edit_draft("https://zenodo.org/api", "123", "token", "agent")

    @patch("changes_metadata_manager.patch.license_metadata._request_with_retry")
    def test_raises_on_other_error(self, mock_retry):
        """Any other failing status propagates the HTTPError to the caller."""
        resp = MagicMock(status_code=500)
        resp.text = "Internal error"
        resp.raise_for_status.side_effect = requests.HTTPError("500")
        mock_retry.return_value = resp
        with pytest.raises(requests.HTTPError):
            _create_edit_draft("https://zenodo.org/api", "123", "token", "agent")
class TestFetchRecordMetadata:
    """Checks for ``_fetch_record_metadata`` with ``_request_with_retry`` mocked."""

    @patch("changes_metadata_manager.patch.license_metadata._request_with_retry")
    def test_tries_draft_first(self, mock_retry):
        """When the draft endpoint answers 200 its metadata is returned directly."""
        draft_response = MagicMock(status_code=200)
        draft_response.json.return_value = {"metadata": {"title": "test"}}
        mock_retry.return_value = draft_response

        metadata = _fetch_record_metadata("https://zenodo.org/api", "123", "token", "agent")

        assert metadata == {"title": "test"}
        assert mock_retry.call_count == 1
        # Positional arguments of the only call; index 1 is the URL.
        positional, _ = mock_retry.call_args
        assert "/records/123/draft" in positional[1]

    @patch("changes_metadata_manager.patch.license_metadata._request_with_retry")
    def test_falls_back_to_published_on_404(self, mock_retry):
        """A 404 on the draft triggers a second request whose metadata is used."""
        missing_draft = MagicMock(status_code=404)
        published = MagicMock(status_code=200)
        published.json.return_value = {"metadata": {"title": "published"}}
        mock_retry.side_effect = [missing_draft, published]

        metadata = _fetch_record_metadata("https://zenodo.org/api", "123", "token", "agent")

        assert metadata == {"title": "published"}
        assert mock_retry.call_count == 2
class TestPatchDrafts:
    """Checks for ``patch_drafts`` with network helpers, KG loading and sleeps mocked.

    Every test writes a ``drafts.json`` and an empty ``kg.ttl`` into ``tmp_path``
    and, where it inspects results, reads ``patch_license_log.json`` from the
    same directory.
    """

    # File name shared by all draft entries in this class.
    _CONFIG_NAME = "entity-42-dcho.yaml"

    def _make_drafts_json(self, tmp_path, entries):
        """Serialise ``entries`` to ``tmp_path/drafts.json`` and return its path."""
        path = tmp_path / "drafts.json"
        path.write_text(json.dumps(entries))
        return path

    def _make_config(self, tmp_path, filename, rights=None):
        """Write a minimal YAML config into ``tmp_path`` and return its path as ``str``."""
        config = {
            "access": {"record": "public", "files": "public"},
            "rights": rights or [],
            "additional_descriptions": [],
        }
        config_path = tmp_path / filename
        import yaml
        config_path.write_text(yaml.dump(config))
        return str(config_path)

    def _make_kg(self, tmp_path):
        """Create an empty ``kg.ttl`` in ``tmp_path`` and return its path."""
        kg_path = tmp_path / "kg.ttl"
        kg_path.write_text("")
        return kg_path

    def _draft_entry(self, draft_id, config_file, status):
        """Build one ``drafts.json`` entry pointing at the fake Zenodo API."""
        return {
            "draft_id": draft_id,
            "config_file": config_file,
            "zenodo_url": "https://zenodo.org/api",
            "access_token": "tok",
            "status": status,
        }

    def _cc_by_nc_metadata(self):
        """Record metadata for entity 42 carrying a CC BY-NC 4.0 content license."""
        return {
            "identifiers": [{"identifier": "https://w3id.org/changes/4/aldrovandi/itm/42/ob1/1"}],
            "rights": [{"title": {"en": "CC BY-NC 4.0 (Content license)"}, "link": "https://creativecommons.org/licenses/by-nc/4.0/"}],
            "additional_descriptions": [],
        }

    @patch("changes_metadata_manager.patch.license_metadata.time.sleep")
    @patch("changes_metadata_manager.patch.license_metadata.extract_license_for_entity_stage")
    @patch("changes_metadata_manager.patch.license_metadata._fetch_record_metadata")
    def test_dry_run_logs_changes(self, mock_fetch, mock_extract, mock_sleep, tmp_path):
        """A dry run records the old and new license with status ``dry_run``."""
        mock_fetch.return_value = self._cc_by_nc_metadata()
        mock_extract.return_value = "cc0-1.0"

        # Reuse the path returned by the helper instead of rebuilding it by hand.
        config_file = self._make_config(tmp_path, self._CONFIG_NAME)
        drafts_path = self._make_drafts_json(
            tmp_path, [self._draft_entry(123, config_file, "published")]
        )
        kg_path = self._make_kg(tmp_path)

        with patch("changes_metadata_manager.patch.license_metadata.load_kg"):
            patch_drafts(drafts_path, kg_path, dry_run=True)

        log = json.loads((tmp_path / "patch_license_log.json").read_text())
        assert len(log) == 1
        assert log[0]["status"] == "dry_run"
        assert log[0]["old_license"] == "cc-by-nc-4.0"
        assert log[0]["new_license"] == "cc0-1.0"

    @patch("changes_metadata_manager.patch.license_metadata.time.sleep")
    @patch("changes_metadata_manager.patch.license_metadata.publish_draft")
    @patch("changes_metadata_manager.patch.license_metadata.update_draft_metadata")
    @patch("changes_metadata_manager.patch.license_metadata.build_inveniordm_payload")
    @patch("changes_metadata_manager.patch.license_metadata._create_edit_draft")
    @patch("changes_metadata_manager.patch.license_metadata.extract_license_for_entity_stage")
    @patch("changes_metadata_manager.patch.license_metadata._fetch_record_metadata")
    def test_published_record_creates_edit_draft_and_publishes(
        self, mock_fetch, mock_extract, mock_create_edit, mock_build, mock_update, mock_publish, mock_sleep, tmp_path
    ):
        """A ``published`` entry gets an edit draft and is published again."""
        mock_fetch.return_value = self._cc_by_nc_metadata()
        mock_extract.return_value = "cc0-1.0"
        mock_build.return_value = {"metadata": {}}

        config_file = self._make_config(tmp_path, self._CONFIG_NAME)
        drafts_path = self._make_drafts_json(
            tmp_path, [self._draft_entry(456, config_file, "published")]
        )
        kg_path = self._make_kg(tmp_path)

        with patch("changes_metadata_manager.patch.license_metadata.load_kg"):
            patch_drafts(drafts_path, kg_path, dry_run=False)

        mock_create_edit.assert_called_once_with("https://zenodo.org/api", "456", "tok", "changes-metadata-manager/1.0.0")
        mock_publish.assert_called_once_with("https://zenodo.org/api", "tok", "456", "changes-metadata-manager/1.0.0")

    @patch("changes_metadata_manager.patch.license_metadata.time.sleep")
    @patch("changes_metadata_manager.patch.license_metadata.update_draft_metadata")
    @patch("changes_metadata_manager.patch.license_metadata.build_inveniordm_payload")
    @patch("changes_metadata_manager.patch.license_metadata._create_edit_draft")
    @patch("changes_metadata_manager.patch.license_metadata.extract_license_for_entity_stage")
    @patch("changes_metadata_manager.patch.license_metadata._fetch_record_metadata")
    def test_unpublished_record_skips_edit_draft_and_publish(
        self, mock_fetch, mock_extract, mock_create_edit, mock_build, mock_update, mock_sleep, tmp_path
    ):
        """An ``uploaded`` entry is neither reopened for editing nor published."""
        mock_fetch.return_value = self._cc_by_nc_metadata()
        mock_extract.return_value = "cc0-1.0"
        mock_build.return_value = {"metadata": {}}

        config_file = self._make_config(tmp_path, self._CONFIG_NAME)
        drafts_path = self._make_drafts_json(
            tmp_path, [self._draft_entry(789, config_file, "uploaded")]
        )
        kg_path = self._make_kg(tmp_path)

        with patch("changes_metadata_manager.patch.license_metadata.load_kg"):
            with patch("changes_metadata_manager.patch.license_metadata.publish_draft") as mock_publish:
                patch_drafts(drafts_path, kg_path, dry_run=False)

        mock_create_edit.assert_not_called()
        mock_publish.assert_not_called()

    @patch("changes_metadata_manager.patch.license_metadata.time.sleep")
    @patch("changes_metadata_manager.patch.license_metadata._fetch_record_metadata")
    def test_errors_are_logged(self, mock_fetch, mock_sleep, tmp_path):
        """An HTTPError while fetching metadata ends up as an ``error`` log entry."""
        mock_fetch.side_effect = requests.HTTPError("500 Server Error")

        # This test never creates the config file; only its path is supplied.
        config_file = str(tmp_path / self._CONFIG_NAME)
        drafts_path = self._make_drafts_json(
            tmp_path, [self._draft_entry(999, config_file, "published")]
        )
        kg_path = self._make_kg(tmp_path)

        with patch("changes_metadata_manager.patch.license_metadata.load_kg"):
            patch_drafts(drafts_path, kg_path, dry_run=False)

        log = json.loads((tmp_path / "patch_license_log.json").read_text())
        assert len(log) == 1
        assert log[0]["status"] == "error"
        assert "500 Server Error" in log[0]["error"]
        assert log[0]["record_id"] == 999

    @patch("changes_metadata_manager.patch.license_metadata.time.sleep")
    @patch("changes_metadata_manager.patch.license_metadata.extract_license_for_entity_stage")
    @patch("changes_metadata_manager.patch.license_metadata._fetch_record_metadata")
    def test_skips_already_correct(self, mock_fetch, mock_extract, mock_sleep, tmp_path):
        """A record already on CC0 with the disclaimer produces an empty log."""
        mock_fetch.return_value = {
            "identifiers": [{"identifier": "https://w3id.org/changes/4/aldrovandi/itm/42/ob1/1"}],
            "rights": [{"title": {"en": "CC0 (Content license)"}, "link": "https://creativecommons.org/publicdomain/zero/1.0/"}],
            "additional_descriptions": [{"description": "Ai sensi del D. Lgs. 42/2004..."}],
        }
        mock_extract.return_value = "cc0-1.0"

        # This test never creates the config file; only its path is supplied.
        config_file = str(tmp_path / self._CONFIG_NAME)
        drafts_path = self._make_drafts_json(
            tmp_path, [self._draft_entry(111, config_file, "published")]
        )
        kg_path = self._make_kg(tmp_path)

        with patch("changes_metadata_manager.patch.license_metadata.load_kg"):
            patch_drafts(drafts_path, kg_path, dry_run=True)

        log = json.loads((tmp_path / "patch_license_log.json").read_text())
        assert log == []