Coverage for tests/test_zenodo_upload.py: 100%
816 statements
« prev ^ index » next coverage.py v7.12.0, created at 2026-05-29 18:29 +0000
1# SPDX-FileCopyrightText: 2025-2026 Arcangelo Massari <arcangelomas@gmail.com>
2#
3# SPDX-License-Identifier: ISC
5import csv
6import json
7import tempfile
8import zipfile
9from pathlib import Path
10from unittest.mock import patch
12import yaml
14import pytest
15from rdflib import Graph, Literal, URIRef
17from changes_metadata_manager.folder_metadata_builder import load_kg
18from changes_metadata_manager.zenodo_upload import (
19 AAT,
20 BASE_URI,
21 CC0_DISCLAIMER,
22 RESTRICTED_NOTICE,
23 E21_PERSON,
24 P14_CARRIED_OUT_BY,
25 P16_USED_SPECIFIC_OBJECT,
26 P190_HAS_SYMBOLIC_CONTENT,
27 P1_IS_IDENTIFIED_BY,
28 P32_USED_GENERAL_TECHNIQUE,
29 P3_HAS_NOTE,
30 P70I,
31 P74_HAS_RESIDENCE,
32 RDF_TYPE,
33 _atomic_write_json,
34 _extract_doi,
35 _extract_license_from_meta,
36 _extract_record_url,
37 _format_creators_for_table,
38 _format_licenses_for_table,
39 _write_doi_table,
40 build_creators_for_entity_stage,
41 build_enhanced_description,
42 build_entity_uri,
43 build_metadata_creators,
44 build_methods_description,
45 create_stage_zip,
46 extract_acquisition_technique,
47 extract_authors_for_entity_stage,
48 extract_devices,
49 extract_entity_title,
50 extract_keeper_info,
51 extract_license_for_entity_stage,
52 extract_licensed_entity_stages,
53 extract_metadata_authors,
54 extract_software_for_stage,
55 generate_zenodo_config,
56 group_folders_by_entity,
57 load_creators_lookup,
58 merge_creators,
59 publish_all_drafts,
60 slugify,
61 upload_all,
62)
# ``data`` directory located beside the directory that contains this file.
DATA_DIR = Path(__file__).parent.parent.joinpath("data")
# Turtle file handed to ``load_kg`` by the ``real_kg`` fixture.
REAL_KG_PATH = DATA_DIR.joinpath("kg.ttl")
@pytest.fixture(scope="module")
def real_kg():
    """Return the graph loaded from ``REAL_KG_PATH``, built once per test module."""
    graph = load_kg(REAL_KG_PATH)
    return graph
class TestExtractLicensedEntityStages:
    """Tests for ``extract_licensed_entity_stages``."""

    def test_returns_set_of_tuples(self, real_kg):
        """The result is a set and every member is a 2-tuple."""
        stages = extract_licensed_entity_stages(real_kg)
        assert isinstance(stages, set)
        for entry in stages:
            assert isinstance(entry, tuple) and len(entry) == 2

    def test_known_licensed_entity(self, real_kg):
        """Entity "1" is reported for both the dcho and dchoo stages."""
        stages = extract_licensed_entity_stages(real_kg)
        for expected in (("1", "dcho"), ("1", "dchoo")):
            assert expected in stages

    def test_maps_steps_to_stages(self):
        """License nodes for steps 00-03 of entity 42 yield raw, rawp, dcho and dchoo."""
        graph = Graph()
        for step in ("00", "01", "02", "03"):
            subject = URIRef(f"{BASE_URI}/lic/42/{step}/1")
            graph.add((subject, P70I, URIRef("https://example.com/license")))
        stages = extract_licensed_entity_stages(graph)
        assert stages == {("42", "raw"), ("42", "rawp"), ("42", "dcho"), ("42", "dchoo")}
class TestGroupFoldersByEntity:
    """Tests for ``group_folders_by_entity``."""

    def test_groups_folders_by_entity_id(self):
        """Folders ``S1-01-*`` and ``S1-02-*`` are filed under entity ids "1" and "2"."""
        sala = {
            "S1-01-Test": {"raw": {}, "dcho": {}},
            "S1-02-Other": {"raw": {}},
        }
        grouped = group_folders_by_entity({"structure": {"Sala1": sala}})
        assert "1" in grouped
        assert "2" in grouped
        entity_one = grouped["1"]
        assert len(entity_one) == 1
        assert entity_one[0][1] == "S1-01-Test"

    def test_groups_abc_variants(self):
        """The a/b/c variants of folder 98 all land under entity id "98"."""
        suffix = "DA-Calchi facciali colorati, boscimani"
        sala = {f"S6-98{letter}-{suffix}": {"raw": {}} for letter in "abc"}
        grouped = group_folders_by_entity({"structure": {"Sala6": sala}})
        assert "98" in grouped
        assert len(grouped["98"]) == 3

    def test_skips_skip_folders(self):
        """``S1-CNR_SoffittoSala1`` and ``materials`` are not listed under entity "1"."""
        folder_names_in = ("S1-CNR_SoffittoSala1", "materials", "S1-01-Test")
        sala = {name: {"raw": {}} for name in folder_names_in}
        grouped = group_folders_by_entity({"structure": {"Sala1": sala}})
        assert "1" in grouped
        kept = [entry[1] for entry in grouped["1"]]
        for skipped in ("S1-CNR_SoffittoSala1", "materials"):
            assert skipped not in kept
class TestSlugify:
    """Tests for ``slugify``."""

    def test_simple_text(self):
        """Words are lower-cased and joined with a hyphen."""
        assert slugify("Carta nautica") == "carta-nautica"

    def test_accented_characters(self):
        """Accented letters are reduced to their unaccented form."""
        assert slugify("Oggettò àccéntàto") == "oggetto-accentato"

    def test_special_characters(self):
        """Parentheses and ``#`` do not appear in the slug."""
        assert slugify("Test (object) #1") == "test-object-1"

    def test_multiple_spaces(self):
        """A run of several spaces becomes a single hyphen."""
        # The input must contain repeated spaces; single-spaced text would only
        # repeat what test_simple_text already covers.
        assert slugify("Multiple   spaces   here") == "multiple-spaces-here"

    def test_leading_trailing_spaces(self):
        """Surrounding whitespace leaves no leading or trailing hyphen."""
        assert slugify(" trimmed ") == "trimmed"
# Turtle prefix declaration shared by every meta.ttl fixture below.
_CRM_PREFIX_LINE = "@prefix crm: <http://www.cidoc-crm.org/cidoc-crm/> .\n"


def _license_statement(step, license_url):
    """Return the Turtle statement linking license node ``lic/1/<step>/1`` to *license_url*."""
    return (
        f"<https://w3id.org/changes/4/aldrovandi/lic/1/{step}/1>\n"
        f" crm:P70i_is_documented_in <{license_url}> .\n"
    )


# meta.ttl content with a single CC0 license on step 00.
LICENSED_META_TTL = _CRM_PREFIX_LINE + _license_statement(
    "00", "https://creativecommons.org/publicdomain/zero/1.0/"
)

# meta.ttl content with CC BY-NC on steps 00 and 01 and CC0 on step 02.
MIXED_LICENSE_META_TTL = (
    _CRM_PREFIX_LINE
    + _license_statement("00", "https://creativecommons.org/licenses/by-nc/4.0/")
    + _license_statement("01", "https://creativecommons.org/licenses/by-nc/4.0/")
    + _license_statement("02", "https://creativecommons.org/publicdomain/zero/1.0/")
)

# meta.ttl content that carries a note on the item and no license statement.
UNLICENSED_META_TTL = (
    _CRM_PREFIX_LINE
    + "<https://w3id.org/changes/4/aldrovandi/itm/1/ob00/1>\n"
    + ' crm:P3_has_note "Test object" .\n'
)
class TestExtractLicenseFromMeta:
    """Tests for ``_extract_license_from_meta``."""

    @staticmethod
    def _license_for(ttl_text, stage):
        """Write *ttl_text* to ``meta.ttl`` in a temporary directory and extract the license for *stage*."""
        with tempfile.TemporaryDirectory() as tmpdir:
            stage_dir = Path(tmpdir)
            (stage_dir / "meta.ttl").write_text(ttl_text)
            return _extract_license_from_meta(stage_dir, stage)

    def test_returns_license_id_when_present(self):
        """A CC0 statement on step 00 gives "cc0-1.0" for the raw stage."""
        assert self._license_for(LICENSED_META_TTL, "raw") == "cc0-1.0"

    def test_returns_none_when_no_license(self):
        """A meta file without a license statement gives ``None``."""
        assert self._license_for(UNLICENSED_META_TTL, "raw") is None

    def test_picks_defining_step_license(self):
        """For dcho the step-02 CC0 license is returned, not the BY-NC of steps 00/01."""
        assert self._license_for(MIXED_LICENSE_META_TTL, "dcho") == "cc0-1.0"

    def test_returns_none_when_defining_step_missing(self):
        """With licenses only on steps 00 and 01, dcho has no license."""
        ttl = (
            "@prefix crm: <http://www.cidoc-crm.org/cidoc-crm/> .\n"
            "<https://w3id.org/changes/4/aldrovandi/lic/1/00/1>\n"
            " crm:P70i_is_documented_in <https://creativecommons.org/licenses/by-nc/4.0/> .\n"
            "<https://w3id.org/changes/4/aldrovandi/lic/1/01/1>\n"
            " crm:P70i_is_documented_in <https://creativecommons.org/licenses/by-nc/4.0/> .\n"
        )
        assert self._license_for(ttl, "dcho") is None
class TestCreateStageZip:
    """Tests for ``create_stage_zip``."""

    @staticmethod
    def _populate(stage_dir, files):
        """Create *stage_dir* and write each ``name -> text`` pair of *files* into it, in order."""
        stage_dir.mkdir(parents=True)
        for filename, text in files.items():
            (stage_dir / filename).write_text(text)

    @staticmethod
    def _make_output_dir(tmpdir):
        """Create and return the ``output`` directory under *tmpdir*."""
        output_dir = Path(tmpdir) / "output"
        output_dir.mkdir()
        return output_dir

    @staticmethod
    def _archive_names(zip_path):
        """Return the member names of the archive at *zip_path*, in archive order."""
        with zipfile.ZipFile(zip_path) as archive:
            return archive.namelist()

    @staticmethod
    def _sala6_folders():
        """Return a fresh folder list for the a/b variants of entity 98."""
        return [
            ("Sala6", "S6-98a-Test", {"raw": {}}),
            ("Sala6", "S6-98b-Test", {"raw": {}}),
        ]

    def test_includes_all_files_for_licensed_stage(self):
        """A licensed stage is zipped with metadata, provenance and data files."""
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir) / "root"
            self._populate(
                root / "Sala1" / "S1-01-Test" / "raw",
                {"meta.ttl": LICENSED_META_TTL, "prov.trig": "{}", "photo.jpg": "image"},
            )
            output_dir = self._make_output_dir(tmpdir)
            folders = [("Sala1", "S1-01-Test", {"raw": {}})]

            outcome = create_stage_zip("1", "raw", folders, root, output_dir, "Test Object")

            assert outcome is not None
            zip_path, license_id = outcome
            assert zip_path.name == "sala1-test-object-1-raw.zip"
            assert license_id == "cc0-1.0"
            assert sorted(self._archive_names(zip_path)) == [
                "S1-01-Test/raw/meta.ttl",
                "S1-01-Test/raw/photo.jpg",
                "S1-01-Test/raw/prov.trig",
            ]

    def test_includes_only_metadata_for_unlicensed_stage(self):
        """Without a license only ``meta.ttl`` and ``prov.trig`` are archived."""
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir) / "root"
            self._populate(
                root / "Sala1" / "S1-01-Test" / "raw",
                {"meta.ttl": UNLICENSED_META_TTL, "prov.trig": "{}", "photo.jpg": "image"},
            )
            output_dir = self._make_output_dir(tmpdir)
            folders = [("Sala1", "S1-01-Test", {"raw": {}})]

            outcome = create_stage_zip("1", "raw", folders, root, output_dir, "Test Object")

            assert outcome is not None
            zip_path, license_id = outcome
            assert license_id is None
            assert sorted(self._archive_names(zip_path)) == [
                "S1-01-Test/raw/meta.ttl",
                "S1-01-Test/raw/prov.trig",
            ]

    def test_multiple_folders_grouped_entity_license(self):
        """Two licensed variant folders end up, data included, in one archive."""
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir) / "root"
            for variant in ("a", "b"):
                self._populate(
                    root / "Sala6" / f"S6-98{variant}-Test" / "raw",
                    {"meta.ttl": LICENSED_META_TTL, "photo.jpg": "image"},
                )
            output_dir = self._make_output_dir(tmpdir)

            outcome = create_stage_zip(
                "98", "raw", self._sala6_folders(), root, output_dir, "Test Masks"
            )

            assert outcome is not None
            zip_path, license_id = outcome
            assert license_id == "cc0-1.0"
            assert sorted(self._archive_names(zip_path)) == [
                "S6-98a-Test/raw/meta.ttl",
                "S6-98a-Test/raw/photo.jpg",
                "S6-98b-Test/raw/meta.ttl",
                "S6-98b-Test/raw/photo.jpg",
            ]

    def test_multiple_folders_unlicensed(self):
        """Two unlicensed variant folders contribute only their ``meta.ttl`` files."""
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir) / "root"
            for variant in ("a", "b"):
                self._populate(
                    root / "Sala6" / f"S6-98{variant}-Test" / "raw",
                    {"meta.ttl": UNLICENSED_META_TTL},
                )
            output_dir = self._make_output_dir(tmpdir)

            outcome = create_stage_zip(
                "98", "raw", self._sala6_folders(), root, output_dir, "Test Masks"
            )

            assert outcome is not None
            zip_path, license_id = outcome
            assert license_id is None
            # Compared in archive order, not sorted.
            assert self._archive_names(zip_path) == [
                "S6-98a-Test/raw/meta.ttl",
                "S6-98b-Test/raw/meta.ttl",
            ]

    def test_license_in_later_folder_includes_all_data(self):
        """A license found only in the second folder still releases the first folder's data."""
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir) / "root"
            self._populate(
                root / "Sala6" / "S6-98a-Test" / "raw",
                {"meta.ttl": UNLICENSED_META_TTL, "photo.jpg": "image_a"},
            )
            self._populate(
                root / "Sala6" / "S6-98b-Test" / "raw",
                {"meta.ttl": LICENSED_META_TTL, "photo.jpg": "image_b"},
            )
            output_dir = self._make_output_dir(tmpdir)

            outcome = create_stage_zip(
                "98", "raw", self._sala6_folders(), root, output_dir, "Test Masks"
            )

            assert outcome is not None
            zip_path, license_id = outcome
            assert license_id == "cc0-1.0"
            assert sorted(self._archive_names(zip_path)) == [
                "S6-98a-Test/raw/meta.ttl",
                "S6-98a-Test/raw/photo.jpg",
                "S6-98b-Test/raw/meta.ttl",
                "S6-98b-Test/raw/photo.jpg",
            ]

    def test_returns_none_for_missing_stage(self):
        """Requesting a stage with no directory on disk returns ``None`` and writes no zip."""
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir) / "root"
            self._populate(
                root / "Sala1" / "S1-01-Test" / "raw",
                {"meta.ttl": UNLICENSED_META_TTL},
            )
            output_dir = self._make_output_dir(tmpdir)
            folders = [("Sala1", "S1-01-Test", {"raw": {}})]

            outcome = create_stage_zip("1", "dcho", folders, root, output_dir, "Test Object")

            assert outcome is None
            assert not (output_dir / "sala1-test-object-1-dcho.zip").exists()
class TestExtractEntityTitle:
    """Tests for ``extract_entity_title``."""

    def test_extracts_title_from_kg(self, real_kg):
        """Entity "1" of the real knowledge graph is titled "Carta nautica"."""
        assert extract_entity_title(real_kg, ["1"]) == "Carta nautica"

    def test_returns_default_for_missing(self):
        """On an empty graph the id "nonexistent" yields "Entity nonexistent"."""
        assert extract_entity_title(Graph(), ["nonexistent"]) == "Entity nonexistent"

    def test_takes_first_line(self):
        """Only the first line of a multi-line note is returned."""
        graph = Graph()
        subject = URIRef(f"{BASE_URI}/itm/42/ob00/1")
        note = Literal("First line\nSecond line")
        graph.add((subject, P3_HAS_NOTE, note))
        assert extract_entity_title(graph, ["42"]) == "First line"
class TestExtractAuthorsForEntityStage:
    """Tests for ``extract_authors_for_entity_stage``."""

    def test_extracts_author_from_kg(self, real_kg):
        """The raw stage of entity "1" has exactly one author."""
        found = extract_authors_for_entity_stage(real_kg, ["1"], "raw")
        assert found == {"Federica Bonifazi"}

    def test_accumulates_authors_across_steps(self, real_kg):
        """The dchoo stage keeps the raw author and adds at least one more."""
        found = extract_authors_for_entity_stage(real_kg, ["1"], "dchoo")
        assert "Federica Bonifazi" in found
        assert len(found) > 1

    def test_returns_empty_for_missing_entity(self, real_kg):
        """An unknown entity id gives an empty set."""
        found = extract_authors_for_entity_stage(real_kg, ["nonexistent"], "raw")
        assert found == set()

    def test_extracts_from_synthetic_graph(self):
        """A person carrying out activity ``act/42/00/1`` is reported by name for raw."""
        activity = URIRef(f"{BASE_URI}/act/42/00/1")
        person = URIRef(f"{BASE_URI}/per/42/1")
        appellation = URIRef(f"{BASE_URI}/apl/42/1")
        graph = Graph()
        for triple in (
            (activity, P14_CARRIED_OUT_BY, person),
            (person, RDF_TYPE, E21_PERSON),
            (person, P1_IS_IDENTIFIED_BY, appellation),
            (appellation, P190_HAS_SYMBOLIC_CONTENT, Literal("Test Author")),
        ):
            graph.add(triple)
        found = extract_authors_for_entity_stage(graph, ["42"], "raw")
        assert found == {"Test Author"}
class TestExtractMetadataAuthors:
    """Tests for ``extract_metadata_authors``."""

    def test_extracts_step_05_authors(self):
        """A person carrying out activity ``act/42/05/1`` is reported by name."""
        activity = URIRef(f"{BASE_URI}/act/42/05/1")
        person = URIRef(f"{BASE_URI}/per/meta/1")
        appellation = URIRef(f"{BASE_URI}/apl/meta/1")
        graph = Graph()
        for triple in (
            (activity, P14_CARRIED_OUT_BY, person),
            (person, RDF_TYPE, E21_PERSON),
            (person, P1_IS_IDENTIFIED_BY, appellation),
            (appellation, P190_HAS_SYMBOLIC_CONTENT, Literal("Metadata Author")),
        ):
            graph.add(triple)
        assert extract_metadata_authors(graph, ["42"]) == {"Metadata Author"}

    def test_returns_empty_for_missing_entity(self):
        """An empty graph gives an empty set."""
        assert extract_metadata_authors(Graph(), ["nonexistent"]) == set()

    def test_extracts_from_real_kg(self, real_kg):
        """Entity "1" of the real knowledge graph has three metadata authors."""
        expected = {"Arcangelo Massari", "Arianna Moretti", "Sebastian Barzaghi"}
        assert extract_metadata_authors(real_kg, ["1"]) == expected
class TestLoadCreatorsLookup:
    """Tests for ``load_creators_lookup``."""

    def test_loads_creators_as_dict(self):
        """A one-entry ``creators`` list is returned as a dict keyed by ``name_in_rdf``."""
        yaml_text = (
            "creators:\n"
            "  - name_in_rdf: Test Author\n"
            "    family_name: Author\n"
            "    given_name: Test\n"
            "    affiliation: Test Uni\n"
            "    orcid: 0000-0001-2345-6789\n"
        )
        # The directory context removes the file afterwards (the previous
        # NamedTemporaryFile(delete=False) left it on disk), and the file is
        # closed before it is read back.
        with tempfile.TemporaryDirectory() as tmpdir:
            creators_path = Path(tmpdir) / "creators.yaml"
            creators_path.write_text(yaml_text, encoding="utf-8")
            lookup = load_creators_lookup(creators_path)
        assert lookup == {
            "Test Author": {
                "family_name": "Author",
                "given_name": "Test",
                "affiliation": "Test Uni",
                "orcid": "0000-0001-2345-6789",
            }
        }
class TestBuildCreatorsForEntityStage:
    """Tests for ``build_creators_for_entity_stage``."""

    def test_builds_creators_with_researcher_role(self, real_kg):
        """The raw-stage author of entity "1" becomes a creator with the researcher role."""
        lookup = {
            "Federica Bonifazi": {
                "family_name": "Bonifazi",
                "given_name": "Federica",
                "affiliation": "CNR-ISPC",
                "orcid": "0009-0000-8466-5541",
            }
        }
        creators = build_creators_for_entity_stage(real_kg, ["1"], "raw", lookup)
        assert creators == [
            {
                "person_or_org": {
                    "type": "personal",
                    "family_name": "Bonifazi",
                    "given_name": "Federica",
                    "identifiers": [{"scheme": "orcid", "identifier": "0009-0000-8466-5541"}],
                },
                "role": {"id": "researcher"},
                "affiliations": [{"name": "CNR-ISPC"}],
            }
        ]

    def test_ignores_authors_not_in_lookup(self, real_kg):
        """With an empty lookup no creator is produced."""
        creators = build_creators_for_entity_stage(real_kg, ["1"], "raw", {})
        assert creators == []

    def test_sorts_authors_alphabetically(self):
        """Authors inserted as Zeta, Alpha come back ordered Alpha, Zeta."""
        g = Graph()
        # Both authors carry out the same activity, so its URI is built once.
        act_uri = URIRef(f"{BASE_URI}/act/42/00/1")
        for name in ["Zeta Author", "Alpha Author"]:
            # Spaces are not valid inside a URI reference; use an underscore in
            # the identifier and keep the readable name only in the literal.
            slug = name.replace(" ", "_")
            actor_uri = URIRef(f"{BASE_URI}/per/{slug}/1")
            apl_uri = URIRef(f"{BASE_URI}/apl/{slug}/1")
            g.add((act_uri, P14_CARRIED_OUT_BY, actor_uri))
            g.add((actor_uri, RDF_TYPE, E21_PERSON))
            g.add((actor_uri, P1_IS_IDENTIFIED_BY, apl_uri))
            g.add((apl_uri, P190_HAS_SYMBOLIC_CONTENT, Literal(name)))
        lookup = {
            "Alpha Author": {
                "family_name": "Author",
                "given_name": "Alpha",
                "affiliation": "Uni",
                "orcid": "0000-0000-0000-0001",
            },
            "Zeta Author": {
                "family_name": "Author",
                "given_name": "Zeta",
                "affiliation": "Uni",
                "orcid": "0000-0000-0000-0002",
            },
        }
        creators = build_creators_for_entity_stage(g, ["42"], "raw", lookup)
        assert [c["person_or_org"]["given_name"] for c in creators] == ["Alpha", "Zeta"]
class TestBuildMetadataCreators:
    """Tests for ``build_metadata_creators``."""

    def test_builds_creators_with_datacurator_role(self):
        """The author of activity ``act/42/05/1`` becomes a creator with the datacurator role."""
        activity = URIRef(f"{BASE_URI}/act/42/05/1")
        person = URIRef(f"{BASE_URI}/per/meta/1")
        appellation = URIRef(f"{BASE_URI}/apl/meta/1")
        graph = Graph()
        for triple in (
            (activity, P14_CARRIED_OUT_BY, person),
            (person, RDF_TYPE, E21_PERSON),
            (person, P1_IS_IDENTIFIED_BY, appellation),
            (appellation, P190_HAS_SYMBOLIC_CONTENT, Literal("Metadata Author")),
        ):
            graph.add(triple)

        author_details = {
            "family_name": "Author",
            "given_name": "Metadata",
            "affiliation": "Test Uni",
            "orcid": "0000-0001-2345-6789",
        }
        built = build_metadata_creators(graph, ["42"], {"Metadata Author": author_details})

        expected_creator = {
            "person_or_org": {
                "type": "personal",
                "family_name": "Author",
                "given_name": "Metadata",
                "identifiers": [{"scheme": "orcid", "identifier": "0000-0001-2345-6789"}],
            },
            "role": {"id": "datacurator"},
            "affiliations": [{"name": "Test Uni"}],
        }
        assert built == [expected_creator]
class TestMergeCreators:
    """Tests for ``merge_creators``."""

    @staticmethod
    def _creator(family_name, given_name, orcid, role):
        """Build a personal creator record affiliated with "Uni"."""
        return {
            "person_or_org": {
                "type": "personal",
                "family_name": family_name,
                "given_name": given_name,
                "identifiers": [{"scheme": "orcid", "identifier": orcid}],
            },
            "role": {"id": role},
            "affiliations": [{"name": "Uni"}],
        }

    def test_merges_without_duplicates(self):
        """Creators with different ORCIDs are both kept, digitization first."""
        digitization = [self._creator("Author", "Digit", "0000-0000-0000-0001", "researcher")]
        metadata = [self._creator("Author", "Meta", "0000-0000-0000-0002", "datacurator")]
        combined = merge_creators(digitization, metadata)
        assert len(combined) == 2
        assert combined[0]["role"] == {"id": "researcher"}
        assert combined[1]["role"] == {"id": "datacurator"}

    def test_deduplicates_by_orcid(self):
        """A shared ORCID yields one entry, keeping the digitization role."""
        digitization = [self._creator("Shared", "Author", "0000-0000-0000-0001", "researcher")]
        metadata = [self._creator("Shared", "Author", "0000-0000-0000-0001", "datacurator")]
        combined = merge_creators(digitization, metadata)
        assert len(combined) == 1
        assert combined[0]["role"] == {"id": "researcher"}

    def test_empty_lists(self):
        """Two empty inputs give an empty list."""
        assert merge_creators([], []) == []

    def test_only_metadata_creators(self):
        """With no digitization creators the metadata creator is returned as is."""
        metadata = [self._creator("Author", "Meta", "0000-0000-0000-0001", "datacurator")]
        combined = merge_creators([], metadata)
        assert len(combined) == 1
        assert combined[0]["role"] == {"id": "datacurator"}
class TestBuildEntityUri:
    """Tests for ``build_entity_uri``."""

    def test_builds_uri_for_numeric_id(self):
        """Id "27" maps to the ``itm/27/ob00/1`` URI."""
        expected = "https://w3id.org/changes/4/aldrovandi/itm/27/ob00/1"
        assert build_entity_uri(["27"]) == expected

    def test_builds_uri_for_string_id(self):
        """Id "ptb" maps to the ``itm/ptb/ob00/1`` URI."""
        expected = "https://w3id.org/changes/4/aldrovandi/itm/ptb/ob00/1"
        assert build_entity_uri(["ptb"]) == expected
# Creator record passed to ``generate_zenodo_config`` by the tests below.
_SAMPLE_PERSON = {
    "type": "personal",
    "family_name": "Author",
    "given_name": "Test",
    "identifiers": [{"scheme": "orcid", "identifier": "0000-0001-2345-6789"}],
}
SAMPLE_CREATOR = {
    "person_or_org": _SAMPLE_PERSON,
    "role": {"id": "researcher"},
    "affiliations": [{"name": "Test Uni"}],
}

# Base configuration passed to ``generate_zenodo_config`` by the tests below.
_SAMPLE_LOCATION = {
    "lat": 44.497,
    "lon": 11.353,
    "place": "Bologna, Italy",
    "description": "Palazzo Poggi Museum",
}
SAMPLE_BASE_CONFIG = {
    "zenodo_url": "https://sandbox.zenodo.org/api",
    "access_token": "test_token",
    "user_agent": "piccione/2.1.0",
    "subjects": [{"subject": "test"}],
    "notes": "Test notes content",
    "locations": [_SAMPLE_LOCATION],
}

# Methods text passed to ``generate_zenodo_config`` by the tests below.
SAMPLE_METHODS = "Test method content"
class TestGenerateZenodoConfig:
    """Tests for ``generate_zenodo_config``; every test freezes the clock at 2024-06-15."""

    @staticmethod
    def _generate(zip_path, base_config=None, **extra):
        """Call ``generate_zenodo_config`` for the raw stage with the sample title, creator and methods."""
        if base_config is None:
            base_config = SAMPLE_BASE_CONFIG
        return generate_zenodo_config(
            "raw", zip_path, "Test Title", base_config, [SAMPLE_CREATOR], SAMPLE_METHODS, **extra
        )

    def test_generates_valid_config(self, freezer):
        """The complete configuration for a raw stage matches the expected dict."""
        freezer.move_to("2024-06-15")
        zip_path = Path("/tmp/1-raw.zip")
        config = self._generate(zip_path)

        expected_description = (
            'Raw acquisition data of "Test Title" from the Aldrovandi Digital Twin. '
            "This dataset contains the raw material generated during the acquisition phase. "
            "Includes metadata (meta.ttl) and provenance (prov.trig) files following the "
            '<a href="https://w3id.org/dharc/ontology/chad-ap">CHAD-AP</a> ontology.\n'
        )
        expected_rights = [
            {
                "title": {"en": "Creative Commons Zero v1.0 Universal (Metadata license)"},
                "description": {"en": "Applies to metadata files: meta.ttl, prov.trig"},
                "link": "https://creativecommons.org/publicdomain/zero/1.0/",
            },
        ]
        expected_locations = {
            "features": [
                {
                    "geometry": {"type": "Point", "coordinates": [11.353, 44.497]},
                    "place": "Bologna, Italy",
                    "description": "Palazzo Poggi Museum",
                },
            ]
        }
        assert config == {
            "zenodo_url": "https://sandbox.zenodo.org/api",
            "access_token": "test_token",
            "user_agent": "piccione/2.1.0",
            "title": "Test Title - Raw - Aldrovandi Digital Twin",
            "description": expected_description,
            "resource_type": {"id": "dataset"},
            "publisher": "Zenodo",
            "access": {"record": "public", "files": "public"},
            "creators": [SAMPLE_CREATOR],
            "subjects": [{"subject": "test"}],
            "files": [str(zip_path.absolute())],
            "publication_date": "2024-06-15",
            "rights": expected_rights,
            "additional_descriptions": [
                {"description": "Test method content", "type": {"id": "methods"}},
                {"description": "Test notes content", "type": {"id": "notes"}},
            ],
            "locations": expected_locations,
        }

    def test_adds_entity_uri_as_alternate_identifier(self, freezer):
        """``entity_uri`` is emitted as a url-scheme identifier."""
        freezer.move_to("2024-06-15")
        entity_uri = "https://w3id.org/changes/4/aldrovandi/itm/27/ob00/1"
        config = self._generate(Path("/tmp/27-raw.zip"), entity_uri=entity_uri)

        assert config["identifiers"] == [
            {"identifier": "https://w3id.org/changes/4/aldrovandi/itm/27/ob00/1", "scheme": "url"}
        ]

    def test_converts_related_identifiers(self, freezer):
        """``relation`` and ``resource_type`` strings are wrapped into ``{"id": ...}`` objects."""
        freezer.move_to("2024-06-15")
        related = {
            "identifier": "10.3724/2096-7004.di.2024.0061",
            "relation": "isdocumentedby",
            "resource_type": "publication-article",
        }
        base_config = {**SAMPLE_BASE_CONFIG, "related_identifiers": [related]}
        config = self._generate(Path("/tmp/27-raw.zip"), base_config)

        assert config["related_identifiers"] == [
            {
                "identifier": "10.3724/2096-7004.di.2024.0061",
                "relation_type": {"id": "isdocumentedby"},
                "resource_type": {"id": "publication-article"},
            },
        ]

    def test_converts_notes_and_method_to_additional_descriptions(self, freezer):
        """Methods come first, then notes, each tagged with its type id."""
        freezer.move_to("2024-06-15")
        config = self._generate(Path("/tmp/1-raw.zip"))

        assert config["additional_descriptions"] == [
            {"description": "Test method content", "type": {"id": "methods"}},
            {"description": "Test notes content", "type": {"id": "notes"}},
        ]

    def test_cc0_disclaimer_in_additional_descriptions(self, freezer):
        """With ``license="cc0-1.0"`` the CC0 disclaimer is appended as a notes entry."""
        freezer.move_to("2024-06-15")
        config = self._generate(Path("/tmp/1-raw.zip"), license="cc0-1.0")

        assert config["additional_descriptions"] == [
            {"description": "Test method content", "type": {"id": "methods"}},
            {"description": "Test notes content", "type": {"id": "notes"}},
            {"description": CC0_DISCLAIMER, "type": {"id": "notes"}},
        ]

    def test_converts_locations_to_geojson(self, freezer):
        """A lat/lon location becomes a Point feature with ``[lon, lat]`` coordinates."""
        freezer.move_to("2024-06-15")
        config = self._generate(Path("/tmp/1-raw.zip"))

        expected_feature = {
            "geometry": {"type": "Point", "coordinates": [11.353, 44.497]},
            "place": "Bologna, Italy",
            "description": "Palazzo Poggi Museum",
        }
        assert config["locations"] == {"features": [expected_feature]}

    def test_includes_community_field(self, freezer):
        """The ``community`` value of the base config is carried over unchanged."""
        freezer.move_to("2024-06-15")
        base_config = {**SAMPLE_BASE_CONFIG, "community": "project-changes"}
        config = self._generate(Path("/tmp/1-raw.zip"), base_config)

        assert config["community"] == "project-changes"

    def test_includes_restricted_notice_when_no_license(self, freezer):
        """Without a license the restricted notice is a notes entry, not part of the description."""
        freezer.move_to("2024-06-15")
        config = self._generate(Path("/tmp/1-raw.zip"), has_license=False)

        notice_entry = {"description": RESTRICTED_NOTICE, "type": {"id": "notes"}}
        assert RESTRICTED_NOTICE not in config["description"]
        assert notice_entry in config["additional_descriptions"]

    def test_no_restricted_notice_when_licensed(self, freezer):
        """With ``has_license=True`` no restricted-notice entry is added."""
        freezer.move_to("2024-06-15")
        config = self._generate(Path("/tmp/1-raw.zip"), has_license=True)

        notice_entry = {"description": RESTRICTED_NOTICE, "type": {"id": "notes"}}
        assert notice_entry not in config["additional_descriptions"]

    def test_propagates_funding_field(self, freezer):
        """The ``funding`` list of the base config is carried over unchanged."""
        freezer.move_to("2024-06-15")
        award = {"title": {"en": "CHANGES"}, "number": "PE 0000020"}
        funding = [
            {
                "funder": {"name": "European Union - NextGenerationEU"},
                "award": award,
            }
        ]
        base_config = {**SAMPLE_BASE_CONFIG, "funding": funding}
        config = self._generate(Path("/tmp/1-raw.zip"), base_config)

        assert config["funding"] == funding
class TestExtractLicenseForEntityStage:
    """Tests for ``extract_license_for_entity_stage``."""

    @staticmethod
    def _graph_with_licenses(step_to_license):
        """Build a graph with one license node per ``step -> license URL`` pair for entity 42."""
        graph = Graph()
        for step, license_url in step_to_license.items():
            subject = URIRef(f"{BASE_URI}/lic/42/{step}/1")
            graph.add((subject, P70I, URIRef(license_url)))
        return graph

    def test_extracts_license_from_kg(self):
        """A CC0 URL on step 00 gives "cc0-1.0" for raw."""
        graph = self._graph_with_licenses(
            {"00": "https://creativecommons.org/publicdomain/zero/1.0/"}
        )
        assert extract_license_for_entity_stage(graph, "42", "raw") == "cc0-1.0"

    def test_returns_none_for_missing_license(self):
        """An empty graph gives ``None``."""
        assert extract_license_for_entity_stage(Graph(), "42", "raw") is None

    def test_returns_none_for_unknown_license_uri(self):
        """A license URL that is not recognised gives ``None``."""
        graph = self._graph_with_licenses({"00": "https://example.com/custom-license"})
        assert extract_license_for_entity_stage(graph, "42", "raw") is None

    def test_extracts_cc_by(self):
        """A CC BY 4.0 URL on step 00 gives "cc-by-4.0" for raw."""
        graph = self._graph_with_licenses(
            {"00": "https://creativecommons.org/licenses/by/4.0/"}
        )
        assert extract_license_for_entity_stage(graph, "42", "raw") == "cc-by-4.0"

    def test_picks_defining_step_license(self):
        """For dcho the step-02 license wins over those of steps 00 and 01."""
        graph = self._graph_with_licenses(
            {
                "00": "https://creativecommons.org/licenses/by-nc/4.0/",
                "01": "https://creativecommons.org/licenses/by-nc/4.0/",
                "02": "https://creativecommons.org/publicdomain/zero/1.0/",
            }
        )
        assert extract_license_for_entity_stage(graph, "42", "dcho") == "cc0-1.0"

    def test_picks_defining_step_real_kg(self, real_kg):
        """In the real knowledge graph ``vetrina_2_basso`` is CC0 at the dcho stage."""
        found = extract_license_for_entity_stage(real_kg, "vetrina_2_basso", "dcho")
        assert found == "cc0-1.0"

    def test_raw_returns_defining_step_license(self):
        """For raw the step-00 license is returned even when step 02 differs."""
        graph = self._graph_with_licenses(
            {
                "00": "https://creativecommons.org/licenses/by-nc/4.0/",
                "02": "https://creativecommons.org/publicdomain/zero/1.0/",
            }
        )
        assert extract_license_for_entity_stage(graph, "42", "raw") == "cc-by-nc-4.0"

    def test_returns_none_when_defining_step_missing(self):
        """With licenses only on steps 00 and 01, dcho has no license."""
        graph = self._graph_with_licenses(
            {
                "00": "https://creativecommons.org/licenses/by-nc/4.0/",
                "01": "https://creativecommons.org/licenses/by-nc/4.0/",
            }
        )
        assert extract_license_for_entity_stage(graph, "42", "dcho") is None
class TestExtractKeeperInfo:
    """Tests for ``extract_keeper_info``."""

    def test_extracts_keeper_from_kg(self, real_kg):
        """Entity "1" is kept by the Biblioteca Universitaria di Bologna, in Bologna."""
        name, place = extract_keeper_info(real_kg, ["1"])
        assert name == "Biblioteca Universitaria di Bologna"
        assert place == "Bologna"

    def test_extracts_non_bologna_keeper(self, real_kg):
        """Entity "21" is kept by the Accademia Carrara, in Bergamo."""
        name, place = extract_keeper_info(real_kg, ["21"])
        assert name == "Accademia Carrara"
        assert place == "Bergamo"

    def test_returns_none_for_missing_entity(self, real_kg):
        """An unknown entity id gives ``None`` for both name and location."""
        name, place = extract_keeper_info(real_kg, ["nonexistent"])
        assert name is None
        assert place is None

    def test_extracts_from_synthetic_graph(self):
        """Keeper name and residence are read from activity ``act/42/ob08/1``."""
        custody = URIRef(f"{BASE_URI}/act/42/ob08/1")
        keeper = URIRef(f"{BASE_URI}/acr/test_museum/1")
        keeper_appellation = URIRef(f"{BASE_URI}/apl/test_museum/1")
        residence = URIRef(f"{BASE_URI}/plc/test_city/1")
        residence_appellation = URIRef(f"{BASE_URI}/apl/test_city/1")
        graph = Graph()
        for triple in (
            (custody, P14_CARRIED_OUT_BY, keeper),
            (keeper, P1_IS_IDENTIFIED_BY, keeper_appellation),
            (keeper_appellation, P190_HAS_SYMBOLIC_CONTENT, Literal("Test Museum")),
            (keeper, P74_HAS_RESIDENCE, residence),
            (residence, P1_IS_IDENTIFIED_BY, residence_appellation),
            (residence_appellation, P190_HAS_SYMBOLIC_CONTENT, Literal("Test City")),
        ):
            graph.add(triple)
        name, place = extract_keeper_info(graph, ["42"])
        assert name == "Test Museum"
        assert place == "Test City"

    def test_keeper_without_location(self):
        """A keeper with no residence triple has a name and a ``None`` location."""
        custody = URIRef(f"{BASE_URI}/act/42/ob08/1")
        keeper = URIRef(f"{BASE_URI}/acr/test_museum/1")
        keeper_appellation = URIRef(f"{BASE_URI}/apl/test_museum/1")
        graph = Graph()
        for triple in (
            (custody, P14_CARRIED_OUT_BY, keeper),
            (keeper, P1_IS_IDENTIFIED_BY, keeper_appellation),
            (keeper_appellation, P190_HAS_SYMBOLIC_CONTENT, Literal("Test Museum")),
        ):
            graph.add(triple)
        name, place = extract_keeper_info(graph, ["42"])
        assert name == "Test Museum"
        assert place is None
class TestBuildEnhancedDescription:
    """Checks on the text produced by build_enhanced_description."""

    def test_raw_stage_description(self):
        """The raw-stage description matches the full expected text."""
        expected = (
            'Raw acquisition data of "Test Object" from the Aldrovandi Digital Twin. '
            "This dataset contains the raw material generated during the acquisition phase. "
            'Includes metadata (meta.ttl) and provenance (prov.trig) files following the <a href="https://w3id.org/dharc/ontology/chad-ap">CHAD-AP</a> ontology.\n'
        )
        assert build_enhanced_description("raw", "Test Object") == expected

    def test_dcho_stage_description(self):
        """The dcho stage names the DCHO, quotes the title and lists the processing."""
        text = build_enhanced_description("dcho", "Museum Specimen")
        assert "Digital Cultural Heritage Object" in text
        assert '"Museum Specimen"' in text
        assert "interpolation, gap filling, and resolution of geometric issues" in text

    def test_dchoo_stage_description(self):
        """The dchoo stage mentions the optimised variant."""
        text = build_enhanced_description("dchoo", "Object Title")
        assert "Optimized Digital Cultural Heritage Object" in text
        assert "optimised for real-time online interaction" in text

    def test_description_never_contains_disclaimer(self):
        """The CC0 disclaimer is not part of the description."""
        text = build_enhanced_description("dcho", "Test Object")
        assert CC0_DISCLAIMER not in text

    def test_includes_keeper_and_location(self):
        """Keeper and location are rendered as 'Name (Place)'."""
        text = build_enhanced_description(
            "raw",
            "Test Object",
            keeper_name="Test Museum",
            keeper_location="Test City",
        )
        assert "The original object is held by Test Museum (Test City)." in text

    def test_includes_keeper_without_location(self):
        """Without a location no parenthesis follows the keeper name."""
        text = build_enhanced_description("raw", "Test Object", keeper_name="Test Museum")
        assert "The original object is held by Test Museum." in text
        assert "Test Museum (" not in text

    def test_no_keeper_line_when_none(self):
        """No keeper sentence appears when no keeper is given."""
        text = build_enhanced_description("raw", "Test Object")
        assert "held by" not in text

    def test_description_is_single_paragraph(self):
        """Only trailing newlines are allowed in the description."""
        text = build_enhanced_description(
            "raw",
            "Test Object",
            keeper_name="Museum",
            keeper_location="City",
        )
        body = text.rstrip("\n")
        assert "\n" not in body
class TestFormatCreatorsForTable:
    """Checks on the creators column rendered by _format_creators_for_table."""

    @staticmethod
    def _creator(family, given, orcid):
        """Build one creator entry carrying a single ORCID identifier."""
        return {
            "person_or_org": {
                "family_name": family,
                "given_name": given,
                "identifiers": [{"scheme": "orcid", "identifier": orcid}],
            }
        }

    def test_formats_multiple_creators(self):
        """Several creators are joined with '; '."""
        config = {
            "creators": [
                self._creator("Bordignon", "Alice", "0009-0008-3556-0493"),
                self._creator("Massari", "Arcangelo", "0000-0002-8420-0696"),
            ]
        }
        rendered = _format_creators_for_table(config)
        assert rendered == "Bordignon, Alice [orcid:0009-0008-3556-0493]; Massari, Arcangelo [orcid:0000-0002-8420-0696]"

    def test_formats_single_creator(self):
        """A single creator is rendered without a separator."""
        config = {
            "creators": [
                self._creator("Barzaghi", "Sebastian", "0000-0002-0799-1527"),
            ]
        }
        rendered = _format_creators_for_table(config)
        assert rendered == "Barzaghi, Sebastian [orcid:0000-0002-0799-1527]"
class TestFormatLicensesForTable:
    """Checks on the licenses column rendered by _format_licenses_for_table."""

    @staticmethod
    def _config_with_rights(*titles):
        """Build a config whose rights list has one entry per English title."""
        return {"rights": [{"title": {"en": title}} for title in titles]}

    def test_formats_cc0_metadata_and_content(self):
        """Both CC0 entries are rendered as cc0-1.0 with their role suffix."""
        config = self._config_with_rights(
            "Creative Commons Zero v1.0 Universal (Metadata license)",
            "Creative Commons Zero v1.0 Universal (Content license)",
        )
        rendered = _format_licenses_for_table(config)
        assert rendered == "cc0-1.0 (Metadata license); cc0-1.0 (Content license)"

    def test_formats_mixed_licenses(self):
        """A CC0 metadata license is combined with a BY-NC-SA content license."""
        config = self._config_with_rights(
            "Creative Commons Zero v1.0 Universal (Metadata license)",
            "Creative Commons Attribution Non Commercial Share Alike 4.0 International (Content license)",
        )
        rendered = _format_licenses_for_table(config)
        assert rendered == "cc0-1.0 (Metadata license); cc-by-nc-sa-4.0 (Content license)"

    def test_formats_metadata_only(self):
        """A single rights entry is rendered without a separator."""
        config = self._config_with_rights(
            "Creative Commons Zero v1.0 Universal (Metadata license)",
        )
        rendered = _format_licenses_for_table(config)
        assert rendered == "cc0-1.0 (Metadata license)"
class TestExtractDoi:
    """Checks on _extract_doi."""

    def test_extracts_doi_from_record(self):
        """The DOI is read from pids.doi.identifier."""
        doi = _extract_doi({"pids": {"doi": {"identifier": "10.5281/zenodo.12345"}}})
        assert doi == "10.5281/zenodo.12345"

    def test_returns_empty_string_on_sandbox(self):
        """Records with no DOI information yield an empty string."""
        for record in ({}, {"pids": {}}):
            assert _extract_doi(record) == ""
class TestExtractRecordUrl:
    """Checks on _extract_record_url."""

    def test_extracts_url_from_record(self):
        """The URL is read from links.self_html."""
        expected_url = "https://zenodo.org/records/12345"
        record = {"links": {"self_html": expected_url}}
        assert _extract_record_url(record) == "https://zenodo.org/records/12345"
class TestExtractAcquisitionTechnique:
    """Checks on extract_acquisition_technique."""

    def test_extracts_photography_from_kg(self, real_kg):
        """Entity "1" was acquired with digital photography."""
        assert extract_acquisition_technique(real_kg, ["1"]) == "digital photography"

    def test_extracts_scanning_from_kg(self, real_kg):
        """Entity "12" was acquired with optical scanning."""
        assert extract_acquisition_technique(real_kg, ["12"]) == "optical scanning"

    def test_returns_none_for_missing_entity(self):
        """An empty graph yields no technique."""
        empty_graph = Graph()
        assert extract_acquisition_technique(empty_graph, ["nonexistent"]) is None

    def test_extracts_from_synthetic_graph(self):
        """AAT concept 300266792 is reported as digital photography."""
        graph = Graph()
        acquisition = URIRef(f"{BASE_URI}/act/42/00/1")
        technique = URIRef(f"{AAT}300266792")
        graph.add((acquisition, P32_USED_GENERAL_TECHNIQUE, technique))
        assert extract_acquisition_technique(graph, ["42"]) == "digital photography"
class TestExtractDevices:
    """Checks on extract_devices."""

    def test_extracts_devices_from_kg(self, real_kg):
        """Entity "1" lists its lens and camera."""
        assert extract_devices(real_kg, ["1"]) == ["Nikkor 50mm", "Nikon D7200"]

    def test_extracts_scanner_device(self, real_kg):
        """Entity "12" lists a single scanner."""
        assert extract_devices(real_kg, ["12"]) == ["Artec Eva"]

    def test_returns_empty_for_missing_entity(self):
        """An empty graph yields an empty device list."""
        empty_graph = Graph()
        assert extract_devices(empty_graph, ["nonexistent"]) == []

    def test_excludes_item_uris(self):
        """Objects under /itm/ are not reported as devices."""
        graph = Graph()
        acquisition = URIRef(f"{BASE_URI}/act/42/00/1")
        used_objects = (
            URIRef(f"{BASE_URI}/dev/nikon_d7200/1"),
            URIRef(f"{BASE_URI}/itm/42/ob00/1"),
        )
        for used in used_objects:
            graph.add((acquisition, P16_USED_SPECIFIC_OBJECT, used))
        assert extract_devices(graph, ["42"]) == ["Nikon D7200"]
class TestExtractSoftwareForStage:
    """Checks on extract_software_for_stage."""

    def test_extracts_raw_software(self, real_kg):
        """The raw stage of entity "1" has no software."""
        assert extract_software_for_stage(real_kg, ["1"], "raw") == []

    def test_extracts_rawp_software(self, real_kg):
        """The rawp stage of entity "1" includes 3DF Zephyr."""
        found = extract_software_for_stage(real_kg, ["1"], "rawp")
        assert "3DF Zephyr" in found

    def test_excludes_metadata_step_software(self, real_kg):
        """These three tools are absent from the dchoo software list."""
        found = extract_software_for_stage(real_kg, ["1"], "dchoo")
        for tool in ("CHAD-AP", "HeriTrace", "Morph-KGC"):
            assert tool not in found

    def test_includes_step_06_software(self, real_kg):
        """ATON is part of the dchoo software list."""
        found = extract_software_for_stage(real_kg, ["1"], "dchoo")
        assert "ATON" in found

    def test_returns_empty_for_missing_entity(self):
        """An empty graph yields an empty software list."""
        empty_graph = Graph()
        assert extract_software_for_stage(empty_graph, ["nonexistent"], "raw") == []
class TestBuildMethodsDescription:
    """Checks on the text produced by build_methods_description."""

    def test_includes_workflow_reference(self):
        """The workflow DOI is present even for an empty graph."""
        text = build_methods_description(Graph(), ["nonexistent"], "raw")
        assert "doi:10.46298/transformations.14773" in text

    def test_includes_technique_and_devices(self, real_kg):
        """Technique and camera of entity "1" are mentioned."""
        text = build_methods_description(real_kg, ["1"], "raw")
        assert "digital photography" in text
        assert "Nikon D7200" in text

    def test_includes_software_for_rawp(self, real_kg):
        """The rawp stage lists its processing software."""
        text = build_methods_description(real_kg, ["1"], "rawp")
        assert "Processing software:" in text
        assert "3DF Zephyr" in text

    def test_no_software_for_raw(self, real_kg):
        """The raw stage has no processing-software section."""
        text = build_methods_description(real_kg, ["1"], "raw")
        assert "Processing software:" not in text

    def test_includes_chad_ap_reference(self):
        """CHAD-AP is mentioned even for an empty graph."""
        text = build_methods_description(Graph(), ["nonexistent"], "raw")
        assert "CHAD-AP" in text

    def test_scanning_entity(self, real_kg):
        """Technique and scanner of entity "12" are mentioned."""
        text = build_methods_description(real_kg, ["12"], "raw")
        assert "optical scanning" in text
        assert "Artec Eva" in text
# Baseline upload configuration shared by the tests below; _write_config
# layers per-test overrides on top of it before dumping it to YAML.
MINIMAL_CONFIG = dict(
    title="Test Object - Raw - Aldrovandi Digital Twin",
    zenodo_url="https://sandbox.zenodo.org/api",
    access_token="fake-token",
    user_agent="test/1.0",
    publication_date="2026-05-22",
    creators=[
        dict(
            person_or_org=dict(
                type="personal",
                family_name="Rossi",
                given_name="Mario",
                identifiers=[dict(scheme="orcid", identifier="0000-0001-0000-0001")],
            ),
            role=dict(id="researcher"),
            affiliations=[dict(name="University of Bologna")],
        ),
    ],
    rights=[
        dict(
            title=dict(en="Creative Commons Zero v1.0 Universal (Metadata license)"),
            link="https://creativecommons.org/publicdomain/zero/1.0/",
        ),
    ],
)
# Canned record payload carrying an id, a DOI under pids.doi.identifier and
# an HTML link under links.self_html.
MOCK_RECORD = dict(
    id="999001",
    pids=dict(doi=dict(identifier="10.5281/zenodo.999001")),
    links=dict(self_html="https://sandbox.zenodo.org/records/999001"),
)
def _write_config(path: Path, overrides: dict | None = None) -> Path:
    """Write MINIMAL_CONFIG, updated with *overrides*, to *path* as YAML.

    Args:
        path: Destination file; it is created or truncated.
        overrides: Top-level keys that replace or extend MINIMAL_CONFIG.
            ``None`` means no overrides.

    Returns:
        The same *path*, so the call can be used inline.
    """
    config = {**MINIMAL_CONFIG, **(overrides or {})}
    # allow_unicode=True makes PyYAML emit non-ASCII characters verbatim, so
    # the file must be opened as UTF-8 explicitly instead of relying on the
    # platform default encoding.
    with open(path, "w", encoding="utf-8") as f:
        yaml.dump(config, f, default_flow_style=False, allow_unicode=True)
    return path
class TestAtomicWriteJson:
    """Checks on _atomic_write_json."""

    def test_writes_json(self, tmp_path):
        """The written file round-trips through json."""
        target = tmp_path / "data.json"
        _atomic_write_json(target, [{"a": 1}])
        with open(target) as handle:
            loaded = json.load(handle)
        assert loaded == [{"a": 1}]

    def test_overwrites_existing(self, tmp_path):
        """A second write replaces the earlier content."""
        target = tmp_path / "data.json"
        for payload in ([{"old": True}], [{"new": True}]):
            _atomic_write_json(target, payload)
        with open(target) as handle:
            loaded = json.load(handle)
        assert loaded == [{"new": True}]
class TestWriteDoiTable:
    """Checks on the CSV produced by _write_doi_table."""

    def test_generates_csv_from_drafts(self, tmp_path):
        """One uploaded draft gives one row; the title comes from the config file."""
        config_path = _write_config(tmp_path / "test-raw.yaml")
        uploaded_entry = {
            "draft_id": "100",
            "config_file": str(config_path),
            "title": "Test",
            "zenodo_url": "https://sandbox.zenodo.org/api",
            "access_token": "tok",
            "user_agent": "ua",
            "status": "uploaded",
            "doi": "10.5281/zenodo.100",
            "record_url": "https://sandbox.zenodo.org/records/100",
        }

        csv_path = _write_doi_table([uploaded_entry], tmp_path)

        with open(csv_path) as handle:
            rows = [row for row in csv.DictReader(handle)]
        assert len(rows) == 1
        only_row = rows[0]
        assert only_row["DOI"] == "10.5281/zenodo.100"
        assert only_row["Titolo"] == "Test Object - Raw - Aldrovandi Digital Twin"

    def test_skips_failed_entries(self, tmp_path):
        """Entries with status 'failed' do not produce a row."""
        config_path = _write_config(tmp_path / "test-raw.yaml")
        good_entry = {
            "draft_id": "100",
            "config_file": str(config_path),
            "title": "Good",
            "zenodo_url": "",
            "access_token": "",
            "user_agent": "",
            "status": "uploaded",
            "doi": "10.5281/zenodo.100",
            "record_url": "https://sandbox.zenodo.org/records/100",
        }
        bad_entry = {
            "draft_id": "",
            "config_file": str(config_path),
            "title": "Bad",
            "zenodo_url": "",
            "access_token": "",
            "user_agent": "",
            "status": "failed",
            "doi": "",
            "record_url": "",
            "error": "boom",
        }

        csv_path = _write_doi_table([good_entry, bad_entry], tmp_path)

        with open(csv_path) as handle:
            rows = [row for row in csv.DictReader(handle)]
        assert len(rows) == 1
class TestUploadAllResume:
    """Checks on upload_all: fresh runs, resuming, failures and publishing.

    piccione_upload and time.sleep are patched in every test, so nothing is
    sent over the network and no real delay happens.
    """

    def _setup_configs(self, tmp_path):
        """Create configs/ under tmp_path with three entity configs (a, b, c)."""
        configs_dir = tmp_path / "configs"
        configs_dir.mkdir()
        for letter in ("a", "b", "c"):
            _write_config(
                configs_dir / f"entity-{letter}-raw.yaml",
                {"title": f"Entity {letter.upper()} - Raw"},
            )
        return configs_dir

    @patch("changes_metadata_manager.zenodo_upload.time.sleep")
    @patch("changes_metadata_manager.zenodo_upload.piccione_upload")
    def test_fresh_upload(self, mock_upload, mock_sleep, tmp_path):
        """With no drafts.json, every config is uploaded and recorded."""
        configs_dir = self._setup_configs(tmp_path)
        seen_configs = []

        def side_effect(config_file, publish=False):
            # The number of calls so far doubles as the draft number.
            seen_configs.append(config_file)
            number = len(seen_configs)
            return {
                "id": f"draft-{number}",
                "pids": {},
                "links": {"self_html": f"https://sandbox.zenodo.org/records/draft-{number}"},
            }

        mock_upload.side_effect = side_effect
        upload_all(configs_dir, publish=False)

        drafts = json.loads((tmp_path / "drafts.json").read_text())
        assert len(drafts) == 3
        assert all(entry["status"] == "uploaded" for entry in drafts)
        assert mock_upload.call_count == 3

    @patch("changes_metadata_manager.zenodo_upload.time.sleep")
    @patch("changes_metadata_manager.zenodo_upload.piccione_upload")
    def test_resume_skips_completed(self, mock_upload, mock_sleep, tmp_path):
        """A config already recorded as uploaded is not uploaded again."""
        configs_dir = self._setup_configs(tmp_path)
        drafts_path = tmp_path / "drafts.json"
        already_done = {
            "draft_id": "existing-1",
            "config_file": str(configs_dir / "entity-a-raw.yaml"),
            "title": "Entity A - Raw",
            "zenodo_url": "https://sandbox.zenodo.org/api",
            "access_token": "tok",
            "user_agent": "ua",
            "status": "uploaded",
            "doi": "",
            "record_url": "https://sandbox.zenodo.org/uploads/existing-1",
        }
        _atomic_write_json(drafts_path, [already_done])

        mock_upload.return_value = {
            "id": "new-draft",
            "pids": {},
            "links": {"self_html": "https://sandbox.zenodo.org/records/new-draft"},
        }

        upload_all(configs_dir, publish=False)

        drafts = json.loads(drafts_path.read_text())
        assert len(drafts) == 3
        assert mock_upload.call_count == 2
        stems = {
            Path(entry["config_file"]).stem
            for entry in drafts
            if entry["status"] == "uploaded"
        }
        for expected_stem in ("entity-a-raw", "entity-b-raw", "entity-c-raw"):
            assert expected_stem in stems

    @patch("changes_metadata_manager.zenodo_upload.time.sleep")
    @patch("changes_metadata_manager.zenodo_upload.piccione_upload")
    def test_failure_continues_and_records_error(self, mock_upload, mock_sleep, tmp_path):
        """One failing upload is recorded with its error; the others still run."""
        configs_dir = self._setup_configs(tmp_path)
        seen_configs = []

        def side_effect(config_file, publish=False):
            seen_configs.append(config_file)
            number = len(seen_configs)
            if number == 2:
                raise RuntimeError("Zenodo is down")
            return {
                "id": f"draft-{number}",
                "pids": {},
                "links": {"self_html": f"https://sandbox.zenodo.org/records/draft-{number}"},
            }

        mock_upload.side_effect = side_effect
        upload_all(configs_dir, publish=False)

        drafts = json.loads((tmp_path / "drafts.json").read_text())
        assert len(drafts) == 3
        statuses = [entry["status"] for entry in drafts]
        assert statuses.count("uploaded") == 2
        assert statuses.count("failed") == 1
        failed = next(entry for entry in drafts if entry["status"] == "failed")
        assert failed["error"] == "Zenodo is down"

    @patch("changes_metadata_manager.zenodo_upload.time.sleep")
    @patch("changes_metadata_manager.zenodo_upload.piccione_upload")
    def test_failed_entry_retried_on_rerun(self, mock_upload, mock_sleep, tmp_path):
        """Only the entry recorded as failed is uploaded again on a rerun."""
        configs_dir = self._setup_configs(tmp_path)
        drafts_path = tmp_path / "drafts.json"

        def uploaded_entry(draft_id, stem, title):
            return {
                "draft_id": draft_id,
                "config_file": str(configs_dir / f"{stem}.yaml"),
                "title": title,
                "zenodo_url": "https://sandbox.zenodo.org/api",
                "access_token": "tok",
                "user_agent": "ua",
                "status": "uploaded",
                "doi": "",
                "record_url": "",
            }

        failed_entry = {
            "draft_id": "",
            "config_file": str(configs_dir / "entity-b-raw.yaml"),
            "title": "entity-b-raw",
            "zenodo_url": "",
            "access_token": "",
            "user_agent": "",
            "status": "failed",
            "doi": "",
            "record_url": "",
            "error": "previous failure",
        }
        _atomic_write_json(drafts_path, [
            uploaded_entry("existing-1", "entity-a-raw", "Entity A - Raw"),
            failed_entry,
            uploaded_entry("existing-3", "entity-c-raw", "Entity C - Raw"),
        ])

        mock_upload.return_value = {
            "id": "retried-draft",
            "pids": {},
            "links": {"self_html": "https://sandbox.zenodo.org/records/retried-draft"},
        }

        upload_all(configs_dir, publish=False)

        assert mock_upload.call_count == 1
        drafts = json.loads(drafts_path.read_text())
        assert len(drafts) == 3
        assert all(entry["status"] == "uploaded" for entry in drafts)
        retried = next(
            entry for entry in drafts
            if Path(entry["config_file"]).stem == "entity-b-raw"
        )
        assert retried["draft_id"] == "retried-draft"
        assert "error" not in retried

    @patch("changes_metadata_manager.zenodo_upload.time.sleep")
    @patch("changes_metadata_manager.zenodo_upload.piccione_upload")
    def test_drafts_json_written_after_each_upload(self, mock_upload, mock_sleep, tmp_path):
        """The drafts file is rewritten after each upload, growing by one entry."""
        configs_dir = self._setup_configs(tmp_path)
        snapshots: list[int] = []

        def counting_upload(config_file, publish=False):
            # One snapshot is taken per write, so this numbers the drafts.
            number = len(snapshots) + 1
            return {
                "id": f"draft-{number}",
                "pids": {},
                "links": {"self_html": f"https://sandbox.zenodo.org/records/draft-{number}"},
            }

        mock_upload.side_effect = counting_upload

        def tracking_write(path, data):
            # Record the entry count, then write via a temp file and replace.
            import os

            snapshots.append(len(data))
            fd, tmp = tempfile.mkstemp(dir=path.parent, suffix=".tmp")
            with os.fdopen(fd, "w") as handle:
                json.dump(data, handle, indent=2)
            os.replace(tmp, path)

        with patch(
            "changes_metadata_manager.zenodo_upload._atomic_write_json",
            side_effect=tracking_write,
        ):
            upload_all(configs_dir, publish=False)

        assert snapshots == [1, 2, 3]

    @patch("changes_metadata_manager.zenodo_upload.time.sleep")
    @patch("changes_metadata_manager.zenodo_upload.piccione_upload")
    def test_publish_flag_sets_published_status(self, mock_upload, mock_sleep, tmp_path):
        """With publish=True every entry is stored as published with its DOI."""
        configs_dir = self._setup_configs(tmp_path)
        mock_upload.return_value = {
            "id": "pub-1",
            "pids": {"doi": {"identifier": "10.5281/zenodo.pub1"}},
            "links": {"self_html": "https://zenodo.org/records/pub-1"},
        }

        upload_all(configs_dir, publish=True)

        drafts = json.loads((tmp_path / "drafts.json").read_text())
        assert all(entry["status"] == "published" for entry in drafts)
        assert all(entry["doi"] == "10.5281/zenodo.pub1" for entry in drafts)
class TestPublishAllDraftsResume:
    """Checks on publish_all_drafts: publishing, skipping, failures and retries.

    piccione_publish_draft and time.sleep are patched in every test.
    """

    def _make_drafts(self, tmp_path, statuses):
        """Write drafts.json with one entry per status and return its path.

        Published entries get a DOI and record URL; entries with status
        'failed' or 'publish_failed' also carry an 'error' key.
        """
        configs_dir = tmp_path / "configs"
        configs_dir.mkdir(exist_ok=True)
        entries = []
        for index, status in enumerate(statuses):
            config_path = _write_config(
                configs_dir / f"entity-{index}-raw.yaml",
                {"title": f"Entity {index}"},
            )
            is_published = status == "published"
            entry = {
                "draft_id": f"draft-{index}",
                "config_file": str(config_path),
                "title": f"Entity {index}",
                "zenodo_url": "https://sandbox.zenodo.org/api",
                "access_token": "tok",
                "user_agent": "ua",
                "status": status,
                "doi": "10.5281/existing" if is_published else "",
                "record_url": f"https://sandbox.zenodo.org/records/draft-{index}" if is_published else "",
            }
            if status in ("failed", "publish_failed"):
                entry["error"] = "old error"
            entries.append(entry)
        drafts_path = tmp_path / "drafts.json"
        _atomic_write_json(drafts_path, entries)
        return drafts_path

    @patch("changes_metadata_manager.zenodo_upload.time.sleep")
    @patch("changes_metadata_manager.zenodo_upload.piccione_publish_draft")
    def test_publishes_uploaded_drafts(self, mock_publish, mock_sleep, tmp_path):
        """Every uploaded draft is published and receives the returned DOI."""
        drafts_path = self._make_drafts(tmp_path, ["uploaded", "uploaded"])
        mock_publish.return_value = {
            "pids": {"doi": {"identifier": "10.5281/zenodo.pub"}},
            "links": {"self_html": "https://zenodo.org/records/pub"},
        }

        publish_all_drafts(drafts_path)

        stored = json.loads(drafts_path.read_text())
        assert all(entry["status"] == "published" for entry in stored)
        assert all(entry["doi"] == "10.5281/zenodo.pub" for entry in stored)
        assert mock_publish.call_count == 2

    @patch("changes_metadata_manager.zenodo_upload.time.sleep")
    @patch("changes_metadata_manager.zenodo_upload.piccione_publish_draft")
    def test_skips_already_published(self, mock_publish, mock_sleep, tmp_path):
        """A published entry keeps its DOI and is not published again."""
        drafts_path = self._make_drafts(tmp_path, ["published", "uploaded"])
        mock_publish.return_value = {
            "pids": {"doi": {"identifier": "10.5281/zenodo.new"}},
            "links": {"self_html": "https://zenodo.org/records/new"},
        }

        publish_all_drafts(drafts_path)

        assert mock_publish.call_count == 1
        first, second = json.loads(drafts_path.read_text())[:2]
        assert first["doi"] == "10.5281/existing"
        assert second["doi"] == "10.5281/zenodo.new"

    @patch("changes_metadata_manager.zenodo_upload.time.sleep")
    @patch("changes_metadata_manager.zenodo_upload.piccione_publish_draft")
    def test_failure_continues(self, mock_publish, mock_sleep, tmp_path):
        """A failing publish is recorded and the next draft is still published."""
        drafts_path = self._make_drafts(tmp_path, ["uploaded", "uploaded"])
        attempts = []

        def side_effect(*args, **kwargs):
            # Only the first publish attempt fails.
            attempts.append(args)
            if len(attempts) == 1:
                raise RuntimeError("publish error")
            return {
                "pids": {"doi": {"identifier": "10.5281/zenodo.ok"}},
                "links": {"self_html": "https://zenodo.org/records/ok"},
            }

        mock_publish.side_effect = side_effect
        publish_all_drafts(drafts_path)

        first, second = json.loads(drafts_path.read_text())[:2]
        assert first["status"] == "publish_failed"
        assert first["error"] == "publish error"
        assert second["status"] == "published"
        assert second["doi"] == "10.5281/zenodo.ok"

    @patch("changes_metadata_manager.zenodo_upload.time.sleep")
    @patch("changes_metadata_manager.zenodo_upload.piccione_publish_draft")
    def test_retries_publish_failed(self, mock_publish, mock_sleep, tmp_path):
        """A publish_failed entry is retried and its old error is dropped."""
        drafts_path = self._make_drafts(tmp_path, ["published", "publish_failed"])
        mock_publish.return_value = {
            "pids": {"doi": {"identifier": "10.5281/zenodo.retried"}},
            "links": {"self_html": "https://zenodo.org/records/retried"},
        }

        publish_all_drafts(drafts_path)

        assert mock_publish.call_count == 1
        retried = json.loads(drafts_path.read_text())[1]
        assert retried["status"] == "published"
        assert retried["doi"] == "10.5281/zenodo.retried"
        assert "error" not in retried

    @patch("changes_metadata_manager.zenodo_upload.time.sleep")
    @patch("changes_metadata_manager.zenodo_upload.piccione_publish_draft")
    def test_skips_upload_failed_entries(self, mock_publish, mock_sleep, tmp_path):
        """An entry whose upload failed is left untouched by publishing."""
        drafts_path = self._make_drafts(tmp_path, ["uploaded", "failed"])
        mock_publish.return_value = {
            "pids": {"doi": {"identifier": "10.5281/zenodo.ok"}},
            "links": {"self_html": "https://zenodo.org/records/ok"},
        }

        publish_all_drafts(drafts_path)

        assert mock_publish.call_count == 1
        with open(drafts_path) as f:
            drafts = json.load(f)
        assert drafts[0]["status"] == "published"
        assert drafts[1]["status"] == "failed"