Coverage for tests / test_zenodo_upload.py: 100%
511 statements
« prev ^ index » next coverage.py v7.12.0, created at 2026-03-21 12:19 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2026-03-21 12:19 +0000
1# SPDX-FileCopyrightText: 2025-2026 Arcangelo Massari <arcangelomas@gmail.com>
2#
3# SPDX-License-Identifier: ISC
5import tempfile
6import zipfile
7from pathlib import Path
9import pytest
10from rdflib import Graph, Literal, URIRef
12from changes_metadata_manager.folder_metadata_builder import load_kg
13from changes_metadata_manager.zenodo_upload import (
14 AAT,
15 BASE_URI,
16 CC0_DISCLAIMER,
17 RESTRICTED_NOTICE,
18 E21_PERSON,
19 P14_CARRIED_OUT_BY,
20 P16_USED_SPECIFIC_OBJECT,
21 P190_HAS_SYMBOLIC_CONTENT,
22 P1_IS_IDENTIFIED_BY,
23 P32_USED_GENERAL_TECHNIQUE,
24 P3_HAS_NOTE,
25 P70I,
26 P74_HAS_RESIDENCE,
27 RDF_TYPE,
28 _extract_doi,
29 _extract_record_url,
30 _format_creators_for_table,
31 _format_licenses_for_table,
32 build_creators_for_entity_stage,
33 build_enhanced_description,
34 build_entity_uri,
35 build_metadata_creators,
36 build_methods_description,
37 create_stage_zip,
38 extract_acquisition_technique,
39 extract_authors_for_entity_stage,
40 extract_devices,
41 extract_entity_title,
42 extract_keeper_info,
43 extract_license_for_entity_stage,
44 extract_licensed_entity_stages,
45 extract_metadata_authors,
46 extract_software_for_stage,
47 generate_zenodo_config,
48 group_folders_by_entity,
49 load_creators_lookup,
50 merge_creators,
51 slugify,
52)
# "data" directory that sits next to the directory containing this file.
DATA_DIR = Path(__file__).parent.parent.joinpath("data")
# Knowledge-graph file loaded by the `real_kg` fixture.
REAL_KG_PATH = DATA_DIR.joinpath("kg.ttl")
@pytest.fixture(scope="module")
def real_kg():
    """Return the graph loaded from ``REAL_KG_PATH``, shared by the whole module."""
    graph = load_kg(REAL_KG_PATH)
    return graph
class TestExtractLicensedEntityStages:
    """Tests for ``extract_licensed_entity_stages``."""

    def test_returns_set_of_tuples(self, real_kg):
        """The result is a set and every member is a 2-tuple."""
        stages = extract_licensed_entity_stages(real_kg)
        assert isinstance(stages, set)
        for entry in stages:
            assert isinstance(entry, tuple) and len(entry) == 2

    def test_known_licensed_entity(self, real_kg):
        """Entity "1" is reported as licensed for the dcho and dchoo stages."""
        stages = extract_licensed_entity_stages(real_kg)
        for expected in (("1", "dcho"), ("1", "dchoo")):
            assert expected in stages

    def test_maps_steps_to_stages(self):
        """Step codes 00-03 in licence URIs map to raw, rawp, dcho and dchoo."""
        license_target = URIRef("https://example.com/license")
        graph = Graph()
        for step in ("00", "01", "02", "03"):
            graph.add((URIRef(f"{BASE_URI}/lic/42/{step}/1"), P70I, license_target))
        stages = extract_licensed_entity_stages(graph)
        assert stages == {("42", "raw"), ("42", "rawp"), ("42", "dcho"), ("42", "dchoo")}
class TestGroupFoldersByEntity:
    """Tests for ``group_folders_by_entity``."""

    def test_groups_folders_by_entity_id(self):
        """Folders S1-01-* and S1-02-* end up under entity ids "1" and "2"."""
        sala = {
            "S1-01-Test": {"raw": {}, "dcho": {}},
            "S1-02-Other": {"raw": {}},
        }
        grouped = group_folders_by_entity({"structure": {"Sala1": sala}})
        assert "1" in grouped
        assert "2" in grouped
        entity_one = grouped["1"]
        assert len(entity_one) == 1
        assert entity_one[0][1] == "S1-01-Test"

    def test_groups_abc_variants(self):
        """The a/b/c variants of folder 98 are all grouped under entity "98"."""
        title = "DA-Calchi facciali colorati, boscimani"
        sala = {f"S6-98{variant}-{title}": {"raw": {}} for variant in "abc"}
        grouped = group_folders_by_entity({"structure": {"Sala6": sala}})
        assert "98" in grouped
        assert len(grouped["98"]) == 3

    def test_skips_skip_folders(self):
        """The CNR ceiling folder and "materials" are not grouped under entity "1"."""
        sala = {
            "S1-CNR_SoffittoSala1": {"raw": {}},
            "materials": {"raw": {}},
            "S1-01-Test": {"raw": {}},
        }
        grouped = group_folders_by_entity({"structure": {"Sala1": sala}})
        assert "1" in grouped
        names = [entry[1] for entry in grouped["1"]]
        for skipped in ("S1-CNR_SoffittoSala1", "materials"):
            assert skipped not in names
class TestSlugify:
    """Tests for ``slugify``."""

    def test_simple_text(self):
        """Plain text is lower-cased and spaces become hyphens."""
        assert slugify("Carta nautica") == "carta-nautica"

    def test_accented_characters(self):
        """Accented letters are reduced to their unaccented form."""
        assert slugify("Oggettò àccéntàto") == "oggetto-accentato"

    def test_special_characters(self):
        """Punctuation does not appear in the slug."""
        assert slugify("Test (object) #1") == "test-object-1"

    def test_multiple_spaces(self):
        """Runs of several spaces yield a single hyphen."""
        # The input must contain real runs of spaces; with single spaces this
        # test would only repeat test_simple_text.
        assert slugify("Multiple   spaces   here") == "multiple-spaces-here"

    def test_leading_trailing_spaces(self):
        """Surrounding whitespace leaves no leading or trailing hyphen."""
        assert slugify(" trimmed ") == "trimmed"
class TestCreateStageZip:
    """Tests for ``create_stage_zip`` run against throwaway directory trees."""

    @staticmethod
    def _make_workspace(tmpdir, sala, folder_files):
        """Create ``root/<sala>/<folder>/raw`` trees plus an empty output dir.

        ``folder_files`` maps each folder name to a ``{filename: content}``
        dict; folders and files are created in mapping order.  Returns the
        ``(root, output_dir)`` paths.
        """
        base = Path(tmpdir)
        root = base / "root"
        for folder, files in folder_files.items():
            raw_dir = root / sala / folder / "raw"
            raw_dir.mkdir(parents=True)
            for filename, content in files.items():
                (raw_dir / filename).write_text(content)
        output_dir = base / "output"
        output_dir.mkdir()
        return root, output_dir

    def test_includes_all_files_for_licensed_stage(self):
        """A licensed stage archive holds the photo as well as the metadata files."""
        files = {"meta.ttl": "{}", "prov.trig": "{}", "photo.jpg": "image"}
        with tempfile.TemporaryDirectory() as tmpdir:
            root, output_dir = self._make_workspace(tmpdir, "Sala1", {"S1-01-Test": files})
            outcome = create_stage_zip(
                "1",
                "raw",
                [("Sala1", "S1-01-Test", {"raw": {}})],
                root,
                {("1", "raw")},
                output_dir,
                "Test Object",
            )

            assert outcome is not None
            archive, licensed = outcome
            assert archive.name == "sala1-test-object-raw.zip"
            assert licensed is True
            with zipfile.ZipFile(archive) as zf:
                members = sorted(zf.namelist())
            assert members == ["S1-01-Test/raw/meta.ttl", "S1-01-Test/raw/photo.jpg", "S1-01-Test/raw/prov.trig"]

    def test_includes_only_metadata_for_unlicensed_stage(self):
        """Without a licence only meta.ttl and prov.trig are archived."""
        files = {"meta.ttl": "{}", "prov.trig": "{}", "photo.jpg": "image"}
        with tempfile.TemporaryDirectory() as tmpdir:
            root, output_dir = self._make_workspace(tmpdir, "Sala1", {"S1-01-Test": files})
            outcome = create_stage_zip(
                "1",
                "raw",
                [("Sala1", "S1-01-Test", {"raw": {}})],
                root,
                set(),
                output_dir,
                "Test Object",
            )

            assert outcome is not None
            archive, licensed = outcome
            assert licensed is False
            with zipfile.ZipFile(archive) as zf:
                members = sorted(zf.namelist())
            assert members == ["S1-01-Test/raw/meta.ttl", "S1-01-Test/raw/prov.trig"]

    def test_multiple_folders_in_zip(self):
        """Every folder of the entity contributes its files to one archive."""
        variants = {f"S6-98{variant}-Test": {"meta.ttl": "{}"} for variant in ("a", "b")}
        with tempfile.TemporaryDirectory() as tmpdir:
            root, output_dir = self._make_workspace(tmpdir, "Sala6", variants)
            folders = [("Sala6", name, {"raw": {}}) for name in variants]

            outcome = create_stage_zip("98", "raw", folders, root, set(), output_dir, "Test Masks")

            assert outcome is not None
            archive, licensed = outcome
            assert licensed is False
            with zipfile.ZipFile(archive) as zf:
                # Deliberately unsorted: the archive order itself is asserted.
                members = zf.namelist()
            assert members == ["S6-98a-Test/raw/meta.ttl", "S6-98b-Test/raw/meta.ttl"]

    def test_returns_none_for_missing_stage(self):
        """Asking for a stage with no folder returns None and writes no archive."""
        with tempfile.TemporaryDirectory() as tmpdir:
            root, output_dir = self._make_workspace(tmpdir, "Sala1", {"S1-01-Test": {"meta.ttl": "{}"}})
            folders = [("Sala1", "S1-01-Test", {"raw": {}})]

            outcome = create_stage_zip("1", "dcho", folders, root, set(), output_dir, "Test Object")

            assert outcome is None
            assert not (output_dir / "sala1-test-object-dcho.zip").exists()
class TestExtractEntityTitle:
    """Tests for ``extract_entity_title``."""

    def test_extracts_title_from_kg(self, real_kg):
        """Entity "1" in the real graph is titled "Carta nautica"."""
        assert extract_entity_title(real_kg, "1") == "Carta nautica"

    def test_returns_default_for_missing(self):
        """An unknown entity gets the placeholder title "Entity <id>"."""
        empty_graph = Graph()
        assert extract_entity_title(empty_graph, "nonexistent") == "Entity nonexistent"

    def test_takes_first_line(self):
        """Only the first line of a multi-line note is used as the title."""
        graph = Graph()
        note = Literal("First line\nSecond line")
        graph.add((URIRef(f"{BASE_URI}/itm/42/ob00/1"), P3_HAS_NOTE, note))
        assert extract_entity_title(graph, "42") == "First line"
class TestExtractAuthorsForEntityStage:
    """Tests for ``extract_authors_for_entity_stage``."""

    def test_extracts_author_from_kg(self, real_kg):
        """The raw stage of entity "1" has exactly one author."""
        found = extract_authors_for_entity_stage(real_kg, "1", "raw")
        assert found == {"Federica Bonifazi"}

    def test_accumulates_authors_across_steps(self, real_kg):
        """The dchoo stage reports the raw-stage author plus at least one more."""
        found = extract_authors_for_entity_stage(real_kg, "1", "dchoo")
        assert "Federica Bonifazi" in found
        assert len(found) > 1

    def test_returns_empty_for_missing_entity(self, real_kg):
        """An unknown entity yields an empty set."""
        found = extract_authors_for_entity_stage(real_kg, "nonexistent", "raw")
        assert found == set()

    def test_extracts_from_synthetic_graph(self):
        """A minimal activity -> person -> appellation chain yields the name."""
        person = URIRef(f"{BASE_URI}/per/42/1")
        appellation = URIRef(f"{BASE_URI}/apl/42/1")
        graph = Graph()
        for triple in (
            (URIRef(f"{BASE_URI}/act/42/00/1"), P14_CARRIED_OUT_BY, person),
            (person, RDF_TYPE, E21_PERSON),
            (person, P1_IS_IDENTIFIED_BY, appellation),
            (appellation, P190_HAS_SYMBOLIC_CONTENT, Literal("Test Author")),
        ):
            graph.add(triple)
        found = extract_authors_for_entity_stage(graph, "42", "raw")
        assert found == {"Test Author"}
class TestExtractMetadataAuthors:
    """Tests for ``extract_metadata_authors``."""

    def test_extracts_step_05_authors(self):
        """The person attached to a step-05 activity is returned."""
        person = URIRef(f"{BASE_URI}/per/meta/1")
        appellation = URIRef(f"{BASE_URI}/apl/meta/1")
        graph = Graph()
        for triple in (
            (URIRef(f"{BASE_URI}/act/42/05/1"), P14_CARRIED_OUT_BY, person),
            (person, RDF_TYPE, E21_PERSON),
            (person, P1_IS_IDENTIFIED_BY, appellation),
            (appellation, P190_HAS_SYMBOLIC_CONTENT, Literal("Metadata Author")),
        ):
            graph.add(triple)
        assert extract_metadata_authors(graph, "42") == {"Metadata Author"}

    def test_returns_empty_for_missing_entity(self):
        """An empty graph yields an empty set."""
        assert extract_metadata_authors(Graph(), "nonexistent") == set()

    def test_extracts_from_real_kg(self, real_kg):
        """Entity "1" in the real graph has three metadata authors."""
        expected = {"Arcangelo Massari", "Arianna Moretti", "Sebastian Barzaghi"}
        assert extract_metadata_authors(real_kg, "1") == expected
class TestLoadCreatorsLookup:
    """Tests for ``load_creators_lookup``."""

    def test_loads_creators_as_dict(self):
        """A creators YAML file is returned as a dict keyed by ``name_in_rdf``."""
        # The list item and its continuation keys need consistent indentation,
        # otherwise the document is not valid YAML.
        yaml_text = (
            "creators:\n"
            "  - name_in_rdf: Test Author\n"
            "    family_name: Author\n"
            "    given_name: Test\n"
            "    affiliation: Test Uni\n"
            "    orcid: 0000-0001-2345-6789\n"
        )
        # A temporary directory is removed on exit, so no file is left behind,
        # and the YAML file is fully written and closed before it is read.
        with tempfile.TemporaryDirectory() as tmpdir:
            creators_path = Path(tmpdir) / "creators.yaml"
            creators_path.write_text(yaml_text, encoding="utf-8")
            lookup = load_creators_lookup(creators_path)
        assert lookup == {
            "Test Author": {
                "family_name": "Author",
                "given_name": "Test",
                "affiliation": "Test Uni",
                "orcid": "0000-0001-2345-6789",
            }
        }
class TestBuildCreatorsForEntityStage:
    """Tests for ``build_creators_for_entity_stage``."""

    def test_builds_creators_with_researcher_role(self, real_kg):
        """A known author becomes a creator record with the "researcher" role."""
        lookup = {
            "Federica Bonifazi": {
                "family_name": "Bonifazi",
                "given_name": "Federica",
                "affiliation": "CNR-ISPC",
                "orcid": "0009-0000-8466-5541",
            }
        }
        creators = build_creators_for_entity_stage(real_kg, "1", "raw", lookup)
        assert creators == [
            {
                "person_or_org": {
                    "type": "personal",
                    "family_name": "Bonifazi",
                    "given_name": "Federica",
                    "identifiers": [{"scheme": "orcid", "identifier": "0009-0000-8466-5541"}],
                },
                "role": {"id": "researcher"},
                "affiliations": [{"name": "CNR-ISPC"}],
            }
        ]

    def test_ignores_authors_not_in_lookup(self, real_kg):
        """Authors absent from the lookup produce no creator records."""
        creators = build_creators_for_entity_stage(real_kg, "1", "raw", {})
        assert creators == []

    def test_sorts_authors_alphabetically(self):
        """Creators come back ordered by name, not by insertion order."""
        g = Graph()
        # Both people carried out the same activity; build its URI once.
        act_uri = URIRef(f"{BASE_URI}/act/42/00/1")
        # Inserted in reverse alphabetical order so the sort is observable.
        for name in ["Zeta Author", "Alpha Author"]:
            # Names contain a space, which is not allowed in an IRI, so the
            # URIs are built from a space-free key instead of the raw name.
            key = name.replace(" ", "_").lower()
            actor_uri = URIRef(f"{BASE_URI}/per/{key}/1")
            apl_uri = URIRef(f"{BASE_URI}/apl/{key}/1")
            g.add((act_uri, P14_CARRIED_OUT_BY, actor_uri))
            g.add((actor_uri, RDF_TYPE, E21_PERSON))
            g.add((actor_uri, P1_IS_IDENTIFIED_BY, apl_uri))
            g.add((apl_uri, P190_HAS_SYMBOLIC_CONTENT, Literal(name)))
        lookup = {
            "Alpha Author": {
                "family_name": "Author",
                "given_name": "Alpha",
                "affiliation": "Uni",
                "orcid": "0000-0000-0000-0001",
            },
            "Zeta Author": {
                "family_name": "Author",
                "given_name": "Zeta",
                "affiliation": "Uni",
                "orcid": "0000-0000-0000-0002",
            },
        }
        creators = build_creators_for_entity_stage(g, "42", "raw", lookup)
        assert [c["person_or_org"]["given_name"] for c in creators] == ["Alpha", "Zeta"]
class TestBuildMetadataCreators:
    """Tests for ``build_metadata_creators``."""

    def test_builds_creators_with_datacurator_role(self):
        """A step-05 author becomes a creator record with the "datacurator" role."""
        person = URIRef(f"{BASE_URI}/per/meta/1")
        appellation = URIRef(f"{BASE_URI}/apl/meta/1")
        graph = Graph()
        for triple in (
            (URIRef(f"{BASE_URI}/act/42/05/1"), P14_CARRIED_OUT_BY, person),
            (person, RDF_TYPE, E21_PERSON),
            (person, P1_IS_IDENTIFIED_BY, appellation),
            (appellation, P190_HAS_SYMBOLIC_CONTENT, Literal("Metadata Author")),
        ):
            graph.add(triple)

        known_people = {
            "Metadata Author": {
                "family_name": "Author",
                "given_name": "Metadata",
                "affiliation": "Test Uni",
                "orcid": "0000-0001-2345-6789",
            }
        }
        expected_record = {
            "person_or_org": {
                "type": "personal",
                "family_name": "Author",
                "given_name": "Metadata",
                "identifiers": [{"scheme": "orcid", "identifier": "0000-0001-2345-6789"}],
            },
            "role": {"id": "datacurator"},
            "affiliations": [{"name": "Test Uni"}],
        }
        assert build_metadata_creators(graph, "42", known_people) == [expected_record]
class TestMergeCreators:
    """Tests for ``merge_creators``."""

    @staticmethod
    def _creator(family_name, given_name, orcid, role):
        """Build one creator record affiliated with "Uni"."""
        return {
            "person_or_org": {
                "type": "personal",
                "family_name": family_name,
                "given_name": given_name,
                "identifiers": [{"scheme": "orcid", "identifier": orcid}],
            },
            "role": {"id": role},
            "affiliations": [{"name": "Uni"}],
        }

    def test_merges_without_duplicates(self):
        """Distinct people are both kept, digitization creators first."""
        digitization = [self._creator("Author", "Digit", "0000-0000-0000-0001", "researcher")]
        metadata = [self._creator("Author", "Meta", "0000-0000-0000-0002", "datacurator")]
        merged = merge_creators(digitization, metadata)
        assert len(merged) == 2
        roles = [entry["role"] for entry in merged]
        assert roles[0] == {"id": "researcher"}
        assert roles[1] == {"id": "datacurator"}

    def test_deduplicates_by_orcid(self):
        """The same ORCID in both lists is kept once, with the researcher role."""
        shared_orcid = "0000-0000-0000-0001"
        digitization = [self._creator("Shared", "Author", shared_orcid, "researcher")]
        metadata = [self._creator("Shared", "Author", shared_orcid, "datacurator")]
        merged = merge_creators(digitization, metadata)
        assert len(merged) == 1
        assert merged[0]["role"] == {"id": "researcher"}

    def test_empty_lists(self):
        """Merging two empty lists gives an empty list."""
        merged = merge_creators([], [])
        assert merged == []

    def test_only_metadata_creators(self):
        """With no digitization creators the metadata creator is returned as is."""
        metadata = [self._creator("Author", "Meta", "0000-0000-0000-0001", "datacurator")]
        merged = merge_creators([], metadata)
        assert len(merged) == 1
        assert merged[0]["role"] == {"id": "datacurator"}
class TestBuildEntityUri:
    """Tests for ``build_entity_uri``."""

    def test_builds_uri_for_numeric_id(self):
        """A numeric id is placed in the ``itm/<id>/ob00/1`` URI."""
        expected = "https://w3id.org/changes/4/aldrovandi/itm/27/ob00/1"
        assert build_entity_uri("27") == expected

    def test_builds_uri_for_string_id(self):
        """A non-numeric id is placed in the URI the same way."""
        expected = "https://w3id.org/changes/4/aldrovandi/itm/ptb/ob00/1"
        assert build_entity_uri("ptb") == expected
# Creator record passed to generate_zenodo_config in the tests below.
SAMPLE_CREATOR = dict(
    person_or_org=dict(
        type="personal",
        family_name="Author",
        given_name="Test",
        identifiers=[dict(scheme="orcid", identifier="0000-0001-2345-6789")],
    ),
    role=dict(id="researcher"),
    affiliations=[dict(name="Test Uni")],
)

# Base configuration passed to generate_zenodo_config in the tests below.
SAMPLE_BASE_CONFIG = dict(
    zenodo_url="https://sandbox.zenodo.org/api",
    access_token="test_token",
    user_agent="piccione/2.1.0",
    subjects=[dict(subject="test")],
    notes="Test notes content",
    locations=[
        dict(
            lat=44.497,
            lon=11.353,
            place="Bologna, Italy",
            description="Palazzo Poggi Museum",
        ),
    ],
)

# Methods text passed to generate_zenodo_config in the tests below.
SAMPLE_METHODS = "Test method content"
class TestGenerateZenodoConfig:
    """Tests for ``generate_zenodo_config`` with the clock frozen at 2024-06-15."""

    @staticmethod
    def _generate(freezer, zip_name="1-raw.zip", base_config=None, **extra):
        """Freeze time and build a raw-stage config for "Test Title".

        ``base_config`` falls back to ``SAMPLE_BASE_CONFIG``; ``extra`` is
        forwarded as keyword arguments.  Returns ``(zip_path, config)``.
        """
        freezer.move_to("2024-06-15")
        if base_config is None:
            base_config = SAMPLE_BASE_CONFIG
        zip_path = Path("/tmp") / zip_name
        config = generate_zenodo_config(
            "raw", zip_path, "Test Title", base_config, [SAMPLE_CREATOR], SAMPLE_METHODS, **extra
        )
        return zip_path, config

    def test_generates_valid_config(self, freezer):
        """The full config produced for a default call matches field by field."""
        zip_path, config = self._generate(freezer)

        expected = {
            "zenodo_url": "https://sandbox.zenodo.org/api",
            "access_token": "test_token",
            "user_agent": "piccione/2.1.0",
            "title": "Test Title - Raw - Aldrovandi Digital Twin",
            "description": 'Raw acquisition data of "Test Title" from the Aldrovandi Digital Twin. This dataset contains the raw material generated during the acquisition phase. Includes metadata (meta.ttl) and provenance (prov.trig) files following the <a href="https://w3id.org/dharc/ontology/chad-ap">CHAD-AP</a> ontology.\n',
            "resource_type": {"id": "dataset"},
            "publisher": "Zenodo",
            "access": {"record": "public", "files": "public"},
            "creators": [SAMPLE_CREATOR],
            "subjects": [{"subject": "test"}],
            "files": [str(zip_path.absolute())],
            "publication_date": "2024-06-15",
            "rights": [
                {
                    "title": {"en": "Creative Commons Zero v1.0 Universal (Metadata license)"},
                    "description": {"en": "Applies to metadata files: meta.ttl, prov.trig"},
                    "link": "https://creativecommons.org/publicdomain/zero/1.0/",
                },
            ],
            "additional_descriptions": [
                {"description": "Test method content", "type": {"id": "methods"}},
                {"description": "Test notes content", "type": {"id": "notes"}},
            ],
            "locations": {
                "features": [
                    {
                        "geometry": {"type": "Point", "coordinates": [11.353, 44.497]},
                        "place": "Bologna, Italy",
                        "description": "Palazzo Poggi Museum",
                    },
                ]
            },
        }
        assert config == expected

    def test_adds_entity_uri_as_alternate_identifier(self, freezer):
        """``entity_uri`` is emitted as a single url-scheme identifier."""
        uri = "https://w3id.org/changes/4/aldrovandi/itm/27/ob00/1"
        _, config = self._generate(freezer, zip_name="27-raw.zip", entity_uri=uri)

        assert config["identifiers"] == [
            {"identifier": "https://w3id.org/changes/4/aldrovandi/itm/27/ob00/1", "scheme": "url"}
        ]

    def test_converts_related_identifiers(self, freezer):
        """Flat relation/resource_type strings become ``{"id": ...}`` objects."""
        related = {
            "identifier": "10.3724/2096-7004.di.2024.0061",
            "relation": "isdocumentedby",
            "resource_type": "publication-article",
        }
        base_config = {**SAMPLE_BASE_CONFIG, "related_identifiers": [related]}
        _, config = self._generate(freezer, zip_name="27-raw.zip", base_config=base_config)

        assert config["related_identifiers"] == [
            {
                "identifier": "10.3724/2096-7004.di.2024.0061",
                "relation_type": {"id": "isdocumentedby"},
                "resource_type": {"id": "publication-article"},
            },
        ]

    def test_converts_notes_and_method_to_additional_descriptions(self, freezer):
        """Methods text and notes become additional descriptions, in that order."""
        _, config = self._generate(freezer)

        assert config["additional_descriptions"] == [
            {"description": "Test method content", "type": {"id": "methods"}},
            {"description": "Test notes content", "type": {"id": "notes"}},
        ]

    def test_cc0_disclaimer_in_additional_descriptions(self, freezer):
        """A cc0-1.0 licence appends the CC0 disclaimer as a further note."""
        _, config = self._generate(freezer, license="cc0-1.0")

        assert config["additional_descriptions"] == [
            {"description": "Test method content", "type": {"id": "methods"}},
            {"description": "Test notes content", "type": {"id": "notes"}},
            {"description": CC0_DISCLAIMER, "type": {"id": "notes"}},
        ]

    def test_converts_locations_to_geojson(self, freezer):
        """lat/lon pairs become Point features with [lon, lat] coordinates."""
        _, config = self._generate(freezer)

        feature = {
            "geometry": {"type": "Point", "coordinates": [11.353, 44.497]},
            "place": "Bologna, Italy",
            "description": "Palazzo Poggi Museum",
        }
        assert config["locations"] == {"features": [feature]}

    def test_includes_community_field(self, freezer):
        """A ``community`` entry in the base config is carried over."""
        base_config = {**SAMPLE_BASE_CONFIG, "community": "project-changes"}
        _, config = self._generate(freezer, base_config=base_config)

        assert config["community"] == "project-changes"

    def test_includes_restricted_notice_when_no_license(self, freezer):
        """Without a licence the restricted notice is a note, not description text."""
        _, config = self._generate(freezer, has_license=False)

        notice_entry = {"description": RESTRICTED_NOTICE, "type": {"id": "notes"}}
        assert RESTRICTED_NOTICE not in config["description"]
        assert notice_entry in config["additional_descriptions"]

    def test_no_restricted_notice_when_licensed(self, freezer):
        """With a licence the restricted notice is absent."""
        _, config = self._generate(freezer, has_license=True)

        notice_entry = {"description": RESTRICTED_NOTICE, "type": {"id": "notes"}}
        assert notice_entry not in config["additional_descriptions"]

    def test_propagates_funding_field(self, freezer):
        """A ``funding`` entry in the base config is carried over unchanged."""
        award = {"title": {"en": "CHANGES"}, "number": "PE 0000020"}
        funding = [{"funder": {"name": "European Union - NextGenerationEU"}, "award": award}]
        base_config = {**SAMPLE_BASE_CONFIG, "funding": funding}
        _, config = self._generate(freezer, base_config=base_config)

        assert config["funding"] == funding
class TestExtractLicenseForEntityStage:
    """Tests for ``extract_license_for_entity_stage``."""

    @staticmethod
    def _graph_with_license(license_url):
        """Return a graph linking the step-00 licence node of entity 42 to *license_url*."""
        graph = Graph()
        graph.add((URIRef(f"{BASE_URI}/lic/42/00/1"), P70I, URIRef(license_url)))
        return graph

    def test_extracts_license_from_kg(self):
        """The CC0 URL maps to the id "cc0-1.0"."""
        graph = self._graph_with_license("https://creativecommons.org/publicdomain/zero/1.0/")
        assert extract_license_for_entity_stage(graph, "42", "raw") == "cc0-1.0"

    def test_returns_none_for_missing_license(self):
        """An empty graph gives None."""
        assert extract_license_for_entity_stage(Graph(), "42", "raw") is None

    def test_returns_none_for_unknown_license_uri(self):
        """A licence URL that is not recognised gives None."""
        graph = self._graph_with_license("https://example.com/custom-license")
        assert extract_license_for_entity_stage(graph, "42", "raw") is None

    def test_extracts_cc_by(self):
        """The CC BY 4.0 URL maps to the id "cc-by-4.0"."""
        graph = self._graph_with_license("https://creativecommons.org/licenses/by/4.0/")
        assert extract_license_for_entity_stage(graph, "42", "raw") == "cc-by-4.0"
class TestExtractKeeperInfo:
    """Tests for ``extract_keeper_info``."""

    def test_extracts_keeper_from_kg(self, real_kg):
        """Entity "1" is kept by the Bologna university library."""
        info = extract_keeper_info(real_kg, "1")
        assert info[0] == "Biblioteca Universitaria di Bologna"
        assert info[1] == "Bologna"

    def test_extracts_non_bologna_keeper(self, real_kg):
        """Entity "21" is kept by Accademia Carrara in Bergamo."""
        name, location = extract_keeper_info(real_kg, "21")
        assert (name, location) == ("Accademia Carrara", "Bergamo")

    def test_returns_none_for_missing_entity(self, real_kg):
        """An unknown entity gives None for both name and location."""
        name, location = extract_keeper_info(real_kg, "nonexistent")
        assert name is None
        assert location is None

    def test_extracts_from_synthetic_graph(self):
        """Keeper name and residence are read through their appellations."""
        keeper = URIRef(f"{BASE_URI}/acr/test_museum/1")
        keeper_label = URIRef(f"{BASE_URI}/apl/test_museum/1")
        place = URIRef(f"{BASE_URI}/plc/test_city/1")
        place_label = URIRef(f"{BASE_URI}/apl/test_city/1")
        graph = Graph()
        for triple in (
            (URIRef(f"{BASE_URI}/act/42/ob08/1"), P14_CARRIED_OUT_BY, keeper),
            (keeper, P1_IS_IDENTIFIED_BY, keeper_label),
            (keeper_label, P190_HAS_SYMBOLIC_CONTENT, Literal("Test Museum")),
            (keeper, P74_HAS_RESIDENCE, place),
            (place, P1_IS_IDENTIFIED_BY, place_label),
            (place_label, P190_HAS_SYMBOLIC_CONTENT, Literal("Test City")),
        ):
            graph.add(triple)
        name, location = extract_keeper_info(graph, "42")
        assert name == "Test Museum"
        assert location == "Test City"

    def test_keeper_without_location(self):
        """A keeper with no residence triple gives a name and a None location."""
        keeper = URIRef(f"{BASE_URI}/acr/test_museum/1")
        keeper_label = URIRef(f"{BASE_URI}/apl/test_museum/1")
        graph = Graph()
        for triple in (
            (URIRef(f"{BASE_URI}/act/42/ob08/1"), P14_CARRIED_OUT_BY, keeper),
            (keeper, P1_IS_IDENTIFIED_BY, keeper_label),
            (keeper_label, P190_HAS_SYMBOLIC_CONTENT, Literal("Test Museum")),
        ):
            graph.add(triple)
        name, location = extract_keeper_info(graph, "42")
        assert name == "Test Museum"
        assert location is None
class TestBuildEnhancedDescription:
    """Tests for ``build_enhanced_description``."""

    def test_raw_stage_description(self):
        """The raw-stage text is matched verbatim, trailing newline included."""
        expected = "".join(
            [
                'Raw acquisition data of "Test Object" from the Aldrovandi Digital Twin. ',
                "This dataset contains the raw material generated during the acquisition phase. ",
                'Includes metadata (meta.ttl) and provenance (prov.trig) files following the <a href="https://w3id.org/dharc/ontology/chad-ap">CHAD-AP</a> ontology.\n',
            ]
        )
        assert build_enhanced_description("raw", "Test Object") == expected

    def test_dcho_stage_description(self):
        """The dcho text names the object type, the title and the processing."""
        text = build_enhanced_description("dcho", "Museum Specimen")
        for fragment in (
            "Digital Cultural Heritage Object",
            '"Museum Specimen"',
            "interpolation, gap filling, and resolution of geometric issues",
        ):
            assert fragment in text

    def test_dchoo_stage_description(self):
        """The dchoo text mentions the optimised object and online interaction."""
        text = build_enhanced_description("dchoo", "Object Title")
        assert "Optimized Digital Cultural Heritage Object" in text
        assert "optimised for real-time online interaction" in text

    def test_description_never_contains_disclaimer(self):
        """The CC0 disclaimer is not part of the description."""
        text = build_enhanced_description("dcho", "Test Object")
        assert CC0_DISCLAIMER not in text

    def test_includes_keeper_and_location(self):
        """Keeper and location are rendered as "<keeper> (<location>)"."""
        text = build_enhanced_description(
            "raw", "Test Object", keeper_name="Test Museum", keeper_location="Test City"
        )
        assert "The original object is held by Test Museum (Test City)." in text

    def test_includes_keeper_without_location(self):
        """With no location the keeper sentence has no parenthesis."""
        text = build_enhanced_description("raw", "Test Object", keeper_name="Test Museum")
        assert "The original object is held by Test Museum." in text
        assert "Test Museum (" not in text

    def test_no_keeper_line_when_none(self):
        """With no keeper the "held by" sentence is omitted."""
        text = build_enhanced_description("raw", "Test Object")
        assert "held by" not in text

    def test_description_is_single_paragraph(self):
        """Apart from trailing newlines the description has no line breaks."""
        text = build_enhanced_description("raw", "Test Object", keeper_name="Museum", keeper_location="City")
        body = text.rstrip("\n")
        assert "\n" not in body
class TestFormatCreatorsForTable:
    """Tests for ``_format_creators_for_table``."""

    @staticmethod
    def _person(family_name, given_name, orcid):
        """Build one creator entry holding only ``person_or_org``."""
        return {
            "person_or_org": {
                "family_name": family_name,
                "given_name": given_name,
                "identifiers": [{"scheme": "orcid", "identifier": orcid}],
            }
        }

    def test_formats_multiple_creators(self):
        """Several creators are joined with "; " in input order."""
        config = {
            "creators": [
                self._person("Bordignon", "Alice", "0009-0008-3556-0493"),
                self._person("Massari", "Arcangelo", "0000-0002-8420-0696"),
            ]
        }
        expected = "Bordignon, Alice [orcid:0009-0008-3556-0493]; Massari, Arcangelo [orcid:0000-0002-8420-0696]"
        assert _format_creators_for_table(config) == expected

    def test_formats_single_creator(self):
        """A single creator is rendered without a separator."""
        config = {"creators": [self._person("Barzaghi", "Sebastian", "0000-0002-0799-1527")]}
        expected = "Barzaghi, Sebastian [orcid:0000-0002-0799-1527]"
        assert _format_creators_for_table(config) == expected
class TestFormatLicensesForTable:
    """Tests for ``_format_licenses_for_table``."""

    @staticmethod
    def _rights(*titles):
        """Wrap English licence titles in a config with a ``rights`` list."""
        return {"rights": [{"title": {"en": title}} for title in titles]}

    def test_formats_cc0_metadata_and_content(self):
        """Two CC0 entries are both shortened to "cc0-1.0"."""
        config = self._rights(
            "Creative Commons Zero v1.0 Universal (Metadata license)",
            "Creative Commons Zero v1.0 Universal (Content license)",
        )
        expected = "cc0-1.0 (Metadata license); cc0-1.0 (Content license)"
        assert _format_licenses_for_table(config) == expected

    def test_formats_mixed_licenses(self):
        """A CC BY-NC-SA content licence is shortened next to CC0 metadata."""
        config = self._rights(
            "Creative Commons Zero v1.0 Universal (Metadata license)",
            "Creative Commons Attribution Non Commercial Share Alike 4.0 International (Content license)",
        )
        expected = "cc0-1.0 (Metadata license); cc-by-nc-sa-4.0 (Content license)"
        assert _format_licenses_for_table(config) == expected

    def test_formats_metadata_only(self):
        """A lone metadata licence is rendered without a separator."""
        config = self._rights("Creative Commons Zero v1.0 Universal (Metadata license)")
        assert _format_licenses_for_table(config) == "cc0-1.0 (Metadata license)"
class TestExtractDoi:
    """Tests for ``_extract_doi``."""

    def test_extracts_doi_from_record(self):
        """The DOI is read from ``pids.doi.identifier``."""
        doi = "10.5281/zenodo.12345"
        assert _extract_doi({"pids": {"doi": {"identifier": doi}}}) == "10.5281/zenodo.12345"

    def test_returns_empty_string_on_sandbox(self):
        """A record with no DOI information gives an empty string."""
        for record in ({}, {"pids": {}}):
            assert _extract_doi(record) == ""
class TestExtractRecordUrl:
    """Tests for ``_extract_record_url``."""

    def test_extracts_url_from_record(self):
        """The URL is read from ``links.self_html``."""
        links = {"self_html": "https://zenodo.org/records/12345"}
        url = _extract_record_url({"links": links})
        assert url == "https://zenodo.org/records/12345"
class TestExtractAcquisitionTechnique:
    """Tests for ``extract_acquisition_technique``."""

    def test_extracts_photography_from_kg(self, real_kg):
        """Entity "1" was acquired by digital photography."""
        assert extract_acquisition_technique(real_kg, "1") == "digital photography"

    def test_extracts_scanning_from_kg(self, real_kg):
        """Entity "12" was acquired by optical scanning."""
        assert extract_acquisition_technique(real_kg, "12") == "optical scanning"

    def test_returns_none_for_missing_entity(self):
        """An empty graph gives None."""
        technique = extract_acquisition_technique(Graph(), "nonexistent")
        assert technique is None

    def test_extracts_from_synthetic_graph(self):
        """AAT concept 300266792 on the step-00 activity reads as digital photography."""
        graph = Graph()
        activity = URIRef(f"{BASE_URI}/act/42/00/1")
        concept = URIRef(f"{AAT}300266792")
        graph.add((activity, P32_USED_GENERAL_TECHNIQUE, concept))
        technique = extract_acquisition_technique(graph, "42")
        assert technique == "digital photography"
class TestExtractDevices:
    """Tests for ``extract_devices``."""

    def test_extracts_devices_from_kg(self, real_kg):
        """Entity "1" lists its lens and camera."""
        assert extract_devices(real_kg, "1") == ["Nikkor 50mm", "Nikon D7200"]

    def test_extracts_scanner_device(self, real_kg):
        """Entity "12" lists a single scanner."""
        assert extract_devices(real_kg, "12") == ["Artec Eva"]

    def test_returns_empty_for_missing_entity(self):
        """An empty graph gives an empty list."""
        found = extract_devices(Graph(), "nonexistent")
        assert found == []

    def test_excludes_item_uris(self):
        """Objects under ``itm/`` are not reported as devices."""
        activity = URIRef(f"{BASE_URI}/act/42/00/1")
        used_objects = (
            URIRef(f"{BASE_URI}/dev/nikon_d7200/1"),
            URIRef(f"{BASE_URI}/itm/42/ob00/1"),
        )
        graph = Graph()
        for used in used_objects:
            graph.add((activity, P16_USED_SPECIFIC_OBJECT, used))
        assert extract_devices(graph, "42") == ["Nikon D7200"]
class TestExtractSoftwareForStage:
    """Tests for ``extract_software_for_stage``."""

    def test_extracts_raw_software(self, real_kg):
        """The raw stage of entity "1" lists no software."""
        assert extract_software_for_stage(real_kg, "1", "raw") == []

    def test_extracts_rawp_software(self, real_kg):
        """The rawp stage of entity "1" lists 3DF Zephyr."""
        assert "3DF Zephyr" in extract_software_for_stage(real_kg, "1", "rawp")

    def test_excludes_metadata_step_software(self, real_kg):
        """CHAD-AP, HeriTrace and Morph-KGC are absent from the dchoo list."""
        listed = extract_software_for_stage(real_kg, "1", "dchoo")
        for tool in ("CHAD-AP", "HeriTrace", "Morph-KGC"):
            assert tool not in listed

    def test_includes_step_06_software(self, real_kg):
        """ATON appears in the dchoo list."""
        listed = extract_software_for_stage(real_kg, "1", "dchoo")
        assert "ATON" in listed

    def test_returns_empty_for_missing_entity(self):
        """An empty graph gives an empty list."""
        listed = extract_software_for_stage(Graph(), "nonexistent", "raw")
        assert listed == []
class TestBuildMethodsDescription:
    """Tests for ``build_methods_description``."""

    def test_includes_workflow_reference(self):
        """The workflow DOI is present even for an empty graph."""
        text = build_methods_description(Graph(), "nonexistent", "raw")
        assert "doi:10.46298/transformations.14773" in text

    def test_includes_technique_and_devices(self, real_kg):
        """Technique and device names of entity "1" appear in the raw text."""
        text = build_methods_description(real_kg, "1", "raw")
        for fragment in ("digital photography", "Nikon D7200"):
            assert fragment in text

    def test_includes_software_for_rawp(self, real_kg):
        """The rawp text has a software section naming 3DF Zephyr."""
        text = build_methods_description(real_kg, "1", "rawp")
        for fragment in ("Processing software:", "3DF Zephyr"):
            assert fragment in text

    def test_no_software_for_raw(self, real_kg):
        """The raw text has no software section."""
        text = build_methods_description(real_kg, "1", "raw")
        assert "Processing software:" not in text

    def test_includes_chad_ap_reference(self):
        """CHAD-AP is mentioned even for an empty graph."""
        text = build_methods_description(Graph(), "nonexistent", "raw")
        assert "CHAD-AP" in text

    def test_scanning_entity(self, real_kg):
        """Technique and device of the scanned entity "12" appear in the text."""
        text = build_methods_description(real_kg, "12", "raw")
        for fragment in ("optical scanning", "Artec Eva"):
            assert fragment in text