Coverage for tests/test_zenodo_upload.py: 100%

511 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2026-03-21 12:19 +0000

1# SPDX-FileCopyrightText: 2025-2026 Arcangelo Massari <arcangelomas@gmail.com> 

2# 

3# SPDX-License-Identifier: ISC 

4 

5import tempfile 

6import zipfile 

7from pathlib import Path 

8 

9import pytest 

10from rdflib import Graph, Literal, URIRef 

11 

12from changes_metadata_manager.folder_metadata_builder import load_kg 

13from changes_metadata_manager.zenodo_upload import ( 

14 AAT, 

15 BASE_URI, 

16 CC0_DISCLAIMER, 

17 RESTRICTED_NOTICE, 

18 E21_PERSON, 

19 P14_CARRIED_OUT_BY, 

20 P16_USED_SPECIFIC_OBJECT, 

21 P190_HAS_SYMBOLIC_CONTENT, 

22 P1_IS_IDENTIFIED_BY, 

23 P32_USED_GENERAL_TECHNIQUE, 

24 P3_HAS_NOTE, 

25 P70I, 

26 P74_HAS_RESIDENCE, 

27 RDF_TYPE, 

28 _extract_doi, 

29 _extract_record_url, 

30 _format_creators_for_table, 

31 _format_licenses_for_table, 

32 build_creators_for_entity_stage, 

33 build_enhanced_description, 

34 build_entity_uri, 

35 build_metadata_creators, 

36 build_methods_description, 

37 create_stage_zip, 

38 extract_acquisition_technique, 

39 extract_authors_for_entity_stage, 

40 extract_devices, 

41 extract_entity_title, 

42 extract_keeper_info, 

43 extract_license_for_entity_stage, 

44 extract_licensed_entity_stages, 

45 extract_metadata_authors, 

46 extract_software_for_stage, 

47 generate_zenodo_config, 

48 group_folders_by_entity, 

49 load_creators_lookup, 

50 merge_creators, 

51 slugify, 

52) 

53 

54 

# ``data`` directory resolved two levels above this test module.
DATA_DIR = Path(__file__).parents[1] / "data"
# Turtle file loaded by the ``real_kg`` fixture.
REAL_KG_PATH = DATA_DIR.joinpath("kg.ttl")

57 

58 

@pytest.fixture(scope="module")
def real_kg():
    """Load the graph stored at ``REAL_KG_PATH`` once per test module."""
    graph = load_kg(REAL_KG_PATH)
    return graph

62 

63 

class TestExtractLicensedEntityStages:
    """Tests for ``extract_licensed_entity_stages``."""

    def test_returns_set_of_tuples(self, real_kg):
        """The result is a set and every member is a 2-tuple."""
        stages = extract_licensed_entity_stages(real_kg)
        assert isinstance(stages, set)
        for pair in stages:
            assert isinstance(pair, tuple) and len(pair) == 2

    def test_known_licensed_entity(self, real_kg):
        """Entity "1" is reported for both the dcho and dchoo stages."""
        stages = extract_licensed_entity_stages(real_kg)
        assert ("1", "dcho") in stages
        assert ("1", "dchoo") in stages

    def test_maps_steps_to_stages(self):
        """Step codes 00-03 in license URIs map to raw, rawp, dcho, dchoo."""
        graph = Graph()
        license_target = URIRef("https://example.com/license")
        for step in ("00", "01", "02", "03"):
            graph.add((URIRef(f"{BASE_URI}/lic/42/{step}/1"), P70I, license_target))
        stages = extract_licensed_entity_stages(graph)
        expected = {("42", "raw"), ("42", "rawp"), ("42", "dcho"), ("42", "dchoo")}
        assert stages == expected

83 

84 

class TestGroupFoldersByEntity:
    """Tests for ``group_folders_by_entity``."""

    def test_groups_folders_by_entity_id(self):
        """Folders are keyed by the entity number taken from their name."""
        sala1 = {
            "S1-01-Test": {"raw": {}, "dcho": {}},
            "S1-02-Other": {"raw": {}},
        }
        grouped = group_folders_by_entity({"structure": {"Sala1": sala1}})
        assert "1" in grouped
        assert "2" in grouped
        assert len(grouped["1"]) == 1
        assert grouped["1"][0][1] == "S1-01-Test"

    def test_groups_abc_variants(self):
        """Folders that differ only by an a/b/c suffix share one entity id."""
        sala6 = {
            "S6-98a-DA-Calchi facciali colorati, boscimani": {"raw": {}},
            "S6-98b-DA-Calchi facciali colorati, boscimani": {"raw": {}},
            "S6-98c-DA-Calchi facciali colorati, boscimani": {"raw": {}},
        }
        grouped = group_folders_by_entity({"structure": {"Sala6": sala6}})
        assert "98" in grouped
        assert len(grouped["98"]) == 3

    def test_skips_skip_folders(self):
        """Skipped folders never appear among the folders of entity "1"."""
        sala1 = {
            "S1-CNR_SoffittoSala1": {"raw": {}},
            "materials": {"raw": {}},
            "S1-01-Test": {"raw": {}},
        }
        grouped = group_folders_by_entity({"structure": {"Sala1": sala1}})
        assert "1" in grouped
        collected_names = [entry[1] for entry in grouped["1"]]
        assert "S1-CNR_SoffittoSala1" not in collected_names
        assert "materials" not in collected_names

130 

131 

class TestSlugify:
    """Tests for ``slugify``."""

    def test_simple_text(self):
        """Plain words are lower-cased and joined with hyphens."""
        assert slugify("Carta nautica") == "carta-nautica"

    def test_accented_characters(self):
        """Accented letters are reduced to their unaccented form."""
        assert slugify("Oggettò àccéntàto") == "oggetto-accentato"

    def test_special_characters(self):
        """Punctuation and symbols are dropped from the slug."""
        assert slugify("Test (object) #1") == "test-object-1"

    def test_multiple_spaces(self):
        """Runs of spaces collapse into a single hyphen."""
        # The input must contain consecutive spaces; with single spaces this
        # test would only repeat test_simple_text.
        assert slugify("Multiple   spaces   here") == "multiple-spaces-here"

    def test_leading_trailing_spaces(self):
        """Surrounding whitespace leaves no leading or trailing hyphen."""
        assert slugify("  trimmed  ") == "trimmed"

147 

148 

class TestCreateStageZip:
    """Tests for ``create_stage_zip``."""

    def test_includes_all_files_for_licensed_stage(self):
        """A licensed stage is zipped with metadata and content files."""
        with tempfile.TemporaryDirectory() as workdir:
            base = Path(workdir)
            root = base / "root"
            raw_dir = root / "Sala1" / "S1-01-Test" / "raw"
            raw_dir.mkdir(parents=True)
            for filename, content in (("meta.ttl", "{}"), ("prov.trig", "{}"), ("photo.jpg", "image")):
                (raw_dir / filename).write_text(content)

            output_dir = base / "output"
            output_dir.mkdir()

            outcome = create_stage_zip(
                "1",
                "raw",
                [("Sala1", "S1-01-Test", {"raw": {}})],
                root,
                {("1", "raw")},
                output_dir,
                "Test Object",
            )

            assert outcome is not None
            archive, licensed = outcome
            assert archive.name == "sala1-test-object-raw.zip"
            assert licensed is True
            with zipfile.ZipFile(archive) as bundle:
                members = sorted(bundle.namelist())
            assert members == [
                "S1-01-Test/raw/meta.ttl",
                "S1-01-Test/raw/photo.jpg",
                "S1-01-Test/raw/prov.trig",
            ]

    def test_includes_only_metadata_for_unlicensed_stage(self):
        """An unlicensed stage is zipped with meta.ttl and prov.trig only."""
        with tempfile.TemporaryDirectory() as workdir:
            base = Path(workdir)
            root = base / "root"
            raw_dir = root / "Sala1" / "S1-01-Test" / "raw"
            raw_dir.mkdir(parents=True)
            for filename, content in (("meta.ttl", "{}"), ("prov.trig", "{}"), ("photo.jpg", "image")):
                (raw_dir / filename).write_text(content)

            output_dir = base / "output"
            output_dir.mkdir()

            outcome = create_stage_zip(
                "1",
                "raw",
                [("Sala1", "S1-01-Test", {"raw": {}})],
                root,
                set(),
                output_dir,
                "Test Object",
            )

            assert outcome is not None
            archive, licensed = outcome
            assert licensed is False
            with zipfile.ZipFile(archive) as bundle:
                members = sorted(bundle.namelist())
            assert members == ["S1-01-Test/raw/meta.ttl", "S1-01-Test/raw/prov.trig"]

    def test_multiple_folders_in_zip(self):
        """All folders of one entity end up in the same archive."""
        with tempfile.TemporaryDirectory() as workdir:
            base = Path(workdir)
            root = base / "root"

            for suffix in ("a", "b"):
                raw_dir = root / "Sala6" / f"S6-98{suffix}-Test" / "raw"
                raw_dir.mkdir(parents=True)
                (raw_dir / "meta.ttl").write_text("{}")

            output_dir = base / "output"
            output_dir.mkdir()

            entity_folders = [
                ("Sala6", "S6-98a-Test", {"raw": {}}),
                ("Sala6", "S6-98b-Test", {"raw": {}}),
            ]

            outcome = create_stage_zip("98", "raw", entity_folders, root, set(), output_dir, "Test Masks")

            assert outcome is not None
            archive, licensed = outcome
            assert licensed is False
            with zipfile.ZipFile(archive) as bundle:
                # Deliberately unsorted: the archive order itself is compared.
                members = bundle.namelist()
            assert members == ["S6-98a-Test/raw/meta.ttl", "S6-98b-Test/raw/meta.ttl"]

    def test_returns_none_for_missing_stage(self):
        """No archive is written when no folder contains the requested stage."""
        with tempfile.TemporaryDirectory() as workdir:
            base = Path(workdir)
            root = base / "root"
            raw_dir = root / "Sala1" / "S1-01-Test" / "raw"
            raw_dir.mkdir(parents=True)
            (raw_dir / "meta.ttl").write_text("{}")

            output_dir = base / "output"
            output_dir.mkdir()

            outcome = create_stage_zip(
                "1",
                "dcho",
                [("Sala1", "S1-01-Test", {"raw": {}})],
                root,
                set(),
                output_dir,
                "Test Object",
            )

            assert outcome is None
            assert not (output_dir / "sala1-test-object-dcho.zip").exists()

241 

242 

class TestExtractEntityTitle:
    """Tests for ``extract_entity_title``."""

    def test_extracts_title_from_kg(self, real_kg):
        """Entity "1" of the real graph is titled "Carta nautica"."""
        assert extract_entity_title(real_kg, "1") == "Carta nautica"

    def test_returns_default_for_missing(self):
        """An empty graph yields the "Entity <id>" fallback title."""
        assert extract_entity_title(Graph(), "nonexistent") == "Entity nonexistent"

    def test_takes_first_line(self):
        """Only the first line of a multi-line note is used as title."""
        graph = Graph()
        note = Literal("First line\nSecond line")
        graph.add((URIRef(f"{BASE_URI}/itm/42/ob00/1"), P3_HAS_NOTE, note))
        assert extract_entity_title(graph, "42") == "First line"

259 

260 

class TestExtractAuthorsForEntityStage:
    """Tests for ``extract_authors_for_entity_stage``."""

    def test_extracts_author_from_kg(self, real_kg):
        """The raw stage of entity "1" has exactly one author."""
        found = extract_authors_for_entity_stage(real_kg, "1", "raw")
        assert found == {"Federica Bonifazi"}

    def test_accumulates_authors_across_steps(self, real_kg):
        """The dchoo stage of entity "1" reports more than one author."""
        found = extract_authors_for_entity_stage(real_kg, "1", "dchoo")
        assert "Federica Bonifazi" in found
        assert len(found) > 1

    def test_returns_empty_for_missing_entity(self, real_kg):
        """An unknown entity id produces an empty set."""
        found = extract_authors_for_entity_stage(real_kg, "nonexistent", "raw")
        assert found == set()

    def test_extracts_from_synthetic_graph(self):
        """A person attached to the step-00 activity is the raw-stage author."""
        activity = URIRef(f"{BASE_URI}/act/42/00/1")
        person = URIRef(f"{BASE_URI}/per/42/1")
        appellation = URIRef(f"{BASE_URI}/apl/42/1")
        graph = Graph()
        for triple in (
            (activity, P14_CARRIED_OUT_BY, person),
            (person, RDF_TYPE, E21_PERSON),
            (person, P1_IS_IDENTIFIED_BY, appellation),
            (appellation, P190_HAS_SYMBOLIC_CONTENT, Literal("Test Author")),
        ):
            graph.add(triple)
        found = extract_authors_for_entity_stage(graph, "42", "raw")
        assert found == {"Test Author"}

286 

287 

class TestExtractMetadataAuthors:
    """Tests for ``extract_metadata_authors``."""

    def test_extracts_step_05_authors(self):
        """A person attached to the step-05 activity is a metadata author."""
        activity = URIRef(f"{BASE_URI}/act/42/05/1")
        person = URIRef(f"{BASE_URI}/per/meta/1")
        appellation = URIRef(f"{BASE_URI}/apl/meta/1")
        graph = Graph()
        for triple in (
            (activity, P14_CARRIED_OUT_BY, person),
            (person, RDF_TYPE, E21_PERSON),
            (person, P1_IS_IDENTIFIED_BY, appellation),
            (appellation, P190_HAS_SYMBOLIC_CONTENT, Literal("Metadata Author")),
        ):
            graph.add(triple)
        assert extract_metadata_authors(graph, "42") == {"Metadata Author"}

    def test_returns_empty_for_missing_entity(self):
        """An empty graph yields an empty set."""
        assert extract_metadata_authors(Graph(), "nonexistent") == set()

    def test_extracts_from_real_kg(self, real_kg):
        """Entity "1" of the real graph has three metadata authors."""
        expected = {"Arcangelo Massari", "Arianna Moretti", "Sebastian Barzaghi"}
        assert extract_metadata_authors(real_kg, "1") == expected

309 

310 

class TestLoadCreatorsLookup:
    """Tests for ``load_creators_lookup``."""

    def test_loads_creators_as_dict(self):
        """Creators are returned as a dict keyed by their ``name_in_rdf``."""
        # The list item and its mapping keys need distinct indentation levels
        # for the document to be valid YAML.
        yaml_text = (
            "creators:\n"
            "  - name_in_rdf: Test Author\n"
            "    family_name: Author\n"
            "    given_name: Test\n"
            "    affiliation: Test Uni\n"
            "    orcid: 0000-0001-2345-6789\n"
        )
        # A temporary directory is removed on exit, so no file is left behind,
        # and the file is fully written and closed before it is read back.
        with tempfile.TemporaryDirectory() as tmpdir:
            creators_file = Path(tmpdir) / "creators.yaml"
            creators_file.write_text(yaml_text, encoding="utf-8")
            lookup = load_creators_lookup(creators_file)
        assert lookup == {
            "Test Author": {
                "family_name": "Author",
                "given_name": "Test",
                "affiliation": "Test Uni",
                "orcid": "0000-0001-2345-6789",
            }
        }

332 

333 

class TestBuildCreatorsForEntityStage:
    """Tests for ``build_creators_for_entity_stage``."""

    def test_builds_creators_with_researcher_role(self, real_kg):
        """A known author becomes a creator entry with the researcher role."""
        known_people = {
            "Federica Bonifazi": {
                "family_name": "Bonifazi",
                "given_name": "Federica",
                "affiliation": "CNR-ISPC",
                "orcid": "0009-0000-8466-5541",
            }
        }
        expected_entry = {
            "person_or_org": {
                "type": "personal",
                "family_name": "Bonifazi",
                "given_name": "Federica",
                "identifiers": [{"scheme": "orcid", "identifier": "0009-0000-8466-5541"}],
            },
            "role": {"id": "researcher"},
            "affiliations": [{"name": "CNR-ISPC"}],
        }
        built = build_creators_for_entity_stage(real_kg, "1", "raw", known_people)
        assert built == [expected_entry]

    def test_ignores_authors_not_in_lookup(self, real_kg):
        """With an empty lookup no creator is produced."""
        built = build_creators_for_entity_stage(real_kg, "1", "raw", {})
        assert built == []

    def test_sorts_authors_alphabetically(self):
        """Creators come back ordered by name regardless of insertion order."""
        graph = Graph()
        activity = URIRef(f"{BASE_URI}/act/42/00/1")
        for full_name in ("Zeta Author", "Alpha Author"):
            person = URIRef(f"{BASE_URI}/per/{full_name}/1")
            appellation = URIRef(f"{BASE_URI}/apl/{full_name}/1")
            graph.add((activity, P14_CARRIED_OUT_BY, person))
            graph.add((person, RDF_TYPE, E21_PERSON))
            graph.add((person, P1_IS_IDENTIFIED_BY, appellation))
            graph.add((appellation, P190_HAS_SYMBOLIC_CONTENT, Literal(full_name)))
        known_people = {
            "Alpha Author": {
                "family_name": "Author",
                "given_name": "Alpha",
                "affiliation": "Uni",
                "orcid": "0000-0000-0000-0001",
            },
            "Zeta Author": {
                "family_name": "Author",
                "given_name": "Zeta",
                "affiliation": "Uni",
                "orcid": "0000-0000-0000-0002",
            },
        }
        built = build_creators_for_entity_stage(graph, "42", "raw", known_people)
        given_names = [entry["person_or_org"]["given_name"] for entry in built]
        assert given_names == ["Alpha", "Zeta"]

389 

390 

class TestBuildMetadataCreators:
    """Tests for ``build_metadata_creators``."""

    def test_builds_creators_with_datacurator_role(self):
        """A step-05 author becomes a creator entry with the datacurator role."""
        activity = URIRef(f"{BASE_URI}/act/42/05/1")
        person = URIRef(f"{BASE_URI}/per/meta/1")
        appellation = URIRef(f"{BASE_URI}/apl/meta/1")
        graph = Graph()
        for triple in (
            (activity, P14_CARRIED_OUT_BY, person),
            (person, RDF_TYPE, E21_PERSON),
            (person, P1_IS_IDENTIFIED_BY, appellation),
            (appellation, P190_HAS_SYMBOLIC_CONTENT, Literal("Metadata Author")),
        ):
            graph.add(triple)
        known_people = {
            "Metadata Author": {
                "family_name": "Author",
                "given_name": "Metadata",
                "affiliation": "Test Uni",
                "orcid": "0000-0001-2345-6789",
            }
        }
        expected_entry = {
            "person_or_org": {
                "type": "personal",
                "family_name": "Author",
                "given_name": "Metadata",
                "identifiers": [{"scheme": "orcid", "identifier": "0000-0001-2345-6789"}],
            },
            "role": {"id": "datacurator"},
            "affiliations": [{"name": "Test Uni"}],
        }
        built = build_metadata_creators(graph, "42", known_people)
        assert built == [expected_entry]

422 

423 

class TestMergeCreators:
    """Tests for ``merge_creators``."""

    @staticmethod
    def _creator(family_name, given_name, orcid, role):
        """Build one creator entry affiliated with "Uni"."""
        return {
            "person_or_org": {
                "type": "personal",
                "family_name": family_name,
                "given_name": given_name,
                "identifiers": [{"scheme": "orcid", "identifier": orcid}],
            },
            "role": {"id": role},
            "affiliations": [{"name": "Uni"}],
        }

    def test_merges_without_duplicates(self):
        """Distinct people are both kept, digitization creators first."""
        from_digitization = [self._creator("Author", "Digit", "0000-0000-0000-0001", "researcher")]
        from_metadata = [self._creator("Author", "Meta", "0000-0000-0000-0002", "datacurator")]
        combined = merge_creators(from_digitization, from_metadata)
        assert len(combined) == 2
        assert combined[0]["role"] == {"id": "researcher"}
        assert combined[1]["role"] == {"id": "datacurator"}

    def test_deduplicates_by_orcid(self):
        """A person present in both lists is kept once, as researcher."""
        from_digitization = [self._creator("Shared", "Author", "0000-0000-0000-0001", "researcher")]
        from_metadata = [self._creator("Shared", "Author", "0000-0000-0000-0001", "datacurator")]
        combined = merge_creators(from_digitization, from_metadata)
        assert len(combined) == 1
        assert combined[0]["role"] == {"id": "researcher"}

    def test_empty_lists(self):
        """Merging two empty lists gives an empty list."""
        assert merge_creators([], []) == []

    def test_only_metadata_creators(self):
        """Metadata creators are kept when there are no digitization creators."""
        from_metadata = [self._creator("Author", "Meta", "0000-0000-0000-0001", "datacurator")]
        combined = merge_creators([], from_metadata)
        assert len(combined) == 1
        assert combined[0]["role"] == {"id": "datacurator"}

503 

504 

class TestBuildEntityUri:
    """Tests for ``build_entity_uri``."""

    def test_builds_uri_for_numeric_id(self):
        """A numeric id is placed into the item URI."""
        expected = "https://w3id.org/changes/4/aldrovandi/itm/27/ob00/1"
        assert build_entity_uri("27") == expected

    def test_builds_uri_for_string_id(self):
        """A non-numeric id is placed into the item URI the same way."""
        expected = "https://w3id.org/changes/4/aldrovandi/itm/ptb/ob00/1"
        assert build_entity_uri("ptb") == expected

513 

514 

# Creator entry passed to generate_zenodo_config in the tests below.
SAMPLE_CREATOR = {
    "person_or_org": {
        "type": "personal",
        "family_name": "Author",
        "given_name": "Test",
        "identifiers": [{"scheme": "orcid", "identifier": "0000-0001-2345-6789"}],
    },
    "role": {"id": "researcher"},
    "affiliations": [{"name": "Test Uni"}],
}

# Base configuration shared by the generate_zenodo_config tests; individual
# tests extend it with extra keys through dict unpacking.
SAMPLE_BASE_CONFIG = {
    "zenodo_url": "https://sandbox.zenodo.org/api",
    "access_token": "test_token",
    "user_agent": "piccione/2.1.0",
    "subjects": [{"subject": "test"}],
    "notes": "Test notes content",
    "locations": [
        {
            "lat": 44.497,
            "lon": 11.353,
            "place": "Bologna, Italy",
            "description": "Palazzo Poggi Museum",
        },
    ],
}

# Methods text passed to generate_zenodo_config in the tests below.
SAMPLE_METHODS = "Test method content"

543 

544 

class TestGenerateZenodoConfig:
    """Tests for ``generate_zenodo_config``."""

    @staticmethod
    def _generate(zip_path, base_config=None, **options):
        """Call ``generate_zenodo_config`` for the raw stage with sample inputs."""
        if base_config is None:
            base_config = SAMPLE_BASE_CONFIG
        return generate_zenodo_config(
            "raw", zip_path, "Test Title", base_config, [SAMPLE_CREATOR], SAMPLE_METHODS, **options
        )

    def test_generates_valid_config(self, freezer):
        """The full generated configuration matches the expected dict."""
        freezer.move_to("2024-06-15")
        archive = Path("/tmp/1-raw.zip")
        generated = self._generate(archive)

        expected_description = (
            'Raw acquisition data of "Test Title" from the Aldrovandi Digital Twin. '
            "This dataset contains the raw material generated during the acquisition phase. "
            'Includes metadata (meta.ttl) and provenance (prov.trig) files following the <a href="https://w3id.org/dharc/ontology/chad-ap">CHAD-AP</a> ontology.\n'
        )
        assert generated == {
            "zenodo_url": "https://sandbox.zenodo.org/api",
            "access_token": "test_token",
            "user_agent": "piccione/2.1.0",
            "title": "Test Title - Raw - Aldrovandi Digital Twin",
            "description": expected_description,
            "resource_type": {"id": "dataset"},
            "publisher": "Zenodo",
            "access": {"record": "public", "files": "public"},
            "creators": [SAMPLE_CREATOR],
            "subjects": [{"subject": "test"}],
            "files": [str(archive.absolute())],
            "publication_date": "2024-06-15",
            "rights": [
                {
                    "title": {"en": "Creative Commons Zero v1.0 Universal (Metadata license)"},
                    "description": {"en": "Applies to metadata files: meta.ttl, prov.trig"},
                    "link": "https://creativecommons.org/publicdomain/zero/1.0/",
                },
            ],
            "additional_descriptions": [
                {"description": "Test method content", "type": {"id": "methods"}},
                {"description": "Test notes content", "type": {"id": "notes"}},
            ],
            "locations": {
                "features": [
                    {
                        "geometry": {"type": "Point", "coordinates": [11.353, 44.497]},
                        "place": "Bologna, Italy",
                        "description": "Palazzo Poggi Museum",
                    },
                ]
            },
        }

    def test_adds_entity_uri_as_alternate_identifier(self, freezer):
        """The entity URI is emitted as a ``url`` identifier."""
        freezer.move_to("2024-06-15")
        archive = Path("/tmp/27-raw.zip")
        item_uri = "https://w3id.org/changes/4/aldrovandi/itm/27/ob00/1"
        generated = self._generate(archive, entity_uri=item_uri)

        assert generated["identifiers"] == [
            {"identifier": "https://w3id.org/changes/4/aldrovandi/itm/27/ob00/1", "scheme": "url"}
        ]

    def test_converts_related_identifiers(self, freezer):
        """Flat relation and resource type values are wrapped in ``id`` dicts."""
        freezer.move_to("2024-06-15")
        related = {
            "identifier": "10.3724/2096-7004.di.2024.0061",
            "relation": "isdocumentedby",
            "resource_type": "publication-article",
        }
        extended_config = {**SAMPLE_BASE_CONFIG, "related_identifiers": [related]}
        archive = Path("/tmp/27-raw.zip")
        generated = self._generate(archive, extended_config)

        assert generated["related_identifiers"] == [
            {
                "identifier": "10.3724/2096-7004.di.2024.0061",
                "relation_type": {"id": "isdocumentedby"},
                "resource_type": {"id": "publication-article"},
            },
        ]

    def test_converts_notes_and_method_to_additional_descriptions(self, freezer):
        """Methods and notes become additional descriptions, in that order."""
        freezer.move_to("2024-06-15")
        archive = Path("/tmp/1-raw.zip")
        generated = self._generate(archive)

        assert generated["additional_descriptions"] == [
            {"description": "Test method content", "type": {"id": "methods"}},
            {"description": "Test notes content", "type": {"id": "notes"}},
        ]

    def test_cc0_disclaimer_in_additional_descriptions(self, freezer):
        """A cc0-1.0 license appends the CC0 disclaimer as a note."""
        freezer.move_to("2024-06-15")
        archive = Path("/tmp/1-raw.zip")
        generated = self._generate(archive, license="cc0-1.0")

        assert generated["additional_descriptions"] == [
            {"description": "Test method content", "type": {"id": "methods"}},
            {"description": "Test notes content", "type": {"id": "notes"}},
            {"description": CC0_DISCLAIMER, "type": {"id": "notes"}},
        ]

    def test_converts_locations_to_geojson(self, freezer):
        """lat/lon pairs become Point features with [lon, lat] coordinates."""
        freezer.move_to("2024-06-15")
        archive = Path("/tmp/1-raw.zip")
        generated = self._generate(archive)

        assert generated["locations"] == {
            "features": [
                {
                    "geometry": {"type": "Point", "coordinates": [11.353, 44.497]},
                    "place": "Bologna, Italy",
                    "description": "Palazzo Poggi Museum",
                },
            ]
        }

    def test_includes_community_field(self, freezer):
        """The community of the base configuration is carried over."""
        freezer.move_to("2024-06-15")
        extended_config = {**SAMPLE_BASE_CONFIG, "community": "project-changes"}
        archive = Path("/tmp/1-raw.zip")
        generated = self._generate(archive, extended_config)

        assert generated["community"] == "project-changes"

    def test_includes_restricted_notice_when_no_license(self, freezer):
        """Without a license the restricted notice is a note, not description text."""
        freezer.move_to("2024-06-15")
        archive = Path("/tmp/1-raw.zip")
        generated = self._generate(archive, has_license=False)

        notice_entry = {"description": RESTRICTED_NOTICE, "type": {"id": "notes"}}
        assert RESTRICTED_NOTICE not in generated["description"]
        assert notice_entry in generated["additional_descriptions"]

    def test_no_restricted_notice_when_licensed(self, freezer):
        """With a license the restricted notice is not emitted."""
        freezer.move_to("2024-06-15")
        archive = Path("/tmp/1-raw.zip")
        generated = self._generate(archive, has_license=True)

        notice_entry = {"description": RESTRICTED_NOTICE, "type": {"id": "notes"}}
        assert notice_entry not in generated["additional_descriptions"]

    def test_propagates_funding_field(self, freezer):
        """The funding list of the base configuration is carried over unchanged."""
        freezer.move_to("2024-06-15")
        funding = [
            {
                "funder": {"name": "European Union - NextGenerationEU"},
                "award": {
                    "title": {"en": "CHANGES"},
                    "number": "PE 0000020",
                },
            }
        ]
        extended_config = {**SAMPLE_BASE_CONFIG, "funding": funding}
        archive = Path("/tmp/1-raw.zip")
        generated = self._generate(archive, extended_config)

        assert generated["funding"] == funding

694 

695 

class TestExtractLicenseForEntityStage:
    """Tests for ``extract_license_for_entity_stage``."""

    def test_extracts_license_from_kg(self):
        """The CC0 URL is translated to the ``cc0-1.0`` identifier."""
        graph = Graph()
        cc0_url = URIRef("https://creativecommons.org/publicdomain/zero/1.0/")
        graph.add((URIRef(f"{BASE_URI}/lic/42/00/1"), P70I, cc0_url))
        assert extract_license_for_entity_stage(graph, "42", "raw") == "cc0-1.0"

    def test_returns_none_for_missing_license(self):
        """An empty graph yields ``None``."""
        assert extract_license_for_entity_stage(Graph(), "42", "raw") is None

    def test_returns_none_for_unknown_license_uri(self):
        """A license URL that is not recognised yields ``None``."""
        graph = Graph()
        custom_url = URIRef("https://example.com/custom-license")
        graph.add((URIRef(f"{BASE_URI}/lic/42/00/1"), P70I, custom_url))
        assert extract_license_for_entity_stage(graph, "42", "raw") is None

    def test_extracts_cc_by(self):
        """The CC BY 4.0 URL is translated to the ``cc-by-4.0`` identifier."""
        graph = Graph()
        cc_by_url = URIRef("https://creativecommons.org/licenses/by/4.0/")
        graph.add((URIRef(f"{BASE_URI}/lic/42/00/1"), P70I, cc_by_url))
        assert extract_license_for_entity_stage(graph, "42", "raw") == "cc-by-4.0"

725 

726 

class TestExtractKeeperInfo:
    """Tests for ``extract_keeper_info``."""

    def test_extracts_keeper_from_kg(self, real_kg):
        """Entity "1" is kept by the Biblioteca Universitaria di Bologna."""
        name, place = extract_keeper_info(real_kg, "1")
        assert name == "Biblioteca Universitaria di Bologna"
        assert place == "Bologna"

    def test_extracts_non_bologna_keeper(self, real_kg):
        """Entity "21" is kept by the Accademia Carrara in Bergamo."""
        name, place = extract_keeper_info(real_kg, "21")
        assert name == "Accademia Carrara"
        assert place == "Bergamo"

    def test_returns_none_for_missing_entity(self, real_kg):
        """An unknown entity yields ``None`` for both name and location."""
        name, place = extract_keeper_info(real_kg, "nonexistent")
        assert name is None
        assert place is None

    def test_extracts_from_synthetic_graph(self):
        """Keeper name and residence are read from a hand-built graph."""
        custody = URIRef(f"{BASE_URI}/act/42/ob08/1")
        keeper = URIRef(f"{BASE_URI}/acr/test_museum/1")
        keeper_label = URIRef(f"{BASE_URI}/apl/test_museum/1")
        city = URIRef(f"{BASE_URI}/plc/test_city/1")
        city_label = URIRef(f"{BASE_URI}/apl/test_city/1")
        graph = Graph()
        for triple in (
            (custody, P14_CARRIED_OUT_BY, keeper),
            (keeper, P1_IS_IDENTIFIED_BY, keeper_label),
            (keeper_label, P190_HAS_SYMBOLIC_CONTENT, Literal("Test Museum")),
            (keeper, P74_HAS_RESIDENCE, city),
            (city, P1_IS_IDENTIFIED_BY, city_label),
            (city_label, P190_HAS_SYMBOLIC_CONTENT, Literal("Test City")),
        ):
            graph.add(triple)
        name, place = extract_keeper_info(graph, "42")
        assert name == "Test Museum"
        assert place == "Test City"

    def test_keeper_without_location(self):
        """A keeper with no residence yields a name and a ``None`` location."""
        custody = URIRef(f"{BASE_URI}/act/42/ob08/1")
        keeper = URIRef(f"{BASE_URI}/acr/test_museum/1")
        keeper_label = URIRef(f"{BASE_URI}/apl/test_museum/1")
        graph = Graph()
        for triple in (
            (custody, P14_CARRIED_OUT_BY, keeper),
            (keeper, P1_IS_IDENTIFIED_BY, keeper_label),
            (keeper_label, P190_HAS_SYMBOLIC_CONTENT, Literal("Test Museum")),
        ):
            graph.add(triple)
        name, place = extract_keeper_info(graph, "42")
        assert name == "Test Museum"
        assert place is None

771 

772 

class TestBuildEnhancedDescription:
    """Tests for ``build_enhanced_description``."""

    def test_raw_stage_description(self):
        """The raw stage produces the exact expected description text."""
        expected = (
            'Raw acquisition data of "Test Object" from the Aldrovandi Digital Twin. '
            "This dataset contains the raw material generated during the acquisition phase. "
            'Includes metadata (meta.ttl) and provenance (prov.trig) files following the <a href="https://w3id.org/dharc/ontology/chad-ap">CHAD-AP</a> ontology.\n'
        )
        assert build_enhanced_description("raw", "Test Object") == expected

    def test_dcho_stage_description(self):
        """The dcho description names the stage, the title and the processing."""
        text = build_enhanced_description("dcho", "Museum Specimen")
        assert "Digital Cultural Heritage Object" in text
        assert '"Museum Specimen"' in text
        assert "interpolation, gap filling, and resolution of geometric issues" in text

    def test_dchoo_stage_description(self):
        """The dchoo description mentions the optimized object."""
        text = build_enhanced_description("dchoo", "Object Title")
        assert "Optimized Digital Cultural Heritage Object" in text
        assert "optimised for real-time online interaction" in text

    def test_description_never_contains_disclaimer(self):
        """The CC0 disclaimer is not part of the description."""
        text = build_enhanced_description("dcho", "Test Object")
        assert CC0_DISCLAIMER not in text

    def test_includes_keeper_and_location(self):
        """Keeper name and location are rendered as "Name (Location)"."""
        text = build_enhanced_description(
            "raw", "Test Object", keeper_name="Test Museum", keeper_location="Test City"
        )
        assert "The original object is held by Test Museum (Test City)." in text

    def test_includes_keeper_without_location(self):
        """Without a location the keeper is rendered without parentheses."""
        text = build_enhanced_description("raw", "Test Object", keeper_name="Test Museum")
        assert "The original object is held by Test Museum." in text
        assert "Test Museum (" not in text

    def test_no_keeper_line_when_none(self):
        """No keeper sentence is emitted when no keeper is given."""
        text = build_enhanced_description("raw", "Test Object")
        assert "held by" not in text

    def test_description_is_single_paragraph(self):
        """Apart from trailing newlines the description has no line breaks."""
        text = build_enhanced_description(
            "raw", "Test Object", keeper_name="Museum", keeper_location="City"
        )
        assert "\n" not in text.rstrip("\n")

813 

814 

class TestFormatCreatorsForTable:
    """Tests for ``_format_creators_for_table``."""

    def test_formats_multiple_creators(self):
        """Several creators are joined with "; "."""
        first = {
            "person_or_org": {
                "family_name": "Bordignon",
                "given_name": "Alice",
                "identifiers": [{"scheme": "orcid", "identifier": "0009-0008-3556-0493"}],
            }
        }
        second = {
            "person_or_org": {
                "family_name": "Massari",
                "given_name": "Arcangelo",
                "identifiers": [{"scheme": "orcid", "identifier": "0000-0002-8420-0696"}],
            }
        }
        rendered = _format_creators_for_table({"creators": [first, second]})
        assert rendered == "Bordignon, Alice [orcid:0009-0008-3556-0493]; Massari, Arcangelo [orcid:0000-0002-8420-0696]"

    def test_formats_single_creator(self):
        """One creator is rendered as "Family, Given [orcid:...]"."""
        only = {
            "person_or_org": {
                "family_name": "Barzaghi",
                "given_name": "Sebastian",
                "identifiers": [{"scheme": "orcid", "identifier": "0000-0002-0799-1527"}],
            }
        }
        rendered = _format_creators_for_table({"creators": [only]})
        assert rendered == "Barzaghi, Sebastian [orcid:0000-0002-0799-1527]"

832 

833 

class TestFormatLicensesForTable:
    """Tests for ``_format_licenses_for_table``."""

    def test_formats_cc0_metadata_and_content(self):
        """CC0 for both metadata and content is rendered with short ids."""
        rights = [
            {"title": {"en": "Creative Commons Zero v1.0 Universal (Metadata license)"}},
            {"title": {"en": "Creative Commons Zero v1.0 Universal (Content license)"}},
        ]
        rendered = _format_licenses_for_table({"rights": rights})
        assert rendered == "cc0-1.0 (Metadata license); cc0-1.0 (Content license)"

    def test_formats_mixed_licenses(self):
        """Different metadata and content licenses each get their short id."""
        rights = [
            {"title": {"en": "Creative Commons Zero v1.0 Universal (Metadata license)"}},
            {"title": {"en": "Creative Commons Attribution Non Commercial Share Alike 4.0 International (Content license)"}},
        ]
        rendered = _format_licenses_for_table({"rights": rights})
        assert rendered == "cc0-1.0 (Metadata license); cc-by-nc-sa-4.0 (Content license)"

    def test_formats_metadata_only(self):
        """A single metadata license is rendered on its own."""
        rights = [
            {"title": {"en": "Creative Commons Zero v1.0 Universal (Metadata license)"}},
        ]
        rendered = _format_licenses_for_table({"rights": rights})
        assert rendered == "cc0-1.0 (Metadata license)"

860 

861 

class TestExtractDoi:
    """Checks ``_extract_doi`` against record dictionaries."""

    def test_extracts_doi_from_record(self):
        """The identifier nested under ``pids.doi`` is returned unchanged."""
        doi = "10.5281/zenodo.12345"
        record = {"pids": {"doi": {"identifier": doi}}}
        assert _extract_doi(record) == doi

    def test_returns_empty_string_on_sandbox(self):
        """Records lacking DOI data produce an empty string."""
        records_without_doi = ({}, {"pids": {}})
        for record in records_without_doi:
            assert _extract_doi(record) == ""

870 

871 

class TestExtractRecordUrl:
    """Checks ``_extract_record_url`` against a record dictionary."""

    def test_extracts_url_from_record(self):
        """The ``links.self_html`` value is returned unchanged."""
        html_url = "https://zenodo.org/records/12345"
        record = {"links": {"self_html": html_url}}
        assert _extract_record_url(record) == html_url

876 

877 

class TestExtractAcquisitionTechnique:
    """Checks ``extract_acquisition_technique`` on real and synthetic graphs."""

    def test_extracts_photography_from_kg(self, real_kg):
        """Entity ``"1"`` of the real KG resolves to digital photography."""
        assert extract_acquisition_technique(real_kg, "1") == "digital photography"

    def test_extracts_scanning_from_kg(self, real_kg):
        """Entity ``"12"`` of the real KG resolves to optical scanning."""
        assert extract_acquisition_technique(real_kg, "12") == "optical scanning"

    def test_returns_none_for_missing_entity(self):
        """An empty graph gives ``None``."""
        empty_graph = Graph()
        found = extract_acquisition_technique(empty_graph, "nonexistent")
        assert found is None

    def test_extracts_from_synthetic_graph(self):
        """A single technique triple on an activity of entity 42 is resolved."""
        graph = Graph()
        activity = URIRef(f"{BASE_URI}/act/42/00/1")
        technique = URIRef(f"{AAT}300266792")
        graph.add((activity, P32_USED_GENERAL_TECHNIQUE, technique))
        assert extract_acquisition_technique(graph, "42") == "digital photography"

896 

897 

class TestExtractDevices:
    """Checks ``extract_devices`` on real and synthetic graphs."""

    def test_extracts_devices_from_kg(self, real_kg):
        """Entity ``"1"`` of the real KG lists the lens and the camera body."""
        expected = ["Nikkor 50mm", "Nikon D7200"]
        assert extract_devices(real_kg, "1") == expected

    def test_extracts_scanner_device(self, real_kg):
        """Entity ``"12"`` of the real KG lists only the Artec Eva."""
        expected = ["Artec Eva"]
        assert extract_devices(real_kg, "12") == expected

    def test_returns_empty_for_missing_entity(self):
        """An empty graph gives an empty list."""
        empty_graph = Graph()
        assert extract_devices(empty_graph, "nonexistent") == []

    def test_excludes_item_uris(self):
        """Of a ``/dev/`` and an ``/itm/`` object, only the former is reported."""
        graph = Graph()
        activity = URIRef(f"{BASE_URI}/act/42/00/1")
        used_objects = (
            f"{BASE_URI}/dev/nikon_d7200/1",
            f"{BASE_URI}/itm/42/ob00/1",
        )
        for target in used_objects:
            graph.add((activity, P16_USED_SPECIFIC_OBJECT, URIRef(target)))
        found = extract_devices(graph, "42")
        assert found == ["Nikon D7200"]

918 

919 

class TestExtractSoftwareForStage:
    """Checks ``extract_software_for_stage`` per entity and stage."""

    def test_extracts_raw_software(self, real_kg):
        """The ``raw`` stage of entity ``"1"`` reports no software."""
        tools = extract_software_for_stage(real_kg, "1", "raw")
        assert tools == []

    def test_extracts_rawp_software(self, real_kg):
        """The ``rawp`` stage of entity ``"1"`` includes 3DF Zephyr."""
        tools = extract_software_for_stage(real_kg, "1", "rawp")
        assert "3DF Zephyr" in tools

    def test_excludes_metadata_step_software(self, real_kg):
        """CHAD-AP, HeriTrace and Morph-KGC are absent for ``dchoo``."""
        tools = extract_software_for_stage(real_kg, "1", "dchoo")
        for excluded in ("CHAD-AP", "HeriTrace", "Morph-KGC"):
            assert excluded not in tools

    def test_includes_step_06_software(self, real_kg):
        """ATON is present among the ``dchoo`` software of entity ``"1"``."""
        tools = extract_software_for_stage(real_kg, "1", "dchoo")
        assert "ATON" in tools

    def test_returns_empty_for_missing_entity(self):
        """An empty graph gives an empty list."""
        empty_graph = Graph()
        assert extract_software_for_stage(empty_graph, "nonexistent", "raw") == []

942 

943 

class TestBuildMethodsDescription:
    """Checks the text returned by ``build_methods_description``."""

    def test_includes_workflow_reference(self):
        """The workflow DOI appears even when the graph is empty."""
        text = build_methods_description(Graph(), "nonexistent", "raw")
        assert "doi:10.46298/transformations.14773" in text

    def test_includes_technique_and_devices(self, real_kg):
        """Technique and a device are mentioned for entity ``"1"``, ``raw``."""
        text = build_methods_description(real_kg, "1", "raw")
        assert "digital photography" in text
        assert "Nikon D7200" in text

    def test_includes_software_for_rawp(self, real_kg):
        """The ``rawp`` stage text carries a software section with 3DF Zephyr."""
        text = build_methods_description(real_kg, "1", "rawp")
        assert "Processing software:" in text
        assert "3DF Zephyr" in text

    def test_no_software_for_raw(self, real_kg):
        """The ``raw`` stage text has no software section."""
        text = build_methods_description(real_kg, "1", "raw")
        assert "Processing software:" not in text

    def test_includes_chad_ap_reference(self):
        """CHAD-AP is mentioned even when the graph is empty."""
        text = build_methods_description(Graph(), "nonexistent", "raw")
        assert "CHAD-AP" in text

    def test_scanning_entity(self, real_kg):
        """Entity ``"12"`` text mentions optical scanning and the Artec Eva."""
        text = build_methods_description(real_kg, "12", "raw")
        assert "optical scanning" in text
        assert "Artec Eva" in text