Coverage for tests / test_zenodo_upload.py: 100%

816 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2026-05-29 18:29 +0000

1# SPDX-FileCopyrightText: 2025-2026 Arcangelo Massari <arcangelomas@gmail.com> 

2# 

3# SPDX-License-Identifier: ISC 

4 

5import csv 

6import json 

7import tempfile 

8import zipfile 

9from pathlib import Path 

10from unittest.mock import patch 

11 

12import yaml 

13 

14import pytest 

15from rdflib import Graph, Literal, URIRef 

16 

17from changes_metadata_manager.folder_metadata_builder import load_kg 

18from changes_metadata_manager.zenodo_upload import ( 

19 AAT, 

20 BASE_URI, 

21 CC0_DISCLAIMER, 

22 RESTRICTED_NOTICE, 

23 E21_PERSON, 

24 P14_CARRIED_OUT_BY, 

25 P16_USED_SPECIFIC_OBJECT, 

26 P190_HAS_SYMBOLIC_CONTENT, 

27 P1_IS_IDENTIFIED_BY, 

28 P32_USED_GENERAL_TECHNIQUE, 

29 P3_HAS_NOTE, 

30 P70I, 

31 P74_HAS_RESIDENCE, 

32 RDF_TYPE, 

33 _atomic_write_json, 

34 _extract_doi, 

35 _extract_license_from_meta, 

36 _extract_record_url, 

37 _format_creators_for_table, 

38 _format_licenses_for_table, 

39 _write_doi_table, 

40 build_creators_for_entity_stage, 

41 build_enhanced_description, 

42 build_entity_uri, 

43 build_metadata_creators, 

44 build_methods_description, 

45 create_stage_zip, 

46 extract_acquisition_technique, 

47 extract_authors_for_entity_stage, 

48 extract_devices, 

49 extract_entity_title, 

50 extract_keeper_info, 

51 extract_license_for_entity_stage, 

52 extract_licensed_entity_stages, 

53 extract_metadata_authors, 

54 extract_software_for_stage, 

55 generate_zenodo_config, 

56 group_folders_by_entity, 

57 load_creators_lookup, 

58 merge_creators, 

59 publish_all_drafts, 

60 slugify, 

61 upload_all, 

62) 

63 

64 

65DATA_DIR = Path(__file__).parent.parent / "data" 

66REAL_KG_PATH = DATA_DIR / "kg.ttl" 

67 

68 

69@pytest.fixture(scope="module") 

70def real_kg(): 

71 return load_kg(REAL_KG_PATH) 

72 

73 

74class TestExtractLicensedEntityStages: 

75 def test_returns_set_of_tuples(self, real_kg): 

76 result = extract_licensed_entity_stages(real_kg) 

77 assert isinstance(result, set) 

78 assert all(isinstance(item, tuple) and len(item) == 2 for item in result) 

79 

80 def test_known_licensed_entity(self, real_kg): 

81 result = extract_licensed_entity_stages(real_kg) 

82 assert ("1", "dcho") in result 

83 assert ("1", "dchoo") in result 

84 

85 def test_maps_steps_to_stages(self): 

86 g = Graph() 

87 g.add((URIRef(f"{BASE_URI}/lic/42/00/1"), P70I, URIRef("https://example.com/license"))) 

88 g.add((URIRef(f"{BASE_URI}/lic/42/01/1"), P70I, URIRef("https://example.com/license"))) 

89 g.add((URIRef(f"{BASE_URI}/lic/42/02/1"), P70I, URIRef("https://example.com/license"))) 

90 g.add((URIRef(f"{BASE_URI}/lic/42/03/1"), P70I, URIRef("https://example.com/license"))) 

91 result = extract_licensed_entity_stages(g) 

92 assert result == {("42", "raw"), ("42", "rawp"), ("42", "dcho"), ("42", "dchoo")} 

93 

94 

95class TestGroupFoldersByEntity: 

96 def test_groups_folders_by_entity_id(self): 

97 structure = { 

98 "structure": { 

99 "Sala1": { 

100 "S1-01-Test": {"raw": {}, "dcho": {}}, 

101 "S1-02-Other": {"raw": {}}, 

102 }, 

103 } 

104 } 

105 result = group_folders_by_entity(structure) 

106 assert "1" in result 

107 assert "2" in result 

108 assert len(result["1"]) == 1 

109 assert result["1"][0][1] == "S1-01-Test" 

110 

111 def test_groups_abc_variants(self): 

112 structure = { 

113 "structure": { 

114 "Sala6": { 

115 "S6-98a-DA-Calchi facciali colorati, boscimani": {"raw": {}}, 

116 "S6-98b-DA-Calchi facciali colorati, boscimani": {"raw": {}}, 

117 "S6-98c-DA-Calchi facciali colorati, boscimani": {"raw": {}}, 

118 }, 

119 } 

120 } 

121 result = group_folders_by_entity(structure) 

122 assert "98" in result 

123 assert len(result["98"]) == 3 

124 

125 def test_skips_skip_folders(self): 

126 structure = { 

127 "structure": { 

128 "Sala1": { 

129 "S1-CNR_SoffittoSala1": {"raw": {}}, 

130 "materials": {"raw": {}}, 

131 "S1-01-Test": {"raw": {}}, 

132 }, 

133 } 

134 } 

135 result = group_folders_by_entity(structure) 

136 assert "1" in result 

137 folder_names = [f[1] for f in result["1"]] 

138 assert "S1-CNR_SoffittoSala1" not in folder_names 

139 assert "materials" not in folder_names 

140 

141 

142class TestSlugify: 

143 def test_simple_text(self): 

144 assert slugify("Carta nautica") == "carta-nautica" 

145 

146 def test_accented_characters(self): 

147 assert slugify("Oggettò àccéntàto") == "oggetto-accentato" 

148 

149 def test_special_characters(self): 

150 assert slugify("Test (object) #1") == "test-object-1" 

151 

152 def test_multiple_spaces(self): 

153 assert slugify("Multiple spaces here") == "multiple-spaces-here" 

154 

155 def test_leading_trailing_spaces(self): 

156 assert slugify(" trimmed ") == "trimmed" 

157 

158 

159LICENSED_META_TTL = """\ 

160@prefix crm: <http://www.cidoc-crm.org/cidoc-crm/> . 

161 

162<https://w3id.org/changes/4/aldrovandi/lic/1/00/1> 

163 crm:P70i_is_documented_in <https://creativecommons.org/publicdomain/zero/1.0/> . 

164""" 

165 

166MIXED_LICENSE_META_TTL = """\ 

167@prefix crm: <http://www.cidoc-crm.org/cidoc-crm/> . 

168 

169<https://w3id.org/changes/4/aldrovandi/lic/1/00/1> 

170 crm:P70i_is_documented_in <https://creativecommons.org/licenses/by-nc/4.0/> . 

171<https://w3id.org/changes/4/aldrovandi/lic/1/01/1> 

172 crm:P70i_is_documented_in <https://creativecommons.org/licenses/by-nc/4.0/> . 

173<https://w3id.org/changes/4/aldrovandi/lic/1/02/1> 

174 crm:P70i_is_documented_in <https://creativecommons.org/publicdomain/zero/1.0/> . 

175""" 

176 

177UNLICENSED_META_TTL = """\ 

178@prefix crm: <http://www.cidoc-crm.org/cidoc-crm/> . 

179 

180<https://w3id.org/changes/4/aldrovandi/itm/1/ob00/1> 

181 crm:P3_has_note "Test object" . 

182""" 

183 

184 

185class TestExtractLicenseFromMeta: 

186 def test_returns_license_id_when_present(self): 

187 with tempfile.TemporaryDirectory() as tmpdir: 

188 stage_dir = Path(tmpdir) 

189 (stage_dir / "meta.ttl").write_text(LICENSED_META_TTL) 

190 assert _extract_license_from_meta(stage_dir, "raw") == "cc0-1.0" 

191 

192 def test_returns_none_when_no_license(self): 

193 with tempfile.TemporaryDirectory() as tmpdir: 

194 stage_dir = Path(tmpdir) 

195 (stage_dir / "meta.ttl").write_text(UNLICENSED_META_TTL) 

196 assert _extract_license_from_meta(stage_dir, "raw") is None 

197 

198 def test_picks_defining_step_license(self): 

199 with tempfile.TemporaryDirectory() as tmpdir: 

200 stage_dir = Path(tmpdir) 

201 (stage_dir / "meta.ttl").write_text(MIXED_LICENSE_META_TTL) 

202 assert _extract_license_from_meta(stage_dir, "dcho") == "cc0-1.0" 

203 

204 def test_returns_none_when_defining_step_missing(self): 

205 ttl = """\ 

206@prefix crm: <http://www.cidoc-crm.org/cidoc-crm/> . 

207 

208<https://w3id.org/changes/4/aldrovandi/lic/1/00/1> 

209 crm:P70i_is_documented_in <https://creativecommons.org/licenses/by-nc/4.0/> . 

210<https://w3id.org/changes/4/aldrovandi/lic/1/01/1> 

211 crm:P70i_is_documented_in <https://creativecommons.org/licenses/by-nc/4.0/> . 

212""" 

213 with tempfile.TemporaryDirectory() as tmpdir: 

214 stage_dir = Path(tmpdir) 

215 (stage_dir / "meta.ttl").write_text(ttl) 

216 assert _extract_license_from_meta(stage_dir, "dcho") is None 

217 

218 

219class TestCreateStageZip: 

220 def test_includes_all_files_for_licensed_stage(self): 

221 with tempfile.TemporaryDirectory() as tmpdir: 

222 root = Path(tmpdir) / "root" 

223 stage_dir = root / "Sala1" / "S1-01-Test" / "raw" 

224 stage_dir.mkdir(parents=True) 

225 (stage_dir / "meta.ttl").write_text(LICENSED_META_TTL) 

226 (stage_dir / "prov.trig").write_text("{}") 

227 (stage_dir / "photo.jpg").write_text("image") 

228 

229 output_dir = Path(tmpdir) / "output" 

230 output_dir.mkdir() 

231 

232 folders = [("Sala1", "S1-01-Test", {"raw": {}})] 

233 

234 result = create_stage_zip("1", "raw", folders, root, output_dir, "Test Object") 

235 

236 assert result is not None 

237 zip_path, license_id = result 

238 assert zip_path.name == "sala1-test-object-1-raw.zip" 

239 assert license_id == "cc0-1.0" 

240 with zipfile.ZipFile(zip_path) as zf: 

241 names = sorted(zf.namelist()) 

242 assert names == ["S1-01-Test/raw/meta.ttl", "S1-01-Test/raw/photo.jpg", "S1-01-Test/raw/prov.trig"] 

243 

244 def test_includes_only_metadata_for_unlicensed_stage(self): 

245 with tempfile.TemporaryDirectory() as tmpdir: 

246 root = Path(tmpdir) / "root" 

247 stage_dir = root / "Sala1" / "S1-01-Test" / "raw" 

248 stage_dir.mkdir(parents=True) 

249 (stage_dir / "meta.ttl").write_text(UNLICENSED_META_TTL) 

250 (stage_dir / "prov.trig").write_text("{}") 

251 (stage_dir / "photo.jpg").write_text("image") 

252 

253 output_dir = Path(tmpdir) / "output" 

254 output_dir.mkdir() 

255 

256 folders = [("Sala1", "S1-01-Test", {"raw": {}})] 

257 

258 result = create_stage_zip("1", "raw", folders, root, output_dir, "Test Object") 

259 

260 assert result is not None 

261 zip_path, license_id = result 

262 assert license_id is None 

263 with zipfile.ZipFile(zip_path) as zf: 

264 names = sorted(zf.namelist()) 

265 assert names == ["S1-01-Test/raw/meta.ttl", "S1-01-Test/raw/prov.trig"] 

266 

267 def test_multiple_folders_grouped_entity_license(self): 

268 with tempfile.TemporaryDirectory() as tmpdir: 

269 root = Path(tmpdir) / "root" 

270 

271 for variant in ["a", "b"]: 

272 stage_dir = root / "Sala6" / f"S6-98{variant}-Test" / "raw" 

273 stage_dir.mkdir(parents=True) 

274 (stage_dir / "meta.ttl").write_text(LICENSED_META_TTL) 

275 (stage_dir / "photo.jpg").write_text("image") 

276 

277 output_dir = Path(tmpdir) / "output" 

278 output_dir.mkdir() 

279 

280 folders = [ 

281 ("Sala6", "S6-98a-Test", {"raw": {}}), 

282 ("Sala6", "S6-98b-Test", {"raw": {}}), 

283 ] 

284 

285 result = create_stage_zip("98", "raw", folders, root, output_dir, "Test Masks") 

286 

287 assert result is not None 

288 zip_path, license_id = result 

289 assert license_id == "cc0-1.0" 

290 with zipfile.ZipFile(zip_path) as zf: 

291 names = sorted(zf.namelist()) 

292 assert names == [ 

293 "S6-98a-Test/raw/meta.ttl", 

294 "S6-98a-Test/raw/photo.jpg", 

295 "S6-98b-Test/raw/meta.ttl", 

296 "S6-98b-Test/raw/photo.jpg", 

297 ] 

298 

299 def test_multiple_folders_unlicensed(self): 

300 with tempfile.TemporaryDirectory() as tmpdir: 

301 root = Path(tmpdir) / "root" 

302 

303 for variant in ["a", "b"]: 

304 stage_dir = root / "Sala6" / f"S6-98{variant}-Test" / "raw" 

305 stage_dir.mkdir(parents=True) 

306 (stage_dir / "meta.ttl").write_text(UNLICENSED_META_TTL) 

307 

308 output_dir = Path(tmpdir) / "output" 

309 output_dir.mkdir() 

310 

311 folders = [ 

312 ("Sala6", "S6-98a-Test", {"raw": {}}), 

313 ("Sala6", "S6-98b-Test", {"raw": {}}), 

314 ] 

315 

316 result = create_stage_zip("98", "raw", folders, root, output_dir, "Test Masks") 

317 

318 assert result is not None 

319 zip_path, license_id = result 

320 assert license_id is None 

321 with zipfile.ZipFile(zip_path) as zf: 

322 names = zf.namelist() 

323 assert names == ["S6-98a-Test/raw/meta.ttl", "S6-98b-Test/raw/meta.ttl"] 

324 

325 def test_license_in_later_folder_includes_all_data(self): 

326 with tempfile.TemporaryDirectory() as tmpdir: 

327 root = Path(tmpdir) / "root" 

328 

329 stage_dir_a = root / "Sala6" / "S6-98a-Test" / "raw" 

330 stage_dir_a.mkdir(parents=True) 

331 (stage_dir_a / "meta.ttl").write_text(UNLICENSED_META_TTL) 

332 (stage_dir_a / "photo.jpg").write_text("image_a") 

333 

334 stage_dir_b = root / "Sala6" / "S6-98b-Test" / "raw" 

335 stage_dir_b.mkdir(parents=True) 

336 (stage_dir_b / "meta.ttl").write_text(LICENSED_META_TTL) 

337 (stage_dir_b / "photo.jpg").write_text("image_b") 

338 

339 output_dir = Path(tmpdir) / "output" 

340 output_dir.mkdir() 

341 

342 folders = [ 

343 ("Sala6", "S6-98a-Test", {"raw": {}}), 

344 ("Sala6", "S6-98b-Test", {"raw": {}}), 

345 ] 

346 

347 result = create_stage_zip("98", "raw", folders, root, output_dir, "Test Masks") 

348 

349 assert result is not None 

350 zip_path, license_id = result 

351 assert license_id == "cc0-1.0" 

352 with zipfile.ZipFile(zip_path) as zf: 

353 names = sorted(zf.namelist()) 

354 assert names == [ 

355 "S6-98a-Test/raw/meta.ttl", 

356 "S6-98a-Test/raw/photo.jpg", 

357 "S6-98b-Test/raw/meta.ttl", 

358 "S6-98b-Test/raw/photo.jpg", 

359 ] 

360 

361 def test_returns_none_for_missing_stage(self): 

362 with tempfile.TemporaryDirectory() as tmpdir: 

363 root = Path(tmpdir) / "root" 

364 stage_dir = root / "Sala1" / "S1-01-Test" / "raw" 

365 stage_dir.mkdir(parents=True) 

366 (stage_dir / "meta.ttl").write_text(UNLICENSED_META_TTL) 

367 

368 output_dir = Path(tmpdir) / "output" 

369 output_dir.mkdir() 

370 

371 folders = [("Sala1", "S1-01-Test", {"raw": {}})] 

372 

373 result = create_stage_zip("1", "dcho", folders, root, output_dir, "Test Object") 

374 

375 assert result is None 

376 assert not (output_dir / "sala1-test-object-1-dcho.zip").exists() 

377 

378 

379class TestExtractEntityTitle: 

380 def test_extracts_title_from_kg(self, real_kg): 

381 title = extract_entity_title(real_kg, ["1"]) 

382 assert title == "Carta nautica" 

383 

384 def test_returns_default_for_missing(self): 

385 g = Graph() 

386 title = extract_entity_title(g, ["nonexistent"]) 

387 assert title == "Entity nonexistent" 

388 

389 def test_takes_first_line(self): 

390 g = Graph() 

391 item_uri = URIRef(f"{BASE_URI}/itm/42/ob00/1") 

392 g.add((item_uri, P3_HAS_NOTE, Literal("First line\nSecond line"))) 

393 title = extract_entity_title(g, ["42"]) 

394 assert title == "First line" 

395 

396 

397class TestExtractAuthorsForEntityStage: 

398 def test_extracts_author_from_kg(self, real_kg): 

399 authors = extract_authors_for_entity_stage(real_kg, ["1"], "raw") 

400 assert authors == {"Federica Bonifazi"} 

401 

402 def test_accumulates_authors_across_steps(self, real_kg): 

403 authors = extract_authors_for_entity_stage(real_kg, ["1"], "dchoo") 

404 assert "Federica Bonifazi" in authors 

405 assert len(authors) > 1 

406 

407 def test_returns_empty_for_missing_entity(self, real_kg): 

408 authors = extract_authors_for_entity_stage(real_kg, ["nonexistent"], "raw") 

409 assert authors == set() 

410 

411 def test_extracts_from_synthetic_graph(self): 

412 g = Graph() 

413 act_uri = URIRef(f"{BASE_URI}/act/42/00/1") 

414 actor_uri = URIRef(f"{BASE_URI}/per/42/1") 

415 apl_uri = URIRef(f"{BASE_URI}/apl/42/1") 

416 g.add((act_uri, P14_CARRIED_OUT_BY, actor_uri)) 

417 g.add((actor_uri, RDF_TYPE, E21_PERSON)) 

418 g.add((actor_uri, P1_IS_IDENTIFIED_BY, apl_uri)) 

419 g.add((apl_uri, P190_HAS_SYMBOLIC_CONTENT, Literal("Test Author"))) 

420 authors = extract_authors_for_entity_stage(g, ["42"], "raw") 

421 assert authors == {"Test Author"} 

422 

423 

424class TestExtractMetadataAuthors: 

425 def test_extracts_step_05_authors(self): 

426 g = Graph() 

427 act_uri = URIRef(f"{BASE_URI}/act/42/05/1") 

428 actor_uri = URIRef(f"{BASE_URI}/per/meta/1") 

429 apl_uri = URIRef(f"{BASE_URI}/apl/meta/1") 

430 g.add((act_uri, P14_CARRIED_OUT_BY, actor_uri)) 

431 g.add((actor_uri, RDF_TYPE, E21_PERSON)) 

432 g.add((actor_uri, P1_IS_IDENTIFIED_BY, apl_uri)) 

433 g.add((apl_uri, P190_HAS_SYMBOLIC_CONTENT, Literal("Metadata Author"))) 

434 authors = extract_metadata_authors(g, ["42"]) 

435 assert authors == {"Metadata Author"} 

436 

437 def test_returns_empty_for_missing_entity(self): 

438 g = Graph() 

439 authors = extract_metadata_authors(g, ["nonexistent"]) 

440 assert authors == set() 

441 

442 def test_extracts_from_real_kg(self, real_kg): 

443 authors = extract_metadata_authors(real_kg, ["1"]) 

444 assert authors == {"Arcangelo Massari", "Arianna Moretti", "Sebastian Barzaghi"} 

445 

446 

447class TestLoadCreatorsLookup: 

448 def test_loads_creators_as_dict(self): 

449 with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f: 

450 f.write( 

451 "creators:\n" 

452 " - name_in_rdf: Test Author\n" 

453 " family_name: Author\n" 

454 " given_name: Test\n" 

455 " affiliation: Test Uni\n" 

456 " orcid: 0000-0001-2345-6789\n" 

457 ) 

458 f.flush() 

459 lookup = load_creators_lookup(Path(f.name)) 

460 assert lookup == { 

461 "Test Author": { 

462 "family_name": "Author", 

463 "given_name": "Test", 

464 "affiliation": "Test Uni", 

465 "orcid": "0000-0001-2345-6789", 

466 } 

467 } 

468 

469 

470class TestBuildCreatorsForEntityStage: 

471 def test_builds_creators_with_researcher_role(self, real_kg): 

472 lookup = { 

473 "Federica Bonifazi": { 

474 "family_name": "Bonifazi", 

475 "given_name": "Federica", 

476 "affiliation": "CNR-ISPC", 

477 "orcid": "0009-0000-8466-5541", 

478 } 

479 } 

480 creators = build_creators_for_entity_stage(real_kg, ["1"], "raw", lookup) 

481 assert creators == [ 

482 { 

483 "person_or_org": { 

484 "type": "personal", 

485 "family_name": "Bonifazi", 

486 "given_name": "Federica", 

487 "identifiers": [{"scheme": "orcid", "identifier": "0009-0000-8466-5541"}], 

488 }, 

489 "role": {"id": "researcher"}, 

490 "affiliations": [{"name": "CNR-ISPC"}], 

491 } 

492 ] 

493 

494 def test_ignores_authors_not_in_lookup(self, real_kg): 

495 lookup = {} 

496 creators = build_creators_for_entity_stage(real_kg, ["1"], "raw", lookup) 

497 assert creators == [] 

498 

499 def test_sorts_authors_alphabetically(self): 

500 g = Graph() 

501 for name in ["Zeta Author", "Alpha Author"]: 

502 act_uri = URIRef(f"{BASE_URI}/act/42/00/1") 

503 actor_uri = URIRef(f"{BASE_URI}/per/{name}/1") 

504 apl_uri = URIRef(f"{BASE_URI}/apl/{name}/1") 

505 g.add((act_uri, P14_CARRIED_OUT_BY, actor_uri)) 

506 g.add((actor_uri, RDF_TYPE, E21_PERSON)) 

507 g.add((actor_uri, P1_IS_IDENTIFIED_BY, apl_uri)) 

508 g.add((apl_uri, P190_HAS_SYMBOLIC_CONTENT, Literal(name))) 

509 lookup = { 

510 "Alpha Author": { 

511 "family_name": "Author", 

512 "given_name": "Alpha", 

513 "affiliation": "Uni", 

514 "orcid": "0000-0000-0000-0001", 

515 }, 

516 "Zeta Author": { 

517 "family_name": "Author", 

518 "given_name": "Zeta", 

519 "affiliation": "Uni", 

520 "orcid": "0000-0000-0000-0002", 

521 }, 

522 } 

523 creators = build_creators_for_entity_stage(g, ["42"], "raw", lookup) 

524 assert [c["person_or_org"]["given_name"] for c in creators] == ["Alpha", "Zeta"] 

525 

526 

527class TestBuildMetadataCreators: 

528 def test_builds_creators_with_datacurator_role(self): 

529 g = Graph() 

530 act_uri = URIRef(f"{BASE_URI}/act/42/05/1") 

531 actor_uri = URIRef(f"{BASE_URI}/per/meta/1") 

532 apl_uri = URIRef(f"{BASE_URI}/apl/meta/1") 

533 g.add((act_uri, P14_CARRIED_OUT_BY, actor_uri)) 

534 g.add((actor_uri, RDF_TYPE, E21_PERSON)) 

535 g.add((actor_uri, P1_IS_IDENTIFIED_BY, apl_uri)) 

536 g.add((apl_uri, P190_HAS_SYMBOLIC_CONTENT, Literal("Metadata Author"))) 

537 lookup = { 

538 "Metadata Author": { 

539 "family_name": "Author", 

540 "given_name": "Metadata", 

541 "affiliation": "Test Uni", 

542 "orcid": "0000-0001-2345-6789", 

543 } 

544 } 

545 creators = build_metadata_creators(g, ["42"], lookup) 

546 assert creators == [ 

547 { 

548 "person_or_org": { 

549 "type": "personal", 

550 "family_name": "Author", 

551 "given_name": "Metadata", 

552 "identifiers": [{"scheme": "orcid", "identifier": "0000-0001-2345-6789"}], 

553 }, 

554 "role": {"id": "datacurator"}, 

555 "affiliations": [{"name": "Test Uni"}], 

556 } 

557 ] 

558 

559 

560class TestMergeCreators: 

561 def test_merges_without_duplicates(self): 

562 digitization = [ 

563 { 

564 "person_or_org": { 

565 "type": "personal", 

566 "family_name": "Author", 

567 "given_name": "Digit", 

568 "identifiers": [{"scheme": "orcid", "identifier": "0000-0000-0000-0001"}], 

569 }, 

570 "role": {"id": "researcher"}, 

571 "affiliations": [{"name": "Uni"}], 

572 } 

573 ] 

574 metadata = [ 

575 { 

576 "person_or_org": { 

577 "type": "personal", 

578 "family_name": "Author", 

579 "given_name": "Meta", 

580 "identifiers": [{"scheme": "orcid", "identifier": "0000-0000-0000-0002"}], 

581 }, 

582 "role": {"id": "datacurator"}, 

583 "affiliations": [{"name": "Uni"}], 

584 } 

585 ] 

586 merged = merge_creators(digitization, metadata) 

587 assert len(merged) == 2 

588 assert merged[0]["role"] == {"id": "researcher"} 

589 assert merged[1]["role"] == {"id": "datacurator"} 

590 

591 def test_deduplicates_by_orcid(self): 

592 digitization = [ 

593 { 

594 "person_or_org": { 

595 "type": "personal", 

596 "family_name": "Shared", 

597 "given_name": "Author", 

598 "identifiers": [{"scheme": "orcid", "identifier": "0000-0000-0000-0001"}], 

599 }, 

600 "role": {"id": "researcher"}, 

601 "affiliations": [{"name": "Uni"}], 

602 } 

603 ] 

604 metadata = [ 

605 { 

606 "person_or_org": { 

607 "type": "personal", 

608 "family_name": "Shared", 

609 "given_name": "Author", 

610 "identifiers": [{"scheme": "orcid", "identifier": "0000-0000-0000-0001"}], 

611 }, 

612 "role": {"id": "datacurator"}, 

613 "affiliations": [{"name": "Uni"}], 

614 } 

615 ] 

616 merged = merge_creators(digitization, metadata) 

617 assert len(merged) == 1 

618 assert merged[0]["role"] == {"id": "researcher"} 

619 

620 def test_empty_lists(self): 

621 assert merge_creators([], []) == [] 

622 

623 def test_only_metadata_creators(self): 

624 metadata = [ 

625 { 

626 "person_or_org": { 

627 "type": "personal", 

628 "family_name": "Author", 

629 "given_name": "Meta", 

630 "identifiers": [{"scheme": "orcid", "identifier": "0000-0000-0000-0001"}], 

631 }, 

632 "role": {"id": "datacurator"}, 

633 "affiliations": [{"name": "Uni"}], 

634 } 

635 ] 

636 merged = merge_creators([], metadata) 

637 assert len(merged) == 1 

638 assert merged[0]["role"] == {"id": "datacurator"} 

639 

640 

641class TestBuildEntityUri: 

642 def test_builds_uri_for_numeric_id(self): 

643 result = build_entity_uri(["27"]) 

644 assert result == "https://w3id.org/changes/4/aldrovandi/itm/27/ob00/1" 

645 

646 def test_builds_uri_for_string_id(self): 

647 result = build_entity_uri(["ptb"]) 

648 assert result == "https://w3id.org/changes/4/aldrovandi/itm/ptb/ob00/1" 

649 

650 

651SAMPLE_CREATOR = { 

652 "person_or_org": { 

653 "type": "personal", 

654 "family_name": "Author", 

655 "given_name": "Test", 

656 "identifiers": [{"scheme": "orcid", "identifier": "0000-0001-2345-6789"}], 

657 }, 

658 "role": {"id": "researcher"}, 

659 "affiliations": [{"name": "Test Uni"}], 

660} 

661 

662SAMPLE_BASE_CONFIG = { 

663 "zenodo_url": "https://sandbox.zenodo.org/api", 

664 "access_token": "test_token", 

665 "user_agent": "piccione/2.1.0", 

666 "subjects": [{"subject": "test"}], 

667 "notes": "Test notes content", 

668 "locations": [ 

669 { 

670 "lat": 44.497, 

671 "lon": 11.353, 

672 "place": "Bologna, Italy", 

673 "description": "Palazzo Poggi Museum", 

674 }, 

675 ], 

676} 

677 

678SAMPLE_METHODS = "Test method content" 

679 

680 

681class TestGenerateZenodoConfig: 

682 def test_generates_valid_config(self, freezer): 

683 freezer.move_to("2024-06-15") 

684 zip_path = Path("/tmp/1-raw.zip") 

685 config = generate_zenodo_config("raw", zip_path, "Test Title", SAMPLE_BASE_CONFIG, [SAMPLE_CREATOR], SAMPLE_METHODS) 

686 

687 assert config == { 

688 "zenodo_url": "https://sandbox.zenodo.org/api", 

689 "access_token": "test_token", 

690 "user_agent": "piccione/2.1.0", 

691 "title": "Test Title - Raw - Aldrovandi Digital Twin", 

692 "description": 'Raw acquisition data of "Test Title" from the Aldrovandi Digital Twin. This dataset contains the raw material generated during the acquisition phase. Includes metadata (meta.ttl) and provenance (prov.trig) files following the <a href="https://w3id.org/dharc/ontology/chad-ap">CHAD-AP</a> ontology.\n', 

693 "resource_type": {"id": "dataset"}, 

694 "publisher": "Zenodo", 

695 "access": {"record": "public", "files": "public"}, 

696 "creators": [SAMPLE_CREATOR], 

697 "subjects": [{"subject": "test"}], 

698 "files": [str(zip_path.absolute())], 

699 "publication_date": "2024-06-15", 

700 "rights": [ 

701 { 

702 "title": {"en": "Creative Commons Zero v1.0 Universal (Metadata license)"}, 

703 "description": {"en": "Applies to metadata files: meta.ttl, prov.trig"}, 

704 "link": "https://creativecommons.org/publicdomain/zero/1.0/", 

705 }, 

706 ], 

707 "additional_descriptions": [ 

708 {"description": "Test method content", "type": {"id": "methods"}}, 

709 {"description": "Test notes content", "type": {"id": "notes"}}, 

710 ], 

711 "locations": { 

712 "features": [ 

713 { 

714 "geometry": {"type": "Point", "coordinates": [11.353, 44.497]}, 

715 "place": "Bologna, Italy", 

716 "description": "Palazzo Poggi Museum", 

717 }, 

718 ] 

719 }, 

720 } 

721 

722 def test_adds_entity_uri_as_alternate_identifier(self, freezer): 

723 freezer.move_to("2024-06-15") 

724 zip_path = Path("/tmp/27-raw.zip") 

725 entity_uri = "https://w3id.org/changes/4/aldrovandi/itm/27/ob00/1" 

726 config = generate_zenodo_config("raw", zip_path, "Test Title", SAMPLE_BASE_CONFIG, [SAMPLE_CREATOR], SAMPLE_METHODS, entity_uri=entity_uri) 

727 

728 assert config["identifiers"] == [ 

729 {"identifier": "https://w3id.org/changes/4/aldrovandi/itm/27/ob00/1", "scheme": "url"} 

730 ] 

731 

732 def test_converts_related_identifiers(self, freezer): 

733 freezer.move_to("2024-06-15") 

734 base_config = { 

735 **SAMPLE_BASE_CONFIG, 

736 "related_identifiers": [ 

737 { 

738 "identifier": "10.3724/2096-7004.di.2024.0061", 

739 "relation": "isdocumentedby", 

740 "resource_type": "publication-article", 

741 } 

742 ], 

743 } 

744 zip_path = Path("/tmp/27-raw.zip") 

745 config = generate_zenodo_config("raw", zip_path, "Test Title", base_config, [SAMPLE_CREATOR], SAMPLE_METHODS) 

746 

747 assert config["related_identifiers"] == [ 

748 { 

749 "identifier": "10.3724/2096-7004.di.2024.0061", 

750 "relation_type": {"id": "isdocumentedby"}, 

751 "resource_type": {"id": "publication-article"}, 

752 }, 

753 ] 

754 

755 def test_converts_notes_and_method_to_additional_descriptions(self, freezer): 

756 freezer.move_to("2024-06-15") 

757 zip_path = Path("/tmp/1-raw.zip") 

758 config = generate_zenodo_config("raw", zip_path, "Test Title", SAMPLE_BASE_CONFIG, [SAMPLE_CREATOR], SAMPLE_METHODS) 

759 

760 assert config["additional_descriptions"] == [ 

761 {"description": "Test method content", "type": {"id": "methods"}}, 

762 {"description": "Test notes content", "type": {"id": "notes"}}, 

763 ] 

764 

765 def test_cc0_disclaimer_in_additional_descriptions(self, freezer): 

766 freezer.move_to("2024-06-15") 

767 zip_path = Path("/tmp/1-raw.zip") 

768 config = generate_zenodo_config("raw", zip_path, "Test Title", SAMPLE_BASE_CONFIG, [SAMPLE_CREATOR], SAMPLE_METHODS, license="cc0-1.0") 

769 

770 assert config["additional_descriptions"] == [ 

771 {"description": "Test method content", "type": {"id": "methods"}}, 

772 {"description": "Test notes content", "type": {"id": "notes"}}, 

773 {"description": CC0_DISCLAIMER, "type": {"id": "notes"}}, 

774 ] 

775 

776 def test_converts_locations_to_geojson(self, freezer): 

777 freezer.move_to("2024-06-15") 

778 zip_path = Path("/tmp/1-raw.zip") 

779 config = generate_zenodo_config("raw", zip_path, "Test Title", SAMPLE_BASE_CONFIG, [SAMPLE_CREATOR], SAMPLE_METHODS) 

780 

781 assert config["locations"] == { 

782 "features": [ 

783 { 

784 "geometry": {"type": "Point", "coordinates": [11.353, 44.497]}, 

785 "place": "Bologna, Italy", 

786 "description": "Palazzo Poggi Museum", 

787 }, 

788 ] 

789 } 

790 

791 def test_includes_community_field(self, freezer): 

792 freezer.move_to("2024-06-15") 

793 base_config = {**SAMPLE_BASE_CONFIG, "community": "project-changes"} 

794 zip_path = Path("/tmp/1-raw.zip") 

795 config = generate_zenodo_config("raw", zip_path, "Test Title", base_config, [SAMPLE_CREATOR], SAMPLE_METHODS) 

796 

797 assert config["community"] == "project-changes" 

798 

799 def test_includes_restricted_notice_when_no_license(self, freezer): 

800 freezer.move_to("2024-06-15") 

801 zip_path = Path("/tmp/1-raw.zip") 

802 config = generate_zenodo_config("raw", zip_path, "Test Title", SAMPLE_BASE_CONFIG, [SAMPLE_CREATOR], SAMPLE_METHODS, has_license=False) 

803 

804 assert RESTRICTED_NOTICE not in config["description"] 

805 assert {"description": RESTRICTED_NOTICE, "type": {"id": "notes"}} in config["additional_descriptions"] 

806 

807 def test_no_restricted_notice_when_licensed(self, freezer): 

808 freezer.move_to("2024-06-15") 

809 zip_path = Path("/tmp/1-raw.zip") 

810 config = generate_zenodo_config("raw", zip_path, "Test Title", SAMPLE_BASE_CONFIG, [SAMPLE_CREATOR], SAMPLE_METHODS, has_license=True) 

811 

812 assert {"description": RESTRICTED_NOTICE, "type": {"id": "notes"}} not in config["additional_descriptions"] 

813 

814 def test_propagates_funding_field(self, freezer): 

815 freezer.move_to("2024-06-15") 

816 funding = [ 

817 { 

818 "funder": {"name": "European Union - NextGenerationEU"}, 

819 "award": { 

820 "title": {"en": "CHANGES"}, 

821 "number": "PE 0000020", 

822 }, 

823 } 

824 ] 

825 base_config = {**SAMPLE_BASE_CONFIG, "funding": funding} 

826 zip_path = Path("/tmp/1-raw.zip") 

827 config = generate_zenodo_config("raw", zip_path, "Test Title", base_config, [SAMPLE_CREATOR], SAMPLE_METHODS) 

828 

829 assert config["funding"] == funding 

830 

831 

832class TestExtractLicenseForEntityStage: 

833 def test_extracts_license_from_kg(self): 

834 g = Graph() 

835 lic_uri = URIRef(f"{BASE_URI}/lic/42/00/1") 

836 license_url = URIRef("https://creativecommons.org/publicdomain/zero/1.0/") 

837 g.add((lic_uri, P70I, license_url)) 

838 result = extract_license_for_entity_stage(g, "42", "raw") 

839 assert result == "cc0-1.0" 

840 

841 def test_returns_none_for_missing_license(self): 

842 g = Graph() 

843 result = extract_license_for_entity_stage(g, "42", "raw") 

844 assert result is None 

845 

846 def test_returns_none_for_unknown_license_uri(self): 

847 g = Graph() 

848 lic_uri = URIRef(f"{BASE_URI}/lic/42/00/1") 

849 unknown_license = URIRef("https://example.com/custom-license") 

850 g.add((lic_uri, P70I, unknown_license)) 

851 result = extract_license_for_entity_stage(g, "42", "raw") 

852 assert result is None 

853 

854 def test_extracts_cc_by(self): 

855 g = Graph() 

856 lic_uri = URIRef(f"{BASE_URI}/lic/42/00/1") 

857 license_url = URIRef("https://creativecommons.org/licenses/by/4.0/") 

858 g.add((lic_uri, P70I, license_url)) 

859 result = extract_license_for_entity_stage(g, "42", "raw") 

860 assert result == "cc-by-4.0" 

861 

862 def test_picks_defining_step_license(self): 

863 g = Graph() 

864 g.add((URIRef(f"{BASE_URI}/lic/42/00/1"), P70I, URIRef("https://creativecommons.org/licenses/by-nc/4.0/"))) 

865 g.add((URIRef(f"{BASE_URI}/lic/42/01/1"), P70I, URIRef("https://creativecommons.org/licenses/by-nc/4.0/"))) 

866 g.add((URIRef(f"{BASE_URI}/lic/42/02/1"), P70I, URIRef("https://creativecommons.org/publicdomain/zero/1.0/"))) 

867 assert extract_license_for_entity_stage(g, "42", "dcho") == "cc0-1.0" 

868 

869 def test_picks_defining_step_real_kg(self, real_kg): 

870 assert extract_license_for_entity_stage(real_kg, "vetrina_2_basso", "dcho") == "cc0-1.0" 

871 

872 def test_raw_returns_defining_step_license(self): 

873 g = Graph() 

874 g.add((URIRef(f"{BASE_URI}/lic/42/00/1"), P70I, URIRef("https://creativecommons.org/licenses/by-nc/4.0/"))) 

875 g.add((URIRef(f"{BASE_URI}/lic/42/02/1"), P70I, URIRef("https://creativecommons.org/publicdomain/zero/1.0/"))) 

876 assert extract_license_for_entity_stage(g, "42", "raw") == "cc-by-nc-4.0" 

877 

878 def test_returns_none_when_defining_step_missing(self): 

879 g = Graph() 

880 g.add((URIRef(f"{BASE_URI}/lic/42/00/1"), P70I, URIRef("https://creativecommons.org/licenses/by-nc/4.0/"))) 

881 g.add((URIRef(f"{BASE_URI}/lic/42/01/1"), P70I, URIRef("https://creativecommons.org/licenses/by-nc/4.0/"))) 

882 assert extract_license_for_entity_stage(g, "42", "dcho") is None 

883 

884 

885class TestExtractKeeperInfo: 

886 def test_extracts_keeper_from_kg(self, real_kg): 

887 keeper_name, keeper_location = extract_keeper_info(real_kg, ["1"]) 

888 assert keeper_name == "Biblioteca Universitaria di Bologna" 

889 assert keeper_location == "Bologna" 

890 

891 def test_extracts_non_bologna_keeper(self, real_kg): 

892 keeper_name, keeper_location = extract_keeper_info(real_kg, ["21"]) 

893 assert keeper_name == "Accademia Carrara" 

894 assert keeper_location == "Bergamo" 

895 

896 def test_returns_none_for_missing_entity(self, real_kg): 

897 keeper_name, keeper_location = extract_keeper_info(real_kg, ["nonexistent"]) 

898 assert keeper_name is None 

899 assert keeper_location is None 

900 

901 def test_extracts_from_synthetic_graph(self): 

902 g = Graph() 

903 custody_uri = URIRef(f"{BASE_URI}/act/42/ob08/1") 

904 keeper_uri = URIRef(f"{BASE_URI}/acr/test_museum/1") 

905 apl_uri = URIRef(f"{BASE_URI}/apl/test_museum/1") 

906 place_uri = URIRef(f"{BASE_URI}/plc/test_city/1") 

907 place_apl_uri = URIRef(f"{BASE_URI}/apl/test_city/1") 

908 g.add((custody_uri, P14_CARRIED_OUT_BY, keeper_uri)) 

909 g.add((keeper_uri, P1_IS_IDENTIFIED_BY, apl_uri)) 

910 g.add((apl_uri, P190_HAS_SYMBOLIC_CONTENT, Literal("Test Museum"))) 

911 g.add((keeper_uri, P74_HAS_RESIDENCE, place_uri)) 

912 g.add((place_uri, P1_IS_IDENTIFIED_BY, place_apl_uri)) 

913 g.add((place_apl_uri, P190_HAS_SYMBOLIC_CONTENT, Literal("Test City"))) 

914 keeper_name, keeper_location = extract_keeper_info(g, ["42"]) 

915 assert keeper_name == "Test Museum" 

916 assert keeper_location == "Test City" 

917 

918 def test_keeper_without_location(self): 

919 g = Graph() 

920 custody_uri = URIRef(f"{BASE_URI}/act/42/ob08/1") 

921 keeper_uri = URIRef(f"{BASE_URI}/acr/test_museum/1") 

922 apl_uri = URIRef(f"{BASE_URI}/apl/test_museum/1") 

923 g.add((custody_uri, P14_CARRIED_OUT_BY, keeper_uri)) 

924 g.add((keeper_uri, P1_IS_IDENTIFIED_BY, apl_uri)) 

925 g.add((apl_uri, P190_HAS_SYMBOLIC_CONTENT, Literal("Test Museum"))) 

926 keeper_name, keeper_location = extract_keeper_info(g, ["42"]) 

927 assert keeper_name == "Test Museum" 

928 assert keeper_location is None 

929 

930 

931class TestBuildEnhancedDescription: 

932 def test_raw_stage_description(self): 

933 result = build_enhanced_description("raw", "Test Object") 

934 assert result == ( 

935 'Raw acquisition data of "Test Object" from the Aldrovandi Digital Twin. ' 

936 "This dataset contains the raw material generated during the acquisition phase. " 

937 'Includes metadata (meta.ttl) and provenance (prov.trig) files following the <a href="https://w3id.org/dharc/ontology/chad-ap">CHAD-AP</a> ontology.\n' 

938 ) 

939 

940 def test_dcho_stage_description(self): 

941 result = build_enhanced_description("dcho", "Museum Specimen") 

942 assert "Digital Cultural Heritage Object" in result 

943 assert '"Museum Specimen"' in result 

944 assert "interpolation, gap filling, and resolution of geometric issues" in result 

945 

946 def test_dchoo_stage_description(self): 

947 result = build_enhanced_description("dchoo", "Object Title") 

948 assert "Optimized Digital Cultural Heritage Object" in result 

949 assert "optimised for real-time online interaction" in result 

950 

951 def test_description_never_contains_disclaimer(self): 

952 result = build_enhanced_description("dcho", "Test Object") 

953 assert CC0_DISCLAIMER not in result 

954 

955 def test_includes_keeper_and_location(self): 

956 result = build_enhanced_description("raw", "Test Object", keeper_name="Test Museum", keeper_location="Test City") 

957 assert "The original object is held by Test Museum (Test City)." in result 

958 

959 def test_includes_keeper_without_location(self): 

960 result = build_enhanced_description("raw", "Test Object", keeper_name="Test Museum") 

961 assert "The original object is held by Test Museum." in result 

962 assert "Test Museum (" not in result 

963 

964 def test_no_keeper_line_when_none(self): 

965 result = build_enhanced_description("raw", "Test Object") 

966 assert "held by" not in result 

967 

968 def test_description_is_single_paragraph(self): 

969 result = build_enhanced_description("raw", "Test Object", keeper_name="Museum", keeper_location="City") 

970 assert "\n" not in result.rstrip("\n") 

971 

972 

973class TestFormatCreatorsForTable: 

974 def test_formats_multiple_creators(self): 

975 config = { 

976 "creators": [ 

977 {"person_or_org": {"family_name": "Bordignon", "given_name": "Alice", "identifiers": [{"scheme": "orcid", "identifier": "0009-0008-3556-0493"}]}}, 

978 {"person_or_org": {"family_name": "Massari", "given_name": "Arcangelo", "identifiers": [{"scheme": "orcid", "identifier": "0000-0002-8420-0696"}]}}, 

979 ] 

980 } 

981 assert _format_creators_for_table(config) == "Bordignon, Alice [orcid:0009-0008-3556-0493]; Massari, Arcangelo [orcid:0000-0002-8420-0696]" 

982 

983 def test_formats_single_creator(self): 

984 config = { 

985 "creators": [ 

986 {"person_or_org": {"family_name": "Barzaghi", "given_name": "Sebastian", "identifiers": [{"scheme": "orcid", "identifier": "0000-0002-0799-1527"}]}}, 

987 ] 

988 } 

989 assert _format_creators_for_table(config) == "Barzaghi, Sebastian [orcid:0000-0002-0799-1527]" 

990 

991 

992class TestFormatLicensesForTable: 

993 def test_formats_cc0_metadata_and_content(self): 

994 config = { 

995 "rights": [ 

996 {"title": {"en": "Creative Commons Zero v1.0 Universal (Metadata license)"}}, 

997 {"title": {"en": "Creative Commons Zero v1.0 Universal (Content license)"}}, 

998 ] 

999 } 

1000 assert _format_licenses_for_table(config) == "cc0-1.0 (Metadata license); cc0-1.0 (Content license)" 

1001 

1002 def test_formats_mixed_licenses(self): 

1003 config = { 

1004 "rights": [ 

1005 {"title": {"en": "Creative Commons Zero v1.0 Universal (Metadata license)"}}, 

1006 {"title": {"en": "Creative Commons Attribution Non Commercial Share Alike 4.0 International (Content license)"}}, 

1007 ] 

1008 } 

1009 assert _format_licenses_for_table(config) == "cc0-1.0 (Metadata license); cc-by-nc-sa-4.0 (Content license)" 

1010 

1011 def test_formats_metadata_only(self): 

1012 config = { 

1013 "rights": [ 

1014 {"title": {"en": "Creative Commons Zero v1.0 Universal (Metadata license)"}}, 

1015 ] 

1016 } 

1017 assert _format_licenses_for_table(config) == "cc0-1.0 (Metadata license)" 

1018 

1019 

1020class TestExtractDoi: 

1021 def test_extracts_doi_from_record(self): 

1022 record = {"pids": {"doi": {"identifier": "10.5281/zenodo.12345"}}} 

1023 assert _extract_doi(record) == "10.5281/zenodo.12345" 

1024 

1025 def test_returns_empty_string_on_sandbox(self): 

1026 assert _extract_doi({}) == "" 

1027 assert _extract_doi({"pids": {}}) == "" 

1028 

1029 

1030class TestExtractRecordUrl: 

1031 def test_extracts_url_from_record(self): 

1032 record = {"links": {"self_html": "https://zenodo.org/records/12345"}} 

1033 assert _extract_record_url(record) == "https://zenodo.org/records/12345" 

1034 

1035 

1036class TestExtractAcquisitionTechnique: 

1037 def test_extracts_photography_from_kg(self, real_kg): 

1038 technique = extract_acquisition_technique(real_kg, ["1"]) 

1039 assert technique == "digital photography" 

1040 

1041 def test_extracts_scanning_from_kg(self, real_kg): 

1042 technique = extract_acquisition_technique(real_kg, ["12"]) 

1043 assert technique == "optical scanning" 

1044 

1045 def test_returns_none_for_missing_entity(self): 

1046 g = Graph() 

1047 assert extract_acquisition_technique(g, ["nonexistent"]) is None 

1048 

1049 def test_extracts_from_synthetic_graph(self): 

1050 g = Graph() 

1051 act_uri = URIRef(f"{BASE_URI}/act/42/00/1") 

1052 g.add((act_uri, P32_USED_GENERAL_TECHNIQUE, URIRef(f"{AAT}300266792"))) 

1053 assert extract_acquisition_technique(g, ["42"]) == "digital photography" 

1054 

1055 

1056class TestExtractDevices: 

1057 def test_extracts_devices_from_kg(self, real_kg): 

1058 devices = extract_devices(real_kg, ["1"]) 

1059 assert devices == ["Nikkor 50mm", "Nikon D7200"] 

1060 

1061 def test_extracts_scanner_device(self, real_kg): 

1062 devices = extract_devices(real_kg, ["12"]) 

1063 assert devices == ["Artec Eva"] 

1064 

1065 def test_returns_empty_for_missing_entity(self): 

1066 g = Graph() 

1067 assert extract_devices(g, ["nonexistent"]) == [] 

1068 

1069 def test_excludes_item_uris(self): 

1070 g = Graph() 

1071 act_uri = URIRef(f"{BASE_URI}/act/42/00/1") 

1072 g.add((act_uri, P16_USED_SPECIFIC_OBJECT, URIRef(f"{BASE_URI}/dev/nikon_d7200/1"))) 

1073 g.add((act_uri, P16_USED_SPECIFIC_OBJECT, URIRef(f"{BASE_URI}/itm/42/ob00/1"))) 

1074 devices = extract_devices(g, ["42"]) 

1075 assert devices == ["Nikon D7200"] 

1076 

1077 

1078class TestExtractSoftwareForStage: 

1079 def test_extracts_raw_software(self, real_kg): 

1080 software = extract_software_for_stage(real_kg, ["1"], "raw") 

1081 assert software == [] 

1082 

1083 def test_extracts_rawp_software(self, real_kg): 

1084 software = extract_software_for_stage(real_kg, ["1"], "rawp") 

1085 assert "3DF Zephyr" in software 

1086 

1087 def test_excludes_metadata_step_software(self, real_kg): 

1088 software = extract_software_for_stage(real_kg, ["1"], "dchoo") 

1089 assert "CHAD-AP" not in software 

1090 assert "HeriTrace" not in software 

1091 assert "Morph-KGC" not in software 

1092 

1093 def test_includes_step_06_software(self, real_kg): 

1094 software = extract_software_for_stage(real_kg, ["1"], "dchoo") 

1095 assert "ATON" in software 

1096 

1097 def test_returns_empty_for_missing_entity(self): 

1098 g = Graph() 

1099 assert extract_software_for_stage(g, ["nonexistent"], "raw") == [] 

1100 

1101 

1102class TestBuildMethodsDescription: 

1103 def test_includes_workflow_reference(self): 

1104 g = Graph() 

1105 result = build_methods_description(g, ["nonexistent"], "raw") 

1106 assert "doi:10.46298/transformations.14773" in result 

1107 

1108 def test_includes_technique_and_devices(self, real_kg): 

1109 result = build_methods_description(real_kg, ["1"], "raw") 

1110 assert "digital photography" in result 

1111 assert "Nikon D7200" in result 

1112 

1113 def test_includes_software_for_rawp(self, real_kg): 

1114 result = build_methods_description(real_kg, ["1"], "rawp") 

1115 assert "Processing software:" in result 

1116 assert "3DF Zephyr" in result 

1117 

1118 def test_no_software_for_raw(self, real_kg): 

1119 result = build_methods_description(real_kg, ["1"], "raw") 

1120 assert "Processing software:" not in result 

1121 

1122 def test_includes_chad_ap_reference(self): 

1123 g = Graph() 

1124 result = build_methods_description(g, ["nonexistent"], "raw") 

1125 assert "CHAD-AP" in result 

1126 

1127 def test_scanning_entity(self, real_kg): 

1128 result = build_methods_description(real_kg, ["12"], "raw") 

1129 assert "optical scanning" in result 

1130 assert "Artec Eva" in result 

1131 

1132 

1133MINIMAL_CONFIG = { 

1134 "title": "Test Object - Raw - Aldrovandi Digital Twin", 

1135 "zenodo_url": "https://sandbox.zenodo.org/api", 

1136 "access_token": "fake-token", 

1137 "user_agent": "test/1.0", 

1138 "publication_date": "2026-05-22", 

1139 "creators": [{ 

1140 "person_or_org": { 

1141 "type": "personal", 

1142 "family_name": "Rossi", 

1143 "given_name": "Mario", 

1144 "identifiers": [{"scheme": "orcid", "identifier": "0000-0001-0000-0001"}], 

1145 }, 

1146 "role": {"id": "researcher"}, 

1147 "affiliations": [{"name": "University of Bologna"}], 

1148 }], 

1149 "rights": [{"title": {"en": "Creative Commons Zero v1.0 Universal (Metadata license)"}, "link": "https://creativecommons.org/publicdomain/zero/1.0/"}], 

1150} 

1151 

1152MOCK_RECORD = { 

1153 "id": "999001", 

1154 "pids": {"doi": {"identifier": "10.5281/zenodo.999001"}}, 

1155 "links": {"self_html": "https://sandbox.zenodo.org/records/999001"}, 

1156} 

1157 

1158 

1159def _write_config(path: Path, overrides: dict | None = None) -> Path: 

1160 if overrides is None: 

1161 overrides = {} 

1162 config = {**MINIMAL_CONFIG, **overrides} 

1163 with open(path, "w") as f: 

1164 yaml.dump(config, f, default_flow_style=False, allow_unicode=True) 

1165 return path 

1166 

1167 

1168class TestAtomicWriteJson: 

1169 def test_writes_json(self, tmp_path): 

1170 path = tmp_path / "data.json" 

1171 _atomic_write_json(path, [{"a": 1}]) 

1172 with open(path) as f: 

1173 assert json.load(f) == [{"a": 1}] 

1174 

1175 def test_overwrites_existing(self, tmp_path): 

1176 path = tmp_path / "data.json" 

1177 _atomic_write_json(path, [{"old": True}]) 

1178 _atomic_write_json(path, [{"new": True}]) 

1179 with open(path) as f: 

1180 assert json.load(f) == [{"new": True}] 

1181 

1182 

1183class TestWriteDoiTable: 

1184 def test_generates_csv_from_drafts(self, tmp_path): 

1185 config_path = _write_config(tmp_path / "test-raw.yaml") 

1186 drafts = [{ 

1187 "draft_id": "100", 

1188 "config_file": str(config_path), 

1189 "title": "Test", 

1190 "zenodo_url": "https://sandbox.zenodo.org/api", 

1191 "access_token": "tok", 

1192 "user_agent": "ua", 

1193 "status": "uploaded", 

1194 "doi": "10.5281/zenodo.100", 

1195 "record_url": "https://sandbox.zenodo.org/records/100", 

1196 }] 

1197 csv_path = _write_doi_table(drafts, tmp_path) 

1198 with open(csv_path) as f: 

1199 rows = list(csv.DictReader(f)) 

1200 assert len(rows) == 1 

1201 assert rows[0]["DOI"] == "10.5281/zenodo.100" 

1202 assert rows[0]["Titolo"] == "Test Object - Raw - Aldrovandi Digital Twin" 

1203 

1204 def test_skips_failed_entries(self, tmp_path): 

1205 config_path = _write_config(tmp_path / "test-raw.yaml") 

1206 drafts = [ 

1207 { 

1208 "draft_id": "100", 

1209 "config_file": str(config_path), 

1210 "title": "Good", 

1211 "zenodo_url": "", 

1212 "access_token": "", 

1213 "user_agent": "", 

1214 "status": "uploaded", 

1215 "doi": "10.5281/zenodo.100", 

1216 "record_url": "https://sandbox.zenodo.org/records/100", 

1217 }, 

1218 { 

1219 "draft_id": "", 

1220 "config_file": str(config_path), 

1221 "title": "Bad", 

1222 "zenodo_url": "", 

1223 "access_token": "", 

1224 "user_agent": "", 

1225 "status": "failed", 

1226 "doi": "", 

1227 "record_url": "", 

1228 "error": "boom", 

1229 }, 

1230 ] 

1231 csv_path = _write_doi_table(drafts, tmp_path) 

1232 with open(csv_path) as f: 

1233 rows = list(csv.DictReader(f)) 

1234 assert len(rows) == 1 

1235 

1236 

1237class TestUploadAllResume: 

1238 def _setup_configs(self, tmp_path): 

1239 configs_dir = tmp_path / "configs" 

1240 configs_dir.mkdir() 

1241 _write_config(configs_dir / "entity-a-raw.yaml", {"title": "Entity A - Raw"}) 

1242 _write_config(configs_dir / "entity-b-raw.yaml", {"title": "Entity B - Raw"}) 

1243 _write_config(configs_dir / "entity-c-raw.yaml", {"title": "Entity C - Raw"}) 

1244 return configs_dir 

1245 

1246 @patch("changes_metadata_manager.zenodo_upload.time.sleep") 

1247 @patch("changes_metadata_manager.zenodo_upload.piccione_upload") 

1248 def test_fresh_upload(self, mock_upload, mock_sleep, tmp_path): 

1249 configs_dir = self._setup_configs(tmp_path) 

1250 call_count = 0 

1251 

1252 def side_effect(config_file, publish=False): 

1253 nonlocal call_count 

1254 call_count += 1 

1255 return { 

1256 "id": f"draft-{call_count}", 

1257 "pids": {}, 

1258 "links": {"self_html": f"https://sandbox.zenodo.org/records/draft-{call_count}"}, 

1259 } 

1260 

1261 mock_upload.side_effect = side_effect 

1262 upload_all(configs_dir, publish=False) 

1263 

1264 drafts_path = tmp_path / "drafts.json" 

1265 with open(drafts_path) as f: 

1266 drafts = json.load(f) 

1267 assert len(drafts) == 3 

1268 assert all(d["status"] == "uploaded" for d in drafts) 

1269 assert mock_upload.call_count == 3 

1270 

1271 @patch("changes_metadata_manager.zenodo_upload.time.sleep") 

1272 @patch("changes_metadata_manager.zenodo_upload.piccione_upload") 

1273 def test_resume_skips_completed(self, mock_upload, mock_sleep, tmp_path): 

1274 configs_dir = self._setup_configs(tmp_path) 

1275 drafts_path = tmp_path / "drafts.json" 

1276 _atomic_write_json(drafts_path, [{ 

1277 "draft_id": "existing-1", 

1278 "config_file": str(configs_dir / "entity-a-raw.yaml"), 

1279 "title": "Entity A - Raw", 

1280 "zenodo_url": "https://sandbox.zenodo.org/api", 

1281 "access_token": "tok", 

1282 "user_agent": "ua", 

1283 "status": "uploaded", 

1284 "doi": "", 

1285 "record_url": "https://sandbox.zenodo.org/uploads/existing-1", 

1286 }]) 

1287 

1288 mock_upload.return_value = { 

1289 "id": "new-draft", 

1290 "pids": {}, 

1291 "links": {"self_html": "https://sandbox.zenodo.org/records/new-draft"}, 

1292 } 

1293 

1294 upload_all(configs_dir, publish=False) 

1295 

1296 with open(drafts_path) as f: 

1297 drafts = json.load(f) 

1298 assert len(drafts) == 3 

1299 assert mock_upload.call_count == 2 

1300 stems = {Path(d["config_file"]).stem for d in drafts if d["status"] == "uploaded"} 

1301 assert "entity-a-raw" in stems 

1302 assert "entity-b-raw" in stems 

1303 assert "entity-c-raw" in stems 

1304 

1305 @patch("changes_metadata_manager.zenodo_upload.time.sleep") 

1306 @patch("changes_metadata_manager.zenodo_upload.piccione_upload") 

1307 def test_failure_continues_and_records_error(self, mock_upload, mock_sleep, tmp_path): 

1308 configs_dir = self._setup_configs(tmp_path) 

1309 call_count = 0 

1310 

1311 def side_effect(config_file, publish=False): 

1312 nonlocal call_count 

1313 call_count += 1 

1314 if call_count == 2: 

1315 raise RuntimeError("Zenodo is down") 

1316 return { 

1317 "id": f"draft-{call_count}", 

1318 "pids": {}, 

1319 "links": {"self_html": f"https://sandbox.zenodo.org/records/draft-{call_count}"}, 

1320 } 

1321 

1322 mock_upload.side_effect = side_effect 

1323 upload_all(configs_dir, publish=False) 

1324 

1325 drafts_path = tmp_path / "drafts.json" 

1326 with open(drafts_path) as f: 

1327 drafts = json.load(f) 

1328 assert len(drafts) == 3 

1329 statuses = [d["status"] for d in drafts] 

1330 assert statuses.count("uploaded") == 2 

1331 assert statuses.count("failed") == 1 

1332 failed = [d for d in drafts if d["status"] == "failed"][0] 

1333 assert failed["error"] == "Zenodo is down" 

1334 

1335 @patch("changes_metadata_manager.zenodo_upload.time.sleep") 

1336 @patch("changes_metadata_manager.zenodo_upload.piccione_upload") 

1337 def test_failed_entry_retried_on_rerun(self, mock_upload, mock_sleep, tmp_path): 

1338 configs_dir = self._setup_configs(tmp_path) 

1339 drafts_path = tmp_path / "drafts.json" 

1340 _atomic_write_json(drafts_path, [ 

1341 { 

1342 "draft_id": "existing-1", 

1343 "config_file": str(configs_dir / "entity-a-raw.yaml"), 

1344 "title": "Entity A - Raw", 

1345 "zenodo_url": "https://sandbox.zenodo.org/api", 

1346 "access_token": "tok", 

1347 "user_agent": "ua", 

1348 "status": "uploaded", 

1349 "doi": "", 

1350 "record_url": "", 

1351 }, 

1352 { 

1353 "draft_id": "", 

1354 "config_file": str(configs_dir / "entity-b-raw.yaml"), 

1355 "title": "entity-b-raw", 

1356 "zenodo_url": "", 

1357 "access_token": "", 

1358 "user_agent": "", 

1359 "status": "failed", 

1360 "doi": "", 

1361 "record_url": "", 

1362 "error": "previous failure", 

1363 }, 

1364 { 

1365 "draft_id": "existing-3", 

1366 "config_file": str(configs_dir / "entity-c-raw.yaml"), 

1367 "title": "Entity C - Raw", 

1368 "zenodo_url": "https://sandbox.zenodo.org/api", 

1369 "access_token": "tok", 

1370 "user_agent": "ua", 

1371 "status": "uploaded", 

1372 "doi": "", 

1373 "record_url": "", 

1374 }, 

1375 ]) 

1376 

1377 mock_upload.return_value = { 

1378 "id": "retried-draft", 

1379 "pids": {}, 

1380 "links": {"self_html": "https://sandbox.zenodo.org/records/retried-draft"}, 

1381 } 

1382 

1383 upload_all(configs_dir, publish=False) 

1384 

1385 assert mock_upload.call_count == 1 

1386 with open(drafts_path) as f: 

1387 drafts = json.load(f) 

1388 assert len(drafts) == 3 

1389 assert all(d["status"] == "uploaded" for d in drafts) 

1390 retried = [d for d in drafts if Path(d["config_file"]).stem == "entity-b-raw"][0] 

1391 assert retried["draft_id"] == "retried-draft" 

1392 assert "error" not in retried 

1393 

1394 @patch("changes_metadata_manager.zenodo_upload.time.sleep") 

1395 @patch("changes_metadata_manager.zenodo_upload.piccione_upload") 

1396 def test_drafts_json_written_after_each_upload(self, mock_upload, mock_sleep, tmp_path): 

1397 configs_dir = self._setup_configs(tmp_path) 

1398 snapshots: list[int] = [] 

1399 

1400 def counting_upload(config_file, publish=False): 

1401 return { 

1402 "id": f"draft-{len(snapshots) + 1}", 

1403 "pids": {}, 

1404 "links": {"self_html": f"https://sandbox.zenodo.org/records/draft-{len(snapshots) + 1}"}, 

1405 } 

1406 

1407 mock_upload.side_effect = counting_upload 

1408 

1409 def tracking_write(path, data): 

1410 snapshots.append(len(data)) 

1411 fd, tmp = tempfile.mkstemp(dir=path.parent, suffix=".tmp") 

1412 import os 

1413 with os.fdopen(fd, "w") as f: 

1414 json.dump(data, f, indent=2) 

1415 os.replace(tmp, path) 

1416 

1417 with patch("changes_metadata_manager.zenodo_upload._atomic_write_json", side_effect=tracking_write): 

1418 upload_all(configs_dir, publish=False) 

1419 

1420 assert snapshots == [1, 2, 3] 

1421 

1422 @patch("changes_metadata_manager.zenodo_upload.time.sleep") 

1423 @patch("changes_metadata_manager.zenodo_upload.piccione_upload") 

1424 def test_publish_flag_sets_published_status(self, mock_upload, mock_sleep, tmp_path): 

1425 configs_dir = self._setup_configs(tmp_path) 

1426 mock_upload.return_value = { 

1427 "id": "pub-1", 

1428 "pids": {"doi": {"identifier": "10.5281/zenodo.pub1"}}, 

1429 "links": {"self_html": "https://zenodo.org/records/pub-1"}, 

1430 } 

1431 

1432 upload_all(configs_dir, publish=True) 

1433 

1434 drafts_path = tmp_path / "drafts.json" 

1435 with open(drafts_path) as f: 

1436 drafts = json.load(f) 

1437 assert all(d["status"] == "published" for d in drafts) 

1438 assert all(d["doi"] == "10.5281/zenodo.pub1" for d in drafts) 

1439 

1440 

1441class TestPublishAllDraftsResume: 

1442 def _make_drafts(self, tmp_path, statuses): 

1443 configs_dir = tmp_path / "configs" 

1444 configs_dir.mkdir(exist_ok=True) 

1445 drafts = [] 

1446 for i, status in enumerate(statuses): 

1447 config_path = _write_config(configs_dir / f"entity-{i}-raw.yaml", {"title": f"Entity {i}"}) 

1448 entry = { 

1449 "draft_id": f"draft-{i}", 

1450 "config_file": str(config_path), 

1451 "title": f"Entity {i}", 

1452 "zenodo_url": "https://sandbox.zenodo.org/api", 

1453 "access_token": "tok", 

1454 "user_agent": "ua", 

1455 "status": status, 

1456 "doi": "10.5281/existing" if status == "published" else "", 

1457 "record_url": f"https://sandbox.zenodo.org/records/draft-{i}" if status == "published" else "", 

1458 } 

1459 if status in ("failed", "publish_failed"): 

1460 entry["error"] = "old error" 

1461 drafts.append(entry) 

1462 drafts_path = tmp_path / "drafts.json" 

1463 _atomic_write_json(drafts_path, drafts) 

1464 return drafts_path 

1465 

1466 @patch("changes_metadata_manager.zenodo_upload.time.sleep") 

1467 @patch("changes_metadata_manager.zenodo_upload.piccione_publish_draft") 

1468 def test_publishes_uploaded_drafts(self, mock_publish, mock_sleep, tmp_path): 

1469 drafts_path = self._make_drafts(tmp_path, ["uploaded", "uploaded"]) 

1470 mock_publish.return_value = { 

1471 "pids": {"doi": {"identifier": "10.5281/zenodo.pub"}}, 

1472 "links": {"self_html": "https://zenodo.org/records/pub"}, 

1473 } 

1474 

1475 publish_all_drafts(drafts_path) 

1476 

1477 with open(drafts_path) as f: 

1478 drafts = json.load(f) 

1479 assert all(d["status"] == "published" for d in drafts) 

1480 assert all(d["doi"] == "10.5281/zenodo.pub" for d in drafts) 

1481 assert mock_publish.call_count == 2 

1482 

1483 @patch("changes_metadata_manager.zenodo_upload.time.sleep") 

1484 @patch("changes_metadata_manager.zenodo_upload.piccione_publish_draft") 

1485 def test_skips_already_published(self, mock_publish, mock_sleep, tmp_path): 

1486 drafts_path = self._make_drafts(tmp_path, ["published", "uploaded"]) 

1487 mock_publish.return_value = { 

1488 "pids": {"doi": {"identifier": "10.5281/zenodo.new"}}, 

1489 "links": {"self_html": "https://zenodo.org/records/new"}, 

1490 } 

1491 

1492 publish_all_drafts(drafts_path) 

1493 

1494 assert mock_publish.call_count == 1 

1495 with open(drafts_path) as f: 

1496 drafts = json.load(f) 

1497 assert drafts[0]["doi"] == "10.5281/existing" 

1498 assert drafts[1]["doi"] == "10.5281/zenodo.new" 

1499 

1500 @patch("changes_metadata_manager.zenodo_upload.time.sleep") 

1501 @patch("changes_metadata_manager.zenodo_upload.piccione_publish_draft") 

1502 def test_failure_continues(self, mock_publish, mock_sleep, tmp_path): 

1503 drafts_path = self._make_drafts(tmp_path, ["uploaded", "uploaded"]) 

1504 call_count = 0 

1505 

1506 def side_effect(*args, **kwargs): 

1507 nonlocal call_count 

1508 call_count += 1 

1509 if call_count == 1: 

1510 raise RuntimeError("publish error") 

1511 return { 

1512 "pids": {"doi": {"identifier": "10.5281/zenodo.ok"}}, 

1513 "links": {"self_html": "https://zenodo.org/records/ok"}, 

1514 } 

1515 

1516 mock_publish.side_effect = side_effect 

1517 publish_all_drafts(drafts_path) 

1518 

1519 with open(drafts_path) as f: 

1520 drafts = json.load(f) 

1521 assert drafts[0]["status"] == "publish_failed" 

1522 assert drafts[0]["error"] == "publish error" 

1523 assert drafts[1]["status"] == "published" 

1524 assert drafts[1]["doi"] == "10.5281/zenodo.ok" 

1525 

1526 @patch("changes_metadata_manager.zenodo_upload.time.sleep") 

1527 @patch("changes_metadata_manager.zenodo_upload.piccione_publish_draft") 

1528 def test_retries_publish_failed(self, mock_publish, mock_sleep, tmp_path): 

1529 drafts_path = self._make_drafts(tmp_path, ["published", "publish_failed"]) 

1530 mock_publish.return_value = { 

1531 "pids": {"doi": {"identifier": "10.5281/zenodo.retried"}}, 

1532 "links": {"self_html": "https://zenodo.org/records/retried"}, 

1533 } 

1534 

1535 publish_all_drafts(drafts_path) 

1536 

1537 assert mock_publish.call_count == 1 

1538 with open(drafts_path) as f: 

1539 drafts = json.load(f) 

1540 assert drafts[1]["status"] == "published" 

1541 assert drafts[1]["doi"] == "10.5281/zenodo.retried" 

1542 assert "error" not in drafts[1] 

1543 

1544 @patch("changes_metadata_manager.zenodo_upload.time.sleep") 

1545 @patch("changes_metadata_manager.zenodo_upload.piccione_publish_draft") 

1546 def test_skips_upload_failed_entries(self, mock_publish, mock_sleep, tmp_path): 

1547 drafts_path = self._make_drafts(tmp_path, ["uploaded", "failed"]) 

1548 mock_publish.return_value = { 

1549 "pids": {"doi": {"identifier": "10.5281/zenodo.ok"}}, 

1550 "links": {"self_html": "https://zenodo.org/records/ok"}, 

1551 } 

1552 

1553 publish_all_drafts(drafts_path) 

1554 

1555 assert mock_publish.call_count == 1 

1556 with open(drafts_path) as f: 

1557 drafts = json.load(f) 

1558 assert drafts[0]["status"] == "published" 

1559 assert drafts[1]["status"] == "failed"