Coverage for tests / test_provenance.py: 100%
74 statements
« prev ^ index » next coverage.py v7.12.0, created at 2026-03-21 12:19 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2026-03-21 12:19 +0000
1#!/usr/bin/env python3
3# SPDX-FileCopyrightText: 2025-2026 Arcangelo Massari <arcangelomas@gmail.com>
4#
5# SPDX-License-Identifier: ISC
7"""
8Tests for the provenance generator script.
9"""
11import os
12import sys
13import tempfile
14import shutil
15import pytest
16from rdflib import Dataset, URIRef, Namespace
17from rdflib.namespace import RDF
19# Add the src directory to the path so we can import the module
20sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
21from changes_metadata_manager.generate_provenance import generate_provenance_snapshots
@pytest.fixture
def test_environment():
    """Provide a scratch input directory, a Turtle data file and an output path.

    Yields:
        dict: ``test_dir`` is a fresh directory created under ``./tests/``,
        ``test_ttl`` is a Turtle file inside it describing ``ex:item1`` and
        ``ex:item2``, and ``test_output`` is the path of a ``.nq`` file that
        does not exist yet.

    Every temporary directory created here is removed on teardown, including
    when the setup itself fails part-way through.
    """
    test_dir = tempfile.mkdtemp(dir='./tests/')
    # tempfile.mktemp() is deprecated and race-prone: another process can claim
    # the returned name before it is used. Reserve a private directory instead
    # and name the (still non-existent) output file inside it. It is kept
    # outside test_dir so the output never lands in the input directory.
    output_dir = tempfile.mkdtemp()
    test_ttl = os.path.join(test_dir, 'test_data.ttl')
    test_output = os.path.join(output_dir, 'provenance.nq')

    try:
        # Create test data file
        with open(test_ttl, 'w', encoding='utf-8') as f:
            f.write("""
@prefix ex: <http://example.org/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix crm: <http://www.cidoc-crm.org/cidoc-crm/> .

ex:item1 a crm:E22_Human-Made_Object ;
    rdfs:label "Test Manuscript" .

ex:item2 a crm:E21_Person ;
    rdfs:label "John Doe" .
        """)

        yield {"test_dir": test_dir, "test_ttl": test_ttl, "test_output": test_output}
    finally:
        # Clean up both scratch directories; removing output_dir also removes
        # any output file a test produced.
        shutil.rmtree(test_dir, ignore_errors=True)
        shutil.rmtree(output_dir, ignore_errors=True)
def test_provenance_generation(test_environment):
    """Check the named graphs and core PROV triples produced for both fixture items."""
    source_dir = test_environment["test_dir"]
    output_path = test_environment["test_output"]

    orcid = "https://orcid.org/0000-0002-8420-0696"
    source_uri = "https://example.org/primary-source"
    generate_provenance_snapshots(
        source_dir,
        output_path,
        output_format='trig',
        agent_orcid=orcid,
        primary_source=source_uri,
    )

    # Nothing else can be verified unless the file was written.
    assert os.path.exists(output_path), "Output file was not created"

    parsed = Dataset()
    parsed.parse(output_path, format='trig')

    prov = Namespace('http://www.w3.org/ns/prov#')
    item_names = ("item1", "item2")

    # One provenance graph is expected per described item.
    graph_ids = [URIRef(f'http://example.org/{name}/prov/') for name in item_names]
    found_ids = [g.identifier for g in parsed.graphs()]
    for graph_id in graph_ids:
        assert graph_id in found_ids, f"Expected graph {graph_id} not found"

    prov_graphs = {
        name: parsed.graph(URIRef(f'http://example.org/{name}/prov/'))
        for name in item_names
    }
    snapshots = {
        name: URIRef(f'http://example.org/{name}/prov/se/1')
        for name in item_names
    }

    # Each snapshot is typed as prov:Entity ...
    for name in item_names:
        typed = (snapshots[name], RDF.type, prov.Entity)
        assert typed in prov_graphs[name], f"{name} snapshot is not typed as prov:Entity"

    # ... specializes the item it describes ...
    for name in item_names:
        specialization = (
            snapshots[name],
            prov.specializationOf,
            URIRef(f'http://example.org/{name}'),
        )
        assert specialization in prov_graphs[name]

    # ... and points at the primary source passed to the generator.
    for name in item_names:
        sourced = (snapshots[name], prov.hadPrimarySource, URIRef(source_uri))
        assert sourced in prov_graphs[name], f"{name} snapshot missing primary source"
def test_input_format_parameter(test_environment):
    """Check that an explicit ``input_format`` lets a file with an unknown extension be read."""
    source_dir = test_environment["test_dir"]
    output_path = test_environment["test_output"]

    # Turtle content stored under an extension that gives no hint of its format.
    odd_file = os.path.join(source_dir, 'unknown_format.xyz')
    turtle_text = """
@prefix ex: <http://example.org/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix crm: <http://www.cidoc-crm.org/cidoc-crm/> .

ex:item3 a crm:E22_Human-Made_Object ;
    rdfs:label "Test Object with Unknown Format" .
    """
    with open(odd_file, 'w') as handle:
        handle.write(turtle_text)

    # The input format is named explicitly for this run.
    orcid = "https://orcid.org/0000-0002-8420-0696"
    source_uri = "https://example.org/primary-source"
    generate_provenance_snapshots(
        source_dir,
        output_path,
        input_format='turtle',
        output_format='trig',
        agent_orcid=orcid,
        primary_source=source_uri,
    )

    assert os.path.exists(output_path), "Output file was not created"

    parsed = Dataset()
    parsed.parse(output_path, format='trig')

    prov = Namespace('http://www.w3.org/ns/prov#')

    # item3 only exists in the oddly named file, so its graph shows that file was read.
    item3_graph = URIRef('http://example.org/item3/prov/')
    found_ids = [g.identifier for g in parsed.graphs()]
    assert item3_graph in found_ids, f"Expected graph {item3_graph} not found"

    snapshot = URIRef('http://example.org/item3/prov/se/1')
    typed = (snapshot, RDF.type, prov.Entity)
    assert typed in parsed.graph(item3_graph), "item3 snapshot is not typed as prov:Entity"
def test_empty_directory(test_environment):
    """Check that running the generator on an empty directory writes no output file."""
    # A brand-new directory with nothing in it serves as the input.
    vacant_dir = tempfile.mkdtemp(dir='./tests/')
    output_path = test_environment["test_output"]

    try:
        orcid = "https://orcid.org/0000-0002-8420-0696"
        source_uri = "https://example.org/primary-source"
        generate_provenance_snapshots(
            vacant_dir,
            output_path,
            agent_orcid=orcid,
            primary_source=source_uri,
        )

        output_exists = os.path.exists(output_path)
        assert not output_exists, "Output file should not be created for empty directory"
    finally:
        # This directory is not owned by the fixture, so remove it here.
        if os.path.exists(vacant_dir):
            shutil.rmtree(vacant_dir)
if __name__ == '__main__':
    # pytest.main() returns the run's exit code; hand it to the interpreter so
    # that running this file directly reports failures to the shell or CI.
    sys.exit(pytest.main())