Coverage for tests / test_provenance.py: 100%

74 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2026-03-21 12:19 +0000

1#!/usr/bin/env python3 

2 

3# SPDX-FileCopyrightText: 2025-2026 Arcangelo Massari <arcangelomas@gmail.com> 

4# 

5# SPDX-License-Identifier: ISC 

6 

7""" 

8Tests for the provenance generator script. 

9""" 

10 

11import os 

12import sys 

13import tempfile 

14import shutil 

15import pytest 

16from rdflib import Dataset, URIRef, Namespace 

17from rdflib.namespace import RDF 

18 

19# Add the src directory to the path so we can import the module 

20sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) 

21from changes_metadata_manager.generate_provenance import generate_provenance_snapshots 

22 

@pytest.fixture
def test_environment():
    """Provide a scratch input directory, a sample Turtle file and an output path.

    Yields:
        dict: ``test_dir`` is a temporary directory created next to this test
        module, ``test_ttl`` is a Turtle file written inside it (describing
        ``ex:item1`` and ``ex:item2``), and ``test_output`` is the path of a
        ``.nq`` file that does not exist yet.

    Every temporary directory created here is removed on teardown.
    """
    # Anchor the input directory to this file instead of the current working
    # directory, so the tests can be launched from any location.
    tests_root = os.path.dirname(os.path.abspath(__file__))
    test_dir = tempfile.mkdtemp(dir=tests_root)
    output_dir = None
    try:
        # tempfile.mktemp() is deprecated and race-prone: reserve a private
        # directory and hand out a not-yet-existing path inside it instead.
        output_dir = tempfile.mkdtemp()
        test_output = os.path.join(output_dir, 'provenance.nq')
        test_ttl = os.path.join(test_dir, 'test_data.ttl')

        # Create test data file
        with open(test_ttl, 'w', encoding='utf-8') as f:
            f.write("""
@prefix ex: <http://example.org/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix crm: <http://www.cidoc-crm.org/cidoc-crm/> .

ex:item1 a crm:E22_Human-Made_Object ;
    rdfs:label "Test Manuscript" .

ex:item2 a crm:E21_Person ;
    rdfs:label "John Doe" .
        """)

        yield {"test_dir": test_dir, "test_ttl": test_ttl, "test_output": test_output}
    finally:
        # Clean up, including when setup itself failed part-way through.
        if os.path.exists(test_dir):
            shutil.rmtree(test_dir)
        if output_dir is not None and os.path.exists(output_dir):
            shutil.rmtree(output_dir)

52 

def test_provenance_generation(test_environment):
    """Generate snapshots from the fixture directory and inspect the TriG output.

    Asserts that the output file exists, that it contains one provenance
    graph each for item1 and item2, and that each graph holds a snapshot
    typed ``prov:Entity`` with ``prov:specializationOf`` and
    ``prov:hadPrimarySource`` links.
    """
    source_dir = test_environment["test_dir"]
    output_path = test_environment["test_output"]
    primary_source = "https://example.org/primary-source"

    # Run the generator under test
    generate_provenance_snapshots(
        source_dir,
        output_path,
        output_format='trig',
        agent_orcid="https://orcid.org/0000-0002-8420-0696",
        primary_source=primary_source,
    )

    assert os.path.exists(output_path), "Output file was not created"

    # Parse what was written
    parsed = Dataset()
    parsed.parse(output_path, format='trig')

    PROV = Namespace('http://www.w3.org/ns/prov#')
    item1_graph_iri = URIRef('http://example.org/item1/prov/')
    item2_graph_iri = URIRef('http://example.org/item2/prov/')
    source_iri = URIRef(primary_source)

    # Both provenance graphs must be present
    actual_graphs = [ctx.identifier for ctx in parsed.graphs()]
    for graph in (item1_graph_iri, item2_graph_iri):
        assert graph in actual_graphs, f"Expected graph {graph} not found"

    item1_prov_graph = parsed.graph(item1_graph_iri)
    item2_prov_graph = parsed.graph(item2_graph_iri)
    item1_snapshot = URIRef('http://example.org/item1/prov/se/1')
    item2_snapshot = URIRef('http://example.org/item2/prov/se/1')

    # Snapshot typing
    item1_typing = (item1_snapshot, RDF.type, PROV.Entity)
    item2_typing = (item2_snapshot, RDF.type, PROV.Entity)
    assert item1_typing in item1_prov_graph, "item1 snapshot is not typed as prov:Entity"
    assert item2_typing in item2_prov_graph, "item2 snapshot is not typed as prov:Entity"

    # Link from each snapshot back to the entity it describes
    item1_link = (item1_snapshot, PROV.specializationOf, URIRef('http://example.org/item1'))
    item2_link = (item2_snapshot, PROV.specializationOf, URIRef('http://example.org/item2'))
    assert item1_link in item1_prov_graph
    assert item2_link in item2_prov_graph

    # Primary source attached to each snapshot
    item1_source = (item1_snapshot, PROV.hadPrimarySource, source_iri)
    item2_source = (item2_snapshot, PROV.hadPrimarySource, source_iri)
    assert item1_source in item1_prov_graph, "item1 snapshot missing primary source"
    assert item2_source in item2_prov_graph, "item2 snapshot missing primary source"

101 

def test_input_format_parameter(test_environment):
    """Check that an explicit ``input_format`` is applied to an unrecognised extension.

    A Turtle document is saved as ``unknown_format.xyz`` in the fixture
    directory and the generator is called with ``input_format='turtle'``.
    The TriG output must then contain a provenance graph for item3 holding a
    snapshot typed ``prov:Entity``.
    """
    source_dir = test_environment["test_dir"]
    output_path = test_environment["test_output"]

    # Turtle content stored under an extension that gives no format hint
    odd_extension_file = os.path.join(source_dir, 'unknown_format.xyz')
    with open(odd_extension_file, 'w') as handle:
        handle.write("""
@prefix ex: <http://example.org/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix crm: <http://www.cidoc-crm.org/cidoc-crm/> .

ex:item3 a crm:E22_Human-Made_Object ;
    rdfs:label "Test Object with Unknown Format" .
        """)

    # Run the generator, naming the input format explicitly
    generate_provenance_snapshots(
        source_dir,
        output_path,
        input_format='turtle',
        output_format='trig',
        agent_orcid="https://orcid.org/0000-0002-8420-0696",
        primary_source="https://example.org/primary-source",
    )

    assert os.path.exists(output_path), "Output file was not created"

    # Parse what was written
    parsed = Dataset()
    parsed.parse(output_path, format='trig')

    PROV = Namespace('http://www.w3.org/ns/prov#')

    # The provenance graph for item3 must be present
    item3_graph = URIRef('http://example.org/item3/prov/')
    actual_graphs = [ctx.identifier for ctx in parsed.graphs()]
    assert item3_graph in actual_graphs, f"Expected graph {item3_graph} not found"

    # ...and must type the first snapshot as prov:Entity
    item3_snapshot = URIRef('http://example.org/item3/prov/se/1')
    item3_typing = (item3_snapshot, RDF.type, PROV.Entity)
    assert item3_typing in parsed.graph(item3_graph), "item3 snapshot is not typed as prov:Entity"

144 

def test_empty_directory(test_environment):
    """Check that no output file is written when the input directory is empty."""
    test_output = test_environment["test_output"]
    agent_orcid = "https://orcid.org/0000-0002-8420-0696"
    primary_source = "https://example.org/primary-source"

    # Create the empty directory next to this test module rather than relative
    # to the current working directory, and let the context manager remove it
    # even when the assertion below fails.
    tests_root = os.path.dirname(os.path.abspath(__file__))
    with tempfile.TemporaryDirectory(dir=tests_root) as empty_dir:
        # Generate provenance snapshots for the empty directory
        generate_provenance_snapshots(empty_dir, test_output, agent_orcid=agent_orcid, primary_source=primary_source)

        # Check that the output file was not created
        assert not os.path.exists(test_output), "Output file should not be created for empty directory"

163 

if __name__ == '__main__':
    # Propagate pytest's exit status so a failing run is visible to the
    # calling shell or CI job instead of always exiting with 0.
    sys.exit(pytest.main())