22import sys
33import csv
44import pprint
5+ import re
56import smtplib
67import mimetypes
78import logging
89from email .mime .multipart import MIMEMultipart
910from email .mime .text import MIMEText
1011
12+ from .user import UserReports
13+
1114class DataverseReports (object ):
1215 def __init__ (self , dataverse_api = None , config = None ):
1316 if dataverse_api is None :
@@ -19,6 +22,12 @@ def __init__(self, dataverse_api=None, config=None):
1922 return
2023
2124 self .dataverse_api = dataverse_api
25+ self .config = config
26+ self .dataverse_size_pattern = re .compile ('dataverse:\s(.*)\sbyte' )
27+ self .logger = logging .getLogger ('dataverse-reports' )
28+
29+ # Create UserReports object to retrieve user metadata
30+ self .user_reports = UserReports (dataverse_api = dataverse_api , config = config )
2231
2332 # Ensure trailing slash on work_dir
2433 if config ['work_dir' ][len (config ['work_dir' ])- 1 ] != '/' :
@@ -28,15 +37,12 @@ def __init__(self, dataverse_api=None, config=None):
2837 self .ns = {'atom' : 'http://www.w3.org/2005/Atom' ,
2938 'sword' : 'http://purl.org/net/sword/terms/state' }
3039
31- self .config = config
32- self .logger = logging .getLogger ('dataverse-reports' )
33-
34- def report_dataverses_recursive (self , account_info ):
40+ def report_dataverses_recursive (self , dataverse_identifier ):
3541 # List of dataverses
3642 dataverses = []
3743
3844 # Load dataverses
39- self .load_dataverses_recursive (dataverses , account_info [ 'identifier' ] )
45+ self .load_dataverses_recursive (dataverses , dataverse_identifier )
4046
4147 return dataverses
4248
@@ -64,21 +70,71 @@ def load_dataverse(self, dataverses, dataverse_identifier):
6470
6571 self .logger .info ("Dataverse name: %s" , dataverse ['name' ])
6672
67- # Flatten the nested creator information
68- if 'creator' in dataverse :
73+ # Flatten the nested contact information
74+ if 'dataverseContacts' in dataverse :
75+ dataverseContacts = dataverse ['dataverseContacts' ]
76+ if len (dataverseContacts ) > 0 :
77+ self .logger .debug ("The dataverseContacts list contains " + str (len (dataverseContacts )) + " contacts." )
78+ dataverseContact = dataverseContacts [0 ]
79+ if 'contactEmail' in dataverseContact :
80+ contactEmail = dataverseContact ['contactEmail' ].strip ()
81+ self .logger .debug ("Found email of dataverse contact: %s" , str (contactEmail ))
82+ user = self .user_reports .find_user_email (contactEmail )
83+ if bool (user ):
84+ self .logger .debug ("Adding contact information: %s" , user )
85+ if 'userIdentifier' in user :
86+ dataverse ['contactIdentifier' ] = user ['userIdentifier' ]
87+ if 'firstName' in user :
88+ dataverse ['contactFirstName' ] = user ['firstName' ]
89+ if 'lastName' in user :
90+ dataverse ['contactLastName' ] = user ['lastName' ]
91+ if 'email' in user :
92+ dataverse ['contactEmail' ] = user ['email' ]
93+ if 'affiliation' in user :
94+ dataverse ['contactAffiliation' ] = user ['affiliation' ]
95+ if 'roles' in user :
96+ dataverse ['contactRoles' ] = user ['roles' ]
97+ else :
98+ self .logger .warn ("Unable to find user from dataverseContact email: " + contactEmail )
99+ dataverse ['contactEmail' ] = contactEmail
100+ else :
101+ self .logger .warn ("First dataverseContact doesn't have an email." )
102+ else :
103+ self .logger .warn ("List of dataverseContacts is empty." )
104+ elif 'creator' in dataverse : # Legacy field in older Dataverse versions
69105 self .logger .debug ("Replacing creator array." )
70106 creator = dataverse ['creator' ]
71107 if 'identifier' in creator :
72- dataverse ['creatorIdentifier ' ] = creator ['identifier' ]
108+ dataverse ['contactIdentifier ' ] = creator ['identifier' ]
73109 if 'displayName' in creator :
74- dataverse ['creatorName ' ] = creator ['displayName' ]
110+ dataverse ['contactName ' ] = creator ['displayName' ]
75111 if 'email' in creator :
76- dataverse ['creatorEmail ' ] = creator ['email' ]
112+ dataverse ['contactEmail ' ] = creator ['email' ]
77113 if 'affiliation' in creator :
78- dataverse ['creatorAffiliation ' ] = creator ['affiliation' ]
114+ dataverse ['contactAffiliation ' ] = creator ['affiliation' ]
79115 if 'position' in creator :
80- dataverse ['creatorPosition ' ] = creator ['position' ]
116+ dataverse ['contactPosition ' ] = creator ['position' ]
81117 dataverse .pop ('creator' )
118+ else :
119+ self .logger .warn ("Unable to find dataverse contact information." )
120+
121+ # Add the data (file) size of the dataverse and all its sub-dataverses
122+ dataverse_size_response = self .dataverse_api .get_dataverse_size (identifier = dataverse_identifier , includeCached = True )
123+ response_size_json = dataverse_size_response .json ()
124+ if response_size_json ['status' ] == 'OK' and 'data' in response_size_json :
125+ dataverse_size = response_size_json ['data' ]
126+ if 'message' in dataverse_size :
127+ size_message = dataverse_size ['message' ]
128+ self .logger .debug ("The message element from storagesize endpoint: " + size_message )
129+ size_bytes_match = re .search (self .dataverse_size_pattern , size_message )
130+ if size_bytes_match is not None :
131+ size_bytes_string = size_bytes_match .group (1 )
132+ size_bytes = int (size_bytes_string .replace (',' ,'' ))
133+ dataverse ['contentSize (MB)' ] = (size_bytes / 1048576 )
134+ else :
135+ self .logger .warning ("Unable to find the bytes value in the message." )
136+ else :
137+ self .logger .warning ("No message element in response from storagesize endpoint." )
82138
83139 # Add the 'dataverseHasBeenReleased' field from the Sword API
84140 if 'alias' in dataverse :
@@ -94,6 +150,12 @@ def load_dataverse(self, dataverses, dataverse_identifier):
94150 else :
95151 self .logger .debug ("Element 'dataverseHasBeenReleased' is not present in XML." )
96152
153+ # Load datasets
154+ #dataverse_contents = self.dataverse_api.get_dataverse_contents(identifier=dataverse_identifier)
155+ #for dvObject in dataverse_contents:
156+ #if dvObject['type'] == 'dataset':
157+ #self.load_dataset(dataverse, dvObject['id'])
158+
97159 dataverses .append (dataverse )
98160 else :
99161 self .logger .warn ("Dataverse was empty." )
0 commit comments