From b63f4d777eb93488239098db5315ecb1289c1d28 Mon Sep 17 00:00:00 2001
From: Maeva Pourpoint <maeva@passcal.nmt.edu>
Date: Thu, 9 Sep 2021 16:42:53 -0600
Subject: [PATCH] Module handling metadata - Survey level data classes

---
 lemi2seed/lemi_metadata.py | 165 +++++++++++++++++++++++++++++++++++++
 1 file changed, 165 insertions(+)
 create mode 100644 lemi2seed/lemi_metadata.py

diff --git a/lemi2seed/lemi_metadata.py b/lemi2seed/lemi_metadata.py
new file mode 100644
index 0000000..a9e048e
--- /dev/null
+++ b/lemi2seed/lemi_metadata.py
@@ -0,0 +1,165 @@
+# -*- coding: utf-8 -*-
+"""
+Routines for parsing LEMI field spreadsheets (i.e. install and demobilization
+sheets) and validating metadata.
+Templates for the spreadsheets are provided to the PIs and their formatting is
+hard coded.
+The metadata for a complete MT dataset was structured into the following
+categories: Survey, Station, Run, Electric, Magnetic, and Auxiliary.
+
+Maeva Pourpoint - IRIS/PASSCAL
+"""
+from __future__ import annotations
+
+import logging
+import logging.config
+import re
+
+from dataclasses import dataclass, field
+from obspy import UTCDateTime
+from pathlib import Path
+from typing import Optional, Set
+
+from lemi2seed.utils import check_email_formatting, is_valid_uri, str2list
+
+# Load the logging configuration file that sits next to this module
+log_file_path = Path(__file__).parent / 'logging.conf'
+logging.config.fileConfig(log_file_path)
+# Module-level logger
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class BaseMetadata:
+    """Base class holding the sets of missing and invalid metadata fields."""
+    # Names of fields required for archiving but not provided by the user
+    metadata_missing: Set[str] = field(default_factory=set)
+    # Names of fields whose value is invalid
+    metadata_invalid: Set[str] = field(default_factory=set)
+
+
+@dataclass
+class BaseSurvey(BaseMetadata):
+    """Base survey metadata: restricted status and start/end of the time period."""
+    restricted_status: Optional[str] = field(
+        default=None,
+        metadata={'xml_id': ['restricted_status'], 'required': True, 'gui': True})
+    time_period_end: Optional[UTCDateTime] = field(
+        default=None,
+        metadata={'xml_id': ['end_date'], 'required': True, 'gui': True})
+    time_period_start: Optional[UTCDateTime] = field(
+        default=None,
+        metadata={'xml_id': ['start_date'], 'required': True, 'gui': True})
+
+    def validate_time_period_end(self, metadata_input: UTCDateTime, data_input: UTCDateTime) -> bool:
+        """Return True if the end date is a UTCDateTime that is >= data_input."""
+        if isinstance(metadata_input, UTCDateTime) and metadata_input >= data_input:
+            return True
+        level = self.__class__.__name__.split('_')[0]
+        logger.error("End date of {} should be in UTC and greater than "
+                     "the data acquisition end time.".format(level))
+        return False
+
+    def validate_time_period_start(self, metadata_input: UTCDateTime, data_input: UTCDateTime) -> bool:
+        """Return True if the start date is a UTCDateTime that is <= data_input."""
+        if isinstance(metadata_input, UTCDateTime) and metadata_input <= data_input:
+            return True
+        level = self.__class__.__name__.split('_')[0]
+        logger.error("Start date of {} should be in UTC and lower than "
+                     "the data acquisition start time.".format(level))
+        return False
+
+
+@dataclass
+class Survey(BaseSurvey):
+    """
+    Survey level metadata fields and their validation routines.
+
+    Each field carries 'required' and 'gui' flags and, for all but
+    archive_network, an 'xml_id' list in its dataclass metadata.
+    """
+    acquired_by_author: Optional[str] = field(
+        default=None,
+        metadata={'xml_id': ['comments', 'mt.survey.acquired_by.author'], 'required': False, 'gui': True})
+    acquired_by_comments: Optional[str] = field(
+        default=None,
+        metadata={'xml_id': ['comments', 'mt.survey.acquired_by.comments'], 'required': False, 'gui': True})
+    archive_network: Optional[str] = field(
+        default=None,
+        metadata={'required': True, 'gui': True})
+    citation_dataset_doi: Optional[str] = field(
+        default=None,
+        metadata={'xml_id': ['identifiers'], 'required': True, 'gui': True})
+    comments: Optional[str] = field(
+        default=None,
+        metadata={'xml_id': ['comments', 'mt.survey.comments'], 'required': False, 'gui': True})
+    country: Optional[str] = field(
+        default=None,
+        metadata={'xml_id': ['comments', 'mt.survey.country'], 'required': False, 'gui': True})
+    geographic_name: Optional[str] = field(
+        default=None,
+        metadata={'xml_id': ['comments', 'mt.survey.geographic_name'], 'required': True, 'gui': True})
+    name: Optional[str] = field(
+        default=None,
+        metadata={'xml_id': ['description'], 'required': True, 'gui': True})
+    project: Optional[str] = field(
+        default=None,
+        metadata={'xml_id': ['comments', 'mt.survey.project'], 'required': True, 'gui': True})
+    project_lead_author: Optional[str] = field(
+        default=None,
+        metadata={'xml_id': ['operators.contacts.names'], 'required': False, 'gui': True})
+    project_lead_email: Optional[str] = field(
+        default=None,
+        metadata={'xml_id': ['operators.contacts.emails'], 'required': False, 'gui': True})
+    project_lead_organization: Optional[str] = field(
+        default=None,
+        metadata={'xml_id': ['operators.agency'], 'required': False, 'gui': True})
+    summary: Optional[str] = field(
+        default=None,
+        metadata={'xml_id': ['comments', 'mt.survey.summary'], 'required': False, 'gui': True})
+
+    @staticmethod
+    def validate_archive_network(metadata_input: str) -> bool:
+        """Return True if the network code is two ASCII letters and/or digits."""
+        try:
+            # fullmatch on an explicit class: r"^\w{2}$" also let through '_',
+            # non-ASCII word characters and a code followed by a newline.
+            valid = re.fullmatch(r"[A-Za-z0-9]{2}", metadata_input)
+        except TypeError:
+            logger.error("The network code should be a string.")
+            return False
+        else:
+            if valid is None:
+                logger.error("The network code should be two alphanumeric "
+                             "character long.")
+            return bool(valid)
+
+    @staticmethod
+    def validate_citation_dataset_doi(metadata_input: str) -> bool:
+        """Return True if every DOI parsed from the input string is a valid URI."""
+        try:
+            dois = str2list(metadata_input)
+        except AttributeError:
+            logger.error("The DOI number(s) should be a string.")
+            return False
+        else:
+            # NOTE: only URI formatting is checked, not DOI-specific formatting.
+            if not all([is_valid_uri(doi) for doi in dois]):
+                logger.error("Invalid DOI(s). The DOI number(s) provided by "
+                             "the archive should be strings formatted as "
+                             "follows: 'scheme: path'.")
+                return False
+            return True
+
+    @staticmethod
+    def validate_project_lead_email(metadata_input: str) -> bool:
+        """Return True if no email is given or every email passes check_email_formatting."""
+        if metadata_input is None:
+            return True
+        try:
+            emails = str2list(metadata_input)
+        except AttributeError:
+            logger.error("The project lead email(s) should be a string.")
+            return False
+        # The list is built in full so that every email gets checked.
+        return all([check_email_formatting(email) for email in emails])
-- 
GitLab