Skip to content
Snippets Groups Projects
test_lemi_data.py 15.8 KiB
Newer Older
# -*- coding: utf-8 -*-

"""Tests for `lemi_data` module."""

import numpy as np
import pickle
import unittest

from obspy import UTCDateTime
from pathlib import Path

from lemi2seed.lemi_data import LemiData
from lemi2seed.logging import setup_logger
# Directory containing this test module; every fixture/output path below is
# anchored to it. A single resolve() is enough - resolving an already
# resolved path returns the same path.
_TESTS_ROOT = Path(__file__).resolve().parent
OUTPUT_MSEED = _TESTS_ROOT.joinpath('MSEED')
OUTPUT_LOG = _TESTS_ROOT.joinpath('LOG')
TEST_DIR = _TESTS_ROOT.joinpath('test_data')
# Name handed to setup_logger(); the assertLogs checks in this module expect
# it as the logger-name field of each captured record.
SCR_DIR = "lemi2seed.lemi_data"

# Set up logging
logger = setup_logger(SCR_DIR)


class TestLemiData(unittest.TestCase):
    """Test  suite for lemi_data.LemiData class"""

    def setUp(self):
        """Build the fixture paths shared by the tests.

        Stores the ``EM`` data set directories used across the suite
        (``TEST1``, ``TEST3``, ``TEST4`` and ``TEST5``) and the list of data
        files expected under ``TEST1/DATA0110``.
        """
        em_dir = TEST_DIR.joinpath("EM")
        self.path2data = em_dir.joinpath("TEST1")
        self.path2data_corrupted_1 = em_dir.joinpath("TEST3")
        self.path2data_corrupted_2 = em_dir.joinpath("TEST4")
        self.path2data_run = em_dir.joinpath("TEST5")

        data_dir = self.path2data.joinpath('DATA0110')
        file_names = (
            '202009302105.TXT',
            '202009302112.TXT',
            '202009302114.TXT',
            '202010010000.TXT',
        )
        self.data_files = [data_dir.joinpath(name) for name in file_names]

    def test_scan_path2data_valid_filenaming(self):
        """Check the data files and run count reported for the TEST1 data set."""
        scanned = LemiData(self.path2data, OUTPUT_MSEED, OUTPUT_LOG)
        # The data files must match the list built in setUp, in the same order.
        self.assertListEqual(scanned.data_files, self.data_files)
        run_count = scanned.stats['num_runs']
        self.assertEqual(run_count, 1)

    def test_scan_path2data_erroneous_filenaming(self):
        """Check that building LemiData on the TEST2 data set exits with code 1."""
        bad_dir = TEST_DIR.joinpath("EM").joinpath("TEST2")
        with self.assertRaises(SystemExit) as raised:
            LemiData(bad_dir, OUTPUT_MSEED, OUTPUT_LOG)
        exit_code = raised.exception.code
        self.assertEqual(exit_code, 1)

    def test_scan_path2data_inf_files(self):
        """Check the .INF files and run count reported for the TEST5 data set."""
        scanned = LemiData(self.path2data_run, OUTPUT_MSEED, OUTPUT_LOG)
        expected_names = [
            '202009302053.INF',
            '202009302111.INF',
            '202009302346.INF',
            '202010011052.INF',
        ]
        # Compare on file names only; the parent directories are not checked.
        found_names = [inf_file.name for inf_file in scanned.inf_files]
        self.assertListEqual(found_names, expected_names)
        self.assertEqual(scanned.stats['num_runs'], 4)

    def test_scan_path2data_no_def_file(self):
        """Check the warning logged, and the empty def file list, for TEST1."""
        with self.assertLogs(logger, level='WARNING') as captured:
            scanned = LemiData(self.path2data, OUTPUT_MSEED, OUTPUT_LOG)
        template = ("No def files found under the following path - {}. Calibration "
                    "coefficients used by the LEMI424 logger to 'internally correct' "
                    "the recorded data will not be archived!")
        warning = template.format(self.path2data)
        # assertLogs renders each record as "LEVEL:logger_name:message".
        expected_output = [":".join(('WARNING', SCR_DIR, warning))]
        self.assertEqual(captured.output, expected_output)
        self.assertListEqual(scanned.def_files, [])

    def test_scan_path2data_two_def_files(self):
        """Check the two .def files reported for the TEST5 data set."""
        scanned = LemiData(self.path2data_run, OUTPUT_MSEED, OUTPUT_LOG)
        expected_names = ['lemi424N0110.def', 'lemi424N0132.def']
        # Compare on file names only; the parent directories are not checked.
        found_names = [def_file.name for def_file in scanned.def_files]
        self.assertListEqual(found_names, expected_names)

    def test_parse_inf_files(self):
        """Check the per-run stats available after building LemiData on TEST5.

        Covers the fluxgate and data logger serial numbers, the dipole
        lengths and the acquisition start times.
        """
        lemi_data = LemiData(self.path2data_run, OUTPUT_MSEED, OUTPUT_LOG)
        stats = lemi_data.stats

        # Both serial-number tables are expected to hold the same values.
        serial_numbers = {'a': '110', 'b': '110', 'c': '110', 'd': '132'}
        self.assertDictEqual(stats['fluxgate_sn'], serial_numbers)
        self.assertDictEqual(stats['datalogger_sn'], serial_numbers)

        # The same dipole lengths are expected for each of the four runs.
        dipole_len = {run: {'E1': 78.0, 'E2': 93.0, 'E3': 78.0, 'E4': 93.0}
                      for run in 'abcd'}
        self.assertDictEqual(stats['dipole_len'], dipole_len)

        start_times = [UTCDateTime(stamp) for stamp in (
            '2020-09-30T20:53:41.000000Z',
            '2020-09-30T21:11:13.000000Z',
            '2020-09-30T23:46:51.000000Z',
            '2020-10-01T10:52:25.000000Z',
        )]
        self.assertListEqual(stats['acq_start_time'], start_times)

    def test_parse_file_valid_file(self):
        """Check parse_data_file on the first TEST1 data file against a pickle."""
        parser = LemiData(self.path2data, OUTPUT_MSEED, OUTPUT_LOG)
        first_file = parser.data_files[0]
        parsed = parser.parse_data_file(first_file)
        # The reference output is a pickle stored alongside the test data.
        reference_path = self.path2data.joinpath('test_data_file.pkl')
        reference = pickle.loads(reference_path.read_bytes())
        self.assertDictEqual(parsed, reference)

    def test_parse_file_corrupted_no_CR(self):
        """Check that the first TEST3 data file is skipped with a warning.

        parse_data_file is expected to log a single "may have been corrupted"
        warning and to return None.
        """
        parser = LemiData(self.path2data_corrupted_1, OUTPUT_MSEED, OUTPUT_LOG)
        corrupted_file = parser.data_files[0]
        with self.assertLogs(logger, level='WARNING') as captured:
            parsed = parser.parse_data_file(corrupted_file)
        template = "The data file - {} - may have been corrupted. Skipping file!"
        warning = template.format(corrupted_file)
        # assertLogs renders each record as "LEVEL:logger_name:message".
        expected_output = [":".join(('WARNING', SCR_DIR, warning))]
        self.assertEqual(captured.output, expected_output)
        self.assertIsNone(parsed)

    def test_parse_file_corrupted_missing_column(self):
        """Check that the second TEST3 data file is skipped with a warning.

        parse_data_file is expected to log a single "may have been corrupted"
        warning and to return None.
        """
        parser = LemiData(self.path2data_corrupted_1, OUTPUT_MSEED, OUTPUT_LOG)
        corrupted_file = parser.data_files[1]
        with self.assertLogs(logger, level='WARNING') as captured:
            parsed = parser.parse_data_file(corrupted_file)
        template = "The data file - {} - may have been corrupted. Skipping file!"
        warning = template.format(corrupted_file)
        # assertLogs renders each record as "LEVEL:logger_name:message".
        expected_output = [":".join(('WARNING', SCR_DIR, warning))]
        self.assertEqual(captured.output, expected_output)
        self.assertIsNone(parsed)

    def test_parse_file_corrupted_erroneous_time_formatting(self):
        """Check that the first TEST4 data file is skipped with a warning.

        parse_data_file is expected to log a single "may have been corrupted"
        warning and to return None.
        """
        parser = LemiData(self.path2data_corrupted_2, OUTPUT_MSEED, OUTPUT_LOG)
        corrupted_file = parser.data_files[0]
        # No level is passed, so assertLogs captures INFO and above; the
        # equality check below therefore also requires that nothing else was
        # logged at those levels.
        with self.assertLogs(logger) as captured:
            parsed = parser.parse_data_file(corrupted_file)
        template = "The data file - {} - may have been corrupted. Skipping file!"
        warning = template.format(corrupted_file)
        expected_output = [":".join(('WARNING', SCR_DIR, warning))]
        self.assertEqual(captured.output, expected_output)
        self.assertIsNone(parsed)

    def test_parse_file_corrupted_erroneous_coordinate_formatting(self):
        """Check that the second TEST4 data file is skipped with a warning.

        parse_data_file is expected to log a single "may have been corrupted"
        warning and to return None.
        """
        parser = LemiData(self.path2data_corrupted_2, OUTPUT_MSEED, OUTPUT_LOG)
        corrupted_file = parser.data_files[1]
        # No level is passed, so assertLogs captures INFO and above; the
        # equality check below therefore also requires that nothing else was
        # logged at those levels.
        with self.assertLogs(logger) as captured:
            parsed = parser.parse_data_file(corrupted_file)
        template = "The data file - {} - may have been corrupted. Skipping file!"
        warning = template.format(corrupted_file)
        expected_output = [":".join(('WARNING', SCR_DIR, warning))]
        self.assertEqual(captured.output, expected_output)
        self.assertIsNone(parsed)

    def test_parse_all_files(self):
        """Check the ``data`` attribute built for TEST1 against a pickle."""
        parser = LemiData(self.path2data, OUTPUT_MSEED, OUTPUT_LOG)
        # The reference output is a pickle stored alongside the test data.
        reference_path = self.path2data.joinpath('test_data_files.pkl')
        reference = pickle.loads(reference_path.read_bytes())
        self.assertDictEqual(parser.data, reference)

    def test_order_files(self):
        """Check that order_files restores the expected data file order.

        The input is the expected list from setUp in reverse order, so the
        test only passes if order_files actually reorders its argument.
        """
        # ``files`` used to be undefined here (it is a local of setUp), which
        # made the test fail with NameError before order_files was reached.
        # NOTE(review): assumes order_files sorts into the order listed in
        # setUp - confirm against LemiData.order_files.
        files = list(reversed(self.data_files))
        ordered_data_files = LemiData.order_files(files)
        self.assertListEqual(ordered_data_files, self.data_files)

    def test_reformat_data_valid_formatting(self):
        """Check the dictionary reformat_data builds from a well-formed line."""
        raw_line = ('2020 09 30 21 05 00 23777.803   210.731 41834.585  46.75  '
                    '37.09   136.521  -110.769   174.046    16.178 13.01 2202.2 '
                    '3404.83955 N 10712.84372 W 12 2 0')
        expected = {
            'Time_stamp': UTCDateTime('2020-09-30T21:05:00.000000Z'),
            'Lat': 34.0483955,
            'Lon': -107.12843720000001,
            'Elev': 2202.2,
            'Hx': 23777.803,
            'Hy': 210.731,
            'Hz': 41834.585,
            'E1': 136.521,
            'E2': -110.769,
            'E3': 174.046,
            'E4': 16.178,
            'Ui': 13.01,
            'Te': 46.75,
            'Tf': 37.09,
            'Sn': 12.0,
            'Fq': 2.0,
            'Ce': 0.0,
        }
        # The line is handed over as its whitespace-separated columns.
        fields = raw_line.split()
        self.assertDictEqual(LemiData.reformat_data(fields), expected)

    def test_reformat_data_erroneous_time_formatting(self):
        """Check that reformat_data returns None for a line with a bad time."""
        # The sixth column holds '23777.803' where the valid-line test has '00'.
        raw_line = ('2020 09 30 21 05 23777.803 23777.803   210.731 41834.585  '
                    '46.75  37.09   136.521  -110.769   174.046    16.178 13.01 '
                    '2202.2 3404.83955 N 10712.84372 W 12 2 0')
        fields = raw_line.split()
        result = LemiData.reformat_data(fields)
        self.assertIsNone(result)

    def test_reformat_data_erroneous_coordinate_formatting(self):
        """Check that reformat_data returns None for a bad coordinate field."""
        # The latitude is followed by 'Z' where the valid-line test has 'N'.
        raw_line = ('2020 09 30 21 12 00 23775.684   212.121 41834.749  46.68  '
                    '36.46   133.427  -111.936   171.563    15.734 13.00 2200.8 '
                    '3404.83994 Z 10712.84418 W 12 2 0')
        fields = raw_line.split()
        result = LemiData.reformat_data(fields)
        self.assertIsNone(result)

    def test_detect_gaps_correlates_with_acquisition_start(self):
        """Check the three gaps detect_gaps reports for the TEST5 data set."""
        lemi_data = LemiData(self.path2data_run, OUTPUT_MSEED, OUTPUT_LOG)
        lemi_data.parse_data_files()
        lemi_data.parse_inf_files()
        stamps = lemi_data.data["Time_stamp"]
        gap_indices = lemi_data.detect_gaps(stamps)
        self.assertEqual(len(gap_indices), 3)
        # Time stamps expected at the returned indices.
        expected_stamps = [UTCDateTime(stamp) for stamp in (
            '2020-09-30T21:12:00.000000Z',
            '2020-09-30T23:47:00.000000Z',
            '2020-10-01T10:53:00.000000Z',
        )]
        found_stamps = [stamps[index] for index in gap_indices]
        self.assertListEqual(found_stamps, expected_stamps)

    def test_detect_gaps_not_all_correlates_with_acquisition_start(self):
        """Check detect_gaps once the acquisition start times are cut to two.

        With only the last two start times left in the stats, two gaps are
        expected instead of three.
        """
        lemi_data = LemiData(self.path2data_run, OUTPUT_MSEED, OUTPUT_LOG)
        lemi_data.parse_data_files()
        lemi_data.parse_inf_files()
        # Overwrite the parsed start times with a shorter list.
        lemi_data.stats['acq_start_time'] = [
            UTCDateTime('2020-09-30T23:46:51.000000Z'),
            UTCDateTime('2020-10-01T10:52:25.000000Z'),
        ]
        stamps = lemi_data.data["Time_stamp"]
        gap_indices = lemi_data.detect_gaps(stamps)
        self.assertEqual(len(gap_indices), 2)
        expected_stamps = [UTCDateTime(stamp) for stamp in (
            '2020-09-30T23:47:00.000000Z',
            '2020-10-01T10:53:00.000000Z',
        )]
        found_stamps = [stamps[index] for index in gap_indices]
        self.assertListEqual(found_stamps, expected_stamps)

    def test_detect_new_day(self):
        """Check that detect_new_day reports one day boundary for TEST1."""
        lemi_data = LemiData(self.path2data, OUTPUT_MSEED, OUTPUT_LOG)
        lemi_data.parse_data_files()
        lemi_data.parse_inf_files()
        stamps = lemi_data.data["Time_stamp"]
        day_indices = LemiData.detect_new_day(stamps)
        self.assertEqual(len(day_indices), 1)
        # The single index must point at midnight on 2020-10-01.
        midnight = UTCDateTime('2020-10-01T00:00:00.000000Z')
        first_index = day_indices[0]
        self.assertEqual(stamps[first_index], midnight)

    def test_create_data_array(self):
        """Check the first record built by create_data_array for TEST5.

        The first entry of ``data_np`` is compared field by field with the
        pickled reference stored alongside the test data, and the run count
        is checked as well.
        """
        lemi_data = LemiData(self.path2data_run, OUTPUT_MSEED, OUTPUT_LOG)
        lemi_data.parse_data_files()
        lemi_data.parse_inf_files()
        lemi_data.create_data_array()
        input_file = self.path2data_run.joinpath('test_data_np_files.pkl')
        with open(input_file, 'rb') as fin:
            expected_data_np = pickle.load(fin)
        self.assertEqual(lemi_data.stats['num_runs'], 4)
        first_record = lemi_data.data_np[0]
        # zip() stops at the shorter input, so a truncated record or reference
        # would otherwise pass unnoticed.
        self.assertEqual(len(first_record), len(expected_data_np))
        for position, (actual, expected) in enumerate(zip(first_record, expected_data_np)):
            # One sub-test per field so a failure names the offending field.
            with self.subTest(field=position):
                if isinstance(actual, np.ndarray):
                    # array_equal also copes with arrays of different shapes.
                    self.assertTrue(np.array_equal(actual, expected))
                else:
                    self.assertEqual(actual, expected)

    def test_update_stats_geo_time(self):
        """Check the location and time span stats set by update_stats_geo_time.

        Latitude, longitude and elevation are compared to five decimal
        places; the start and end times must match exactly.
        """
        lemi_data = LemiData(self.path2data, OUTPUT_MSEED, OUTPUT_LOG)
        lemi_data.parse_data_files()
        lemi_data.parse_inf_files()
        lemi_data.update_stats_geo_time()

        expected_location = (
            ('lat', 34.04839),
            ('lon', -107.12844),
            ('elev', 2201.77122),
        )
        for key, value in expected_location:
            self.assertAlmostEqual(lemi_data.stats[key], value, places=5)

        expected_span = (
            ('start', '2020-09-30T21:05:00.000000Z'),
            ('end', '2020-10-01T00:05:59.000000Z'),
        )
        for key, stamp in expected_span:
            self.assertEqual(lemi_data.stats[key], UTCDateTime(stamp))

    def test_update_array_list_of_channels(self):
        """Check that channels left out of data_channels are dropped.

        With only E1/E2 and the three H channels listed for each run, no E3
        or E4 entry may remain in ``data_np`` after update_array.
        """
        lemi_data = LemiData(self.path2data, OUTPUT_MSEED, OUTPUT_LOG)
        lemi_data.parse_data_files()
        lemi_data.parse_inf_files()
        lemi_data.create_data_array()

        # The same channel list and electrode mapping is supplied per run.
        run_ids = ('a', 'b', 'c', 'd')
        data_channels = {run: ['E1', 'E2', 'Hx', 'Hy', 'Hz'] for run in run_ids}
        e_info = {run: {'E1': 'Ex', 'E2': 'Ey'} for run in run_ids}
        lemi_data.update_array(data_channels, e_info)

        for dropped_channel in ('E3', 'E4'):
            self.assertNotIn(dropped_channel, lemi_data.data_np['cha_num'])

    def test_update_array_electrode_info(self):
        """Check component, channel name and location code per electrode channel.

        After update_array, the entries of each of E1-E4 must carry a single
        component, channel name and location code.
        """
        lemi_data = LemiData(self.path2data, OUTPUT_MSEED, OUTPUT_LOG)
        lemi_data.parse_data_files()
        lemi_data.parse_inf_files()
        lemi_data.create_data_array()

        # The same channel list and electrode mapping is supplied per run.
        run_ids = ('a', 'b', 'c', 'd')
        data_channels = {run: ['E1', 'E2', 'E3', 'E4', 'Hx', 'Hy', 'Hz']
                         for run in run_ids}
        e_info = {run: {'E1': 'Ex', 'E2': 'Ey', 'E3': 'Ex', 'E4': 'Ey'}
                  for run in run_ids}
        lemi_data.update_array(data_channels, e_info)

        # (channel number, component, channel name, location code)
        expectations = (
            ('E1', 'Ex', 'LQN', '00'),
            ('E2', 'Ey', 'LQE', '00'),
            ('E3', 'Ex', 'LQN', '01'),
            ('E4', 'Ey', 'LQE', '01'),
        )
        for cha_num, comp, cha_name, loc in expectations:
            # Keep only the entries belonging to the current channel number.
            selected = lemi_data.data_np[lemi_data.data_np['cha_num'] == cha_num]
            self.assertSetEqual(set(selected['comp']), {comp})
            self.assertSetEqual(set(selected['cha_name']), {cha_name})
            self.assertSetEqual(set(selected['loc']), {loc})

    def test_prep_data(self):
        """Test basic functionality of prep_data.

        Calling prep_data on the TEST3 data set is expected to exit with
        status code 1.
        """
        corrupted = LemiData(self.path2data_corrupted_1, OUTPUT_MSEED, OUTPUT_LOG)
        with self.assertRaises(SystemExit) as raised:
            corrupted.prep_data()
        exit_code = raised.exception.code
        self.assertEqual(exit_code, 1)