diff --git a/HISTORY.rst b/HISTORY.rst index 53b6392c0a8e911832132bb3d3742445f768bab9..34c1b70f5ab314d5ebb84f9f1d460d53c96bf21d 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -93,3 +93,8 @@ to read. 2024.4.0.1 -------- * Fix bug caused by PySide6 version 6.8 + +2025.1.0.0 +-------- +* Add support for Q8 dataloggers +* Change how the data type of a dataset is detected diff --git a/conda.recipe/meta.yaml b/conda.recipe/meta.yaml index 1d67f3b420aeeaaf11629b30a1c26333b7f2aed4..ee34f63f2142022160309517b82881469fde6791 100644 --- a/conda.recipe/meta.yaml +++ b/conda.recipe/meta.yaml @@ -1,6 +1,6 @@ package: name: sohviewer - version: 2024.4.0.1 + version: 2025.1.0.0 source: path: ../ diff --git a/setup.py b/setup.py index a390cfa3ffe2640b7bf1f5c17c7b0efc45e2b30b..ebcae8b71e1c6a47859dfca86bcd105f65e5afa2 100644 --- a/setup.py +++ b/setup.py @@ -51,6 +51,6 @@ setup( name='sohviewer', packages=find_packages(include=['sohstationviewer*']), url='https://git.passcal.nmt.edu/software_public/passoft/sohstationviewer', - version='2024.4.0.1', + version='2025.1.0.0', zip_safe=False, ) diff --git a/sohstationviewer/CHANGELOG.rst b/sohstationviewer/CHANGELOG.rst index 53b6392c0a8e911832132bb3d3742445f768bab9..34c1b70f5ab314d5ebb84f9f1d460d53c96bf21d 100644 --- a/sohstationviewer/CHANGELOG.rst +++ b/sohstationviewer/CHANGELOG.rst @@ -93,3 +93,8 @@ to read. 2024.4.0.1 -------- * Fix bug caused by PySide6 version 6.8 + +2025.1.0.0 +-------- +* Add support for Q8 dataloggers +* Change how the data type of a dataset is detected diff --git a/sohstationviewer/conf/constants.py b/sohstationviewer/conf/constants.py index 19d8445c85e0c74f5dec242c0edb29a4697fc60a..51b46c0d383e7970673dc566e14fb4ea12cad01d 100644 --- a/sohstationviewer/conf/constants.py +++ b/sohstationviewer/conf/constants.py @@ -5,8 +5,8 @@ from typing import Literal ROOT_PATH = Path(__file__).resolve().parent.parent # The current version of SOHStationViewer -SOFTWARE_VERSION = '2024.4.0.1' -BUILD_TIME = "December 12, 2024" +SOFTWARE_VERSION = '2025.1.0.0' +BUILD_TIME = "January 10, 2025" # waveform pattern WF_1ST = 'A-HLM-V' diff --git a/sohstationviewer/controller/processing.py b/sohstationviewer/controller/processing.py index 12c44bda2488fcd28c4d42c1174615aac00eb47a..044cd6e74adfd64d89dcb716736d7339b16b1bdc 100644 --- a/sohstationviewer/controller/processing.py +++ b/sohstationviewer/controller/processing.py @@ -4,10 +4,8 @@ channels, datatype """ import os -import json -import re from pathlib import Path -from typing import List, Optional, Dict, Tuple, Union, BinaryIO +from typing import List, Optional, Dict, Tuple, Union, BinaryIO, Set, FrozenSet from PySide6.QtCore import QEventLoop, Qt from PySide6.QtGui import QCursor @@ -15,6 +13,7 @@ from PySide6.QtWidgets import QTextBrowser, QApplication from obspy.io import reftek from obspy import UTCDateTime +from sohstationviewer.conf.dbSettings import dbConf from sohstationviewer.model.mseed_data.mseed_reader import \ move_to_next_record @@ -22,7 +21,10 @@ from sohstationviewer.model.mseed_data.record_reader import RecordReader \ as MSeedRecordReader from sohstationviewer.model.mseed_data.record_reader_helper import \ MSeedReadError -from sohstationviewer.database.extract_data import get_signature_channels +from sohstationviewer.database.extract_data import ( + get_signature_channels, + get_all_channels, +) from sohstationviewer.controller.util import ( validate_file, display_tracking_info, check_chan, @@ -175,77 +177,61 @@ def read_mseed_channels(tracking_box: QTextBrowser, list_of_dir: List[str], 
return channel_info -def detect_data_type(list_of_dir: List[Union[str, Path]]) -> Optional[str]: +def detect_data_type(d: Union[str, Path]) -> Tuple[FrozenSet[str], bool]: """ - Detect data type for the given directories using get_data_type_from_file - :param list_of_dir: list of directories selected by users + Detect the possible data types for the given directory using + get_data_type_from_file + :param d: the directory of the data set :return: - + if there are more than one data types detected, - return None with a warning message - + if only Unknown data type detected, - return None with a warning message - + if data type found, return data_type, + + the set of possible data types + + whether the data set is multiplexed """ - sign_chan_data_type_dict = get_signature_channels() + data_types_by_signature_channels = get_signature_channels() + data_types_by_channels = get_all_channels() + possible_data_types: Optional[Set[str]] = None - dir_data_type_dict = {} - is_multiplex_dict = {} - for d in list_of_dir: - try: - d = d.as_posix() - except AttributeError: - pass - data_type = "Unknown" - is_multiplex = None - for path, subdirs, files in os.walk(d): - for file_name in files: - path2file = Path(path).joinpath(file_name) - if not validate_file(path2file, file_name): - continue - ret = get_data_type_from_file(path2file, - sign_chan_data_type_dict, - is_multiplex) - if ret is not None: - d_type, is_multiplex = ret - if d_type is not None: - data_type = d_type - break - if data_type != "Unknown": - break + try: + d = d.as_posix() + except AttributeError: + pass + is_multiplex = None + for path, subdirs, files in os.walk(d): + for file_name in files: + path2file = Path(path).joinpath(file_name) + if not validate_file(path2file, file_name): + continue + ret = get_data_type_from_file( + path2file, data_types_by_signature_channels, + data_types_by_channels, is_multiplex + ) + file_possible_data_types, file_is_multiplex = ret + if file_possible_data_types is not None: + is_multiplex = is_multiplex or file_is_multiplex + if possible_data_types is None: + possible_data_types = file_possible_data_types + else: + possible_data_types &= file_possible_data_types + if (possible_data_types is not None and + len(possible_data_types) == 1): + break - if is_multiplex is None: - raise Exception("No channel found for the data set") + if (possible_data_types is not None and + len(possible_data_types) == 1): + break + if possible_data_types is None or is_multiplex is None: + raise Exception("No channel found for the data set") - is_multiplex_dict[d] = is_multiplex - if data_type == "Unknown": - dir_data_type_dict[d] = "Unknown" - else: - dir_data_type_dict[d] = data_type - is_multiplex_list = list(set(is_multiplex_dict.values())) - data_type_list = list(set(dir_data_type_dict.values())) - if len(data_type_list) > 1: - dir_data_type_str = json.dumps(dir_data_type_dict) - dir_data_type_str = re.sub(r'\{|\}|"', '', dir_data_type_str) - dir_data_type_str = re.sub(r'], ', ']\n', dir_data_type_str) - msg = (f"There are more than one types of data detected:\n" - f"{dir_data_type_str}\n\n" - f"Please have only data that related to each other.") - raise Exception(msg) - elif len(is_multiplex_list) > 1: - msg = ("There are both multiplexed and non-multiplexed data " - "detected.\n\nPlease have only data that related to" - " each other.") - raise Exception(msg) - return data_type_list[0], is_multiplex_list[0] - - -def get_data_type_from_file( - path2file: Path, - sign_chan_data_type_dict: Dict[str, str], - is_multiplex:
bool = None -) -> Optional[Tuple[Optional[str], bool]]: + return frozenset(possible_data_types), is_multiplex + + +def get_data_type_from_file(path2file: Path, + data_types_by_signature_channels: Dict[str, str], + data_types_by_channels: Dict[str, Set[str]], + is_multiplex: bool = None + ) -> Tuple[Optional[Set[str]], Optional[bool]]: """ - Get the data type contained in a given file. The procedure is given below. + Get the possible data types contained in a given file. The procedure is + given below. - Assume the file is an MSeed file and loop through each record and get the channel of each record. + If the file turns out to not be an MSeed file, check if the file is @@ -254,23 +240,44 @@ def get_data_type_from_file( file. + If more than one channels are found in the file, mark the file as multiplexed. + - For each channel, determine the data types that have this channel. Add + them to the set of possible data types. - If a signature channel is found, the data type of the file has been - determined. Keep looping until the file is marked as multiplexed or until - all records are processed. - - This function has been rewritten for improved performance. The performance - is required because we do not skip waveform files. + determined. In this case, we stop doing the previous step. + - Either way, loop until all the records in the file are exhausted. :param path2file: path to the given file - :param sign_chan_data_type_dict: dict that maps each signature channel to - its corresponding data type - :param is_multiplex: whether the file is multiplexed - :return: None if the given file is neither a MSeed nor RT130 file, the - detected data type and whether the given file is multiplexed otherwise + :param data_types_by_signature_channels: dict that maps each signature + channel to its corresponding data type + :param data_types_by_channels: dict that maps each channel in the database + to its corresponding data types + :param is_multiplex: whether the data set that contains the file is + multiplexed + :return: None if the given file is neither an MSeed nor RT130 file, the set + of possible data types and whether the given file belongs to a + multiplexed data set otherwise. """ file = open(path2file, 'rb') chans_in_stream = set() - data_type = None + possible_data_types = set() + + # Handle waveform files, which give no information about the possible data + # type. The standard in the industry at the moment of writing is to only + # multiplex SOH channels if at all, so we only need to check the first + # channel in a file to determine if it is a waveform file. + try: + chan = get_next_channel_from_mseed_file(file) + if dbConf['seisRE'].match(chan): + file.close() + return None, is_multiplex + file.seek(0) + except ValueError: + file.close() + if reftek.core._is_reftek130(path2file): + return {'RT130'}, False + return None, is_multiplex + + is_data_type_found = False while 1: is_eof = (file.read(1) == b'') @@ -283,21 +290,43 @@ def get_data_type_from_file( except ValueError: file.close() if reftek.core._is_reftek130(path2file): - return 'RT130', False - return None + return {'RT130'}, False + return None, None - if is_multiplex in [None, False]: + # Handle mass-position channels, which give no information about the + # possible data type. Mass-position channels are considered SOH + # channels, so they can be multiplexed with other SOH channels in the + # same file. As a result, unlike the waveform channels, we can't deal + # with them at the file level.
+ if chan.startswith('VM'): chans_in_stream.add(chan) - if len(chans_in_stream) > 1: - is_multiplex = True - if chan in sign_chan_data_type_dict.keys(): - data_type = sign_chan_data_type_dict[chan] - if is_multiplex: - file.close() - return data_type, is_multiplex + continue + + if chan in chans_in_stream: + continue + + chans_in_stream.add(chan) + + if chan in data_types_by_signature_channels.keys(): + possible_data_types = {data_types_by_signature_channels[chan]} + is_data_type_found = True + + if chan in data_types_by_channels: + if not is_data_type_found: + possible_data_types |= data_types_by_channels[chan] + file.close() - is_multiplex = True if len(chans_in_stream) > 1 else False - return data_type, is_multiplex + + is_multiplex = is_multiplex or (len(chans_in_stream) > 1) + + # Handle case where mass-position channels are not multiplexed with other + # SOH channels. We have to do this because mass-position-only files give + # an empty set for the possible data types, which means that any data sets + # with these files will be processed as having no data type. + if all(chan.startswith('VM') for chan in chans_in_stream): + return None, is_multiplex + + return possible_data_types, is_multiplex def get_next_channel_from_mseed_file(mseed_file: BinaryIO) -> str: diff --git a/sohstationviewer/database/backup.db b/sohstationviewer/database/backup.db index 7fbabac8c3b481ad2f1e54cf36c0d9f66d788785..d9279fbbe62bd3b6cc6dfbbeac0c4ab94f0513d6 100755 Binary files a/sohstationviewer/database/backup.db and b/sohstationviewer/database/backup.db differ diff --git a/sohstationviewer/database/extract_data.py b/sohstationviewer/database/extract_data.py index d692dae57f78ac4b9b73e70861f5323e4c5076b5..5d8333f7d4ffbff70ceedc700fa3702507781913 100755 --- a/sohstationviewer/database/extract_data.py +++ b/sohstationviewer/database/extract_data.py @@ -1,5 +1,5 @@ import re -from typing import Dict, List +from typing import Dict, List, Set from sohstationviewer.conf.constants import ColorMode from sohstationviewer.database.process_db import execute_db_dict, execute_db @@ -127,12 +127,8 @@ def convert_actual_channel_to_db_channel_w_question_mark( if chan_id[-1].isdigit(): # to prevent the case prefix similar to prefix of channel w/o ? chan_id = chan_id[:-1] + '?' - # Mass-position channels for Q330 data can sometimes end with a letter. - elif data_type == 'Q330': - mass_pos_letter_suffixes = ['Z', 'N', 'E', 'U', 'V', 'W'] - if (chan_id.startswith('VM') and - chan_id[-1] in mass_pos_letter_suffixes): - chan_id = 'VM?' + if chan_id.startswith('VM'): + chan_id = 'VM?' return chan_id @@ -154,7 +150,7 @@ def get_seismic_chan_label(chan_id): return label -def get_signature_channels(): +def get_signature_channels() -> Dict[str, str]: """ return the dict {channel: dataType} in which channel is unique for dataType """ @@ -166,6 +162,22 @@ def get_signature_channels(): return sign_chan_data_type_dict +def get_all_channels() -> Dict[str, Set[str]]: + """ + Get all the channels in the database alongside their associated data types. + + :return: the dictionary {channel: data types} that contains all channels in + the database. A channel can be associated with multiple data types. 
+ """ + sql = "SELECT channel, dataType FROM Channels" + rows = execute_db_dict(sql) + all_channels_dict = {} + for r in rows: + channel_data_types = all_channels_dict.setdefault(r['channel'], set()) + channel_data_types.add(r['dataType']) + return all_channels_dict + + def get_color_def(): sql = "SELECT color FROM TPS_ColorDefinition ORDER BY name ASC" rows = execute_db(sql) diff --git a/sohstationviewer/database/resources/data_manual_links.txt b/sohstationviewer/database/resources/data_manual_links.txt index 1f6048c0846e4948bfa9e2f500d0f8026f6b73bd..354c38e185880f20af396bf274af58290cbea4b3 100644 --- a/sohstationviewer/database/resources/data_manual_links.txt +++ b/sohstationviewer/database/resources/data_manual_links.txt @@ -3,4 +3,6 @@ Q330: https://www.passcal.nmt.edu/webfm_send/1853 Centaur: https://dms.passcal.nmt.edu/index.php/apps/files/files/76191?dir=/PASSCAL_Documents/Instrumentation/Recording_Systems/DAS/Nanometrics/Centaur/Manuals_and_Documentation -Pegasus: https://dms.passcal.nmt.edu/index.php/apps/files/files/642234?dir=/PASSCAL_Documents/Instrumentation/Recording_Systems/DAS/Nanometrics/Pegasus/Manuals_and_Documentation \ No newline at end of file +Pegasus: https://dms.passcal.nmt.edu/index.php/apps/files/files/642234?dir=/PASSCAL_Documents/Instrumentation/Recording_Systems/DAS/Nanometrics/Pegasus/Manuals_and_Documentation + +Q8: https://dms.passcal.nmt.edu/index.php/apps/files/files/105081?dir=/PASSCAL_Documents/Instrumentation/Recording_Systems/DAS/Quanterra/Q8/Manuals%20and%20Documentation diff --git a/sohstationviewer/database/soh.db b/sohstationviewer/database/soh.db index 7fbabac8c3b481ad2f1e54cf36c0d9f66d788785..d9279fbbe62bd3b6cc6dfbbeac0c4ab94f0513d6 100755 Binary files a/sohstationviewer/database/soh.db and b/sohstationviewer/database/soh.db differ diff --git a/sohstationviewer/view/main_window.py b/sohstationviewer/view/main_window.py index e6469eac1c3fa04b3526eded6024535b434de230..6ee524b28dfc9e32efadd40af329168189c41a8e 100755 --- a/sohstationviewer/view/main_window.py +++ b/sohstationviewer/view/main_window.py @@ -10,7 +10,10 @@ from obspy import UTCDateTime from PySide6 import QtCore, QtWidgets, QtGui from PySide6.QtCore import QSize, QCoreApplication from PySide6.QtGui import QFont, QPalette, QColor -from PySide6.QtWidgets import QFrame, QListWidgetItem, QMessageBox +from PySide6.QtWidgets import ( + QFrame, QListWidgetItem, QMessageBox, + QInputDialog, +) from sohstationviewer.conf import constants from sohstationviewer.conf.dbSettings import dbConf @@ -32,7 +35,7 @@ from sohstationviewer.view.file_list.file_list_widget import FileListItem from sohstationviewer.view.plotting.gps_plot.extract_gps_data import \ extract_gps_data from sohstationviewer.view.plotting.gps_plot.gps_dialog import GPSDialog -from sohstationviewer.view.plotting.time_power_square.\ +from sohstationviewer.view.plotting.time_power_square. 
\ time_power_squared_dialog import TimePowerSquaredDialog from sohstationviewer.view.plotting.waveform_dialog import WaveformDialog from sohstationviewer.view.search_message.search_message_dialog import ( @@ -52,7 +55,7 @@ from sohstationviewer.view.create_muti_buttons_dialog import ( from sohstationviewer.controller.processing import detect_data_type from sohstationviewer.controller.util import ( - display_tracking_info, rt130_find_cf_dass, check_data_sdata + display_tracking_info, rt130_find_cf_dass, check_data_sdata, ) from sohstationviewer.database.process_db import execute_db_dict, execute_db @@ -531,6 +534,49 @@ class MainWindow(QtWidgets.QMainWindow, UIMainWindow): return (self.pref_soh_list if not self.all_soh_chans_check_box.isChecked() else []) + def get_data_type_from_selected_dirs(self) -> Tuple[str, bool]: + dir_data_types = {} + dir_is_multiplex = {} + for dir in self.list_of_dir: + possible_data_types, is_multiplex = detect_data_type(dir) + dir_data_types[dir] = possible_data_types + dir_is_multiplex[dir] = is_multiplex + + if len(set(dir_data_types.values())) > 1: + dir_data_types_str = '' + for dir, data_types in dir_data_types.items(): + dir_data_types_str += f'{dir}: {", ".join(sorted(data_types))}\n' + msg = (f"The selected directories contain different data types:\n" + f"{dir_data_types_str}\n\n" + f"Please have only data that is related to each other.") + raise Exception(msg) + if len(set(dir_is_multiplex.values())) > 1: + msg = ("There are both multiplexed and non-multiplexed data " + "detected.\n\nPlease have only data that is related to " + "each other.") + raise Exception(msg) + + possible_data_types = list(set(dir_data_types.values()))[0] + if len(possible_data_types) == 0: + data_type = 'Unknown' + elif len(possible_data_types) == 1: + data_type = list(possible_data_types)[0] + else: + info = ('Could not conclusively determine the data type of the ' + 'selected data set. This can happen when there are ' + 'multiple data types in the database with overlapping sets ' + 'of SOH channels. To proceed, please select the data type ' + 'that best fits the data set.') + data_type_choices = list(possible_data_types) + data_type_choices.append('Unknown') + data_type, ok = QInputDialog.getItem(self, 'Similar data types', + info, data_type_choices, + editable=False) + if not ok: + data_type = 'Unknown' + is_multiplex = list(set(dir_is_multiplex.values()))[0] + return data_type, is_multiplex + def get_file_list(self): """ Read from self.open_files_list to identify @@ -621,8 +667,9 @@ class MainWindow(QtWidgets.QMainWindow, UIMainWindow): # Log files don't have a data type that can be detected, so we don't # detect the data type if we are reading them.
if self.rt130_das_dict == {} and not self.log_checkbox.isChecked(): - self.data_type, self.is_multiplex = detect_data_type( - self.list_of_dir) + self.data_type, self.is_multiplex = ( + self.get_data_type_from_selected_dirs() + ) if self.data_type == 'Unknown': # raise Exception for Unknown data_type here so that # data_type and is_multiplex is set in case user choose to diff --git a/sohstationviewer/view/plotting/gps_plot/extract_gps_data.py b/sohstationviewer/view/plotting/gps_plot/extract_gps_data.py index 3cfc52ff8b58f10b6ffb8dc08570582494db23d2..c6f0961ee6dde5b267a0525e92f46c5471f3d3da 100644 --- a/sohstationviewer/view/plotting/gps_plot/extract_gps_data.py +++ b/sohstationviewer/view/plotting/gps_plot/extract_gps_data.py @@ -306,6 +306,82 @@ def extract_gps_data_pegasus_centaur(data_obj: MSeed, data_type: str return extracted_gps_points +def extract_gps_data_q8(data_obj: MSeed, data_type: str) -> List[GPSPoint]: + """ + Extract the GPS data of the current data set and return it as a list. + Only applicable to Q8 data sets. + + :param data_obj: the data object that stores the read data + :param data_type: data type of the current data set + """ + GPS_CHANS = {'LAT', 'LON', 'LEV', 'LFT', 'LSU'} + + channels = data_obj.soh_data[data_obj.selected_data_set_id].keys() + if not GPS_CHANS.issubset(channels): + missing_gps_chans = GPS_CHANS - channels + missing_gps_chans_string = ', '.join(missing_gps_chans) + raise ValueError(f"Some GPS channels are missing: " + f"{missing_gps_chans_string}.") + + # Caching GPS data in dictionaries for faster access. In the algorithm + # below, we need to access the data associated with a time. If we leave + # the times and data in arrays, we will need to search for the index of + # the specified time in the times array, which takes O(n) time. The + # algorithm then repeats this step n times, which gives us a total + # complexity of O(n^2). Meanwhile, if we cache the times and data in + # a dictionary, we only need to spend O(n) time building the cache and + # O(n) time accessing the cache, which amounts to O(n) time in total. + ns_dict = get_chan_soh_trace_as_dict(data_obj, 'LSU') + la_dict = get_chan_soh_trace_as_dict(data_obj, 'LAT') + lo_dict = get_chan_soh_trace_as_dict(data_obj, 'LON') + el_dict = get_chan_soh_trace_as_dict(data_obj, 'LEV') + + extracted_gps_points = [] + for time, num_sats_used in ns_dict.items(): + # We currently don't know how to translate the data in the LFT channel + # into the actual fix type, so we are giving it a dummy value until we + # can do so. + fix_type = 'N/A' + current_lat = la_dict.get(time, None) + current_long = lo_dict.get(time, None) + current_height = el_dict.get(time, None) + # We are ignoring any point that does not have complete location data. + # It might be possible to only ignore points with missing latitude or + # longitude, seeing as height is not required to plot a GPS point. + if (current_lat is None or + current_long is None or + current_height is None): + continue + for i, num_sats in enumerate(num_sats_used): + try: + # Convert the location data to the appropriate unit. Q8 stores + # latitude and longitude in microdegrees, and we want them to + # be in degrees. The unit of the elevation is m, which is what + # we want, so there is no conversion done.
+ lat = current_lat[i] / 1e6 + long = current_long[i] / 1e6 + height = current_height[i] + height_unit = 'M' + formatted_time = UTCDateTime(time).strftime( + '%Y-%m-%d %H:%M:%S' + ) + gps_point = GPSPoint(formatted_time, fix_type, num_sats, lat, + long, height, height_unit) + extracted_gps_points.append(gps_point) + except IndexError: + break + # We only need to loop through one dictionary. If a time is not + # available for a channel, the GPS data point at that time would be + # invalid (it is missing a piece of data). Once we loop through a + # channel's dictionary, we know that any time not contained in that + # dictionary is not available for the channel. As a result, any time + # we pass through in the other channels after the first loop would + # result in an invalid GPS data point. Because we discard any invalid + # point, there is no point in looping through the dictionary of other + # channels. + return extracted_gps_points + + def extract_gps_data_rt130(data_obj: RT130) -> List[GPSPoint]: """ Retrieve the GPS of the current data set. Works by looking into the log @@ -450,6 +526,8 @@ def gps_data_mseed(data_obj: MSeed) -> List[GPSPoint]: return extract_gps_data_q330(data_obj) elif data_type == 'Centaur' or data_type == 'Pegasus': return extract_gps_data_pegasus_centaur(data_obj, data_type) + elif data_type == 'Q8': + return extract_gps_data_q8(data_obj, data_type) else: # data_type = "Unknown" try: diff --git a/tests/controller/test_processing.py b/tests/controller/test_processing.py index fbd4bf62c4e015237bb054e6b733d0a55833df57..ed038c7bf47db5fad24fb1566b6b269d3b254a5d 100644 --- a/tests/controller/test_processing.py +++ b/tests/controller/test_processing.py @@ -1,3 +1,4 @@ +import itertools from tempfile import TemporaryDirectory, NamedTemporaryFile from pathlib import Path @@ -11,13 +12,17 @@ from sohstationviewer.controller.processing import ( get_data_type_from_file, get_next_channel_from_mseed_file, ) -from sohstationviewer.database.extract_data import get_signature_channels +from sohstationviewer.database.extract_data import ( + get_signature_channels, + get_all_channels, +) from PySide6 import QtWidgets from tests.base_test_case import BaseTestCase TEST_DATA_DIR = Path(__file__).resolve().parent.parent.joinpath('test_data') rt130_dir = TEST_DATA_DIR.joinpath('RT130-sample/2017149.92EB/2017150') +q8_dir = TEST_DATA_DIR.joinpath('Q8-sample') q330_dir = TEST_DATA_DIR.joinpath('Q330-sample/day_vols_AX08') centaur_dir = TEST_DATA_DIR.joinpath('Centaur-sample/SOH') pegasus_dir = TEST_DATA_DIR.joinpath('Pegasus-sample/Pegasus_SVC4/soh') @@ -220,141 +225,209 @@ class TestDetectDataType(BaseTestCase): self.addCleanup(func_patcher.stop) self.mock_get_data_type_from_file = func_patcher.start() - self.dir1 = TemporaryDirectory() - self.dir2 = TemporaryDirectory() - self.file1 = NamedTemporaryFile(dir=self.dir1.name) - self.file2 = NamedTemporaryFile(dir=self.dir2.name) + self.dir = TemporaryDirectory() + self.files = [] + for i in range(6): + self.files.append(NamedTemporaryFile(dir=self.dir.name)) def tearDown(self) -> None: """Teardown text fixtures.""" - del self.file1, self.file2 - self.dir1.cleanup() - self.dir2.cleanup() + while len(self.files) != 0: + file = self.files.pop() # noqa + del file + + self.dir.cleanup() - def test_one_directory_not_unknown_data_type(self): + def test_non_multiplexed_dir_only_one_data_type(self): """ - Test basic functionality of detect_data_type - only one directory was - given and the data type it contains can be detected. 
+ Test basic functionality of detect_data_type - the data type in the + given directory can be detected. """ - expected_data_type = ('RT130', False) - self.mock_get_data_type_from_file.return_value = expected_data_type + with self.subTest('test_files_can_only_be_one_data_type'): + file_data_types = ({'RT130'}, False) + self.mock_get_data_type_from_file.return_value = file_data_types - self.assertEqual( - detect_data_type([self.dir1.name]), - expected_data_type - ) + self.assertEqual( + detect_data_type(self.dir.name), + ({'RT130'}, False) + ) + + with self.subTest('test_files_can_be_multiple_data_types'): + file_data_types = itertools.cycle( + [({'Q330', 'Q8'}, False), ({'Q330'}, False)] + ) + self.mock_get_data_type_from_file.side_effect = file_data_types + + self.assertEqual( + detect_data_type(self.dir.name), + ({'Q330'}, False) + ) - def test_same_data_type_not_multiplex(self): + def test_multiplexed_dir_only_one_data_type(self): """ Test basic functionality of detect_data_type - the given directories - contain the same data type and the data type was detected using the - same channel. + contain the same data type but the data type was detected using + different channels. """ - expected_data_type = ('RT130', False) - self.mock_get_data_type_from_file.return_value = expected_data_type + with self.subTest('test_files_can_only_be_one_data_type'): + file_data_types = itertools.cycle( + [({'Q330'}, True), ({'Q330'}, False)] + ) + self.mock_get_data_type_from_file.side_effect = file_data_types - self.assertEqual( - detect_data_type([self.dir1.name, self.dir2.name]), - expected_data_type - ) + self.assertEqual( + detect_data_type(self.dir.name), + ({'Q330'}, True) + ) + + with self.subTest('test_files_can_be_multiple_data_types'): + file_data_types = itertools.cycle( + [({'Q330', 'Q8'}, True), ({'Q330'}, False)] + ) + self.mock_get_data_type_from_file.side_effect = file_data_types + + self.assertEqual( + detect_data_type(self.dir.name), + ({'Q330'}, True) + ) - def test_same_data_type_multiplex(self): + def test_non_multiplexed_dir_multiple_possible_data_types(self): """ Test basic functionality of detect_data_type - the given directories - contain the same data type but the data type was detected using - different channels. + contain different data types. """ - returned_data_types = [('Q330', True), ('Q330', True)] - self.mock_get_data_type_from_file.side_effect = returned_data_types + file_data_types = itertools.cycle( + [({'Q8', 'Q330', 'Centaur'}, False), + ({'Q8', 'Q330'}, False)] + ) + self.mock_get_data_type_from_file.side_effect = file_data_types self.assertEqual( - detect_data_type([self.dir1.name, self.dir2.name]), - returned_data_types[0] + detect_data_type(self.dir.name), + ({'Q8', 'Q330'}, False) ) - def test_different_data_types(self): + def test_multiplexed_dir_multiple_possible_data_types(self): """ Test basic functionality of detect_data_type - the given directories contain different data types. 
""" - returned_data_types = [('RT130', False), ('Q330', False)] - self.mock_get_data_type_from_file.side_effect = returned_data_types + file_data_types = itertools.cycle( + [({'Q8', 'Q330', 'Centaur'}, True), + ({'Q8', 'Q330'}, False)] + ) + self.mock_get_data_type_from_file.side_effect = file_data_types - with self.assertRaises(Exception) as context: - detect_data_type([self.dir1.name, self.dir2.name]) self.assertEqual( - str(context.exception), - f"There are more than one types of data detected:\n" - f"{self.dir1.name}: RT130, " - f"{self.dir2.name}: Q330\n\n" - f"Please have only data that related to each other.") + detect_data_type(self.dir.name), + ({'Q8', 'Q330'}, True) + ) - def test_same_data_types_different_multiplex(self): + def test_non_multiplexed_dir_no_possible_data_type(self): """ - Test basic functionality of detect_data_type - the given directories - contain same data types but different multiplex. + Test basic functionality of detect_data_type - can't detect any data + type. """ - returned_data_types = [('Q330', True), ('Q330', False)] - self.mock_get_data_type_from_file.side_effect = returned_data_types + file_data_types = (set(), False) + self.mock_get_data_type_from_file.return_value = file_data_types - with self.assertRaises(Exception) as context: - detect_data_type([self.dir1.name, self.dir2.name]) self.assertEqual( - str(context.exception), - "There are both multiplexed and non-multiplexed data " - "detected.\n\nPlease have only data that related to" - " each other.") + detect_data_type(self.dir.name), + (set(), False) + ) - def test_unknown_data_type(self): + def test_multiplexed_dir_no_possible_data_type(self): """ Test basic functionality of detect_data_type - can't detect any data type. """ - expected_data_type = ('Unknown', False) - self.mock_get_data_type_from_file.return_value = expected_data_type + file_data_types = itertools.cycle( + [(set(), False), (set(), True)] + ) + self.mock_get_data_type_from_file.side_effect = file_data_types + self.assertEqual( - detect_data_type([self.dir1.name]), - expected_data_type + detect_data_type(self.dir.name), + (set(), True) ) - def test_multiplex_none(self): + def test_some_files_do_not_contain_data(self): """ Test basic functionality of detect_data_type - can't detect any data type. """ - unknown_data_type = ('Unknown', None) - self.mock_get_data_type_from_file.return_value = unknown_data_type + file_data_types = itertools.cycle( + [(None, None), + ({'Q8', 'Q330', 'Centaur'}, True)] + ) + self.mock_get_data_type_from_file.side_effect = file_data_types + + self.assertEqual( + detect_data_type(self.dir.name), + ({'Q8', 'Q330', 'Centaur'}, True) + ) + + def test_no_file_contain_data(self): + file_data_types = (None, None) + self.mock_get_data_type_from_file.return_value = file_data_types + with self.assertRaises(Exception) as context: - detect_data_type([self.dir1.name]) + detect_data_type(self.dir.name) self.assertEqual( str(context.exception), - "No channel found for the data set") + "No channel found for the data set" + ) class TestGetDataTypeFromFile(BaseTestCase): """Test suite for get_data_type_from_file""" - def test_can_detect_data_type_from_mseed_file(self): + + def test_can_detect_one_data_type_from_mseed_file(self): """ Test basic functionality of get_data_type_from_file - given file contains MSeed data and the data type can be detected from the file. 
""" - q330_file = q330_dir.joinpath('AX08.XA..VKI.2021.186') + q8_file = q8_dir.joinpath('MM10.XX..LCL.2023.313.ms') centaur_file = centaur_dir.joinpath( 'XX.3734.SOH.centaur-3_3734..20180817_000000.miniseed.miniseed') pegasus_file = pegasus_dir.joinpath( '2020/XX/KC01/VE1.D/XX.KC01..VE1.D.2020.129') - q330_data_type = ('Q330', False) - centaur_data_type = ('Centaur', True) - pegasus_data_type = ('Pegasus', False) + q8_data_type = ({'Q8'}, False) + centaur_data_type = ({'Centaur'}, True) + pegasus_data_type = ({'Pegasus'}, False) + + all_chan = get_all_channels() + sig_chan = get_signature_channels() + + self.assertTupleEqual( + get_data_type_from_file(q8_file, sig_chan, all_chan), + q8_data_type) + self.assertTupleEqual( + get_data_type_from_file(centaur_file, sig_chan, all_chan), + centaur_data_type) + self.assertTupleEqual( + get_data_type_from_file(pegasus_file, sig_chan, all_chan), + pegasus_data_type) + + def test_can_detect_multiple_data_types_from_mseed_file(self): + q330_q8_file = q330_dir.joinpath('AX08.XA..VKI.2021.186') + centaur_pegasus_file = pegasus_dir.joinpath( + '2020/XX/KC01/VDT.D/XX.KC01..VDT.D.2020.129' + ) + q330_q8_data_type = ({'Q330', 'Q8'}, False) + centaur_pegasus_data_type = ({'Centaur', 'Pegasus'}, False) + + all_chan = get_all_channels() sig_chan = get_signature_channels() - self.assertTupleEqual(get_data_type_from_file(q330_file, sig_chan), - q330_data_type) - self.assertTupleEqual(get_data_type_from_file(centaur_file, sig_chan), - centaur_data_type) - self.assertTupleEqual(get_data_type_from_file(pegasus_file, sig_chan), - pegasus_data_type) + self.assertTupleEqual( + get_data_type_from_file(q330_q8_file, sig_chan, all_chan), + q330_q8_data_type) + + self.assertTupleEqual( + get_data_type_from_file(centaur_pegasus_file, sig_chan, all_chan), + centaur_pegasus_data_type) def test_cannot_detect_data_type_from_mseed_file(self): """ @@ -364,8 +437,35 @@ class TestGetDataTypeFromFile(BaseTestCase): # We choose a waveform file because waveform channels cannot be used to # determine the data type in a file. mseed_file = q330_dir.joinpath('AX08.XA..LHE.2021.186') + expected = (None, None) + actual = get_data_type_from_file(mseed_file, get_signature_channels(), + get_all_channels()) + self.assertEqual(expected, actual) + + def test_mass_position_multiplexed_with_soh_channels(self): + """ + Test against bug found when modifying get_data_type_from_file - cannot + detect data type from files with mass-position and SOH channels + multiplexed together. + """ + multiplexed_file = centaur_dir.joinpath( + 'XX.3734.SOH.centaur-3_3734..20180817_000000.miniseed.miniseed' + ) + expected = ({'Centaur'}, True) + actual = get_data_type_from_file( + multiplexed_file, get_signature_channels(), get_all_channels() + ) + self.assertEqual(expected, actual) + + def test_mass_position_only_file(self): + """ + Test against bug found when modifying get_data_type_from_file - cannot + detect data type from files with only mass-position data. 
+ """ + mseed_file = q330_dir.joinpath('AX08.XA..VM1.2021.186') expected = (None, False) - actual = get_data_type_from_file(mseed_file, get_signature_channels()) + actual = get_data_type_from_file(mseed_file, get_signature_channels(), + get_all_channels()) self.assertEqual(expected, actual) def test_rt130_data(self): @@ -375,9 +475,10 @@ class TestGetDataTypeFromFile(BaseTestCase): """ rt130_file = Path(rt130_dir).joinpath( '92EB/0/000000000_00000000') - expected_data_type = ('RT130', False) + expected_data_type = ({'RT130'}, False) self.assertTupleEqual( - get_data_type_from_file(rt130_file, get_signature_channels()), + get_data_type_from_file(rt130_file, get_signature_channels(), + get_all_channels()), expected_data_type ) @@ -387,9 +488,10 @@ class TestGetDataTypeFromFile(BaseTestCase): empty. """ test_file = NamedTemporaryFile() - expected = (None, False) + expected = (None, None) actual = get_data_type_from_file( - Path(test_file.name), get_signature_channels()) + Path(test_file.name), get_signature_channels(), get_all_channels() + ) self.assertEqual(expected, actual) def test_file_does_not_exist(self): @@ -400,15 +502,19 @@ class TestGetDataTypeFromFile(BaseTestCase): empty_name_file = Path('') non_existent_file = Path('non_existent_dir') with self.assertRaises(IsADirectoryError): - get_data_type_from_file(empty_name_file, get_signature_channels()) + get_data_type_from_file(empty_name_file, get_signature_channels(), + get_all_channels()) with self.assertRaises(FileNotFoundError): get_data_type_from_file(non_existent_file, - get_signature_channels()) + get_signature_channels(), + get_all_channels()) def test_non_data_binary_file(self): binary_file = TEST_DATA_DIR / 'Non-data-file' / 'non_data_file' - ret = get_data_type_from_file(binary_file, get_signature_channels()) - self.assertIsNone(ret) + expected = (None, None) + actual = get_data_type_from_file(binary_file, get_signature_channels(), + get_all_channels()) + self.assertEqual(expected, actual) class TestGetNextChannelFromMseedFile(BaseTestCase): diff --git a/tests/test_data/Q8-sample/MM10.XX..ACE.2023.313.ms b/tests/test_data/Q8-sample/MM10.XX..ACE.2023.313.ms new file mode 100755 index 0000000000000000000000000000000000000000..f705aad52284dd962ce0d6777eecf81508c279cc Binary files /dev/null and b/tests/test_data/Q8-sample/MM10.XX..ACE.2023.313.ms differ diff --git a/tests/test_data/Q8-sample/MM10.XX..HH1.2023.313.ms b/tests/test_data/Q8-sample/MM10.XX..HH1.2023.313.ms new file mode 100755 index 0000000000000000000000000000000000000000..a68dc35c775545ac5016dcbc65bf46de2eccc6c6 Binary files /dev/null and b/tests/test_data/Q8-sample/MM10.XX..HH1.2023.313.ms differ diff --git a/tests/test_data/Q8-sample/MM10.XX..HN1.2023.313.ms b/tests/test_data/Q8-sample/MM10.XX..HN1.2023.313.ms new file mode 100755 index 0000000000000000000000000000000000000000..35b143df9fb406d72283ff8990f99e1435d59988 Binary files /dev/null and b/tests/test_data/Q8-sample/MM10.XX..HN1.2023.313.ms differ diff --git a/tests/test_data/Q8-sample/MM10.XX..LAT.2023.313.ms b/tests/test_data/Q8-sample/MM10.XX..LAT.2023.313.ms new file mode 100755 index 0000000000000000000000000000000000000000..16b12327644b60ec12736f8d1214399d7b67bb57 Binary files /dev/null and b/tests/test_data/Q8-sample/MM10.XX..LAT.2023.313.ms differ diff --git a/tests/test_data/Q8-sample/MM10.XX..LCL.2023.313.ms b/tests/test_data/Q8-sample/MM10.XX..LCL.2023.313.ms new file mode 100755 index 0000000000000000000000000000000000000000..3ed73c6b49b460254d5b2b9a8183dc6b94a77b4c Binary files /dev/null and 
b/tests/test_data/Q8-sample/MM10.XX..LCL.2023.313.ms differ diff --git a/tests/test_data/Q8-sample/MM10.XX..LOG.2023.313.ms b/tests/test_data/Q8-sample/MM10.XX..LOG.2023.313.ms new file mode 100755 index 0000000000000000000000000000000000000000..9e690de57b12d292a1d22a2071c735b1f7e1fe84 Binary files /dev/null and b/tests/test_data/Q8-sample/MM10.XX..LOG.2023.313.ms differ diff --git a/tests/test_data/Q8-sample/MM10.XX..VMA.2023.313.ms b/tests/test_data/Q8-sample/MM10.XX..VMA.2023.313.ms new file mode 100755 index 0000000000000000000000000000000000000000..99795169918a41f89f73e006ded203e6ed40f14c Binary files /dev/null and b/tests/test_data/Q8-sample/MM10.XX..VMA.2023.313.ms differ
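
The core of the new detection flow is the per-file set intersection described in the detect_data_type and get_data_type_from_file docstrings above. Below is a minimal standalone sketch of that logic; the channel-to-data-type map and the helper names are illustrative stand-ins for get_all_channels() and the real record reader, not part of the change itself.

from typing import Dict, FrozenSet, List, Optional, Set

# Hypothetical channel -> data types map, standing in for get_all_channels().
CHANNEL_DATA_TYPES: Dict[str, Set[str]] = {
    'VKI': {'Q330', 'Q8'},
    'VEC': {'Q330', 'Q8', 'Centaur'},
    'LCL': {'Q8'},
}


def possible_types_for_file(channels: List[str]) -> Set[str]:
    """Union of the data types associated with every channel in one file."""
    types: Set[str] = set()
    for chan in channels:
        types |= CHANNEL_DATA_TYPES.get(chan, set())
    return types


def possible_types_for_dir(files: List[List[str]]) -> FrozenSet[str]:
    """Intersect the per-file candidates, as the new detect_data_type does."""
    result: Optional[Set[str]] = None
    for channels in files:
        file_types = possible_types_for_file(channels)
        result = file_types if result is None else result & file_types
    return frozenset(result or set())


if __name__ == '__main__':
    # {'Q330', 'Q8', 'Centaur'} & {'Q8'} narrows the data set down to {'Q8'}.
    print(possible_types_for_dir([['VKI', 'VEC'], ['LCL']]))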