Skip to content
Snippets Groups Projects
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
processing.py 7.17 KiB
"""
Functions called from main_window and dialogs to read data files for data,
channels, and data type.
"""

import os
import json
import re
import traceback
from pathlib import Path
from typing import List, Set, Optional, Dict, Tuple

from PySide2.QtWidgets import QTextBrowser
from obspy.core import read as read_ms
from obspy.io.reftek.core import Reftek130Exception

from sohstationviewer.model.mseed.mseed import MSeed
from sohstationviewer.database.extract_data import get_signature_channels
from sohstationviewer.model.data_type_model import DataTypeModel

from sohstationviewer.controller.util import (
    validate_file, display_tracking_info
)

from sohstationviewer.view.util.enums import LogType


def load_data(data_type: str, tracking_box: QTextBrowser, dir_list: List[str],
              req_wf_chans: Optional[List[str]] = None,
              req_soh_chans: Optional[List[str]] = None,
              read_start: Optional[float] = None,
              read_end: Optional[float] = None) -> Optional[DataTypeModel]:
    """
    Load the data stored in dir_list and store it in a DataTypeModel object.
    The concrete class of the data object is based on data_type. Run on the
    same thread as its caller, and so will block the GUI if called on the main
    thread. It is advisable to use model.data_loader.DataLoader to load data
    unless it is necessary to load data in the main thread (e.g. if there is
    a need to access the call stack).

    Only one directory is loaded: directories are tried in order until one
    produces a data object, and the remaining ones are ignored. A directory
    that raises while being read is reported in tracking_box as a warning
    and the next directory is tried.

    :param data_type: type of data read
    :param tracking_box: widget to display tracking info
    :param dir_list: list of directories selected by users
    :param req_wf_chans: requested waveform channel list; None means an
        empty list
    :param req_soh_chans: requested soh channel list; None means an empty
        list
    :param read_start: start time of read data
    :param read_end: finish time of read data
    :return data_object: object that keep the data read from dir_list, or
        None if no directory could be read
    """
    # Build the default lists per call so no list object is shared between
    # calls (a mutable default would be handed to every data object).
    req_wf_chans = [] if req_wf_chans is None else req_wf_chans
    req_soh_chans = [] if req_soh_chans is None else req_soh_chans

    data_object = None
    for d in dir_list:
        if data_object is not None:
            # TODO: will work with select more than one dir later
            break
        try:
            data_object = DataTypeModel.create_data_object(
                data_type, tracking_box, d,
                req_wf_chans=req_wf_chans, req_soh_chans=req_soh_chans,
                read_start=read_start, read_end=read_end)
        except Exception:
            # Best effort: report the failure and fall through to the next
            # directory instead of aborting the whole load.
            fmt = traceback.format_exc()
            msg = f"Dir {d} can't be read due to error: {fmt}"
            display_tracking_info(tracking_box, msg, LogType.WARNING)

    return data_object


def read_channels(tracking_box: QTextBrowser, list_of_dir: List[str]
                  ) -> Set[str]:
    """
    Scan available for SOH channels (to be used in channel preferences dialog).
        Since channels for RT130 is hard code, this function won't be applied
        for it.
    Note that Mass position channels are excluded because the default
        include_mp123 and include_mp456 for MSeed are False

    The first directory creates the MSeed object; each following directory
    is read into that same object.

    :param tracking_box: widget to display tracking info
    :param list_of_dir: list of directories selected by users
    :return data_object.channels: set of channels present in list_of_dir;
        an empty set if list_of_dir is empty
    :raise Exception: if no channel can be read from the first directory
    """
    data_object = None
    for d in list_of_dir:
        if data_object is None:
            data_object = MSeed(tracking_box, d, readChanOnly=True)
            if not data_object.channels:
                # If no data can be read from the first dir, throw exception
                raise Exception(f"No data can be read from {d}")
        else:
            data_object.readDir(d, readChanOnly=True)

    if data_object is None:
        # Nothing was selected, so there is nothing to scan.
        return set()
    return data_object.channels


def _find_dir_data_type(
        d: str, sign_chan_data_type_dict: Dict[str, str]
) -> Tuple[str, str]:
    """
    Walk directory d and return the result of the first valid file for which
    get_data_type_from_file identifies a data type.
    :param d: directory to scan (sub-directories included)
    :param sign_chan_data_type_dict: dict of unique chan for data type
    :return: (data type, channel from which data type is detected), or
        ("Unknown", '_') if no file in d can be identified
    """
    for path, _, files in os.walk(d):
        for file_name in files:
            path2file = Path(path).joinpath(file_name)
            if not validate_file(path2file, file_name):
                continue
            ret = get_data_type_from_file(path2file,
                                          sign_chan_data_type_dict)
            if ret is not None:
                return ret
    return "Unknown", '_'


def detect_data_type(tracking_box: QTextBrowser, list_of_dir: List[str]
                     ) -> Optional[str]:
    """
    Detect data type for the given directories using get_data_type_from_file
    :param tracking_box: widget to display tracking info
    :param list_of_dir: list of directories selected by users
    :return:
        + if there are more than one data types detected,
            return None with a warning message
        + if only Unknown data type detected (or no directory is given),
            return None with a warning message
        + if data type found, return data_type,
    """
    sign_chan_data_type_dict = get_signature_channels()

    dir_data_type_dict = {
        d: _find_dir_data_type(d, sign_chan_data_type_dict)
        for d in list_of_dir
    }
    data_type_list = {
        data_type for data_type, _ in dir_data_type_dict.values()}

    if len(data_type_list) > 1:
        # One "dir: [data type, channel]" line per directory. Built directly
        # so that directory names are shown verbatim and each pair stays on
        # a single line.
        dir_data_type_str = '\n'.join(
            f"{d}: [{data_type}, {chan}]"
            for d, (data_type, chan) in dir_data_type_dict.items())
        msg = (f"There are more than one types of data detected:\n"
               f"{dir_data_type_str}\n\n"
               f"Please have only data that related to each other.")
        display_tracking_info(tracking_box, msg, LogType.ERROR)
        return None

    if not data_type_list or data_type_list == {'Unknown'}:
        # An empty selection is handled like a selection with no known data.
        msg = ("There are no known data detected.\n"
               "Please select different folder(s).")
        display_tracking_info(tracking_box, msg, LogType.ERROR)
        return None

    # Exactly one data type is left at this point.
    return next(iter(data_type_list))


def get_data_type_from_file(
        path2file: Path,
        sign_chan_data_type_dict: Dict[str, str]
) -> Optional[Tuple[str, str]]:
    """
    Identify the data type of a single file.
    + The file is first read as mseed:
        TypeError raised by the reader => no data type detected, return None
        Reftek130Exception raised by the reader => data type is RT130
        otherwise data type is mseed which includes: q330, pegasus, centaur
    + For a file that is read successfully, the traces are checked in order
    and the first one whose channel is a unique channel of a data type
    decides the result.
    :param path2file: absolute path to processed file
    :param sign_chan_data_type_dict: dict of unique chan for data
        type
    :return: detected data type, channel from which data type is detected;
        None if the data type can't be detected
    """
    try:
        traces = read_ms(path2file)
    except TypeError:
        return None
    except Reftek130Exception:
        # RT130 is reported with a placeholder channel
        return 'RT130', '_'

    channels = (tr.stats['channel'] for tr in traces)
    for channel in channels:
        if channel not in sign_chan_data_type_dict:
            continue
        return sign_chan_data_type_dict[channel], channel
    return None