Skip to content
Snippets Groups Projects

connect GUI to new mseed

Merged Lan Dam requested to merge connect_gui_to_new_mseed into master
Files
32
@@ -6,18 +6,22 @@ channels, datatype
import os
import json
import re
import traceback
from pathlib import Path
from typing import List, Set, Optional, Dict, Tuple
from PySide2.QtCore import QEventLoop, Qt
from PySide2.QtGui import QCursor
from PySide2.QtWidgets import QTextBrowser, QApplication
from obspy.core import read as read_ms
from obspy.io.reftek.core import Reftek130Exception
from obspy.io import reftek
from sohstationviewer.model.mseed_data.record_reader import RecordReader \
as MSeedRecordReader
from sohstationviewer.model.mseed_data.record_reader_helper import \
MSeedReadError
from sohstationviewer.model.mseed_data.mseed_reader import \
move_to_next_record
from sohstationviewer.database.extract_data import get_signature_channels
from sohstationviewer.model.data_type_model import DataTypeModel
from sohstationviewer.model.handling_data import (
read_mseed_chanids_from_headers)
@@ -28,69 +32,6 @@ from sohstationviewer.controller.util import (
from sohstationviewer.view.util.enums import LogType
def load_data(data_type: str, tracking_box: QTextBrowser, dir_list: List[str],
              list_of_rt130_paths: List[Path],
              req_wf_chans: Optional[List[str]] = None,
              req_soh_chans: Optional[List[str]] = None,
              read_start: Optional[float] = None,
              read_end: Optional[float] = None) -> DataTypeModel:
    """
    Load the data stored in dir_list (or list_of_rt130_paths) and store it
    in a DataTypeModel object. The concrete class of the data object is based
    on data_type. Runs on the same thread as its caller, and so will block
    the GUI if called on the main thread. It is advisable to use
    model.data_loader.DataLoader to load data unless it is necessary to load
    data in the main thread (e.g. if there is a need to access the call
    stack).

    :param data_type: type of data read
    :param tracking_box: widget to display tracking info
    :param dir_list: list of directories selected by users
    :param list_of_rt130_paths: list of rt130 directories selected by users
    :param req_wf_chans: requested waveform channel list (None means "all
        defaults", equivalent to the old [] default)
    :param req_soh_chans: requested soh channel list (None means "all
        defaults", equivalent to the old [] default)
    :param read_start: start time of read data
    :param read_end: finish time of read data
    :return data_object: object that keeps the data read from the selected
        directories, or None if nothing could be read
    """
    # None defaults avoid the shared-mutable-default-argument pitfall;
    # normalize here so downstream code still receives lists.
    if req_wf_chans is None:
        req_wf_chans = []
    if req_soh_chans is None:
        req_soh_chans = []
    data_object = None
    if not list_of_rt130_paths:
        for d in dir_list:
            if data_object is None:
                try:
                    data_object = DataTypeModel.create_data_object(
                        data_type, tracking_box, d, [],
                        req_wf_chans=req_wf_chans, req_soh_chans=req_soh_chans,
                        read_start=read_start, read_end=read_end)
                except Exception:
                    fmt = traceback.format_exc()
                    msg = f"Dir {d} can't be read due to error: {str(fmt)}"
                    display_tracking_info(tracking_box, msg, LogType.WARNING)
            # TODO: will work with select more than one dir later
    else:
        try:
            data_object = DataTypeModel.create_data_object(
                data_type, tracking_box, [''], list_of_rt130_paths,
                req_wf_chans=req_wf_chans, req_soh_chans=req_soh_chans,
                read_start=read_start, read_end=read_end)
        except Exception:
            fmt = traceback.format_exc()
            msg = f"RT130 selected can't be read due to error: {str(fmt)}"
            display_tracking_info(tracking_box, msg, LogType.WARNING)
    if data_object is None:
        msg = "No data object created. Check with implementer"
        display_tracking_info(tracking_box, msg, LogType.WARNING)
    return data_object
def read_mseed_channels(tracking_box: QTextBrowser, list_of_dir: List[str],
on_unittest: bool = False
) -> Set[str]:
@@ -157,6 +98,7 @@ def detect_data_type(list_of_dir: List[str]) -> Optional[str]:
sign_chan_data_type_dict = get_signature_channels()
dir_data_type_dict = {}
is_multiplex = None
for d in list_of_dir:
data_type = "Unknown"
for path, subdirs, files in os.walk(d):
@@ -165,17 +107,24 @@ def detect_data_type(list_of_dir: List[str]) -> Optional[str]:
if not validate_file(path2file, file_name):
continue
ret = get_data_type_from_file(path2file,
sign_chan_data_type_dict)
sign_chan_data_type_dict,
is_multiplex)
if ret is not None:
data_type, chan = ret
break
d_type, is_multiplex = ret
if d_type is not None:
data_type = d_type
break
if data_type != "Unknown":
break
if is_multiplex is None:
raise Exception("No channel found for the data set")
if data_type == "Unknown":
dir_data_type_dict[d] = ("Unknown", '_')
dir_data_type_dict[d] = "Unknown"
else:
dir_data_type_dict[d] = (data_type, chan)
data_type_list = {d[0] for d in dir_data_type_dict.values()}
dir_data_type_dict[d] = data_type
data_type_list = list(set(dir_data_type_dict.values()))
if len(data_type_list) > 1:
dir_data_type_str = json.dumps(dir_data_type_dict)
dir_data_type_str = re.sub(r'\{|\}|"', '', dir_data_type_str)
@@ -185,39 +134,78 @@ def detect_data_type(list_of_dir: List[str]) -> Optional[str]:
f"Please have only data that related to each other.")
raise Exception(msg)
elif data_type_list == {'Unknown'}:
elif data_type_list == ['Unknown']:
msg = ("There are no known data detected.\n\n"
"Do you want to cancel to select different folder(s)\n"
"Or continue to read any available mseed file?")
raise Exception(msg)
return list(dir_data_type_dict.values())[0][0]
return data_type_list[0], is_multiplex
def get_data_type_from_file(
        path2file: Path,
        sign_chan_data_type_dict: Dict[str, str],
        is_multiplex: Optional[bool] = None
) -> Optional[Tuple[Optional[str], bool]]:
    """
    Identify the data type of a file by checking whether one of its channels
    is a signature (unique) channel of a known data type.

    + Files whose names look like waveform data are excluded up front to
      improve performance.
    + Otherwise each mseed record of the file is examined in turn:
        - If MSeedRecordReader fails, check whether the file is RT130 and
          report data type 'RT130'; otherwise return None so the caller can
          continue checking another file.
        - If more than one channel appears in the file, the file is
          multiplex.
        - If a signature channel is found, report the data type of the file.

    :param path2file: absolute path to processed file
    :param sign_chan_data_type_dict: dict of unique chan for data type
    :param is_multiplex: if the file is multiplex (None = not yet known)
    :return: (detected data type or None, whether the file is multiplex), or
        None when the file is neither readable mseed nor RT130
    """
    wf_chan_posibilities = ['FH', 'FN',  # >= 1000 to < 5000
                            'GH', 'GL',  # >= 1000 to < 5000
                            'DH', 'DL',  # >= 250 to < 1000
                            'CH', 'CN',  # >= 250 to < 1000
                            'EH', 'EL', 'EP',  # >= 80
                            'SH', 'SL', 'SP',  # >= 10 to < 80
                            'HH', 'HN',  # >= 80
                            'BH', 'BN',  # >= 10 to < 80
                            'MH', 'MN', 'MP', 'ML',
                            'LH', 'LL', 'LP', 'LN',
                            # NOTE(review): 'VL' appears twice below; 'VN'
                            # is probably intended -- confirm with
                            # implementer before changing behavior.
                            'VP', 'VL', 'VL', 'VH',
                            'UN', 'UP', 'UL', 'UH']
    if any(x in path2file.name for x in wf_chan_posibilities):
        # Skip checking waveform files which aren't signature channels
        return None, False
    chans_in_stream = set()
    data_type = None
    # 'with' guarantees the handle is closed on every exit path; the
    # original only closed it on the MSeedReadError paths and at the end,
    # leaking it if an unexpected exception escaped the loop.
    with open(path2file, 'rb') as file:
        while file.read(1) != b'':  # peek one byte to detect EOF
            file.seek(-1, 1)  # undo the peek
            current_record_start = file.tell()
            try:
                record = MSeedRecordReader(file)
            except MSeedReadError:
                if reftek.core._is_reftek130(path2file):
                    return 'RT130', False
                # Unreadable as mseed and not RT130: let the caller try
                # another file.
                return
            chan = record.record_metadata.channel
            if is_multiplex is None:
                # Only collect channels while multiplexing is still unknown.
                chans_in_stream.add(chan)
                if len(chans_in_stream) > 1:
                    is_multiplex = True
            if chan in sign_chan_data_type_dict:
                data_type = sign_chan_data_type_dict[chan]
                if is_multiplex:
                    return data_type, is_multiplex
            move_to_next_record(file, current_record_start, record)
    if is_multiplex is None:
        # Derive multiplexing from what was seen only when the caller did
        # not already know it; the original recomputed unconditionally,
        # discarding a True passed in by the caller.
        is_multiplex = len(chans_in_stream) > 1
    return data_type, is_multiplex
Loading