# connect GUI to new mseed
# Merge request: connect_gui_to_new_mseed -> master (requested by Lan Dam)
from typing import List, Union
import os
from pathlib import Path
from PySide2.QtWidgets import QMessageBox
from obspy.core import read as read_ms
from obspy.io.reftek.core import Reftek130Exception
from sohstationviewer.model.mseed_data.record_reader import RecordReader
from sohstationviewer.model.mseed_data.record_reader_helper import MSeedReadError
from sohstationviewer.conf.constants import BIG_FILE_SIZE
from sohstationviewer.controller.util import validate_file
def _get_ready_for_next_read(file, current_record_start: int,
record: RecordReader):
"""
Move the current position of file to next record
:param current_record_start: the start position of the current record
:param reader: the record that is reading
"""
# MSEED stores the size of a data record as an exponent of a
# power of two, so we have to convert that to actual size before
# doing anything else.
record_length_exp = record.header_unpacker.unpack(
'B', record.blockette_1000.record_length
)[0]
record_size = 2 ** record_length_exp
file.seek(current_record_start)
file.seek(record_size, 1)
def _check_RT130(path2file: Path):
    """
    Probe a file with obspy's generic reader to see whether reading its
    headers triggers the REFTEK 130 reader's exception.

    :param path2file: path to the file to check
    :return: the tuple ('RT130', False) when reading the headers raises
        Reftek130Exception; otherwise None (implicitly).
        NOTE(review): returning a tuple only on the exception path and
        an implicit None otherwise looks unfinished -- confirm the
        intended contract (compare _check_file_type, which returns plain
        strings).
    """
    try:
        read_ms(path2file, headonly=True)
    except Reftek130Exception:
        return 'RT130', False
def _check_file_type(path2file):
"""
Check if the given file is TEXT
:param path2file:
:return:
:rtype:
"""
try:
file = open(path2file, 'r')
file.read(64).strip()
file.close()
return 'TEXT'
except UnicodeDecodeError:
pass
except FileNotFoundError as e:
raise e
file = open(path2file, 'rb')
try:
RecordReader(file)
except MSeedReadError as e:
try:
read_ms(path2file, headonly=True)
except Reftek130Exception:
return 'RT130'
except Exception:
return 'binary'
return 'MSEED'
def _get_size_chan_in_name(dir_path: str, req_wf_chans: List[Union[str, int]]):
"""
Estimate size of directory by collecting size of waveform in the list of
wf_chan_posibilities only. This is tricky but fast.
:param dir_path: absolute path to directory
:param req_wf_chans: waveform request which can be list of data streams or
list of mseed wildcards
:return total_size:
+ 0 if don't have waveform request
+ total size of the directory up to where it greater than BIG_FILE_SIZE
+ -1 if count more than 200 TEXT files
+ -2 if count more than 200 BINARY files of which types are unkonwn
"""
wf_chan_posibilities = ['FH', 'FN', # ≥ 1000 to < 5000
'GH', 'GL', # ≥ 1000 to < 5000
'DH', 'DL', # ≥ 250 to < 1000
'CH', 'CN', # ≥ 250 to < 1000
'EH', 'EL', 'EP', # ≥ 80
'SH', 'SL', 'SP', # ≥ 10 to < 80
'HH', 'HN', # ≥ 80
'BH', 'BN', # ≥ 10 to < 80
'MH', 'MN', 'MP', 'ML'] # >1 to < 10
"""
'LH','LL', 'LP', 'LN' =1
'VP', 'VL', 'VL', 'VH' = 0.1
'UN', 'UP', 'UL', 'UH' <=0.01
Skip channels with sampling rate <=1 because there are less data in the
files, which can result many files with small sizes in compare with sizes
of high sample rate files.
"""
wf_chan_pos = set()
for req in req_wf_chans:
if req == '*':
wf_chan_pos.update(wf_chan_posibilities)
elif req[0] == '*':
wf_chan_pos.update([req for req in wf_chan_posibilities
if req.endswith(req[1])])
elif req[1] == '*':
wf_chan_pos.update([req for req in wf_chan_posibilities
if req.startswith(req[0])])
total_size = 0
for path, subdirs, files in os.walk(dir_path):
for file_name in files:
if any(x in file_name for x in wf_chan_pos):
fp = os.path.join(path, file_name)
file_size = os.path.getsize(fp)
total_size += file_size
if total_size > BIG_FILE_SIZE:
break
return total_size
def _get_size_rt130(dir_path: str, req_ds: List[int]):
    """
    Estimate the size of RT130's requested data streams, which live
    inside folders that have the data stream number as their name.

    :param dir_path: absolute path to directory
    :param req_ds: list of requested data streams
    :return total_size: estimated total size of the requested data
        streams, accumulated up to where it is greater than
        BIG_FILE_SIZE
    """
    if req_ds == ['*']:
        # '*' means all possible RT130 data streams (1-8).
        req_ds = ['1', '2', '3', '4', '5', '6', '7', '8']
    else:
        req_ds = [str(req) for req in req_ds]
    total_size = 0
    for path, subdirs, files in os.walk(dir_path):
        path_parts = path.split(os.sep)
        # Only folders whose name is a requested data stream number
        # contribute to the estimate.
        if path_parts[-1] in req_ds:
            file_size = None
            for file_name in files:
                if file_size is None:
                    # Only the first file's size is read from disk;
                    # every other file in the folder is assumed to have
                    # the same size. This is a deliberate speed trick --
                    # presumably RT130 files within one stream folder
                    # are similarly sized; confirm before relying on
                    # the accuracy of the result.
                    fp = os.path.join(path, file_name)
                    file_size = os.path.getsize(fp)
                total_size += file_size
                if total_size > BIG_FILE_SIZE:
                    # NOTE(review): this only breaks out of the file
                    # loop; os.walk keeps visiting remaining folders.
                    break
    return total_size
def _get_size_mseed(dir_path: str) -> int:
    """
    Get the total size of all valid files, stopping once the total is
    greater than BIG_FILE_SIZE.

    :param dir_path: absolute path to directory
    :return total_size: total size of the directory, accumulated up to
        where it is greater than BIG_FILE_SIZE
    """
    total_size = 0
    for path, subdirs, files in os.walk(dir_path):
        for file_name in files:
            # Build the full path once (the original joined it twice).
            fp = os.path.join(path, file_name)
            if not validate_file(fp, file_name):
                continue
            total_size += os.path.getsize(fp)
            if total_size > BIG_FILE_SIZE:
                # BUG FIX: the original `break` only exited the inner
                # loop, so os.walk kept scanning the remaining folders
                # and defeated the documented early stop.
                return total_size
    return total_size
def _get_dir_size(dir_path: str, req_wf_chans: List[Union[str, int]]):
    """
    Get size of directory.
    To make the process go fast, separate into different cases:
        + Channel name in file name
        + File is an RT130
        + File is an MSeed
        + If only text files or binary files are found, count at most
          200 files and ask the user to decide whether to stop or to
          continue the process at their own risk
    :param dir_path: absolute path to directory
    :param req_wf_chans: waveform request which can be a list of data
        streams or a list of mseed wildcards
    :return total_size:
        + 0 if there is no waveform request
        + total size of the directory up to where it is greater than
          BIG_FILE_SIZE
        + -1 if more than 200 TEXT files are counted
        + -2 if more than 200 BINARY files of unknown type are counted
        + -3 currently, always -- see the NOTE below
    """
    # NOTE(review): this function is work in progress. The type
    # dispatch below is commented out, so the walk only validates file
    # names (and prints debug output) and the function unconditionally
    # returns -3. Confirm before relying on any other documented return
    # value.
    # if len(req_wf_chans) > 0 and req_wf_chans[0].__class__.__name__ == 'int':
    #     return _get_size_rt130(dir_path, req_wf_chans)
    text_file_count = 0
    binary_file_count = 0
    for path, subdirs, files in os.walk(dir_path):
        print(f"path: {path} files:{files}")  # debug output -- remove before release?
        for file_name in files:
            path2file = os.path.join(path, file_name)
            if not validate_file(path2file, file_name):
                continue
            # print("filename:", file_name)
            # type = _check_file_type(path2file)
            # if type == 'TEXT':
            #     text_file_count += 1
            #     if text_file_count > 200:
            #         return -1
            #     continue
            # elif type == 'RT130':
            #     return _get_size_rt130(dir_path, req_wf_chans)
            # elif type == 'MSEED':
            #     total_size = _get_size_chan_in_name(dir_path, req_wf_chans)
            #     if total_size != 0:
            #         return total_size
            #     else:
            #         return _get_size_mseed(dir_path)
            # else:
            #     binary_file_count += 1
            #     if binary_file_count > 200:
            #         return -2
    return -3
def _abort_dialog(msg: str) -> bool:
    """
    Show a question dialog asking the user whether to proceed.

    :param msg: message describing the situation that needs confirming
    :return: True if the user chooses to proceed, False if they abort
    """
    dialog = QMessageBox()
    dialog.setText(msg)
    dialog.setInformativeText('Do you want to proceed?')
    dialog.setStandardButtons(QMessageBox.Yes | QMessageBox.Abort)
    dialog.setDefaultButton(QMessageBox.Abort)
    dialog.setIcon(QMessageBox.Question)
    answer = dialog.exec_()
    # Any answer other than Abort (i.e. Yes) means "go ahead".
    return answer != QMessageBox.Abort
def check_folders_size(dir_paths: List[str],
                       req_wf_chans: List[Union[str, int]]) -> bool:
    """
    Check the total size of the given directories and, for very big or
    suspicious data sets, ask the user whether to proceed.

    :param dir_paths: list of absolute paths to the selected directories
    :param req_wf_chans: waveform request which can be a list of data
        streams or a list of mseed wildcards
    :return: False if the user aborts, True otherwise
    """
    dir_size = 0
    for dir_path in dir_paths:
        size = _get_dir_size(dir_path, req_wf_chans)
        if size in (-1, -2):
            # BUG FIX: the sentinel must be checked on the
            # per-directory result. The original added it to the
            # accumulator first, so -1/-2 were missed whenever earlier
            # directories had already contributed any size.
            dir_size = size
            break
        dir_size += size
        if dir_size > BIG_FILE_SIZE:
            break
    if dir_size > BIG_FILE_SIZE:
        msg = ('Chosen data set is very big. It '
               'might take a while to finish reading '
               'and plotting everything.')
        return _abort_dialog(msg)
    elif dir_size == -1:
        msg = ("There are more than 200 text files detected. "
               "Do you want to continue at your own risk?")
        return _abort_dialog(msg)
    elif dir_size == -2:
        # BUG FIX: this message previously duplicated the text-file
        # message; -2 means too many binary files.
        msg = ("There are more than 200 binary files detected. "
               "Do you want to continue at your own risk?")
        return _abort_dialog(msg)
    else:
        return True