diff --git a/sohstationviewer/model/data_loader.py b/sohstationviewer/model/data_loader.py index f6a5e0db3402f807af4152969147791e39ea8154..d226bf7f9e675771cd97ef8dd5d32745e99b5193 100644 --- a/sohstationviewer/model/data_loader.py +++ b/sohstationviewer/model/data_loader.py @@ -26,7 +26,7 @@ class DataLoaderWorker(QtCore.QObject): button_chosen = QtCore.Signal(int) def __init__(self, data_type: str, tracking_box: QtWidgets.QTextBrowser, - folder: str, list_of_rt130_paths: List[Path], + list_of_dir: List[Path], list_of_rt130_paths: List[Path], req_wf_chans: Union[List[str], List[int]] = [], req_soh_chans: List[str] = [], read_start: float = 0, read_end: float = constants.HIGHEST_INT, @@ -35,7 +35,7 @@ class DataLoaderWorker(QtCore.QObject): super().__init__() self.data_type = data_type self.tracking_box = tracking_box - self.folder = folder + self.list_of_dir = list_of_dir self.list_of_rt130_paths = list_of_rt130_paths self.req_wf_chans = req_wf_chans self.req_soh_chans = req_soh_chans @@ -52,6 +52,9 @@ class DataLoaderWorker(QtCore.QObject): self.end_msg = None def run(self): + folders = (self.list_of_rt130_paths + if self.list_of_dir == [''] else self.list_of_dir) + folders_str = ', '.join([dir.name for dir in folders]) try: if self.data_type == 'RT130': from sohstationviewer.model.reftek.reftek import RT130 @@ -65,7 +68,7 @@ class DataLoaderWorker(QtCore.QObject): self.button_chosen.connect(data_object.receive_pause_response, type=QtCore.Qt.DirectConnection) data_object.__init__( - self.data_type, self.tracking_box, self.folder, + self.data_type, self.tracking_box, self.list_of_dir, self.list_of_rt130_paths, req_wf_chans=self.req_wf_chans, req_soh_chans=self.req_soh_chans, read_start=self.read_start, read_end=self.read_end, @@ -84,11 +87,11 @@ class DataLoaderWorker(QtCore.QObject): self.failed.emit() except Exception: fmt = traceback.format_exc() - self.end_msg = (f"Dir {self.folder} can't be read " + self.end_msg = (f"Some in {folders_str} can't be read " f"due to error: {str(fmt)}") self.failed.emit() else: - self.end_msg = f'Finished loading data stored in {self.folder}' + self.end_msg = f'Finished loading data stored in {folders_str}' self.finished.emit(data_object) @@ -142,7 +145,7 @@ class DataLoader(QtCore.QObject): self.worker = DataLoaderWorker( data_type, tracking_box, - list_of_dir[0], # Only work on one directory for now. + list_of_dir, list_of_rt130_paths, req_wf_chans=req_wf_chans, req_soh_chans=req_soh_chans, diff --git a/sohstationviewer/model/data_type_model.py b/sohstationviewer/model/data_type_model.py index df4ccf37f249cae46f2a1248d3f89cad7d67c452..e927e70b5d4346c282a1a6463106cb0c2e9d58c0 100644 --- a/sohstationviewer/model/data_type_model.py +++ b/sohstationviewer/model/data_type_model.py @@ -1,5 +1,6 @@ from __future__ import annotations +import os from pathlib import Path from tempfile import TemporaryDirectory from typing import Optional, Union, List, Tuple, Dict @@ -10,7 +11,8 @@ from obspy.core import Stream from PySide2 import QtCore from PySide2 import QtWidgets -from sohstationviewer.controller.util import display_tracking_info +from sohstationviewer.controller.util import ( + display_tracking_info, validate_file, validate_dir, get_total_files) from sohstationviewer.conf import constants from sohstationviewer.view.plotting.gps_plot.gps_point import GPSPoint from sohstationviewer.view.util.enums import LogType @@ -36,7 +38,7 @@ class ThreadStopped(Exception): class DataTypeModel(): def __init__(self, data_type, tracking_box: QtWidgets.QTextBrowser, - folder: str, list_of_rt130_paths: List[Path] = [], + list_of_dir: List[str], list_of_rt130_paths: List[Path] = [], req_wf_chans: Union[List[str], List[int]] = [], req_soh_chans: List[str] = [], read_start: Optional[float] = UTCDateTime(0).timestamp, @@ -52,7 +54,7 @@ class DataTypeModel(): :param data_type: type of the object :param tracking_box: widget to display tracking info - :param folder: path to the folder of data + :param list_of_dir: list of paths to the folders of data :param list_of_rt130_paths: path to the folders of RT130 data :param req_wf_chans: requested waveform channel list :param req_soh_chans: requested SOH channel list @@ -70,7 +72,7 @@ class DataTypeModel(): """ self.data_type = data_type self.tracking_box = tracking_box - self.dir = folder + self.list_of_dir = list_of_dir self.list_of_rt130_paths = list_of_rt130_paths self.req_soh_chans = req_soh_chans self.req_wf_chans = req_wf_chans @@ -257,14 +259,6 @@ class DataTypeModel(): self.gps_points: List[GPSPoint] = [] - def read_folder(self, folder: str) -> Tuple[Dict]: - """ - Read data from given folder - :param folder: path to folder to read data - :return: Tuple of different data dicts - """ - pass - def select_key(self) -> Union[str, Tuple[str, str]]: """ Get the key for the data set to process. @@ -275,7 +269,7 @@ class DataTypeModel(): def processing_data(self): if self.creator_thread.isInterruptionRequested(): raise ThreadStopped() - self.read_folder(self.dir) + self.read_folders() if self.creator_thread.isInterruptionRequested(): raise ThreadStopped() @@ -287,6 +281,60 @@ class DataTypeModel(): raise ThreadStopped() self.finalize_data() + def read_folders(self, folders) -> None: + """ + Read data from given folders to create data dicts which are + attributes of current class + """ + count = 0 + total = get_total_files(folders) + for folder in folders: + self.read_folder(folder, total, count) + + def read_folder(self, folder: str, total: int, count: int) -> int: + """ + Read data from current folder. + + :param folder: folder to read data from + :param total: total of all valid files + :param count: total of files that have been processed before this + folder to keep track of progress + :return count: total of files that have been processed after this + folder to keep track of progress + """ + for path, sub_dirs, files in os.walk(folder): + try: + validate_dir(path) + except Exception as e: + # skip Information folder + self.track_info(str(e), LogType.WARNING) + continue + + for file_name in files: + if self.creator_thread.isInterruptionRequested(): + raise ThreadStopped() + + path2file = Path(path).joinpath(file_name) + if not validate_file(path2file, file_name): + continue + + self.read_data_file(path2file, file_name) + count += 1 + if count % 10 == 0: + self.track_info( + f'Read {count} files/{total}', LogType.INFO) + + return count + + def read_data_file(self, path2file: str, file_name: str) -> None: + """ + Read data from path <path2ffile>, with name <file_name> + + :param path2file: absolute path to data file + :param file_name: name of data file + """ + pass + def finalize_data(self): """ This function should be called after all folders finish reading to diff --git a/sohstationviewer/model/mseed/mseed.py b/sohstationviewer/model/mseed/mseed.py index 192d59619328ecef7105b6b52b61269553d8cc8f..434eb3a739f12781ebeb9bfbbf9e5aa7c390e128 100644 --- a/sohstationviewer/model/mseed/mseed.py +++ b/sohstationviewer/model/mseed/mseed.py @@ -1,15 +1,12 @@ """ MSeed object to hold and process MSeed data """ -import os -from pathlib import Path -from typing import Dict, Tuple, List +from typing import Dict, List from obspy.core import Stream -from sohstationviewer.controller.util import validate_file, validate_dir from sohstationviewer.model.data_type_model import ( - DataTypeModel, ThreadStopped, ProcessingDataError) + DataTypeModel, ProcessingDataError) from sohstationviewer.model.handling_data import read_mseed_or_text from sohstationviewer.view.util.enums import LogType @@ -39,51 +36,27 @@ class MSeed(DataTypeModel): super().finalize_data() - def read_folder(self, folder: str) -> Tuple[Dict]: + def read_folders(self) -> None: """ - Read data streams for soh, mass position and waveform. - For waveform, traces in data streams will be turn into memmap file - to save resources - :param folder: absolute path to data set folder - :return waveform_data: waveform data by station - :return soh_data: soh data by station - :return mass_pos_data: mass position data by station - :return gaps: gap list by station - :return nets_by_sta: netcodes list by station + Read data from list_of_dir for soh, mass position and waveform. """ - count = 0 - - total = sum([len(files) for _, _, files in os.walk(folder)]) - for path, sub_dirs, files in os.walk(folder): - try: - validate_dir(path) - except Exception as e: - # skip Information folder - self.track_info(str(e), LogType.WARNING) - continue - - for file_name in files: - if self.creator_thread.isInterruptionRequested(): - raise ThreadStopped() - - path2file = Path(path).joinpath(file_name) - if not validate_file(path2file, file_name): - continue - count += 1 - if count % 10 == 0: - self.track_info( - f'Read {count} files/{total}', LogType.INFO) - read_mseed_or_text( - path2file, self.tmp_dir, self.read_start, self.read_end, - self.stream_header_by_key_chan, - self.soh_data, self.mass_pos_data, self.waveform_data, - self.log_data, self.data_time, - self.req_soh_chans, self.req_wf_chans, - self.include_mp123zne, self.include_mp456uvw, - self.track_info, self.populate_cur_key_for_all_data) - - self.track_info( - f'Skipped {total - count} invalid files.', LogType.INFO) + super().read_folders(self.list_of_dir) + + def read_data_file(self, path2file: str, file_name: str) -> None: + """ + Read data from path <path2ffile>, with name <file_name> + + :param path2file: absolute path to data file + :param file_name: name of data file + """ + read_mseed_or_text( + path2file, self.tmp_dir, self.read_start, self.read_end, + self.stream_header_by_key_chan, + self.soh_data, self.mass_pos_data, self.waveform_data, + self.log_data, self.data_time, + self.req_soh_chans, self.req_wf_chans, + self.include_mp123zne, self.include_mp456uvw, + self.track_info, self.populate_cur_key_for_all_data) def retrieve_nets_from_stream_header( self, streams: Dict[str, Dict[str, Stream]]) -> \ diff --git a/sohstationviewer/model/reftek/reftek.py b/sohstationviewer/model/reftek/reftek.py index f7fa193d4ca40066cef2afd711a233ac5b5b99fd..b764927d0ee609341fed5037d6e35186e0624abf 100755 --- a/sohstationviewer/model/reftek/reftek.py +++ b/sohstationviewer/model/reftek/reftek.py @@ -1,10 +1,8 @@ """ RT130 object to hold and process RefTek data """ -import os from pathlib import Path from typing import Tuple, List, Union - import numpy as np from sohstationviewer.model.reftek.from_rt2ms import ( @@ -16,7 +14,6 @@ from sohstationviewer.model.handling_data import read_text from sohstationviewer.model.handling_data_reftek import ( check_reftek_header, read_reftek_stream) from sohstationviewer.conf import constants -from sohstationviewer.controller.util import validate_file from sohstationviewer.view.util.enums import LogType @@ -62,42 +59,26 @@ class RT130(DataTypeModel): super().finalize_data() - def read_folder(self, folder: str) -> None: + def read_folders(self) -> None: """ - Loop all files in dir/list_of_rt130_paths to read for soh data, - mass position data and - index waveform data with filename and corresponding time range - - :param folder: absolute path to data set folder + Read data from list_of_dir or list_of_rt130_paths for soh, + mass position and waveform. """ - count = 0 - - total = 0 if self.list_of_rt130_paths != []: folders = self.list_of_rt130_paths - for folder in folders: - total += sum([len(files) for _, _, files in os.walk(folder)]) else: - folders = [self.dir] - total = sum([len(files) for _, _, files in os.walk(self.dir)]) + folders = self.list_of_dir + super().read_folders(folders) - for folder in folders: - for path, subdirs, files in os.walk(folder): - for file_name in files: - if self.creator_thread.isInterruptionRequested(): - raise ThreadStopped() - path2file = Path(path).joinpath(file_name) - if not validate_file(path2file, file_name): - continue - if not self.read_reftek_130(path2file): - read_text(path2file, file_name, self.log_data['TEXT']) - count += 1 - if count % 50 == 0: - self.track_info( - f"Read {count} files/ {total}", LogType.INFO) - self.total_datafile = count - self.track_info( - f'Skipped {total - count} invalid files.', LogType.INFO) + def read_data_file(self, path2file, file_name): + """ + Read data from path <path2ffile>, with name <file_name> + + :param path2file: absolute path to data file + :param file_name: name of data file + """ + if not self.read_reftek_130(path2file): + read_text(path2file, file_name, self.log_data['TEXT']) def select_key(self) -> Tuple[str, str]: """