diff --git a/sohstationviewer/model/mseed/mseed.py b/sohstationviewer/model/mseed/mseed.py index 434eb3a739f12781ebeb9bfbbf9e5aa7c390e128..192d59619328ecef7105b6b52b61269553d8cc8f 100644 --- a/sohstationviewer/model/mseed/mseed.py +++ b/sohstationviewer/model/mseed/mseed.py @@ -1,12 +1,15 @@ """ MSeed object to hold and process MSeed data """ -from typing import Dict, List +import os +from pathlib import Path +from typing import Dict, Tuple, List from obspy.core import Stream +from sohstationviewer.controller.util import validate_file, validate_dir from sohstationviewer.model.data_type_model import ( - DataTypeModel, ProcessingDataError) + DataTypeModel, ThreadStopped, ProcessingDataError) from sohstationviewer.model.handling_data import read_mseed_or_text from sohstationviewer.view.util.enums import LogType @@ -36,27 +39,51 @@ class MSeed(DataTypeModel): super().finalize_data() - def read_folders(self) -> None: + def read_folder(self, folder: str) -> Tuple[Dict]: """ - Read data from list_of_dir for soh, mass position and waveform. + Read data streams for soh, mass position and waveform. 
+ For waveform, traces in data streams will be turned into memmap files + to save resources + :param folder: absolute path to data set folder + :return waveform_data: waveform data by station + :return soh_data: soh data by station + :return mass_pos_data: mass position data by station + :return gaps: gap list by station + :return nets_by_sta: netcodes list by station """ - super().read_folders(self.list_of_dir) - - def read_data_file(self, path2file: str, file_name: str) -> None: - """ - Read data from path <path2ffile>, with name <file_name> - - :param path2file: absolute path to data file - :param file_name: name of data file - """ - read_mseed_or_text( - path2file, self.tmp_dir, self.read_start, self.read_end, - self.stream_header_by_key_chan, - self.soh_data, self.mass_pos_data, self.waveform_data, - self.log_data, self.data_time, - self.req_soh_chans, self.req_wf_chans, - self.include_mp123zne, self.include_mp456uvw, - self.track_info, self.populate_cur_key_for_all_data) + count = 0 + + total = sum([len(files) for _, _, files in os.walk(folder)]) + for path, sub_dirs, files in os.walk(folder): + try: + validate_dir(path) + except Exception as e: + # skip Information folder + self.track_info(str(e), LogType.WARNING) + continue + + for file_name in files: + if self.creator_thread.isInterruptionRequested(): + raise ThreadStopped() + + path2file = Path(path).joinpath(file_name) + if not validate_file(path2file, file_name): + continue + count += 1 + if count % 10 == 0: + self.track_info( + f'Read {count} files/{total}', LogType.INFO) + read_mseed_or_text( + path2file, self.tmp_dir, self.read_start, self.read_end, + self.stream_header_by_key_chan, + self.soh_data, self.mass_pos_data, self.waveform_data, + self.log_data, self.data_time, + self.req_soh_chans, self.req_wf_chans, + self.include_mp123zne, self.include_mp456uvw, + self.track_info, self.populate_cur_key_for_all_data) + + self.track_info( + f'Skipped {total - count} invalid files.', LogType.INFO) def
retrieve_nets_from_stream_header( self, streams: Dict[str, Dict[str, Stream]]) -> \