From 14eddf909985dac6bed6c5a79be003e8a4ce28b3 Mon Sep 17 00:00:00 2001 From: ldam <ldam@passcal.nmt.edu> Date: Tue, 29 Aug 2023 16:22:24 -0600 Subject: [PATCH] refactor read_folder to be in general_data --- .../model/general_data/general_data.py | 39 ++++++- sohstationviewer/model/mseed_data/mseed.py | 100 ++++++------------ sohstationviewer/model/reftek_data/reftek.py | 68 +++--------- 3 files changed, 86 insertions(+), 121 deletions(-) diff --git a/sohstationviewer/model/general_data/general_data.py b/sohstationviewer/model/general_data/general_data.py index eda3fa8f9..f0d125ab0 100644 --- a/sohstationviewer/model/general_data/general_data.py +++ b/sohstationviewer/model/general_data/general_data.py @@ -1,8 +1,9 @@ from __future__ import annotations - +import os from pathlib import Path from tempfile import TemporaryDirectory from typing import Optional, Union, List, Tuple, Dict +import traceback from obspy import UTCDateTime @@ -10,7 +11,7 @@ from PySide2 import QtCore from PySide2 import QtWidgets from sohstationviewer.controller.util import \ - display_tracking_info, get_total_files + display_tracking_info, get_total_files, validate_file, validate_dir from sohstationviewer.view.plotting.gps_plot.gps_point import GPSPoint from sohstationviewer.view.util.enums import LogType from sohstationviewer.database.process_db import execute_db @@ -203,6 +204,40 @@ class GeneralData(): :return count: total of files that have been processed after this folder to keep track of progress """ + if not os.path.isdir(folder): + raise ProcessingDataError(f"Path '{folder}' not exist") + + for path, sub_dirs, files in os.walk(folder): + try: + validate_dir(path) + except Exception as e: + # skip Information folder + self.track_info(str(e), LogType.WARNING) + continue + for file_name in files: + if self.creator_thread.isInterruptionRequested(): + raise ThreadStopped() + + path2file = Path(path).joinpath(file_name) + if not validate_file(path2file, file_name): + continue + count += 1 + try: + self.read_file(path2file, file_name, count) + except Exception: + fmt = traceback.format_exc() + self.track_info(f"Skip file {path2file} can't be read " + f"due to error: {str(fmt)}", + LogType.WARNING) + return count + + def read_file(self, path2file: Path, file_name: str, count: int) -> None: + """ + Read data or text from file + :param path2file: absolute path to file + :param file_name: name of file + :param count: total number of file read + """ pass def finalize_data(self): diff --git a/sohstationviewer/model/mseed_data/mseed.py b/sohstationviewer/model/mseed_data/mseed.py index ad0dbda76..d45b9ca73 100644 --- a/sohstationviewer/model/mseed_data/mseed.py +++ b/sohstationviewer/model/mseed_data/mseed.py @@ -3,15 +3,11 @@ MSeed object to hold and process MSeed data """ import os import re -import traceback -from pathlib import Path from typing import Dict, List -from sohstationviewer.controller.util import validate_file, validate_dir from sohstationviewer.view.util.enums import LogType -from sohstationviewer.model.general_data.general_data import \ - GeneralData, ThreadStopped, ProcessingDataError +from sohstationviewer.model.general_data.general_data import GeneralData from sohstationviewer.model.general_data.general_data_helper import read_text from sohstationviewer.model.mseed_data.mseed_helper import \ @@ -61,70 +57,40 @@ class MSeed(GeneralData): print("We currently only handle blockettes 500, 1000," " and 1001.") - def read_folder(self, folder: str, total: int, count: int) -> int: + def read_file(self, path2file, file_name, count): """ - Read data from current folder. - - :param folder: folder to read data from - :param total: total of all valid files - :param count: total of files that have been processed before this - folder to keep track of progress - :return count: total of files that have been processed after this - folder to keep track of progress + Read data or text from file + :param path2file: absolute path to file + :param file_name: name of file + :param count: total number of file read """ - if not os.path.isdir(folder): - raise ProcessingDataError(f"Path '{folder}' not exist") - - for path, sub_dirs, files in os.walk(folder): - try: - validate_dir(path) - except Exception as e: - # skip Information folder - self.track_info(str(e), LogType.WARNING) - continue - for file_name in files: - if self.creator_thread.isInterruptionRequested(): - raise ThreadStopped() - - path2file = Path(path).joinpath(file_name) - - if not validate_file(path2file, file_name): - continue - print("filename:", file_name) - count += 1 - if count % 10 == 0: - self.track_info( - f'Read {count} files/{total}', LogType.INFO) - log_text = read_text(path2file) - if log_text is not None: - self.log_texts[path2file] = log_text - continue - reader = MSeedReader( - path2file, - read_start=self.read_start, - read_end=self.read_end, - is_multiplex=self.is_multiplex, - req_soh_chans=self.req_soh_chans, - req_wf_chans=self.req_wf_chans, - include_mp123zne=self.include_mp123zne, - include_mp456uvw=self.include_mp456uvw, - soh_data=self.soh_data, - mass_pos_data=self.mass_pos_data, - waveform_data=self.waveform_data, - log_data=self.log_data, - gap_minimum=self.gap_minimum) - try: - reader.read() - self.invalid_blockettes = (self.invalid_blockettes - or reader.invalid_blockettes) - except MSeedReadError: - self.not_mseed_files.append(file_name) - except Exception: - fmt = traceback.format_exc() - self.track_info(f"Skip file {path2file} can't be read " - f"due to error: {str(fmt)}", - LogType.WARNING) - return count + if count % 10 == 0: + self.track_info( + f'Read {count} files', LogType.INFO) + log_text = read_text(path2file) + if log_text is not None: + self.log_texts[path2file] = log_text + return + reader = MSeedReader( + path2file, + read_start=self.read_start, + read_end=self.read_end, + is_multiplex=self.is_multiplex, + req_soh_chans=self.req_soh_chans, + req_wf_chans=self.req_wf_chans, + include_mp123zne=self.include_mp123zne, + include_mp456uvw=self.include_mp456uvw, + soh_data=self.soh_data, + mass_pos_data=self.mass_pos_data, + waveform_data=self.waveform_data, + log_data=self.log_data, + gap_minimum=self.gap_minimum) + try: + reader.read() + self.invalid_blockettes = (self.invalid_blockettes + or reader.invalid_blockettes) + except MSeedReadError: + self.not_mseed_files.append(file_name) def retrieve_nets_from_data_dicts(self): """ diff --git a/sohstationviewer/model/reftek_data/reftek.py b/sohstationviewer/model/reftek_data/reftek.py index 0a0275ea8..031deab2a 100755 --- a/sohstationviewer/model/reftek_data/reftek.py +++ b/sohstationviewer/model/reftek_data/reftek.py @@ -2,7 +2,6 @@ RT130 object to hold and process RefTek data """ from pathlib import Path -import os from typing import Union, List, Tuple, Dict import traceback import numpy as np @@ -10,7 +9,6 @@ from obspy.core import Stream from sohstationviewer.conf import constants from sohstationviewer.view.util.enums import LogType -from sohstationviewer.controller.util import validate_file from sohstationviewer.model.general_data.general_data import \ GeneralData, ThreadStopped, ProcessingDataError @@ -64,13 +62,8 @@ class RT130(GeneralData): def processing_data(self): if self.creator_thread.isInterruptionRequested(): raise ThreadStopped() - self.read_folder(self.dir) - - if self.creator_thread.isInterruptionRequested(): - raise ThreadStopped() + self.read_folders() self.selected_key = self.select_key() - if self.selected_key is None: - raise ThreadStopped() if self.creator_thread.isInterruptionRequested(): raise ThreadStopped() @@ -120,50 +113,21 @@ class RT130(GeneralData): folders = self.list_of_dir super().read_folders(folders) - def read_folder(self, folder: str, total: int, count: int) -> int: - """ - Read data from current folder. - - :param folder: folder to read data from - :param total: total of all valid files - :param count: total of files that have been processed before this - folder to keep track of progress - :return count: total of files that have been processed after this - folder to keep track of progress + def read_file(self, path2file, file_name, count): """ - - if self.list_of_rt130_paths != []: - folders = self.list_of_rt130_paths - for folder in folders: - total += sum([len(files) for _, _, files in os.walk(folder)]) - else: - folders = [self.dir] - total = sum([len(files) for _, _, files in os.walk(self.dir)]) - - for folder in folders: - if not os.path.isdir(folder): - raise ProcessingDataError(f"Path '{folder}' not exist") - for path, subdirs, files in os.walk(folder): - for file_name in files: - if self.creator_thread.isInterruptionRequested(): - raise ThreadStopped() - path2file = Path(path).joinpath(file_name) - if not validate_file(path2file, file_name): - continue - try: - if not self.read_reftek_130(path2file): - read_text(path2file, self.log_data['TEXT']) - except Exception: - fmt = traceback.format_exc() - self.track_info(f"Skip file {path2file} can't be read " - f"due to error: {str(fmt)}", - LogType.WARNING) - count += 1 - if count % 50 == 0: - self.track_info( - f"Read {count} files/ {total}", LogType.INFO) - - return count + Read data or text from file + :param path2file: absolute path to file + :param file_name: name of file + :param count: total number of file read + """ + if count % 50 == 0: + self.track_info( + f"Read {count} files", LogType.INFO) + log_text = read_text(path2file) + if log_text is not None: + self.log_texts['TEXT'].append(log_text) + return + self.read_reftek_130(path2file) def select_key(self) -> Tuple[str, str]: """ @@ -184,7 +148,7 @@ class RT130(GeneralData): raise ProcessingDataError(msg) selected_key = keys[0] - if len(keys) > 1: + if not self.on_unittest and len(keys) > 1: msg = ("There are more than one keys in the given data.\n" "Please select one to display") self.pause_signal.emit(msg, keys) -- GitLab