# connect GUI to new mseed
# Merge request: connect_gui_to_new_mseed -> master (requested by Lan Dam)
from typing import List, Union
import os
from pathlib import Path
from PySide2.QtWidgets import QMessageBox
from obspy.core import read as read_ms
from obspy.io.reftek.core import Reftek130Exception
from sohstationviewer.model.mseed_data.record_reader import RecordReader
from sohstationviewer.model.mseed_data.record_reader_helper import MSeedReadError
from sohstationviewer.conf.constants import BIG_FILE_SIZE
from sohstationviewer.controller.util import validate_file
def _get_ready_for_next_read(file, current_record_start: int,
record: RecordReader):
"""
Move the current position of file to next record
:param current_record_start: the start position of the current record
:param reader: the record that is reading
"""
# MSEED stores the size of a data record as an exponent of a
# power of two, so we have to convert that to actual size before
# doing anything else.
record_length_exp = record.header_unpacker.unpack(
'B', record.blockette_1000.record_length
)[0]
record_size = 2 ** record_length_exp
file.seek(current_record_start)
file.seek(record_size, 1)
def _check_RT130(path2file: Path):
    """
    Probe a file with obspy's generic reader to see whether reading its
    headers triggers the REFTEK 130 reader's exception.

    :param path2file: path to the file to check
    :return: the tuple ('RT130', False) when reading the headers raises
        Reftek130Exception; otherwise None (implicitly).
        NOTE(review): returning a tuple only on the exception path and
        an implicit None otherwise looks unfinished -- confirm the
        intended contract (compare _check_file_type, which returns plain
        strings).
    """
    try:
        read_ms(path2file, headonly=True)
    except Reftek130Exception:
        return 'RT130', False
def _check_file_type(path2file):
"""
Check if the given file is TEXT
:param path2file:
:return:
:rtype:
"""
try:
file = open(path2file, 'r')
file.read(64).strip()
file.close()
return 'TEXT'
except UnicodeDecodeError:
pass
except FileNotFoundError as e:
raise e
file = open(path2file, 'rb')
try:
RecordReader(file)
except MSeedReadError as e:
try:
read_ms(path2file, headonly=True)
except Reftek130Exception:
return 'RT130'
except Exception:
return 'binary'
return 'MSEED'
def _get_size_chan_in_name(dir_path: str, req_wf_chans: List[Union[str, int]]):
"""
Estimate size of directory by collecting size of waveform in the list of
wf_chan_posibilities only. This is tricky but fast.
:param dir_path: absolute path to directory
:param req_wf_chans: waveform request which can be list of data streams or
list of mseed wildcards
:return total_size:
+ 0 if don't have waveform request
+ total size of the directory up to where it greater than BIG_FILE_SIZE
+ -1 if count more than 200 TEXT files
+ -2 if count more than 200 BINARY files of which types are unkonwn
"""
wf_chan_posibilities = ['FH', 'FN', # ≥ 1000 to < 5000
'GH', 'GL', # ≥ 1000 to < 5000
'DH', 'DL', # ≥ 250 to < 1000
'CH', 'CN', # ≥ 250 to < 1000
'EH', 'EL', 'EP', # ≥ 80
'SH', 'SL', 'SP', # ≥ 10 to < 80
'HH', 'HN', # ≥ 80
'BH', 'BN', # ≥ 10 to < 80
'MH', 'MN', 'MP', 'ML'] # >1 to < 10
"""
'LH','LL', 'LP', 'LN' =1
'VP', 'VL', 'VL', 'VH' = 0.1
'UN', 'UP', 'UL', 'UH' <=0.01
Skip channels with sampling rate <=1 because there are less data in the
files, which can result many files with small sizes in compare with sizes
of high sample rate files.
"""
wf_chan_pos = set()
for req in req_wf_chans:
if req == '*':
wf_chan_pos.update(wf_chan_posibilities)
elif req[0] == '*':
wf_chan_pos.update([req for req in wf_chan_posibilities
if req.endswith(req[1])])
elif req[1] == '*':
wf_chan_pos.update([req for req in wf_chan_posibilities
if req.startswith(req[0])])
total_size = 0
for path, subdirs, files in os.walk(dir_path):
for file_name in files:
if any(x in file_name for x in wf_chan_pos):
fp = os.path.join(path, file_name)
file_size = os.path.getsize(fp)
total_size += file_size
if total_size > BIG_FILE_SIZE:
break
return total_size
def _get_size_rt130(dir_path: str, req_ds: List[int]):
    """
    Estimate the size of RT130's requested data streams, which live
    inside folders that have the data stream number as their name.

    :param dir_path: absolute path to directory
    :param req_ds: list of requested data streams
    :return total_size: estimated total size of the requested data
        streams, accumulated up to where it is greater than
        BIG_FILE_SIZE
    """
    if req_ds == ['*']:
        # '*' means all possible RT130 data streams (1-8).
        req_ds = ['1', '2', '3', '4', '5', '6', '7', '8']
    else:
        req_ds = [str(req) for req in req_ds]
    total_size = 0
    for path, subdirs, files in os.walk(dir_path):
        path_parts = path.split(os.sep)
        # Only folders whose name is a requested data stream number
        # contribute to the estimate.
        if path_parts[-1] in req_ds:
            file_size = None
            for file_name in files:
                if file_size is None:
                    # Only the first file's size is read from disk;
                    # every other file in the folder is assumed to have
                    # the same size. This is a deliberate speed trick --
                    # presumably RT130 files within one stream folder
                    # are similarly sized; confirm before relying on
                    # the accuracy of the result.
                    fp = os.path.join(path, file_name)
                    file_size = os.path.getsize(fp)
                total_size += file_size
                if total_size > BIG_FILE_SIZE:
                    # NOTE(review): this only breaks out of the file
                    # loop; os.walk keeps visiting remaining folders.
                    break
    return total_size
def _get_size_mseed(dir_path: str) -> int:
    """
    Get the total size of all valid files, stopping once the total is
    greater than BIG_FILE_SIZE.

    :param dir_path: absolute path to directory
    :return total_size: total size of the directory, accumulated up to
        where it is greater than BIG_FILE_SIZE
    """
    total_size = 0
    for path, subdirs, files in os.walk(dir_path):
        for file_name in files:
            # Build the full path once (the original joined it twice).
            fp = os.path.join(path, file_name)
            if not validate_file(fp, file_name):
                continue
            total_size += os.path.getsize(fp)
            if total_size > BIG_FILE_SIZE:
                # BUG FIX: the original `break` only exited the inner
                # loop, so os.walk kept scanning the remaining folders
                # and defeated the documented early stop.
                return total_size
    return total_size
def _get_dir_size(dir_path: str, req_wf_chans: List[Union[str, int]]):
    """
    Get size of directory.
    To make the process go fast, separate into different cases:
        + Channel name in file name
        + File is an RT130
        + File is an MSeed
        + If only text files or binary files are found, count at most
          200 files and ask the user to decide whether to stop or to
          continue the process at their own risk
    :param dir_path: absolute path to directory
    :param req_wf_chans: waveform request which can be a list of data
        streams or a list of mseed wildcards
    :return total_size:
        + 0 if there is no waveform request
        + total size of the directory up to where it is greater than
          BIG_FILE_SIZE
        + -1 if more than 200 TEXT files are counted
        + -2 if more than 200 BINARY files of unknown type are counted
        + -3 currently, always -- see the NOTE below
    """
    # NOTE(review): this function is work in progress. The type
    # dispatch below is commented out, so the walk only validates file
    # names (and prints debug output) and the function unconditionally
    # returns -3. Confirm before relying on any other documented return
    # value.
    # if len(req_wf_chans) > 0 and req_wf_chans[0].__class__.__name__ == 'int':
    #     return _get_size_rt130(dir_path, req_wf_chans)
    text_file_count = 0
    binary_file_count = 0
    for path, subdirs, files in os.walk(dir_path):
        print(f"path: {path} files:{files}")  # debug output -- remove before release?
        for file_name in files:
            path2file = os.path.join(path, file_name)
            if not validate_file(path2file, file_name):
                continue
            # print("filename:", file_name)
            # type = _check_file_type(path2file)
            # if type == 'TEXT':
            #     text_file_count += 1
            #     if text_file_count > 200:
            #         return -1
            #     continue
            # elif type == 'RT130':
            #     return _get_size_rt130(dir_path, req_wf_chans)
            # elif type == 'MSEED':
            #     total_size = _get_size_chan_in_name(dir_path, req_wf_chans)
            #     if total_size != 0:
            #         return total_size
            #     else:
            #         return _get_size_mseed(dir_path)
            # else:
            #     binary_file_count += 1
            #     if binary_file_count > 200:
            #         return -2
    return -3
def _abort_dialog(msg: str) -> bool:
    """
    Show a question dialog asking the user whether to proceed.

    :param msg: message describing the situation that needs confirming
    :return: True if the user chooses to proceed, False if they abort
    """
    dialog = QMessageBox()
    dialog.setText(msg)
    dialog.setInformativeText('Do you want to proceed?')
    dialog.setStandardButtons(QMessageBox.Yes | QMessageBox.Abort)
    dialog.setDefaultButton(QMessageBox.Abort)
    dialog.setIcon(QMessageBox.Question)
    answer = dialog.exec_()
    # Any answer other than Abort (i.e. Yes) means "go ahead".
    return answer != QMessageBox.Abort
def check_folders_size(dir_paths: List[str],
                       req_wf_chans: List[Union[str, int]]) -> bool:
    """
    Check the total size of the given directories and, for very big or
    suspicious data sets, ask the user whether to proceed.

    :param dir_paths: list of absolute paths to the selected directories
    :param req_wf_chans: waveform request which can be a list of data
        streams or a list of mseed wildcards
    :return: False if the user aborts, True otherwise
    """
    dir_size = 0
    for dir_path in dir_paths:
        size = _get_dir_size(dir_path, req_wf_chans)
        if size in (-1, -2):
            # BUG FIX: the sentinel must be checked on the
            # per-directory result. The original added it to the
            # accumulator first, so -1/-2 were missed whenever earlier
            # directories had already contributed any size.
            dir_size = size
            break
        dir_size += size
        if dir_size > BIG_FILE_SIZE:
            break
    if dir_size > BIG_FILE_SIZE:
        msg = ('Chosen data set is very big. It '
               'might take a while to finish reading '
               'and plotting everything.')
        return _abort_dialog(msg)
    elif dir_size == -1:
        msg = ("There are more than 200 text files detected. "
               "Do you want to continue at your own risk?")
        return _abort_dialog(msg)
    elif dir_size == -2:
        # BUG FIX: this message previously duplicated the text-file
        # message; -2 means too many binary files.
        msg = ("There are more than 200 binary files detected. "
               "Do you want to continue at your own risk?")
        return _abort_dialog(msg)
    else:
        return True