diff --git a/sohstationviewer/model/data_type_model.py b/sohstationviewer/model/data_type_model.py
index c95065c82287654f97f511f4d1645b521620d686..9d3110067aaeea2b916abea25dbf27e390becc25 100644
--- a/sohstationviewer/model/data_type_model.py
+++ b/sohstationviewer/model/data_type_model.py
@@ -17,42 +17,139 @@ class DataTypeModel():
                  reqWFChans=[], reqSOHChans=[],
                  readStart=0, readEnd=constants.HIGHEST_INT, *args, **kwargs):
+        """
+        Super class for the different data types that process data from
+        data files
+        :param trackingBox: QTextBrowser - widget to display tracking info
+        :param folder: str - path to the folder of data
+        :param readChanOnly: bool - if True, only read channel names
+        :param reqWFChans: list of str - requested waveform channel list
+        :param reqSOHChans: list of str - requested SOH channel list
+        :param readStart: int - requested start time to read
+        :param readEnd: int - requested end time to read
+        """
         self.trackingBox = trackingBox
         self.dir = folder
         self.reqSOHChans = reqSOHChans
         self.reqWFChans = reqWFChans
         self.readChanOnly = readChanOnly
+        self.readStart = readStart
+        self.readEnd = readEnd
-        self.readStart = readStart  # start time to read in epoch
-        self.readEnd = readEnd  # end time to read in epoch
-        self.processingLog = []  # [(message, type)]
-        self.logData = {'TEXT': []}  # 'TEXT': for text only file
+        """
+        processingLog: [(message, type)] - records the progress of processing
+        """
+        self.processingLog = []
+
+        """
+        logData: info from log channels, SOH messages and text files in dict:
+            {chan_id: list of log strings}
+        'TEXT' is the chan_id given by SOHView for text-only files.
+        """
+        self.logData = {'TEXT': []}
+
+        """
+        waveformData: data of waveform in dict:
+            {set_key: {
+                'files_info': {
+                    chan_id - str: [{
+                        'path2file': absolute path to file - str,
+                        'file_name': name of file - str,
+                        'nets': list of network names in file - list of str,
+                        'stats': list of station names in file - list of str,
+                        'net_stats': list of (network, station)s in file -
+                            list of tuples of str,
+                        'chan_ids': list of channel names in file -
+                            list of str,
+                        'start_epoch': start time of file - float,
+                        'end_epoch': end time of file - float,
+                        'traces_total': length of data - int,
+                        'spr': sample rate - float,
+                        'read': whether the file has been read - bool
+                    }]
+                }
+                'read_data': {
+                    chan_id - str: {
+                        'samplerate': sample rate of the data - float
+                        'tracesInfo': [{
+                            'samplerate': sample rate of the data - float
+                            'start_tm_epoch': start epoch time of the trace
+                                - float
+                            'end_tm_epoch': end epoch time of the trace
+                                - float
+                            'size': size of data - int
+                            'times_f': np.memmap's file path for real epoch
+                                times - str
+                            'data_f': np.memmap's file path for data - str
+                        },]
+                        'ax': Axes for plotting channel data,
+                            added later when it is created
+                            - matplotlib.axes.Axes
+                        'chan_db_info': the plotting parameters read from
+                            the database for this channel - dict
+                    }
+                }
+            }}
+        (np.memmap is used instead of in-memory data for waveform because
+        waveform data are too large and crash the system when merged and
+        processed)
+        """
         self.waveformData = {}
+
+        """
+        SOHData: data for SOH info in dict:
+            {set_key - str or (str, str): {
+                chan_id - str: {
+                    'org_trace': {
+                        'chan_id': channel name - str
+                        'samplerate': sample rate of the data - float
+                        'start_tm_epoch': start epoch time of the trace
+                            - float,
+                        'end_tm_epoch': end epoch time of the trace - float,
+                        'times': data's real time in epoch - np.array of float
+                        'data': data - np.array of float/int
+                    }
+                }
+            }}
+        times: times that have been trimmed and downsampled for plotting
+            - np.array of float
+        data: data that have been trimmed and downsampled for plotting
+            - np.array of float/int
+        """
         self.SOHData = {}
+
+        """
+        massPosData: data for mass position info in a format similar to
+        SOHData's.
+        Channel name for mseed mass position: VM* (see checkSOHChan())
+        Channel name for reftek mass position: MP* (given by SOHView)
+        """
         self.massPosData = {}
-        self.dataTime = {}  # (earliestepoch,latestepoch) for each station
-        self.channels = set()
-        self.station = None
-        self.nets = set()
-        self.stats = set()
+        """
+        dataTime: time range of data sets:
+            {setKey: [earliestepoch, latestepoch]} - {str: [float, float],}
+        """
+        self.dataTime = {}
+
+        """
+        The given data may include more than one data set, identified by
+        station_id in mseed or (unit_id, exp_no) in reftek. The user is
+        allowed to choose which data set is displayed.
+        selectedKey: str - key of the data set to be displayed
+        """
+        self.selectedKey = None
+
+        """
+        gaps: gaps info in dict:
+            {set_key: [list of gaps]} - {str: [[float, float],],}
+        """
         self.gaps = {}
-        self.timeRanges = {}
-        self.selectedStaID = None
-        # # channel with the longest traces total for each station
-        # self.maxTraceTotalChan = {}
+
+        """
+        tmpDir: str - directory that keeps the memmap files.
+        It will be deleted when the object is deleted.
+        """
         self.tmpDir = mkdtemp()
-        # print("currentdir:", os.getcwd())
-        # tmpDirName = 'datatmp'
-        # self.tmpDir = os.path.join(os.getcwd(), tmpDirName)
         try:
             os.mkdir(self.tmpDir)
         except FileExistsError:
             shutil.rmtree(self.tmpDir)
             os.mkdir(self.tmpDir)
-        # self.readDir(folder, readChanOnly)
-
     def __del__(self):
         print("delete dataType Object")
         try:
@@ -65,12 +162,22 @@ class DataTypeModel():
         print("finish deleting")
 
     def hasData(self):
+        """
+        :return bool - True if there is any data that can be read,
+            False if there is no valid data
+        """
         if (len(self.logData) == 0 and len(self.SOHData) == 0
                 and len(self.massPosData) == 0
                 and len(self.waveformData) == 0):
             return False
         return True
 
     def trackInfo(self, text, type):
+        """
+        Display tracking info in trackingBox and add all errors/warnings
+        to processingLog.
+        :param text: str - message to display
+        :param type: str - type of message (error/warning/info)
+        """
         displayTrackingInfo(self.trackingBox, text, type)
         if type != 'info':
             self.processingLog.append((text, type))
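# Illustrative sketch (not part of the patch): the pattern the MSeed and
# RT130 subclasses in this patch follow when specializing DataTypeModel.
# The reader method name here is hypothetical; each subclass supplies its own.
from sohstationviewer.model.data_type_model import DataTypeModel

class MyDataType(DataTypeModel):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.readSOH_indexWaveform(self.dir)   # subclass-specific reading
        if not self.hasData():
            self.trackInfo('No valid data found.', 'error')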
diff --git a/sohstationviewer/model/handling_data.py b/sohstationviewer/model/handling_data.py
index 6b69db83424d107bd566c6c4421dbf9cc7ea18ac..3df98421b6932c831e6c5364d8329da8af112159 100644
--- a/sohstationviewer/model/handling_data.py
+++ b/sohstationviewer/model/handling_data.py
@@ -1,7 +1,9 @@
-
-import os
+"""
+Functions that help process model data
+"""
 import math
 from struct import unpack
+from pathlib import Path
 
 import numpy as np
 from obspy.core import Stream, read as read_ms
@@ -17,8 +19,20 @@ from sohstationviewer.model.reftek.from_rt2ms import core
 
 def readSOHMSeed(path2file, fileName,
                  SOHStreams, logData, netsProbInFile, trackInfo):
     """
-    Read ms and add trace in self.streams to merge later
-    Or read log wrap in ms and add to logData under channel in ms header
+    Use read() from obspy.core to read a miniseed file:
+    + if the encoding is ASCII: the log string will be added to logData;
+      the following blockettes' data, if available, will be converted to
+      strings and added to logData as well
+    + otherwise the traces from the stream will be added to SOHStreams to
+      be merged later
+    :param path2file: str - absolute path of mseed file
+    :param fileName: str - name of mseed file
+    :param SOHStreams: dict - holder of SOH mseed streams
+    :param logData: dict - holder of info from logs
+    :param netsProbInFile: {tuple of str: str} - dict whose keys are the
+        nets of a file and whose values are the user-selected net; the
+        selection is reused for other files whose nets are a subset of a key
+    :param trackInfo: function - to display processing info
     """
     stream = read_ms(path2file)
@@ -86,6 +100,12 @@
 
 
 def readSOHTrace(trace):
+    """
+    Read the SOH trace's info
+    :param trace: obspy.core.trace.Trace - mseed trace
+    :return tr: dict - with the trace's info
+        (structure in DataTypeModel.__init__.soh_data[key][chan_id][orgTrace])
+    """
     tr = {}
     tr['chanID'] = trace.stats['channel']
     tr['samplerate'] = trace.stats['sampling_rate']
@@ -101,25 +121,39 @@
 
 
 def readMPTrace(trace):
-    tr = {}
-    tr['chanID'] = trace.stats['channel']
-    tr['samplerate'] = trace.stats['sampling_rate']
-    tr['startTmEpoch'] = trace.stats['starttime'].timestamp
-    tr['endTmEpoch'] = trace.stats['endtime'].timestamp
     """
-    trace time start with 0 => need to add with epoch starttime
-    times and data have type ndarray
-    """
-    tr['times'] = trace.times() + trace.stats['starttime'].timestamp
-    # TODO: MP only has 4 different values, data can be simplified if too big
-    tr['data'] = np.round_(trace.data / 3276.7, 1)
+    Read the mass position trace's info using readSOHTrace(), then calculate
+    the real value for mass position
+    :param trace: obspy.core.trace.Trace - mseed trace
+    :return tr: dict - with the trace's info from readSOHTrace(), in which
+        tr['data'] has been converted from 16-bit signed integers, where
+        32767 = 2 ** 16 / 2 - 1 is the largest value of a 16-bit two's
+        complement number. The value is also multiplied by 10 for readable
+        display.
+        (structure in DataTypeModel.__init__.soh_data[key][chan_id][orgTrace])
+        (According to 130_theory.pdf: Each channel connects to a 12-bit A/D
+        converter with an input range of +/- 10V. These channels are read
+        once per second as left-justified, 2's-compliment, 16 bit values.)
+    """
+    tr = readSOHTrace(trace)
+    tr['data'] = np.round_(tr['data'] / 32767.0 * 10.0, 1)
     return tr
 
 
-def readWaveformTrace(trace, staID, chanID, tracesInfo, tmpDir):
+def readWaveformTrace(trace, sta_id, chan_id, traces_info, tmp_dir):
     """
-    read data from Trace and save data to files to save mem for processing
-    since waveform data are big
+    Read an mseed waveform trace and save its data to files to save memory
+    for processing, since waveform data are big.
+    :param trace: obspy.core.trace.Trace - mseed trace
+    :param sta_id: str - station name
+    :param chan_id: str - channel name
+    :param traces_info: list - holder of traces' info, refer to
+        DataTypeModel.__init__.
+        waveform_data[key]['read_data'][chan_id]['traces_info']
+    :param tmp_dir: str - the folder that keeps the memmap files
+    :return tr: dict - with the trace's info
+        (structure in DataTypeModel.__init__.
+        waveform_data[key][chan_id][traces_info])
     """
     # gaps for SOH only for now
     tr = {}
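# Example (illustrative, not part of the patch): the conversion readMPTrace()
# applies above. A raw count of 16384 maps to +5.0 of the +/-10 V range.
import numpy as np

raw = np.array([-32767, 0, 16384, 32767])
print(np.round_(raw / 32767.0 * 10.0, 1))   # [-10.   0.   5.  10.]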
@@ -134,19 +167,29 @@ def readWaveformTrace(trace, staID, chanID, tracesInfo, tmpDir):
     data = trace.data
     tr['size'] = times.size
-    trIdx = len(tracesInfo)
-    tr['times_zf'] = tr['times_f'] = saveData2File(
-        tmpDir, 'times', staID, chanID, times, trIdx, tr['size'])
-    tr['data_zf'] = tr['data_f'] = saveData2File(
-        tmpDir, 'data', staID, chanID, data, trIdx, tr['size'])
+    trIdx = len(traces_info)
+    tr['times_f'] = saveData2File(
+        tmp_dir, 'times', sta_id, chan_id, times, trIdx, tr['size'])
+    tr['data_f'] = saveData2File(
+        tmp_dir, 'data', sta_id, chan_id, data, trIdx, tr['size'])
     return tr
 
 
 def readWaveformMSeed(path2file, fileName, staID, chanID,
                       tracesInfo, dataTime, tmpDir):
     """
-    Read ms and add trace in self.streams to merge later
-    Or read log wrap in ms and add to logData under channel in ms header
+    Read traces from a waveform mseed file and append them to tracesInfo.
+    dataTime is updated with the new min and max times.
+    :param path2file: str - absolute path to waveform mseed file
+    :param fileName: str - name of waveform mseed file
+    :param staID: str - station ID from indexing
+    :param chanID: str - channel ID from indexing
+    :param tracesInfo: list - holder of traces' info, refer to
+        DataTypeModel.__init__.
+        waveform_data[key]['read_data'][chan_id]['traces_info']
+    :param dataTime: DataTypeModel.__init__.data_time[key] - holder for the
+        data time of the current station
+    :param tmpDir: str - the folder that keeps the memmap files
     """
     stream = read_ms(path2file)
     for trace in stream:
@@ -156,7 +199,18 @@
         tracesInfo.append(tr)
 
 
-def readWaveformReftek(rt130, staID, readData, dataTime, tmpDir):
+def readWaveformReftek(rt130, key, read_data, dataTime, tmpDir):
+    """
+    Read traces from an rt130 object and add them to tracesInfo.
+    dataTime is updated with the new min and max times.
+    :param rt130: rt130 object
+    :param key: (str, str) - key of the data set
+    :param read_data: dict - holder of the readData, refer to
+        DataTypeModel.__init__.waveform_data[key]['read_data']
+    :param dataTime: DataTypeModel.__init__.data_time[key] - holder for the
+        data time of the current station
+    :param tmpDir: str - the folder that keeps the memmap files
+    """
     stream = core.Reftek130.to_stream(
         rt130,
         headonly=False,
@@ -165,19 +219,30 @@
     for trace in stream:
         chanID = trace.stats['channel']
         samplerate = trace.stats['sampling_rate']
-        if chanID not in readData:
-            readData[chanID] = {
+        if chanID not in read_data:
+            read_data[chanID] = {
                 "tracesInfo": [],
                 "samplerate": samplerate}
-        tracesInfo = readData[chanID]['tracesInfo']
-        tr = readWaveformTrace(trace, staID, chanID, tracesInfo, tmpDir)
+        tracesInfo = read_data[chanID]['tracesInfo']
+        tr = readWaveformTrace(trace, key, chanID, tracesInfo, tmpDir)
         dataTime[0] = min(tr['startTmEpoch'], dataTime[0])
         dataTime[1] = max(tr['endTmEpoch'], dataTime[1])
         tracesInfo.append(tr)
 
 
-def readASCII(path2file, file, staID, chanID, trace, logData, trackInfo):
-    # TODO: read this in MSEED HEADER
+def readASCII(path2file, file, sta_id, chan_id, trace, log_data, track_info):
+    """
+    Read an mseed trace with ASCII encoding and add it to logData.
+    :param path2file: str - absolute path of mseed file
+    :param file: file object - to continue reading; open a new one if file
+        is None
+    :param sta_id: str - station ID from the mseed header
+    :param chan_id: str - channel ID from the mseed header
+    :param trace: obspy.core.trace.Trace - mseed trace
+    :param log_data: dict - holder for log messages, refer to
+        DataTypeModel.__init__.log_data
+    :param track_info: function - to display processing info
+    :return file: file object - to close or continue to read
+    """
     byteorder = trace.stats.mseed['byteorder']
     h = trace.stats
     logText = "\n\n**** STATE OF HEALTH: "
@@ -203,19 +268,24 @@
                     nextBlktByteNo, databytes, byteorder)
                 logText += info
             except ReadBlocketteError as e:
-                trackInfo(f"{staID} - {chanID}: {e.msg}", 'error')
+                track_info(f"{sta_id} - {chan_id}: {e.msg}", 'error')
 
-    if staID not in logData:
-        logData[staID] = {}
-    if h.channel not in logData[staID]:
-        logData[staID][h.channel] = []
-    logData[staID][h.channel].append(logText)
+    if sta_id not in log_data:
+        log_data[sta_id] = {}
+    if h.channel not in log_data[sta_id]:
+        log_data[sta_id][h.channel] = []
+    log_data[sta_id][h.channel].append(logText)
     return file
 
 
 def readText(path2file, fileName, textLogs, ):
     """
-    Read log file and add to logData under channel TEXT
+    Read a text file and add it to logData under channel TEXT. Raise an
+    exception if the file isn't a text file.
+    :param path2file: str - absolute path to text file
+    :param fileName: str - name of text file
+    :param textLogs: list of str - holder to keep the log strings, refer to
+        DataTypeModel.__init__.log_data['TEXT']
     """
     with open(path2file, 'r') as file:
         try:
@@ -230,25 +300,23 @@
     return True
 
 
-def saveData2File(tmpDir, tm_data, staID, chanID,
-                  tr, trIdx, trSize, postfix=''):
+def saveData2File(tmp_dir, tm_data, sta_id, chan_id,
+                  tr, tr_idx, tr_size):
     """
-    save time/data to file to free memory for processing
-    :param tmpDir: the temp dir to save file in
-    :param tm_data: "times"/"data"
-    :param staID: station id
-    :param chaID: channel id
-    :param tr: numpy array of trace data
-    :param trIdx: trace index
-    :param trSize: trace size
-    :param postfix: in case of decimating for zooming,
-        add Z at the end of filename
+    Use np.memmap to save times/data to file, then free the memory for
+    processing
+    :param tmp_dir: str - the temporary dir to save the file in
+    :param tm_data: str - "times"/"data"
+    :param sta_id: str - station ID
+    :param chan_id: str - channel ID
+    :param tr: numpy array - of trace times or data
+    :param tr_idx: int - trace index
+    :param tr_size: int - trace size
+    :return memFileName: str - name of the memmap file
     """
-    memFileName = os.path.join(tmpDir,
-                               f"{staID}-{chanID}-{tm_data}-{trIdx}{postfix}")
-    # print("memFileName:%s - %s" % (memFileName, trSize))
+    memFileName = Path(tmp_dir).joinpath(
+        f"{sta_id}-{chan_id}-{tm_data}-{tr_idx}")
     memFile = np.memmap(memFileName, dtype='int64', mode='w+',
-                        shape=trSize)
+                        shape=tr_size)
     memFile[:] = tr[:]
     del tr
     del memFile
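# Example (illustrative, not part of the patch): round-tripping an array
# through np.memmap the way saveData2File() does. The file name is
# hypothetical; the shape must match the trace's saved 'size'.
import numpy as np
from pathlib import Path
from tempfile import mkdtemp

mem_file = Path(mkdtemp()).joinpath('STA1-HHZ-times-0')
arr = np.arange(100, dtype='int64')
mm = np.memmap(mem_file, dtype='int64', mode='w+', shape=arr.size)
mm[:] = arr[:]
del mm                                          # flush to disk, free the RAM
back = np.memmap(mem_file, dtype='int64', mode='r', shape=100)
assert (back == np.arange(100)).all()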
""" ret = checkWFChan(chanID, reqWFChans) if ret[0] == 'WF': @@ -273,9 +347,13 @@ def checkChan(chanID, reqSOHChans, reqWFChans): def checkSOHChan(chanID, reqSOHChans): """ - check if chanID is an SOH channel or mass position (mseed channel) - check if it required by user - no reqSOHChans means read all + Check if chan_id is a requested SOH channel. + Mass position is always included. + This function is used for mseed only so mass position is 'VM'. + If there is no reqSOHChans, it means all SOH channels are requested + :param chanID: str - channel ID + :param reqSOHChans: list of str - requested SOH channels + :return: bool - True if chan_id is a requested SOH channel. False otherwise """ if reqSOHChans == []: return True @@ -284,10 +362,7 @@ def checkSOHChan(chanID, reqSOHChans): if 'EX?' in reqSOHChans and chanID.startswith('EX'): if chanID[2] in ['1', '2', '3']: return True - # if 'VM?' in reqSOHChans and chanID.startswith('VM'): if chanID.startswith('VM'): - # always read mass position - # TODO: add reqMPChans if chanID[2] in ['0', '1', '2', '3', '4', '5', '6']: return True return False @@ -295,8 +370,12 @@ def checkSOHChan(chanID, reqSOHChans): def checkWFChan(chanID, reqWFChans): """ - Check if chanID is a waveform data and is required by user - TODO: check with more wild card reqWFChans + Check if chanID is a waveform channel and is requested by user + :param chanID: str - channel ID + :param reqWFChans: list of str - requested waveform channels + :return wf: str - '' if chan_id is not a waveform channel. + 'WF' if chan_id is a waveform channel. + :return hasChan: bool - True if chan_id is a requested waveform channel. """ wf = '' hasChan = False @@ -311,7 +390,8 @@ def checkWFChan(chanID, reqWFChans): def sortData(dataDict): """ - Sort data in 'tracesInfo' according to 'startTmEpoch' + Sort data in 'tracesInfo' in 'startTmEpoch' order + :param dataDict: DataTypeModel.__init__.waveformData """ for staID in dataDict: for chanID in dataDict[staID]['readData']: @@ -322,9 +402,12 @@ def sortData(dataDict): def squash_gaps(gaps): """ - :param gaps: list of gaps: (start, end, length) - :return: squased_gaps: all related gaps are squashed extending to - min start and max end + Compress gaps from different channels that have time range related to + each other to the ones with outside boundary (min start, max end). + :param gaps: [[[float, float],], [[float, float],],] - + list of gaps of different channels: [[[start, end],], [[start, end],],] + :return: squashed_gaps: [[float, float],] - all related gaps are squashed + extending to min start and max end [[min start, max end],] """ squashed_gaps = [] sgap_indexes = [] @@ -347,6 +430,18 @@ def squash_gaps(gaps): def downsample(times, data, rq_points): + """ + Reduce sample rate of times and data so that times and data return has + the size around the rq_points. + Since the functions used for downsampling (chunk_minmax()/constant_rate) + are very slow, the values of data from mean to CUT_FROM_MEAN_FACTOR + will be removed first. If the size not meet the rq_points, then + continue to downsample. + :param times: numpy array - of a waveform channel's times + :param data: numpy array - of a waveform channel's data + :param rq_points: int - requested size to return. 
@@ -347,6 +430,18 @@
 
 
 def downsample(times, data, rq_points):
+    """
+    Reduce the number of samples in times and data so that the returned
+    arrays have sizes around rq_points.
+    Since the functions used for downsampling
+    (chunk_minmax()/constant_rate()) are very slow, values of data that lie
+    within (dataMax - dataMean) * CUT_FROM_MEAN_FACTOR of the mean are
+    removed first. If the size still doesn't meet rq_points, continue to
+    downsample.
+    :param times: numpy array - of a waveform channel's times
+    :param data: numpy array - of a waveform channel's data
+    :param rq_points: int - requested size to return
+    :return np.array, np.array - new times and new data with the requested
+        size
+    """
     if times.size <= rq_points:
         return times, data
     dataMax = max(abs(data.max()), abs(data.min()))
@@ -356,11 +451,19 @@
                        (dataMax - dataMean) * const.CUT_FROM_MEAN_FACTOR)
     times = times[indexes]
     data = data[indexes]
-    # return constant_rate(times, data, rq_points)
+    if times.size <= rq_points:
+        return times, data
     return chunk_minmax(times, data, rq_points)
 
 
 def constant_rate(times, data, rq_points):
+    """
+    Take samples at a constant rate, regardless of the values of the data
+    :param times: numpy array of a waveform channel's times
+    :param data: numpy array of a waveform channel's data
+    :param rq_points: requested size to return
+    :return times, data: new times and new data with the requested size
+    """
     if times.size <= rq_points:
         return times, data
     rate = int(times.size/rq_points)
@@ -375,6 +478,15 @@
 
 
 def chunk_minmax(times, data, rq_points):
+    """
+    Split the data into chunks and take the min and max of each chunk to
+    add to the returned data
+    :param times: numpy array - of a waveform channel's times
+    :param data: numpy array - of a waveform channel's data
+    :param rq_points: int - requested size to return
+    :return times, data: np.array, np.array - new times and new data with
+        the requested size
+    """
     x, y = times, data
     final_points = 0
     if x.size <= rq_points:
@@ -446,8 +558,16 @@
 
 def trim_downsample_SOHChan(chan, startTm, endTm, firsttime):
     """
-    trim off non-included time from chan[orgTrace], downsample, and save to
-    chan[times], chan[data] and [logIdx] if the key exist
+    When the channel is zoomed in, times and data will be trimmed to
+    exclude the non-included time, then downsampled to get to the expected
+    const.CHAN_SIZE_LIMIT and saved to chan[times], chan[data] for
+    plotting.
+
+    :param chan: dict - channel info from soh_data, refer to
+        DataTypeModel.__init__.soh_data[key][chan_id]
+    :param startTm: float - start time of the zoomed section
+    :param endTm: float - end time of the zoomed section
+    :param firsttime: bool - True for the original size, when the channel
+        is not zoomed in
     """
     # TODO, add logIdx to downsample if using reftex
     # zoom in to the given time
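# Example (illustrative, not part of the patch): the min/max idea behind
# chunk_minmax(). Keeping each chunk's extremes preserves spikes that a
# constant-rate pick like constant_rate() could drop.
import numpy as np

data = np.array([0, 1, 0, 9, 0, 1, 0, 1])
chunks = data.reshape(4, 2)                    # 4 chunks of 2 samples each
mins, maxs = chunks.min(axis=1), chunks.max(axis=1)
# mins == [0 0 0 0], maxs == [1 9 1 1] -> the spike 9 survives downsampling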
@@ -461,10 +581,26 @@
 
 def trim_downsample_WFChan(chan, startTm, endTm, firsttime):
     """
-    trim off all chans with non included time
-    if totalSize of the rest chans > RECAL_SIZE_LIMIT => need to be downsampled
-    Read data from tr's filename, downsample the data
-    Return data for plotting
+    When the channel is zoomed in, times and data will be trimmed to
+    exclude the non-included time.
+    Different from soh_data, where times and data are each one np.array,
+    in waveform_data times and data are each kept in a list of np.memmap
+    files along with startTmEpoch and endTmEpoch.
+    chan['startIdx'] and chan['endIdx'] will be identified to exclude the
+    np.memmap files that aren't in the zoomed time range (startTm, endTm).
+    The data in the np.memmap files will be trimmed again if there is
+    still time outside the zoomed range, then downsampled and combined to
+    get times and data with sizes close to the expected
+    const.CHAN_SIZE_LIMIT, and saved to chan[times], chan[data] for
+    plotting.
+    chan['fulldata'] is a flag set the first time, when the channel isn't
+    zoomed in and the data is small enough that it doesn't need to go
+    through this process again when zoomed in.
+
+    :param chan: dict - channel info from waveform_data, refer to
+        DataTypeModel.__init__.waveformData[key]['readData'][chan_id]
+    :param startTm: float - start time of the zoomed section
+    :param endTm: float - end time of the zoomed section
+    :param firsttime: bool - True for the original size, when the channel
+        is not zoomed in
     """
     if 'fulldata' in chan:
         # data is small, already has full in the first trim
@@ -535,6 +671,14 @@
 
 
 def get_eachDay5MinList(startTm, endTm):
+    """
+    Get the list of five-minute bins for every day, starting from the day
+    of startTm and ending on the day of endTm.
+    :param startTm: float - start time
+    :param endTm: float - end time
+    :return every_day_5_min_list: [[288 floats], ] - the list of the start
+        times of the five-minute bins for every day; each day has 288 bins.
+    """
     exactDayTm = (startTm // const.SEC_DAY) * const.SEC_DAY
     exactDayTmList = []
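# Example (illustrative, not part of the patch): what one row of
# every_day_5_min_list holds for a day starting at epoch 0, assuming
# const.SEC_DAY == 86400 and const.SEC_5M == 300.
import numpy as np

a_day_5_min = np.arange(0, 86400, 300)   # [0, 300, 600, ..., 86100]
assert a_day_5_min.size == 288           # 288 five-minute bins per day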
+ """ # preset all 0 for all 5 minutes for each day - tpsData = np.zeros((len(eachDay5MinList), const.NO_5M_DAY)) + tpsData = np.zeros((len(every_day_5_min_list), const.NO_5M_DAY)) # zoom in to the given range chan['startIdx'] = 0 @@ -610,8 +771,9 @@ def get_trimTPSData(chan, startTm, endTm, eachDay5MinList): startTPSTm = times[startIndex] # identify index in case of overlaps or gaps - index = np.where((eachDay5MinList <= times[startIndex]) & - (eachDay5MinList + const.SEC_5M > times[startIndex])) + index = np.where((every_day_5_min_list <= times[startIndex]) & + (every_day_5_min_list + const.SEC_5M > + times[startIndex])) currRow = index[0][0] currCol = index[1][0] nextTPSTm = startTPSTm + const.SEC_5M @@ -636,17 +798,16 @@ def get_trimTPSData(chan, startTm, endTm, eachDay5MinList): currRow += 1 nextTPSTm += const.SEC_5M chan['tps_data'] = tpsData - chan['5m_each_day'] = eachDay5MinList def findTPSTm(given_tm, each_day_5_min_list): """ Find the position of the given time (given_tm) in time-power-squared plot - :param given_tm: given time - :param each_day_5_min_list: list of days in which each day includes - a list of 288 times with 5 minutes apart - :return x_idx: index of time in the each_day_5_min_list - :return y_idx: index of day plotted + :param given_tm: float - given time + :param every_day_5_min_list: [[288 of floats], ] - the list of all start + of five minutes for every day in which each day has 288 of 5 minutes. + :return x_idx: int - index of time in the each_day_5_min_list + :return y_idx: int - index of day plotted (look in TimePowerSquaredWidget.getZoomData()) """ x_idx = None diff --git a/sohstationviewer/model/mseed/blockettes_reader.py b/sohstationviewer/model/mseed/blockettes_reader.py index 71b96435928aaea4d51908d41f46196e19236479..83b37a436b1874cc9d3e3806807d71064f3416d7 100644 --- a/sohstationviewer/model/mseed/blockettes_reader.py +++ b/sohstationviewer/model/mseed/blockettes_reader.py @@ -1,5 +1,9 @@ -import os +""" +function for reading blockettes for ascii encodding +""" + from struct import unpack +from pathlib import Path class ReadBlocketteError(Exception): @@ -11,7 +15,7 @@ def readASCII(path, fileName, byteorder): """ test function """ - file = open(os.path.join(path, fileName), 'rb') + file = open(Path(path).joinpath(fileName), 'rb') databytes = file.read() file.close() followingBlktsTotal = unpack('%s%s' % (byteorder, 'B'), @@ -34,14 +38,12 @@ def readASCII(path, fileName, byteorder): # SEED Manual V2.4 - Chapter 8 - Data Record def readNextBlkt(bNo, databytes, byteorder): """ - :param bNo: next blockette Byte Number - :param databytes: file's data in byte - :param byteorder: big/little endian - :param key: (net, stat, loc) - :param chanID: channel + :param bNo: int - next blockette Byte Number + :param databytes: bytes object - file's data in byte + :param byteorder: str - big/little endian :return: - nextBNo: byte number of next blockette - info: info read from this blockette + nextBNo: int - byte number of next blockette + info: str - info read from this blockette """ blocketteType = unpack('%s%s' % (byteorder, 'H'), databytes[bNo:bNo + 2])[0] @@ -59,6 +61,13 @@ def readNextBlkt(bNo, databytes, byteorder): def readBlkt500(bNo, databytes, byteorder): + """ + read blockette 500 (skip first 4 bytes (HH) as they are already read) + :param bNo: int - next blockette Byte Number + :param databytes: bytes object - file's data in byte + :param byteorder: str - big/little endian + :return logText: str - info of blockette 500 in string + """ logText = "\nVCO 
Correction: %s" % unpack( '%s%s' % (byteorder, 'f'), databytes[bNo + 4:bNo + 8])[0] t = {} @@ -83,6 +92,14 @@ def readBlkt500(bNo, databytes, byteorder): def readBlkt2000(bNo, databytes, byteorder): + """ + read blockette 2000 (skip first 4 bytes (HH) as they are already read) + :param bNo: int - next blockette Byte Number + :param databytes: bytes object - file's data in byte + :param byteorder: str - big/little endian + :return logText: str - info of blockette 2000 in string + """ + blktLen = unpack( '%s%s' % (byteorder, 'H'), databytes[bNo + 4:bNo + 6])[0] logText = "\nTotal Blockette length: %s bytes" % blktLen diff --git a/sohstationviewer/model/mseed/from_mseedpeek/mseed_header.py b/sohstationviewer/model/mseed/from_mseedpeek/mseed_header.py index 1033828d377c0a35021cadf26be8936060f9bc7f..9b4b606f68082c6e02829a5f18ee1a93c2fc0aeb 100644 --- a/sohstationviewer/model/mseed/from_mseedpeek/mseed_header.py +++ b/sohstationviewer/model/mseed/from_mseedpeek/mseed_header.py @@ -1,3 +1,9 @@ +""" +Most of functions/classes in this file are from mseedpeek. +read_mseed_headers() is the only function added to make use of the reused +functions. +""" + import struct from sohstationviewer.controller.util import getTime6 from sohstationviewer.model.handling_data import ( @@ -411,17 +417,30 @@ def readHdrs(path2file, fileName, netsProbInFile, trackInfo): """ read headers of a given file build dictionary for quick access - """ + :param path2file: str - path to file + :param fileName: str - name of file + :param SOHStreams: dict - holder for different sets of soh mseed stream + :param logData: dict - holder for logging messages + :param reqSOHChans: list of string - requested SOH channels sent + from Main Window + :param reqWFChans: list of string - requested waveform channel sent from + Main Window + :param netsProbInFile: dict - holder for all network + :param trackInfo: function - to do process tracking + :return: + + if file is mseed but cannot read: raise error + + if file is mseed but chanType isn't requested, do nothing + + if file isn't mseed, try to read text file (raise Error if needed) + + is chanType is SOH, read data to soh_streams + + if chanType is waveform but there is no reqWFChans: do nothing + + if chanType is waveform, return header info - # create object (we've already tested all files for mseed) - # and get some base info - # print("fileName:", fileName) + """ rdfile = MseedHeader(path2file) if rdfile.isMseed(): try: filesize = rdfile.filesize blksize = rdfile.blksize - encode = rdfile.encode chanID = rdfile.FH.Chan.strip().decode() chanType = checkChan(chanID, reqSOHChans, reqWFChans) if not chanType: @@ -450,7 +469,6 @@ def readHdrs(path2file, fileName, chanIDs = set() epochs = [] startTms = [] - gaps = [] # looping over total number of blocks in files for n in range(numblocks): rdfile.fixedhdr(n * blksize) @@ -482,8 +500,6 @@ def readHdrs(path2file, fileName, 'stats': sorted(list(stats)), 'netStats': sorted(list(netStats)), 'chanIDs': sorted(list(chanIDs)), - 'gaps': gaps, - 'encode': encode, 'startEpoch': epochs[0][0], 'endEpoch': epochs[-1][1], 'tracesTotal': len(epochs), diff --git a/sohstationviewer/model/mseed/mseed.py b/sohstationviewer/model/mseed/mseed.py index b98670531de27ec30d63716c9826c1810e9deac7..0e0ec8ad44814f30d5f984073d4f68b1ed476af4 100644 --- a/sohstationviewer/model/mseed/mseed.py +++ b/sohstationviewer/model/mseed/mseed.py @@ -1,16 +1,10 @@ """ -waveform: + indexing and read only the files in selected time, - + apply saving memory - + down sample traces 
-    + down sample traces before combine data
-    + data not use for plotting is kept in files to free memory
-      for processing.
-SOH: merge all traces (with obspy stream) before down sample.
-gaps: for SOH only, can manually calc. gaps for waveform but takes time
-
+MSeed object to hold and process MSeed data
 """
 
 import os
+from pathlib import Path
 
 from PySide2 import QtWidgets
 
@@ -27,21 +21,44 @@ from sohstationviewer.model.handling_data import (
 
 
 class MSeed(DataTypeModel):
+    """
+    Read and process mseed files into an object with properties that can
+    be used to plot SOH data, mass position data, waveform data and gaps
+    """
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        # keys: 1+ nets   values: net user choose
+        # channels: set - channels available in the files among the
+        # requested channels
+        self.channels = set()
+
+        # nets: set - nets available in the files
+        self.nets = set()
+
+        # stats: set - stations available in the files
+        self.stats = set()
+
+        """
+        A file can have more than one experiment, of which only one should
+        be correct, as selected by the user
+        (handling_data.py - read_soh_mseed()).
+        netsProbInFile is a dictionary whose keys are the tuples of all
+        nets in a file and whose values are the net selected by the user.
+        """
        self.netsProbInFile = {}
+
         self.read_soh_and_index_waveform(self.dir)
         self.selectedKey = self.selectStaID()
         if self.selectedKey is None:
             return
         if len(self.reqWFChans) != 0:
-            # self.reselectTimeRange(self.selectedKey)
             self.readWFFiles(self.selectedKey)
 
     def read_soh_and_index_waveform(self, folder):
         """
-        read waveform data, soh data, mass position data, gaps from folder
+        + read waveform data, associating filenames with time ranges
+        + read soh_stream for the unmerged traces
+        + merge soh_stream into SOH data and mass position data
+        + squash gaps from the different SOH channels into one list
+        :param folder: str - absolute path to data set folder
         """
         self.waveformData, soh_stream = self.index_waveform(folder)
         self.SOHData, self.massPosData, self.gaps = self.merge_soh_streams(
@@ -49,15 +66,15 @@
 
     def index_waveform(self, folder):
         """
-        :param folder: the parent folder for the data set
-        :return waveform_data: a dict for waveform data including
-        {sta_id: {'filesInfo': {chan_id: {filepath, time range, trace's info}}
-                  'readData': { chan_id: {preset info for samplerate
-                      and traces_info which is a list of {info and
-                      np.memmap's file path of trace data}
-                  }
-        }}
+        :param folder: str - absolute path to data set folder
+        :return waveformData: a dict for waveform data including
+        {sta_id: {'filesInfo': {chan_id: [{filepath, time range,
+                      trace's info}]}
+                  'readData': {chan_id: {samplerate and preset traces_info,
+                      which is a list of {info and np.memmap's file paths
+                      for trace data and times}}
+                  }
+        }}
-        (can't use stream of traces since data to large to be merged and
+        (np.memmap is used because the data are too large to be merged and
         processed)
         :return soh_streams: a dict of soh channel streams
             {sta_id: {chan_id: mseed stream of traces}}
@@ -71,7 +88,7 @@
 
         for path, sub_dirs, files in os.walk(folder):
             for file_name in files:
-                path2file = os.path.join(path, file_name)
+                path2file = Path(path).joinpath(file_name)
                 if not validateFile(path2file, file_name):
                     continue
                 count += 1
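# Example (illustrative, not part of the patch): merging SOH traces with
# obspy, as merge_soh_streams() does with the collected soh_streams. Two
# one-sample-per-second traces separated by a gap merge into a single
# (masked) trace.
import numpy as np
from obspy.core import Stream, Trace, UTCDateTime

tr1 = Trace(data=np.zeros(10, dtype=np.int32),
            header={'sampling_rate': 1.0, 'starttime': UTCDateTime(0)})
tr2 = Trace(data=np.zeros(10, dtype=np.int32),
            header={'sampling_rate': 1.0, 'starttime': UTCDateTime(20)})
st = Stream([tr1, tr2])
st.merge()
assert len(st) == 1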
@@ -141,6 +158,22 @@
         return waveform_data, soh_streams
 
     def merge_soh_streams(self, soh_streams):
+        """
+        :param soh_streams: a dict of soh channel streams
+            {sta_id: {chan_id: mseed stream of traces}}
+        If there is more than one network for a station, the user can
+        either choose the data of one network for that station, or combine
+        all networks related to that station into one data set (by
+        changing the network name to the selected network, then merging
+        again).
+        :return masspos_data: dict - merged data for chan_ids prefixed
+            with 'VM' (structure in DataTypeModel.__init__.massPosData)
+        :return soh_data: dict - merged data for the other soh chan_ids
+            (structure in DataTypeModel.__init__.SOHData)
+        :return gaps: dict - starts and ends of gaps from all merged soh
+            streams, squashed to the largest gaps
+            (structure in DataTypeModel.__init__.gaps)
+        """
+
         soh_data = {}
         masspos_data = {}
         gaps = {}
@@ -198,6 +231,13 @@
 
     def selectStaID(self):
+        """
+        :return selectedStaID: str - the selected station ID from self.stats
+            + If there is only one station ID, return it.
+            + If there is more than one, show all IDs and let the user
+              choose one to return.
+        """
+
         stats = list(self.stats)
         selectedStaID = stats[0]
         if len(stats) > 1:
@@ -219,55 +259,25 @@
             selectedStaID = stats[selectedIdx]
         self.trackInfo(f'Select Station {selectedStaID}', 'info')
         return selectedStaID
-    #
-    # def reselectTimeRange(self, staID):
-    #     """
-    #     If there are no time ranges created for the station, create them
-    #     Provide a dialog for user to choose a time range. Each button is
-    #     a time range. User click one, time range will be changed in
-    #     MainWindow (TODO), the dialog will be closed. If necessary,
-    #     will change the way to do this later.
-    #     """
-    #     if staID not in self.timeRanges:
-    #         chanID = self.maxTraceTotalChan[staID]
-    #         filesInfo = self.waveformData[staID][chanID]['filesInfo']
-    #         cutTrace = [
-    #             tr for tr in filesInfo
-    #             if (self.readStart <= tr['startEpoch'] < self.readEnd) or
-    #             (self.readStart < tr['endEpoch'] <= self.readEnd)]
-    #         cutTraceTotal = len(cutTrace)
-    #         print(f"{chanID } cutTraceTotal: {cutTraceTotal}")
-    #         traceTotalDiv = int(
-    #             cutTraceTotal / constants.FILE_PER_CHAN_LIMIT)
-    #         if traceTotalDiv < 2:
-    #             # readStart, readEnd remain unchanged
-    #             return
-    #         self.timeRanges[staID] = []
-    #         FILE_LIM = constants.FILE_PER_CHAN_LIMIT
-    #         for i in range(traceTotalDiv):
-    #             startTm = cutTrace[i * FILE_LIM]['startEpoch']
-    #             if i < (traceTotalDiv - 1):
-    #                 endTm = cutTrace[i * FILE_LIM + FILE_LIM - 1]['endEpoch']
-    #             else:
-    #                 endTm = cutTrace[-1]['endEpoch']
-    #             self.timeRanges[staID].append((startTm, endTm))
-    #         msg = "Data in the selected time is too big to display.\n"
-    #     else:
-    #         msg = ""
-    #
-    #     msg += "Please choose one of the suggested time range below."
-    #     tmStrList = []
-    #     for tm in self.timeRanges[staID]:
-    #         startTm = UTCDateTime(int(tm[0])).strftime("%Y-%m-%d %H:%M")
-    #         endTm = UTCDateTime(int(tm[1])).strftime("%Y-%m-%d %H:%M")
-    #         tmStrList.append("%s-%s" % (startTm, endTm))
-    #     msgBox = SelectButtonDialog(message=msg, buttonLabels=tmStrList)
-    #     msgBox.exec_()
-    #
-    #     self.readStart, self.readEnd = self.timeRanges[staID][msgBox.ret]
-    #     self.trackInfo(f"Select time {self.readStart, self.readEnd}", "info")
 
     def readWFFiles(self, staID):
+        """
+        From filesInfo, read all waveform data of the requested waveform
+        channels for the given staID, in the time selected from the Main
+        Window, to add to
+        waveformData[staID]['readData'][chan_id]['tracesInfo'].
+        'tracesInfo' includes a list of traces' info dicts in startTmEpoch
+        order.
+        The keys of a trace's info dict:
+            {
+                'samplerate': sample rate of the data,
+                'startTmEpoch': start epoch time of the trace,
+                'endTmEpoch': end epoch time of the trace,
+                'size': size of the data,
+                'times_f': np.memmap's file path for times,
+                'data_f': np.memmap's file path for data
+            }
+        :param staID: str - station ID
+        """
+
         count = 0
         for chanID in self.waveformData[staID]['filesInfo']:
             # check chanID
diff --git a/sohstationviewer/model/reftek/logInfo.py b/sohstationviewer/model/reftek/logInfo.py
index dd9f8caa9aabe7d3b78ac060e39d5e9b0a66b2d4..b8f1caf0727d1a41abe07e78cb50be0381f6e969 100644
--- a/sohstationviewer/model/reftek/logInfo.py
+++ b/sohstationviewer/model/reftek/logInfo.py
@@ -5,9 +5,19 @@ from sohstationviewer.controller.util import (
 
 
 class LogInfo():
-    def __init__(self, parent, trackInfo, logText, key, packetType, reqDSs,
+
+    def __init__(self, parent, trackInfo, logText, key, reqDSs,
                  isLogFile=False):
-        self.packetType = packetType
+        """
+        Helps extract channel data from logText, which includes SOH and
+        event messages.
+        :param parent: reftek object - the object that calls LogInfo
+        :param trackInfo: function - to track data processing
+        :param logText: str - SOH and event messages in time order
+        :param key: (str, str) - ID of the data set, including unitID and
+            expNo
+        :param reqDSs: list of str - requested data stream IDs
+        :param isLogFile: bool - flag indicating whether this is a log file
+        """
         self.parent = parent
         self.trackInfo = trackInfo
         self.logText = logText
@@ -35,6 +45,15 @@
         self.extractInfo()
 
     def readEVT(self, line):
+        """
+        Read EVT info from a line for a specific data stream (DS)
+        :param line: str - a line of the log text
+        :return epoch: float - epoch time of the message
+        :return DS: int - index of the data stream
+        For epoch, the trigger time (TT) is used if available, or the
+        first sample time (FST) if available; otherwise 0, 0 is returned.
+        minEpoch and maxEpoch are updated.
+        """
         # Ex: DAS: 0108 EV: 2632 DS: 2 FST = 2001:253:15:13:59:768
         #     TT =2001:253:15:13:59:768 NS: 144005 SPS: 40 ETO: 0
         parts = line.split()
@@ -59,6 +78,13 @@
         return epoch, DS
 
     def readSHHeader(self, line):
+        """
+        :param line: str - a line of the log text
+        :return epoch: float - time of the state-of-health header
+        minEpoch and maxEpoch are updated.
+        yAdded is reset to False to allow adding 1 to trackYear.
+        If a different unitID is detected, give a warning and skip reading.
+        """
         # Ex: State of Health  01:251:09:41:35:656   ST: 0108
         parts = line.split()
         try:
@@ -80,6 +106,13 @@
         return epoch
 
     def simpleRead(self, line):
+        """
+        Read parts and epoch from an SOH line
+        :param line: str - a line of the log text
+        :return parts: list of str - the parts of the line, split on spaces
+        :return epoch: float - time when the info was recorded
+        maxEpoch is updated with the epoch time.
+ """ # Ex: 186:21:41:35 <content> parts = line.split() try: @@ -92,19 +125,32 @@ class LogInfo(): return parts, epoch def readIntClockPhaseErr(self, line): + """ + Read internal clock phase error + :param line: str - a line of evt message + :return epoch: float - time when info is recorded + :return error: float - time of ICP error in microseconds + """ # Ex: 253:19:41:42 INTERNAL CLOCK PHASE ERROR OF 4823 USECONDS ret = self.simpleRead(line) if not ret: return False parts, epoch = ret error = float(parts[-2]) - # if parts[-1].startswith("USEC"): bc requested unit is us - # error /= 1000.0 if parts[-1].startswith("SEC"): error *= 1000000.0 return epoch, error def readBatTemBkup(self, line): + """ + Read battery voltage, temperature, backup voltage + :param line: str - a line of evt message + :return epoch: float - time when info is recorded + :return volts: float - battery voltage. + :return temp: float - temperature of battery in Celsius + :return bkupV: float - backup voltage. + Available for RT130. For 72A, 0.0 is assigned + """ # 72A's: # Ex: 186:14:33:58 BATTERY VOLTAGE = 13.6V, TEMPERATURE = 26C # RT130: @@ -124,6 +170,14 @@ class LogInfo(): return epoch, volts, temp, bkupV def readDiskUsage(self, line): + """ + Read disk usage + :param line: str - a line of evt message + :return epoch: float - time when info is recorded + :return disk: int - disk number + :return val: int - memory used in disk + Available for RT130. For 72A, 0 is assigned + """ # RT130: # Ex: 186:14:33:58 DISK 1: USED: 89744 AVAIL:... # Ex: 186:14:33:58 DISK 2* USED: 89744 AVAIL:... @@ -137,6 +191,12 @@ class LogInfo(): return epoch, disk, val def readDPS_ClockDiff(self, line): + """ + Read DPS clock difference + :param line: str - a line of evt message + :return epoch: float - time when info is recorded + :return total: float - total difference time in milliseconds + """ # Ex: 245:07:41:45 DSP CLOCK DIFFERENCE: 0 SECS AND -989 MSECS parts, epoch = self.simpleRead(line) try: @@ -151,17 +211,21 @@ class LogInfo(): return epoch, total def readDefs(self, line): + """ + Read definitions' time. Currently, only read Station Channel Definition + Based on user requested, may use ["STATION", "DATA", "CALIBRATION"] + :param line: str - a line of evt message + :return epoch: float - time of the definition + """ # Ex: Station Channel Definition 01:330:19:24:42:978 ST: 7095 # Ex: Data Stream Definition 01:330:19:24:42:978 ST: 7095 # Ex: Calibration Definition 01:330:19:24:42:978 ST: 7095 # Ex: STATION CHANNEL DEFINITION 2020:066:19:00:56:000 ST: 92E9 parts = line.split() # Lines from a .log file may be just the first time the parameters were - # saved until the DAS is reset, so if this is a log file then use the + # saved until the DAS is reset, so if this is a log file then use the # current SOH time for plotting the points, instead of what is in the # message line. 
@@ -151,17 +211,21 @@
         return epoch, total
 
     def readDefs(self, line):
+        """
+        Read the definitions' times. Currently only the Station Channel
+        Definition is read. Based on user requests,
+        ["STATION", "DATA", "CALIBRATION"] may all be used.
+        :param line: str - a line of the log text
+        :return epoch: float - time of the definition
+        """
         # Ex: Station Channel Definition   01:330:19:24:42:978    ST: 7095
         # Ex: Data Stream Definition   01:330:19:24:42:978    ST: 7095
         # Ex: Calibration Definition   01:330:19:24:42:978   ST: 7095
         # Ex: STATION CHANNEL DEFINITION  2020:066:19:00:56:000  ST: 92E9
         parts = line.split()
         # Lines from a .log file may be just the first time the parameters were
-        # saved until the DAS is reset, so if this is a log file then use the
+        # saved until the DAS is reset, so if this is a log file then use the
         # current SOH time for plotting the points, instead of what is in the
         # message line.
-        # if parts[0] in ["STATION", "DATA", "CALIBRATION"]:
-        # TODO: check if want to plot other def
         if parts[0] in ["STATION"]:
             if self.isLogFile is False:
                 try:
@@ -181,6 +245,11 @@
         return epoch
 
     def readCPUVer(self, line):
+        """
+        Read the version of the CPU software
+        :param line: str - a line of the log text
+        :return CPUVer: str - version of the CPU software
+        """
         # Ex: 341:22:05:41 CPU SOFTWARE V03.00H  (72A and older 130 FW)
         # Ex: 341:22:05:41 REF TEK 130   (no version number at all)
         # Ex: 341:22:05:41 Ref Tek 130 2.8.8S (2007:163)
@@ -197,6 +266,11 @@
         return CPUVer
 
     def readGPSVer(self, line):
+        """
+        Read the version of the GPS firmware
+        :param line: str - a line of the log text
+        :return GPSVer: str - version of the GPS firmware
+        """
         parts = line.split()
         verParts = [p.strip() for p in parts]
         if "GPS FIRMWARE VERSION:" in line:
@@ -207,21 +281,40 @@
             GPSVer = " ".join(verParts[2:])
         return GPSVer
 
-    def addChanInfo(self, chanName, t, d, idx):
-        if chanName not in self.chans:
-            self.chans[chanName] = {}
-        self.chans[chanName]['orgTrace'] = {
+    def addChanInfo(self, chan_id, t, d, idx):
+        """
+        Add information to the channel's orgTrace field:
+            {
+                unitID: serial number of the device
+                expNo: experiment number
+                times: list of epoch times
+                data: list of the channel's values
+                logIdx: list of indexes of the SOH message lines
+                    corresponding to times and data
+            }
+        :param chan_id: str - ID of the channel
+        :param t: float - epoch time of the data point
+        :param d: float - value of the data point
+        :param idx: int - index of the SOH message line
+        """
+
+        if chan_id not in self.chans:
+            self.chans[chan_id] = {}
+        self.chans[chan_id]['orgTrace'] = {
             'unitID': self.unitID,
             'expNo': self.expNo,
             'times': [],
             'data': [],
             'logIdx': []}
-        self.chans[chanName]['orgTrace']['times'].append(t)
-        self.chans[chanName]['orgTrace']['data'].append(d)
-        self.chans[chanName]['orgTrace']['logIdx'].append(idx)
+        self.chans[chan_id]['orgTrace']['times'].append(t)
+        self.chans[chan_id]['orgTrace']['data'].append(d)
+        self.chans[chan_id]['orgTrace']['logIdx'].append(idx)
 
     def extractInfo(self):
-
+        """
+        Extract data from each line of the log string and add them to the
+        SOH channels' orgTrace using addChanInfo()
+        """
         lines = [ln.strip() for ln in self.logText.splitlines() if ln != '']
         sohEpoch = 0
diff --git a/sohstationviewer/model/reftek/reftek.py b/sohstationviewer/model/reftek/reftek.py
index a2b15a1897001df7151f7eb144d9218cf7e8e766..2cf1d9024c66f7ff1e682740e0c992366e6dad77 100755
--- a/sohstationviewer/model/reftek/reftek.py
+++ b/sohstationviewer/model/reftek/reftek.py
@@ -1,5 +1,9 @@
+"""
+RT130 object to hold and process RefTek data
+"""
 
 import os
+from pathlib import Path
 
 import numpy as np
 from PySide2 import QtWidgets
@@ -18,6 +22,10 @@ from sohstationviewer.controller.util import validateFile
 
 
 class RT130(DataTypeModel):
+    """
+    Read and process RefTek files into an object with properties that can
+    be used to plot SOH data, mass position data, waveform data and gaps
+    """
     def __init__(self, *args, **kwarg):
         self.EH = {}
         super().__init__(*args, **kwarg)
@@ -32,14 +40,19 @@
         if len(self.reqWFChans) != 0:
             self.readWFFiles(self.selectedKey)
 
-    def readSOH_indexWaveform(self, dir):
+    def readSOH_indexWaveform(self, folder):
+        """
+        Loop over all files in the folder to read the SOH data and mass
+        position data, and to index the waveform data by filename and the
+        corresponding time range
+        :param folder: str - absolute path to data set folder
+        """
         count = 0
-        for path, subdirs, files in os.walk(dir):
+        for path, subdirs, files in os.walk(folder):
             for fileName in files:
-                path2file = os.path.join(path, fileName)
+                path2file = Path(path).joinpath(fileName)
                 if not validateFile(path2file, fileName):
                     continue
-                if not self.readReftek130(path2file, fileName):
+                if not self.readReftek130(path2file):
                     readText(path2file, fileName, self.logData['TEXT'])
                 count += 1
                 if count % 50 == 0:
@@ -49,6 +62,14 @@
         self.combineData()
 
     def selectKey(self):
+        """
+        :return selectedKey: (str, str) -
+            (device's serial number, experiment number),
+            the selected key from self.keys.
+            + If there is only one key, return it.
+            + If there is more than one, show all keys and let the user
+              choose one to return.
+        """
         self.keys = sorted(list(self.keys))
         selectedKey = self.keys[0]
         if len(self.keys) > 1:
@@ -71,11 +92,25 @@
         self.trackInfo(f'Select Key {selectedKey}', 'info')
         return selectedKey
 
-    def readWFFiles(self, staID):
+    def readWFFiles(self, key):
+        """
+        From filesInfo, read all waveform data of the requested waveform
+        DS channels for the given key, in the time selected from the Main
+        Window, to append to DataTypeModel.__init__.
+        waveformData[key]['readData'][chan_id]['tracesInfo'].
+        tracesInfo includes a list of traces' info dicts, sorted in
+        startTmEpoch order by sortData().
+        Refer to DataTypeModel.__init__.
+        waveform_data[key]['read_data'][chan_id]['traces_info'].
+        :param key: (str, str) -
+            (device's serial number, experiment number),
+            the selected key from self.keys.
+        """
         count = 0
-        for DS in self.waveformData[staID]['filesInfo']:
-            readData = self.waveformData[staID]['readData']
-            for fileInfo in self.waveformData[staID]['filesInfo'][DS]:
+        for DS in self.waveformData[key]['filesInfo']:
+            readData = self.waveformData[key]['readData']
+            for fileInfo in self.waveformData[key]['filesInfo'][DS]:
                 # file have been read
                 if fileInfo['read']:
                     continue
@@ -87,8 +122,8 @@
                 hasData = True
             if not hasData:
                 continue
-            readWaveformReftek(fileInfo['rt130'], staID, readData,
-                               self.dataTime[staID], self.tmpDir)
+            readWaveformReftek(fileInfo['rt130'], key, readData,
+                               self.dataTime[key], self.tmpDir)
             fileInfo['read'] = True
             count += 1
             if count % 50 == 0:
@@ -100,7 +135,14 @@
             self.logData[self.curKey][chan_pkt] = []
         self.logData[self.curKey][chan_pkt].append(logInfo)
 
-    def readReftek130(self, path2file, fileName):
+    def readReftek130(self, path2file):
+        """
+        From the given file:
+        + read SOH data from a file with SH packets
+        + read event info and mass position, and index the waveform (data
+          stream) files, from a file with EH or ET packets
+        :param path2file: str - absolute path to file
+        """
         rt130 = core.Reftek130.from_file(path2file)
         unique, counts = np.unique(rt130._data["packet_type"],
                                    return_counts=True)
@@ -113,6 +155,11 @@
         return True
 
     def readSH(self, path2file):
+        """
+        Use the soh_packet library to read a file with SH packets for SOH
+        data, appending tuples of (time, log string) to
+        logData[self.curKey]['SOH']
+        :param path2file: str - absolute path to file
+        """
         with open(path2file, "rb") as fh:
             str = fh.read()
         data = soh_packet._initial_unpack_packets_soh(str)
@@ -128,13 +175,24 @@
             self.logData[self.curKey]['SOH'].append((d['time'], logs))
 
     def readEHET_MP_indexWF(self, rt130):
+        """
+        Files that contain EH or ET packets are data stream files.
+        There may be 1 - 9 data streams.
+        :param rt130: rt130 object - of a data stream file, in which:
+            + event info can be found in the EH packet and is saved in
+              self.logData
+            + mass position data can be found in data stream 9 and is
+              saved in self.massPosStream
+            + waveform data can be found in data streams 1-8, which will
+              be indexed and saved to
+              self.waveformData[self.curKey]["filesInfo"] to be processed
+              later
+        """
         DS = rt130._data['data_stream_number'][0] + 1
         if DS not in self.reqDSs + [9]:
             return
 
         ind_EHET = [ind for ind, val in enumerate(rt130._data["packet_type"])
-                    if val in [b"EH"]]  # on ly need event header
+                    if val in [b"EH"]]  # only need event header
         nbr_DT_samples = sum(
             [rt130._data[ind]["number_of_samples"]
              for ind in range(0, len(rt130._data))
@@ -159,8 +217,12 @@
         self.indexWaveForm(rt130, DS)
 
     def readMassPos(self, rt130):
+        """
+        Append all traces of data stream 9 to
+        self.massPosStream[self.curKey] and update dataTime.
+        :param rt130: rt130 object - for data stream 9
+        """
         if self.curKey not in self.massPosStream:
-
             self.massPosStream[self.curKey] = Stream()
 
         stream = core.Reftek130.to_stream(
@@ -176,6 +238,12 @@
                 tr.stats['endtime'].timestamp, self.dataTime[self.curKey][1])
 
     def indexWaveForm(self, rt130, DS):
+        """
+        Index the waveform by adding the rt130 object along with its time
+        range to self.waveformData[self.curKey]['filesInfo']
+        :param rt130: rt130 object - for data stream DS
+        :param DS: int - data stream index
+        """
         if self.curKey not in self.waveformData:
             self.waveformData[self.curKey] = {"filesInfo": {},
                                               "readData": {}}
@@ -195,6 +263,24 @@
                 'read': False})
 
     def combineData(self):
+        """
+        + The SOH and event logData will be processed to create SOHData:
+            {key of set:
+                {chan_id:
+                    {orgTrace: {
+                        times: list of epoch times
+                        data: list of the channel's values
+                        logIdx: list of indexes of the SOH message lines
+                            corresponding to times and data
+                    }}
+                }
+            }
+        + massPosStream for each data set will be merged and saved to
+          massPosData according to chan_ids
+        + Gaps will be calculated and squashed for mass position only,
+          since there is currently no way to calculate SOH gaps, and the
+          waveform data are too big to consider calculating gaps.
+        """
         for k in self.logData:
             if k == 'TEXT':
                 continue
@@ -218,7 +304,7 @@
                 logStr = ''.join(logs)
                 self.logData[k][pktType] = logStr
                 logObj = LogInfo(
-                    self, self.trackInfo, logStr, k, pktType, self.reqDSs)
+                    self, self.trackInfo, logStr, k, self.reqDSs)
                 self.dataTime[k][0] = min(logObj.minEpoch, self.dataTime[k][0])
                 self.dataTime[k][1] = max(logObj.maxEpoch, self.dataTime[k][1])
                 for cName in self.SOHData[k]: