diff --git a/sohstationviewer/database/extract_data.py b/sohstationviewer/database/extract_data.py
index c6cf6581b84373601f8515046c48be5098b86e67..bfabf3aaa99accf25eb5505c2a5bb6cea9cf3468 100755
--- a/sohstationviewer/database/extract_data.py
+++ b/sohstationviewer/database/extract_data.py
@@ -26,7 +26,7 @@ def get_chan_plot_info(org_chan_id: str, chan_info: Dict, data_type: str,
         chan = 'MassPos?'
     if org_chan_id.startswith('Event DS'):
         chan = 'Event DS?'
-    if org_chan_id.startswith('DS'):
+    if org_chan_id.startswith('DS') and 'DSP' not in org_chan_id:
         chan = 'DS?'
     if org_chan_id.startswith('Disk Usage'):
         chan = 'Disk Usage?'
diff --git a/sohstationviewer/model/mseed/read_mseed_experiment/decode_mseed.py b/sohstationviewer/model/mseed/read_mseed_experiment/decode_mseed.py
new file mode 100644
index 0000000000000000000000000000000000000000..dc4d85396b81ef2962d365cad33d8f0a8acb2b0e
--- /dev/null
+++ b/sohstationviewer/model/mseed/read_mseed_experiment/decode_mseed.py
@@ -0,0 +1,35 @@
+def decode_int16(buffer, unpacker):
+    requested_bytes = buffer[:2]
+    return unpacker.unpack('h', requested_bytes)[0]
+
+
+def decode_int24(buffer, unpacker):
+    requested_bytes = buffer[:3]
+    byte_order = 'big' if unpacker.byte_order_char == '>' else 'little'
+    # We delegate to int.from_bytes() because it takes a lot of work to make
+    # struct.unpack() handle signed 24-bit integers.
+    # See: https://stackoverflow.com/questions/3783677/how-to-read-integers-from-a-file-that-are-24bit-and-little-endian-using-python # noqa
+    return int.from_bytes(requested_bytes, byte_order, signed=True)
+
+
+def decode_int32(buffer, unpacker):
+    requested_bytes = buffer[:4]
+    return unpacker.unpack('i', requested_bytes)[0]
+
+
+def decode_ieee_float(buffer, unpacker):
+    requested_bytes = buffer[:4]
+    return unpacker.unpack('f', requested_bytes)[0]
+
+
+def decode_ieee_double(buffer, unpacker):
+    requested_bytes = buffer[:8]
+    return unpacker.unpack('d', requested_bytes)[0]
+
+
+def decode_steim(buffer, unpacker):
+    # The first 4 bytes of a Steim frame contain metadata about the record.
+    # Since we are not decompressing the data, we skip them. The next 4
+    # bytes contain the first data point of the MSEED record, which we need.
+    requested_bytes = buffer[4:8]
+    return unpacker.unpack('i', requested_bytes)[0]
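Note on the decode_int24() change: int.from_bytes() treats its input as unsigned unless signed=True is passed, so without it negative samples come back as large positive values. A minimal standalone sketch of the difference (not part of the patch):

    raw = bytes([0xFE, 0xFF, 0xFF])  # -2 as a little-endian signed 24-bit int

    # Default behavior: unsigned interpretation.
    assert int.from_bytes(raw, 'little') == 16777214
    # signed=True gives the intended two's-complement value.
    assert int.from_bytes(raw, 'little', signed=True) == -2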
+ """ + default_byte_order_chars = ('@', '=', '>', '<', '!') + if format.startswith(default_byte_order_chars): + format = self.byte_order_char + format[:1] + else: + format = self.byte_order_char + format + return struct.unpack(format, buffer) + + +@dataclass +class FixedHeader: + """ + The fixed portion of the header of a data record. All fields are stored as + bytes to minimize time wasted on decoding unused values. + """ + sequence_number: bytes + data_header_quality_indicator: bytes + reserved: bytes + station: bytes + location: bytes + channel: bytes + net_code: bytes + record_start_time: bytes + sample_count: bytes + sample_rate_factor: bytes + sample_rate_multiplier: bytes + activity_flags: bytes + io_and_clock_flags: bytes + data_quality_flags: bytes + blockette_count: bytes + time_correction: bytes + data_offset: bytes + first_blockette_offset: bytes + + +@dataclass +class Blockette1000: + """ + Blockette 100 of a data record. All fields are stored as bytes to minimize + time wasted on decoding unused values. + """ + blockette_type: bytes + next_blockette_offset: bytes + encoding_format: bytes + byte_order: bytes + record_length: bytes + reserved_byte: bytes + + +@dataclass +class RecordMetadata: + """ + The metadata of a data record. + """ + station: str + location: str + channel: str + network: str + start_time: UTCDateTime + sample_count: int + sample_rate: float + + +class EncodingFormat(Enum): + ASCII = 0 + INT_16_BIT = 1 + INT_24_BIT = 2 + INT_32_BIT = 3 + IEEE_FLOAT_32_BIT = 4 + IEEE_FLOAT_64_BIT = 5 + STEIM_1 = 10 + STEIM_2 = 11 + + +def get_header_endianness(header: FixedHeader): + """ + Determine the endianness of the fixed header of a data record. Works by + checking if the decoded record start time has a sane value if the header + is assumed to be big-endian. + + WARNING: This check fails on three dates: 2056-1, 2056-256, and 2056-257. + 2056 is a palindrome when encoded as a pair of octets, so endianness does + not affect it. Similarly, 257 is also 2-octet-palindromic. Meanwhile, 1 and + 256 are counterparts when encoded as pairs of octets. Because they are both + valid values for day of year, it is impossible to make a conclusion about + endianness based on day of year if it is either 1 or 256 in big-endian. + These facts combined means that we cannot determine the endianness of the + header whose record starts on the aforementioned dates. The way this + function was written, the endianness will be recorded as big in these + cases. This problem is also recorded in libmseed. + + :param header: the fixed header of the data record + :return: either of the string 'big' or 'little' depending on the extracted + endianness of header + """ + record_start_time_string = header.record_start_time + record_start_time_tuple = struct.unpack('>hhbbbbh', + record_start_time_string) + # libmseed uses 1900 to 2100 as the sane year range. We follow their + # example here. + year_is_good = (1900 <= record_start_time_tuple[0] <= 2100) + # The upper range is 366 to account for leap years. + day_is_good = (1 <= record_start_time_tuple[1] <= 366) + + endianness = 'big' if year_is_good and day_is_good else 'little' + return endianness + + +def get_data_endianness(blockette_1000: Blockette1000): + """ + Get endianness of a data record by examining blockette 1000. + + :param blockette_1000: the blockette 1000 of the data record. + """ + # The byte order is only one byte so using big or little endian does not + # matter. 
diff --git a/sohstationviewer/model/mseed/read_mseed_experiment/mseed_reader.py b/sohstationviewer/model/mseed/read_mseed_experiment/mseed_reader.py
new file mode 100644
index 0000000000000000000000000000000000000000..120c30965fa4c24e6457413d6395dd913ba0246e
--- /dev/null
+++ b/sohstationviewer/model/mseed/read_mseed_experiment/mseed_reader.py
@@ -0,0 +1,55 @@
+from typing import BinaryIO
+
+import obspy
+
+from record_reader import RecordReader
+
+
+class MSeedReader:
+    def __init__(self, file: BinaryIO) -> None:
+        self.file = file
+
+    def read(self):
+        trace = []
+        while True:
+            # We know that the end of the file is reached when read()
+            # returns an empty bytes object.
+            is_eof = (self.file.read(1) == b'')
+            if is_eof:
+                break
+            # We need to move the file pointer back to its position after we
+            # do the end-of-file check. Otherwise, we would be off by one
+            # byte for all the reads afterward.
+            self.file.seek(-1, 1)
+
+            # We save the start of the current record so that after we are
+            # done reading the record, we can move back. This makes moving
+            # to the next record a lot easier, seeing as we can simply move
+            # the file pointer a distance the size of the current record.
+            current_record_start = self.file.tell()
+
+            reader = RecordReader(self.file)
+            trace.append(reader.get_first_data_point())
+            # sample_count = reader.record_metadata.sample_count
+            # sample_rate = reader.record_metadata.sample_rate
+            # record_time_taken = sample_count / sample_rate
+            # record_end_time = (reader.record_metadata.start_time +
+            #                    record_time_taken)
+
+            # MSEED stores the size of a data record as an exponent of a
+            # power of two, so we have to convert that to the actual size
+            # before doing anything else.
+            record_length_exp = reader.header_unpacker.unpack(
+                'B', reader.blockette_1000.record_length
+            )[0]
+            record_size = 2 ** record_length_exp
+
+            # Jump to the start of the next record.
+            self.file.seek(current_record_start)
+            self.file.seek(record_size, 1)
+
+
+if __name__ == '__main__':
+    # numpy.set_printoptions(threshold=sys.maxsize)
+    file_path = '/Users/ldam/Documents/GIT/sohstationviewer/tests/test_data/' \
+                'Q330_mixed_traces/XX-3203_4-20221222183011'
+    file = open(file_path, 'rb')
+    stream = obspy.read(file_path)
+    MSeedReader(file).read()
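The skeleton of MSeedReader.read() is: probe one byte for EOF, rewind, remember the record start, then seek ahead by the record size. A simplified, self-contained sketch of that loop, assuming a fixed record size instead of one decoded from blockette 1000:

    import io

    def iter_record_offsets(file, record_size):
        while True:
            # Probe one byte to detect EOF; read() returns b'' at the end.
            if file.read(1) == b'':
                break
            file.seek(-1, 1)  # undo the one-byte probe
            yield file.tell()
            file.seek(record_size, 1)  # jump to the next record

    fake = io.BytesIO(b'\x00' * (512 * 3))  # three fake 512-byte records
    assert list(iter_record_offsets(fake, 512)) == [0, 512, 1024]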
diff --git a/sohstationviewer/model/mseed/read_mseed_experiment/record_reader.py b/sohstationviewer/model/mseed/read_mseed_experiment/record_reader.py
new file mode 100644
index 0000000000000000000000000000000000000000..5b3af30c01f0e938989fdd7d388164d2e82338d0
--- /dev/null
+++ b/sohstationviewer/model/mseed/read_mseed_experiment/record_reader.py
@@ -0,0 +1,288 @@
+from numbers import Real
+from typing import BinaryIO, Optional, List
+
+from obspy import UTCDateTime
+
+from decode_mseed import (
+    decode_ieee_float, decode_ieee_double, decode_steim, decode_int16,
+    decode_int24, decode_int32,
+)
+from mseed_helper import (
+    FixedHeader, Blockette1000, get_data_endianness, Unpacker,
+    get_record_metadata, get_header_endianness, RecordMetadata,
+    EncodingFormat,
+)
+
+
+class RecordReader:
+    """
+    This class reads one data record from an MSEED file.
+    """
+
+    def __init__(self, file: BinaryIO) -> None:
+        # The MSEED file object to read from. The file pointer needs to be
+        # located at the start of a data record.
+        self.file = file
+
+        self.fixed_header: Optional[FixedHeader] = None
+        self.blockette_1000: Optional[Blockette1000] = None
+        self.other_blockettes: List[str] = []
+        # Utility object that helps unpack byte strings in the header (the
+        # fixed header and the blockettes).
+        # Separate from the one for data in case the header has a different
+        # byte order.
+        # TODO: change blockettes to use this unpacker as well.
+        self.header_unpacker: Unpacker = Unpacker()
+
+        self.data_unpacker: Unpacker = Unpacker()
+        self.record_metadata: Optional[RecordMetadata] = None
+
+        self.read_header()
+
+    def read_header(self) -> None:
+        """
+        Read the header of the current data record. The header includes the
+        fixed portion, blockette 1000, and any blockettes that follow.
+        """
+        # Save the start of the record so that we can go back after reading
+        # the header.
+        record_start = self.file.tell()
+
+        self.read_fixed_header()
+        self.read_blockette_1000()
+
+        header_endianness = get_header_endianness(self.fixed_header)
+        if header_endianness == 'little':
+            self.header_unpacker.byte_order_char = '<'
+        else:
+            self.header_unpacker.byte_order_char = '>'
+
+        data_endianness = get_data_endianness(self.blockette_1000)
+        if data_endianness == 'little':
+            self.data_unpacker.byte_order_char = '<'
+        else:
+            self.data_unpacker.byte_order_char = '>'
+
+        self.record_metadata = get_record_metadata(self.fixed_header,
+                                                   self.header_unpacker)
+
+        self.apply_time_correction()
+        self.read_blockettes()
+        self.file.seek(record_start)
+
+    def read_fixed_header(self) -> None:
+        """
+        Read the fixed header of the current data record and store it in
+        self.fixed_header.
+        """
+        # The number of bytes in each field of the fixed header, in order.
+        byte_counts = [6, 1, 1, 5, 2, 3, 2, 10, 2, 2, 2, 1, 1, 1, 1, 4, 2, 2]
+
+        fixed_header_sections_values = []
+        for byte_count in byte_counts:
+            fixed_header_sections_values.append(self.file.read(byte_count))
+        self.fixed_header = FixedHeader(*fixed_header_sections_values)
+
+    def read_blockette_500(self) -> None:
+        """
+        Read blockette 500 and format its content. The result is stored for
+        future use. Assumes that the file pointer is at the start of the
+        blockette.
+        """
+        blockette_content = {}
+        # Skip the first four bytes because they contain meta-information
+        # about the blockette.
+        self.file.read(4)
+
+        vco_correction = self.file.read(4)
+        blockette_content['VCO correction'] = self.header_unpacker.unpack(
+            'f', vco_correction
+        )[0]
+
+        exception_time_bytes = self.file.read(10)
+        exception_time_tuple = self.header_unpacker.unpack(
+            'HHBBBBH', exception_time_bytes)
+        exception_time = UTCDateTime(year=exception_time_tuple[0],
+                                     julday=exception_time_tuple[1],
+                                     hour=exception_time_tuple[2],
+                                     minute=exception_time_tuple[3],
+                                     second=exception_time_tuple[4],
+                                     microsecond=exception_time_tuple[6] * 100)
+        blockette_content['Time of exception'] = exception_time.strftime(
+            '%Y:%j:%H:%M:%S:%f'
+        )
+
+        microsecond = self.file.read(1)
+        microsecond = self.header_unpacker.unpack('B', microsecond)[0]
+        start_time_adjustment = microsecond / (10 ** 6)
+        self.record_metadata.start_time += start_time_adjustment
+        blockette_content['Micro sec'] = microsecond
+
+        reception_quality = self.file.read(1)
+        blockette_content['Reception Quality'] = self.header_unpacker.unpack(
+            'B', reception_quality
+        )[0]
+
+        exception_count = self.file.read(4)
+        blockette_content['Exception Count'] = self.header_unpacker.unpack(
+            'I', exception_count
+        )[0]
+
+        exception_type = self.file.read(16)
+        blockette_content['Exception Type'] = self.header_unpacker.unpack(
+            '16s', exception_type
+        )[0].decode('utf-8').strip()
+
+        clock_model = self.file.read(32)
+        blockette_content['Clock Model'] = self.header_unpacker.unpack(
+            '32s', clock_model
+        )[0].decode('utf-8').strip()
+
+        clock_status = self.file.read(128)
+        blockette_content['Clock Status'] = self.header_unpacker.unpack(
+            '128s', clock_status
+        )[0].decode('utf-8').strip()
+
+        formatted_blockette = '\n'.join([f'{key}: {value}'
+                                         for key, value
+                                         in blockette_content.items()])
+        self.other_blockettes.append(formatted_blockette)
+
+    def read_blockette_1000(self) -> None:
+        """
+        Read blockette 1000 of the current data record and store it in
+        self.blockette_1000.
+        """
+        blockette_1000_section_lengths = [2, 2, 1, 1, 1, 1]
+        blockette_1000_values = []
+        for section_length in blockette_1000_section_lengths:
+            blockette_1000_values.append(self.file.read(section_length))
+
+        self.blockette_1000 = Blockette1000(*blockette_1000_values)
+
+    def read_blockette_1001(self) -> None:
+        """
+        Read blockette 1001. The only valuable thing in this blockette is
+        the more precise start time. Assumes that the file pointer is at the
+        start of the blockette.
+        """
+        self.file.read(5)
+        start_time_microsecond = self.file.read(1)
+        start_time_microsecond = self.header_unpacker.unpack(
+            'b', start_time_microsecond
+        )[0]
+        # Convert from microseconds to seconds so that UTCDateTime can
+        # handle it.
+        start_time_microsecond /= (10 ** 6)
+        self.record_metadata.start_time += start_time_microsecond
+        self.file.read(2)
+
+    def read_blockette_2000(self) -> None:
+        pass
+
+    def apply_time_correction(self) -> None:
+        """
+        Apply the time correction found in the fixed header to the start
+        time.
+        """
+        activity_flags = self.header_unpacker.unpack(
+            'B', self.fixed_header.activity_flags
+        )[0]
+        # Bit 1 of the activity flags is set when the time correction has
+        # already been applied to the start time. We test it with a bit mask
+        # instead of indexing into a formatted bit string, which depends on
+        # getting the bit order right.
+        is_time_correction_applied = activity_flags & 0b10
+        if is_time_correction_applied:
+            return
+
+        # The time correction is stored as a signed long, so we unpack it
+        # with 'l'.
+        time_correction = self.header_unpacker.unpack(
+            'l', self.fixed_header.time_correction
+        )[0]
+        # We need to convert the unit from 0.0001 seconds to seconds.
+        time_correction *= 0.0001
+        self.record_metadata.start_time += time_correction
+
+    def read_blockettes(self) -> None:
+        """
+        Read all the blockettes in the current data record aside from
+        blockette 1000, which has been read previously. Currently, only
+        blockettes 500, 1001, and 2000 are handled.
+        """
+        blockette_count = self.header_unpacker.unpack(
+            'B', self.fixed_header.blockette_count
+        )[0]
+        # Start from 1 because blockette 1000 has already been read.
+        for i in range(1, blockette_count):
+            # All blockettes store their type in the first two bytes, so we
+            # read that to determine what to do.
+            next_blockette_type = self.file.read(2)
+            # Move the file pointer back to the start of the blockette.
+            self.file.seek(-2, 1)
+            next_blockette_type = self.header_unpacker.unpack(
+                'H', next_blockette_type
+            )[0]
+            if next_blockette_type not in (500, 1000, 1001, 2000):
+                print('We currently only handle blockettes 500, 1000, 1001,'
+                      ' and 2000.')
+                continue
+            if next_blockette_type == 500:
+                self.read_blockette_500()
+            elif next_blockette_type == 1001:
+                self.read_blockette_1001()
+            elif next_blockette_type == 2000:
+                self.read_blockette_2000()
+
+    def get_first_data_point(self) -> Real:
+        """
+        Get the first data point of the current data record.
+
+        :return: the first data point of the current data record, whose type
+            is determined based on the encoding type stored in blockette
+            1000.
+        """
+        record_start = self.file.tell()
+        data_start = self.header_unpacker.unpack(
+            'H', self.fixed_header.data_offset
+        )[0]
+        # The data start byte is defined as an offset from the start of the
+        # data record. Seeing as we should be at the start of the data
+        # record by seeking there at the end of every major step, we can
+        # simply seek forward to the start of the data.
+        self.file.seek(data_start, 1)
+
+        encoding_format = self.blockette_1000.encoding_format
+        encoding_format = self.header_unpacker.unpack('b', encoding_format)[0]
+        encoding_format = EncodingFormat(encoding_format)
+
+        if encoding_format == EncodingFormat.ASCII:
+            # We want to read everything in the record if the encoding is
+            # ASCII.
+            record_length_exp = self.header_unpacker.unpack(
+                'B', self.blockette_1000.record_length
+            )[0]
+            record_size = 2 ** record_length_exp
+            # The "first data point" name does not make much sense for ASCII
+            # data, but it will have to do for now.
+            # The size of the record includes the header, so we have to
+            # account for that when grabbing the data.
+            first_data_point = self.file.read(record_size - data_start)
+        else:
+            # Currently, we are extracting only the first data point in each
+            # record. The smallest possible number of bytes we can read
+            # while guaranteeing that we get the first data point in the
+            # record is 8, with the Steim encodings and IEEE
+            # double-precision floats needing the whole buffer.
+            buffer = self.file.read(8)
+            encoding_to_decoder = {
+                EncodingFormat.INT_16_BIT: decode_int16,
+                EncodingFormat.INT_24_BIT: decode_int24,
+                EncodingFormat.INT_32_BIT: decode_int32,
+                EncodingFormat.IEEE_FLOAT_32_BIT: decode_ieee_float,
+                EncodingFormat.IEEE_FLOAT_64_BIT: decode_ieee_double,
+                EncodingFormat.STEIM_1: decode_steim,
+                EncodingFormat.STEIM_2: decode_steim,
+            }
+            first_data_point = encoding_to_decoder[encoding_format](
+                buffer, self.data_unpacker
+            )
+        # Seek back to the start of the record so we can call this method
+        # again if needed.
+        self.file.seek(record_start)
+        return first_data_point
diff --git a/sohstationviewer/model/reftek/log_info.py b/sohstationviewer/model/reftek/log_info.py
index ac0bf9430f506e8ab77fbfd06ea39055006d6665..26e17ecb20149ec2a9badc9bf886fb5265d625f8 100644
--- a/sohstationviewer/model/reftek/log_info.py
+++ b/sohstationviewer/model/reftek/log_info.py
@@ -203,10 +203,10 @@ class LogInfo():
             return False
         return epoch, disk, val
 
-    def read_dps_clock_diff(self, line: str
+    def read_dsp_clock_diff(self, line: str
                             ) -> Union[bool, Tuple[float, float]]:
         """
-        Read DPS clock difference
+        Read DSP clock difference
         :param line: str - a line of evt message
         :return epoch: float - time when info is recorded
         :return total: float - total difference time in milliseconds
@@ -415,11 +415,11 @@ class LogInfo():
                 if epoch:
                     self.add_chan_info('Jerks/DSP Sets', epoch, 0, idx)
 
-            elif "DPS clock diff" in line:
-                ret = self.read_dps_clock_diff()
+            elif "DSP CLOCK DIFFERENCE" in line:
+                ret = self.read_dsp_clock_diff(line)
                 if ret:
                     epoch, total = ret
-                    self.add_chan_info('DPS Clock Diff', epoch, total, idx)
+                    self.add_chan_info('DSP Clock Diff', epoch, total, idx)
 
             elif "ACQUISITION STARTED" in line:
                 epoch = self.simple_read(line)[1]
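A sketch of how a "DSP CLOCK DIFFERENCE" line might be reduced to the total returned by read_dsp_clock_diff(). The line format shown here is hypothetical; the real RT130 message format should be taken from the implementation:

    import re

    # Hypothetical RT130 log line; the real format may differ.
    line = 'DSP CLOCK DIFFERENCE: 0.125 SECONDS'

    match = re.search(r'DSP CLOCK DIFFERENCE:\s*([-\d.]+)', line)
    if match:
        # Convert to milliseconds, matching the units that
        # read_dsp_clock_diff() is documented to return.
        total = float(match.group(1)) * 1000
        assert total == 125.0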
diff --git a/sohstationviewer/model/reftek/reftek.py b/sohstationviewer/model/reftek/reftek.py
index f7fa193d4ca40066cef2afd711a233ac5b5b99fd..eb1730d4e9805796eb7757781550302ae6255bf0 100755
--- a/sohstationviewer/model/reftek/reftek.py
+++ b/sohstationviewer/model/reftek/reftek.py
@@ -4,7 +4,7 @@ RT130 object to hold and process RefTek data
 import os
 from pathlib import Path
 from typing import Tuple, List, Union
-
+import traceback
 import numpy as np
 
 from sohstationviewer.model.reftek.from_rt2ms import (
@@ -89,8 +89,15 @@ class RT130(DataTypeModel):
                 path2file = Path(path).joinpath(file_name)
                 if not validate_file(path2file, file_name):
                     continue
-                if not self.read_reftek_130(path2file):
-                    read_text(path2file, file_name, self.log_data['TEXT'])
+                try:
+                    if not self.read_reftek_130(path2file):
+                        read_text(path2file, self.log_data['TEXT'])
+                except Exception:
+                    fmt = traceback.format_exc()
+                    self.track_info(f"Skipped file {path2file} because it "
+                                    f"can't be read due to error: {fmt}",
+                                    LogType.WARNING)
+
                 count += 1
                 if count % 50 == 0:
                     self.track_info(
@@ -133,7 +140,13 @@ class RT130(DataTypeModel):
 
         :param path2file: absolute path to file
         """
-        rt130 = core.Reftek130.from_file(path2file)
+        try:
+            rt130 = core.Reftek130.from_file(path2file)
+        except Exception:
+            fmt = traceback.format_exc()
+            self.track_info(f"Skipped file {path2file} because it can't be "
+                            f"read due to error: {fmt}", LogType.WARNING)
+            return
         unique, counts = np.unique(rt130._data["packet_type"],
                                    return_counts=True)
         nbr_packet_type = dict(zip(unique, counts))
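Both reftek.py hunks above follow the same skip-and-log pattern: attempt the read, and on failure record the full traceback as a warning and move on to the next file. A self-contained sketch of the pattern (parse and log_warning are stand-ins, not names from the patch):

    import traceback

    def parse_or_skip(paths, parse, log_warning):
        results = []
        for path in paths:
            try:
                results.append(parse(path))
            except Exception:
                # Keep the full traceback in the log so a corrupt file can
                # be diagnosed later without stopping the whole read.
                log_warning(f"Skipped file {path}: {traceback.format_exc()}")
        return results

    # Example with stand-in callables:
    assert parse_or_skip(['a', 'b'], lambda p: p.upper(), print) == ['A', 'B']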