From 9c1b2edf4aec1751c324ea9aa8f12e3ef3b63946 Mon Sep 17 00:00:00 2001
From: ldam <ldam@passcal.nmt.edu>
Date: Tue, 23 May 2023 09:22:55 -0600
Subject: [PATCH] detach RecordReader from mseed_reader.py

Move RecordReader out of mseed_reader.py into record_reader.py. The moved
copy also fixes a few header-parsing defects:

- apply_time_correction: test bit 1 of the activity flags (string index -2,
  not 1) and unpack the time correction as a signed 32-bit value.
- read_blockette_500: unpack the microsecond offset as a signed byte, as
  read_blockette_1001 already does.
- read_blockettes: stop at the first unsupported blockette instead of
  re-reading it, drop the unreachable blockette 2000 branch, and fix the
  missing space in the warning message.
---
 .../read_mseed_experiment/mseed_reader.py     | 293 +-----------------
 .../read_mseed_experiment/record_reader.py    | 288 +++++++++++++++++
 2 files changed, 292 insertions(+), 289 deletions(-)
 create mode 100644 sohstationviewer/model/mseed/read_mseed_experiment/record_reader.py

diff --git a/sohstationviewer/model/mseed/read_mseed_experiment/mseed_reader.py b/sohstationviewer/model/mseed/read_mseed_experiment/mseed_reader.py
index e15eb14cb..120c30965 100644
--- a/sohstationviewer/model/mseed/read_mseed_experiment/mseed_reader.py
+++ b/sohstationviewer/model/mseed/read_mseed_experiment/mseed_reader.py
@@ -1,291 +1,6 @@
-from numbers import Real
-from typing import BinaryIO, Optional, List
-
+from typing import BinaryIO
 import obspy
-from obspy import UTCDateTime
-
-from decode_mseed import (
-    decode_ieee_float, decode_ieee_double, decode_steim, decode_int16,
-    decode_int24, decode_int32,
-)
-from mseed_helper import (
-    FixedHeader, Blockette1000, get_data_endianness, Unpacker,
-    get_record_metadata, get_header_endianness, RecordMetadata,
-    EncodingFormat,
-)
-
-
-class RecordReader:
-    """
-    This class reads one data record from an MSEED file.
-    """
-
-    def __init__(self, file: BinaryIO) -> None:
-        # The MSEED file object to read from. The file pointer needs to be
-        # located at the start of a data record.
-        self.file = file
-
-        self.fixed_header: Optional[FixedHeader] = None
-        self.blockette_1000: Optional[Blockette1000] = None
-        self.other_blockettes: List[str] = []
-        # Utility object that helps unpack byte strings in the header (the
-        # fixed header and the blockettes).
-        # Separate from the one for data in case the header has a different
-        # byte order.
-        # TODO: change blockettes to use this unpacker as well.
-        self.header_unpacker: Unpacker = Unpacker()
-
-        self.data_unpacker: Unpacker = Unpacker()
-        self.record_metadata: Optional[RecordMetadata] = None
-
-        self.read_header()
-
-    def read_header(self) -> None:
-        """
-        Read the header of the current data record. The header includes the
-        fixed portion, blockette 1000, and any blockettes that follow.
-        """
-        # Save the start of the record so that we can go back after reading the
-        # header.
-        record_start = self.file.tell()
-
-        self.read_fixed_header()
-        self.read_blockette_1000()
-
-        header_endianness = get_header_endianness(self.fixed_header)
-        if header_endianness == 'little':
-            self.header_unpacker.byte_order_char = '<'
-        else:
-            self.header_unpacker.byte_order_char = '>'
-
-        data_endianness = get_data_endianness(self.blockette_1000)
-        if data_endianness == 'little':
-            self.data_unpacker.byte_order_char = '<'
-        else:
-            self.data_unpacker.byte_order_char = '>'
-
-        self.record_metadata = get_record_metadata(self.fixed_header,
-                                                   self.header_unpacker)
-
-        self.apply_time_correction()
-        self.read_blockettes()
-        self.file.seek(record_start)
-
-    def read_fixed_header(self) -> None:
-        """
-        Read the fixed header of the current data record and store it in
-        self.fixed_header.
-        """
-        byte_counts = [6, 1, 1, 5, 2, 3, 2, 10, 2, 2, 2, 1, 1, 1, 1, 4, 2, 2]
-
-        fixed_header_sections_values = []
-        for byte_count in byte_counts:
-            fixed_header_sections_values.append(self.file.read(byte_count))
-        self.fixed_header = FixedHeader(*fixed_header_sections_values)
-
-    def read_blockette_500(self) -> None:
-        """
-        Read blockette 500 and format its content. The result is stored for
-        future uses. Assumes that the file pointer is at the start of the
-        blockette.
-        """
-        blockette_content = {}
-        # Skip the first four bytes because they contain meta-information about
-        # the blockettes.
-        self.file.read(4)
-
-        vco_correction = self.file.read(4)
-        blockette_content['VCO correction'] = self.header_unpacker.unpack(
-            'f', vco_correction
-        )[0]
-
-        exception_time_bytes = self.file.read(10)
-        exception_time_tuple = self.header_unpacker.unpack(
-            'HHBBBBH', exception_time_bytes)
-        exception_time = UTCDateTime(year=exception_time_tuple[0],
-                                     julday=exception_time_tuple[1],
-                                     hour=exception_time_tuple[2],
-                                     minute=exception_time_tuple[3],
-                                     second=exception_time_tuple[4],
-                                     microsecond=exception_time_tuple[6] * 100)
-        blockette_content['Time of exception'] = exception_time.strftime(
-            '%Y:%j:%H:%M:%S:%f'
-        )
-
-        microsecond = self.file.read(1)
-        microsecond = self.header_unpacker.unpack('B', microsecond)[0]
-        start_time_adjustment = microsecond / (10 ** 6)
-        self.record_metadata.start_time += start_time_adjustment
-        blockette_content['Micro sec'] = microsecond
-
-        reception_quality = self.file.read(1)
-        blockette_content['Reception Quality'] = self.header_unpacker.unpack(
-            'B', reception_quality
-        )[0]
-
-        exception_count = self.file.read(4)
-        blockette_content['Exception Count'] = self.header_unpacker.unpack(
-            'I', exception_count
-        )[0]
-
-        exception_type = self.file.read(16)
-        blockette_content['Exception Type'] = self.header_unpacker.unpack(
-            '16s', exception_type
-        )[0].decode('utf-8').strip()
-
-        clock_model = self.file.read(32)
-        blockette_content['Clock Model'] = self.header_unpacker.unpack(
-            '32s', clock_model
-        )[0].decode('utf-8').strip()
-
-        clock_status = self.file.read(128)
-        blockette_content['Clock Status'] = self.header_unpacker.unpack(
-            '128s', clock_status
-        )[0].decode('utf-8').strip()
-
-        formatted_blockette = '\n'.join([f'{key}: {value}'
-                                         for key, value
-                                         in blockette_content.items()])
-        self.other_blockettes.append(formatted_blockette)
-
-    def read_blockette_1000(self) -> None:
-        """
-        Read blockette 1000 of the current data record and store it in
-        self.blockette_1000.
-        """
-        blockette_1000_section_lengths = [2, 2, 1, 1, 1, 1]
-        blockette_1000_values = []
-        for section_length in blockette_1000_section_lengths:
-            blockette_1000_values.append(self.file.read(section_length))
-
-        self.blockette_1000 = Blockette1000(*blockette_1000_values)
-
-    def read_blockette_1001(self) -> None:
-        """
-        Read blockette 1001. The only valuable thing in this blockette is the
-        more precise start time. Assumes that the file pointer is at the start
-        of the blockette.
-        """
-        self.file.read(5)
-        start_time_microsecond = self.file.read(1)
-        start_time_microsecond = self.header_unpacker.unpack(
-            'b', start_time_microsecond
-        )[0]
-        # Convert from microsecond to second so that UTCDateTime can handle it.
-        start_time_microsecond /= (10 ** 6)
-        self.record_metadata.start_time += start_time_microsecond
-        self.file.read(2)
-
-    def read_blockette_2000(self) -> None:
-        pass
-
-    def apply_time_correction(self) -> None:
-        """
-        Apply the time correction found in the fixed header to the start time.
-        """
-        # format() is used here instead of bin() because we need to pad the
-        # resulting bit string with 0 to the left.
-        activity_flags = format(
-            self.header_unpacker.unpack(
-                'B', self.fixed_header.activity_flags)[0],
-            '0>8b'
-        )
-        is_time_correction_applied = int(activity_flags[1])
-        if is_time_correction_applied:
-            return
-
-        time_correction = self.header_unpacker.unpack(
-            'L', self.fixed_header.time_correction
-        )[0]
-        # We need to convert the unit from 0.0001 seconds to seconds
-        time_correction *= 0.0001
-        self.record_metadata.start_time += time_correction
-
-    def read_blockettes(self) -> None:
-        """
-        Read all the blockettes in the current data record aside from blockette
-        1000, which has beem read previously. Currently only handle blockettes
-        500, 1001, and 2000.
-        """
-        blockette_count = self.header_unpacker.unpack(
-            'B', self.fixed_header.blockette_count
-        )[0]
-        for i in range(1, blockette_count):
-            # All blockettes store their type in the first two bytes, so we
-            # read that to determine what to do
-            next_blockette_type = self.file.read(2)
-            # Move file pointer back to start of blockette
-            self.file.seek(-2, 1)
-            next_blockette_type = self.header_unpacker.unpack(
-                'H', next_blockette_type
-            )[0]
-            if next_blockette_type not in (500, 1000, 1001):
-                print('We currently only handle blockettes 500, 1000, and'
-                      '1001.')
-                continue
-            if next_blockette_type == 500:
-                self.read_blockette_500()
-            elif next_blockette_type == 1001:
-                self.read_blockette_1001()
-            elif next_blockette_type == 2000:
-                self.read_blockette_2000()
-
-    def get_first_data_point(self) -> Real:
-        """
-        Get the first data point of the current data record.
-        :return: the first data point of the current data record, whose type is
-            determined based on the encoding type stored in blockette 1000.
-        """
-        record_start = self.file.tell()
-        data_start = self.header_unpacker.unpack(
-            'H', self.fixed_header.data_offset
-        )[0]
-        # The data start byte is defined as an offset from the start of the
-        # data record. Seeing as we should be at the start of the data record
-        # by seeking there at the end of every major step, we can simply seek
-        # to the start of the data.
-        self.file.seek(data_start, 1)
-
-        encoding_format = self.blockette_1000.encoding_format
-        encoding_format = self.header_unpacker.unpack('b', encoding_format)[0]
-        encoding_format = EncodingFormat(encoding_format)
-
-        if encoding_format == EncodingFormat.ASCII:
-            # We want to read everything in the record if the encoding is
-            # ASCII.
-            record_length_exp = self.header_unpacker.unpack(
-                'B', self.blockette_1000.record_length
-            )[0]
-            record_size = 2 ** record_length_exp
-            # This name does not make much sense with what we are doing here,
-            # but it will have to do for now.
-            # The size of the record includes the header, so we have to account
-            # for that when grabbing the data.
-            first_data_point = self.file.read(record_size - data_start)
-        else:
-
-            # Currently, we are extracting only the first data point in each
-            # record. The smallest possible amount of bytes we can extract
-            # while guaranteeing that we get the first data point in the
-            # record is 8, with Steim encodings and IEEE double precision
-            # float needing to use the whole buffer.
-            buffer = self.file.read(8)
-            encoding_to_decoder = {
-                EncodingFormat.INT_16_BIT: decode_int16,
-                EncodingFormat.INT_24_BIT: decode_int24,
-                EncodingFormat.INT_32_BIT: decode_int32,
-                EncodingFormat.IEEE_FLOAT_32_BIT: decode_ieee_float,
-                EncodingFormat.IEEE_FLOAT_64_BIT: decode_ieee_double,
-                EncodingFormat.STEIM_1: decode_steim,
-                EncodingFormat.STEIM_2: decode_steim,
-            }
-            first_data_point = encoding_to_decoder[encoding_format](
-                buffer, self.data_unpacker
-            )
-        # Seek back to the start of the record so we can call this method again
-        # if needed.
-        self.file.seek(record_start)
-        return first_data_point
+from record_reader import RecordReader
 
 
 class MSeedReader:
@@ -333,8 +48,8 @@ class MSeedReader:
 
 if __name__ == '__main__':
     # numpy.set_printoptions(threshold=sys.maxsize)
-    file_path = '/Users/kle/PycharmProjects/sohstationviewer/tests/test_data' \
-                '/DT0001__.ACE '
+    file_path = '/Users/ldam/Documents/GIT/sohstationviewer/tests/test_data/' \
+                'Q330_mixed_traces/XX-3203_4-20221222183011'
     file = open(file_path, 'rb')
     stream = obspy.read(file_path)
     MSeedReader(file).read()
diff --git a/sohstationviewer/model/mseed/read_mseed_experiment/record_reader.py b/sohstationviewer/model/mseed/read_mseed_experiment/record_reader.py
new file mode 100644
index 000000000..241dc9134
--- /dev/null
+++ b/sohstationviewer/model/mseed/read_mseed_experiment/record_reader.py
@@ -0,0 +1,288 @@
+from numbers import Real
+from typing import BinaryIO, Optional, List
+
+
+from obspy import UTCDateTime
+
+from decode_mseed import (
+    decode_ieee_float, decode_ieee_double, decode_steim, decode_int16,
+    decode_int24, decode_int32,
+)
+from mseed_helper import (
+    FixedHeader, Blockette1000, get_data_endianness, Unpacker,
+    get_record_metadata, get_header_endianness, RecordMetadata,
+    EncodingFormat,
+)
+
+
+class RecordReader:
+    """
+    This class reads one data record from an MSEED file.
+    """
+
+    def __init__(self, file: BinaryIO) -> None:
+        # The MSEED file object to read from. The file pointer needs to be
+        # located at the start of a data record.
+        self.file = file
+
+        self.fixed_header: Optional[FixedHeader] = None
+        self.blockette_1000: Optional[Blockette1000] = None
+        self.other_blockettes: List[str] = []
+        # Utility object that helps unpack byte strings in the header (the
+        # fixed header and the blockettes).
+        # Separate from the one for data in case the header has a different
+        # byte order.
+        # TODO: change blockettes to use this unpacker as well.
+        self.header_unpacker: Unpacker = Unpacker()
+
+        self.data_unpacker: Unpacker = Unpacker()
+        self.record_metadata: Optional[RecordMetadata] = None
+
+        self.read_header()
+
+    def read_header(self) -> None:
+        """
+        Read the header of the current data record. The header includes the
+        fixed portion, blockette 1000, and any blockettes that follow.
+        """
+        # Save the start of the record so that we can go back after reading the
+        # header.
+        record_start = self.file.tell()
+
+        self.read_fixed_header()
+        self.read_blockette_1000()
+
+        header_endianness = get_header_endianness(self.fixed_header)
+        if header_endianness == 'little':
+            self.header_unpacker.byte_order_char = '<'
+        else:
+            self.header_unpacker.byte_order_char = '>'
+
+        data_endianness = get_data_endianness(self.blockette_1000)
+        if data_endianness == 'little':
+            self.data_unpacker.byte_order_char = '<'
+        else:
+            self.data_unpacker.byte_order_char = '>'
+
+        self.record_metadata = get_record_metadata(self.fixed_header,
+                                                   self.header_unpacker)
+
+        self.apply_time_correction()
+        self.read_blockettes()
+        self.file.seek(record_start)
+
+    def read_fixed_header(self) -> None:
+        """
+        Read the fixed header of the current data record and store it in
+        self.fixed_header.
+        """
+        byte_counts = [6, 1, 1, 5, 2, 3, 2, 10, 2, 2, 2, 1, 1, 1, 1, 4, 2, 2]
+
+        fixed_header_sections_values = []
+        for byte_count in byte_counts:
+            fixed_header_sections_values.append(self.file.read(byte_count))
+        self.fixed_header = FixedHeader(*fixed_header_sections_values)
+
+    def read_blockette_500(self) -> None:
+        """
+        Read blockette 500 and format its content. The result is stored for
+        future uses. Assumes that the file pointer is at the start of the
+        blockette.
+        """
+        blockette_content = {}
+        # Skip the first four bytes because they contain meta-information about
+        # the blockettes.
+        self.file.read(4)
+
+        vco_correction = self.file.read(4)
+        blockette_content['VCO correction'] = self.header_unpacker.unpack(
+            'f', vco_correction
+        )[0]
+
+        exception_time_bytes = self.file.read(10)
+        exception_time_tuple = self.header_unpacker.unpack(
+            'HHBBBBH', exception_time_bytes)
+        exception_time = UTCDateTime(year=exception_time_tuple[0],
+                                     julday=exception_time_tuple[1],
+                                     hour=exception_time_tuple[2],
+                                     minute=exception_time_tuple[3],
+                                     second=exception_time_tuple[4],
+                                     microsecond=exception_time_tuple[6] * 100)
+        blockette_content['Time of exception'] = exception_time.strftime(
+            '%Y:%j:%H:%M:%S:%f'
+        )
+
+        # The microsecond offset is a signed byte, as in blockette 1001.
+        microsecond = self.header_unpacker.unpack('b', self.file.read(1))[0]
+        start_time_adjustment = microsecond / (10 ** 6)
+        self.record_metadata.start_time += start_time_adjustment
+        blockette_content['Micro sec'] = microsecond
+
+        reception_quality = self.file.read(1)
+        blockette_content['Reception Quality'] = self.header_unpacker.unpack(
+            'B', reception_quality
+        )[0]
+
+        exception_count = self.file.read(4)
+        blockette_content['Exception Count'] = self.header_unpacker.unpack(
+            'I', exception_count
+        )[0]
+
+        exception_type = self.file.read(16)
+        blockette_content['Exception Type'] = self.header_unpacker.unpack(
+            '16s', exception_type
+        )[0].decode('utf-8').strip()
+
+        clock_model = self.file.read(32)
+        blockette_content['Clock Model'] = self.header_unpacker.unpack(
+            '32s', clock_model
+        )[0].decode('utf-8').strip()
+
+        clock_status = self.file.read(128)
+        blockette_content['Clock Status'] = self.header_unpacker.unpack(
+            '128s', clock_status
+        )[0].decode('utf-8').strip()
+
+        formatted_blockette = '\n'.join([f'{key}: {value}'
+                                         for key, value
+                                         in blockette_content.items()])
+        self.other_blockettes.append(formatted_blockette)
+
+    def read_blockette_1000(self) -> None:
+        """
+        Read blockette 1000 of the current data record and store it in
+        self.blockette_1000.
+        """
+        blockette_1000_section_lengths = [2, 2, 1, 1, 1, 1]
+        blockette_1000_values = []
+        for section_length in blockette_1000_section_lengths:
+            blockette_1000_values.append(self.file.read(section_length))
+
+        self.blockette_1000 = Blockette1000(*blockette_1000_values)
+
+    def read_blockette_1001(self) -> None:
+        """
+        Read blockette 1001. The only valuable thing in this blockette is the
+        more precise start time. Assumes that the file pointer is at the start
+        of the blockette.
+        """
+        self.file.read(5)
+        start_time_microsecond = self.file.read(1)
+        start_time_microsecond = self.header_unpacker.unpack(
+            'b', start_time_microsecond
+        )[0]
+        # Convert from microsecond to second so that UTCDateTime can handle it.
+        start_time_microsecond /= (10 ** 6)
+        self.record_metadata.start_time += start_time_microsecond
+        self.file.read(2)
+
+    def read_blockette_2000(self) -> None:
+        """Placeholder: blockette 2000 is not parsed yet; nothing is read."""
+
+    def apply_time_correction(self) -> None:
+        """
+        Apply the time correction found in the fixed header to the start time.
+        """
+        # Zero-padded, MSB-first bit string. SEED numbers flag bits from the
+        # LSB, so bit 1 ("time correction applied") is at string index -2.
+        activity_flags = format(
+            self.header_unpacker.unpack(
+                'B', self.fixed_header.activity_flags)[0],
+            '0>8b'
+        )
+        is_time_correction_applied = int(activity_flags[-2])
+        if is_time_correction_applied:
+            return
+
+        time_correction = self.header_unpacker.unpack(
+            'l', self.fixed_header.time_correction
+        )[0]
+        # Signed field in units of 0.0001 seconds; convert it to seconds.
+        time_correction *= 0.0001
+        self.record_metadata.start_time += time_correction
+
+    def read_blockettes(self) -> None:
+        """
+        Read all the blockettes in the current data record aside from blockette
+        1000, which has been read previously. Currently only blockettes 500
+        and 1001 are handled; reading stops at the first unsupported one.
+        """
+        blockette_count = self.header_unpacker.unpack(
+            'B', self.fixed_header.blockette_count
+        )[0]
+        for _ in range(1, blockette_count):
+            # All blockettes store their type in the first two bytes, so we
+            # read that to determine what to do
+            next_blockette_type = self.file.read(2)
+            # Move file pointer back to start of blockette
+            self.file.seek(-2, 1)
+            next_blockette_type = self.header_unpacker.unpack(
+                'H', next_blockette_type
+            )[0]
+            if next_blockette_type == 500:
+                self.read_blockette_500()
+            elif next_blockette_type == 1001:
+                self.read_blockette_1001()
+            else:
+                # Nothing consumes this blockette (read_blockette_2000 is
+                # still a stub), so the pointer cannot advance: stop here.
+                print('We currently only handle blockettes 500, 1000, and '
+                      '1001.')
+                break
+
+    def get_first_data_point(self) -> Real:
+        """
+        Get the first data point of the current data record.
+        :return: the first data point of the current data record, whose type is
+            determined based on the encoding type stored in blockette 1000.
+        """
+        record_start = self.file.tell()
+        data_start = self.header_unpacker.unpack(
+            'H', self.fixed_header.data_offset
+        )[0]
+        # The data start byte is defined as an offset from the start of the
+        # data record. Seeing as we should be at the start of the data record
+        # by seeking there at the end of every major step, we can simply seek
+        # to the start of the data.
+        self.file.seek(data_start, 1)
+
+        encoding_format = self.blockette_1000.encoding_format
+        encoding_format = self.header_unpacker.unpack('b', encoding_format)[0]
+        encoding_format = EncodingFormat(encoding_format)
+
+        if encoding_format == EncodingFormat.ASCII:
+            # We want to read everything in the record if the encoding is
+            # ASCII.
+            record_length_exp = self.header_unpacker.unpack(
+                'B', self.blockette_1000.record_length
+            )[0]
+            record_size = 2 ** record_length_exp
+            # This name does not make much sense with what we are doing here,
+            # but it will have to do for now.
+            # The size of the record includes the header, so we have to account
+            # for that when grabbing the data.
+            first_data_point = self.file.read(record_size - data_start)
+        else:
+
+            # Currently, we are extracting only the first data point in each
+            # record. The smallest possible amount of bytes we can extract
+            # while guaranteeing that we get the first data point in the
+            # record is 8, with Steim encodings and IEEE double precision
+            # float needing to use the whole buffer.
+            buffer = self.file.read(8)
+            encoding_to_decoder = {
+                EncodingFormat.INT_16_BIT: decode_int16,
+                EncodingFormat.INT_24_BIT: decode_int24,
+                EncodingFormat.INT_32_BIT: decode_int32,
+                EncodingFormat.IEEE_FLOAT_32_BIT: decode_ieee_float,
+                EncodingFormat.IEEE_FLOAT_64_BIT: decode_ieee_double,
+                EncodingFormat.STEIM_1: decode_steim,
+                EncodingFormat.STEIM_2: decode_steim,
+            }
+            first_data_point = encoding_to_decoder[encoding_format](
+                buffer, self.data_unpacker
+            )
+        # Seek back to the start of the record so we can call this method again
+        # if needed.
+        self.file.seek(record_start)
+        return first_data_point
\ No newline at end of file
-- 
GitLab