diff --git a/sohstationviewer/model/mseed/read_mseed_experiment/mseed_reader.py b/sohstationviewer/model/mseed/read_mseed_experiment/mseed_reader.py
index 2dff217ac9f295c9f2e982443fe2732c6280d334..3cfd423dc5f336765f9349c4e969821bbfdc4938 100644
--- a/sohstationviewer/model/mseed/read_mseed_experiment/mseed_reader.py
+++ b/sohstationviewer/model/mseed/read_mseed_experiment/mseed_reader.py
@@ -1,10 +1,14 @@
+from numbers import Number, Real
+from typing import BinaryIO, Optional, List
+
 import obspy
+from obspy import UTCDateTime
 from decode_mseed import (
     decode_ieee_float, decode_ieee_double, decode_steim, decode_int16,
     decode_int24, decode_int32,
 )
-from help_do_stuff import (
+from mseed_helper import (
     FixedHeader, Blockette1000, get_data_endianness, Unpacker,
     get_record_metadata, get_header_endianness, RecordMetadata,
     EncodingFormat,
 )
@@ -12,19 +16,66 @@ from help_do_stuff import (
 
 
 class RecordReader:
-    def __init__(self, file):
+    """
+    This class reads one data record from an MSEED file.
+    """
+
+    def __init__(self, file: BinaryIO) -> None:
+        # The MSEED file object to read from. The file pointer needs to be
+        # located at the start of a data record.
         self.file = file
-        self.fixed_header: FixedHeader = None
-        self.blockette_1000: Blockette1000 = None
-        # Utility object that helps unpack byte string in fixed header.
+
+        self.fixed_header: Optional[FixedHeader] = None
+        self.blockette_1000: Optional[Blockette1000] = None
+        self.other_blockettes: List[str] = []
+        # Utility object that helps unpack byte strings in the header (the
+        # fixed header and the blockettes).
         # Separate from the one for data in case the header has a different
         # byte order.
         # TODO: change blockettes to use this unpacker as well.
-        self.fixed_header_unpacker: Unpacker = Unpacker()
+        self.header_unpacker: Unpacker = Unpacker()
+        self.data_unpacker: Unpacker = Unpacker()
 
-        self.record_metadata: RecordMetadata = None
+        self.record_metadata: Optional[RecordMetadata] = None
+
+        self.read_header()
+
+    def read_header(self) -> None:
+        """
+        Read the header of the current data record. The header includes the
+        fixed portion, blockette 1000, and any blockettes that follow.
+        """
+        # Save the start of the record so that we can go back after reading
+        # the header.
+        record_start = self.file.tell()
+
+        self.read_fixed_header()
+        self.read_blockette_1000()
+
+        header_endianness = get_header_endianness(self.fixed_header)
+        if header_endianness == 'little':
+            self.header_unpacker.byte_order_char = '<'
+        else:
+            self.header_unpacker.byte_order_char = '>'
+
+        data_endianness = get_data_endianness(self.blockette_1000)
+        if data_endianness == 'little':
+            self.data_unpacker.byte_order_char = '<'
+        else:
+            self.data_unpacker.byte_order_char = '>'
+
+        self.record_metadata = get_record_metadata(self.fixed_header,
+                                                   self.header_unpacker)
+
+        self.apply_time_correction()
+        self.read_blockettes()
+        self.file.seek(record_start)
 
-    def read_fixed_header(self):
+    def read_fixed_header(self) -> None:
+        """
+        Read the fixed header of the current data record and store it in
+        self.fixed_header.
+        """
         byte_counts = [6, 1, 1, 5, 2, 3, 2, 10, 2, 2, 2, 1, 1, 1, 1, 4, 2, 2]
 
         fixed_header_sections_values = []
@@ -32,23 +83,74 @@ class RecordReader:
             fixed_header_sections_values.append(self.file.read(byte_count))
         self.fixed_header = FixedHeader(*fixed_header_sections_values)
 
-    def read_blockette_500(self):
+    def read_blockette_500(self) -> None:
         """
-        Read blockette 500. Only read more precise start time. Assumes that the
-        file pointer is at the start of the blockette.
-        # TODO: find out what VCO correction is.
+        Read blockette 500 and format its content. The result is stored for
+        future use. Assumes that the file pointer is at the start of the
+        blockette.
         """
-        self.file.seek(18, 1)
-        start_time_microsecond = self.file.read(1)
-        start_time_microsecond = self.data_unpacker.unpack(
-            'b', start_time_microsecond
+        blockette_content = {}
+        # Skip the first four bytes because they contain meta-information
+        # about the blockette (its type and the offset to the next one).
+        self.file.read(4)
+
+        vco_correction = self.file.read(4)
+        blockette_content['VCO correction'] = self.header_unpacker.unpack(
+            'f', vco_correction
         )[0]
-        # Convert from microsecond to second so that UTCDateTime can handle it.
-        start_time_microsecond /= (10 ** 6)
-        self.record_metadata.start_time += start_time_microsecond
-        self.file.seek(181, 1)
 
-    def read_blockette_1000(self):
+        exception_time_bytes = self.file.read(10)
+        exception_time_tuple = self.header_unpacker.unpack(
+            'HHBBBBH', exception_time_bytes)
+        exception_time = UTCDateTime(year=exception_time_tuple[0],
+                                     julday=exception_time_tuple[1],
+                                     hour=exception_time_tuple[2],
+                                     minute=exception_time_tuple[3],
+                                     second=exception_time_tuple[4],
+                                     microsecond=exception_time_tuple[6] * 100)
+        blockette_content['Time of exception'] = exception_time.strftime(
+            '%Y:%j:%H:%M:%S:%f'
+        )
+
+        microsecond = self.file.read(1)
+        microsecond = self.header_unpacker.unpack('B', microsecond)[0]
+        start_time_adjustment = microsecond / (10 ** 6)
+        self.record_metadata.start_time += start_time_adjustment
+        blockette_content['Micro sec'] = microsecond
+
+        reception_quality = self.file.read(1)
+        blockette_content['Reception Quality'] = self.header_unpacker.unpack(
+            'B', reception_quality
+        )[0]
+
+        exception_count = self.file.read(4)
+        blockette_content['Exception Count'] = self.header_unpacker.unpack(
+            'I', exception_count
+        )[0]
+
+        exception_type = self.file.read(16)
+        blockette_content['Exception Type'] = self.header_unpacker.unpack(
+            '16s', exception_type
+        )[0].decode('utf-8').strip()
+
+        clock_model = self.file.read(32)
+        blockette_content['Clock Model'] = self.header_unpacker.unpack(
+            '32s', clock_model
+        )[0].decode('utf-8').strip()
+
+        clock_status = self.file.read(128)
+        blockette_content['Clock Status'] = self.header_unpacker.unpack(
+            '128s', clock_status
+        )[0].decode('utf-8').strip()
+
+        formatted_blockette = '\n'.join(f'{key}: {value}'
+                                        for key, value in blockette_content.items())
+        self.other_blockettes.append(formatted_blockette)
+
+    def read_blockette_1000(self) -> None:
+        """
+        Read blockette 1000 of the current data record and store it in
+        self.blockette_1000.
+        """
         blockette_1000_section_lengths = [2, 2, 1, 1, 1, 1]
         blockette_1000_values = []
         for section_length in blockette_1000_section_lengths:
@@ -56,7 +158,7 @@ class RecordReader:
 
         self.blockette_1000 = Blockette1000(*blockette_1000_values)
 
-    def read_blockette_1001(self):
+    def read_blockette_1001(self) -> None:
         """
         Read blockette 1001. The only valuable thing in this blockette is the
         more precise start time. Assumes that the file pointer is at the start
@@ -64,19 +166,25 @@ class RecordReader:
         """
         self.file.read(5)
         start_time_microsecond = self.file.read(1)
-        start_time_microsecond = self.data_unpacker.unpack(
+        start_time_microsecond = self.header_unpacker.unpack(
            'b', start_time_microsecond
        )[0]
        # Convert from microsecond to second so that UTCDateTime can handle it.
-        start_time_microsecond /= (10**6)
+        start_time_microsecond /= (10 ** 6)
         self.record_metadata.start_time += start_time_microsecond
         self.file.read(2)
 
-    def apply_time_correction(self):
+    def read_blockette_2000(self) -> None:
+        pass  # TODO: implement. For now this does not consume the blockette.
+
+    def apply_time_correction(self) -> None:
+        """
+        Apply the time correction found in the fixed header to the start time.
+        """
         # format() is used here instead of bin() because we need to pad the
         # resulting bit string with 0 to the left.
         activity_flags = format(
-            self.fixed_header_unpacker.unpack(
+            self.header_unpacker.unpack(
                 'B', self.fixed_header.activity_flags)[0],
             '0>8b'
         )
@@ -84,15 +192,20 @@ class RecordReader:
         if is_time_correction_applied:
             return
 
-        time_correction = self.fixed_header_unpacker.unpack(
+        time_correction = self.header_unpacker.unpack(
             'L', self.fixed_header.time_correction
         )[0]
         # We need to convert the unit from 0.0001 seconds to seconds
         time_correction *= 0.0001
         self.record_metadata.start_time += time_correction
 
-    def read_blockettes(self):
-        blockette_count = self.fixed_header_unpacker.unpack(
+    def read_blockettes(self) -> None:
+        """
+        Read all the blockettes in the current data record aside from
+        blockette 1000, which has been read previously. Currently only
+        handles blockettes 500, 1001, and 2000.
+        """
+        blockette_count = self.header_unpacker.unpack(
             'B', self.fixed_header.blockette_count
         )[0]
         for i in range(1, blockette_count):
@@ -101,49 +214,38 @@ class RecordReader:
             # All blockettes store their type in the first two bytes.
             next_blockette_type = self.file.read(2)
             # Move file pointer back to start of blockette
             self.file.seek(-2, 1)
-            next_blockette_type = self.data_unpacker.unpack(
+            next_blockette_type = self.header_unpacker.unpack(
                 'H', next_blockette_type
             )[0]
-            if next_blockette_type not in (1000, 1001, 500):
-                print('We currently only handle blockettes 500, 1000, and'
-                      '1001.')
+            if next_blockette_type not in (500, 1000, 1001, 2000):
+                print('We currently only handle blockettes 500, 1000, 1001, '
+                      'and 2000.')
                 continue
-            if next_blockette_type == 1001:
-                self.read_blockette_1001()
-            elif next_blockette_type == 500:
+            if next_blockette_type == 500:
                 self.read_blockette_500()
+            elif next_blockette_type == 1001:
+                self.read_blockette_1001()
+            elif next_blockette_type == 2000:
+                self.read_blockette_2000()
 
-    def get_first_data_point(self):
+    def get_first_data_point(self) -> Real:
+        """
+        Get the first data point of the current data record.
+        :return: the first data point of the current data record, whose type
+            is determined based on the encoding type stored in blockette 1000.
+        """
         record_start = self.file.tell()
-        self.read_fixed_header()
-        self.read_blockette_1000()
-
-        header_endianness = get_header_endianness(self.fixed_header)
-        if header_endianness == 'little':
-            self.fixed_header_unpacker.byte_order_char = '<'
-        else:
-            self.fixed_header_unpacker.byte_order_char = '>'
-
-        data_endianness = get_data_endianness(self.blockette_1000)
-        if data_endianness == 'little':
-            self.data_unpacker.byte_order_char = '<'
-        else:
-            self.data_unpacker.byte_order_char = '>'
-
-        self.record_metadata = get_record_metadata(self.fixed_header,
-                                                    self.fixed_header_unpacker)
-
-        self.apply_time_correction()
-        self.read_blockettes()
-        data_start = self.fixed_header_unpacker.unpack('H', self.fixed_header.data_offset)[0]
-        # It is most convenient to seek to the start of the record, then seek
-        # to the start of the data. Any other way would involve some
-        # calculation to determine which point to seek to.
-        self.file.seek(record_start)
+        data_start = self.header_unpacker.unpack(
+            'H', self.fixed_header.data_offset
+        )[0]
+        # The data start byte is defined as an offset from the start of the
+        # data record. Because every major step seeks back to the start of
+        # the record when it finishes, we are already there and can simply
+        # seek to the start of the data.
         self.file.seek(data_start, 1)
         encoding_format = self.blockette_1000.encoding_format
-        encoding_format = self.data_unpacker.unpack('b', encoding_format)[0]
+        encoding_format = self.header_unpacker.unpack('b', encoding_format)[0]
         encoding_format = EncodingFormat(encoding_format)
 
         # Currently, we are extracting only the first data point in each
@@ -153,6 +255,7 @@ class RecordReader:
         # the whole buffer.
         buffer = self.file.read(8)
         encoding_to_decoder = {
+            EncodingFormat.ASCII: (lambda a1, a2: int(1)),  # placeholder
             EncodingFormat.INT_16_BIT: decode_int16,
             EncodingFormat.INT_24_BIT: decode_int24,
             EncodingFormat.INT_32_BIT: decode_int32,
@@ -161,50 +264,61 @@ class RecordReader:
             EncodingFormat.STEIM_1: decode_steim,
             EncodingFormat.STEIM_2: decode_steim,
         }
-        first_data_point = encoding_to_decoder[encoding_format](buffer, self.data_unpacker)
-
+        first_data_point = encoding_to_decoder[encoding_format](buffer,
+                                                                self.data_unpacker)
+        # Seek back to the start of the record so that this method can be
+        # called again if needed.
+        self.file.seek(record_start)
         return first_data_point
 
 
+class MSeedReader:
+    def __init__(self, file: BinaryIO) -> None:
+        self.file = file
+
+    def read(self) -> None:
+        trace = []
+        while True:
+            # We know that the end of the file is reached when read()
+            # returns an empty byte string.
+            is_eof = (self.file.read(1) == b'')
+            if is_eof:
+                break
+            # After the end-of-file check, we need to move the file pointer
+            # back to where it was. Otherwise, we would be off by one byte
+            # for all the reads afterward.
+            self.file.seek(-1, 1)
+
+            # We save the start of the current record so that, after we are
+            # done reading the record, we can move back. This makes moving to
+            # the next record a lot easier, seeing as we can simply move the
+            # file pointer a distance the size of the current record.
+            current_record_start = self.file.tell()
+
+            reader = RecordReader(self.file)
+            trace.append(reader.get_first_data_point())
+            print('\n'.join(reader.other_blockettes))
+            # sample_count = reader.record_metadata.sample_count
+            # sample_rate = reader.record_metadata.sample_rate
+            # record_time_taken = sample_count / sample_rate
+            # record_end_time = reader.record_metadata.start_time + record_time_taken
+
+            # MSEED stores the size of a data record as an exponent of a
+            # power of two, so we have to convert that to the actual size
+            # before doing anything else.
+            record_length_exp = reader.header_unpacker.unpack(
+                'B', reader.blockette_1000.record_length
+            )[0]
+            record_size = 2 ** record_length_exp
+
+            self.file.seek(current_record_start)
+            self.file.seek(record_size, 1)
+        print(trace)
+
+
 if __name__ == '__main__':
     # numpy.set_printoptions(threshold=sys.maxsize)
-    file_path = '/Users/kle/PycharmProjects/sohstationviewer/tests/test_data/potato.sdr/XX.KC01..VCO.D.2020.145'
+    file_path = '/Users/kle/PycharmProjects/sohstationviewer/tests/test_data/DT0001__.ACE'
     file = open(file_path, 'rb')
     stream = obspy.read(file_path)
-
-    trace = []
-    while 1:
-        # We know that end of file is reached when read() returns an empty
-        # string.
-        is_eof = (file.read(1) == b'')
-        if is_eof:
-            break
-        # We need to move the file pointer back to its position before we do
-        # the end of file check. Otherwise, we would be off by one byte for all
-        # the reads afterward.
-        file.seek(-1, 1)
-
-        # We save the start of the current record so that after we are done
-        # reading the record, we can move back. This makes moving to the next
-        # record a lot easier, seeing as we can simply move the file pointer
-        # a distance the size of the current record.
-        current_record_start = file.tell()
-
-        reader = RecordReader(file)
-        trace.append(reader.get_first_data_point())
-        sample_count = reader.record_metadata.sample_count
-        sample_rate = reader.record_metadata.sample_rate
-        record_time_taken = sample_count / sample_rate
-        record_end_time = reader.record_metadata.start_time + record_time_taken
-
-        # MSEED stores the size of a data record as an exponent of a power of
-        # two, so we have to convert that to actual size before doing anything
-        # else.
-        record_length_exp = reader.data_unpacker.unpack(
-            'B', reader.blockette_1000.record_length
-        )[0]
-        record_size = 2 ** record_length_exp
-
-        file.seek(current_record_start)
-        file.seek(record_size, 1)
-    print(trace)
+    MSeedReader(file).read()
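A quick note on trying out the new entry point: the sketch below is one way the MSeedReader class might be exercised outside the hard-coded __main__ block. It assumes the module is importable as mseed_reader and that an MSEED file exists at the (hypothetical) path shown; obspy, which the experiment already imports, doubles as a reference implementation for sanity checks.

import obspy

from mseed_reader import MSeedReader

# Hypothetical path; any MSEED file should work.
file_path = 'tests/test_data/DT0001__.ACE'
with open(file_path, 'rb') as file:
    # Prints the formatted blockette 500 content (if any) for each record,
    # followed by the list of first data points, one per record.
    MSeedReader(file).read()

# Cross-check against obspy's reader.
print(obspy.read(file_path))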
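For readers unfamiliar with the record-size rule that the read loop relies on: blockette 1000 stores the record length as a base-2 exponent in a single byte, so the actual size has to be computed before jumping to the next record. A toy illustration with an assumed exponent of 12:

import struct

record_length_byte = b'\x0c'  # assumed exponent byte: 12
(record_length_exp,) = struct.unpack('B', record_length_byte)
record_size = 2 ** record_length_exp
assert record_size == 4096  # 4096-byte records are a common choice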
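The 'HHBBBBH' format used for the exception time in read_blockette_500 follows the 10-byte BTIME layout: year, day of year, hour, minute, second, an unused alignment byte, and a count of 0.0001-second ticks. A standalone sketch with made-up values, assuming big-endian byte order:

import struct

from obspy import UTCDateTime

# Made-up time: 2020, day 145, 12:30:05 plus 1234 ticks of 0.0001 s.
btime = struct.pack('>HHBBBBH', 2020, 145, 12, 30, 5, 0, 1234)
year, julday, hour, minute, second, _unused, ticks = struct.unpack(
    '>HHBBBBH', btime)
time = UTCDateTime(year=year, julday=julday, hour=hour, minute=minute,
                   second=second, microsecond=ticks * 100)
print(time)  # 2020-05-24T12:30:05.123400Z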