From 9c1b2edf4aec1751c324ea9aa8f12e3ef3b63946 Mon Sep 17 00:00:00 2001
From: ldam <ldam@passcal.nmt.edu>
Date: Tue, 23 May 2023 09:22:55 -0600
Subject: [PATCH] detach RecordReader from mseed_reader.py

---
 .../read_mseed_experiment/mseed_reader.py     | 293 +-----------------
 .../read_mseed_experiment/record_reader.py    | 288 +++++++++++++++++
 2 files changed, 292 insertions(+), 289 deletions(-)
 create mode 100644 sohstationviewer/model/mseed/read_mseed_experiment/record_reader.py

diff --git a/sohstationviewer/model/mseed/read_mseed_experiment/mseed_reader.py b/sohstationviewer/model/mseed/read_mseed_experiment/mseed_reader.py
index e15eb14cb..120c30965 100644
--- a/sohstationviewer/model/mseed/read_mseed_experiment/mseed_reader.py
+++ b/sohstationviewer/model/mseed/read_mseed_experiment/mseed_reader.py
@@ -1,291 +1,6 @@
-from numbers import Real
-from typing import BinaryIO, Optional, List
-
+from typing import BinaryIO
 import obspy
-from obspy import UTCDateTime
-
-from decode_mseed import (
-    decode_ieee_float, decode_ieee_double, decode_steim, decode_int16,
-    decode_int24, decode_int32,
-)
-from mseed_helper import (
-    FixedHeader, Blockette1000, get_data_endianness, Unpacker,
-    get_record_metadata, get_header_endianness, RecordMetadata,
-    EncodingFormat,
-)
-
-
-class RecordReader:
-    """
-    This class reads one data record from an MSEED file.
-    """
-
-    def __init__(self, file: BinaryIO) -> None:
-        # The MSEED file object to read from. The file pointer needs to be
-        # located at the start of a data record.
-        self.file = file
-
-        self.fixed_header: Optional[FixedHeader] = None
-        self.blockette_1000: Optional[Blockette1000] = None
-        self.other_blockettes: List[str] = []
-        # Utility object that helps unpack byte strings in the header (the
-        # fixed header and the blockettes).
-        # Separate from the one for data in case the header has a different
-        # byte order.
-        # TODO: change blockettes to use this unpacker as well.
-        self.header_unpacker: Unpacker = Unpacker()
-
-        self.data_unpacker: Unpacker = Unpacker()
-        self.record_metadata: Optional[RecordMetadata] = None
-
-        self.read_header()
-
-    def read_header(self) -> None:
-        """
-        Read the header of the current data record. The header includes the
-        fixed portion, blockette 1000, and any blockettes that follow.
-        """
-        # Save the start of the record so that we can go back after reading the
-        # header.
-        record_start = self.file.tell()
-
-        self.read_fixed_header()
-        self.read_blockette_1000()
-
-        header_endianness = get_header_endianness(self.fixed_header)
-        if header_endianness == 'little':
-            self.header_unpacker.byte_order_char = '<'
-        else:
-            self.header_unpacker.byte_order_char = '>'
-
-        data_endianness = get_data_endianness(self.blockette_1000)
-        if data_endianness == 'little':
-            self.data_unpacker.byte_order_char = '<'
-        else:
-            self.data_unpacker.byte_order_char = '>'
-
-        self.record_metadata = get_record_metadata(self.fixed_header,
-                                                   self.header_unpacker)
-
-        self.apply_time_correction()
-        self.read_blockettes()
-        self.file.seek(record_start)
-
-    def read_fixed_header(self) -> None:
-        """
-        Read the fixed header of the current data record and store it in
-        self.fixed_header.
-        """
-        byte_counts = [6, 1, 1, 5, 2, 3, 2, 10, 2, 2, 2, 1, 1, 1, 1, 4, 2, 2]
-
-        fixed_header_sections_values = []
-        for byte_count in byte_counts:
-            fixed_header_sections_values.append(self.file.read(byte_count))
-        self.fixed_header = FixedHeader(*fixed_header_sections_values)
-
-    def read_blockette_500(self) -> None:
-        """
-        Read blockette 500 and format its content. The result is stored for
-        future uses. Assumes that the file pointer is at the start of the
-        blockette.
-        """
-        blockette_content = {}
-        # Skip the first four bytes because they contain meta-information about
-        # the blockettes.
-        self.file.read(4)
-
-        vco_correction = self.file.read(4)
-        blockette_content['VCO correction'] = self.header_unpacker.unpack(
-            'f', vco_correction
-        )[0]
-
-        exception_time_bytes = self.file.read(10)
-        exception_time_tuple = self.header_unpacker.unpack(
-            'HHBBBBH', exception_time_bytes)
-        exception_time = UTCDateTime(year=exception_time_tuple[0],
-                                     julday=exception_time_tuple[1],
-                                     hour=exception_time_tuple[2],
-                                     minute=exception_time_tuple[3],
-                                     second=exception_time_tuple[4],
-                                     microsecond=exception_time_tuple[6] * 100)
-        blockette_content['Time of exception'] = exception_time.strftime(
-            '%Y:%j:%H:%M:%S:%f'
-        )
-
-        microsecond = self.file.read(1)
-        microsecond = self.header_unpacker.unpack('B', microsecond)[0]
-        start_time_adjustment = microsecond / (10 ** 6)
-        self.record_metadata.start_time += start_time_adjustment
-        blockette_content['Micro sec'] = microsecond
-
-        reception_quality = self.file.read(1)
-        blockette_content['Reception Quality'] = self.header_unpacker.unpack(
-            'B', reception_quality
-        )[0]
-
-        exception_count = self.file.read(4)
-        blockette_content['Exception Count'] = self.header_unpacker.unpack(
-            'I', exception_count
-        )[0]
-
-        exception_type = self.file.read(16)
-        blockette_content['Exception Type'] = self.header_unpacker.unpack(
-            '16s', exception_type
-        )[0].decode('utf-8').strip()
-
-        clock_model = self.file.read(32)
-        blockette_content['Clock Model'] = self.header_unpacker.unpack(
-            '32s', clock_model
-        )[0].decode('utf-8').strip()
-
-        clock_status = self.file.read(128)
-        blockette_content['Clock Status'] = self.header_unpacker.unpack(
-            '128s', clock_status
-        )[0].decode('utf-8').strip()
-
-        formatted_blockette = '\n'.join([f'{key}: {value}'
-                                         for key, value
-                                         in blockette_content.items()])
-        self.other_blockettes.append(formatted_blockette)
-
-    def read_blockette_1000(self) -> None:
-        """
-        Read blockette 1000 of the current data record and store it in
-        self.blockette_1000.
-        """
-        blockette_1000_section_lengths = [2, 2, 1, 1, 1, 1]
-        blockette_1000_values = []
-        for section_length in blockette_1000_section_lengths:
-            blockette_1000_values.append(self.file.read(section_length))
-
-        self.blockette_1000 = Blockette1000(*blockette_1000_values)
-
-    def read_blockette_1001(self) -> None:
-        """
-        Read blockette 1001. The only valuable thing in this blockette is the
-        more precise start time. Assumes that the file pointer is at the start
-        of the blockette.
-        """
-        self.file.read(5)
-        start_time_microsecond = self.file.read(1)
-        start_time_microsecond = self.header_unpacker.unpack(
-            'b', start_time_microsecond
-        )[0]
-        # Convert from microsecond to second so that UTCDateTime can handle it.
-        start_time_microsecond /= (10 ** 6)
-        self.record_metadata.start_time += start_time_microsecond
-        self.file.read(2)
-
-    def read_blockette_2000(self) -> None:
-        pass
-
-    def apply_time_correction(self) -> None:
-        """
-        Apply the time correction found in the fixed header to the start time.
-        """
-        # format() is used here instead of bin() because we need to pad the
-        # resulting bit string with 0 to the left.
-        activity_flags = format(
-            self.header_unpacker.unpack(
-                'B', self.fixed_header.activity_flags)[0],
-            '0>8b'
-        )
-        is_time_correction_applied = int(activity_flags[1])
-        if is_time_correction_applied:
-            return
-
-        time_correction = self.header_unpacker.unpack(
-            'L', self.fixed_header.time_correction
-        )[0]
-        # We need to convert the unit from 0.0001 seconds to seconds
-        time_correction *= 0.0001
-        self.record_metadata.start_time += time_correction
-
-    def read_blockettes(self) -> None:
-        """
-        Read all the blockettes in the current data record aside from blockette
-        1000, which has beem read previously. Currently only handle blockettes
-        500, 1001, and 2000.
-        """
-        blockette_count = self.header_unpacker.unpack(
-            'B', self.fixed_header.blockette_count
-        )[0]
-        for i in range(1, blockette_count):
-            # All blockettes store their type in the first two bytes, so we
-            # read that to determine what to do
-            next_blockette_type = self.file.read(2)
-            # Move file pointer back to start of blockette
-            self.file.seek(-2, 1)
-            next_blockette_type = self.header_unpacker.unpack(
-                'H', next_blockette_type
-            )[0]
-            if next_blockette_type not in (500, 1000, 1001):
-                print('We currently only handle blockettes 500, 1000, and'
-                      '1001.')
-                continue
-            if next_blockette_type == 500:
-                self.read_blockette_500()
-            elif next_blockette_type == 1001:
-                self.read_blockette_1001()
-            elif next_blockette_type == 2000:
-                self.read_blockette_2000()
-
-    def get_first_data_point(self) -> Real:
-        """
-        Get the first data point of the current data record.
-        :return: the first data point of the current data record, whose type is
-            determined based on the encoding type stored in blockette 1000.
-        """
-        record_start = self.file.tell()
-        data_start = self.header_unpacker.unpack(
-            'H', self.fixed_header.data_offset
-        )[0]
-        # The data start byte is defined as an offset from the start of the
-        # data record. Seeing as we should be at the start of the data record
-        # by seeking there at the end of every major step, we can simply seek
-        # to the start of the data.
-        self.file.seek(data_start, 1)
-
-        encoding_format = self.blockette_1000.encoding_format
-        encoding_format = self.header_unpacker.unpack('b', encoding_format)[0]
-        encoding_format = EncodingFormat(encoding_format)
-
-        if encoding_format == EncodingFormat.ASCII:
-            # We want to read everything in the record if the encoding is
-            # ASCII.
-            record_length_exp = self.header_unpacker.unpack(
-                'B', self.blockette_1000.record_length
-            )[0]
-            record_size = 2 ** record_length_exp
-            # This name does not make much sense with what we are doing here,
-            # but it will have to do for now.
-            # The size of the record includes the header, so we have to account
-            # for that when grabbing the data.
-            first_data_point = self.file.read(record_size - data_start)
-        else:
-
-            # Currently, we are extracting only the first data point in each
-            # record. The smallest possible amount of bytes we can extract
-            # while guaranteeing that we get the first data point in the
-            # record is 8, with Steim encodings and IEEE double precision
-            # float needing to use the whole buffer.
-            buffer = self.file.read(8)
-            encoding_to_decoder = {
-                EncodingFormat.INT_16_BIT: decode_int16,
-                EncodingFormat.INT_24_BIT: decode_int24,
-                EncodingFormat.INT_32_BIT: decode_int32,
-                EncodingFormat.IEEE_FLOAT_32_BIT: decode_ieee_float,
-                EncodingFormat.IEEE_FLOAT_64_BIT: decode_ieee_double,
-                EncodingFormat.STEIM_1: decode_steim,
-                EncodingFormat.STEIM_2: decode_steim,
-            }
-            first_data_point = encoding_to_decoder[encoding_format](
-                buffer, self.data_unpacker
-            )
-        # Seek back to the start of the record so we can call this method again
-        # if needed.
-        self.file.seek(record_start)
-        return first_data_point
+from record_reader import RecordReader
 
 
 class MSeedReader:
@@ -333,8 +48,8 @@ class MSeedReader:
 
 if __name__ == '__main__':
     # numpy.set_printoptions(threshold=sys.maxsize)
-    file_path = '/Users/kle/PycharmProjects/sohstationviewer/tests/test_data' \
-                '/DT0001__.ACE '
+    file_path = '/Users/ldam/Documents/GIT/sohstationviewer/tests/test_data/' \
+                'Q330_mixed_traces/XX-3203_4-20221222183011'
     file = open(file_path, 'rb')
     stream = obspy.read(file_path)
     MSeedReader(file).read()
diff --git a/sohstationviewer/model/mseed/read_mseed_experiment/record_reader.py b/sohstationviewer/model/mseed/read_mseed_experiment/record_reader.py
new file mode 100644
index 000000000..241dc9134
--- /dev/null
+++ b/sohstationviewer/model/mseed/read_mseed_experiment/record_reader.py
@@ -0,0 +1,288 @@
+from numbers import Real
+from typing import BinaryIO, Optional, List
+
+
+from obspy import UTCDateTime
+
+from decode_mseed import (
+    decode_ieee_float, decode_ieee_double, decode_steim, decode_int16,
+    decode_int24, decode_int32,
+)
+from mseed_helper import (
+    FixedHeader, Blockette1000, get_data_endianness, Unpacker,
+    get_record_metadata, get_header_endianness, RecordMetadata,
+    EncodingFormat,
+)
+
+
+class RecordReader:
+    """
+    This class reads one data record from an MSEED file.
+    """
+
+    def __init__(self, file: BinaryIO) -> None:
+        # Binary MSEED file to read from. Its pointer must already be at the
+        # first byte of a data record.
+        self.file = file
+
+        # Two unpackers are kept, one for the header (fixed header and
+        # blockettes) and one for the data, in case the two parts of a record
+        # do not share a byte order.
+        # TODO: change blockettes to use the header unpacker as well.
+        self.header_unpacker: Unpacker = Unpacker()
+        self.data_unpacker: Unpacker = Unpacker()
+
+        # Parsed pieces of the record, filled in by read_header() below.
+        self.record_metadata: Optional[RecordMetadata] = None
+        self.other_blockettes: List[str] = []
+        self.blockette_1000: Optional[Blockette1000] = None
+        self.fixed_header: Optional[FixedHeader] = None
+
+        self.read_header()
+
+    def read_header(self) -> None:
+        """
+        Parse the header of the record the file pointer is positioned on: the
+        fixed section, blockette 1000, and then the remaining blockettes. The
+        file pointer is put back at the start of the record afterwards.
+        """
+        # Remember where the record begins so the pointer can be restored
+        # once the header has been consumed.
+        start_of_record = self.file.tell()
+
+        self.read_fixed_header()
+        self.read_blockette_1000()
+
+        # Configure the byte order of both unpackers before anything is
+        # decoded with them.
+        self.header_unpacker.byte_order_char = (
+            '<' if get_header_endianness(self.fixed_header) == 'little'
+            else '>'
+        )
+        self.data_unpacker.byte_order_char = (
+            '<' if get_data_endianness(self.blockette_1000) == 'little'
+            else '>'
+        )
+
+        self.record_metadata = get_record_metadata(
+            self.fixed_header, self.header_unpacker
+        )
+        self.apply_time_correction()
+        self.read_blockettes()
+        self.file.seek(start_of_record)
+
+    def read_fixed_header(self) -> None:
+        """
+        Consume the fixed section of the data header from the file and keep it
+        in self.fixed_header as a FixedHeader of raw byte strings.
+        """
+        # Width in bytes of each consecutive field, in on-disk order.
+        field_widths = (6, 1, 1, 5, 2, 3, 2, 10, 2, 2, 2, 1, 1, 1, 1, 4, 2, 2)
+
+        self.fixed_header = FixedHeader(
+            *[self.file.read(width) for width in field_widths]
+        )
+
+    def read_blockette_500(self) -> None:
+        """
+        Read blockette 500 and format its content. The result is appended to
+        self.other_blockettes, and the microsecond offset is added to the
+        record start time. Assumes the file pointer is at the blockette start.
+        """
+        blockette_content = {}
+        # Skip the first four bytes because they contain meta-information about
+        # the blockettes.
+        self.file.read(4)
+
+        vco_correction = self.file.read(4)
+        blockette_content['VCO correction'] = self.header_unpacker.unpack(
+            'f', vco_correction
+        )[0]
+
+        exception_time_bytes = self.file.read(10)
+        exception_time_tuple = self.header_unpacker.unpack(
+            'HHBBBBH', exception_time_bytes)
+        exception_time = UTCDateTime(year=exception_time_tuple[0],
+                                     julday=exception_time_tuple[1],
+                                     hour=exception_time_tuple[2],
+                                     minute=exception_time_tuple[3],
+                                     second=exception_time_tuple[4],
+                                     microsecond=exception_time_tuple[6] * 100)
+        blockette_content['Time of exception'] = exception_time.strftime(
+            '%Y:%j:%H:%M:%S:%f'
+        )
+        # The offset is a signed byte ('b'), so it can be negative.
+        microsecond = self.file.read(1)
+        microsecond = self.header_unpacker.unpack('b', microsecond)[0]
+        start_time_adjustment = microsecond / (10 ** 6)
+        self.record_metadata.start_time += start_time_adjustment
+        blockette_content['Micro sec'] = microsecond
+
+        reception_quality = self.file.read(1)
+        blockette_content['Reception Quality'] = self.header_unpacker.unpack(
+            'B', reception_quality
+        )[0]
+
+        exception_count = self.file.read(4)
+        blockette_content['Exception Count'] = self.header_unpacker.unpack(
+            'I', exception_count
+        )[0]
+
+        exception_type = self.file.read(16)
+        blockette_content['Exception Type'] = self.header_unpacker.unpack(
+            '16s', exception_type
+        )[0].decode('utf-8').strip()
+
+        clock_model = self.file.read(32)
+        blockette_content['Clock Model'] = self.header_unpacker.unpack(
+            '32s', clock_model
+        )[0].decode('utf-8').strip()
+
+        clock_status = self.file.read(128)
+        blockette_content['Clock Status'] = self.header_unpacker.unpack(
+            '128s', clock_status
+        )[0].decode('utf-8').strip()
+
+        formatted_blockette = '\n'.join([f'{key}: {value}'
+                                         for key, value
+                                         in blockette_content.items()])
+        self.other_blockettes.append(formatted_blockette)
+
+    def read_blockette_1000(self) -> None:
+        """
+        Consume the six fields of blockette 1000 from the file and keep them,
+        still as raw byte strings, in self.blockette_1000.
+        """
+        # Width in bytes of each consecutive field, in the order they are
+        # stored in the file.
+        field_widths = (2, 2, 1, 1, 1, 1)
+        raw_fields = [self.file.read(width) for width in field_widths]
+
+        self.blockette_1000 = Blockette1000(*raw_fields)
+
+    def read_blockette_1001(self) -> None:
+        """
+        Consume blockette 1001, of which only the microsecond offset of the
+        start time is used. The file pointer must be at the first byte of the
+        blockette when this is called.
+        """
+        # Five leading bytes are skipped; the sixth holds the offset.
+        self.file.read(5)
+        offset_microsecond = self.header_unpacker.unpack(
+            'b', self.file.read(1)
+        )[0]
+        # The start time is adjusted in seconds, hence the conversion from
+        # microseconds.
+        self.record_metadata.start_time += offset_microsecond / (10 ** 6)
+        self.file.read(2)
+
+    def read_blockette_2000(self) -> None:
+        pass  # Stub: nothing is read from the file for blockette 2000.
+
+    def apply_time_correction(self) -> None:
+        """
+        Apply the time correction found in the fixed header to the start time,
+        unless the activity flags mark it as already applied.
+        """
+        activity_flags = self.header_unpacker.unpack(
+            'B', self.fixed_header.activity_flags
+        )[0]
+        # Bit 1 (mask 0x02, counting from the least significant bit) is set
+        # when the correction is already included in the start time.
+        is_time_correction_applied = activity_flags & 0x02
+        if is_time_correction_applied:
+            return
+
+        # The correction is a signed 32-bit value ('l'), so it can be
+        # negative. Its unit is 0.0001 seconds, converted to seconds below.
+        time_correction = self.header_unpacker.unpack(
+            'l', self.fixed_header.time_correction
+        )[0]
+        time_correction *= 0.0001
+        self.record_metadata.start_time += time_correction
+
+    def read_blockettes(self) -> None:
+        """
+        Read all the blockettes in the current data record aside from blockette
+        1000, which has been read previously. Blockettes 500, 1001, and 2000
+        are dispatched to their readers; reading stops at any other type.
+        """
+        blockette_count = self.header_unpacker.unpack(
+            'B', self.fixed_header.blockette_count
+        )[0]
+        readers = {500: self.read_blockette_500,
+                   1001: self.read_blockette_1001,
+                   2000: self.read_blockette_2000}
+        for _ in range(1, blockette_count):
+            # All blockettes store their type in the first two bytes, so we
+            # read that to determine what to do
+            next_blockette_type = self.file.read(2)
+            # Move file pointer back to start of blockette
+            self.file.seek(-2, 1)
+            next_blockette_type = self.header_unpacker.unpack(
+                'H', next_blockette_type
+            )[0]
+            if next_blockette_type not in readers:
+                # The file pointer cannot be moved past a blockette that is
+                # not parsed, so the remaining blockettes cannot be located.
+                print('We currently only handle blockettes 500, 1000, '
+                      '1001, and 2000.')
+                break
+            readers[next_blockette_type]()
+
+    def get_first_data_point(self) -> Real:
+        """
+        Get the first data point of the current data record. The file pointer
+        has to be at the start of the record when this is called and is put
+        back there before returning, even when reading or decoding fails.
+        :return: the first data point of the current data record, whose type is
+            determined based on the encoding type stored in blockette 1000.
+            For ASCII records, every byte after the data offset is returned.
+        """
+        record_start = self.file.tell()
+        try:
+            data_start = self.header_unpacker.unpack(
+                'H', self.fixed_header.data_offset
+            )[0]
+            # The data start byte is defined as an offset from the start of
+            # the data record. Seeing as we should be at the start of the
+            # data record by seeking there at the end of every major step, we
+            # can simply seek to the start of the data.
+            self.file.seek(data_start, 1)
+
+            encoding_format = EncodingFormat(self.header_unpacker.unpack(
+                'b', self.blockette_1000.encoding_format
+            )[0])
+
+            if encoding_format == EncodingFormat.ASCII:
+                # We want to read everything in the record if the encoding is
+                # ASCII.
+                record_length_exp = self.header_unpacker.unpack(
+                    'B', self.blockette_1000.record_length
+                )[0]
+                record_size = 2 ** record_length_exp
+                # The size of the record includes the header, so we have to
+                # account for that when grabbing the data.
+                first_data_point = self.file.read(record_size - data_start)
+            else:
+                # Only the first data point is extracted. 8 bytes is the
+                # smallest read that is guaranteed to contain it; Steim
+                # encodings and IEEE doubles need the whole buffer.
+                buffer = self.file.read(8)
+                encoding_to_decoder = {
+                    EncodingFormat.INT_16_BIT: decode_int16,
+                    EncodingFormat.INT_24_BIT: decode_int24,
+                    EncodingFormat.INT_32_BIT: decode_int32,
+                    EncodingFormat.IEEE_FLOAT_32_BIT: decode_ieee_float,
+                    EncodingFormat.IEEE_FLOAT_64_BIT: decode_ieee_double,
+                    EncodingFormat.STEIM_1: decode_steim,
+                    EncodingFormat.STEIM_2: decode_steim,
+                }
+                first_data_point = encoding_to_decoder[encoding_format](
+                    buffer, self.data_unpacker
+                )
+            return first_data_point
+        finally:
+            # Restore the pointer on success and on error alike so that the
+            # record can be read again.
+            self.file.seek(record_start)
\ No newline at end of file
-- 
GitLab