From 61b8b398aa5a7b02059dcd43a710fc8d9b697d2a Mon Sep 17 00:00:00 2001
From: kienle <kienle@passcal.nmt.edu>
Date: Thu, 10 Aug 2023 10:58:22 -0600
Subject: [PATCH] Documentation

---
 .../model/reftek/from_rt2ms/core.py           | 36 +++++++++++++++++--
 .../model/reftek/from_rt2ms/packet.py         | 13 +++++--
 .../reftek/rt130_experiment/dt_packet.py      | 35 ++++++++++++++++++
 .../reftek/rt130_experiment/eh_et_packet.py   | 22 ++++++++++++
 .../model/reftek/rt130_experiment/header.py   | 13 +++++--
 .../reftek/rt130_experiment/reftek_helper.py  | 35 +++++++++++++-----
 .../reftek/rt130_experiment/soh_packets.py    | 27 ++++++++++++++
 7 files changed, 166 insertions(+), 15 deletions(-)

diff --git a/sohstationviewer/model/reftek/from_rt2ms/core.py b/sohstationviewer/model/reftek/from_rt2ms/core.py
index 00d27a172..aed69845d 100644
--- a/sohstationviewer/model/reftek/from_rt2ms/core.py
+++ b/sohstationviewer/model/reftek/from_rt2ms/core.py
@@ -33,6 +33,10 @@ from sohstationviewer.model.reftek.rt130_experiment.reftek_helper import (
 
 
 class DiscontinuousTrace(Trace):
+    """
+    Extension of obspy.Trace that changes the way time data is handled when
+    reading data using the method from logpeek/qpeek.
+    """
     def __init__(self, *args, times, **kwargs):
         super().__init__(*args, **kwargs)
         self._times = times
@@ -65,12 +69,22 @@ class Reftek130Exception(ObsPyException):
 
 
 class Reftek130(obspy_rt130_core.Reftek130):
+    """
+    Child class of obspy.Reftek130 that reads waveform data similar to logpeek for
+    better performance.
+    """
     @staticmethod
-    def from_file(filename: Union[str, Path]):
+    def from_file(file: Union[str, Path]):
+        """
+        Read data from an RT130 file and save it in a Reftek130 object.
+        :param file: the RT130 file to read
+        :return: a Reftek130 object that stores the data in file
+        """
+        # RT130 data is all big-endian
         rt130_unpacker = Unpacker('>')
         rt = Reftek130()
-        rt._filename = filename
-        packets_in_file = read_rt130_file(filename, rt130_unpacker)
+        rt._filename = file
+        packets_in_file = read_rt130_file(file, rt130_unpacker)
         converted_packets = []
         for packet in packets_in_file:
             converted_packets.append(
@@ -83,6 +97,9 @@ class Reftek130(obspy_rt130_core.Reftek130):
                   headonly=False, verbose=False,
                   sort_permuted_package_sequence=False):
         """
+        Create an obspy.Stream object that holds the data stored in this
+            Reftek130 object.
+
         :type headonly: bool
         :param headonly: Determines whether or not to unpack the data or just
             read the headers.
@@ -173,6 +190,13 @@ class Reftek130(obspy_rt130_core.Reftek130):
                         sample_data = np.array([], dtype=np.int32)
                         npts = packets_["number_of_samples"].sum()
                     else:
+                        # The payload stores the first data point of each
+                        # packet, encoded as a numpy array of 4 1-byte numbers.
+                        # Due to the way the payload is encoded during the
+                        # reading process and a quirk of 2's complement binary
+                        # numbers (namely, prepending a negative number with 1s
+                        # does not change its value), we do not have to care
+                        # about the actual encoding type of the stored packets.
                         sample_data = (packets_['payload'][:, :4])
                         sample_data = sample_data.view(np.dtype('>i4')).squeeze()
                         npts = len(sample_data)
@@ -180,6 +204,12 @@ class Reftek130(obspy_rt130_core.Reftek130):
                         data=sample_data, header=copy.deepcopy(header),
                         times=(packets_['time'] / 10**9).round(3)
                     )
+                    # The plotting process needs to know about the number of
+                    # points stored in the trace. However, tr.stats uses the
+                    # stored npts to calculate some other metadata, so we can't
+                    # store that information there. As a compromise, we keep
+                    # tr.stats.npts the same, while storing the actual number
+                    # of data points in the trace in another part of tr.stats.
                     tr.stats.npts = packets_['number_of_samples'].sum()
                     tr.stats.actual_npts = npts
                     # channel number is not included in the EH/ET packet
diff --git a/sohstationviewer/model/reftek/from_rt2ms/packet.py b/sohstationviewer/model/reftek/from_rt2ms/packet.py
index fcd2abe24..f9533d2b5 100644
--- a/sohstationviewer/model/reftek/from_rt2ms/packet.py
+++ b/sohstationviewer/model/reftek/from_rt2ms/packet.py
@@ -32,6 +32,8 @@ eh_et_payload_last_field_size = 16
 eh_et_payload_end_in_packet = eh_et_payload_last_field_start + eh_et_payload_last_field_size + 24
 
 # name, offset, length (bytes) and converter routine for EH/ET packet payload
+# Trimmed to only include the parts used elsewhere for the sake of better
+# performance.
 EH_PAYLOAD = {
     "station_name_extension": (35, 1, _decode_ascii),
     "station_name": (36, 4, _decode_ascii),
@@ -74,6 +76,15 @@ obspy_rt130_packet.EH_PAYLOAD = {
 
 class EHPacket(obspy_rt130_packet.EHPacket):
     def __init__(self, data):
+        """
+        Reimplement __init__ to use a different value for EH_PAYLOAD.
+        This should be the cleanest way to do it, seeing as any other way I
+        can think of modifies EH_PAYLOAD in the original file, which can have
+        consequences that are not readily apparent.
+
+        :param data: the data of an EH packet. For more information, refer to
+            obspy.io.reftek.packet.PACKET_FINAL_DTYPE.
+        """
         self._data = data
         payload = self._data["payload"].tobytes()
         for name, (start, length, converter) in EH_PAYLOAD.items():
@@ -157,5 +168,3 @@ class EHPacket(obspy_rt130_packet.EHPacket):
                                    self.sampling_rate))
         info.append(packet_tagline2)
         return info
-
-
diff --git a/sohstationviewer/model/reftek/rt130_experiment/dt_packet.py b/sohstationviewer/model/reftek/rt130_experiment/dt_packet.py
index 6b576c5df..b72ce54e6 100644
--- a/sohstationviewer/model/reftek/rt130_experiment/dt_packet.py
+++ b/sohstationviewer/model/reftek/rt130_experiment/dt_packet.py
@@ -5,6 +5,15 @@ from sohstationviewer.model.reftek.rt130_experiment.header import PacketHeader
 
 
 def decode_uncompressed(packet: bytes, data_format: str, unpacker: Unpacker):
+    """
+    Grab the first data point in a packet that contains uncompressed RT130 data
+    (aka packets with data format 16, 32, or 33).
+    :param packet: the bytes that make up the given packet.
+    :param data_format: the data format of the given packet, can be one of 16,
+        32, or 33.
+    :param unpacker: the unpacker to use to decode the data.
+    :return: the first data point in the given packet
+    """
     data = packet[24:]
     # For uncompressed RT130 data, the data format is also the size of a data
     # point in bit (aside from data format 33, which uses the same size as data
@@ -25,12 +34,32 @@ def decode_uncompressed(packet: bytes, data_format: str, unpacker: Unpacker):
 
 
 def decode_compressed(packet: bytes, data_format: str, unpacker: Unpacker):
+    """
+    Grab the stop point in a packet that contains compressed RT130 data (aka
+    packets with data format C0, C1, C2, or C3).
+    We get the stop point in this case because that is what logpeek did. It
+    also looks a lot better than using the start point, so that is a plus.
+    :param packet: the bytes that make up the given packet.
+    :param data_format: the data format of the given packet, can be one of C0,
+        C1, C2, or C3. Exists only to have the same signature as
+        decode_uncompressed
+    :param unpacker: the unpacker to use to decode the data.
+    :return: the stop point of the given packet
+    """
+    # The data in a compressed data packet starts at byte 64, with bytes
+    # between byte 24 and 64 being fillers.
     data = packet[64:]
     first_data_point = data[8:12]
     return unpacker.unpack('i', first_data_point)[0]
 
 
 def read_dt_packet(packet: bytes, unpacker: Unpacker):
+    """
+    Process a DT packet and get its extended header and first data point.
+    :param packet: the bytes that make up the given DT packet.
+    :param unpacker: the unpacker to use to decode the data.
+    :return: the extended header and first data point of the given DT packet.
+    """
     decoders = {
         **dict.fromkeys(['16', '32', '33'], decode_uncompressed),
         **dict.fromkeys(['C0', 'C1', 'C2', 'C3'], decode_compressed)
@@ -52,6 +81,9 @@ def read_dt_packet(packet: bytes, unpacker: Unpacker):
 
 @dataclasses.dataclass
 class DTExtendedHeader:
+    """
+    The extended header of a DT packet.
+    """
     event_number: int
     data_stream_number: int
     channel_number: int
@@ -62,6 +94,9 @@ class DTExtendedHeader:
 
 @dataclasses.dataclass
 class DTPacket:
+    """
+    The decoded data of a DT packet.
+    """
     header: PacketHeader
     extended_header: DTExtendedHeader
     data: int
diff --git a/sohstationviewer/model/reftek/rt130_experiment/eh_et_packet.py b/sohstationviewer/model/reftek/rt130_experiment/eh_et_packet.py
index b2e9917a5..b9faee4c0 100644
--- a/sohstationviewer/model/reftek/rt130_experiment/eh_et_packet.py
+++ b/sohstationviewer/model/reftek/rt130_experiment/eh_et_packet.py
@@ -7,6 +7,14 @@ from sohstationviewer.model.reftek.rt130_experiment.header import PacketHeader
 
 
 def read_eh_et_packet(packet: bytes, unpacker: Unpacker):
+    """
+    Process an EH/ET packet and get its extended header and required part of
+    the payload.
+    :param packet: the bytes that make up the given EH/ET packet.
+    :param unpacker: the unpacker to use to decode the data.
+    :return: the extended header and truncated payload of the given EH/ET
+        packet.
+    """
     event_number = int(packet[16:18].hex())
     data_stream_number = int(packet[18:19].hex())
     flags = unpacker.unpack('B', packet[22:23])[0]
@@ -14,12 +22,21 @@ def read_eh_et_packet(packet: bytes, unpacker: Unpacker):
 
     extended_header = EHETExtendedHeader(event_number, data_stream_number,
                                          flags, data_format)
+    # The largest possible data point has a size of 4 bytes, so we need to
+    # grab at least 4 bytes of data.
     payload = packet[24:eh_et_payload_end_in_packet]
     return extended_header, payload
 
 
 @dataclasses.dataclass
 class EHETExtendedHeader:
+    """
+    A collection of some useful information about an EH/ET packet. Technically,
+    EH/ET packets do not have extended headers. We name this class what it is
+    due to the way obspy.Reftek130 (and consequently, core.Reftek130) stores
+    the data of processed packets. For more information, refer to
+    Reftek130._data.
+    """
     event_number: int
     data_stream_number: int
     flags: int
@@ -32,6 +49,11 @@ class EHETExtendedHeader:
 
 @dataclasses.dataclass
 class EHETPacket:
+    """
+    The decoded data of an EH/ET packet. The extended_header field is to ensure
+    compatibility with dt_packet.DTPacket. EH/ET packets do not have an
+    extended header otherwise.
+    """
     header: PacketHeader
     extended_header: EHETExtendedHeader
     data: bytes
diff --git a/sohstationviewer/model/reftek/rt130_experiment/header.py b/sohstationviewer/model/reftek/rt130_experiment/header.py
index d09e86bf9..97bb3aab2 100644
--- a/sohstationviewer/model/reftek/rt130_experiment/header.py
+++ b/sohstationviewer/model/reftek/rt130_experiment/header.py
@@ -8,6 +8,9 @@ from sohstationviewer.model.reftek.rt130_experiment import reftek_helper
 
 @dataclasses.dataclass
 class PacketHeader:
+    """
+    The decoded header of an RT130 packet.
+    """
     packet_type: str
     experiment_number: int
     unit_id: str
@@ -17,6 +20,13 @@ class PacketHeader:
 
 
 def parse_rt130_time(year: int, time_bytes: bytes) -> UTCDateTime:
+    """
+    Convert BCD-encoded RT130 time into UTCDateTime.
+    :param year: the year of the time. RT130's header stores the year separate
+        from the time, so we have to pass it as an argument.
+    :param time_bytes: the BCD-encoded time.
+    :return: a UTCDateTime object that stores the decoded time.
+    """
     time_string = time_bytes.hex()
     # The time string has the format of DDDHHMMSSTTT, where
     # D = day of year
@@ -45,8 +55,7 @@ def parse_rt130_time(year: int, time_bytes: bytes) -> UTCDateTime:
     return converted_time
 
 
-def get_rt130_packet_header(rt130_packet: bytes,
-                            unpacker: Unpacker) -> PacketHeader:
+def get_rt130_packet_header(rt130_packet: bytes) -> PacketHeader:
     try:
         # Because RT130 data is always big-endian, it is more convenient to
         # use str.decode() than the unpacker.
diff --git a/sohstationviewer/model/reftek/rt130_experiment/reftek_helper.py b/sohstationviewer/model/reftek/rt130_experiment/reftek_helper.py
index b3566dd51..8b5379f2b 100644
--- a/sohstationviewer/model/reftek/rt130_experiment/reftek_helper.py
+++ b/sohstationviewer/model/reftek/rt130_experiment/reftek_helper.py
@@ -29,10 +29,19 @@ def packet_reader_placeholder(*args: Any, **kwargs: Any) -> None:
 
 
 class RT130ParseError(Exception):
+    """
+    Error to raise when there is a problem with parsing RT130 data.
+    """
     pass
 
 
 def read_rt130_file(file_name: str, unpacker: Unpacker):
+    """
+    Read an RT130 file and store the data in a list of RT130 packets.
+    :param file_name: the name of the file to read.
+    :param unpacker: the decoder used to decode the data.
+    :return: a list of processed RT130 packets.
+    """
     # RT130 data looks to be all big-endian (logpeek assumes this, and it has
     # been working pretty well), so we don't have to do any endianness check.
 
@@ -43,6 +52,8 @@ def read_rt130_file(file_name: str, unpacker: Unpacker):
     packets = []
 
     with open(file_name, 'rb') as rt130_file:
+        # Each packet is exactly 1024 bytes, so we can rely on that to know
+        # when we have finished reading.
         for i in range(os.path.getsize(file_name) // 1024):
             packet = rt130_file.read(1024)
             packet_header = get_rt130_packet_header(packet, unpacker)
@@ -54,7 +65,8 @@ def read_rt130_file(file_name: str, unpacker: Unpacker):
             }
 
             soh_handlers: Dict[str, Callable] = dict.fromkeys(
-                ['AD', 'CD', 'DS', 'FD', 'OM', 'SC', 'SH'], read_soh_packet
+                ['AD', 'CD', 'DS', 'FD', 'OM', 'SC', 'SH'],
+                read_soh_packet
             )
 
             packet_handlers = {
@@ -79,8 +91,16 @@ def read_rt130_file(file_name: str, unpacker: Unpacker):
     return packets
 
 
-def convert_packet_to_obspy_format(packet: Union[EHETPacket, DTPacket],
+def convert_packet_to_obspy_format(packet: Union[EHETPacket, DTPacket,
+                                                 SOHPacket],
                                    unpacker: Unpacker):
+    """
+    Convert an RT130 packet into a numpy array of type PACKET_FINAL_DTYPE.
+    :param packet: an RT130 packet.
+    :param unpacker: the decoder used to decode the data.
+    :return: a numpy array of type PACKET_FINAL_DTYPE that contains the data
+        stored in packet.
+    """
     # We want to convert the packet to a tuple. In order to make it easier to
     # maintain, we first convert the packet to a dictionary. Then, we grab the
     # values of the dictionary as tuple to get the final result.
@@ -105,12 +125,11 @@ def convert_packet_to_obspy_format(packet: Union[EHETPacket, DTPacket],
     if converted_packet['packet_type'] == 'DT':
         # Obspy stores the data as list of 1-byte integers. We store the
         # data as an arbitrary length integer, so we need to do some
-        # conversion. To make converting the resulting tuple to an element
-        # of a structured array of type PACKET_FINAL_DTYPE easier, we set
-        # the size of the payload to be 4. This only affect data with format
-        # 16, and as long as we are careful in self.to_stream, we don't even
-        # have to make a special case when decoding (note: this is possible
-        # because of a peculiarity of the 2's complement encoding).
+        # conversion. To make encoding and decoding the data point easier, we
+        # store it in 4 bytes no matter what the data format is. This only
+        # has an effect on data with format 16. Thanks to a quirk with
+        # 2's complement binary encoding, however, this does not cause any
+        # problem.
         data_size = 4
         format_char = 'B'
         converted_packet['payload'] = numpy.empty(1000, np.uint8)
diff --git a/sohstationviewer/model/reftek/rt130_experiment/soh_packets.py b/sohstationviewer/model/reftek/rt130_experiment/soh_packets.py
index 55cfd0088..90facbda8 100644
--- a/sohstationviewer/model/reftek/rt130_experiment/soh_packets.py
+++ b/sohstationviewer/model/reftek/rt130_experiment/soh_packets.py
@@ -10,6 +10,15 @@ import numpy
 
 @dataclasses.dataclass
 class SOHExtendedHeader:
+    """
+    A collection of dummy data for some information needed so that
+    core.Reftek130 can understand SOH packets.
+
+    core.Reftek130 focuses on reading waveform data, so it wants information
+    available in the waveform packets (EH/ET/DT). However, core.Reftek130 also
+    supports SOH packets, which do not contain the required information. As
+    a result, we need to store dummy data in its place.
+    """
     event_number: int
     data_stream_number: int
     channel_number: int
@@ -20,17 +29,35 @@ class SOHExtendedHeader:
 
 @dataclasses.dataclass
 class SOHPacket:
+    """
+    The decoded data of an SOH packet. The extended_header field is to ensure
+    compatibility with dt_packet.DTPacket. SOH packets do not have an
+    extended header otherwise.
+    """
     header: PacketHeader
     extended_header: SOHExtendedHeader
     data: bytes
 
 
 def bcd_16bit_int(_i):
+    """
+    Reimplement a private function of the same name in obspy. Kept here in case
+    the private function is removed in future obspy version.
+    :param _i: the byte string to convert into a 16-bit integer
+    :return: a 16-bit integer
+    """
     _i = bcd(_i)
     return _i[0] * 100 + _i[1]
 
 
 def read_soh_packet(packet: bytes, unpacker: Unpacker):
+    """
+    Process an SOH packet and get its extended header and payload.
+    :param packet: the bytes that make up the given SOH packet.
+    :param unpacker: the unpacker to use to decode the data.
+    :return: the extended header and payload of the given SOH packet.
+    """
+
     event_number = bcd_16bit_int(numpy.frombuffer(packet[16:18], numpy.uint8))
     data_stream_number = bcd(numpy.frombuffer(packet[18:19], numpy.uint8))
     channel_number = bcd(numpy.frombuffer(packet[19:20], numpy.uint8))
-- 
GitLab