diff --git a/sohstationviewer/model/reftek/from_rt2ms/core.py b/sohstationviewer/model/reftek/from_rt2ms/core.py index 00d27a17258c0d4b4cb75fb3b6b92757a8267ff3..aed69845d2f599eb51c230c4c76d8a0b8db856a3 100644 --- a/sohstationviewer/model/reftek/from_rt2ms/core.py +++ b/sohstationviewer/model/reftek/from_rt2ms/core.py @@ -33,6 +33,10 @@ from sohstationviewer.model.reftek.rt130_experiment.reftek_helper import ( class DiscontinuousTrace(Trace): + """ + Extension of obspy.Trace that changes the way time data is handled when + reading data using the method from logpeek/qpeek. + """ def __init__(self, *args, times, **kwargs): super().__init__(*args, **kwargs) self._times = times @@ -65,12 +69,22 @@ class Reftek130Exception(ObsPyException): class Reftek130(obspy_rt130_core.Reftek130): + """ + Child class of obspy.Reftek that reads waveform data similar to logpeek for + better performance. + """ @staticmethod - def from_file(filename: Union[str, Path]): + def from_file(file: Union[str, Path]): + """ + Read data from an RT130 file and save it in a Reftek130 object. + :param file: the RT130 file to read + :return: a Reftek130 object that stores the data in file + """ + # RT130 data is all big-endian rt130_unpacker = Unpacker('>') rt = Reftek130() - rt._filename = filename - packets_in_file = read_rt130_file(filename, rt130_unpacker) + rt._filename = file + packets_in_file = read_rt130_file(file, rt130_unpacker) converted_packets = [] for packet in packets_in_file: converted_packets.append( @@ -83,6 +97,9 @@ class Reftek130(obspy_rt130_core.Reftek130): headonly=False, verbose=False, sort_permuted_package_sequence=False): """ + Create an obspy.Stream object that holds the data stored in this + Reftek130 object. + :type headonly: bool :param headonly: Determines whether or not to unpack the data or just read the headers. 
@@ -173,6 +190,13 @@ class Reftek130(obspy_rt130_core.Reftek130): sample_data = np.array([], dtype=np.int32) npts = packets_["number_of_samples"].sum() else: + # The payload stores the first data point of each + # packet, encoded as a numpy array of 4 1-byte numbers. + # Due to the way the payload is encoded during the + # reading process and a quirk of 2's-complement binary + # numbers (namely, prepending 1s to a negative number + # does not change its value), we do not have to care + # about the actual encoding type of the stored packets. sample_data = (packets_['payload'][:, :4]) sample_data = sample_data.view(np.dtype('>i4')).squeeze() npts = len(sample_data) @@ -180,6 +204,12 @@ class Reftek130(obspy_rt130_core.Reftek130): data=sample_data, header=copy.deepcopy(header), times=(packets_['time'] / 10**9).round(3) ) + # The plotting process needs to know about the number of + # points stored in the trace. However, tr.stats uses the + # stored npts to calculate some other metadata, so we can't + # store that information there. As a compromise, we keep + # tr.stats.npts the same, while storing the actual number + # of data points in the trace in another part of tr.stats. tr.stats.npts = packets_['number_of_samples'].sum() tr.stats.actual_npts = npts # channel number is not included in the EH/ET packet diff --git a/sohstationviewer/model/reftek/from_rt2ms/packet.py b/sohstationviewer/model/reftek/from_rt2ms/packet.py index fcd2abe24416e23276696cdfc8d049a82071b6ac..f9533d2b5bbb9ab011261e0278136ebb1f1f863f 100644 --- a/sohstationviewer/model/reftek/from_rt2ms/packet.py +++ b/sohstationviewer/model/reftek/from_rt2ms/packet.py @@ -32,6 +32,8 @@ eh_et_payload_last_field_size = 16 eh_et_payload_end_in_packet = eh_et_payload_last_field_start + eh_et_payload_last_field_size + 24 # name, offset, length (bytes) and converter routine for EH/ET packet payload +# Trimmed to only include the parts used elsewhere for the sake of better +# performance. 
EH_PAYLOAD = { "station_name_extension": (35, 1, _decode_ascii), "station_name": (36, 4, _decode_ascii), @@ -74,6 +76,15 @@ obspy_rt130_packet.EH_PAYLOAD = { class EHPacket(obspy_rt130_packet.EHPacket): def __init__(self, data): + """ + Reimplement __init__ to use a different value for EH_PAYLOAD. + This should be the cleanest way to do it, seeing as any other way I + can think of modifies EH_PAYLOAD in the original file, which can have + consequences that are not readily apparent. + + :param data: the data of an EH packet. For more information, refer to + obspy.io.reftek.packet.PACKET_FINAL_DTYPE. + """ self._data = data payload = self._data["payload"].tobytes() for name, (start, length, converter) in EH_PAYLOAD.items(): @@ -157,5 +168,3 @@ class EHPacket(obspy_rt130_packet.EHPacket): self.sampling_rate)) info.append(packet_tagline2) return info - - diff --git a/sohstationviewer/model/reftek/rt130_experiment/dt_packet.py b/sohstationviewer/model/reftek/rt130_experiment/dt_packet.py index 6b576c5df49966184ead4965f06a801f284f2a84..b72ce54e6e520a438371972ead82a542578febc1 100644 --- a/sohstationviewer/model/reftek/rt130_experiment/dt_packet.py +++ b/sohstationviewer/model/reftek/rt130_experiment/dt_packet.py @@ -5,6 +5,15 @@ from sohstationviewer.model.reftek.rt130_experiment.header import PacketHeader def decode_uncompressed(packet: bytes, data_format: str, unpacker: Unpacker): + """ + Grab the first data point in a packet that contains uncompressed RT130 data + (aka packets with data format 16, 32, or 33). + :param packet: the bytes that make up the given packet. + :param data_format: the data format of the given packet, can be one of 16, + 32, or 33. + :param unpacker: the unpacker to use to decode the data. 
+ :return: the first data point in the given packet + """ data = packet[24:] # For uncompressed RT130 data, the data format is also the size of a data # point in bit (aside from data format 33, which uses the same size as data @@ -25,12 +34,32 @@ def decode_uncompressed(packet: bytes, data_format: str, unpacker: Unpacker): def decode_compressed(packet: bytes, data_format: str, unpacker: Unpacker): + """ + Grab the stop point in a packet that contains compressed RT130 data (aka + packets with data format C0, C1, C2, or C3). + We get the stop point in this case because that is what logpeek did. It + also looks a lot better than using the start point, so that is a plus. + :param packet: the bytes that make up the given packet. + :param data_format: the data format of the given packet, can be one of C0, + C1, C2, or C3. Exists only to have the same signature as + decode_uncompressed. + :param unpacker: the unpacker to use to decode the data. + :return: the stop point in the given packet + """ + # The data in a compressed data packet starts at byte 64, with bytes + # between byte 24 and 64 being fillers. data = packet[64:] first_data_point = data[8:12] return unpacker.unpack('i', first_data_point)[0] def read_dt_packet(packet: bytes, unpacker: Unpacker): + """ + Process a DT packet and get its extended header and first data point. + :param packet: the bytes that make up the given DT packet. + :param unpacker: the unpacker to use to decode the data. + :return: the extended header and first data point of the given DT packet. + """ decoders = { **dict.fromkeys(['16', '32', '33'], decode_uncompressed), **dict.fromkeys(['C0', 'C1', 'C2', 'C3'], decode_compressed) @@ -52,6 +81,9 @@ def read_dt_packet(packet: bytes, unpacker: Unpacker): @dataclasses.dataclass class DTExtendedHeader: + """ + The extended header of a DT packet. 
+ """ event_number: int data_stream_number: int channel_number: int @@ -62,6 +94,9 @@ class DTExtendedHeader: @dataclasses.dataclass class DTPacket: + """ + The decoded data of a DT packet. + """ header: PacketHeader extended_header: DTExtendedHeader data: int diff --git a/sohstationviewer/model/reftek/rt130_experiment/eh_et_packet.py b/sohstationviewer/model/reftek/rt130_experiment/eh_et_packet.py index b2e9917a55d6e9b72ef51cf02b882fa843b9a344..b9faee4c056b07acf842a00af8fd75d4b4f17e70 100644 --- a/sohstationviewer/model/reftek/rt130_experiment/eh_et_packet.py +++ b/sohstationviewer/model/reftek/rt130_experiment/eh_et_packet.py @@ -7,6 +7,14 @@ from sohstationviewer.model.reftek.rt130_experiment.header import PacketHeader def read_eh_et_packet(packet: bytes, unpacker: Unpacker): + """ + Process an EH/ET packet and get its extended header and required part of + the payload. + :param packet: the bytes that make up the given EH/ET packet. + :param unpacker: the unpacker to use to decode the data. + :return: the extended header and truncated payload of the given EH/ET + packet. + """ event_number = int(packet[16:18].hex()) data_stream_number = int(packet[18:19].hex()) flags = unpacker.unpack('B', packet[22:23])[0] @@ -14,12 +22,21 @@ def read_eh_et_packet(packet: bytes, unpacker: Unpacker): extended_header = EHETExtendedHeader(event_number, data_stream_number, flags, data_format) + # The largest possible data point has a size of 4 bytes, so we need to + # grab at least that much data. payload = packet[24:eh_et_payload_end_in_packet] return extended_header, payload @dataclasses.dataclass class EHETExtendedHeader: + """ + A collection of some useful information about an EH/ET packet. Technically, + EH/ET packets do not have extended headers. We name this class what it is + due to the way obspy.Reftek130 (and consequently, core.Reftek130) stores + the data of processed packets. For more information, refer to + Reftek130._data. 
+ """ event_number: int data_stream_number: int flags: int @@ -32,6 +49,11 @@ class EHETExtendedHeader: @dataclasses.dataclass class EHETPacket: + """ + The decoded data of an EH/ET packet. The extended_header field is to ensure + compatibility with dt_packet.DTPacket. EH/ET packets do not have an + extended header otherwise. + """ header: PacketHeader extended_header: EHETExtendedHeader data: bytes diff --git a/sohstationviewer/model/reftek/rt130_experiment/header.py b/sohstationviewer/model/reftek/rt130_experiment/header.py index d09e86bf91d4d80e6557d85dc2f2783fb94a4e5d..97bb3aab27787ea69e28224317421a767d90d297 100644 --- a/sohstationviewer/model/reftek/rt130_experiment/header.py +++ b/sohstationviewer/model/reftek/rt130_experiment/header.py @@ -8,6 +8,9 @@ from sohstationviewer.model.reftek.rt130_experiment import reftek_helper @dataclasses.dataclass class PacketHeader: + """ + The decoded header of an RT130 packet. + """ packet_type: str experiment_number: int unit_id: str @@ -17,6 +20,13 @@ class PacketHeader: def parse_rt130_time(year: int, time_bytes: bytes) -> UTCDateTime: + """ + Convert BCD-encoded RT130 time into UTCDateTime. + :param year: the year of the time. RT130's header stores the year + separately from the time, so we have to pass it as an argument. + :param time_bytes: the BCD-encoded time. + :return: a UTCDateTime object that stores the decoded time. + """ time_string = time_bytes.hex() # The time string has the format of DDDHHMMSSTTT, where # D = day of year @@ -45,8 +55,7 @@ def parse_rt130_time(year: int, time_bytes: bytes) -> UTCDateTime: return converted_time -def get_rt130_packet_header(rt130_packet: bytes, - unpacker: Unpacker) -> PacketHeader: +def get_rt130_packet_header(rt130_packet: bytes) -> PacketHeader: try: # Because RT130 data is always big-endian, it is more convenient to # use str.decode() than the unpacker. 
diff --git a/sohstationviewer/model/reftek/rt130_experiment/reftek_helper.py b/sohstationviewer/model/reftek/rt130_experiment/reftek_helper.py index b3566dd514a9211175eb873ec57e6d83005df598..8b5379f2b94806a2bb5257e11431c9f7a0a9fd0f 100644 --- a/sohstationviewer/model/reftek/rt130_experiment/reftek_helper.py +++ b/sohstationviewer/model/reftek/rt130_experiment/reftek_helper.py @@ -29,10 +29,19 @@ def packet_reader_placeholder(*args: Any, **kwargs: Any) -> None: class RT130ParseError(Exception): + """ + Error to raise when there is a problem with parsing RT130 data. + """ pass def read_rt130_file(file_name: str, unpacker: Unpacker): + """ + Read an RT130 file and store the data in a list of RT130 packets. + :param file_name: the name of the file to read. + :param unpacker: the decoder used to decode the data. + :return: a list of processed RT130 packets. + """ # RT130 data looks to be all big-endian (logpeek assumes this, and it has # been working pretty well), so we don't have to do any endianness check. @@ -43,6 +52,8 @@ def read_rt130_file(file_name: str, unpacker: Unpacker): packets = [] with open(file_name, 'rb') as rt130_file: + # Each packet is exactly 1024 bytes, so we can rely on that to know + # when we have finished reading. 
for i in range(os.path.getsize(file_name) // 1024): packet = rt130_file.read(1024) packet_header = get_rt130_packet_header(packet, unpacker) @@ -54,7 +65,8 @@ def read_rt130_file(file_name: str, unpacker: Unpacker): } soh_handlers: Dict[str, Callable] = dict.fromkeys( - ['AD', 'CD', 'DS', 'FD', 'OM', 'SC', 'SH'], read_soh_packet + ['AD', 'CD', 'DS', 'FD', 'OM', 'SC', 'SH'], + read_soh_packet ) packet_handlers = { @@ -79,8 +91,16 @@ def read_rt130_file(file_name: str, unpacker: Unpacker): return packets -def convert_packet_to_obspy_format(packet: Union[EHETPacket, DTPacket], +def convert_packet_to_obspy_format(packet: Union[EHETPacket, DTPacket, + SOHPacket], unpacker: Unpacker): + """ + Convert an RT130 packet into a numpy array of type PACKET_FINAL_DTYPE + :param packet: an RT130 packet. + :param unpacker: the decoder used to decode the data. + :return: a numpy array of type PACKET_FINAL_DTYPE that contains the data + stored in packet. + """ # We want to convert the packet to a tuple. In order to make it easier to # maintain, we first convert the packet to a dictionary. Then, we grab the # values of the dictionary as tuple to get the final result. @@ -105,12 +125,11 @@ def convert_packet_to_obspy_format(packet: Union[EHETPacket, DTPacket], if converted_packet['packet_type'] == 'DT': # Obspy stores the data as list of 1-byte integers. We store the # data as an arbitrary length integer, so we need to do some - # conversion. To make converting the resulting tuple to an element - # of a structured array of type PACKET_FINAL_DTYPE easier, we set - # the size of the payload to be 4. This only affect data with format - # 16, and as long as we are careful in self.to_stream, we don't even - # have to make a special case when decoding (note: this is possible - # because of a peculiarity of the 2's complement encoding). + # conversion. To make encoding and decoding the data point easier, we + # store it in 4 bytes no matter what the data format is. 
This only + # has an effect on data with format 16. Thanks to a quirk with + # 2's-complement binary encoding, however, this does not cause any + # problem. data_size = 4 format_char = 'B' converted_packet['payload'] = numpy.empty(1000, np.uint8) diff --git a/sohstationviewer/model/reftek/rt130_experiment/soh_packets.py b/sohstationviewer/model/reftek/rt130_experiment/soh_packets.py index 55cfd008832df1a117ea8bf2c9b29af49916c643..90facbda84adb0480537fd4ed7d0cf68e86acd94 100644 --- a/sohstationviewer/model/reftek/rt130_experiment/soh_packets.py +++ b/sohstationviewer/model/reftek/rt130_experiment/soh_packets.py @@ -10,6 +10,15 @@ import numpy @dataclasses.dataclass class SOHExtendedHeader: + """ + A collection of dummy data for some information needed so that + core.Reftek130 can understand SOH packets. + + core.Reftek130 focuses on reading waveform data, so it wants information + available in the waveform packets (EH/ET/DT). However, core.Reftek130 also + supports SOH packets, which do not contain the required information. As + a result, we need to store dummy data in its place. + """ event_number: int data_stream_number: int channel_number: int @@ -20,17 +29,35 @@ class SOHExtendedHeader: @dataclasses.dataclass class SOHPacket: + """ + The decoded data of an SOH packet. The extended_header field is to ensure + compatibility with dt_packet.DTPacket. SOH packets do not have an + extended header otherwise. + """ header: PacketHeader extended_header: SOHExtendedHeader data: bytes def bcd_16bit_int(_i): + """ + Reimplement a private function of the same name in obspy. Kept here in case + the private function is removed in a future obspy version. + :param _i: the byte string to convert into a 16-bit integer + :return: a 16-bit integer + """ _i = bcd(_i) return _i[0] * 100 + _i[1] def read_soh_packet(packet: bytes, unpacker: Unpacker): + """ + Process an SOH packet and get its extended header and payload. + :param packet: the bytes that make up the given SOH packet. 
+ :param unpacker: the unpacker to use to decode the data. + :return: the extended header and payload of the given SOH packet. + """ + event_number = bcd_16bit_int(numpy.frombuffer(packet[16:18], numpy.uint8)) data_stream_number = bcd(numpy.frombuffer(packet[18:19], numpy.uint8)) channel_number = bcd(numpy.frombuffer(packet[19:20], numpy.uint8))