Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Target project: software_public/passoft/sohstationviewer
Commits on Source (6)
@@ -26,7 +26,7 @@ def get_chan_plot_info(org_chan_id: str, chan_info: Dict, data_type: str,
chan = 'MassPos?'
if org_chan_id.startswith('Event DS'):
chan = 'Event DS?'
-if org_chan_id.startswith('DS'):
+if org_chan_id.startswith('DS') and 'DSP' not in org_chan_id:
chan = 'DS?'
if org_chan_id.startswith('Disk Usage'):
chan = 'Disk Usage?'
......
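A quick illustration (not from the merge request; the channel names are only examples) of what the added 'DSP' guard above changes: channels whose IDs merely start with 'DS' still map to the 'DS?' pattern, while DSP-related channels no longer do.

def matches_ds_pattern(org_chan_id: str) -> bool:
    # Mirrors the updated condition in get_chan_plot_info().
    return org_chan_id.startswith('DS') and 'DSP' not in org_chan_id

assert matches_ds_pattern('DS2')
assert not matches_ds_pattern('DSP Clock Diff')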
def decode_int16(buffer, unpacker):
requested_bytes = buffer[:2]
return unpacker.unpack('h', requested_bytes)[0]
def decode_int24(buffer, unpacker):
requested_bytes = buffer[:3]
byte_order = 'big' if unpacker.byte_order_char == '>' else 'little'
# We delegate to int.from_bytes() because it takes a lot of work to make
# struct.unpack() handle signed 24-bit integers.
# See: https://stackoverflow.com/questions/3783677/how-to-read-integers-from-a-file-that-are-24bit-and-little-endian-using-python # noqa
return int.from_bytes(requested_bytes, byte_order, signed=True)
def decode_int32(buffer, unpacker):
requested_bytes = buffer[:4]
return unpacker.unpack('i', requested_bytes)[0]
def decode_ieee_float(buffer, unpacker):
requested_bytes = buffer[:4]
return unpacker.unpack('f', requested_bytes)[0]
def decode_ieee_double(buffer, unpacker):
requested_bytes = buffer[:8]
return unpacker.unpack('d', requested_bytes)[0]
def decode_steim(buffer, unpacker):
# The first 4 bytes of a Steim frame hold metadata about the record. Since
# we are not decompressing the data, we skip them. The next 4 bytes contain
# the first data point of the MSEED data record, which is what we need.
requested_bytes = buffer[4:8]
return unpacker.unpack('i', requested_bytes)[0]
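A minimal sketch (not part of the files above; the stand-in unpacker below only mimics the Unpacker helper defined in the next module) showing how these decoders might be exercised on hand-built buffers:

import struct

class _BigEndianUnpacker:
    """Stand-in for mseed_helper.Unpacker with a fixed big-endian order."""
    byte_order_char = '>'

    def unpack(self, format, buffer):
        return struct.unpack(self.byte_order_char + format, buffer)

unpacker = _BigEndianUnpacker()
# 0x0102 is 258 when read as a big-endian 16-bit integer.
assert decode_int16(b'\x01\x02', unpacker) == 258
# A Steim frame starts with 4 metadata bytes; the first data point follows.
assert decode_steim(b'\x00\x00\x00\x00\x00\x00\x00\x07', unpacker) == 7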
from dataclasses import dataclass
import struct
from enum import Enum
from obspy import UTCDateTime
class Unpacker:
"""
A wrapper around struct.unpack() that unpacks binary data without the
caller having to specify the byte order in the format string. It also
restricts the type of format to str and of buffer to bytes.
"""
def __init__(self, byte_order_char: str = '') -> None:
self.byte_order_char = byte_order_char
def unpack(self, format: str, buffer: bytes):
"""
Unpack a string of bytes into a tuple of values based on the given
format
:param format: the format used to unpack the byte string
:param buffer: the byte string
:return: a tuple containing the unpacked values.
"""
default_byte_order_chars = ('@', '=', '>', '<', '!')
if format.startswith(default_byte_order_chars):
format = self.byte_order_char + format[1:]
else:
format = self.byte_order_char + format
return struct.unpack(format, buffer)
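# Illustrative usage (not part of the original file): the unpacker injects
# its own byte order character, overriding any that the caller supplied.
#     >>> unpacker = Unpacker('<')
#     >>> unpacker.unpack('H', b'\x01\x00')
#     (1,)
#     >>> unpacker.unpack('>H', b'\x01\x00')  # '>' is replaced by '<'
#     (1,)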
@dataclass
class FixedHeader:
"""
The fixed portion of the header of a data record. All fields are stored as
bytes to minimize time wasted on decoding unused values.
"""
sequence_number: bytes
data_header_quality_indicator: bytes
reserved: bytes
station: bytes
location: bytes
channel: bytes
net_code: bytes
record_start_time: bytes
sample_count: bytes
sample_rate_factor: bytes
sample_rate_multiplier: bytes
activity_flags: bytes
io_and_clock_flags: bytes
data_quality_flags: bytes
blockette_count: bytes
time_correction: bytes
data_offset: bytes
first_blockette_offset: bytes
@dataclass
class Blockette1000:
"""
Blockette 1000 of a data record. All fields are stored as bytes to minimize
time wasted on decoding unused values.
"""
blockette_type: bytes
next_blockette_offset: bytes
encoding_format: bytes
byte_order: bytes
record_length: bytes
reserved_byte: bytes
@dataclass
class RecordMetadata:
"""
The metadata of a data record.
"""
station: str
location: str
channel: str
network: str
start_time: UTCDateTime
sample_count: int
sample_rate: float
class EncodingFormat(Enum):
ASCII = 0
INT_16_BIT = 1
INT_24_BIT = 2
INT_32_BIT = 3
IEEE_FLOAT_32_BIT = 4
IEEE_FLOAT_64_BIT = 5
STEIM_1 = 10
STEIM_2 = 11
def get_header_endianness(header: FixedHeader):
"""
Determine the endianness of the fixed header of a data record. Works by
checking whether the decoded record start time has a sane value when the
header is assumed to be big-endian.
WARNING: This check fails on three dates: 2056-1, 2056-256, and 2056-257.
2056 is a palindrome when encoded as a pair of octets, so endianness does
not affect it. Similarly, 257 is also 2-octet-palindromic. Meanwhile, 1 and
256 are counterparts when encoded as pairs of octets. Because they are both
valid values for day of year, it is impossible to make a conclusion about
endianness based on day of year if it is either 1 or 256 in big-endian.
These facts combined mean that we cannot determine the endianness of a
header whose record starts on one of the aforementioned dates. As this
function is written, the endianness will be reported as big in these
cases. The same problem is noted in libmseed.
:param header: the fixed header of the data record
:return: either the string 'big' or 'little', depending on the extracted
endianness of the header
"""
record_start_time_string = header.record_start_time
record_start_time_tuple = struct.unpack('>hhbbbbh',
record_start_time_string)
# libmseed uses 1900 to 2100 as the sane year range. We follow their
# example here.
year_is_good = (1900 <= record_start_time_tuple[0] <= 2100)
# The upper range is 366 to account for leap years.
day_is_good = (1 <= record_start_time_tuple[1] <= 366)
endianness = 'big' if year_is_good and day_is_good else 'little'
return endianness
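# A quick illustration (not part of the original file) of the ambiguity noted
# in the docstring: 2056 encodes to the same two bytes in either byte order,
# and day 1 in big-endian is indistinguishable from day 256 in little-endian.
#     >>> import struct
#     >>> struct.pack('>H', 2056) == struct.pack('<H', 2056)
#     True
#     >>> struct.pack('>H', 1) == struct.pack('<H', 256)
#     True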
def get_data_endianness(blockette_1000: Blockette1000):
"""
Get the endianness of the data in a record by examining its blockette 1000.
:param blockette_1000: the blockette 1000 of the data record
:return: either the string 'big' or 'little', depending on the byte order
field of blockette 1000
"""
# The byte order field is a single byte, so the byte order used to decode
# it does not matter.
blockette_1000_endianness = int.from_bytes(
blockette_1000.byte_order, 'big'
)
if blockette_1000_endianness:
return 'big'
else:
return 'little'
def calculate_sample_rate(factor: int, multiplier: int) -> float:
"""
Calculate the sample rate using the sample rate factor and multiplier. This
algorithm is described around the start of Chapter 8 in the SEED manual.
:param factor: the sample rate factor
:param multiplier: the sample rate multiplier
:return: the nominal sample rate
"""
sample_rate = 0
if factor == 0:
sample_rate = 0
elif factor > 0 and multiplier > 0:
sample_rate = factor * multiplier
elif factor > 0 and multiplier < 0:
sample_rate = -(factor / multiplier)
elif factor < 0 and multiplier > 0:
sample_rate = -(multiplier / factor)
elif factor < 0 and multiplier < 0:
sample_rate = 1 / (factor * multiplier)
return sample_rate
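# Worked examples (illustrative, not part of the original file):
#     >>> calculate_sample_rate(40, 1)    # 40 samples per second
#     40
#     >>> calculate_sample_rate(1, -10)   # one sample every 10 seconds
#     0.1
#     >>> calculate_sample_rate(-10, 1)   # also one sample every 10 seconds
#     0.1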
def get_record_metadata(header: FixedHeader, header_unpacker: Unpacker):
"""
Extract and parse the metadata of a data record from its fixed header.
:param header: the fixed header of the data record
:param header_unpacker: the unpacker corresponding to the data record;
needed so that the correct byte order can be used
:return: the extracted record metadata
"""
station = header.station.decode('utf-8').rstrip()
location = header.location.decode('utf-8').rstrip()
channel = header.channel.decode('utf-8').rstrip()
network = header.net_code.decode('utf-8').rstrip()
record_start_time_string = header.record_start_time
record_start_time_tuple = header_unpacker.unpack('HHBBBBH',
record_start_time_string)
record_start_time = UTCDateTime(year=record_start_time_tuple[0],
julday=record_start_time_tuple[1],
hour=record_start_time_tuple[2],
minute=record_start_time_tuple[3],
second=record_start_time_tuple[4],
microsecond=record_start_time_tuple[
6] * 100)
sample_count = header_unpacker.unpack('H', header.sample_count)[0]
sample_rate_factor = header_unpacker.unpack(
'h', header.sample_rate_factor
)[0]
sample_rate_multiplier = header_unpacker.unpack(
'h', header.sample_rate_multiplier
)[0]
sample_rate = calculate_sample_rate(sample_rate_factor,
sample_rate_multiplier)
return RecordMetadata(station, location, channel, network,
record_start_time, sample_count, sample_rate)
from typing import BinaryIO
import obspy
from record_reader import RecordReader
class MSeedReader:
def __init__(self, file: BinaryIO) -> None:
self.file = file
def read(self):
trace = []
while True:
# We know that the end of the file has been reached when read() returns an
# empty bytes object.
is_eof = (self.file.read(1) == b'')
if is_eof:
break
# We need to move the file pointer back to its position after we
# do the end of file check. Otherwise, we would be off by one
# byte for all the reads afterward.
self.file.seek(-1, 1)
# We save the start of the current record so that after we are
# done reading the record, we can move back. This makes moving
# to the next record a lot easier, seeing as we can simply move
# the file pointer a distance the size of the current record.
current_record_start = self.file.tell()
reader = RecordReader(self.file)
trace.append(reader.get_first_data_point())
# sample_count = reader.record_metadata.sample_count
# sample_rate = reader.record_metadata.sample_rate
# record_time_taken = sample_count / sample_rate
# record_end_time = (reader.record_metadata.start_time +
# record_time_taken)
# MSEED stores the size of a data record as the exponent of a power of
# two, so we have to convert that to the actual size before doing
# anything else.
record_length_exp = reader.header_unpacker.unpack(
'B', reader.blockette_1000.record_length
)[0]
record_size = 2 ** record_length_exp
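# For example, a record_length value of 12 in blockette 1000 gives
# 2 ** 12 == 4096-byte records, while 9 gives 512-byte records; both are
# common MSEED record sizes.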
self.file.seek(current_record_start)
self.file.seek(record_size, 1)
if __name__ == '__main__':
# numpy.set_printoptions(threshold=sys.maxsize)
file_path = '/Users/ldam/Documents/GIT/sohstationviewer/tests/test_data/' \
'Q330_mixed_traces/XX-3203_4-20221222183011'
file = open(file_path, 'rb')
stream = obspy.read(file_path)
MSeedReader(file).read()
from numbers import Real
from typing import BinaryIO, Optional, List
from obspy import UTCDateTime
from decode_mseed import (
decode_ieee_float, decode_ieee_double, decode_steim, decode_int16,
decode_int24, decode_int32,
)
from mseed_helper import (
FixedHeader, Blockette1000, get_data_endianness, Unpacker,
get_record_metadata, get_header_endianness, RecordMetadata,
EncodingFormat,
)
class RecordReader:
"""
This class reads one data record from an MSEED file.
"""
def __init__(self, file: BinaryIO) -> None:
# The MSEED file object to read from. The file pointer needs to be
# located at the start of a data record.
self.file = file
self.fixed_header: Optional[FixedHeader] = None
self.blockette_1000: Optional[Blockette1000] = None
self.other_blockettes: List[str] = []
# Utility object that helps unpack byte strings in the header (the
# fixed header and the blockettes).
# Separate from the one for data in case the header has a different
# byte order.
# TODO: change blockettes to use this unpacker as well.
self.header_unpacker: Unpacker = Unpacker()
self.data_unpacker: Unpacker = Unpacker()
self.record_metadata: Optional[RecordMetadata] = None
self.read_header()
def read_header(self) -> None:
"""
Read the header of the current data record. The header includes the
fixed portion, blockette 1000, and any blockettes that follow.
"""
# Save the start of the record so that we can go back after reading the
# header.
record_start = self.file.tell()
self.read_fixed_header()
self.read_blockette_1000()
header_endianness = get_header_endianness(self.fixed_header)
if header_endianness == 'little':
self.header_unpacker.byte_order_char = '<'
else:
self.header_unpacker.byte_order_char = '>'
data_endianness = get_data_endianness(self.blockette_1000)
if data_endianness == 'little':
self.data_unpacker.byte_order_char = '<'
else:
self.data_unpacker.byte_order_char = '>'
self.record_metadata = get_record_metadata(self.fixed_header,
self.header_unpacker)
self.apply_time_correction()
self.read_blockettes()
self.file.seek(record_start)
def read_fixed_header(self) -> None:
"""
Read the fixed header of the current data record and store it in
self.fixed_header.
"""
byte_counts = [6, 1, 1, 5, 2, 3, 2, 10, 2, 2, 2, 1, 1, 1, 1, 4, 2, 2]
fixed_header_sections_values = []
for byte_count in byte_counts:
fixed_header_sections_values.append(self.file.read(byte_count))
self.fixed_header = FixedHeader(*fixed_header_sections_values)
def read_blockette_500(self) -> None:
"""
Read blockette 500 and format its content. The result is stored for
future use. Assumes that the file pointer is at the start of the
blockette.
"""
blockette_content = {}
# Skip the first four bytes because they contain meta-information about
# the blockettes.
self.file.read(4)
vco_correction = self.file.read(4)
blockette_content['VCO correction'] = self.header_unpacker.unpack(
'f', vco_correction
)[0]
exception_time_bytes = self.file.read(10)
exception_time_tuple = self.header_unpacker.unpack(
'HHBBBBH', exception_time_bytes)
exception_time = UTCDateTime(year=exception_time_tuple[0],
julday=exception_time_tuple[1],
hour=exception_time_tuple[2],
minute=exception_time_tuple[3],
second=exception_time_tuple[4],
microsecond=exception_time_tuple[6] * 100)
blockette_content['Time of exception'] = exception_time.strftime(
'%Y:%j:%H:%M:%S:%f'
)
microsecond = self.file.read(1)
microsecond = self.header_unpacker.unpack('B', microsecond)[0]
start_time_adjustment = microsecond / (10 ** 6)
self.record_metadata.start_time += start_time_adjustment
blockette_content['Micro sec'] = microsecond
reception_quality = self.file.read(1)
blockette_content['Reception Quality'] = self.header_unpacker.unpack(
'B', reception_quality
)[0]
exception_count = self.file.read(4)
blockette_content['Exception Count'] = self.header_unpacker.unpack(
'I', exception_count
)[0]
exception_type = self.file.read(16)
blockette_content['Exception Type'] = self.header_unpacker.unpack(
'16s', exception_type
)[0].decode('utf-8').strip()
clock_model = self.file.read(32)
blockette_content['Clock Model'] = self.header_unpacker.unpack(
'32s', clock_model
)[0].decode('utf-8').strip()
clock_status = self.file.read(128)
blockette_content['Clock Status'] = self.header_unpacker.unpack(
'128s', clock_status
)[0].decode('utf-8').strip()
formatted_blockette = '\n'.join([f'{key}: {value}'
for key, value
in blockette_content.items()])
self.other_blockettes.append(formatted_blockette)
def read_blockette_1000(self) -> None:
"""
Read blockette 1000 of the current data record and store it in
self.blockette_1000.
"""
blockette_1000_section_lengths = [2, 2, 1, 1, 1, 1]
blockette_1000_values = []
for section_length in blockette_1000_section_lengths:
blockette_1000_values.append(self.file.read(section_length))
self.blockette_1000 = Blockette1000(*blockette_1000_values)
def read_blockette_1001(self) -> None:
"""
Read blockette 1001. The only valuable thing in this blockette is the
more precise start time. Assumes that the file pointer is at the start
of the blockette.
"""
self.file.read(5)
start_time_microsecond = self.file.read(1)
start_time_microsecond = self.header_unpacker.unpack(
'b', start_time_microsecond
)[0]
# Convert from microseconds to seconds so that UTCDateTime can handle it.
start_time_microsecond /= (10 ** 6)
self.record_metadata.start_time += start_time_microsecond
self.file.read(2)
def read_blockette_2000(self) -> None:
pass
def apply_time_correction(self) -> None:
"""
Apply the time correction found in the fixed header to the start time.
"""
# format() is used here instead of bin() because we need to pad the
# resulting bit string with 0 to the left.
activity_flags = format(
self.header_unpacker.unpack(
'B', self.fixed_header.activity_flags)[0],
'0>8b'
)
# Bit 1 of the activity flags (the second character from the right in the
# bit string) indicates whether the time correction has already been
# applied.
is_time_correction_applied = int(activity_flags[6])
if is_time_correction_applied:
return
time_correction = self.header_unpacker.unpack(
'L', self.fixed_header.time_correction
)[0]
# We need to convert the unit from 0.0001 seconds to seconds
time_correction *= 0.0001
self.record_metadata.start_time += time_correction
def read_blockettes(self) -> None:
"""
Read all the blockettes in the current data record aside from blockette
1000, which has been read previously. Currently, only blockettes 500,
1001, and 2000 are handled.
"""
blockette_count = self.header_unpacker.unpack(
'B', self.fixed_header.blockette_count
)[0]
for i in range(1, blockette_count):
# All blockettes store their type in the first two bytes, so we
# read that to determine what to do
next_blockette_type = self.file.read(2)
# Move file pointer back to start of blockette
self.file.seek(-2, 1)
next_blockette_type = self.header_unpacker.unpack(
'H', next_blockette_type
)[0]
if next_blockette_type not in (500, 1000, 1001, 2000):
print('We currently only handle blockettes 500, 1000, 1001, and '
'2000.')
continue
if next_blockette_type == 500:
self.read_blockette_500()
elif next_blockette_type == 1001:
self.read_blockette_1001()
elif next_blockette_type == 2000:
self.read_blockette_2000()
def get_first_data_point(self) -> Real:
"""
Get the first data point of the current data record.
:return: the first data point of the current data record, whose type is
determined based on the encoding type stored in blockette 1000.
"""
record_start = self.file.tell()
data_start = self.header_unpacker.unpack(
'H', self.fixed_header.data_offset
)[0]
# The data start byte is defined as an offset from the start of the data
# record. Because we seek back to the start of the record at the end of
# every major step, we can simply seek forward by this offset to reach the
# start of the data.
self.file.seek(data_start, 1)
encoding_format = self.blockette_1000.encoding_format
encoding_format = self.header_unpacker.unpack('b', encoding_format)[0]
encoding_format = EncodingFormat(encoding_format)
if encoding_format == EncodingFormat.ASCII:
# We want to read everything in the record if the encoding is
# ASCII.
record_length_exp = self.header_unpacker.unpack(
'B', self.blockette_1000.record_length
)[0]
record_size = 2 ** record_length_exp
# This name does not make much sense with what we are doing here,
# but it will have to do for now.
# The size of the record includes the header, so we have to account
# for that when grabbing the data.
first_data_point = self.file.read(record_size - data_start)
else:
# Currently, we extract only the first data point of each record. The
# smallest number of bytes we can read while guaranteeing that we get the
# first data point is 8, with the Steim encodings and IEEE double-precision
# floats needing the whole buffer.
buffer = self.file.read(8)
encoding_to_decoder = {
EncodingFormat.INT_16_BIT: decode_int16,
EncodingFormat.INT_24_BIT: decode_int24,
EncodingFormat.INT_32_BIT: decode_int32,
EncodingFormat.IEEE_FLOAT_32_BIT: decode_ieee_float,
EncodingFormat.IEEE_FLOAT_64_BIT: decode_ieee_double,
EncodingFormat.STEIM_1: decode_steim,
EncodingFormat.STEIM_2: decode_steim,
}
first_data_point = encoding_to_decoder[encoding_format](
buffer, self.data_unpacker
)
# Seek back to the start of the record so we can call this method again
# if needed.
self.file.seek(record_start)
return first_data_point
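A minimal usage sketch (an assumption, not part of the merge request; the relative test-data path mirrors the absolute one used in the MSeedReader example above):

if __name__ == '__main__':
    test_file_path = ('tests/test_data/Q330_mixed_traces/'
                      'XX-3203_4-20221222183011')
    with open(test_file_path, 'rb') as mseed_file:
        # The header is read in the constructor; the file pointer is back at
        # the start of the record afterwards.
        reader = RecordReader(mseed_file)
        print(reader.record_metadata)
        print(reader.get_first_data_point())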
@@ -203,10 +203,10 @@ class LogInfo():
return False
return epoch, disk, val
-def read_dps_clock_diff(self, line: str
+def read_dsp_clock_diff(self, line: str
) -> Union[bool, Tuple[float, float]]:
"""
-Read DPS clock difference
+Read DSP clock difference
:param line: str - a line of evt message
:return epoch: float - time when info is recorded
:return total: float - total difference time in milliseconds
@@ -415,11 +415,11 @@ class LogInfo():
if epoch:
self.add_chan_info('Jerks/DSP Sets', epoch, 0, idx)
elif "DPS clock diff" in line:
ret = self.read_dps_clock_diff()
elif "DSP CLOCK DIFFERENCE" in line:
ret = self.read_dsp_clock_diff(line)
if ret:
epoch, total = ret
-self.add_chan_info('DPS Clock Diff', epoch, total, idx)
+self.add_chan_info('DSP Clock Diff', epoch, total, idx)
elif "ACQUISITION STARTED" in line:
epoch = self.simple_read(line)[1]
......
@@ -4,7 +4,7 @@ RT130 object to hold and process RefTek data
import os
from pathlib import Path
from typing import Tuple, List, Union
+import traceback
import numpy as np
from sohstationviewer.model.reftek.from_rt2ms import (
@@ -89,8 +89,15 @@ class RT130(DataTypeModel):
path2file = Path(path).joinpath(file_name)
if not validate_file(path2file, file_name):
continue
-if not self.read_reftek_130(path2file):
-read_text(path2file, file_name, self.log_data['TEXT'])
+try:
+if not self.read_reftek_130(path2file):
+read_text(path2file, self.log_data['TEXT'])
+except Exception:
+fmt = traceback.format_exc()
+self.track_info(f"Skip file {path2file} can't be read "
+f"due to error: {str(fmt)}",
+LogType.WARNING)
count += 1
if count % 50 == 0:
self.track_info(
@@ -133,7 +140,13 @@ class RT130(DataTypeModel):
:param path2file: absolute path to file
"""
-rt130 = core.Reftek130.from_file(path2file)
+try:
+rt130 = core.Reftek130.from_file(path2file)
+except Exception:
+fmt = traceback.format_exc()
+self.track_info(f"Skip file {path2file} can't be read "
+f"due to error: {str(fmt)}", LogType.WARNING)
+return
unique, counts = np.unique(rt130._data["packet_type"],
return_counts=True)
nbr_packet_type = dict(zip(unique, counts))
......