Commit 3ba1e625 authored by Kien Le

Read blockette 500 better

parent 61c4340d
Merge request !132: Fast MSEED read
Pipeline #2580 failed in 2 minutes and 44 seconds
from numbers import Number, Real
from typing import BinaryIO, Optional, List
import obspy
from obspy import UTCDateTime
from decode_mseed import (
decode_ieee_float, decode_ieee_double, decode_steim, decode_int16,
decode_int24, decode_int32,
)
from mseed_helper import (
FixedHeader, Blockette1000, get_data_endianness, Unpacker,
get_record_metadata, get_header_endianness, RecordMetadata,
EncodingFormat,
)
class RecordReader:
"""
This class reads one data record from an MSEED file.
"""
def __init__(self, file: BinaryIO) -> None:
# The MSEED file object to read from. The file pointer needs to be
# located at the start of a data record.
self.file = file
self.fixed_header: Optional[FixedHeader] = None
self.blockette_1000: Optional[Blockette1000] = None
self.other_blockettes: List[str] = []
# Utility object that helps unpack byte strings in the header (the
# fixed header and the blockettes).
# Separate from the one for data in case the header has a different
# byte order.
# TODO: change blockettes to use this unpacker as well.
self.header_unpacker: Unpacker = Unpacker()
self.data_unpacker: Unpacker = Unpacker()
self.record_metadata: Optional[RecordMetadata] = None
self.read_header()
def read_header(self) -> None:
"""
Read the header of the current data record. The header includes the
fixed portion, blockette 1000, and any blockettes that follow.
"""
# Save the start of the record so that we can go back after reading the
# header.
record_start = self.file.tell()
self.read_fixed_header()
self.read_blockette_1000()
header_endianness = get_header_endianness(self.fixed_header)
if header_endianness == 'little':
self.header_unpacker.byte_order_char = '<'
else:
self.header_unpacker.byte_order_char = '>'
data_endianness = get_data_endianness(self.blockette_1000)
if data_endianness == 'little':
self.data_unpacker.byte_order_char = '<'
else:
self.data_unpacker.byte_order_char = '>'
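        # '<' and '>' are the byte order characters the struct module uses
        # for little-endian and big-endian data. For example,
        # struct.unpack('<H', b'\x01\x00') and struct.unpack('>H', b'\x00\x01')
        # both return (1,).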
self.record_metadata = get_record_metadata(self.fixed_header,
self.header_unpacker)
self.apply_time_correction()
self.read_blockettes()
self.file.seek(record_start)
def read_fixed_header(self) -> None:
"""
Read the fixed header of the current data record and store it in
self.fixed_header.
"""
byte_counts = [6, 1, 1, 5, 2, 3, 2, 10, 2, 2, 2, 1, 1, 1, 1, 4, 2, 2]
fixed_header_sections_values = []
        for byte_count in byte_counts:
fixed_header_sections_values.append(self.file.read(byte_count))
self.fixed_header = FixedHeader(*fixed_header_sections_values)
def read_blockette_500(self) -> None:
"""
        Read blockette 500 and format its content. The result is stored for
        future use. Assumes that the file pointer is at the start of the
        blockette.
"""
blockette_content = {}
        # Skip the first four bytes, which contain the blockette type and the
        # offset to the next blockette.
self.file.read(4)
vco_correction = self.file.read(4)
blockette_content['VCO correction'] = self.header_unpacker.unpack(
'f', vco_correction
)[0]
exception_time_bytes = self.file.read(10)
exception_time_tuple = self.header_unpacker.unpack(
'HHBBBBH', exception_time_bytes)
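        # The last field of the BTIME structure stores fractions of a second
        # in units of 0.0001 seconds, which is why the microsecond argument
        # below is multiplied by 100.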
exception_time = UTCDateTime(year=exception_time_tuple[0],
julday=exception_time_tuple[1],
hour=exception_time_tuple[2],
minute=exception_time_tuple[3],
second=exception_time_tuple[4],
microsecond=exception_time_tuple[6] * 100)
blockette_content['Time of exception'] = exception_time.strftime(
'%Y:%j:%H:%M:%S:%f'
)
microsecond = self.file.read(1)
microsecond = self.header_unpacker.unpack('B', microsecond)[0]
start_time_adjustment = microsecond / (10 ** 6)
self.record_metadata.start_time += start_time_adjustment
blockette_content['Micro sec'] = microsecond
reception_quality = self.file.read(1)
blockette_content['Reception Quality'] = self.header_unpacker.unpack(
'B', reception_quality
)[0]
exception_count = self.file.read(4)
blockette_content['Exception Count'] = self.header_unpacker.unpack(
'I', exception_count
)[0]
exception_type = self.file.read(16)
blockette_content['Exception Type'] = self.header_unpacker.unpack(
'16s', exception_type
)[0].decode('utf-8').strip()
clock_model = self.file.read(32)
blockette_content['Clock Model'] = self.header_unpacker.unpack(
'32s', clock_model
)[0].decode('utf-8').strip()
clock_status = self.file.read(128)
blockette_content['Clock Status'] = self.header_unpacker.unpack(
'128s', clock_status
)[0].decode('utf-8').strip()
        formatted_blockette = '\n'.join(
            f'{key}: {value}' for key, value in blockette_content.items()
        )
self.other_blockettes.append(formatted_blockette)
def read_blockette_1000(self) -> None:
"""
Read blockette 1000 of the current data record and store it in
self.blockette_1000.
"""
blockette_1000_section_lengths = [2, 2, 1, 1, 1, 1]
blockette_1000_values = []
for section_length in blockette_1000_section_lengths:
            blockette_1000_values.append(self.file.read(section_length))
self.blockette_1000 = Blockette1000(*blockette_1000_values)
def read_blockette_1001(self) -> None:
"""
Read blockette 1001. The only valuable thing in this blockette is the
more precise start time. Assumes that the file pointer is at the start
        of the blockette.
"""
self.file.read(5)
start_time_microsecond = self.file.read(1)
start_time_microsecond = self.header_unpacker.unpack(
'b', start_time_microsecond
)[0]
# Convert from microsecond to second so that UTCDateTime can handle it.
start_time_microsecond /= (10 ** 6)
self.record_metadata.start_time += start_time_microsecond
self.file.read(2)
    def read_blockette_2000(self) -> None:
        """
        Read blockette 2000. Not yet implemented.
        """
        pass
def apply_time_correction(self) -> None:
"""
Apply the time correction found in the fixed header to the start time.
"""
# format() is used here instead of bin() because we need to pad the
# resulting bit string with 0 to the left.
activity_flags = format(
self.header_unpacker.unpack(
'B', self.fixed_header.activity_flags)[0],
'0>8b'
)
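        # For example, format(6, '0>8b') gives '00000110', while bin(6) would
        # give '0b110', which has neither a fixed width nor fixed bit
        # positions.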
        # Bit 1 of the activity flags is set when the time correction has
        # already been applied to the start time.
        is_time_correction_applied = int(activity_flags[6])
if is_time_correction_applied:
return
time_correction = self.header_unpacker.unpack(
'L', self.fixed_header.time_correction
)[0]
# We need to convert the unit from 0.0001 seconds to seconds
time_correction *= 0.0001
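        # For example, a stored correction of 10000 units amounts to exactly
        # one second.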
self.record_metadata.start_time += time_correction
def read_blockettes(self) -> None:
"""
        Read all the blockettes in the current data record aside from
        blockette 1000, which has been read previously. Currently, only
        blockettes 500, 1001, and 2000 are handled.
"""
blockette_count = self.header_unpacker.unpack(
'B', self.fixed_header.blockette_count
)[0]
        for _ in range(1, blockette_count):
            # Each blockette stores its type in its first two bytes.
next_blockette_type = self.file.read(2)
# Move file pointer back to start of blockette
self.file.seek(-2, 1)
next_blockette_type = self.header_unpacker.unpack(
'H', next_blockette_type
)[0]
            if next_blockette_type not in (500, 1000, 1001, 2000):
                print('We currently only handle blockettes 500, 1000, '
                      '1001, and 2000.')
                continue
if next_blockette_type == 500:
self.read_blockette_500()
elif next_blockette_type == 1001:
self.read_blockette_1001()
elif next_blockette_type == 2000:
self.read_blockette_2000()
def get_first_data_point(self) -> Real:
"""
Get the first data point of the current data record.
:return: the first data point of the current data record, whose type is
determined based on the encoding type stored in blockette 1000.
"""
record_start = self.file.tell()
data_start = self.header_unpacker.unpack(
'H', self.fixed_header.data_offset
)[0]
# The data start byte is defined as an offset from the start of the
# data record. Seeing as we should be at the start of the data record
# by seeking there at the end of every major step, we can simply seek
# to the start of the data.
self.file.seek(data_start, 1)
encoding_format = self.blockette_1000.encoding_format
encoding_format = self.header_unpacker.unpack('b', encoding_format)[0]
encoding_format = EncodingFormat(encoding_format)
# Currently, we are extracting only the first data point in each
        # record, so we only need the first eight bytes. Each decoder extracts
        # the first data point from this buffer and so may not need
# the whole buffer.
buffer = self.file.read(8)
encoding_to_decoder = {
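            # ASCII data does not decode to a number, so a placeholder value
            # is returned for it.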
EncodingFormat.ASCII: (lambda a1, a2: int(1)),
EncodingFormat.INT_16_BIT: decode_int16,
EncodingFormat.INT_24_BIT: decode_int24,
EncodingFormat.INT_32_BIT: decode_int32,
            EncodingFormat.FLOAT_32_BIT: decode_ieee_float,
            EncodingFormat.FLOAT_64_BIT: decode_ieee_double,
EncodingFormat.STEIM_1: decode_steim,
EncodingFormat.STEIM_2: decode_steim,
}
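        # Every decoder has the same interface: it takes the data buffer and
        # an Unpacker and returns the first data point in the buffer.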
first_data_point = encoding_to_decoder[encoding_format](buffer,
self.data_unpacker)
# Seek back to the start of the record so we can call this method again
# if needed.
self.file.seek(record_start)
return first_data_point
class MSeedReader:
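    """
    This class reads an MSEED file one data record at a time using
    RecordReader.
    """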
def __init__(self, file: BinaryIO) -> None:
self.file = file
def read(self):
trace = []
        while True:
            # We know that the end of the file is reached when read() returns
            # an empty byte string.
            is_eof = (self.file.read(1) == b'')
if is_eof:
break
            # We need to move the file pointer back to where it was before
            # the end of file check. Otherwise, we would be off by one byte
            # for all the reads afterward.
self.file.seek(-1, 1)
# We save the start of the current record so that after we are done
# reading the record, we can move back. This makes moving to the next
# record a lot easier, seeing as we can simply move the file pointer
# a distance the size of the current record.
current_record_start = self.file.tell()
reader = RecordReader(self.file)
trace.append(reader.get_first_data_point())
            if reader.other_blockettes:
                print(reader.other_blockettes[0])
# sample_count = reader.record_metadata.sample_count
# sample_rate = reader.record_metadata.sample_rate
# record_time_taken = sample_count / sample_rate
# record_end_time = reader.record_metadata.start_time + record_time_taken
# MSEED stores the size of a data record as an exponent of a power of
# two, so we have to convert that to actual size before doing anything
# else.
record_length_exp = reader.header_unpacker.unpack(
'B', reader.blockette_1000.record_length
)[0]
record_size = 2 ** record_length_exp
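            # For example, a record_length value of 12 gives a record size of
            # 2 ** 12 = 4096 bytes.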
self.file.seek(current_record_start)
self.file.seek(record_size, 1)
print(trace)
if __name__ == '__main__':
# numpy.set_printoptions(threshold=sys.maxsize)
file_path = '/Users/kle/PycharmProjects/sohstationviewer/tests/test_data/DT0001__.ACE'
file = open(file_path, 'rb')
stream = obspy.read(file_path)
MSeedReader(file).read()