PASSOFT / SOHViewer / Commits

Commit 3ba1e625, authored 1 year ago by Kien Le

    Read blockette 500 better

Parent: 61c4340d
Part of merge request !132 ("Fast MSEED read")
Pipeline #2580 failed in 2 minutes and 44 seconds
Showing 1 changed file
sohstationviewer/model/mseed/read_mseed_experiment/mseed_reader.py
+215 −101 (215 additions, 101 deletions) · view file @ 3ba1e625
 from numbers import Number, Real
 from typing import BinaryIO, Optional, List

 import obspy
 from obspy import UTCDateTime

 from decode_mseed import (
     decode_ieee_float,
     decode_ieee_double,
     decode_steim,
     decode_int16,
     decode_int24,
     decode_int32,
 )
-from help_do_stuff import (
+from mseed_helper import (
     FixedHeader,
     Blockette1000,
     get_data_endianness,
     Unpacker,
     get_record_metadata,
     get_header_endianness,
     RecordMetadata,
     EncodingFormat,
 ...
@@ -12,19 +16,66 @@ from help_do_stuff import (
 class RecordReader:
-    def __init__(self, file):
+    """
+    This class reads one data record from an MSEED file.
+    """
+
+    def __init__(self, file: BinaryIO) -> None:
         # The MSEED file object to read from. The file pointer needs to be
         # located at the start of a data record.
         self.file = file
-        self.fixed_header: FixedHeader = None
-        self.blockette_1000: Blockette1000 = None
-        # Utility object that helps unpack byte string in fixed header.
+        self.fixed_header: Optional[FixedHeader] = None
+        self.blockette_1000: Optional[Blockette1000] = None
+        self.other_blockettes: List[str] = []
+        # Utility object that helps unpack byte strings in the header (the
+        # fixed header and the blockettes).
+        # Separate from the one for data in case the header has a different
+        # byte order.
+        # TODO: change blockettes to use this unpacker as well.
-        self.fixed_header_unpacker: Unpacker = Unpacker()
+        self.header_unpacker: Unpacker = Unpacker()
         self.data_unpacker: Unpacker = Unpacker()
-        self.record_metadata: RecordMetadata = None
+        self.record_metadata: Optional[RecordMetadata] = None
         self.read_header()

+    def read_header(self) -> None:
+        """
+        Read the header of the current data record. The header includes the
+        fixed portion, blockette 1000, and any blockettes that follow.
+        """
+        # Save the start of the record so that we can go back after reading
+        # the header.
+        record_start = self.file.tell()
+        self.read_fixed_header()
+        self.read_blockette_1000()
+        header_endianness = get_header_endianness(self.fixed_header)
+        if header_endianness == 'little':
+            self.header_unpacker.byte_order_char = '<'
+        else:
+            self.header_unpacker.byte_order_char = '>'
+        data_endianness = get_data_endianness(self.blockette_1000)
+        if data_endianness == 'little':
+            self.data_unpacker.byte_order_char = '<'
+        else:
+            self.data_unpacker.byte_order_char = '>'
+        self.record_metadata = get_record_metadata(self.fixed_header,
+                                                   self.header_unpacker)
+        self.apply_time_correction()
+        self.read_blockettes()
+        self.file.seek(record_start)

-    def read_fixed_header(self):
+    def read_fixed_header(self) -> None:
         """
         Read the fixed header of the current data record and store it in
         self.fixed_header.
         """
         byte_counts = [6, 1, 1, 5, 2, 3, 2, 10, 2, 2, 2, 1, 1, 1, 1, 4, 2, 2]

         fixed_header_sections_values = []
 ...
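The byte_order_char values assigned in read_header above are the standard struct byte-order prefixes, which the project's Unpacker presumably prepends to its format strings. A minimal standalone illustration with plain struct and a hypothetical two-byte value:

    import struct

    raw = b'\x00\x01'
    struct.unpack('>H', raw)[0]  # 1   (big-endian, byte_order_char '>')
    struct.unpack('<H', raw)[0]  # 256 (little-endian, byte_order_char '<')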
@@ -32,23 +83,74 @@ class RecordReader:
             fixed_header_sections_values.append(self.file.read(byte_count))
         self.fixed_header = FixedHeader(*fixed_header_sections_values)

-    def read_blockette_500(self):
+    def read_blockette_500(self) -> None:
         """
-        Read blockette 500. Only read more precise start time. Assumes that
-        the file pointer is at the start of the blockette.
-        # TODO: find out what VCO correction is.
+        Read blockette 500 and format its content. The result is stored for
+        future use. Assumes that the file pointer is at the start of the
+        blockette.
         """
-        self.file.seek(18, 1)
-        start_time_microsecond = self.file.read(1)
-        start_time_microsecond = self.data_unpacker.unpack(
-            'b', start_time_microsecond)[0]
-        # Convert from microsecond to second so that UTCDateTime can handle it.
-        start_time_microsecond /= (10 ** 6)
-        self.record_metadata.start_time += start_time_microsecond
-        self.file.seek(181, 1)
+        blockette_content = {}
+        # Skip the first four bytes because they contain meta-information about
+        # the blockettes.
+        self.file.read(4)
+        vco_correction = self.file.read(4)
+        blockette_content['VCO correction'] = self.header_unpacker.unpack(
+            'f', vco_correction)[0]
+        exception_time_bytes = self.file.read(10)
+        exception_time_tuple = self.header_unpacker.unpack(
+            'HHBBBBH', exception_time_bytes)
+        exception_time = UTCDateTime(year=exception_time_tuple[0],
+                                     julday=exception_time_tuple[1],
+                                     hour=exception_time_tuple[2],
+                                     minute=exception_time_tuple[3],
+                                     second=exception_time_tuple[4],
+                                     microsecond=exception_time_tuple[6] * 100)
+        blockette_content['Time of exception'] = exception_time.strftime(
+            '%Y:%j:%H:%M:%S:%f')
+        microsecond = self.file.read(1)
+        microsecond = self.header_unpacker.unpack('B', microsecond)[0]
+        start_time_adjustment = microsecond / (10 ** 6)
+        self.record_metadata.start_time += start_time_adjustment
+        blockette_content['Micro sec'] = microsecond
+        reception_quality = self.file.read(1)
+        blockette_content['Reception Quality'] = self.header_unpacker.unpack(
+            'B', reception_quality)[0]
+        exception_count = self.file.read(4)
+        blockette_content['Exception Count'] = self.header_unpacker.unpack(
+            'I', exception_count)[0]
+        exception_type = self.file.read(16)
+        blockette_content['Exception Type'] = self.header_unpacker.unpack(
+            '16s', exception_type)[0].decode('utf-8').strip()
+        clock_model = self.file.read(32)
+        blockette_content['Clock Model'] = self.header_unpacker.unpack(
+            '32s', clock_model)[0].decode('utf-8').strip()
+        clock_status = self.file.read(128)
+        blockette_content['Clock Status'] = self.header_unpacker.unpack(
+            '128s', clock_status)[0].decode('utf-8').strip()
+        formatted_blockette = '\n'.join([f'{key}: {value}'
+                                         for key, value in
+                                         blockette_content.items()])
+        self.other_blockettes.append(formatted_blockette)

-    def read_blockette_1000(self):
+    def read_blockette_1000(self) -> None:
+        """
+        Read blockette 1000 of the current data record and store it in
+        self.blockette_1000.
+        """
         blockette_1000_section_lengths = [2, 2, 1, 1, 1, 1]
         blockette_1000_values = []
         for section_length in blockette_1000_section_lengths:
 ...
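Taken together, the reads in the new read_blockette_500 imply a 200-byte blockette: a 2-byte type and 2-byte next-blockette offset (the four skipped bytes), then the timing fields. A standalone sketch under that assumption; parse_blockette_500 is a hypothetical helper, not part of this codebase, and it uses plain struct with a big-endian default rather than the project's Unpacker:

    import struct

    BLOCKETTE_500_FORMAT = 'HHfHHBBBBHBBI16s32s128s'  # 200 bytes in total

    def parse_blockette_500(buf: bytes, byte_order: str = '>') -> dict:
        # Hypothetical stand-in mirroring the field sizes read above.
        (_btype, _next_offset, vco, year, julday, hour, minute, second,
         _unused, ten_thousandths, microsecond, quality, count,
         ex_type, model, status) = struct.unpack(
            byte_order + BLOCKETTE_500_FORMAT, buf[:200])
        return {
            'VCO correction': vco,
            # The last BTIME field is in units of 0.0001 s, hence * 100
            # to get microseconds, as in the code above.
            'Time of exception': (year, julday, hour, minute, second,
                                  ten_thousandths * 100),
            'Micro sec': microsecond,
            'Reception Quality': quality,
            'Exception Count': count,
            'Exception Type': ex_type.decode('utf-8').strip(),
            'Clock Model': model.decode('utf-8').strip(),
            'Clock Status': status.decode('utf-8').strip(),
        }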
@@ -56,7 +158,7 @@ class RecordReader:
         self.blockette_1000 = Blockette1000(*blockette_1000_values)

-    def read_blockette_1001(self):
+    def read_blockette_1001(self) -> None:
         """
         Read blockette 1001. The only valuable thing in this blockette is the
         more precise start time. Assumes that the file pointer is at the start
 ...
@@ -64,19 +166,25 @@ class RecordReader:
"""
self
.
file
.
read
(
5
)
start_time_microsecond
=
self
.
file
.
read
(
1
)
start_time_microsecond
=
self
.
data
_unpacker
.
unpack
(
start_time_microsecond
=
self
.
header
_unpacker
.
unpack
(
'
b
'
,
start_time_microsecond
)[
0
]
# Convert from microsecond to second so that UTCDateTime can handle it.
start_time_microsecond
/=
(
10
**
6
)
start_time_microsecond
/=
(
10
**
6
)
self
.
record_metadata
.
start_time
+=
start_time_microsecond
self
.
file
.
read
(
2
)
def
apply_time_correction
(
self
):
def
read_blockette_2000
(
self
)
->
None
:
pass
def
apply_time_correction
(
self
)
->
None
:
"""
Apply the time correction found in the fixed header to the start time.
"""
# format() is used here instead of bin() because we need to pad the
# resulting bit string with 0 to the left.
activity_flags
=
format
(
self
.
fixed_
header_unpacker
.
unpack
(
self
.
header_unpacker
.
unpack
(
'
B
'
,
self
.
fixed_header
.
activity_flags
)[
0
],
'
0>8b
'
)
...
...
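The '0>8b' spec in apply_time_correction pads the binary string to eight characters with leading zeros, which bin() does not, so individual flag bits can be indexed at fixed positions:

    format(5, '0>8b')  # '00000101' (fixed width, ready for flag-bit indexing)
    bin(5)             # '0b101'    (variable width, with a '0b' prefix)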
@@ -84,15 +192,20 @@ class RecordReader:
         if is_time_correction_applied:
             return

-        time_correction = self.fixed_header_unpacker.unpack(
+        time_correction = self.header_unpacker.unpack(
             'L', self.fixed_header.time_correction)[0]
         # We need to convert the unit from 0.0001 seconds to seconds
         time_correction *= 0.0001
         self.record_metadata.start_time += time_correction

-    def read_blockettes(self):
-        blockette_count = self.fixed_header_unpacker.unpack(
+    def read_blockettes(self) -> None:
+        """
+        Read all the blockettes in the current data record aside from
+        blockette 1000, which has been read previously. Currently only
+        handles blockettes 500, 1001, and 2000.
+        """
+        blockette_count = self.header_unpacker.unpack(
             'B', self.fixed_header.blockette_count)[0]
         for i in range(1, blockette_count):
 ...
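Since the fixed-header time correction is stored in units of 0.0001 seconds, a raw unpacked value of, say, 12345 (a hypothetical example) works out as:

    12345 * 0.0001  # 1.2345 seconds added to record_metadata.start_time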
@@ -101,49 +214,38 @@ class RecordReader:
             next_blockette_type = self.file.read(2)
             # Move file pointer back to start of blockette
             self.file.seek(-2, 1)
-            next_blockette_type = self.data_unpacker.unpack(
+            next_blockette_type = self.header_unpacker.unpack(
                 'H', next_blockette_type)[0]
-            if next_blockette_type not in (1000, 1001, 500):
+            if next_blockette_type not in (500, 1000, 1001):
                 print('We currently only handle blockettes 500, 1000, and '
                       '1001.')
                 continue
-            if next_blockette_type == 1001:
-                self.read_blockette_1001()
-            elif next_blockette_type == 500:
+            if next_blockette_type == 500:
                 self.read_blockette_500()
+            elif next_blockette_type == 1001:
+                self.read_blockette_1001()
+            elif next_blockette_type == 2000:
+                self.read_blockette_2000()

-    def get_first_data_point(self):
+    def get_first_data_point(self) -> Real:
+        """
+        Get the first data point of the current data record.
+        :return: the first data point of the current data record, whose type
+            is determined based on the encoding type stored in blockette 1000.
+        """
         record_start = self.file.tell()
-        self.read_fixed_header()
-        self.read_blockette_1000()
-        header_endianness = get_header_endianness(self.fixed_header)
-        if header_endianness == 'little':
-            self.fixed_header_unpacker.byte_order_char = '<'
-        else:
-            self.fixed_header_unpacker.byte_order_char = '>'
-        data_endianness = get_data_endianness(self.blockette_1000)
-        if data_endianness == 'little':
-            self.data_unpacker.byte_order_char = '<'
-        else:
-            self.data_unpacker.byte_order_char = '>'
-        self.record_metadata = get_record_metadata(self.fixed_header,
-                                                   self.fixed_header_unpacker)
-        self.apply_time_correction()
-        self.read_blockettes()
-        data_start = self.fixed_header_unpacker.unpack(
-            'H', self.fixed_header.data_offset)[0]
-        # It is most convenient to seek to the start of the record, then seek
-        # to the start of the data. Any other way would involve some
-        # calculation to determine which point to seek to.
-        self.file.seek(record_start)
+        data_start = self.header_unpacker.unpack(
+            'H', self.fixed_header.data_offset)[0]
+        # The data start byte is defined as an offset from the start of the
+        # data record. Seeing as we should be at the start of the data record
+        # by seeking there at the end of every major step, we can simply seek
+        # to the start of the data.
         self.file.seek(data_start, 1)

         encoding_format = self.blockette_1000.encoding_format
-        encoding_format = self.data_unpacker.unpack('b', encoding_format)[0]
+        encoding_format = self.header_unpacker.unpack('b', encoding_format)[0]
         encoding_format = EncodingFormat(encoding_format)

         # Currently, we are extracting only the first data point in each
 ...
@@ -153,6 +255,7 @@ class RecordReader:
         # the whole buffer.
         buffer = self.file.read(8)

         encoding_to_decoder = {
+            EncodingFormat.ASCII: (lambda a1, a2: int(1)),
             EncodingFormat.INT_16_BIT: decode_int16,
             EncodingFormat.INT_24_BIT: decode_int24,
             EncodingFormat.INT_32_BIT: decode_int32,
 ...
@@ -161,50 +264,61 @@ class RecordReader:
             EncodingFormat.STEIM_1: decode_steim,
             EncodingFormat.STEIM_2: decode_steim,
         }
         first_data_point = encoding_to_decoder[encoding_format](
             buffer, self.data_unpacker)
+        # Seek back to the start of the record so we can call this method
+        # again if needed.
+        self.file.seek(record_start)
         return first_data_point
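Each decoder in encoding_to_decoder takes the 8-byte buffer and an unpacker and returns the first sample. A minimal standalone stand-in for the INT_32_BIT case; first_int32 is hypothetical and uses plain struct instead of the project's Unpacker:

    import struct

    def first_int32(buffer: bytes, byte_order: str = '>') -> int:
        # The first data point of an uncompressed 32-bit record is simply
        # its first four data bytes.
        return struct.unpack(byte_order + 'i', buffer[:4])[0]

    first_int32(b'\x00\x00\x01\x00')  # 256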
+class MSeedReader:
+    def __init__(self, file: BinaryIO) -> None:
+        self.file = file
+
+    def read(self):
+        trace = []
+        while 1:
+            # We know that end of file is reached when read() returns an
+            # empty string.
+            is_eof = (self.file.read(1) == b'')
+            if is_eof:
+                break
+            # We need to move the file pointer back to its position after we
+            # do the end of file check. Otherwise, we would be off by one
+            # byte for all the reads afterward.
+            self.file.seek(-1, 1)
+            # We save the start of the current record so that after we are
+            # done reading the record, we can move back. This makes moving to
+            # the next record a lot easier, seeing as we can simply move the
+            # file pointer a distance the size of the current record.
+            current_record_start = self.file.tell()
+            reader = RecordReader(self.file)
+            trace.append(reader.get_first_data_point())
+            print(reader.other_blockettes[0])
+            # sample_count = reader.record_metadata.sample_count
+            # sample_rate = reader.record_metadata.sample_rate
+            # record_time_taken = sample_count / sample_rate
+            # record_end_time = reader.record_metadata.start_time + record_time_taken
+            # MSEED stores the size of a data record as an exponent of a
+            # power of two, so we have to convert that to actual size before
+            # doing anything else.
+            record_length_exp = reader.header_unpacker.unpack(
+                'B', reader.blockette_1000.record_length)[0]
+            record_size = 2 ** record_length_exp
+            self.file.seek(current_record_start)
+            self.file.seek(record_size, 1)
+        print(trace)
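Because blockette 1000 stores the record length as a power-of-two exponent, read() above skips to the next record with a relative seek of 2 ** exponent bytes. For a typical exponent of 12 (a hypothetical value here):

    record_length_exp = 12                 # as unpacked from blockette_1000.record_length
    record_size = 2 ** record_length_exp   # 4096-byte record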
 if __name__ == '__main__':
     # numpy.set_printoptions(threshold=sys.maxsize)
-    file_path = '/Users/kle/PycharmProjects/sohstationviewer/tests/test_data/potato.sdr/XX.KC01..VCO.D.2020.145'
+    file_path = '/Users/kle/PycharmProjects/sohstationviewer/tests/test_data/DT0001__.ACE'
     file = open(file_path, 'rb')
     stream = obspy.read(file_path)
-    trace = []
-    while 1:
-        # We know that end of file is reached when read() returns an empty
-        # string.
-        is_eof = (file.read(1) == b'')
-        if is_eof:
-            break
-        # We need to move the file pointer back to its position before we do
-        # the end of file check. Otherwise, we would be off by one byte for
-        # all the reads afterward.
-        file.seek(-1, 1)
-        # We save the start of the current record so that after we are done
-        # reading the record, we can move back. This makes moving to the next
-        # record a lot easier, seeing as we can simply move the file pointer
-        # a distance the size of the current record.
-        current_record_start = file.tell()
-        reader = RecordReader(file)
-        trace.append(reader.get_first_data_point())
-        sample_count = reader.record_metadata.sample_count
-        sample_rate = reader.record_metadata.sample_rate
-        record_time_taken = sample_count / sample_rate
-        record_end_time = reader.record_metadata.start_time + record_time_taken
-        # MSEED stores the size of a data record as an exponent of a power of
-        # two, so we have to convert that to actual size before doing
-        # anything else.
-        record_length_exp = reader.data_unpacker.unpack(
-            'B', reader.blockette_1000.record_length)[0]
-        record_size = 2 ** record_length_exp
-        file.seek(current_record_start)
-        file.seek(record_size, 1)
-    print(trace)
+    MSeedReader(file).read()