From 30437627f62ee972d70d3d22feaaee500bf1c0fd Mon Sep 17 00:00:00 2001
From: kienle <kienle@passcal.nmt.edu>
Date: Tue, 7 Jan 2025 12:18:40 -0700
Subject: [PATCH] Read all data points based on database

---
 sohstationviewer/model/mseed_data/mseed.py    | 20 +++++++++-
 .../model/mseed_data/mseed_reader.py          | 37 ++++++++++++++-----
 2 files changed, 47 insertions(+), 10 deletions(-)

diff --git a/sohstationviewer/model/mseed_data/mseed.py b/sohstationviewer/model/mseed_data/mseed.py
index ecb65f28..2b83c845 100644
--- a/sohstationviewer/model/mseed_data/mseed.py
+++ b/sohstationviewer/model/mseed_data/mseed.py
@@ -6,6 +6,7 @@ import re
 from pathlib import Path
 from typing import Dict, List
 
+from sohstationviewer.database.process_db import execute_db_dict
 from sohstationviewer.view.util.enums import LogType
 
 from sohstationviewer.model.general_data.general_data import GeneralData
@@ -29,6 +30,20 @@ class MSeed(GeneralData):
         self.nets_by_sta: Dict[str, List[str]] = {}
         self.invalid_blockettes = False
         self.not_mseed_files = []
+        # Map each channel to whether all of its data points should be read.
+        channels_sql = (
+            f"SELECT channel, readAllPoints "
+            f"FROM Channels "
+            f"WHERE dataType='{self.data_type}'"
+        )
+        channels_db_info = execute_db_dict(channels_sql)
+        self.channels_db_info = {
+            channel_info['channel']: int(channel_info['readAllPoints'])
+            for channel_info in channels_db_info
+        }
+        # We never want to read all data points of waveform channels, so we
+        # remove the 'SEISMIC' entry preemptively.
+        self.channels_db_info.pop('SEISMIC', None)
         self.processing_data()
 
     def finalize_data(self):
@@ -86,7 +101,10 @@ class MSeed(GeneralData):
             mass_pos_data=self.mass_pos_data,
             waveform_data=self.waveform_data,
             log_data=self.log_data,
-            gap_minimum=self.gap_minimum)
+            gap_minimum=self.gap_minimum,
+            channels_db_info=self.channels_db_info,
+            data_type=self.data_type,
+        )
         try:
             reader.read()
             self.invalid_blockettes = (self.invalid_blockettes
diff --git a/sohstationviewer/model/mseed_data/mseed_reader.py b/sohstationviewer/model/mseed_data/mseed_reader.py
index 45a3243e..8a52a350 100644
--- a/sohstationviewer/model/mseed_data/mseed_reader.py
+++ b/sohstationviewer/model/mseed_data/mseed_reader.py
@@ -5,6 +5,8 @@ from pathlib import Path
 import numpy
 from obspy import UTCDateTime
 
+from sohstationviewer.database.extract_data import \
+    convert_actual_channel_to_db_channel_w_question_mark
 from sohstationviewer.model.mseed_data.record_reader import RecordReader
 from sohstationviewer.model.mseed_data.record_reader_helper import \
     RecordMetadata
@@ -48,7 +50,9 @@ class MSeedReader:
                  mass_pos_data: Dict = {},
                  waveform_data: Dict = {},
                  log_data: LogData = {},
-                 gap_minimum: Optional[float] = None
+                 gap_minimum: Optional[float] = None,
+                 channels_db_info: Dict = None,
+                 data_type: str = None,
                  ) -> None:
         """
         The object of the class is to read data from given file to add
@@ -74,6 +78,9 @@ class MSeedReader:
         :param log_data: data dict of log_data
         :param gap_minimum: minimum length of gaps required to detect
             from record
+        :param channels_db_info: dict mapping each database channel name to
+            a flag telling whether to read all data points from that channel;
+            this is currently the only information it carries.
         """
         self.read_start = read_start
         self.read_end = read_end
@@ -89,6 +96,8 @@ class MSeedReader:
         self.log_data = log_data
         self.file_path = file_path
         self.file: BinaryIO = open(file_path, 'rb')
+        self.channels_db_info = channels_db_info
+        self.data_type = data_type
 
         self.invalid_blockettes = False,
 
@@ -319,18 +328,24 @@ class MSeedReader:
                     continue
                 else:
                     break
-            if data_dict is self.waveform_data:
-                data_points = list(record.get_two_data_points())
-                times = [record.record_metadata.start_time,
-                         record.record_metadata.end_time]
-            else:
+
+            # Some channels are represented differently in the database, so we
+            # need to convert them to that representation. One such example is
+            # VM1 for Centaur, which is represented by VM? in the database.
+            # Waveform channels also work like this, but because we never want
+            # to read all the data points in a waveform channel, we ignore them
+            # in this conversion.
+            db_channel = convert_actual_channel_to_db_channel_w_question_mark(
+                record.record_metadata.channel, self.data_type
+            )
+            if self.channels_db_info.get(db_channel):
                 try:
                     data_points = record.get_data_points()
                     sample_interval = 1 / record.record_metadata.sample_rate
                     times = numpy.arange(
-                            record.record_metadata.start_time,
-                            record.record_metadata.end_time,
-                            sample_interval).tolist()
+                        record.record_metadata.start_time,
+                        record.record_metadata.end_time,
+                        sample_interval).tolist()
                     # The previous calculation will always miss one time point
                     # at the end. We can adjust the stop argument to capture
                     # that point in one calculation, but the code for that is a
@@ -340,6 +355,10 @@ class MSeedReader:
                 except ZeroDivisionError:
                     data_points = None
                     times = None
+            else:
+                data_points = list(record.get_two_data_points())
+                times = [record.record_metadata.start_time,
+                         record.record_metadata.end_time]
             self.append_data(data_dict, record, data_points, times)
             self.append_log(record)
 
-- 
GitLab