From 3a2894db3219acf199ac63fd24dfe0caf8453524 Mon Sep 17 00:00:00 2001
From: Lan Dam <ldam@passcal.nmt.edu>
Date: Thu, 4 May 2023 19:35:17 -0600
Subject: [PATCH] separate data at gaps/overlaps

---
 sohstationviewer/model/data_type_model.py     | 37 +++++++----
 sohstationviewer/model/handling_data.py       | 52 +++++++++-------
 .../plotting_widget/plotting_widget.py        |  2 +
 .../test_handling_data_rearrange_data.py      | 61 ++++++++++++++++++-
 4 files changed, 116 insertions(+), 36 deletions(-)

diff --git a/sohstationviewer/model/data_type_model.py b/sohstationviewer/model/data_type_model.py
index 01f1869f5..df4ccf37f 100644
--- a/sohstationviewer/model/data_type_model.py
+++ b/sohstationviewer/model/data_type_model.py
@@ -16,7 +16,8 @@ from sohstationviewer.view.plotting.gps_plot.gps_point import GPSPoint
 from sohstationviewer.view.util.enums import LogType
 from sohstationviewer.database.process_db import execute_db
 from sohstationviewer.model.handling_data import (
-    combine_traces_except_overlap, sort_data, retrieve_gaps_from_stream_header)
+    combine_traces_except_gaps_overlaps, sort_data,
+    retrieve_gaps_from_stream_header)
 
 
 class ProcessingDataError(Exception):
@@ -89,9 +90,15 @@ class DataTypeModel():
         self.notification_signal = notification_signal
         self.pause_signal = pause_signal
 
+        """
+        gaps_by_key_chan: gap list for each key/chan_id to separate data at
+            gaps, overlaps
+        """
+        self.gaps_by_key_chan: Dict[Union[str, Tuple[str, str]],
+                                    Dict[str, List[List[int]]]] = {}
         """
         stream_header_by_key_chan: stream header by key, chan_id to get key
-        list, gaps by sta_id, nets by sta_id, channels by sta_id
+            list, gaps by sta_id, nets by sta_id, channels by sta_id
         """
         self.stream_header_by_key_chan: Dict[str, Dict[str, Stream]] = {}
         """
@@ -292,7 +299,7 @@ class DataTypeModel():
         """
         self.track_info("Retrieve gaps.", LogType.INFO)
         retrieve_gaps_from_stream_header(
-            self.stream_header_by_key_chan,
+            self.stream_header_by_key_chan, self.gaps_by_key_chan,
             self.gaps, self.read_start, self.read_end)
 
         self.track_info("Sort data.", LogType.INFO)
@@ -457,29 +464,35 @@ class DataTypeModel():
             self.processing_log.append((msg, LogType.WARNING))
 
     def combine_times_data_of_traces_w_spr_less_or_equal_1(
-            self, sta_data: Dict[str, Dict], data_name: str):
+            self, data: Dict[str, Dict],
+            selected_key: Union[Tuple[str, str], str], data_name: str):
         """
         Create plotting times and data for channels with samplerate
             less than or equal to 1, in which plotting times is
             all traces' time combined together but split where there is
             overlap to prevent plotting line go back and fort,
             and same for plotting data.
-        :param sta_data: chan_data of a station by chan_id
-        :param data_name: name of data (Waveform, SOH, Mass Position)
+        :param data: one of waveform_data, soh_data, mass_pos_data
+        :param selected_key: key used to select from data and gaps_by_key_chan
+        :param data_name: name of data (Waveform, SOH, Mass Position) to show
+            info
 
         :return: the result plotting times and data of each channel will be
             used to create times and data items of the channel.
         """
+        selected_data = data[selected_key]
+        selected_gaps = self.gaps_by_key_chan[selected_key]
         if self.creator_thread.isInterruptionRequested():
             raise ThreadStopped()
         self.track_info(
             f'{data_name}: Combine traces with samplerate < 1', LogType.INFO)
 
-        for chan_id in sta_data:
-            chan_data = sta_data[chan_id]
+        for chan_id in selected_data:
+            chan_data = selected_data[chan_id]
             if chan_data['samplerate'] > 1:
                 continue
-            new_traces = combine_traces_except_overlap(chan_data['tracesInfo'])
+            new_traces = combine_traces_except_gaps_overlaps(
+                chan_data['tracesInfo'], selected_gaps[chan_id])
             chan_data['tracesInfo'] = new_traces
 
     def sort_all_data(self):
@@ -507,14 +520,14 @@ class DataTypeModel():
         """
         if self.selected_key in self.waveform_data.keys():
             self.combine_times_data_of_traces_w_spr_less_or_equal_1(
-                self.waveform_data[self.selected_key], 'Waveform')
+                self.waveform_data, self.selected_key, 'Waveform')
         if self.selected_key in self.mass_pos_data.keys():
             self.combine_times_data_of_traces_w_spr_less_or_equal_1(
-                self.mass_pos_data[self.selected_key], 'Mass Possition')
+                self.mass_pos_data, self.selected_key, 'Mass Possition')
         try:
             if self.selected_key in self.soh_data.keys():
                 self.combine_times_data_of_traces_w_spr_less_or_equal_1(
-                    self.soh_data[self.selected_key], 'SOH')
+                    self.soh_data, self.selected_key, 'SOH')
         except KeyError:
             # Reftek's SOH trace doesn't have startTmEpoch and
             # actually soh_data consists of only one trace
diff --git a/sohstationviewer/model/handling_data.py b/sohstationviewer/model/handling_data.py
index f60d6e533..2092766e4 100644
--- a/sohstationviewer/model/handling_data.py
+++ b/sohstationviewer/model/handling_data.py
@@ -415,12 +415,14 @@ def convert_reftek_masspos_data(data: np.ndarray) -> Dict:
     return np.round_(data / 32767.0 * 10.0, 1)
 
 
-def combine_traces_except_overlap(traces: List[Dict]) -> List[Dict]:
+def combine_traces_except_gaps_overlaps(
+        traces: List[Dict], gaps: List[List[float]]) -> List[Dict]:
     """
-    Return new list of traces in which traces are combined together but split
-        into different trace at overlap
+    Return new list of traces for a channel in which traces are combined
+        together but split into different traces at gaps/overlaps.
 
-    :param traces: list of traces
+    :param traces: list of traces of the channel
+    :param gaps: list of gaps of the channel
     :return new_traces: list of new traces
     """
     end_epoch = 0
@@ -429,16 +431,12 @@ def combine_traces_except_overlap(traces: List[Dict]) -> List[Dict]:
     start_epoch = traces[0]['startTmEpoch']
     size = 0
     new_traces = []
-
+    curr_gap_idx = 0
     for idx, tr in enumerate(traces):
-        if tr['startTmEpoch'] > end_epoch:
-            # not overlap: end of a trace not greater than beginning
-            #   of next trace => combine trace
-            current_times.append(tr['times'])
-            current_data.append(tr['data'])
-        else:
-            # overlap: fulfill trace to start next trace
-            if len(current_times) > 0:
+        try:
+            start_in_gap = (min(gaps[curr_gap_idx]) <= tr['startTmEpoch']
+                            <= max(gaps[curr_gap_idx]))
+            if start_in_gap and size != 0:
                 new_tr = {'samplerate': tr['samplerate'],
                           'startTmEpoch': start_epoch,
                           'endTmEpoch': end_epoch,
@@ -446,14 +444,18 @@ def combine_traces_except_overlap(traces: List[Dict]) -> List[Dict]:
                           'times': np.hstack(current_times),
                           'data': np.hstack(current_data)}
                 new_traces.append(new_tr)
-            # startTmEpoch of the first tr combined
-            start_epoch = tr['startTmEpoch']
-            size = 0
-            current_times = [tr['times']]
-            current_data = [tr['data']]
-        # endTmEpoch of the last tr combined
-        end_epoch = tr['endTmEpoch']
+                curr_gap_idx += 1
+                start_epoch = tr['startTmEpoch']
+                current_times = []
+                current_data = []
+                size = 0
+        except IndexError:
+            pass
+        current_times.append(tr['times'])
+        current_data.append(tr['data'])
         size += tr['size']
+        end_epoch = tr['endTmEpoch']
+
     new_tr = {
         'samplerate': tr['samplerate'],
         'startTmEpoch': start_epoch,
@@ -992,21 +994,25 @@ def find_tps_tm_idx(
 
 def retrieve_gaps_from_stream_header(
         streams: Dict[str, Dict[str, Stream]],
+        gaps_by_key_chan: Dict[Union[str, Tuple[str, str]],
+                               Dict[str, List[List[int]]]],
         gaps: Dict[str, List[List[float]]],
         read_start: Optional[float], read_end: Optional[float]) -> \
         Dict[str, List[List[float]]]:
     """
     Retrieve gaps by sta_id from stream_header_by_key_chan
 
-    :param: dict of stream header by sta, chan
-    :return all_gaps: list of gaps by sta_id
+    :param streams: dict of stream header by sta, chan
+    :param gaps_by_key_chan: gaps list by key and channel id
+    :param gaps: gaps list by key
     """
     for sta_id in streams:
         sta_gaps = []
+        gaps_by_key_chan[sta_id] = {}
         for chan_id in streams[sta_id]:
             stream = streams[sta_id][chan_id]
             gaps_in_stream = stream.get_gaps()
-            stream_gaps = [
+            gaps_by_key_chan[sta_id][chan_id] = stream_gaps = [
                 [g[4].timestamp, g[5].timestamp] for g in gaps_in_stream
                 if read_start <= min(g[4].timestamp, g[5].timestamp) <= read_end or  # noqa
                    read_start <= max(g[4].timestamp, g[5].timestamp) <= read_end]    # noqa
diff --git a/sohstationviewer/view/plotting/plotting_widget/plotting_widget.py b/sohstationviewer/view/plotting/plotting_widget/plotting_widget.py
index ee20d2c43..9cc7a78fb 100755
--- a/sohstationviewer/view/plotting/plotting_widget/plotting_widget.py
+++ b/sohstationviewer/view/plotting/plotting_widget/plotting_widget.py
@@ -580,6 +580,8 @@ class PlottingWidget(QtWidgets.QScrollArea):
                     tr_min_ys = []
                     tr_max_ys = []
                     for x, y in zip(ax.x_list, ax.y_list):
+                        if len(x) == 0:
+                            continue
                         if self.min_x > x[-1] or self.max_x < x[0]:
                             continue
                         ret = get_total_miny_maxy(x, y, self.min_x, self.max_x)
diff --git a/tests/test_model/test_handling_data_rearrange_data.py b/tests/test_model/test_handling_data_rearrange_data.py
index 325d7ef98..68fe5eac0 100644
--- a/tests/test_model/test_handling_data_rearrange_data.py
+++ b/tests/test_model/test_handling_data_rearrange_data.py
@@ -1,7 +1,9 @@
+import numpy as np
 from unittest import TestCase
 
 from sohstationviewer.model.handling_data import (
-    sort_data, check_related_gaps, squash_gaps
+    sort_data, check_related_gaps, squash_gaps,
+    combine_traces_except_gaps_overlaps
 )
 
 
@@ -84,3 +86,60 @@ class TestSquashGaps(TestCase):
     def test_mixed_gaps(self):
         gaps = squash_gaps((self.mixed_gaps))
         self.assertEqual(gaps, [[3, 8], [18, 13]])
+
+
+class TestCombineTracesExceptGapsOverlaps(TestCase):
+    def test_combine(self):
+        traces = [
+            {'samplerate': 1, 'startTmEpoch': 1, 'endTmEpoch': 2, 'size': 2,
+             'times': np.array([1, 2]), 'data': np.array([0, 0])},
+            # overlap: start is within gaps[0] => new trace starts here
+            {'samplerate': 1, 'startTmEpoch': 1, 'endTmEpoch': 3, 'size': 3,
+             'times': np.array([1, 2, 3]), 'data': np.array([1, 1, 1])},
+            {'samplerate': 1, 'startTmEpoch': 4, 'endTmEpoch': 6, 'size': 3,
+             'times': np.array([4, 5, 6]), 'data': np.array([2, 2, 2])},
+            # gap: start is within gaps[1] => new trace starts here
+            {'samplerate': 1, 'startTmEpoch': 8, 'endTmEpoch': 10, 'size': 3,
+             'times': np.array([8, 9, 10]), 'data': np.array([3, 3, 3])},
+            {'samplerate': 1, 'startTmEpoch': 11, 'endTmEpoch': 13, 'size': 3,
+             'times': np.array([11, 12, 13]), 'data': np.array([4, 4, 4])},
+            # overlap: start is within gaps[2] => new trace starts here
+            {'samplerate': 1, 'startTmEpoch': 12, 'endTmEpoch': 14, 'size': 3,
+             'times': np.array([12, 13, 14]), 'data': np.array([5, 5, 5])}
+        ]
+        gaps = [[2, 1], [6, 8], [13, 12]]
+        new_traces = combine_traces_except_gaps_overlaps(traces, gaps)
+
+        self.assertEqual(len(new_traces), 4)
+        # Trace 0 alone (split off by the overlap with trace 1)
+        self.assertEqual(new_traces[0]['samplerate'], 1)
+        self.assertEqual(new_traces[0]['startTmEpoch'], 1)
+        self.assertEqual(new_traces[0]['endTmEpoch'], 2)
+        self.assertEqual(new_traces[0]['size'], 2)
+        self.assertEqual(new_traces[0]['times'].tolist(), [1, 2])
+        self.assertEqual(new_traces[0]['data'].tolist(), [0, 0])
+
+        # Traces 1 & 2 combined (no gap entry between them)
+        self.assertEqual(new_traces[1]['samplerate'], 1)
+        self.assertEqual(new_traces[1]['startTmEpoch'], 1)
+        self.assertEqual(new_traces[1]['endTmEpoch'], 6)
+        self.assertEqual(new_traces[1]['size'], 6)
+        self.assertEqual(new_traces[1]['times'].tolist(), [1, 2, 3, 4, 5, 6])
+        self.assertEqual(new_traces[1]['data'].tolist(), [1, 1, 1, 2, 2, 2])
+
+        # Traces 3 & 4 combined (no gap entry between them)
+        self.assertEqual(new_traces[2]['samplerate'], 1)
+        self.assertEqual(new_traces[2]['startTmEpoch'], 8)
+        self.assertEqual(new_traces[2]['endTmEpoch'], 13)
+        self.assertEqual(new_traces[2]['size'], 6)
+        self.assertEqual(new_traces[2]['times'].tolist(),
+                         [8, 9, 10, 11, 12, 13])
+        self.assertEqual(new_traces[2]['data'].tolist(), [3, 3, 3, 4, 4, 4])
+
+        # Trace 5 alone (split off by the overlap with trace 4)
+        self.assertEqual(new_traces[3]['samplerate'], 1)
+        self.assertEqual(new_traces[3]['startTmEpoch'], 12)
+        self.assertEqual(new_traces[3]['endTmEpoch'], 14)
+        self.assertEqual(new_traces[3]['size'], 3)
+        self.assertEqual(new_traces[3]['times'].tolist(), [12, 13, 14])
+        self.assertEqual(new_traces[3]['data'].tolist(), [5, 5, 5])
-- 
GitLab