From 023ebcd1cd150fd6961e79d5e5754c43e550b188 Mon Sep 17 00:00:00 2001
From: Andrew Tridgell <andrew@tridgell.net>
Date: Mon, 28 Feb 2022 14:24:38 +1100
Subject: [PATCH] speed up parsing of bin logs

use a dictionary of sets to record which instances of a message have
been seen before. This avoids us having to parse all instance based
messages fully during the load.

as we have added more instance messages the time to load the log has
become progressively longer. With this change it now becomes short
again

on a recent 160MByte quadplane log the time to load the log with this
change drops from 39.9s to 6.5s
---
 DFReader.py | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/DFReader.py b/DFReader.py
index 8205df1e..f40e2949 100644
--- a/DFReader.py
+++ b/DFReader.py
@@ -113,6 +113,16 @@ class DFFormat(object):
         instance_idx = unit_ids.find('#')
         if instance_idx != -1:
             self.instance_field = self.columns[instance_idx]
+            # work out offset and length of instance field in message
+            pre_fmt = self.format[:instance_idx]
+            pre_sfmt = ""
+            for c in pre_fmt:
+                (s, mul, type) = FORMAT_TO_STRUCT[c]
+                pre_sfmt += s
+            self.instance_ofs = struct.calcsize(pre_sfmt)
+            (ifmt,) = self.format[instance_idx]
+            self.instance_len = struct.calcsize(ifmt)
+
 
     def set_mult_ids(self, mult_ids):
         '''set mult IDs string from FMTU'''
@@ -759,6 +769,7 @@ class DFReader_binary(DFReader):
         self._count = 0
         self.name_to_id = {}
         self.id_to_name = {}
+        type_instances = {}
         for i in range(256):
             self.offsets.append([])
             self.counts.append(0)
@@ -794,7 +805,15 @@ class DFReader_binary(DFReader):
                 fmt = self.formats[mtype]
                 lengths[mtype] = fmt.len
             elif self.formats[mtype].instance_field is not None:
-                self._parse_next()
+                fmt = self.formats[mtype]
+                # see if we've has this instance value before
+                idata = self.data_map[ofs+3+fmt.instance_ofs:ofs+3+fmt.instance_ofs+fmt.instance_len]
+                if not mtype in type_instances:
+                    type_instances[mtype] = set()
+                if not idata in type_instances[mtype]:
+                    # its a new one, need to parse it so we have the complete set of instances
+                    type_instances[mtype].add(idata)
+                    self._parse_next()
 
             self.counts[mtype] += 1
             mlen = lengths[mtype]
-- 
GitLab