diff --git a/refscrub/refscrub.py b/refscrub/refscrub.py index 6e0ff373d826c2c515fedcc1a6ed9204f66e6dd1..ae66381824d1118f8bea58d5b22d881dc4375399 100644 --- a/refscrub/refscrub.py +++ b/refscrub/refscrub.py @@ -73,33 +73,11 @@ class SNseen(dict): return s -# class SNseen(dict): -# def __init__(self): -# self.outfiledict = dict() -# def look(self, SN, year, day, hour): -# ret = self.setdefault(SN, list()) -# #make a file for a new seen SN -# if EXTRACT and not self.outfiledict.has_key(SN) : -# self.outfiledict[SN] = open( PREFIX + '.' + SN +'.scrub.ref', 'ab') -# if (year, day, hour) not in ret: -# self[SN].append((year, day, hour)) -# def close_fhs(self): -# for fh in self.outfiledict.values(): -# fh.close() -# def __str__(self): -# s = " SN: YR:DAY:HR -- YR:DAY:HR\n" -# for sn in self.keys(): -# dates = self[sn] -# dates.sort() -# start = "%0.2d:%0.3d:%0.2d" % (dates[0]) -# end = "%0.2d:%0.3d:%0.2d" % (dates[-1]) -# s += "%7s: %s -- %s\n" % (sn , start , end ) -# return s - class RTPacket: RTstruct = struct.Struct('2c1B1B2B6B2B2B') - # packetHdrFmt = '2c1B1B2B6B2B2B' - packet_types = ('AD', 'CD', 'DS', 'DT', 'EH', 'ET', 'OM', 'SH', 'SC') + # Define the packet types as byte literals, since that is what the above structure will cause the type field + # to be decoded as. Without this, they will not match below. + packet_types = (b'AD', b'CD', b'DS', b'DT', b'EH', b'ET', b'OM', b'SH', b'SC') seen = SNseen() goodpkts = 0 IOErrorCount = 0 @@ -127,7 +105,6 @@ class RTPacket: def write(self): outfh = RTPacket.seen[self.sn].outfh - # outfh = RTPacket.seen.outfiledict[self.sn] try: outfh.write(self.data) except Exception as e: @@ -145,8 +122,8 @@ class RTPacket: global VERBOSE try: + # tup[0] and tup[1] (the two 'c' fields) unpack as bytes objects, so ptype below is bytes. 
tup = RTPacket.RTstruct.unpack(self.data[:16]) - # tup = unpack( self.packetHdrFmt, self.data[:16]) self.ptype = tup[0] + tup[1] assert self.ptype in self.packet_types, "BAD packet type" self.expnum = int("%0.2X" % tup[2]) @@ -183,27 +160,23 @@ class RTPacket: def readfile(infile): ticker = 1 - # speedup local vars global VERBOSE global EXTRACT - tell = infile.tell - read = infile.read - seek = infile.seek while True: ticker += 1 if VERBOSE is False and ticker == 10000: print(summary(infile)) ticker = 1 if VERBOSE: - print("OFFSET: %12d" % tell()) + print("OFFSET: %12d" % infile.tell()) # often disk wont read try: - buffer = read(1024) + buffer = infile.read(1024) except IOError: print(TABS + "IOError on read") print(TABS + "Skipping 1MB..") RTPacket.IOErrorCount += 1 - seek(1024 * 1024, 1) + infile.seek(1024 * 1024, 1) continue # End of file if len(buffer) < 1024: @@ -224,7 +197,7 @@ def readfile(infile): if VERBOSE: print(TABS + "NOT VALID") print(TABS + ">>>>>>>>") - seek(-1023, 1) + infile.seek(-1023, 1) print(summary(infile)) # commented out as when run with many files as input like a cf dir the closed fh's need to be written to and isn't @@ -273,7 +246,8 @@ def main(): PREFIX = basename(infilename) print("Using prefix %s" % PREFIX) FILESIZE = getsize(infilename) - with open(infilename) as infile: + # Must open the file in binary mode or else we will have unicode issues when reading. + with open(infilename, "rb") as infile: readfile(infile) if options.SUMMARY: summaryfh.write(infilename + ', ' + PREFIX + '\n')