diff --git a/HISTORY.rst b/HISTORY.rst index 158c12cd5c93c27ecc13f0c418b34d0242f8c3a0..e3a5348c48818e486a566df3f5c20c683861af2c 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -16,3 +16,9 @@ History ------------------ * Minor fixes of global variables + +2018.228 (2018-09-17) +------------------ + +* Python 2.7 & 3.6 compatibility. +* Code cleanup to match PEP 8. diff --git a/docs/conf.py b/docs/conf.py old mode 100755 new mode 100644 diff --git a/refscrub/__init__.py b/refscrub/__init__.py index 384ebd4b593cd0e4342bbd51910853f15ed33099..41aa9e9d143e7571a17e3ad6b5838e2a20f2049f 100644 --- a/refscrub/__init__.py +++ b/refscrub/__init__.py @@ -4,4 +4,4 @@ __author__ = """IRIS PASSCAL""" __email__ = 'software-support@passcal.nmt.edu' -__version__ = '2018.204' +__version__ = '2018.228' diff --git a/refscrub/refscrub.py b/refscrub/refscrub.py old mode 100755 new mode 100644 index ba46a854e99619a3b1d8b775a5d1c246e65425b1..ae66381824d1118f8bea58d5b22d881dc4375399 --- a/refscrub/refscrub.py +++ b/refscrub/refscrub.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -''' +""" Lloyd Carothers February 2011 Rewrite of original refscrub @@ -7,53 +7,65 @@ Rewrite of original refscrub Derick Hess July 2018 Minor fixes for new build system -''' -from __future__ import with_statement -USAGE = 'usage: %prog [options] infile1 [ infile2 ... 
infileN]' +PROG_VERSION = '2018.228' VERBOSE = False EXTRACT = False SUMMARY_FILE = 'scrubsum.txt' +PREFIX = "" +FILESIZE = 0 +TABS = "\t\t\t\t" -tabs = "\t\t\t\t" - -from os.path import join, basename, getsize -import sys -import struct -class SN(): - def __init__(self, SN): +class SN: + def __init__(self, sn): global EXTRACT global PREFIX - self.SN = SN + self.sn = sn self.count = 0 self.dates = list() if EXTRACT: - self.outfh = open( PREFIX + '.' + SN +'.scrub.ref', 'ab') + self.outfh = open(PREFIX + '.' + sn + '.scrub.ref', 'ab') else: self.outfh = None + def look(self, year, day, hour): self.dates.append((year, day, hour)) self.count += 1 + def close_fh(self): if self.outfh is not None: self.outfh.close() + def __str__(self): self.dates = list(set(self.dates)) self.dates.sort() start = "%0.2d:%0.3d:%0.2d" % (self.dates[0]) - end = "%0.2d:%0.3d:%0.2d" % (self.dates[-1]) - return "%7s: %s -- %s : %12d\n" % (self.SN , start , end, self.count ) + end = "%0.2d:%0.3d:%0.2d" % (self.dates[-1]) + return "%7s: %s -- %s : %12d\n" % (self.sn, start, end, self.count) + class SNseen(dict): def look(self, sn, year, day, hour): - ret = self.setdefault(sn,SN(sn) ) - ret.look( year, day, hour) + ret = self.setdefault(sn, SN(sn)) + ret.look(year, day, hour) + def close_fhs(self): for sn in self.values(): sn.close_fh() + def __str__(self): s = " SN: YR:DAY:HR -- YR:DAY:HR : %12s\n" % 'Good Packets' for sn in self.values(): @@ -61,122 +73,115 @@ class SNseen(dict): return s -##class SNseen(dict): -## def __init__(self): -## self.outfiledict = dict() -## def look(self, SN, year, day, hour): -## ret = self.setdefault(SN, list()) -## #make a file for a new seen SN -## if EXTRACT and not self.outfiledict.has_key(SN) : -## self.outfiledict[SN] = open( PREFIX + '.' 
+ SN +'.scrub.ref', 'ab') -## if (year, day, hour) not in ret: -## self[SN].append((year, day, hour)) -## def close_fhs(self): -## for fh in self.outfiledict.values(): -## fh.close() -## def __str__(self): -## s = " SN: YR:DAY:HR -- YR:DAY:HR\n" -## for sn in self.keys(): -## dates = self[sn] -## dates.sort() -## start = "%0.2d:%0.3d:%0.2d" % (dates[0]) -## end = "%0.2d:%0.3d:%0.2d" % (dates[-1]) -## s += "%7s: %s -- %s\n" % (sn , start , end ) -## return s - class RTPacket: RTstruct = struct.Struct('2c1B1B2B6B2B2B') - #packetHdrFmt = '2c1B1B2B6B2B2B' - packet_types = ( 'AD', 'CD', 'DS', 'DT', 'EH', 'ET', 'OM', 'SH', 'SC') - seen = SNseen() + # Define the packet types as byte literals, since that is what the above structure will cause the type field + # to be decoded as. Without this, they will not match below. + packet_types = (b'AD', b'CD', b'DS', b'DT', b'EH', b'ET', b'OM', b'SH', b'SC') + seen = SNseen() goodpkts = 0 IOErrorCount = 0 + def __init__(self, data): self.data = data self.valid = None + self.timestring = None + self.ptype = None + self.expnum = None + self.year = None + self.sn = None + self.day = None + self.hour = None + self.min = None + self.sec = None + self.millisec = None + self.bytecount = None + self.sequence = None + def __str__(self): self.settimestring() s = "%(sequence)s %(ptype)s %(sn)s %(timestring)s" % self.__dict__ return s + def write(self): outfh = RTPacket.seen[self.sn].outfh - #outfh = RTPacket.seen.outfiledict[self.sn] try: outfh.write(self.data) - except Exception, e: - print "Failed to write extracted data to %s" % outfh.filename - print e + except Exception as e: + print("Failed to write extracted data to %s" % outfh.filename) + print(e) + def settimestring(self): - self.timestring = "%(year)0.2d:%(day)0.3d:%(hour)0.2d:%(min)0.2d:%(sec)0.2d.%(millisec)0.3d" % self.__dict__ + self.timestring = "%(year)0.2d:%(day)0.3d:%(hour)0.2d:%(min)0.2d:%(sec)0.2d.%(millisec)0.3d" % self.__dict__ + def isvalid(self): - ''' + """ Returns 
True if a valid reftek packet (headers parse well and are valid) Also populates the objects attributes SN, time, etc. - ''' + """ global VERBOSE try: + # tup holds two 1-byte strings (the packet type characters) followed by integers. tup = RTPacket.RTstruct.unpack(self.data[:16]) - #tup = unpack( self.packetHdrFmt, self.data[:16]) self.ptype = tup[0] + tup[1] assert self.ptype in self.packet_types, "BAD packet type" self.expnum = int("%0.2X" % tup[2]) - self.year = int("%0.2X" %tup[3]) - self.sn = "%0.2X%0.2X" % (tup[4] , tup[5] ) + self.year = int("%0.2X" % tup[3]) + self.sn = "%0.2X%0.2X" % (tup[4], tup[5]) assert '9001' <= self.sn, "BAD SN" - assert self.sn <= 'FFFF' , "BAD SN" + assert self.sn <= 'FFFF', "BAD SN" time = "%0.2X%0.2X%0.2X%0.2X%0.2X%0.2X" % tup[6:12] self.day, self.hour, self.min, self.sec, self.millisec = \ - int(time[:3]),int(time[3:5]),int(time[5:7]),int(time[7:9]),int(time[9:]) + int(time[:3]), int(time[3:5]), int(time[5:7]), int(time[7:9]), int(time[9:]) assert self.day <= 366, "BAD TIME" - assert self.hour <=24, "BAD TIME" + assert self.hour <= 24, "BAD TIME" assert self.min <= 60, "BAD TIME" - assert self.sec <=80, "BAD TIME" - assert self.millisec <=1000, "BAD TIME" + assert self.sec <= 80, "BAD TIME" + assert self.millisec <= 1000, "BAD TIME" self.bytecount = int("%0.2X%0.2X" % tup[12:14]) assert self.bytecount >= 24, "BAD byte count" assert self.bytecount <= 1024, "BAD bytecount" self.sequence = int("%0.2X%0.2X" % tup[14:16]) - #not a valid packet - except Exception , e: + # not a valid packet + except Exception as e: if VERBOSE: - print tabs +"Not a valid REFTEK packet" - print tabs + str(e) + print(TABS + "Not a valid REFTEK packet") + print(TABS + str(e)) self.valid = False return False - #parsed well :: valid packet + # parsed well :: valid packet else: self.valid = True RTPacket.seen.look(self.sn, self.year, self.day, self.hour) - RTPacket.goodpkts +=1 + RTPacket.goodpkts += 1 return True + def readfile(infile): ticker = 1 - #speedup local vars global VERBOSE global EXTRACT - 
tell = infile.tell - read = infile.read - seek = infile.seek while True: ticker += 1 if VERBOSE is False and ticker == 10000: - print summary() + print(summary(infile)) ticker = 1 - if VERBOSE: print "OFFSET: %12d" % tell() + if VERBOSE: + print("OFFSET: %12d" % infile.tell()) # often disk wont read try: - buffer = read(1024) + buffer = infile.read(1024) except IOError: - print tabs + "IOError on read" - print tabs + "Skipping 1MB.." + print(TABS + "IOError on read") + print(TABS + "Skipping 1MB..") RTPacket.IOErrorCount += 1 - seek(1024*1024, 1) + infile.seek(1024 * 1024, 1) continue - #End of file + # End of file if len(buffer) < 1024: - if VERBOSE: print "Partial packet" + if VERBOSE: + print("Partial packet") break p = RTPacket(buffer) valid = p.isvalid() @@ -185,43 +190,46 @@ def readfile(infile): if EXTRACT: p.write() if VERBOSE: - print p - print "<<<<<<<" - #packet is bad + print(p) + print("<<<<<<<") + # packet is bad else: if VERBOSE: - print tabs + "NOT VALID" - print tabs + ">>>>>>>>" - seek(-1023, 1) + print(TABS + "NOT VALID") + print(TABS + ">>>>>>>>") + infile.seek(-1023, 1) - print summary() - #commented out as when run with many files as input like a cf dir the closed fh's need to be written to and isn't smart enough to be opened - #RTPacket.seen.close_fhs() + print(summary(infile)) + # Disabled: when run with many input files (e.g. a CF dir) the closed fh's still need to be written to and nothing + # reopens them. Disabled call: RTPacket.seen.close_fhs() -def summary(): - global infile + +def summary(infile): global FILESIZE - OFFSTRING = "%6s: %12d\n" - s = OFFSTRING % ( "OFFSET", infile.tell()) - s += OFFSTRING % ( "OF", FILESIZE) + offstring = "%6s: %12d\n" + s = offstring % ("OFFSET", infile.tell()) + s += offstring % ("OF", FILESIZE) s += "Good packets: %d = %8.2fMB\n" % (RTPacket.goodpkts, RTPacket.goodpkts / 1024.0) - s+= "IOErrors: %d\n" % (RTPacket.IOErrorCount) + s += "IOErrors: %d\n" % RTPacket.IOErrorCount s += str(RTPacket.seen) + '\n' 
return s + def main(): global VERBOSE global EXTRACT global PREFIX - global infile global FILESIZE + summaryfh = None from optparse import OptionParser parser = OptionParser(USAGE, version="%prog " + PROG_VERSION) parser.description = "infile can be a REFTEK file or a raw disk (/dev/disk1) of a CF card." parser.add_option('-v', '--verbose', dest="VERBOSE", action='store_true', default=False, help="Prints info about each packet, good or bad. This will increase runtime.") parser.add_option('-e', '--extract', dest='EXTRACT', action='store_true', default=False, - help='Writes good packets to files named infile.SNXX.scrub.ref OR PREFIX.SNXX.scrub.ref, if given, for each Serial Number found. If output file exists it will append. Be careful not to duplicate data by running more than once on the same file in the same dir.') + help='Writes good packets to files named infile.SNXX.scrub.ref OR PREFIX.SNXX.scrub.ref, ' + ' if given, for each Serial Number found. If output file exists it will append. Be careful ' + 'not to duplicate data by running more than once on the same file in the same dir.') parser.add_option('-p', '--prefix', dest='PREFIX', help='Prefix of output filename. Defaults to inputfilename') parser.add_option('-s', '--savesum', dest='SUMMARY', action='store_true', default=False, @@ -233,24 +241,20 @@ def main(): if options.SUMMARY: summaryfh = open(SUMMARY_FILE, 'a') for infilename in args: - print - "Processing: %s" % infilename + print("Processing: %s" % infilename) if not PREFIX: PREFIX = basename(infilename) - print - "Using prefix %s" % PREFIX + print("Using prefix %s" % PREFIX) FILESIZE = getsize(infilename) - with open(infilename) as infile: - # with open( infile.name + 'scrub.ref')) as outfile: - # import profile - # profile.run('readfile(infile)') + # Must open the file in binary mode or else we will have unicode issues when reading. 
+ with open(infilename, "rb") as infile: readfile(infile) if options.SUMMARY: summaryfh.write(infilename + ', ' + PREFIX + '\n') - summaryfh.write(summary()) + summaryfh.write(summary(infile)) + + print("----------------------------------------") - print - "----------------------------------------" if __name__ == '__main__': main() diff --git a/setup.cfg b/setup.cfg index 01be8885ce5ae39815a7295d2e34698c2975ddc9..371beb7b759af77d8f94aa900d751bf79f767bd9 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 2018.180 +current_version = 2018.228 commit = True tag = True diff --git a/setup.py b/setup.py index 194d54d9d5340156de7ab0a92601105d5cdb61a3..288cc8fc627f0e09fb47381efc903de58d87ed87 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,7 @@ setup( 'Intended Audience :: Developers', 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)', 'Natural Language :: English', - 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3.6', ], description="Remove select packets from RT130 data", entry_points={ @@ -51,6 +51,6 @@ setup( packages=find_packages(include=['refscrub']), test_suite='tests', url='https://git.passcal.nmt.edu/passoft/refscrub', - version='2018.204', + version='2018.228', zip_safe=False, )