diff --git a/pyaccuwage/__init__.py b/pyaccuwage/__init__.py index a869047..fae797e 100644 --- a/pyaccuwage/__init__.py +++ b/pyaccuwage/__init__.py @@ -1,5 +1,6 @@ -from record import * -from reader import RecordReader +from .record import * +from .reader import RecordReader +import collections VERSION = (0, 2012, 0) @@ -16,33 +17,33 @@ RECORD_TYPES = [ ] def test(): - import record, model - from fields import ValidationError + from . import record, model + from .fields import ValidationError for rname in RECORD_TYPES: inst = record.__dict__[rname]() try: output_length = len(inst.output()) - except ValidationError, e: - print e.msg, type(inst), inst.record_identifier + except ValidationError as e: + print(e.msg, type(inst), inst.record_identifier) continue - print type(inst), inst.record_identifier, output_length + print(type(inst), inst.record_identifier, output_length) def test_dump(): - import record, StringIO + import record, io records = [ record.SubmitterRecord(), record.EmployerRecord(), record.EmployeeWageRecord(), ] - out = StringIO.StringIO() + out = io.StringIO() dump(records, out) return out def test_record_order(): - import record + from . import record records = [ record.SubmitterRecord(), record.EmployerRecord(), @@ -58,7 +59,7 @@ def test_load(fp): def load(fp): # BUILD LIST OF RECORD TYPES - import record + from . import record types = {} for r in RECORD_TYPES: klass = record.__dict__[r] @@ -73,8 +74,8 @@ def load(fp): yield record def loads(s): - import StringIO - fp = StringIO.StringIO(s) + import io + fp = io.StringIO(s) return load(fp) @@ -83,21 +84,21 @@ def dump(records, fp): fp.write(r.output()) def dumps(records): - import StringIO - fp = StringIO.StringIO() + import io + fp = io.StringIO() dump(records, fp) fp.seek(0) return fp.read() def json_dumps(records): import json - import model + from . import model import decimal class JSONEncoder(json.JSONEncoder): def default(self, o): - if hasattr(o, 'toJSON') and callable(getattr(o, 'toJSON')): + if hasattr(o, 'toJSON') and isinstance(getattr(o, 'toJSON'), collections.Callable): return o.toJSON() elif isinstance(o, decimal.Decimal): @@ -110,7 +111,7 @@ def json_dumps(records): def json_loads(s, record_classes): import json - import fields + from . import fields import decimal import re @@ -151,14 +152,14 @@ def validate_required_records(records): while req_types: req = req_types[0] if req not in types: - from fields import ValidationError + from .fields import ValidationError raise ValidationError("Record set missing required record: %s" % req) else: req_types.remove(req) def validate_record_order(records): - import record - from fields import ValidationError + from . import record + from .fields import ValidationError # 1st record must be SubmitterRecord if not isinstance(records[0], record.SubmitterRecord): @@ -178,10 +179,10 @@ def validate_record_order(records): if not isinstance(records[i+1], record.EmployeeWageRecord): raise ValidationError("All EmployerRecords must be followed by an EmployeeWageRecord") - num_ro_records = len(filter(lambda x:isinstance(x, record.OptionalEmployeeWageRecord), records)) - num_ru_records = len(filter(lambda x:isinstance(x, record.OptionalTotalRecord), records)) - num_employer_records = len(filter(lambda x:isinstance(x, record.EmployerRecord), records)) - num_total_records = len(filter(lambda x: isinstance(x, record.TotalRecord), records)) + num_ro_records = len([x for x in records if isinstance(x, record.OptionalEmployeeWageRecord)]) + num_ru_records = len([x for x in records if isinstance(x, record.OptionalTotalRecord)]) + num_employer_records = len([x for x in records if isinstance(x, record.EmployerRecord)]) + num_total_records = len([x for x in records if isinstance(x, record.TotalRecord)]) # a TotalRecord is required for each instance of an EmployeeRecord if num_total_records != num_employer_records: @@ -194,7 +195,7 @@ def validate_record_order(records): num_ro_records, num_ru_records)) # FinalRecord - Must appear only once on each file. - if len(filter(lambda x:isinstance(x, record.FinalRecord), records)) != 1: + if len([x for x in records if isinstance(x, record.FinalRecord)]) != 1: raise ValidationError("Incorrect number of FinalRecords") def validate_records(records): @@ -207,8 +208,8 @@ def test_unique_fields(): r1.employee_first_name.value = "John Johnson" r2 = EmployeeWageRecord() - print 'r1:', r1.employee_first_name.value, r1.employee_first_name, r1.employee_first_name.creation_counter - print 'r2:', r2.employee_first_name.value, r2.employee_first_name, r2.employee_first_name.creation_counter + print('r1:', r1.employee_first_name.value, r1.employee_first_name, r1.employee_first_name.creation_counter) + print('r2:', r2.employee_first_name.value, r2.employee_first_name, r2.employee_first_name.creation_counter) if r1.employee_first_name.value == r2.employee_first_name.value: raise ValidationError("Horrible problem involving shared values across records") diff --git a/pyaccuwage/fields.py b/pyaccuwage/fields.py index 6f8ffec..ff57391 100644 --- a/pyaccuwage/fields.py +++ b/pyaccuwage/fields.py @@ -1,6 +1,6 @@ import decimal, datetime import inspect -import enums +from . import enums class ValidationError(Exception): def __init__(self, msg, field=None): @@ -76,7 +76,7 @@ class Field(object): required=o['required'], ) - if isinstance(o['value'], basestring) and re.match('^\d*\.\d*$', o['value']): + if isinstance(o['value'], str) and re.match('^\d*\.\d*$', o['value']): o['value'] = decimal.Decimal(o['value']) self.value = o['value'] @@ -92,11 +92,11 @@ class Field(object): value = wrapper.wrap(value) #value = textwrap.wrap(value, 100) #print value - value = list(map(lambda x:(" " * 9) + ('"' + x + '"'), value)) + value = list([(" " * 9) + ('"' + x + '"') for x in value]) #value[0] = '"' + value[0] + '"' value.append(" " * 10 + ('_' * 10) * (wrapper.width / 10)) value.append(" " * 10 + ('0123456789') * (wrapper.width / 10)) - value.append(" " * 10 + ''.join((map(lambda x:str(x) + (' ' * 9), range(wrapper.width / 10 ))))) + value.append(" " * 10 + ''.join(([str(x) + (' ' * 9) for x in range(wrapper.width / 10 )]))) #value.append((" " * 59) + map(lambda x:("%x" % x), range(16)) start = counter['c'] @@ -130,7 +130,7 @@ class TextField(Field): # NO NEWLINES try: value = value.replace('\n', '').replace('\r', '') - except AttributeError, e: + except AttributeError as e: pass self._value = value @@ -154,12 +154,12 @@ class StateField(TextField): def validate(self): super(StateField, self).validate() - if self.value and self.value.upper() not in enums.state_postal_numeric.keys(): + if self.value and self.value.upper() not in list(enums.state_postal_numeric.keys()): raise ValidationError("%s is not a valid state abbreviation" % self.value, field=self) def parse(self, s): if s.strip() and self.use_numeric: - states = dict( [(v,k) for (k,v) in enums.state_postal_numeric.items()] ) + states = dict( [(v,k) for (k,v) in list(enums.state_postal_numeric.items())] ) self.value = states[int(s)] else: self.value = s @@ -269,7 +269,7 @@ class DateField(TextField): def parse(self, s): if int(s) > 0: - self.value = datetime.date(*[int(x) for x in s[4:8], s[0:2], s[2:4]]) + self.value = datetime.date(*[int(x) for x in (s[4:8], s[0:2], s[2:4])]) else: self.value = None @@ -277,7 +277,7 @@ class DateField(TextField): if isinstance(value, datetime.date): self._value = value elif value: - self._value = datetime.date(*[int(x) for x in value[4:8], value[0:2], value[2:4]]) + self._value = datetime.date(*[int(x) for x in (value[4:8], value[0:2], value[2:4])]) else: self._value = None @@ -301,7 +301,7 @@ class MonthYearField(TextField): def parse(self, s): if int(s) > 0: - self.value = datetime.date(*[int(x) for x in s[2:6], s[0:2], 1]) + self.value = datetime.date(*[int(x) for x in (s[2:6], s[0:2], 1)]) else: self.value = None @@ -309,7 +309,7 @@ class MonthYearField(TextField): if isinstance(value, datetime.date): self._value = value elif value: - self._value = datetime.date(*[int(x) for x in value[2:6], value[0:2], 1]) + self._value = datetime.date(*[int(x) for x in (value[2:6], value[0:2], 1)]) else: self._value = None diff --git a/pyaccuwage/model.py b/pyaccuwage/model.py index d628711..103738e 100644 --- a/pyaccuwage/model.py +++ b/pyaccuwage/model.py @@ -1,6 +1,7 @@ -from fields import Field, TextField, ValidationError +from .fields import Field, TextField, ValidationError import copy import pdb +import collections class Model(object): @@ -9,7 +10,7 @@ class Model(object): target_size = 512 def __init__(self): - for (key, value) in self.__class__.__dict__.items(): + for (key, value) in list(self.__class__.__dict__.items()): if isinstance(value, Field): # GRAB THE FIELD INSTANCE FROM THE CLASS DEFINITION # AND MAKE A LOCAL COPY FOR THIS RECORD'S INSTANCE, @@ -33,7 +34,7 @@ class Model(object): identifier.value = self.record_identifier fields = [identifier] - for key in self.__class__.__dict__.keys(): + for key in list(self.__class__.__dict__.keys()): attr = getattr(self, key) if isinstance(attr, Field): fields.append(attr) @@ -50,9 +51,9 @@ class Model(object): try: custom_validator = getattr(self, 'validate_' + f.name) - except AttributeError, e: + except AttributeError as e: continue - if callable(custom_validator): + if isinstance(custom_validator, collections.Callable): custom_validator(f) def output(self): @@ -85,7 +86,7 @@ class Model(object): if (target.required != f.required or target.max_length != f.max_length): - print "Warning: value mismatch on import" + print("Warning: value mismatch on import") target._value = f._value diff --git a/pyaccuwage/modeldef.py b/pyaccuwage/modeldef.py index 0528615..a9364ca 100644 --- a/pyaccuwage/modeldef.py +++ b/pyaccuwage/modeldef.py @@ -22,7 +22,7 @@ class ClassEntryCommentSequence(object): if (i + 1) != a: line_number = self.line + line_no - print("ERROR\tline:%d\tnear:%s\texpected:%d\tsaw:%d" % (line_number, line.split(' ')[0].strip(), i+1, a)) + print(("ERROR\tline:%d\tnear:%s\texpected:%d\tsaw:%d" % (line_number, line.split(' ')[0].strip(), i+1, a))) i = int(b) if b else a diff --git a/pyaccuwage/parser.py b/pyaccuwage/parser.py index 57da8c9..6b808fc 100644 --- a/pyaccuwage/parser.py +++ b/pyaccuwage/parser.py @@ -7,6 +7,7 @@ convert it into python classes. """ import re import hashlib +from functools import reduce class SimpleDefParser(object): def __init__(self): @@ -34,7 +35,7 @@ class SimpleDefParser(object): item = item.upper() if '-' in item: - parts = map(lambda x:self._intify(x), item.split('-')) + parts = [self._intify(x) for x in item.split('-')] item = reduce(lambda x,y: y-x, parts) else: item = self._intify(item) @@ -56,7 +57,7 @@ class LengthExpression(object): self.exp_cache = {} def __call__(self, value, exps): - return len(exps) == sum(map(lambda x: self.check(value, x), exps)) + return len(exps) == sum([self.check(value, x) for x in exps]) def compile_exp(self, exp): op, val = self.REG.match(exp).groups() @@ -98,7 +99,7 @@ class RangeToken(BaseToken): def value(self): if '-' not in self._value: return 1 - return reduce(lambda x,y: y-x, map(int, self._value.split('-')))+1 + return reduce(lambda x,y: y-x, list(map(int, self._value.split('-'))))+1 @property def end_position(self): @@ -118,7 +119,7 @@ class NumericToken(BaseToken): class RecordBuilder(object): - import fields + from . import fields entry_max_length = 4 @@ -201,15 +202,15 @@ class RecordBuilder(object): try: f_length = int(f_length) - except ValueError, e: + except ValueError as e: # bad result, skip continue try: assert f_length == RangeToken(f_range).value - except AssertionError, e: + except AssertionError as e: continue - except ValueError, e: + except ValueError as e: # bad result, skip continue @@ -223,7 +224,7 @@ class RecordBuilder(object): else: required = None - f_name = u'_'.join(map(lambda x:x.lower(), name_parts)) + f_name = '_'.join([x.lower() for x in name_parts]) f_name = f_name.replace('&', 'and') f_name = re.sub(r'[^\w]','', f_name) @@ -240,7 +241,7 @@ class RecordBuilder(object): lengthexp = LengthExpression() for entry in entries: - matches = dict(map(lambda x:(x[0],0), self.FIELD_TYPES)) + matches = dict([(x[0],0) for x in self.FIELD_TYPES]) for (classtype, criteria) in self.FIELD_TYPES: if 'length' in criteria: @@ -248,7 +249,7 @@ class RecordBuilder(object): continue if 'regexp' in criteria: - for crit_key, crit_values in criteria['regexp'].items(): + for crit_key, crit_values in list(criteria['regexp'].items()): for (crit_re, score) in crit_values: matches[classtype] += score if crit_re.search(entry[crit_key]) else 0 @@ -256,7 +257,7 @@ class RecordBuilder(object): matches = list(matches.items()) matches.sort(key=lambda x:x[1]) - matches_found = True if sum(map(lambda x:x[1], matches)) > 0 else False + matches_found = True if sum([x[1] for x in matches]) > 0 else False entry['guessed_type'] = matches[-1][0] if matches_found else self.fields.TextField yield entry @@ -271,7 +272,7 @@ class RecordBuilder(object): if entry['name'] == 'blank': blank_id = hashlib.new('md5') blank_id.update(entry['range'].encode()) - add( (u'blank_%s' % blank_id.hexdigest()[:8]).ljust(40) ) + add( ('blank_%s' % blank_id.hexdigest()[:8]).ljust(40) ) else: add(entry['name'].ljust(40)) @@ -386,7 +387,7 @@ class PastedDefParser(RecordBuilder): for g in groups: assert g['byterange'].value == g['length'].value - desc = u' '.join(map(lambda x:unicode(x.value), g['desc'])) + desc = ' '.join([str(x.value) for x in g['desc']]) if g['name'][-1].value.lower() == '(optional)': g['name'] = g['name'][0:-1] @@ -396,7 +397,7 @@ class PastedDefParser(RecordBuilder): else: required = None - name = u'_'.join(map(lambda x:x.value.lower(), g['name'])) + name = '_'.join([x.value.lower() for x in g['name']]) name = re.sub(r'[^\w]','', name) yield({ diff --git a/pyaccuwage/pdfextract.py b/pyaccuwage/pdfextract.py index 2499b1f..2903b5d 100644 --- a/pyaccuwage/pdfextract.py +++ b/pyaccuwage/pdfextract.py @@ -57,7 +57,7 @@ class PDFRecordFinder(object): position -= 1 name = ''.join(header).strip().decode('ascii','ignore') - print (name, position) + print((name, position)) results.append((i, name, position)) else: # See if this row forces us to break from field reading. @@ -117,7 +117,7 @@ class PDFRecordFinder(object): row_columns = self.extract_columns_from_row(row) if not row_columns: - if cc.data and len(cc.data.keys()) > 1 and len(row.strip()) > cc.data.keys()[-1]: + if cc.data and len(list(cc.data.keys())) > 1 and len(row.strip()) > list(cc.data.keys())[-1]: yield cc cc = ColumnCollector() else: @@ -127,11 +127,11 @@ class PDFRecordFinder(object): try: cc.add(row_columns) - except IsNextField, e: + except IsNextField as e: yield cc cc = ColumnCollector() cc.add(row_columns) - except UnknownColumn, e: + except UnknownColumn as e: raise StopIteration yield cc @@ -160,8 +160,8 @@ class PDFRecordFinder(object): white_iter = iter(white_ranges) while white_iter: try: - start = white_iter.next() - end = white_iter.next() + start = next(white_iter) + end = next(white_iter) if start != end: row_result.append( (start, row[start:end].encode('ascii','ignore')) @@ -192,8 +192,7 @@ class ColumnCollector(object): def __repr__(self): return "<%s: %s>" % ( self.__class__.__name__, - map(lambda x:x if len(x) < 25 else x[:25] + '..', - self.data.values() if self.data else '')) + [x if len(x) < 25 else x[:25] + '..' for x in list(self.data.values()) if self.data else '']) def add(self, data): #if self.empty_rows > 2: @@ -218,7 +217,7 @@ class ColumnCollector(object): self.max_data_length = max(self.max_data_length, len(data)) if not self.column_widths: - self.column_widths = dict(map(lambda (column, value): [column, column + len(value)], data)) + self.column_widths = dict([[column_value[0], column_value[0] + len(column_value[1])] for column_value in data]) else: for col_id, value in data: try: @@ -239,21 +238,21 @@ class ColumnCollector(object): def adjust_columns(self, data): adjusted_data = {} for col_id, value in data: - if col_id in self.data.keys(): + if col_id in list(self.data.keys()): adjusted_data[col_id] = value.strip() else: - for col_start, col_end in self.column_widths.items(): + for col_start, col_end in list(self.column_widths.items()): if (col_start - self.adjust_pad) <= col_id and (col_end + self.adjust_pad) >= col_id: if col_start in adjusted_data: adjusted_data[col_start] += ' ' + value.strip() else: adjusted_data[col_start] = value.strip() - return adjusted_data.items() + return list(adjusted_data.items()) def merge_column(self, col_id, value): - if col_id in self.data.keys(): + if col_id in list(self.data.keys()): self.data[col_id] += ' ' + value.strip() else: # try adding a wiggle room value? @@ -286,15 +285,15 @@ class ColumnCollector(object): """ if self.data and data: - keys = dict(self.column_widths).keys() + keys = list(dict(self.column_widths).keys()) keys.sort() keys += [None] if self.last_data_length < len(data): return True - first_key, first_value = dict(data).items()[0] - if self.data.keys()[0] == first_key: + first_key, first_value = list(dict(data).items())[0] + if list(self.data.keys())[0] == first_key: position = keys.index(first_key) max_length = keys[position + 1] @@ -308,7 +307,7 @@ class ColumnCollector(object): def tuple(self): #try: if self.data: - return tuple(map(lambda k:self.data[k], sorted(self.data.keys()))) + return tuple([self.data[k] for k in sorted(self.data.keys())]) return () #except: # import pdb diff --git a/pyaccuwage/record.py b/pyaccuwage/record.py index 9435d75..7d4d249 100644 --- a/pyaccuwage/record.py +++ b/pyaccuwage/record.py @@ -1,6 +1,6 @@ -import model -from fields import * -import enums +from . import model +from .fields import * +from . import enums __all__ = RECORD_TYPES = ['SubmitterRecord', 'EmployerRecord', 'EmployeeWageRecord', 'OptionalEmployeeWageRecord',