run through 2to3
This commit is contained in:
parent
961aedc0ae
commit
16bf2c41d0
7 changed files with 82 additions and 80 deletions
|
@ -1,5 +1,6 @@
|
||||||
from record import *
|
from .record import *
|
||||||
from reader import RecordReader
|
from .reader import RecordReader
|
||||||
|
import collections
|
||||||
|
|
||||||
VERSION = (0, 2012, 0)
|
VERSION = (0, 2012, 0)
|
||||||
|
|
||||||
|
@ -16,33 +17,33 @@ RECORD_TYPES = [
|
||||||
]
|
]
|
||||||
|
|
||||||
def test():
|
def test():
|
||||||
import record, model
|
from . import record, model
|
||||||
from fields import ValidationError
|
from .fields import ValidationError
|
||||||
for rname in RECORD_TYPES:
|
for rname in RECORD_TYPES:
|
||||||
inst = record.__dict__[rname]()
|
inst = record.__dict__[rname]()
|
||||||
try:
|
try:
|
||||||
output_length = len(inst.output())
|
output_length = len(inst.output())
|
||||||
except ValidationError, e:
|
except ValidationError as e:
|
||||||
print e.msg, type(inst), inst.record_identifier
|
print(e.msg, type(inst), inst.record_identifier)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
print type(inst), inst.record_identifier, output_length
|
print(type(inst), inst.record_identifier, output_length)
|
||||||
|
|
||||||
|
|
||||||
def test_dump():
|
def test_dump():
|
||||||
import record, StringIO
|
import record, io
|
||||||
records = [
|
records = [
|
||||||
record.SubmitterRecord(),
|
record.SubmitterRecord(),
|
||||||
record.EmployerRecord(),
|
record.EmployerRecord(),
|
||||||
record.EmployeeWageRecord(),
|
record.EmployeeWageRecord(),
|
||||||
]
|
]
|
||||||
out = StringIO.StringIO()
|
out = io.StringIO()
|
||||||
dump(records, out)
|
dump(records, out)
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
|
||||||
def test_record_order():
|
def test_record_order():
|
||||||
import record
|
from . import record
|
||||||
records = [
|
records = [
|
||||||
record.SubmitterRecord(),
|
record.SubmitterRecord(),
|
||||||
record.EmployerRecord(),
|
record.EmployerRecord(),
|
||||||
|
@ -58,7 +59,7 @@ def test_load(fp):
|
||||||
|
|
||||||
def load(fp):
|
def load(fp):
|
||||||
# BUILD LIST OF RECORD TYPES
|
# BUILD LIST OF RECORD TYPES
|
||||||
import record
|
from . import record
|
||||||
types = {}
|
types = {}
|
||||||
for r in RECORD_TYPES:
|
for r in RECORD_TYPES:
|
||||||
klass = record.__dict__[r]
|
klass = record.__dict__[r]
|
||||||
|
@ -73,8 +74,8 @@ def load(fp):
|
||||||
yield record
|
yield record
|
||||||
|
|
||||||
def loads(s):
|
def loads(s):
|
||||||
import StringIO
|
import io
|
||||||
fp = StringIO.StringIO(s)
|
fp = io.StringIO(s)
|
||||||
return load(fp)
|
return load(fp)
|
||||||
|
|
||||||
|
|
||||||
|
@ -83,21 +84,21 @@ def dump(records, fp):
|
||||||
fp.write(r.output())
|
fp.write(r.output())
|
||||||
|
|
||||||
def dumps(records):
|
def dumps(records):
|
||||||
import StringIO
|
import io
|
||||||
fp = StringIO.StringIO()
|
fp = io.StringIO()
|
||||||
dump(records, fp)
|
dump(records, fp)
|
||||||
fp.seek(0)
|
fp.seek(0)
|
||||||
return fp.read()
|
return fp.read()
|
||||||
|
|
||||||
def json_dumps(records):
|
def json_dumps(records):
|
||||||
import json
|
import json
|
||||||
import model
|
from . import model
|
||||||
import decimal
|
import decimal
|
||||||
|
|
||||||
class JSONEncoder(json.JSONEncoder):
|
class JSONEncoder(json.JSONEncoder):
|
||||||
|
|
||||||
def default(self, o):
|
def default(self, o):
|
||||||
if hasattr(o, 'toJSON') and callable(getattr(o, 'toJSON')):
|
if hasattr(o, 'toJSON') and isinstance(getattr(o, 'toJSON'), collections.Callable):
|
||||||
return o.toJSON()
|
return o.toJSON()
|
||||||
|
|
||||||
elif isinstance(o, decimal.Decimal):
|
elif isinstance(o, decimal.Decimal):
|
||||||
|
@ -110,7 +111,7 @@ def json_dumps(records):
|
||||||
|
|
||||||
def json_loads(s, record_classes):
|
def json_loads(s, record_classes):
|
||||||
import json
|
import json
|
||||||
import fields
|
from . import fields
|
||||||
import decimal
|
import decimal
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
@ -151,14 +152,14 @@ def validate_required_records(records):
|
||||||
while req_types:
|
while req_types:
|
||||||
req = req_types[0]
|
req = req_types[0]
|
||||||
if req not in types:
|
if req not in types:
|
||||||
from fields import ValidationError
|
from .fields import ValidationError
|
||||||
raise ValidationError("Record set missing required record: %s" % req)
|
raise ValidationError("Record set missing required record: %s" % req)
|
||||||
else:
|
else:
|
||||||
req_types.remove(req)
|
req_types.remove(req)
|
||||||
|
|
||||||
def validate_record_order(records):
|
def validate_record_order(records):
|
||||||
import record
|
from . import record
|
||||||
from fields import ValidationError
|
from .fields import ValidationError
|
||||||
|
|
||||||
# 1st record must be SubmitterRecord
|
# 1st record must be SubmitterRecord
|
||||||
if not isinstance(records[0], record.SubmitterRecord):
|
if not isinstance(records[0], record.SubmitterRecord):
|
||||||
|
@ -178,10 +179,10 @@ def validate_record_order(records):
|
||||||
if not isinstance(records[i+1], record.EmployeeWageRecord):
|
if not isinstance(records[i+1], record.EmployeeWageRecord):
|
||||||
raise ValidationError("All EmployerRecords must be followed by an EmployeeWageRecord")
|
raise ValidationError("All EmployerRecords must be followed by an EmployeeWageRecord")
|
||||||
|
|
||||||
num_ro_records = len(filter(lambda x:isinstance(x, record.OptionalEmployeeWageRecord), records))
|
num_ro_records = len([x for x in records if isinstance(x, record.OptionalEmployeeWageRecord)])
|
||||||
num_ru_records = len(filter(lambda x:isinstance(x, record.OptionalTotalRecord), records))
|
num_ru_records = len([x for x in records if isinstance(x, record.OptionalTotalRecord)])
|
||||||
num_employer_records = len(filter(lambda x:isinstance(x, record.EmployerRecord), records))
|
num_employer_records = len([x for x in records if isinstance(x, record.EmployerRecord)])
|
||||||
num_total_records = len(filter(lambda x: isinstance(x, record.TotalRecord), records))
|
num_total_records = len([x for x in records if isinstance(x, record.TotalRecord)])
|
||||||
|
|
||||||
# a TotalRecord is required for each instance of an EmployeeRecord
|
# a TotalRecord is required for each instance of an EmployeeRecord
|
||||||
if num_total_records != num_employer_records:
|
if num_total_records != num_employer_records:
|
||||||
|
@ -194,7 +195,7 @@ def validate_record_order(records):
|
||||||
num_ro_records, num_ru_records))
|
num_ro_records, num_ru_records))
|
||||||
|
|
||||||
# FinalRecord - Must appear only once on each file.
|
# FinalRecord - Must appear only once on each file.
|
||||||
if len(filter(lambda x:isinstance(x, record.FinalRecord), records)) != 1:
|
if len([x for x in records if isinstance(x, record.FinalRecord)]) != 1:
|
||||||
raise ValidationError("Incorrect number of FinalRecords")
|
raise ValidationError("Incorrect number of FinalRecords")
|
||||||
|
|
||||||
def validate_records(records):
|
def validate_records(records):
|
||||||
|
@ -207,8 +208,8 @@ def test_unique_fields():
|
||||||
r1.employee_first_name.value = "John Johnson"
|
r1.employee_first_name.value = "John Johnson"
|
||||||
|
|
||||||
r2 = EmployeeWageRecord()
|
r2 = EmployeeWageRecord()
|
||||||
print 'r1:', r1.employee_first_name.value, r1.employee_first_name, r1.employee_first_name.creation_counter
|
print('r1:', r1.employee_first_name.value, r1.employee_first_name, r1.employee_first_name.creation_counter)
|
||||||
print 'r2:', r2.employee_first_name.value, r2.employee_first_name, r2.employee_first_name.creation_counter
|
print('r2:', r2.employee_first_name.value, r2.employee_first_name, r2.employee_first_name.creation_counter)
|
||||||
|
|
||||||
if r1.employee_first_name.value == r2.employee_first_name.value:
|
if r1.employee_first_name.value == r2.employee_first_name.value:
|
||||||
raise ValidationError("Horrible problem involving shared values across records")
|
raise ValidationError("Horrible problem involving shared values across records")
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
import decimal, datetime
|
import decimal, datetime
|
||||||
import inspect
|
import inspect
|
||||||
import enums
|
from . import enums
|
||||||
|
|
||||||
class ValidationError(Exception):
|
class ValidationError(Exception):
|
||||||
def __init__(self, msg, field=None):
|
def __init__(self, msg, field=None):
|
||||||
|
@ -76,7 +76,7 @@ class Field(object):
|
||||||
required=o['required'],
|
required=o['required'],
|
||||||
)
|
)
|
||||||
|
|
||||||
if isinstance(o['value'], basestring) and re.match('^\d*\.\d*$', o['value']):
|
if isinstance(o['value'], str) and re.match('^\d*\.\d*$', o['value']):
|
||||||
o['value'] = decimal.Decimal(o['value'])
|
o['value'] = decimal.Decimal(o['value'])
|
||||||
|
|
||||||
self.value = o['value']
|
self.value = o['value']
|
||||||
|
@ -92,11 +92,11 @@ class Field(object):
|
||||||
value = wrapper.wrap(value)
|
value = wrapper.wrap(value)
|
||||||
#value = textwrap.wrap(value, 100)
|
#value = textwrap.wrap(value, 100)
|
||||||
#print value
|
#print value
|
||||||
value = list(map(lambda x:(" " * 9) + ('"' + x + '"'), value))
|
value = list([(" " * 9) + ('"' + x + '"') for x in value])
|
||||||
#value[0] = '"' + value[0] + '"'
|
#value[0] = '"' + value[0] + '"'
|
||||||
value.append(" " * 10 + ('_' * 10) * (wrapper.width / 10))
|
value.append(" " * 10 + ('_' * 10) * (wrapper.width / 10))
|
||||||
value.append(" " * 10 + ('0123456789') * (wrapper.width / 10))
|
value.append(" " * 10 + ('0123456789') * (wrapper.width / 10))
|
||||||
value.append(" " * 10 + ''.join((map(lambda x:str(x) + (' ' * 9), range(wrapper.width / 10 )))))
|
value.append(" " * 10 + ''.join(([str(x) + (' ' * 9) for x in range(wrapper.width / 10 )])))
|
||||||
#value.append((" " * 59) + map(lambda x:("%x" % x), range(16))
|
#value.append((" " * 59) + map(lambda x:("%x" % x), range(16))
|
||||||
|
|
||||||
start = counter['c']
|
start = counter['c']
|
||||||
|
@ -130,7 +130,7 @@ class TextField(Field):
|
||||||
# NO NEWLINES
|
# NO NEWLINES
|
||||||
try:
|
try:
|
||||||
value = value.replace('\n', '').replace('\r', '')
|
value = value.replace('\n', '').replace('\r', '')
|
||||||
except AttributeError, e:
|
except AttributeError as e:
|
||||||
pass
|
pass
|
||||||
self._value = value
|
self._value = value
|
||||||
|
|
||||||
|
@ -154,12 +154,12 @@ class StateField(TextField):
|
||||||
|
|
||||||
def validate(self):
|
def validate(self):
|
||||||
super(StateField, self).validate()
|
super(StateField, self).validate()
|
||||||
if self.value and self.value.upper() not in enums.state_postal_numeric.keys():
|
if self.value and self.value.upper() not in list(enums.state_postal_numeric.keys()):
|
||||||
raise ValidationError("%s is not a valid state abbreviation" % self.value, field=self)
|
raise ValidationError("%s is not a valid state abbreviation" % self.value, field=self)
|
||||||
|
|
||||||
def parse(self, s):
|
def parse(self, s):
|
||||||
if s.strip() and self.use_numeric:
|
if s.strip() and self.use_numeric:
|
||||||
states = dict( [(v,k) for (k,v) in enums.state_postal_numeric.items()] )
|
states = dict( [(v,k) for (k,v) in list(enums.state_postal_numeric.items())] )
|
||||||
self.value = states[int(s)]
|
self.value = states[int(s)]
|
||||||
else:
|
else:
|
||||||
self.value = s
|
self.value = s
|
||||||
|
@ -269,7 +269,7 @@ class DateField(TextField):
|
||||||
|
|
||||||
def parse(self, s):
|
def parse(self, s):
|
||||||
if int(s) > 0:
|
if int(s) > 0:
|
||||||
self.value = datetime.date(*[int(x) for x in s[4:8], s[0:2], s[2:4]])
|
self.value = datetime.date(*[int(x) for x in (s[4:8], s[0:2], s[2:4])])
|
||||||
else:
|
else:
|
||||||
self.value = None
|
self.value = None
|
||||||
|
|
||||||
|
@ -277,7 +277,7 @@ class DateField(TextField):
|
||||||
if isinstance(value, datetime.date):
|
if isinstance(value, datetime.date):
|
||||||
self._value = value
|
self._value = value
|
||||||
elif value:
|
elif value:
|
||||||
self._value = datetime.date(*[int(x) for x in value[4:8], value[0:2], value[2:4]])
|
self._value = datetime.date(*[int(x) for x in (value[4:8], value[0:2], value[2:4])])
|
||||||
else:
|
else:
|
||||||
self._value = None
|
self._value = None
|
||||||
|
|
||||||
|
@ -301,7 +301,7 @@ class MonthYearField(TextField):
|
||||||
|
|
||||||
def parse(self, s):
|
def parse(self, s):
|
||||||
if int(s) > 0:
|
if int(s) > 0:
|
||||||
self.value = datetime.date(*[int(x) for x in s[2:6], s[0:2], 1])
|
self.value = datetime.date(*[int(x) for x in (s[2:6], s[0:2], 1)])
|
||||||
else:
|
else:
|
||||||
self.value = None
|
self.value = None
|
||||||
|
|
||||||
|
@ -309,7 +309,7 @@ class MonthYearField(TextField):
|
||||||
if isinstance(value, datetime.date):
|
if isinstance(value, datetime.date):
|
||||||
self._value = value
|
self._value = value
|
||||||
elif value:
|
elif value:
|
||||||
self._value = datetime.date(*[int(x) for x in value[2:6], value[0:2], 1])
|
self._value = datetime.date(*[int(x) for x in (value[2:6], value[0:2], 1)])
|
||||||
else:
|
else:
|
||||||
self._value = None
|
self._value = None
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
from fields import Field, TextField, ValidationError
|
from .fields import Field, TextField, ValidationError
|
||||||
import copy
|
import copy
|
||||||
import pdb
|
import pdb
|
||||||
|
import collections
|
||||||
|
|
||||||
|
|
||||||
class Model(object):
|
class Model(object):
|
||||||
|
@ -9,7 +10,7 @@ class Model(object):
|
||||||
target_size = 512
|
target_size = 512
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
for (key, value) in self.__class__.__dict__.items():
|
for (key, value) in list(self.__class__.__dict__.items()):
|
||||||
if isinstance(value, Field):
|
if isinstance(value, Field):
|
||||||
# GRAB THE FIELD INSTANCE FROM THE CLASS DEFINITION
|
# GRAB THE FIELD INSTANCE FROM THE CLASS DEFINITION
|
||||||
# AND MAKE A LOCAL COPY FOR THIS RECORD'S INSTANCE,
|
# AND MAKE A LOCAL COPY FOR THIS RECORD'S INSTANCE,
|
||||||
|
@ -33,7 +34,7 @@ class Model(object):
|
||||||
identifier.value = self.record_identifier
|
identifier.value = self.record_identifier
|
||||||
fields = [identifier]
|
fields = [identifier]
|
||||||
|
|
||||||
for key in self.__class__.__dict__.keys():
|
for key in list(self.__class__.__dict__.keys()):
|
||||||
attr = getattr(self, key)
|
attr = getattr(self, key)
|
||||||
if isinstance(attr, Field):
|
if isinstance(attr, Field):
|
||||||
fields.append(attr)
|
fields.append(attr)
|
||||||
|
@ -50,9 +51,9 @@ class Model(object):
|
||||||
|
|
||||||
try:
|
try:
|
||||||
custom_validator = getattr(self, 'validate_' + f.name)
|
custom_validator = getattr(self, 'validate_' + f.name)
|
||||||
except AttributeError, e:
|
except AttributeError as e:
|
||||||
continue
|
continue
|
||||||
if callable(custom_validator):
|
if isinstance(custom_validator, collections.Callable):
|
||||||
custom_validator(f)
|
custom_validator(f)
|
||||||
|
|
||||||
def output(self):
|
def output(self):
|
||||||
|
@ -85,7 +86,7 @@ class Model(object):
|
||||||
|
|
||||||
if (target.required != f.required or
|
if (target.required != f.required or
|
||||||
target.max_length != f.max_length):
|
target.max_length != f.max_length):
|
||||||
print "Warning: value mismatch on import"
|
print("Warning: value mismatch on import")
|
||||||
|
|
||||||
target._value = f._value
|
target._value = f._value
|
||||||
|
|
||||||
|
|
|
@ -22,7 +22,7 @@ class ClassEntryCommentSequence(object):
|
||||||
|
|
||||||
if (i + 1) != a:
|
if (i + 1) != a:
|
||||||
line_number = self.line + line_no
|
line_number = self.line + line_no
|
||||||
print("ERROR\tline:%d\tnear:%s\texpected:%d\tsaw:%d" % (line_number, line.split(' ')[0].strip(), i+1, a))
|
print(("ERROR\tline:%d\tnear:%s\texpected:%d\tsaw:%d" % (line_number, line.split(' ')[0].strip(), i+1, a)))
|
||||||
|
|
||||||
i = int(b) if b else a
|
i = int(b) if b else a
|
||||||
|
|
||||||
|
|
|
@ -7,6 +7,7 @@ convert it into python classes.
|
||||||
"""
|
"""
|
||||||
import re
|
import re
|
||||||
import hashlib
|
import hashlib
|
||||||
|
from functools import reduce
|
||||||
|
|
||||||
class SimpleDefParser(object):
|
class SimpleDefParser(object):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
@ -34,7 +35,7 @@ class SimpleDefParser(object):
|
||||||
item = item.upper()
|
item = item.upper()
|
||||||
|
|
||||||
if '-' in item:
|
if '-' in item:
|
||||||
parts = map(lambda x:self._intify(x), item.split('-'))
|
parts = [self._intify(x) for x in item.split('-')]
|
||||||
item = reduce(lambda x,y: y-x, parts)
|
item = reduce(lambda x,y: y-x, parts)
|
||||||
else:
|
else:
|
||||||
item = self._intify(item)
|
item = self._intify(item)
|
||||||
|
@ -56,7 +57,7 @@ class LengthExpression(object):
|
||||||
self.exp_cache = {}
|
self.exp_cache = {}
|
||||||
|
|
||||||
def __call__(self, value, exps):
|
def __call__(self, value, exps):
|
||||||
return len(exps) == sum(map(lambda x: self.check(value, x), exps))
|
return len(exps) == sum([self.check(value, x) for x in exps])
|
||||||
|
|
||||||
def compile_exp(self, exp):
|
def compile_exp(self, exp):
|
||||||
op, val = self.REG.match(exp).groups()
|
op, val = self.REG.match(exp).groups()
|
||||||
|
@ -98,7 +99,7 @@ class RangeToken(BaseToken):
|
||||||
def value(self):
|
def value(self):
|
||||||
if '-' not in self._value:
|
if '-' not in self._value:
|
||||||
return 1
|
return 1
|
||||||
return reduce(lambda x,y: y-x, map(int, self._value.split('-')))+1
|
return reduce(lambda x,y: y-x, list(map(int, self._value.split('-'))))+1
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def end_position(self):
|
def end_position(self):
|
||||||
|
@ -118,7 +119,7 @@ class NumericToken(BaseToken):
|
||||||
|
|
||||||
|
|
||||||
class RecordBuilder(object):
|
class RecordBuilder(object):
|
||||||
import fields
|
from . import fields
|
||||||
|
|
||||||
entry_max_length = 4
|
entry_max_length = 4
|
||||||
|
|
||||||
|
@ -201,15 +202,15 @@ class RecordBuilder(object):
|
||||||
|
|
||||||
try:
|
try:
|
||||||
f_length = int(f_length)
|
f_length = int(f_length)
|
||||||
except ValueError, e:
|
except ValueError as e:
|
||||||
# bad result, skip
|
# bad result, skip
|
||||||
continue
|
continue
|
||||||
|
|
||||||
try:
|
try:
|
||||||
assert f_length == RangeToken(f_range).value
|
assert f_length == RangeToken(f_range).value
|
||||||
except AssertionError, e:
|
except AssertionError as e:
|
||||||
continue
|
continue
|
||||||
except ValueError, e:
|
except ValueError as e:
|
||||||
# bad result, skip
|
# bad result, skip
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
@ -223,7 +224,7 @@ class RecordBuilder(object):
|
||||||
else:
|
else:
|
||||||
required = None
|
required = None
|
||||||
|
|
||||||
f_name = u'_'.join(map(lambda x:x.lower(), name_parts))
|
f_name = '_'.join([x.lower() for x in name_parts])
|
||||||
f_name = f_name.replace('&', 'and')
|
f_name = f_name.replace('&', 'and')
|
||||||
f_name = re.sub(r'[^\w]','', f_name)
|
f_name = re.sub(r'[^\w]','', f_name)
|
||||||
|
|
||||||
|
@ -240,7 +241,7 @@ class RecordBuilder(object):
|
||||||
lengthexp = LengthExpression()
|
lengthexp = LengthExpression()
|
||||||
|
|
||||||
for entry in entries:
|
for entry in entries:
|
||||||
matches = dict(map(lambda x:(x[0],0), self.FIELD_TYPES))
|
matches = dict([(x[0],0) for x in self.FIELD_TYPES])
|
||||||
|
|
||||||
for (classtype, criteria) in self.FIELD_TYPES:
|
for (classtype, criteria) in self.FIELD_TYPES:
|
||||||
if 'length' in criteria:
|
if 'length' in criteria:
|
||||||
|
@ -248,7 +249,7 @@ class RecordBuilder(object):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if 'regexp' in criteria:
|
if 'regexp' in criteria:
|
||||||
for crit_key, crit_values in criteria['regexp'].items():
|
for crit_key, crit_values in list(criteria['regexp'].items()):
|
||||||
for (crit_re, score) in crit_values:
|
for (crit_re, score) in crit_values:
|
||||||
matches[classtype] += score if crit_re.search(entry[crit_key]) else 0
|
matches[classtype] += score if crit_re.search(entry[crit_key]) else 0
|
||||||
|
|
||||||
|
@ -256,7 +257,7 @@ class RecordBuilder(object):
|
||||||
matches = list(matches.items())
|
matches = list(matches.items())
|
||||||
matches.sort(key=lambda x:x[1])
|
matches.sort(key=lambda x:x[1])
|
||||||
|
|
||||||
matches_found = True if sum(map(lambda x:x[1], matches)) > 0 else False
|
matches_found = True if sum([x[1] for x in matches]) > 0 else False
|
||||||
|
|
||||||
entry['guessed_type'] = matches[-1][0] if matches_found else self.fields.TextField
|
entry['guessed_type'] = matches[-1][0] if matches_found else self.fields.TextField
|
||||||
yield entry
|
yield entry
|
||||||
|
@ -271,7 +272,7 @@ class RecordBuilder(object):
|
||||||
if entry['name'] == 'blank':
|
if entry['name'] == 'blank':
|
||||||
blank_id = hashlib.new('md5')
|
blank_id = hashlib.new('md5')
|
||||||
blank_id.update(entry['range'].encode())
|
blank_id.update(entry['range'].encode())
|
||||||
add( (u'blank_%s' % blank_id.hexdigest()[:8]).ljust(40) )
|
add( ('blank_%s' % blank_id.hexdigest()[:8]).ljust(40) )
|
||||||
else:
|
else:
|
||||||
add(entry['name'].ljust(40))
|
add(entry['name'].ljust(40))
|
||||||
|
|
||||||
|
@ -386,7 +387,7 @@ class PastedDefParser(RecordBuilder):
|
||||||
for g in groups:
|
for g in groups:
|
||||||
assert g['byterange'].value == g['length'].value
|
assert g['byterange'].value == g['length'].value
|
||||||
|
|
||||||
desc = u' '.join(map(lambda x:unicode(x.value), g['desc']))
|
desc = ' '.join([str(x.value) for x in g['desc']])
|
||||||
|
|
||||||
if g['name'][-1].value.lower() == '(optional)':
|
if g['name'][-1].value.lower() == '(optional)':
|
||||||
g['name'] = g['name'][0:-1]
|
g['name'] = g['name'][0:-1]
|
||||||
|
@ -396,7 +397,7 @@ class PastedDefParser(RecordBuilder):
|
||||||
else:
|
else:
|
||||||
required = None
|
required = None
|
||||||
|
|
||||||
name = u'_'.join(map(lambda x:x.value.lower(), g['name']))
|
name = '_'.join([x.value.lower() for x in g['name']])
|
||||||
name = re.sub(r'[^\w]','', name)
|
name = re.sub(r'[^\w]','', name)
|
||||||
|
|
||||||
yield({
|
yield({
|
||||||
|
|
|
@ -57,7 +57,7 @@ class PDFRecordFinder(object):
|
||||||
position -= 1
|
position -= 1
|
||||||
|
|
||||||
name = ''.join(header).strip().decode('ascii','ignore')
|
name = ''.join(header).strip().decode('ascii','ignore')
|
||||||
print (name, position)
|
print((name, position))
|
||||||
results.append((i, name, position))
|
results.append((i, name, position))
|
||||||
else:
|
else:
|
||||||
# See if this row forces us to break from field reading.
|
# See if this row forces us to break from field reading.
|
||||||
|
@ -117,7 +117,7 @@ class PDFRecordFinder(object):
|
||||||
row_columns = self.extract_columns_from_row(row)
|
row_columns = self.extract_columns_from_row(row)
|
||||||
|
|
||||||
if not row_columns:
|
if not row_columns:
|
||||||
if cc.data and len(cc.data.keys()) > 1 and len(row.strip()) > cc.data.keys()[-1]:
|
if cc.data and len(list(cc.data.keys())) > 1 and len(row.strip()) > list(cc.data.keys())[-1]:
|
||||||
yield cc
|
yield cc
|
||||||
cc = ColumnCollector()
|
cc = ColumnCollector()
|
||||||
else:
|
else:
|
||||||
|
@ -127,11 +127,11 @@ class PDFRecordFinder(object):
|
||||||
try:
|
try:
|
||||||
cc.add(row_columns)
|
cc.add(row_columns)
|
||||||
|
|
||||||
except IsNextField, e:
|
except IsNextField as e:
|
||||||
yield cc
|
yield cc
|
||||||
cc = ColumnCollector()
|
cc = ColumnCollector()
|
||||||
cc.add(row_columns)
|
cc.add(row_columns)
|
||||||
except UnknownColumn, e:
|
except UnknownColumn as e:
|
||||||
raise StopIteration
|
raise StopIteration
|
||||||
|
|
||||||
yield cc
|
yield cc
|
||||||
|
@ -160,8 +160,8 @@ class PDFRecordFinder(object):
|
||||||
white_iter = iter(white_ranges)
|
white_iter = iter(white_ranges)
|
||||||
while white_iter:
|
while white_iter:
|
||||||
try:
|
try:
|
||||||
start = white_iter.next()
|
start = next(white_iter)
|
||||||
end = white_iter.next()
|
end = next(white_iter)
|
||||||
if start != end:
|
if start != end:
|
||||||
row_result.append(
|
row_result.append(
|
||||||
(start, row[start:end].encode('ascii','ignore'))
|
(start, row[start:end].encode('ascii','ignore'))
|
||||||
|
@ -192,8 +192,7 @@ class ColumnCollector(object):
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return "<%s: %s>" % (
|
return "<%s: %s>" % (
|
||||||
self.__class__.__name__,
|
self.__class__.__name__,
|
||||||
map(lambda x:x if len(x) < 25 else x[:25] + '..',
|
[x if len(x) < 25 else x[:25] + '..' for x in list(self.data.values()) if self.data else ''])
|
||||||
self.data.values() if self.data else ''))
|
|
||||||
|
|
||||||
def add(self, data):
|
def add(self, data):
|
||||||
#if self.empty_rows > 2:
|
#if self.empty_rows > 2:
|
||||||
|
@ -218,7 +217,7 @@ class ColumnCollector(object):
|
||||||
self.max_data_length = max(self.max_data_length, len(data))
|
self.max_data_length = max(self.max_data_length, len(data))
|
||||||
|
|
||||||
if not self.column_widths:
|
if not self.column_widths:
|
||||||
self.column_widths = dict(map(lambda (column, value): [column, column + len(value)], data))
|
self.column_widths = dict([[column_value[0], column_value[0] + len(column_value[1])] for column_value in data])
|
||||||
else:
|
else:
|
||||||
for col_id, value in data:
|
for col_id, value in data:
|
||||||
try:
|
try:
|
||||||
|
@ -239,21 +238,21 @@ class ColumnCollector(object):
|
||||||
def adjust_columns(self, data):
|
def adjust_columns(self, data):
|
||||||
adjusted_data = {}
|
adjusted_data = {}
|
||||||
for col_id, value in data:
|
for col_id, value in data:
|
||||||
if col_id in self.data.keys():
|
if col_id in list(self.data.keys()):
|
||||||
adjusted_data[col_id] = value.strip()
|
adjusted_data[col_id] = value.strip()
|
||||||
else:
|
else:
|
||||||
for col_start, col_end in self.column_widths.items():
|
for col_start, col_end in list(self.column_widths.items()):
|
||||||
if (col_start - self.adjust_pad) <= col_id and (col_end + self.adjust_pad) >= col_id:
|
if (col_start - self.adjust_pad) <= col_id and (col_end + self.adjust_pad) >= col_id:
|
||||||
if col_start in adjusted_data:
|
if col_start in adjusted_data:
|
||||||
adjusted_data[col_start] += ' ' + value.strip()
|
adjusted_data[col_start] += ' ' + value.strip()
|
||||||
else:
|
else:
|
||||||
adjusted_data[col_start] = value.strip()
|
adjusted_data[col_start] = value.strip()
|
||||||
|
|
||||||
return adjusted_data.items()
|
return list(adjusted_data.items())
|
||||||
|
|
||||||
|
|
||||||
def merge_column(self, col_id, value):
|
def merge_column(self, col_id, value):
|
||||||
if col_id in self.data.keys():
|
if col_id in list(self.data.keys()):
|
||||||
self.data[col_id] += ' ' + value.strip()
|
self.data[col_id] += ' ' + value.strip()
|
||||||
else:
|
else:
|
||||||
# try adding a wiggle room value?
|
# try adding a wiggle room value?
|
||||||
|
@ -286,15 +285,15 @@ class ColumnCollector(object):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if self.data and data:
|
if self.data and data:
|
||||||
keys = dict(self.column_widths).keys()
|
keys = list(dict(self.column_widths).keys())
|
||||||
keys.sort()
|
keys.sort()
|
||||||
keys += [None]
|
keys += [None]
|
||||||
|
|
||||||
if self.last_data_length < len(data):
|
if self.last_data_length < len(data):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
first_key, first_value = dict(data).items()[0]
|
first_key, first_value = list(dict(data).items())[0]
|
||||||
if self.data.keys()[0] == first_key:
|
if list(self.data.keys())[0] == first_key:
|
||||||
|
|
||||||
position = keys.index(first_key)
|
position = keys.index(first_key)
|
||||||
max_length = keys[position + 1]
|
max_length = keys[position + 1]
|
||||||
|
@ -308,7 +307,7 @@ class ColumnCollector(object):
|
||||||
def tuple(self):
|
def tuple(self):
|
||||||
#try:
|
#try:
|
||||||
if self.data:
|
if self.data:
|
||||||
return tuple(map(lambda k:self.data[k], sorted(self.data.keys())))
|
return tuple([self.data[k] for k in sorted(self.data.keys())])
|
||||||
return ()
|
return ()
|
||||||
#except:
|
#except:
|
||||||
# import pdb
|
# import pdb
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
import model
|
from . import model
|
||||||
from fields import *
|
from .fields import *
|
||||||
import enums
|
from . import enums
|
||||||
|
|
||||||
__all__ = RECORD_TYPES = ['SubmitterRecord', 'EmployerRecord',
|
__all__ = RECORD_TYPES = ['SubmitterRecord', 'EmployerRecord',
|
||||||
'EmployeeWageRecord', 'OptionalEmployeeWageRecord',
|
'EmployeeWageRecord', 'OptionalEmployeeWageRecord',
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue