Looks like the 1220 form has changed since last year; work on getting

the changes applied in a simple manner.
This commit is contained in:
Binh 2013-03-05 14:49:38 -06:00
parent afc4138898
commit a1ab6b4918
2 changed files with 12 additions and 4 deletions

View file

@ -6,6 +6,7 @@ import pdb
class Model(object):
record_identifier = ' '
required = False
target_size = 512
def __init__(self):
for (key, value) in self.__class__.__dict__.items():
@ -32,7 +33,7 @@ class Model(object):
for key in self.__class__.__dict__.keys():
attr = getattr(self, key)
if isinstance(attr, Field):
fields.append(attr)
fields.append(attr)
return fields
def get_sorted_fields(self):
@ -53,8 +54,8 @@ class Model(object):
def output(self):
result = ''.join([self.record_identifier] + [field.get_data() for field in self.get_sorted_fields()])
if len(result) != 512:
raise ValidationError("Record result length not equal to 512 bytes (%d)" % len(result))
if len(result) != self.target_size:
raise ValidationError("Record result length not equal to %d bytes (%d)" % (self.target_size, len(result)))
return result
def read(self, fp):

View file

@ -24,6 +24,9 @@ class PDFRecordFinder(object):
def records(self):
headings = self.locate_heading_rows_by_field()
#for x in headings:
# print x
for (start, end, name) in headings:
name = name.decode('ascii', 'ignore')
yield (name, list(self.find_fields(iter(self.textrows[start+1:end]))), (start+1, end))
@ -109,6 +112,7 @@ class PDFRecordFinder(object):
for r in row_iter:
row = r.decode('UTF-8')
#print row
row_columns = self.extract_columns_from_row(row)
if not row_columns:
@ -185,7 +189,10 @@ class ColumnCollector(object):
pass
def __repr__(self):
return "<%s: %s>" % (self.__class__.__name__, map(lambda x:x if len(x) < 25 else x[:25] + '..', self.data.values()))
return "<%s: %s>" % (
self.__class__.__name__,
map(lambda x:x if len(x) < 25 else x[:25] + '..',
self.data.values() if self.data else ''))
def add(self, data):
#if self.empty_rows > 2: