This commit is contained in:
Binh 2013-03-20 15:15:51 -05:00
commit e0d54c8a01
2 changed files with 16 additions and 4 deletions

View file

@@ -6,7 +6,7 @@ import pdb
class Model(object): class Model(object):
record_identifier = ' ' record_identifier = ' '
required = False required = False
record_length = 512 target_size = 512
def __init__(self): def __init__(self):
for (key, value) in self.__class__.__dict__.items(): for (key, value) in self.__class__.__dict__.items():
@@ -57,6 +57,11 @@ class Model(object):
[field.get_data() for field in self.get_sorted_fields()]) [field.get_data() for field in self.get_sorted_fields()])
if len(result) != self.record_length: if len(result) != self.record_length:
raise ValidationError("Record result length not equal to %d bytes (%d)" % (self.record_length, len(result))) raise ValidationError("Record result length not equal to %d bytes (%d)" % (self.record_length, len(result)))
#result = ''.join([self.record_identifier] + [field.get_data() for field in self.get_sorted_fields()])
#if len(result) != self.target_size:
# raise ValidationError("Record result length not equal to %d bytes (%d)" % (self.target_size, len(result)))
return result return result
def read(self, fp): def read(self, fp):

View file

@@ -24,6 +24,9 @@ class PDFRecordFinder(object):
def records(self): def records(self):
headings = self.locate_heading_rows_by_field() headings = self.locate_heading_rows_by_field()
#for x in headings:
# print x
for (start, end, name) in headings: for (start, end, name) in headings:
name = name.decode('ascii', 'ignore') name = name.decode('ascii', 'ignore')
yield (name, list(self.find_fields(iter(self.textrows[start+1:end]))), (start+1, end)) yield (name, list(self.find_fields(iter(self.textrows[start+1:end]))), (start+1, end))
@@ -109,6 +112,7 @@ class PDFRecordFinder(object):
for r in row_iter: for r in row_iter:
row = r.decode('UTF-8') row = r.decode('UTF-8')
#print row
row_columns = self.extract_columns_from_row(row) row_columns = self.extract_columns_from_row(row)
if not row_columns: if not row_columns:
@@ -185,7 +189,10 @@ class ColumnCollector(object):
pass pass
def __repr__(self): def __repr__(self):
return "<%s: %s>" % (self.__class__.__name__, map(lambda x:x if len(x) < 25 else x[:25] + '..', self.data.values())) return "<%s: %s>" % (
self.__class__.__name__,
map(lambda x:x if len(x) < 25 else x[:25] + '..',
self.data.values() if self.data else ''))
def add(self, data): def add(self, data):
#if self.empty_rows > 2: #if self.empty_rows > 2: