working better!
This commit is contained in:
parent
e6e087ef38
commit
730073dcd1
1 changed file with 18 additions and 33 deletions
|
@ -108,45 +108,29 @@ class PDFRecordFinder(object):
|
|||
blank_row_counter = 0
|
||||
|
||||
for r in row_iter:
|
||||
row = self.extract_columns_from_row(r.decode('UTF-8'))
|
||||
if not row:
|
||||
cc.empty_row()
|
||||
row = r.decode('UTF-8')
|
||||
row_columns = self.extract_columns_from_row(row)
|
||||
|
||||
if not row_columns:
|
||||
if cc.data and len(cc.data.keys()) > 1 and len(row.strip()) > cc.data.keys()[-1]:
|
||||
yield cc
|
||||
cc = ColumnCollector()
|
||||
else:
|
||||
cc.empty_row()
|
||||
continue
|
||||
|
||||
try:
|
||||
cc.add(row)
|
||||
cc.add(row_columns)
|
||||
|
||||
except IsNextField, e:
|
||||
yield cc
|
||||
cc = ColumnCollector()
|
||||
cc.add(row)
|
||||
cc.add(row_columns)
|
||||
except UnknownColumn, e:
|
||||
raise StopIteration
|
||||
|
||||
yield cc
|
||||
|
||||
def find_fields_old(self, row_iter):
    """Group rows from *row_iter* into ColumnCollector records (legacy path).

    Each item of *row_iter* is decoded as UTF-8 and split into columns with
    ``self.extract_columns_from_row``. Rows that yield no columns are
    skipped. When the current collector reports (via ``is_next_field``)
    that a row starts the next field, the collector is yielded and a fresh
    one is started before the row is added.

    Yields:
        ColumnCollector: one collector per detected field, including the
        collector still being filled when *row_iter* is exhausted.

    Iteration ends early, without yielding the collector in progress, as
    soon as ``cc.add`` raises ``UnknownColumn``.
    """
    cc = ColumnCollector()

    for r in row_iter:
        row = self.extract_columns_from_row(r.decode('UTF-8'))

        if not row:
            continue

        if cc.is_next_field(row):
            yield cc
            cc = ColumnCollector()

        try:
            cc.add(row)
        except UnknownColumn:
            # End the generator with a plain return: raising StopIteration
            # inside a generator body is turned into RuntimeError by PEP 479.
            return

    yield cc
|
||||
|
||||
|
||||
def extract_columns_from_row(self, row):
|
||||
re_multiwhite = re.compile(r'\s{2,}')
|
||||
|
@ -246,7 +230,6 @@ class ColumnCollector(object):
|
|||
|
||||
def adjust_columns(self, data):
|
||||
adjusted_data = {}
|
||||
|
||||
for col_id, value in data:
|
||||
if col_id in self.data.keys():
|
||||
adjusted_data[col_id] = value.strip()
|
||||
|
@ -315,9 +298,11 @@ class ColumnCollector(object):
|
|||
|
||||
@property
|
||||
def tuple(self):
|
||||
try:
|
||||
#try:
|
||||
if self.data:
|
||||
return tuple(map(lambda k:self.data[k], sorted(self.data.keys())))
|
||||
except:
|
||||
import pdb
|
||||
pdb.set_trace()
|
||||
return ()
|
||||
#except:
|
||||
# import pdb
|
||||
# pdb.set_trace()
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue