diff --git a/pyaccuwage/model.py b/pyaccuwage/model.py index 044ba27..a8f83cb 100644 --- a/pyaccuwage/model.py +++ b/pyaccuwage/model.py @@ -6,7 +6,7 @@ import pdb class Model(object): record_identifier = ' ' required = False - record_length = 512 + target_size = 512 def __init__(self): for (key, value) in self.__class__.__dict__.items(): @@ -33,7 +33,7 @@ class Model(object): for key in self.__class__.__dict__.keys(): attr = getattr(self, key) if isinstance(attr, Field): - fields.append(attr) + fields.append(attr) return fields def get_sorted_fields(self): @@ -57,7 +57,12 @@ class Model(object): [field.get_data() for field in self.get_sorted_fields()]) if len(result) != self.record_length: raise ValidationError("Record result length not equal to %d bytes (%d)" % (self.record_length, len(result))) - return result + + #result = ''.join([self.record_identifier] + [field.get_data() for field in self.get_sorted_fields()]) + #if len(result) != self.target_size: + # raise ValidationError("Record result length not equal to %d bytes (%d)" % (self.target_size, len(result))) + + return result def read(self, fp): for field in self.get_sorted_fields(): diff --git a/pyaccuwage/pdfextract.py b/pyaccuwage/pdfextract.py index 42cc9dd..7660912 100644 --- a/pyaccuwage/pdfextract.py +++ b/pyaccuwage/pdfextract.py @@ -24,6 +24,9 @@ class PDFRecordFinder(object): def records(self): headings = self.locate_heading_rows_by_field() + #for x in headings: + # print x + for (start, end, name) in headings: name = name.decode('ascii', 'ignore') yield (name, list(self.find_fields(iter(self.textrows[start+1:end]))), (start+1, end)) @@ -109,6 +112,7 @@ class PDFRecordFinder(object): for r in row_iter: row = r.decode('UTF-8') + #print row row_columns = self.extract_columns_from_row(row) if not row_columns: @@ -185,7 +189,10 @@ class ColumnCollector(object): pass def __repr__(self): - return "<%s: %s>" % (self.__class__.__name__, map(lambda x:x if len(x) < 25 else x[:25] + '..', self.data.values())) + return "<%s: %s>" % ( + self.__class__.__name__, + map(lambda x:x if len(x) < 25 else x[:25] + '..', + self.data.values() if self.data else '')) def add(self, data): #if self.empty_rows > 2: