From e9a6dc981f6305248516f490f59971b5cf1e654d Mon Sep 17 00:00:00 2001 From: Binh Nguyen Date: Tue, 27 Nov 2012 16:01:00 -0600 Subject: [PATCH] Refer to previous log, but also verify that records are returning proper information prior to getting passed into the ColumnCollector. It seems like some things are getting stripped out due to blank lines or perhaps the annoying "Record Layout" pages. If we could extract the "Record Layout" sections, things may be simpler. --- pyaccuwage/pdfextract.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyaccuwage/pdfextract.py b/pyaccuwage/pdfextract.py index 9bb5e54..e90c84d 100644 --- a/pyaccuwage/pdfextract.py +++ b/pyaccuwage/pdfextract.py @@ -137,6 +137,9 @@ class ColumnCollector(object): self.empty_rows = 0 pass + def __repr__(self): + return "<%s: %s>" % (self.__class__.__name__, map(lambda x:x if len(x) < 25 else x[:25] + '..', self.data.values())) + def add(self, data): if self.empty_rows > 2: raise IsNextField()