adding new pdf extract capability

This commit is contained in:
Binh 2012-07-10 15:24:13 -05:00
parent b77b80e485
commit e8145c5616
4 changed files with 174 additions and 134 deletions

47
scripts/pyaccuwage-pdfparse Executable file
View file

@ -0,0 +1,47 @@
#!/usr/bin/python
from pyaccuwage.parser import RecordBuilder
from pyaccuwage.pdfexport import PDFRecordFinder
import argparse
import sys
# Command-line interface for the script.
parser = argparse.ArgumentParser(description="Parse and convert contents of IRS files into pyaccuwage e-file classes.")
parser.add_argument("-f", "--full", help="Generate full python file, including related imports.", action="store_true")
# The output stage reads ``args.classname``; without this option being
# registered that attribute lookup raises AttributeError on every run.
parser.add_argument("-c", "--classname", metavar="class_name", default=None, help="Name of the generated record class (default: GeneratedRecord).")
# NOTE(review): the input is opened in text mode ('r') even though the help
# text describes a PDF; confirm whether the consumer needs binary mode.
parser.add_argument("-i", "--input", metavar="file", type=argparse.FileType('r'), help="Source PDF file, ie: p1220.pdf")
args = parser.parse_args()

# Disabled legacy snippet (kept as an inert string expression): it read the
# definition text from stdin and produced ``tokens`` via PastedDefParser.
"""
lines = []
for x in sys.stdin.readlines():
lines.append(x)
pdp = PastedDefParser()
tokens = pdp.load("".join(lines))
"""
def generate_imports():
    """Return the import header written at the top of a full generated file.

    The result is the two pyaccuwage import statements, each terminated by a
    newline, followed by one additional newline (i.e. a blank line).
    """
    header_lines = [
        "from pyaccuwage import model",
        "from pyaccuwage.fields import *",
        "",
        "",
    ]
    return "\n".join(header_lines)
def generate_class_begin(name):
    """Return the opening ``class`` line for a generated record class.

    name -- identifier to use for the generated class.

    The base class is spelled ``model.Model`` so that it resolves against
    the ``from pyaccuwage import model`` line produced by the import header;
    the previous spelling ``mode.Model`` referenced a name that the
    generated file never defines.
    """
    # Wrap in a tuple so a tuple-valued ``name`` cannot be mistaken for
    # multiple format arguments.
    return "class %s(model.Model):\n" % (name,)
# Optionally emit the import header so the output is a complete module.
if args.full:
    sys.stdout.write(generate_imports())

# Read the class name defensively: if the parser never registered a
# ``classname`` option, a plain ``args.classname`` raises AttributeError.
# A missing or empty value falls back to the default class name.
classname = getattr(args, "classname", None) or "GeneratedRecord"
sys.stdout.write(generate_class_begin(classname))

# NOTE(review): ``tokens`` is not assigned anywhere in the visible script
# (its only assignment sits inside a disabled legacy snippet), so this loop
# raises NameError as written. It presumably should be produced from
# ``args.input`` via PDFRecordFinder / RecordBuilder -- confirm and wire up.
for x in tokens:
    # Each token becomes one tab-indented line of the class body.
    sys.stdout.write('\t' + x + '\n')