#!/usr/bin/env python
"""Generate pyaccuwage model class source from record layouts in a PDF.

The input PDF is scanned with ``PDFRecordFinder``; every record it yields is
turned into a ``class <Name>(pyaccuwagemodel.Model):`` definition whose body
lines come from ``RecordBuilder.load``. The generated source is written to
standard output.
"""

import argparse
import os
import re
import sys


def build_parser():
    """Return the command-line argument parser for this script."""
    parser = argparse.ArgumentParser(description="Parse and convert contents of IRS files into pyaccuwage e-file classes.")
    parser.add_argument("-i", "--input",
                        nargs=1,
                        required=True,
                        metavar="file",
                        type=argparse.FileType('r'),
                        help="Source PDF file, ie: p1220.pdf")
    parser.add_argument("-f", "--full",
                        help="Generate full python file, including related imports.",
                        action="store_true")
    return parser


def generate_imports():
    """Return the import header emitted at the top of a ``--full`` file."""
    return "\n".join([
        "from pyaccuwage import model as pyaccuwagemodel",
        "from pyaccuwage.fields import *",
        "",
        "",
    ])


def class_name_from_title(title):
    """Derive a class name from a record title.

    Only the text after the last ``:`` is used. Leading characters that are
    not ASCII letters are dropped, then every remaining non-word character
    is removed. The result may be empty if nothing usable is left.
    """
    name = re.sub(r'^[^a-zA-Z]*', '', title.split(':')[-1])
    return re.sub(r'[^\w]*', '', name)


def main(argv=None):
    """Run the converter.

    ``argv`` is the list of command-line arguments; when ``None`` argparse
    falls back to ``sys.argv[1:]``.
    """
    args = build_parser().parse_args(argv)

    # Imported here rather than at module level so that --help / usage
    # errors are reported without loading the PDF extraction stack.
    from pyaccuwage.parser import RecordBuilder
    from pyaccuwage.pdfextract import PDFRecordFinder

    # argparse.FileType has already opened the file (which validates that it
    # is readable), but only its path is needed below, so release the handle
    # instead of leaking it. "-" maps to sys.stdin, which must stay open.
    source = args.input[0]
    source_file = os.path.abspath(source.name)
    if source is not sys.stdin:
        source.close()

    if args.full:
        sys.stdout.write(generate_imports())

    doc = PDFRecordFinder(source_file)
    builder = RecordBuilder()

    for (title, fields) in doc.records():
        name = class_name_from_title(title)
        sys.stdout.write("\nclass %s(pyaccuwagemodel.Model):\n" % name)

        # Pass a real list: the former identity map() produced a list on
        # Python 2 but only a one-shot iterator on Python 3.
        for field in builder.load(list(fields)):
            sys.stdout.write('\t' + field + '\n')


if __name__ == "__main__":
    main()