Added field extraction and model creation
This commit is contained in:
parent
8131e77dd9
commit
1a5910f1ef
4 changed files with 177 additions and 5 deletions
57
controller.py
Normal file
57
controller.py
Normal file
|
@ -0,0 +1,57 @@
|
|||
import poppler
|
||||
|
||||
import pdfformfiller.models
|
||||
import pdb
|
||||
|
||||
class PopplerController(object):
    """Wrap a poppler document to render its pages and list its form fields."""

    # Development fixture that ``__init__`` opens unconditionally.
    _DEFAULT_PDF_URI = 'file:///home/mark/Bedraga.pdf'

    def __init__(self, pdf=""):
        """Open the document this controller works on and store it in ``self.doc``.

        NOTE(review): ``pdf`` is accepted but never used; the hard-coded
        development file is always opened.  Presumably ``pdf`` should select
        the document -- confirm with callers whether it is a URI or a file
        name (see ``load_poppler_doc``) before wiring it up.
        """
        self.doc = poppler.document_new_from_file(self._DEFAULT_PDF_URI, password=None)

    def load_poppler_doc(self, filename):
        """Open ``filename`` from ``settings.STATIC_FORMS_ROOT`` as a poppler document.

        Returns the opened document, or ``None`` when the resolved path is
        not an existing regular file.
        """
        # Imported locally (like the Django settings below): the module does
        # not import ``os`` at the top, so without this the method raised
        # NameError on every call.
        import os
        from django.conf import settings

        pdf_path = os.path.abspath(os.path.join(settings.STATIC_FORMS_ROOT, filename))
        if not os.path.isfile(pdf_path):
            return None
        return poppler.document_new_from_file('file://' + pdf_path, password=None)

    def generate_page_images(self, pages=None):
        """Yield one PNG image per requested page, rendered at twice the page size.

        ``pages`` is an iterable of page indices as accepted by
        ``self.doc.get_page``; when it is empty or omitted, every page of the
        document is rendered in order.

        Each yielded item is a ``StringIO`` buffer holding the PNG data,
        rewound to position 0.
        """
        import cairo
        import StringIO

        scale = 2
        for page_num in pages or xrange(self.doc.get_n_pages()):
            page = self.doc.get_page(page_num)
            width, height = page.get_size()

            # A fresh surface per page: reusing a single surface drew every
            # page on top of the previous ones and clipped all pages to the
            # size of the first.
            # NOTE(review): the surface is not filled before rendering;
            # if pages come out with a dark background, paint it white first
            # -- confirm against real output.
            surface = cairo.ImageSurface(
                cairo.FORMAT_RGB24, int(width * scale), int(height * scale))
            context = cairo.Context(surface)
            context.scale(scale, scale)
            page.render(context)

            fd = StringIO.StringIO()
            surface.write_to_png(fd)
            fd.seek(0)
            yield fd

    def get_page_fields(self, page_num):
        """Yield a dict describing each form field mapped on page ``page_num``.

        Every dict has the keys ``'name'``, ``'value'`` and ``'area'``;
        ``'area'`` holds ``'pos_x'``/``'pos_y'`` (the mapping's ``x1``/``y1``
        corner) and ``'width'``/``'height'`` (the extent to ``x2``/``y2``).

        NOTE(review): ``text_get_text()`` is called on every field regardless
        of its type -- confirm what it returns for non-text fields.
        """
        page = self.doc.get_page(page_num)
        for mapping in page.get_form_field_mapping():
            rect = mapping.area
            form_field = mapping.field
            yield {
                'name': form_field.get_name(),
                'value': form_field.text_get_text(),
                'area': {
                    'pos_x': rect.x1,
                    'pos_y': rect.y1,
                    'width': rect.x2 - rect.x1,
                    'height': rect.y2 - rect.y1,
                },
            }
|
||||
|
||||
class PDFFormFillerPostSave(PopplerController):
    """Placeholder ``PopplerController`` subclass constructed from a ``document``."""

    def __init__(self, document):
        """Accept ``document`` and do nothing with it yet.

        NOTE(review): ``PopplerController.__init__`` is not called, so this
        constructor never assigns ``self.doc``; the inherited methods that
        read ``self.doc`` cannot work on such an instance until this stub is
        implemented.
        """
        return None
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue