Added field extraction and model creation

This commit is contained in:
Mark Riedesel 2013-10-15 23:47:32 -05:00
parent 8131e77dd9
commit 1a5910f1ef
4 changed files with 177 additions and 5 deletions

57
controller.py Normal file
View file

@ -0,0 +1,57 @@
import poppler
import pdfformfiller.models
import pdb
class PopplerController(object):
    """Wrapper around a poppler document for page rendering and form-field extraction."""

    def __init__(self, pdf=""):
        """Open the PDF found at the filesystem path ``pdf``.

        ``self.doc`` is left as ``None`` when no path is given, instead of
        silently opening a hard-coded development file.
        """
        # Local import: the module's import block does not provide ``os``.
        import os

        self.doc = None
        if pdf:
            self.doc = poppler.document_new_from_file(
                'file://' + os.path.abspath(pdf), password=None)

    def load_poppler_doc(self, filename):
        """Load ``filename`` relative to ``settings.STATIC_FORMS_ROOT``.

        Returns the poppler document, or ``None`` when no such file exists.
        """
        import os
        from django.conf import settings

        pdf_path = os.path.abspath(
            os.path.join(settings.STATIC_FORMS_ROOT, filename))
        if not os.path.isfile(pdf_path):
            return None
        return poppler.document_new_from_file('file://' + pdf_path, password=None)

    def generate_page_images(self, pages=None):
        """Yield one PNG per page as a rewound in-memory file object.

        ``pages`` is an iterable of page numbers; when it is empty or omitted
        every page of the document is rendered.  Each page is drawn at twice
        its reported size.
        """
        import cairo
        import StringIO

        scale = 2
        for page_num in pages or xrange(self.doc.get_n_pages()):
            page = self.doc.get_page(page_num)
            width, height = page.get_size()
            # A fresh surface per page: sharing one surface would draw every
            # page on top of the previous ones and tie all images to the size
            # of the first page.
            surface = cairo.ImageSurface(
                cairo.FORMAT_RGB24, int(width * scale), int(height * scale))
            context = cairo.Context(surface)
            # Paint a white background before rendering the page content.
            context.set_source_rgb(1, 1, 1)
            context.paint()
            context.scale(scale, scale)
            page.render(context)
            fd = StringIO.StringIO()
            surface.write_to_png(fd)
            fd.seek(0)
            yield fd

    def get_page_fields(self, page_num):
        """Yield a dict describing each form field mapped on page ``page_num``.

        Every dict has the keys ``name``, ``value`` and ``area``; ``area``
        holds ``pos_x``/``pos_y`` (the mapping's ``x1``/``y1``) and
        ``width``/``height`` (``x2 - x1`` / ``y2 - y1``).
        """
        page = self.doc.get_page(page_num)
        for field in page.get_form_field_mapping():
            area = field.area
            data = field.field
            # NOTE(review): text_get_text() is called for every field
            # regardless of its type -- confirm behaviour for non-text fields.
            yield {
                'name': data.get_name(),
                'value': data.text_get_text(),
                'area': {
                    'pos_x': area.x1,
                    'pos_y': area.y1,
                    'width': area.x2 - area.x1,
                    'height': area.y2 - area.y1,
                },
            }
class PDFFormFillerPostSave(PopplerController):
    """Placeholder controller subclass; no behaviour is implemented yet."""

    def __init__(self, document):
        """Accept ``document`` and do nothing.

        The parent initialiser is not invoked here.
        """

View file

@ -1,3 +1,97 @@
from django.db import models
import os
# Create your models here.
from django.db import models
from django.core.files import File
from pdfformfiller.controller import PopplerController
FIELD_TYPE_CHOICES = (
('t', 'Text'),
('c', 'Checkbox'),
)
class Document(models.Model):
    """A named PDF form; the uploaded file itself is optional."""

    name = models.CharField(max_length=64)
    pdf = models.FileField(upload_to='pdfformfiller_pdf', blank=True)

    def __unicode__(self):
        return self.name

    def process_pages(self):
        """Render every page of the PDF and store it as a Page image.

        An existing Page with the same ``page_num`` is reused; otherwise a
        new one is created.  Nothing happens when no PDF is attached.
        """
        if not self.pdf:
            # ``pdf`` is optional (blank=True) and reading ``.path`` on an
            # empty file field raises ValueError, which would break every
            # save of a Document that has no upload yet.
            return

        pdf_path = self.pdf.path
        image_prefix = os.path.basename(pdf_path)
        name_limit = Page._meta.get_field('name').max_length
        poppler_con = PopplerController(pdf_path)

        for page_num, image in enumerate(poppler_con.generate_page_images()):
            try:
                page = self.page_set.get(page_num=page_num)
            except Page.DoesNotExist:
                # Trim the document name so that name + suffix still fits
                # into Page.name.
                suffix = ' page %d' % page_num
                page = Page(document=self,
                            page_num=page_num,
                            name=self.name[:name_limit - len(suffix)] + suffix)
            page.image.save(
                image_prefix + ('_page%03d.png' % page_num),
                File(image)
            )
def document_post_save(sender, **kwargs):
    """Signal receiver: call ``process_pages()`` on the ``instance`` keyword argument.

    Does nothing when ``instance`` is missing or falsy.
    """
    document = kwargs.get('instance')
    if not document:
        return
    document.process_pages()
# Invoke document_post_save after every save of a Document.
models.signals.post_save.connect(document_post_save, sender=Document)
class Page(models.Model):
    """One page of a Document together with its rendered image."""

    document = models.ForeignKey('Document')
    name = models.CharField(max_length=64, blank=True)
    image = models.ImageField(upload_to='pdfformfiller_page')
    page_num = models.SmallIntegerField()

    def __unicode__(self):
        return self.name

    def process_fields(self):
        """Create or update a FormField for each form field on this page.

        Fields are matched by name within this page; every field is stored
        with field type ``'t'``.  Nothing happens when the owning document
        has no PDF attached.
        """
        pdf = self.document.pdf
        if not pdf:
            # Document.pdf is optional; reading ``.path`` on an empty file
            # field raises ValueError, which would break saving this page.
            return

        poppler_con = PopplerController(pdf.path)
        for fdata in poppler_con.get_page_fields(self.page_num):
            try:
                field = self.formfield_set.get(name=fdata['name'])
            except FormField.DoesNotExist:
                field = FormField(page=self, name=fdata['name'])
            area = fdata['area']
            for key in ('pos_x', 'pos_y', 'width', 'height'):
                setattr(field, key, area[key])
            field.fieldtype = 't'
            field.save()
def page_post_save(sender, **kwargs):
    """Signal receiver: call ``process_fields()`` on the ``instance`` keyword argument.

    Does nothing when ``instance`` is missing or falsy.
    """
    page = kwargs.get('instance')
    if not page:
        return
    page.process_fields()
# Invoke page_post_save after every save of a Page.
models.signals.post_save.connect(page_post_save, sender=Page)
class FormField(models.Model):
    """A named form field belonging to a Page."""

    page = models.ForeignKey('Page')
    name = models.CharField(max_length=255)
    fieldtype = models.CharField(choices=FIELD_TYPE_CHOICES, max_length=1)

    # Placement rectangle of the field, stored as floats.
    pos_x = models.FloatField()
    pos_y = models.FloatField()
    width = models.FloatField()
    height = models.FloatField()

    def __unicode__(self):
        return self.name

    def page_num(self):
        """Return the ``page_num`` of the page this field belongs to."""
        owner = self.page
        return owner.page_num

View file

@ -2,6 +2,6 @@ from django.conf.urls import patterns, url
from pdfformfiller.views import editor
urlpatterns = patterns('pdfformfiller.views',
    # Editor for a single PDF form; ``pdf`` captures everything before
    # ``/edit/``.  Only one route is registered for this regex and name:
    # a second identical pattern could never be matched.
    url(r'^(?P<pdf>.*)/edit/$', editor.PDFFormFillerEditor.as_view(), name='pdfformfiller-edit'),
)

View file

@ -1,10 +1,12 @@
import os
from django.views.generic import View
from django.http import HttpResponse
from django.utils import simplejson
import poppler
import os
class PDFFormFillerEditorView(View):
class PDFFormFillerEditor(View):
def get(self, request, pdf=""):
doc = poppler.document_new_from_file('file:///home/mark/Bedraga.pdf', password=None)
@ -20,6 +22,25 @@ class PDFFormFillerEditorView(View):
if os.path.isfile(pdf_path):
return poppler.document_new_from_file('file://' + pdf_path, password=None)
def generate_page_pngs(self, poppler_doc, pages=None):
    """Yield one PNG per page of ``poppler_doc`` as a rewound in-memory file.

    ``pages`` is an iterable of page numbers; when it is empty or omitted
    every page of the document is rendered.  Each page is drawn at twice its
    reported size.
    """
    import cairo
    import StringIO

    scale = 2
    for page_num in pages or xrange(poppler_doc.get_n_pages()):
        page = poppler_doc.get_page(page_num)
        width, height = page.get_size()
        # A fresh surface per page: sharing one surface would draw every
        # page on top of the previous ones.
        surface = cairo.ImageSurface(
            cairo.FORMAT_RGB24, int(width * scale), int(height * scale))
        context = cairo.Context(surface)
        # Paint a white background before rendering the page content.
        context.set_source_rgb(1, 1, 1)
        context.paint()
        # The surface is ``scale`` times the page size, so scale the
        # drawing to fill it.
        context.scale(scale, scale)
        page.render(context)
        fd = StringIO.StringIO()
        surface.write_to_png(fd)
        fd.seek(0)
        yield fd
def get_fields(self, poppler_doc, page):
fields = poppler_doc.get_page(page).get_form_field_mapping()