Added field extraction and model creation

This commit is contained in:
Mark Riedesel 2013-10-15 23:47:32 -05:00
parent 8131e77dd9
commit 1a5910f1ef
4 changed files with 177 additions and 5 deletions

57
controller.py Normal file
View file

@ -0,0 +1,57 @@
import poppler
import pdfformfiller.models
import pdb
class PopplerController(object):
    """Wrap a poppler document to render page images and read form fields.

    The opened document is stored on ``self.doc`` and used by
    ``generate_page_images`` and ``get_page_fields``.
    """

    # NOTE(review): development fallback, kept only so that constructing the
    # controller without a path behaves exactly as it did before. Confirm
    # whether any caller still relies on it and remove it if not.
    _LEGACY_DEFAULT_URI = 'file:///home/mark/Bedraga.pdf'

    def __init__(self, pdf=""):
        """Open the PDF at filesystem path ``pdf``.

        Previously the argument was ignored and a hard-coded file was always
        opened, so every caller got the same document regardless of the path
        it passed in.
        """
        if pdf:
            uri = self._file_uri(pdf)
        else:
            uri = self._LEGACY_DEFAULT_URI
        self.doc = poppler.document_new_from_file(uri, password=None)

    @staticmethod
    def _file_uri(path):
        """Return the ``file://`` URI for a filesystem path (made absolute)."""
        import os
        return 'file://' + os.path.abspath(path)

    def load_poppler_doc(self, filename):
        """Open ``filename`` relative to ``settings.STATIC_FORMS_ROOT``.

        Returns the poppler document, or ``None`` when no such file exists.
        """
        # `os` is not imported at the top of this module, so import it here
        # alongside the other function-scope imports.
        import os
        from django.conf import settings
        # NOTE(review): `filename` is joined without a containment check; if
        # it can come from a request, verify it cannot escape the forms root.
        pdf_path = os.path.abspath(os.path.join(settings.STATIC_FORMS_ROOT, filename))
        if os.path.isfile(pdf_path):
            return poppler.document_new_from_file('file://' + pdf_path, password=None)
        return None

    def generate_page_images(self, pages=None, scale=2):
        """Yield one in-memory PNG (a rewound ``StringIO``) per page.

        ``pages`` is an iterable of page indexes; when empty or ``None`` every
        page of the document is rendered. ``scale`` is the zoom factor applied
        to the page size reported by poppler.
        """
        import cairo
        import StringIO
        for page_num in pages or xrange(self.doc.get_n_pages()):
            page = self.doc.get_page(page_num)
            width, height = page.get_size()
            # A fresh surface per page: reusing one surface sized from the
            # first page painted each page on top of the previous ones.
            surface = cairo.ImageSurface(cairo.FORMAT_RGB24,
                                         int(width * scale),
                                         int(height * scale))
            context = cairo.Context(surface)
            context.scale(scale, scale)
            page.render(context)
            fd = StringIO.StringIO()
            surface.write_to_png(fd)
            fd.seek(0)
            yield fd

    def get_page_fields(self, page_num):
        """Yield a dict (name, value, area) for each form field on a page.

        ``area`` holds ``pos_x``/``pos_y`` (the mapping's x1/y1 corner) and
        ``width``/``height`` derived from the x2/y2 corner.
        """
        page = self.doc.get_page(page_num)
        for field in page.get_form_field_mapping():
            area = field.area
            data = field.field
            yield {
                'name': data.get_name(),
                'value': data.text_get_text(),
                'area': {
                    'pos_x': area.x1,
                    'pos_y': area.y1,
                    'width': area.x2 - area.x1,
                    'height': area.y2 - area.y1,
                },
            }
class PDFFormFillerPostSave(PopplerController):
    """Placeholder subclass of PopplerController."""

    def __init__(self, document):
        """Accept ``document`` and do nothing.

        The parent constructor is not called, so no poppler document is
        opened and ``self.doc`` is not set.
        """

View file

@ -1,3 +1,97 @@
from django.db import models import os
# Create your models here. from django.db import models
from django.core.files import File
from pdfformfiller.controller import PopplerController
# (stored value, display label) pairs used as `choices` for FormField.fieldtype.
FIELD_TYPE_CHOICES = (
('t', 'Text'),
('c', 'Checkbox'),
)
class Document(models.Model):
    """A named PDF form whose pages are rendered into Page rows."""

    name = models.CharField(max_length=64)
    pdf = models.FileField(upload_to='pdfformfiller_pdf', blank=True)

    def __unicode__(self):
        return self.name

    def process_pages(self):
        """Render every page of the PDF and store the image on its Page.

        An existing Page with the same ``page_num`` is reused; otherwise a new
        one is created. Saving the image also saves the Page.
        """
        # `pdf` is declared blank=True, and reading `.path` on a file field
        # with no file raises ValueError. Without this guard, saving a
        # Document that has no PDF yet fails inside the post_save handler.
        if not self.pdf:
            return
        pdf_path = self.pdf.path
        image_prefix = os.path.basename(pdf_path)
        poppler_con = PopplerController(pdf_path)
        for page_num, image in enumerate(poppler_con.generate_page_images()):
            try:
                page = self.page_set.get(page_num=page_num)
            except Page.DoesNotExist:
                page = Page(document=self,
                            page_num=page_num,
                            name=self.name + (' page %d' % page_num))
            page.image.save(
                image_prefix + ('_page%03d.png' % page_num),
                File(image)
            )
def document_post_save(sender, **kwargs):
    """Signal receiver: run ``process_pages()`` on the saved instance.

    Does nothing when no truthy ``instance`` keyword argument is supplied.
    """
    saved = kwargs.get('instance')
    if not saved:
        return
    saved.process_pages()
# Run document_post_save after every Document save.
models.signals.post_save.connect(document_post_save, sender=Document)
class Page(models.Model):
    """One rendered page of a Document, together with its form fields."""

    document = models.ForeignKey('Document')
    name = models.CharField(max_length=64, blank=True)
    image = models.ImageField(upload_to='pdfformfiller_page')
    page_num = models.SmallIntegerField()

    def __unicode__(self):
        return self.name

    def process_fields(self):
        """Create or update a FormField for each field found on this page.

        Fields are matched by name within this page. Every field is stored
        with fieldtype 't'.
        """
        controller = PopplerController(self.document.pdf.path)
        for extracted in controller.get_page_fields(self.page_num):
            try:
                form_field = self.formfield_set.get(name=extracted['name'])
            except FormField.DoesNotExist:
                form_field = FormField(page=self, name=extracted['name'])
            geometry = extracted['area']
            # Copy the four geometry values in the same order as before.
            for attr in ('pos_x', 'pos_y', 'width', 'height'):
                setattr(form_field, attr, geometry[attr])
            form_field.fieldtype = 't'
            form_field.save()
def page_post_save(sender, **kwargs):
    """Signal receiver: run ``process_fields()`` on the saved instance.

    Does nothing when no truthy ``instance`` keyword argument is supplied.
    """
    saved = kwargs.get('instance')
    if not saved:
        return
    saved.process_fields()
# Run page_post_save after every Page save.
models.signals.post_save.connect(page_post_save, sender=Page)
class FormField(models.Model):
    """A single form field on a Page, with its position and size."""

    page = models.ForeignKey('Page')
    name = models.CharField(max_length=255)
    fieldtype = models.CharField(max_length=1, choices=FIELD_TYPE_CHOICES)
    # Geometry as reported by the extractor.
    # NOTE(review): units and origin are not established here; confirm.
    pos_x = models.FloatField()
    pos_y = models.FloatField()
    width = models.FloatField()
    height = models.FloatField()

    def __unicode__(self):
        label = self.name
        return label

    def page_num(self):
        """Return the ``page_num`` of the page this field belongs to."""
        owner = self.page
        return owner.page_num

View file

@ -2,6 +2,6 @@ from django.conf.urls import patterns, url
from pdfformfiller.views import editor

# Route "<pdf>/edit/" to the editor view. Each line here previously held the
# old and new diff sides fused together; only the post-commit side is kept,
# which uses the renamed PDFFormFillerEditor class.
urlpatterns = patterns('pdfformfiller.views',
    url(r'^(?P<pdf>.*)/edit/$', editor.PDFFormFillerEditor.as_view(), name='pdfformfiller-edit'),
)

View file

@ -1,10 +1,12 @@
import os
from django.views.generic import View from django.views.generic import View
from django.http import HttpResponse from django.http import HttpResponse
from django.utils import simplejson from django.utils import simplejson
import poppler import poppler
import os
class PDFFormFillerEditorView(View):
class PDFFormFillerEditor(View):
def get(self, request, pdf=""): def get(self, request, pdf=""):
doc = poppler.document_new_from_file('file:///home/mark/Bedraga.pdf', password=None) doc = poppler.document_new_from_file('file:///home/mark/Bedraga.pdf', password=None)
@ -20,6 +22,25 @@ class PDFFormFillerEditorView(View):
if os.path.isfile(pdf_path): if os.path.isfile(pdf_path):
return poppler.document_new_from_file('file://' + pdf_path, password=None) return poppler.document_new_from_file('file://' + pdf_path, password=None)
def generate_page_pngs(self, poppler_doc, pages=None):
    """Yield one in-memory PNG (a rewound ``StringIO``) per requested page.

    ``pages`` is an iterable of page indexes; when empty or ``None`` every
    page of ``poppler_doc`` is rendered at twice the reported page size.
    """
    import cairo
    import StringIO
    # get_n_pages() is the page-count call PopplerController uses on the same
    # kind of document; the earlier num_pages() did not match it.
    for page_num in pages or xrange(poppler_doc.get_n_pages()):
        page = poppler_doc.get_page(page_num)
        width, height = page.get_size()
        # A fresh surface per page, so pages are not painted over each other
        # and each image has that page's own dimensions.
        surface = cairo.ImageSurface(cairo.FORMAT_RGB24, int(width * 2), int(height * 2))
        context = cairo.Context(surface)
        # The surface is twice the page size, so scale the drawing to fill it.
        context.scale(2, 2)
        page.render(context)
        fd = StringIO.StringIO()
        # write_to_png() is the cairo call that serialises a surface to a
        # file-like object.
        surface.write_to_png(fd)
        fd.seek(0)
        yield fd
def get_fields(self, poppler_doc, page): def get_fields(self, poppler_doc, page):
fields = poppler_doc.get_page(page).get_form_field_mapping() fields = poppler_doc.get_page(page).get_form_field_mapping()