Added field extraction and model creation
This commit is contained in:
parent
8131e77dd9
commit
1a5910f1ef
4 changed files with 177 additions and 5 deletions
57
controller.py
Normal file
57
controller.py
Normal file
|
@ -0,0 +1,57 @@
|
|||
import poppler
|
||||
|
||||
import pdfformfiller.models
|
||||
import pdb
|
||||
|
||||
class PopplerController(object):
    """Wrap a poppler document and expose page rendering and form-field extraction.

    The instance keeps the opened document in ``self.doc``; every other
    method (except ``load_poppler_doc``) reads from that attribute.
    """

    # Development fallback kept from the original implementation. It is only
    # used when the controller is built without a path, so existing callers
    # that relied on the no-argument form keep working.
    DEFAULT_PDF_URI = 'file:///home/mark/Bedraga.pdf'

    # Pages are rasterised at this multiple of their natural size.
    RENDER_SCALE = 2

    def __init__(self, pdf=""):
        """Open the PDF at filesystem path ``pdf``.

        ``pdf`` is turned into an absolute ``file://`` URI before it is handed
        to poppler. When ``pdf`` is empty, ``DEFAULT_PDF_URI`` is opened
        instead.
        """
        # Imported locally because this module does not import ``os`` at
        # file level.
        import os

        if pdf:
            uri = 'file://' + os.path.abspath(pdf)
        else:
            uri = self.DEFAULT_PDF_URI
        self.doc = poppler.document_new_from_file(uri, password=None)

    def load_poppler_doc(self, filename):
        """Open ``filename`` relative to ``settings.STATIC_FORMS_ROOT``.

        Returns the poppler document, or ``None`` when no regular file exists
        at the resolved path. ``self.doc`` is not modified.
        """
        import os
        from django.conf import settings

        pdf_path = os.path.abspath(os.path.join(settings.STATIC_FORMS_ROOT, filename))
        if not os.path.isfile(pdf_path):
            return None
        return poppler.document_new_from_file('file://' + pdf_path, password=None)

    def generate_page_images(self, pages=None):
        """Yield one in-memory PNG per page, rewound to offset 0.

        ``pages`` is an optional sequence of page indexes; when it is empty or
        omitted every page of ``self.doc`` is rendered in order. A document
        without pages yields nothing.
        """
        import cairo
        import StringIO

        scale = self.RENDER_SCALE
        for page_num in pages or xrange(self.doc.get_n_pages()):
            page = self.doc.get_page(page_num)
            width, height = page.get_size()

            # A fresh surface per page: sharing one surface would draw each
            # page over the previous ones and would size every image from the
            # first page only.
            surface = cairo.ImageSurface(cairo.FORMAT_RGB24,
                                         int(width * scale),
                                         int(height * scale))
            context = cairo.Context(surface)

            # A new image surface starts out zeroed (black); give the page a
            # white sheet to be drawn on. Harmless if the renderer paints its
            # own background.
            context.set_source_rgb(1, 1, 1)
            context.paint()

            context.scale(scale, scale)
            page.render(context)

            fd = StringIO.StringIO()
            surface.write_to_png(fd)
            fd.seek(0)
            yield fd

    def get_page_fields(self, page_num):
        """Yield a dict describing each form field mapped on page ``page_num``.

        Each dict has the keys ``name``, ``value`` and ``area``; ``area`` holds
        ``pos_x``/``pos_y`` (the mapping's ``x1``/``y1`` corner) and
        ``width``/``height`` (the ``x2 - x1`` and ``y2 - y1`` extents).
        """
        page = self.doc.get_page(page_num)
        for mapping in page.get_form_field_mapping():
            area = mapping.area
            data = mapping.field

            yield {
                'name': data.get_name(),
                # NOTE(review): the text accessor is called for every field;
                # confirm it behaves sensibly for non-text widgets.
                'value': data.text_get_text(),
                'area': {
                    'pos_x': area.x1,
                    'pos_y': area.y1,
                    'width': area.x2 - area.x1,
                    'height': area.y2 - area.y1,
                },
            }
|
||||
|
||||
class PDFFormFillerPostSave(PopplerController):
    """Placeholder subclass of ``PopplerController`` with no behaviour of its own yet."""

    def __init__(self, document):
        """Accept ``document`` and do nothing with it.

        NOTE(review): the base initialiser is not invoked, so ``self.doc`` is
        never set on instances of this class; the inherited methods that read
        it cannot be used until this is filled in.
        """
|
||||
|
98
models.py
98
models.py
|
@ -1,3 +1,97 @@
|
|||
from django.db import models
|
||||
import os
|
||||
|
||||
# Create your models here.
|
||||
from django.db import models
|
||||
from django.core.files import File
|
||||
|
||||
from pdfformfiller.controller import PopplerController
|
||||
|
||||
|
||||
# (stored one-letter code, human-readable label) pairs offered for
# ``FormField.fieldtype``.
FIELD_TYPE_CHOICES = (('t', 'Text'), ('c', 'Checkbox'))
|
||||
|
||||
|
||||
class Document(models.Model):
    """A named PDF upload whose pages are rendered into ``Page`` rows."""

    name = models.CharField(max_length=64)
    # Optional upload: a Document may exist before any file is attached.
    pdf = models.FileField(upload_to='pdfformfiller_pdf', blank=True)

    def __unicode__(self):
        return self.name

    def process_pages(self):
        """Render every page of ``pdf`` and store the image on a ``Page``.

        For each rendered page the matching ``Page`` (looked up by
        ``page_num`` through ``page_set``) is reused, or a new one is built,
        and the PNG is saved on its ``image`` field. Does nothing when no file
        is attached.
        """
        # ``pdf`` is declared blank=True; with no file attached there is
        # nothing to render and asking for ``.path`` would raise.
        if not self.pdf:
            return

        pdf_path = self.pdf.path
        base_name = os.path.basename(pdf_path)
        # Page.name has its own length limit; the generated name is clipped so
        # the " page N" suffix always survives.
        name_limit = Page._meta.get_field('name').max_length

        poppler_con = PopplerController(pdf_path)
        for page_num, image in enumerate(poppler_con.generate_page_images()):
            try:
                page = self.page_set.get(page_num=page_num)
            except Page.DoesNotExist:
                suffix = ' page %d' % page_num
                page = Page(document=self,
                            page_num=page_num,
                            name=self.name[:max(name_limit - len(suffix), 0)] + suffix)
            # Saving the file field also saves the Page row (the field's
            # ``save`` defaults to persisting the model instance).
            page.image.save(
                base_name + ('_page%03d.png' % page_num),
                File(image)
            )
|
||||
|
||||
|
||||
def document_post_save(sender, **kwargs):
    """Signal receiver: re-render the pages of the instance that was saved.

    Reads ``instance`` from the signal keyword arguments and calls its
    ``process_pages()``; does nothing when the instance is missing or falsy.
    """
    saved = kwargs.get('instance')
    if not saved:
        return
    saved.process_pages()
|
||||
|
||||
# Run document_post_save after every save of a Document instance.
models.signals.post_save.connect(document_post_save, sender=Document)
|
||||
|
||||
|
||||
class Page(models.Model):
    """A single page of a ``Document``: its index, display name and rendered image."""

    document = models.ForeignKey('Document')
    name = models.CharField(max_length=64, blank=True)
    image = models.ImageField(upload_to='pdfformfiller_page')
    page_num = models.SmallIntegerField()

    def __unicode__(self):
        return self.name

    def process_fields(self):
        """Create or refresh a ``FormField`` for every form field on this page.

        Fields are matched by name within ``formfield_set``; position and size
        are overwritten from the extracted data and every field is stored with
        type ``'t'``.
        """
        controller = PopplerController(self.document.pdf.path)

        for extracted in controller.get_page_fields(self.page_num):
            try:
                form_field = self.formfield_set.get(name=extracted['name'])
            except FormField.DoesNotExist:
                form_field = FormField(page=self, name=extracted['name'])

            geometry = extracted['area']
            # Copy the four geometry values straight across; the dict keys
            # are the model attribute names.
            for attr in ('pos_x', 'pos_y', 'width', 'height'):
                setattr(form_field, attr, geometry[attr])
            form_field.fieldtype = 't'
            form_field.save()
|
||||
|
||||
|
||||
|
||||
def page_post_save(sender, **kwargs):
    """Signal receiver: re-extract the form fields of the instance that was saved.

    Reads ``instance`` from the signal keyword arguments and calls its
    ``process_fields()``; does nothing when the instance is missing or falsy.
    """
    saved = kwargs.get('instance')
    if not saved:
        return
    saved.process_fields()
|
||||
|
||||
# Run page_post_save after every save of a Page instance.
models.signals.post_save.connect(page_post_save, sender=Page)
|
||||
|
||||
|
||||
class FormField(models.Model):
    """A form field located on a ``Page``: its name, type code and bounding box."""

    page = models.ForeignKey('Page')
    name = models.CharField(max_length=255)
    fieldtype = models.CharField(max_length=1, choices=FIELD_TYPE_CHOICES)

    # Bounding box, stored as an origin corner plus extents.
    pos_x = models.FloatField()
    pos_y = models.FloatField()
    width = models.FloatField()
    height = models.FloatField()

    def __unicode__(self):
        return self.name

    def page_num(self):
        """Return the ``page_num`` of the page this field belongs to."""
        owner = self.page
        return owner.page_num
|
||||
|
|
2
urls.py
2
urls.py
|
@ -2,6 +2,6 @@ from django.conf.urls import patterns, url
|
|||
from pdfformfiller.views import editor
|
||||
|
||||
urlpatterns = patterns('pdfformfiller.views',
|
||||
url(r'^(?P<pdf>.*)/edit/$', editor.PDFFormFillerEditorView.as_view(), name='pdfformfiller-edit'),
|
||||
url(r'^(?P<pdf>.*)/edit/$', editor.PDFFormFillerEditor.as_view(), name='pdfformfiller-edit'),
|
||||
)
|
||||
|
||||
|
|
|
@ -1,10 +1,12 @@
|
|||
import os
|
||||
|
||||
from django.views.generic import View
|
||||
from django.http import HttpResponse
|
||||
from django.utils import simplejson
|
||||
import poppler
|
||||
import os
|
||||
|
||||
class PDFFormFillerEditorView(View):
|
||||
|
||||
class PDFFormFillerEditor(View):
|
||||
def get(self, request, pdf=""):
|
||||
|
||||
doc = poppler.document_new_from_file('file:///home/mark/Bedraga.pdf', password=None)
|
||||
|
@ -20,6 +22,25 @@ class PDFFormFillerEditorView(View):
|
|||
if os.path.isfile(pdf_path):
|
||||
return poppler.document_new_from_file('file://' + pdf_path, password=None)
|
||||
|
||||
|
||||
def generate_page_pngs(self, poppler_doc, pages=None):
    """Yield one in-memory PNG per page of ``poppler_doc``, rewound to offset 0.

    ``pages`` is an optional sequence of page indexes; when it is empty or
    omitted every page is rendered in order. A document without pages yields
    nothing.
    """
    import cairo
    import StringIO

    # Pages are rasterised at twice their natural size.
    scale = 2

    # ``get_n_pages`` and ``write_to_png`` are the calls used by
    # PopplerController.generate_page_images in controller.py; this method
    # now matches it.
    for page_num in pages or xrange(poppler_doc.get_n_pages()):
        page = poppler_doc.get_page(page_num)
        width, height = page.get_size()

        # A fresh surface per page: sharing one surface would draw each page
        # over the previous ones and size every image from the first page.
        surface = cairo.ImageSurface(cairo.FORMAT_RGB24,
                                     int(width * scale),
                                     int(height * scale))
        context = cairo.Context(surface)

        # A new image surface starts out zeroed (black); paint a white sheet
        # before the page is drawn.
        context.set_source_rgb(1, 1, 1)
        context.paint()

        # Without this the page would fill only a quarter of the 2x surface.
        context.scale(scale, scale)
        page.render(context)

        fd = StringIO.StringIO()
        surface.write_to_png(fd)
        fd.seek(0)
        yield fd
|
||||
|
||||
|
||||
def get_fields(self, poppler_doc, page):
    """Return the form-field mapping of page index ``page`` in ``poppler_doc``.

    The mapping is returned exactly as the page object reports it; nothing is
    converted or filtered here.
    """
    return poppler_doc.get_page(page).get_form_field_mapping()
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue