Added field extraction and model creation
This commit is contained in:
parent
8131e77dd9
commit
1a5910f1ef
4 changed files with 177 additions and 5 deletions
57
controller.py
Normal file
57
controller.py
Normal file
|
@ -0,0 +1,57 @@
|
||||||
|
import poppler
|
||||||
|
|
||||||
|
import pdfformfiller.models
|
||||||
|
import pdb
|
||||||
|
|
||||||
|
class PopplerController(object):
    """Wrap a poppler document for page rendering and form-field inspection.

    Attributes:
        doc: the poppler document every other method operates on.
    """

    # Development fallback opened when no PDF is supplied, so that a bare
    # ``PopplerController()`` keeps behaving as it did before.
    DEFAULT_PDF_URI = 'file:///home/mark/Bedraga.pdf'

    # Pages are rasterised at this multiple of their size in PDF points.
    RENDER_SCALE = 2

    def __init__(self, pdf=""):
        """Open ``pdf`` with poppler and keep the result in ``self.doc``.

        Args:
            pdf: filesystem path (or ``file://`` URI) of the PDF to open.
                When empty, ``DEFAULT_PDF_URI`` is opened instead.
        """
        # Local import: this module has no top-level ``import os``.
        import os

        if not pdf:
            uri = self.DEFAULT_PDF_URI
        elif pdf.startswith('file://'):
            uri = pdf
        else:
            uri = 'file://' + os.path.abspath(pdf)
        self.doc = poppler.document_new_from_file(uri, password=None)

    def load_poppler_doc(self, filename):
        """Open a PDF stored under ``settings.STATIC_FORMS_ROOT``.

        Args:
            filename: path of the PDF relative to ``STATIC_FORMS_ROOT``.

        Returns:
            The poppler document, or ``None`` when no such file exists.
        """
        # Local import: this module has no top-level ``import os``.
        import os
        from django.conf import settings

        # NOTE(review): ``filename`` is joined unchecked; confirm callers
        # never pass '..' segments that escape STATIC_FORMS_ROOT.
        pdf_path = os.path.abspath(
            os.path.join(settings.STATIC_FORMS_ROOT, filename))
        if not os.path.isfile(pdf_path):
            return None
        return poppler.document_new_from_file('file://' + pdf_path,
                                              password=None)

    def generate_page_images(self, pages=None):
        """Render pages of ``self.doc`` to PNG, one in-memory file per page.

        Args:
            pages: zero-based page indices to render. When empty or
                ``None`` every page of the document is rendered.

        Yields:
            ``StringIO`` objects rewound to offset 0, each holding one PNG.
        """
        import cairo
        import StringIO

        scale = self.RENDER_SCALE
        for page_num in pages or xrange(self.doc.get_n_pages()):
            page = self.doc.get_page(page_num)
            width, height = page.get_size()

            # A fresh surface per page: sharing one surface made every page
            # after the first get drawn on top of the previous ones, and
            # forced all pages into the first page's dimensions.
            surface = cairo.ImageSurface(cairo.FORMAT_RGB24,
                                         int(width * scale),
                                         int(height * scale))
            context = cairo.Context(surface)

            # Start from white paper; a new surface is zero-filled and the
            # page content is drawn over whatever is already there.
            context.set_source_rgb(1, 1, 1)
            context.paint()

            context.scale(scale, scale)
            page.render(context)

            fd = StringIO.StringIO()
            surface.write_to_png(fd)
            fd.seek(0)
            yield fd

    def get_page_fields(self, page_num):
        """Yield a dict describing each form field on page ``page_num``.

        Each dict has the keys ``name``, ``value`` and ``area``; ``area``
        holds ``pos_x``, ``pos_y``, ``width`` and ``height`` derived from
        the field mapping's ``x1``/``y1``/``x2``/``y2`` rectangle.
        """
        page = self.doc.get_page(page_num)
        for mapping in page.get_form_field_mapping():
            area = mapping.area
            data = mapping.field

            # NOTE(review): text_get_text() is called for every field,
            # whatever its type - confirm this is safe for checkboxes.
            yield {
                'name': data.get_name(),
                'value': data.text_get_text(),
                'area': {
                    'pos_x': area.x1,
                    'pos_y': area.y1,
                    'width': area.x2 - area.x1,
                    'height': area.y2 - area.y1,
                },
            }
|
||||||
|
|
||||||
|
class PDFFormFillerPostSave(PopplerController):
    """Placeholder controller constructed from a document; not implemented."""

    def __init__(self, document):
        """Accept ``document`` and do nothing with it.

        The parent initializer is deliberately not invoked here, so
        ``self.doc`` is never set on instances of this class.
        """
        return None
|
||||||
|
|
98
models.py
98
models.py
|
@ -1,3 +1,97 @@
|
||||||
from django.db import models
|
import os
|
||||||
|
|
||||||
# Create your models here.
|
from django.db import models
|
||||||
|
from django.core.files import File
|
||||||
|
|
||||||
|
from pdfformfiller.controller import PopplerController
|
||||||
|
|
||||||
|
|
||||||
|
# One-character codes accepted by FormField.fieldtype, paired with their
# display labels.
FIELD_TYPE_CHOICES = (('t', 'Text'), ('c', 'Checkbox'))
|
||||||
|
|
||||||
|
|
||||||
|
class Document(models.Model):
    """A named PDF file together with the Page records rendered from it."""

    name = models.CharField(max_length=64)
    pdf = models.FileField(upload_to='pdfformfiller_pdf', blank=True)

    def __unicode__(self):
        return self.name

    def process_pages(self):
        """Render each page of ``pdf`` to PNG and store it on a ``Page``.

        A ``Page`` is looked up by ``page_num`` and created when missing;
        its image is then saved under a name derived from the PDF's file
        name. Does nothing when no PDF is attached.
        """
        # ``pdf`` is optional (blank=True). Without a file, ``self.pdf.path``
        # raises ValueError, which would break every save of such a
        # Document because this method runs from the post_save signal.
        if not self.pdf:
            return

        pdf_path = self.pdf.path
        base_name = os.path.basename(pdf_path)
        poppler_con = PopplerController(pdf_path)

        for page_num, image in enumerate(poppler_con.generate_page_images()):
            try:
                page = self.page_set.get(page_num=page_num)
            except Page.DoesNotExist:
                page = Page(document=self,
                            page_num=page_num,
                            name=self.name + (' page %d' % page_num))

            # NOTE(review): the image is (re)saved for existing pages as
            # well as new ones - confirm that is intended.
            # FieldFile.save() also saves the Page row by default.
            page.image.save(
                base_name + ('_page%03d.png' % page_num),
                File(image)
            )
|
||||||
|
|
||||||
|
|
||||||
|
def document_post_save(sender, **kwargs):
    """Signal receiver: run ``process_pages()`` on the saved instance.

    Nothing happens when ``kwargs`` carries no (truthy) ``instance``.
    """
    document = kwargs.get('instance')
    if not document:
        return
    document.process_pages()
|
||||||
|
|
||||||
|
# Register document_post_save as the post_save receiver for Document.
models.signals.post_save.connect(document_post_save, sender=Document)
|
||||||
|
|
||||||
|
|
||||||
|
class Page(models.Model):
    """One page of a Document, with its rendered image and form fields."""

    document = models.ForeignKey('Document')
    name = models.CharField(max_length=64, blank=True)
    image = models.ImageField(upload_to='pdfformfiller_page')
    page_num = models.SmallIntegerField()

    def __unicode__(self):
        return self.name

    def process_fields(self):
        """Create or update a ``FormField`` for each field on this page.

        Fields are matched by name within this page; position and size are
        overwritten with the values reported for the PDF. Does nothing
        when the owning document has no PDF attached.
        """
        # Document.pdf is optional (blank=True). Without a file, ``.path``
        # raises ValueError, which would break every save of such a Page
        # because this method runs from the post_save signal.
        pdf = self.document.pdf
        if not pdf:
            return

        poppler_con = PopplerController(pdf.path)

        for fdata in poppler_con.get_page_fields(self.page_num):
            try:
                field = self.formfield_set.get(name=fdata['name'])
            except FormField.DoesNotExist:
                field = FormField(page=self, name=fdata['name'])

            area = fdata['area']
            field.pos_x = area['pos_x']
            field.pos_y = area['pos_y']
            field.width = area['width']
            field.height = area['height']
            # Every field is stored as text ('t') for now.
            field.fieldtype = 't'
            field.save()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def page_post_save(sender, **kwargs):
    """Signal receiver: run ``process_fields()`` on the saved instance.

    Nothing happens when ``kwargs`` carries no (truthy) ``instance``.
    """
    page = kwargs.get('instance')
    if not page:
        return
    page.process_fields()
|
||||||
|
|
||||||
|
# Register page_post_save as the post_save receiver for Page.
models.signals.post_save.connect(page_post_save, sender=Page)
|
||||||
|
|
||||||
|
|
||||||
|
class FormField(models.Model):
    """A named form field on a Page, with its type, position and size."""

    # Owning page.
    page = models.ForeignKey('Page')
    name = models.CharField(max_length=255)
    # One of the single-character codes in FIELD_TYPE_CHOICES.
    fieldtype = models.CharField(choices=FIELD_TYPE_CHOICES, max_length=1)

    # Field rectangle: origin plus extent.
    pos_x = models.FloatField()
    pos_y = models.FloatField()
    width = models.FloatField()
    height = models.FloatField()

    def __unicode__(self):
        """Return the field's name."""
        return self.name

    def page_num(self):
        """Return the ``page_num`` of the page this field belongs to."""
        owner = self.page
        return owner.page_num
|
||||||
|
|
2
urls.py
2
urls.py
|
@ -2,6 +2,6 @@ from django.conf.urls import patterns, url
|
||||||
from pdfformfiller.views import editor
|
from pdfformfiller.views import editor
|
||||||
|
|
||||||
urlpatterns = patterns('pdfformfiller.views',
|
urlpatterns = patterns('pdfformfiller.views',
|
||||||
url(r'^(?P<pdf>.*)/edit/$', editor.PDFFormFillerEditorView.as_view(), name='pdfformfiller-edit'),
|
url(r'^(?P<pdf>.*)/edit/$', editor.PDFFormFillerEditor.as_view(), name='pdfformfiller-edit'),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
@ -1,10 +1,12 @@
|
||||||
|
import os
|
||||||
|
|
||||||
from django.views.generic import View
|
from django.views.generic import View
|
||||||
from django.http import HttpResponse
|
from django.http import HttpResponse
|
||||||
from django.utils import simplejson
|
from django.utils import simplejson
|
||||||
import poppler
|
import poppler
|
||||||
import os
|
|
||||||
|
|
||||||
class PDFFormFillerEditorView(View):
|
|
||||||
|
class PDFFormFillerEditor(View):
|
||||||
def get(self, request, pdf=""):
|
def get(self, request, pdf=""):
|
||||||
|
|
||||||
doc = poppler.document_new_from_file('file:///home/mark/Bedraga.pdf', password=None)
|
doc = poppler.document_new_from_file('file:///home/mark/Bedraga.pdf', password=None)
|
||||||
|
@ -20,6 +22,25 @@ class PDFFormFillerEditorView(View):
|
||||||
if os.path.isfile(pdf_path):
|
if os.path.isfile(pdf_path):
|
||||||
return poppler.document_new_from_file('file://' + pdf_path, password=None)
|
return poppler.document_new_from_file('file://' + pdf_path, password=None)
|
||||||
|
|
||||||
|
|
||||||
|
def generate_page_pngs(self, poppler_doc, pages=None):
    """Render pages of ``poppler_doc`` to PNG, one in-memory file per page.

    Args:
        poppler_doc: the poppler document to render.
        pages: zero-based page indices to render. When empty or ``None``
            every page of the document is rendered.

    Yields:
        ``StringIO`` objects rewound to offset 0, each holding one PNG.
    """
    import cairo
    import StringIO

    scale = 2
    # The page count comes from get_n_pages(); the document object has no
    # num_pages() method.
    for page_num in pages or xrange(poppler_doc.get_n_pages()):
        page = poppler_doc.get_page(page_num)
        width, height = page.get_size()

        # A fresh surface per page, so a page is never drawn on top of the
        # previous one and each image matches its own page size.
        surface = cairo.ImageSurface(cairo.FORMAT_RGB24,
                                     int(width * scale),
                                     int(height * scale))
        context = cairo.Context(surface)

        # Start from white paper; a new surface is zero-filled.
        context.set_source_rgb(1, 1, 1)
        context.paint()

        # The surface is ``scale`` times the page size, so the drawing must
        # be scaled to fill it.
        context.scale(scale, scale)
        page.render(context)

        fd = StringIO.StringIO()
        # PNG output is produced by write_to_png(); surfaces have no write().
        surface.write_to_png(fd)
        fd.seek(0)
        yield fd
|
||||||
|
|
||||||
|
|
||||||
def get_fields(self, poppler_doc, page):
|
def get_fields(self, poppler_doc, page):
|
||||||
fields = poppler_doc.get_page(page).get_form_field_mapping()
|
fields = poppler_doc.get_page(page).get_form_field_mapping()
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue