diff --git a/controller.py b/controller.py
new file mode 100644
index 0000000..7c3a544
--- /dev/null
+++ b/controller.py
@@ -0,0 +1,85 @@
+import os
+import pdb
+
+import poppler
+
+
+class PopplerController(object):
+    """Wrap a poppler document to render page images and list form fields."""
+
+    # Development fallback that is opened when no PDF path is supplied.
+    DEFAULT_PDF_URI = 'file:///home/mark/Bedraga.pdf'
+
+    def __init__(self, pdf=""):
+        """Open the PDF at filesystem path `pdf`, or DEFAULT_PDF_URI if empty."""
+        if pdf:
+            uri = 'file://' + os.path.abspath(pdf)
+        else:
+            uri = self.DEFAULT_PDF_URI
+        self.doc = poppler.document_new_from_file(uri, password=None)
+
+    def load_poppler_doc(self, filename):
+        """Return the document `filename` found under settings.STATIC_FORMS_ROOT.
+
+        Returns None when no such file exists.
+        """
+        from django.conf import settings
+
+        pdf_path = os.path.abspath(os.path.join(settings.STATIC_FORMS_ROOT, filename))
+        if os.path.isfile(pdf_path):
+            return poppler.document_new_from_file('file://' + pdf_path, password=None)
+        return None
+
+    def generate_page_images(self, pages=None):
+        """Yield a PNG image of each requested page, rendered at 2x scale.
+
+        `pages` is a sequence of page numbers; every page of the document is
+        rendered when it is empty or None. Each item is a StringIO holding
+        the PNG data, rewound to the start.
+        """
+        import cairo
+        import StringIO
+
+        scale = 2
+        for page_num in pages or xrange(self.doc.get_n_pages()):
+            page = self.doc.get_page(page_num)
+            width, height = page.get_size()
+            # One surface per page: reusing a single surface would draw each
+            # page over the previous ones and clip pages of a different size.
+            surface = cairo.ImageSurface(cairo.FORMAT_RGB24,
+                                         int(width * scale), int(height * scale))
+            context = cairo.Context(surface)
+            # A new image surface is zero-filled (black); paint the paper white.
+            context.set_source_rgb(1, 1, 1)
+            context.paint()
+            context.scale(scale, scale)
+            page.render(context)
+            fd = StringIO.StringIO()
+            surface.write_to_png(fd)
+            fd.seek(0)
+            yield fd
+
+    def get_page_fields(self, page_num):
+        """Yield a dict with 'name', 'value' and 'area' for each field on a page."""
+        page = self.doc.get_page(page_num)
+        for field in page.get_form_field_mapping():
+            area = field.area
+            data = field.field
+
+            yield {
+                'name': data.get_name(),
+                'value': data.text_get_text(),
+                'area': {
+                    'pos_x': area.x1,
+                    'pos_y': area.y1,
+                    'width': area.x2 - area.x1,
+                    'height': area.y2 - area.y1,
+                }
+            }
+
+
+class PDFFormFillerPostSave(PopplerController):
+    """Placeholder: __init__ opens no document, so self.doc is never set."""
+
+    def __init__(self, document):
+        pass
diff --git a/models.py b/models.py
index 71a8362..cb1ea8d 100644
--- a/models.py
+++ b/models.py
@@ -1,3 +1,118 @@
-from django.db import models
+import os
 
-# Create your models here.
+from django.db import models
+from django.core.files import File
+
+from pdfformfiller.controller import PopplerController
+
+
+FIELD_TYPE_CHOICES = (
+    ('t', 'Text'),
+    ('c', 'Checkbox'),
+    )
+
+
+class Document(models.Model):
+    """A PDF form; its pages are rendered to images after every save."""
+
+    name = models.CharField(max_length=64)
+    pdf = models.FileField(upload_to='pdfformfiller_pdf', blank=True)
+
+    def __unicode__(self):
+        return self.name
+
+    def process_pages(self):
+        """Render each page of the PDF and store the image on a Page row.
+
+        Does nothing when no PDF has been uploaded.
+        """
+        # `pdf` is optional (blank=True); `.path` raises ValueError without a file.
+        if not self.pdf:
+            return
+
+        pdf_path = self.pdf.path
+        poppler_con = PopplerController(pdf_path)
+        for page_num, image in enumerate(poppler_con.generate_page_images()):
+            try:
+                page = self.page_set.get(page_num=page_num)
+            except Page.DoesNotExist:
+                page = Page(document=self,
+                            page_num=page_num,
+                            name=self.name + (' page %d' % page_num))
+            # FieldFile.save() also saves the Page row (save=True by default).
+            page.image.save(
+                os.path.basename(pdf_path) + ('_page%03d.png' % page_num),
+                File(image)
+            )
+
+
+def document_post_save(sender, **kwargs):
+    """post_save handler: (re)build the page images of the saved Document."""
+    instance = kwargs.get('instance', None)
+
+    if instance:
+        instance.process_pages()
+
+models.signals.post_save.connect(document_post_save, sender=Document)
+
+
+class Page(models.Model):
+    """One rendered page of a Document."""
+
+    document = models.ForeignKey('Document')
+    name = models.CharField(max_length=64, blank=True)
+    image = models.ImageField(upload_to='pdfformfiller_page')
+    page_num = models.SmallIntegerField()
+
+    def __unicode__(self):
+        return self.name
+
+    def process_fields(self):
+        """Create or update a FormField row for every form field on this page.
+
+        Does nothing when the document has no PDF.
+        """
+        if not self.document.pdf:
+            return
+
+        poppler_con = PopplerController(self.document.pdf.path)
+        for fdata in poppler_con.get_page_fields(self.page_num):
+            try:
+                field = self.formfield_set.get(name=fdata['name'])
+            except FormField.DoesNotExist:
+                field = FormField(page=self,
+                                  name=fdata['name'])
+
+            area = fdata['area']
+            field.pos_x = area['pos_x']
+            field.pos_y = area['pos_y']
+            field.width = area['width']
+            field.height = area['height']
+            # Every field is stored as a text field ('t') for now.
+            field.fieldtype = 't'
+            field.save()
+
+
+def page_post_save(sender, **kwargs):
+    """post_save handler: sync the FormField rows of the saved Page."""
+    instance = kwargs.get('instance', None)
+    if instance:
+        instance.process_fields()
+
+models.signals.post_save.connect(page_post_save, sender=Page)
+
+
+class
FormField(models.Model): + page = models.ForeignKey('Page') + name = models.CharField(max_length=255) + fieldtype = models.CharField(choices=FIELD_TYPE_CHOICES, max_length=1) + pos_x = models.FloatField() + pos_y = models.FloatField() + width = models.FloatField() + height = models.FloatField() + + def __unicode__(self): + return self.name + + def page_num(self): + return self.page.page_num diff --git a/urls.py b/urls.py index cf672c1..f262207 100644 --- a/urls.py +++ b/urls.py @@ -2,6 +2,6 @@ from django.conf.urls import patterns, url from pdfformfiller.views import editor urlpatterns = patterns('pdfformfiller.views', - url(r'^(?P<pdf>.*)/edit/$', editor.PDFFormFillerEditorView.as_view(), name='pdfformfiller-edit'), + url(r'^(?P<pdf>.*)/edit/$', editor.PDFFormFillerEditor.as_view(), name='pdfformfiller-edit'), ) diff --git a/views/editor.py b/views/editor.py index 0d52b11..4ff435b 100644 --- a/views/editor.py +++ b/views/editor.py @@ -1,10 +1,12 @@ +import os + from django.views.generic import View from django.http import HttpResponse from django.utils import simplejson import poppler -import os -class PDFFormFillerEditorView(View): + +class PDFFormFillerEditor(View): def get(self, request, pdf=""): doc = poppler.document_new_from_file('file:///home/mark/Bedraga.pdf', password=None) @@ -20,6 +22,25 @@ class PDFFormFillerEditorView(View): if os.path.isfile(pdf_path): return poppler.document_new_from_file('file://' + pdf_path, password=None) + + def generate_page_pngs(self, poppler_doc, pages=[]): + import cairo + import StringIO + + page = poppler_doc.get_page(pages[0] if pages else 0) + width, height = page.get_size() + surface = cairo.ImageSurface(cairo.FORMAT_RGB24, int(width*2), int(height*2)) + context = cairo.Context(surface) + + for page_num in pages or xrange(poppler_doc.num_pages()): + page = poppler_doc.get_page(page_num) + page.render(context) + fd = StringIO.StringIO() + surface.write(fd) + fd.seek(0) + yield fd + + def get_fields(self, poppler_doc, page): fields
= poppler_doc.get_page(page).get_form_field_mapping()