--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/thingfish/processor/pdf.rb Wed Nov 16 13:11:39 2016 -0800
@@ -0,0 +1,62 @@
+# -*- ruby -*-
+#encoding: utf-8
+
+require 'pdf-reader'
+
+require 'strelka'
+require 'thingfish' unless defined?( Thingfish )
+require 'thingfish/processor' unless defined?( Thingfish::Processor )
+
+
+# Extract metadata from uploaded Adobe Portable Document Format (PDF) files and attach it to the request.
+class Thingfish::Processor::PDF < Thingfish::Processor
+  extend Loggability
+
+  # Package version
+  VERSION = '0.1.0'
+
+  # Version control revision
+  REVISION = %q$Revision$
+
+  # Loggability API -- log to the :thingfish logger
+  log_to :thingfish
+
+  # The list of handled types
+  handled_types 'application/pdf'
+
+
+  ### Synchronous processor API -- parse the body of the given +request+ as a PDF
+  ### and add the metadata extracted from it to the request.
+  def on_request( request )
+    reader = ::PDF::Reader.new( request.body )
+    metadata = self.extract_pdf_metadata( reader )
+
+    request.add_metadata( metadata )
+  end
+
+
+  #########
+  protected
+  #########
+
+  ### Return a Hash of 'pdf:'-prefixed metadata for the given PDF +reader+: its version,
+  ### page count, and every Info entry whose value is not a PDF::Reader::Reference.
+  def extract_pdf_metadata( reader )
+    self.log.debug "Extracting PDF metadata..."
+
+    pdf_metadata = {
+      'pdf:version' => reader.pdf_version,
+      'pdf:pagecount' => reader.page_count,
+    }.reject {|_,v| v.nil? }
+
+    # reader.info may be nil (e.g., a PDF with no Info dictionary); treat that as empty.
+    (reader.info || {}).each_pair do |key, val|
+      pdf_metadata[ "pdf:#{key}" ] = val unless val.is_a?( ::PDF::Reader::Reference )
+    end
+
+    self.log.debug " raw PDF metadata: %p" % [ pdf_metadata ]
+    return pdf_metadata
+  end
+
+end # class Thingfish::Processor::PDF
+