Initial release.
authorMahlon E. Smith <mahlon@laika.com>
Wed, 16 Nov 2016 13:11:39 -0800
changeset 0 266fe36d11dd
child 1 70dba2d6deb8
Initial release.
.document
.editorconfig
.gems
.hgignore
.hgsigs
.hgtags
.pryrc
.rdoc_options
.ruby-gemset
.ruby-version
.simplecov
Gemfile
History.md
LICENSE.txt
Manifest.txt
README.md
Rakefile
lib/thingfish/processor/pdf.rb
spec/data/hi.pdf
spec/spec_helper.rb
spec/thingfish/processor/pdf_spec.rb
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.document	Wed Nov 16 13:11:39 2016 -0800
@@ -0,0 +1,5 @@
+lib/**/*.rb
+README.md
+ChangeLog.md
+
+LICENSE.txt
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.editorconfig	Wed Nov 16 13:11:39 2016 -0800
@@ -0,0 +1,14 @@
+# http://EditorConfig.org
+
+# top-most EditorConfig file
+root = true
+
+# Unix-style newlines with a newline ending every file
+[*]
+end_of_line = lf
+insert_final_newline = true
+
+# Tab indentation
+[**.*]
+indent_style = tab
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.gems	Wed Nov 16 13:11:39 2016 -0800
@@ -0,0 +1,3 @@
+hoe-deveiate
+thingfish
+pdf-reader
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.hgignore	Wed Nov 16 13:11:39 2016 -0800
@@ -0,0 +1,3 @@
+/html/
+^ChangeLog$
+^commit\-msg\.txt$
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.hgsigs	Wed Nov 16 13:11:39 2016 -0800
@@ -0,0 +1,1 @@
+a2b6126951333b0758029f9cd808895ca2aa7752 0 iQIVAwUAWCywfAdBeh7mMHXSAQqWwg/6AtBGEYm/HSxYCaheF7taWl/V+mwqeNFs+w72tvAAAW4l1USRuq9iI5s/p9T7p87N/rVylKgYyq891+a5Ptwuy1/fnCbOHtQicWjdQBC/bAFHbh47iV9Rt35o+NQXli+eWNCw6GK1NyNTH1jLJRt+8rv/CZZZ9RsDzCRoM1uWDLNUuDmhcE5J95t3CGg7RZo5S5PAX8QtCkomUpXAO1o4ijnDPLvz8MeSrSF78+zzQDZsQjCH+WR9nI84a9l7WyJUCxr7WvWg464hf2ffk3tTqVatJhLUcmFyycsBqV/NollKb9Rtt2lPdG8FEQspnmivVScJ84O+I3z6WLtsNvugkfnseO13cg/JOiawmsnSr/xi8sp3S0+CuRKaXEgwedvvHmBU09uaoxHB5gE3+pmuF9hmEEtbthJvrY3fi/Lwa6oF6LUHwwyhS2HZHeZ6SEs42cwQrblBZ/DzPtn+OUa8NaEdEXrAfRnzIxdeRO3DkLpOis3Simx2g9Dngj1uy2GspDmtPJE7aXiYqEYGrUBkPqkZ6Ed+Ps/qdZYus9CT94b9XLyZ+idkdOtroLa0EKXv80lU7Az9s9UewuTYcPCEFgjvEVScTq3BS7KYB+xwua6i8xooA1xUEiy2IVLWTheH/b1/SwIGkFM8jOHoBfF0NPM0zm9+HUEhbpXB6Yb6bYM=
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.hgtags	Wed Nov 16 13:11:39 2016 -0800
@@ -0,0 +1,1 @@
+dddd31f340945db872a412f90a9dc2c9d6406dd9 v0.1.0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.pryrc	Wed Nov 16 13:11:39 2016 -0800
@@ -0,0 +1,12 @@
+#!/usr/bin/ruby -*- ruby -*-
+
+$LOAD_PATH.unshift( 'lib' )
+
+begin
+	require 'thingfish/processor/mp3'
+rescue Exception => e
+	$stderr.puts "Ack! Libraries failed to load: #{e.message}\n\t" +
+		e.backtrace.join( "\n\t" )
+end
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.rdoc_options	Wed Nov 16 13:11:39 2016 -0800
@@ -0,0 +1,16 @@
+--- !ruby/object:RDoc::Options
+encoding: UTF-8
+static_path: []
+rdoc_include:
+  - .
+charset: UTF-8
+exclude: 
+hyperlink_all: false
+line_numbers: false
+main_page: README.md
+markup: markdown
+show_hash: false
+tab_width: 8
+title: Thingfish-Processor-PDF Documentation
+visibility: :protected
+webcvs:
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.ruby-gemset	Wed Nov 16 13:11:39 2016 -0800
@@ -0,0 +1,1 @@
+thingfish-processor-pdf
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.ruby-version	Wed Nov 16 13:11:39 2016 -0800
@@ -0,0 +1,1 @@
+2.3.1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.simplecov	Wed Nov 16 13:11:39 2016 -0800
@@ -0,0 +1,9 @@
+# Simplecov config
+
+SimpleCov.start do
+	add_filter 'spec'
+	add_filter 'integration'
+	add_group "Needing tests" do |file|
+		file.covered_percent < 90
+	end
+end
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Gemfile	Wed Nov 16 13:11:39 2016 -0800
@@ -0,0 +1,2 @@
+source "https://rubygems.org/"
+gemspec
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/History.md	Wed Nov 16 13:11:39 2016 -0800
@@ -0,0 +1,4 @@
+## v0.1.0 [2016-11-16] Mahlon E. Smith <mahlon@martini.nu>
+
+Initial release.
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/LICENSE.txt	Wed Nov 16 13:11:39 2016 -0800
@@ -0,0 +1,20 @@
+Copyright (c) 2016 Mahlon E. Smith
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Manifest.txt	Wed Nov 16 13:11:39 2016 -0800
@@ -0,0 +1,14 @@
+.document
+.editorconfig
+.rdoc_options
+.simplecov
+ChangeLog
+History.md
+LICENSE.txt
+Manifest.txt
+README.md
+Rakefile
+lib/thingfish/processor/pdf.rb
+spec/data/hi.pdf
+spec/spec_helper.rb
+spec/thingfish/processor/pdf_spec.rb
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md	Wed Nov 16 13:11:39 2016 -0800
@@ -0,0 +1,76 @@
+# Thingfish-Processor-PDF
+
+home
+: https://bitbucket.org/mahlon/Thingfish-Processor-PDF
+
+
+## Description
+
+This is a basic pdf processor plugin for the Thingfish digital asset
+manager.  It extracts PDF metadata from uploaded files.
+
+
+## Prerequisites
+
+* Ruby
+
+
+## Installation
+
+    $ gem install thingfish-processor-pdf
+
+
+##  Usage
+
+Simply enable this processor in the *Thingfish* section of your configuration:
+
+    ---
+    thingfish:
+      processors:
+        - pdf
+
+
+## Contributing
+
+You can check out the current development source with Mercurial via its
+[project page](http://bitbucket.org/mahlon/thingfish-processor-pdf).
+
+After checking out the source, run:
+
+    $ rake newb
+
+This task will install any missing dependencies, run the tests/specs,
+and generate the API documentation.
+
+
+## License
+
+Copyright (c) 2016, Mahlon E. Smith
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice,
+  this list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of the author/s, nor the names of the project's
+  contributors may be used to endorse or promote products derived from this
+  software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Rakefile	Wed Nov 16 13:11:39 2016 -0800
@@ -0,0 +1,94 @@
+#!/usr/bin/env rake
+
+begin
+	require 'hoe'
+rescue LoadError
+	abort "This Rakefile requires hoe (gem install hoe)"
+end
+
+GEMSPEC = 'thingfish-processor-pdf.gemspec'
+
+
+Hoe.plugin :mercurial
+Hoe.plugin :signing
+Hoe.plugin :deveiate
+
+Hoe.plugins.delete :rubyforge
+
+hoespec = Hoe.spec 'thingfish-processor-pdf' do |spec|
+	spec.readme_file = 'README.md'
+	spec.history_file = 'History.md'
+	spec.extra_rdoc_files = FileList[ '*.rdoc', '*.md' ]
+	spec.urls = {
+		home: 'https://bitbucket.org/mahlon/thingfish-processor-pdf'
+	}
+
+	spec.extra_rdoc_files = FileList[ '*.rdoc', '*.md' ]
+	spec.license 'BSD-3-Clause'
+
+	spec.developer 'Mahlon E. Smith', 'mahlon@martini.nu'
+
+	spec.dependency 'thingfish',   '~> 0.5'
+	spec.dependency 'pdf-reader',  '~> 1.4'
+	spec.dependency 'loggability', '~> 0.11'
+
+	spec.dependency 'hoe-deveiate',            '~> 0.8', :developer
+	spec.dependency 'simplecov',               '~> 0.12', :developer
+	spec.dependency 'rdoc-generator-fivefish', '~> 0.1', :developer
+
+	spec.require_ruby_version( '>=2.3.1' )
+	spec.hg_sign_tags = true if spec.respond_to?( :hg_sign_tags= )
+	spec.check_history_on_release = true if spec.respond_to?( :check_history_on_release= )
+end
+
+
+ENV['VERSION'] ||= hoespec.spec.version.to_s
+
+# Run the tests before checking in
+task 'hg:precheckin' => [ :check_history, :check_manifest, :gemspec, :spec ]
+
+task :test => :spec
+
+# Rebuild the ChangeLog immediately before release
+task :prerelease => 'ChangeLog'
+CLOBBER.include( 'ChangeLog' )
+
+desc "Build a coverage report"
+task :coverage do
+	ENV["COVERAGE"] = 'yes' # spec/spec_helper.rb requires simplecov only when COVERAGE is set
+	Rake::Task[:spec].invoke # run the spec task with that flag in the environment
+end
+CLOBBER.include( 'coverage' )
+
+
+# Use the fivefish formatter for docs generated from development checkout
+if File.directory?( '.hg' )
+	require 'rdoc/task'
+
+	Rake::Task[ 'docs' ].clear
+	RDoc::Task.new( 'docs' ) do |rdoc|
+	    rdoc.main = "README.rdoc"
+		rdoc.markup = 'markdown'
+	    rdoc.rdoc_files.include( "*.rdoc", "ChangeLog", "lib/**/*.rb" )
+	    rdoc.generator = :fivefish
+		rdoc.title = 'Thingfish-Processor-PDF'
+	    rdoc.rdoc_dir = 'doc'
+	end
+end
+
+task :gemspec => GEMSPEC
+file GEMSPEC => __FILE__
+task GEMSPEC do |task|
+	spec = $hoespec.spec
+	spec.files.delete( '.gemtest' )
+	spec.signing_key = nil
+	spec.cert_chain = Rake::FileList[ 'certs/*.pem' ].to_a
+	spec.version = "#{spec.version.bump}.pre#{Time.now.strftime("%Y%m%d%H%M%S")}"
+	File.open( task.name, 'w' ) do |fh|
+		fh.write( spec.to_ruby )
+	end
+end
+CLOBBER.include( GEMSPEC.to_s )
+
+task :default => :gemspec
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/thingfish/processor/pdf.rb	Wed Nov 16 13:11:39 2016 -0800
@@ -0,0 +1,62 @@
+# -*- ruby -*-
+#encoding: utf-8
+
+require 'pdf-reader'
+
+require 'strelka'
+require 'thingfish' unless defined?( Thingfish )
+require 'thingfish/processor' unless defined?( Thingfish::Processor )
+
+
+# Attach PDF metadata to the Adobe Portable Document Format.
+class Thingfish::Processor::PDF < Thingfish::Processor
+	extend Loggability
+
+	# Package version
+	VERSION = '0.1.0'
+
+	# Version control revision
+	REVISION = %q$Revision$
+
+
+	# Loggability API -- log to the :thingfish logger
+	log_to :thingfish
+
+	# The list of handled types
+	handled_types 'application/pdf'
+
+
+	### Synchronous processor API -- extract metadata from uploaded PDFs
+	###
+	def on_request( request )
+		reader   = ::PDF::Reader.new( request.body )
+		metadata = self.extract_pdf_metadata( reader )
+
+		request.add_metadata( metadata )
+	end
+
+
+	#########
+	protected
+	#########
+
+	### Normalize metadata from the PDFReader object and return it as a hash.
+	###
+	def extract_pdf_metadata( reader )
+		self.log.debug "Extracting PDF metadata..."
+
+		pdf_metadata = {
+			'pdf:version'   => reader.pdf_version,
+			'pdf:pagecount' => reader.page_count,
+		}.reject {|_,v| v.nil? }
+
+		reader.info.each_pair do |key, val|
+			pdf_metadata[ "pdf:#{key}" ] = val unless val.is_a?( ::PDF::Reader::Reference )
+		end
+
+		self.log.debug "  raw PDF metadata: %p" % [ pdf_metadata ]
+		return pdf_metadata
+	end
+
+end # class Thingfish::Processor::PDF
+
Binary file spec/data/hi.pdf has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spec/spec_helper.rb	Wed Nov 16 13:11:39 2016 -0800
@@ -0,0 +1,49 @@
+# -*- ruby -*-
+#encoding: utf-8
+
+require 'simplecov' if ENV['COVERAGE']
+
+require 'rspec'
+
+require 'mongrel2'
+require 'mongrel2/testing'
+
+require 'thingfish'
+require 'thingfish/spechelpers'
+require 'thingfish/processor/pdf'
+
+require 'loggability/spechelpers'
+
+
+module Thingfish::Processor::PDF::SpecHelpers
+
+	FIXTURE_DIR = Pathname( __FILE__ ).dirname + 'data'
+
+
+	### Load and return the data from the fixture with the specified +filename+.
+	def fixture_data( filename )
+		fixture = FIXTURE_DIR + filename
+		return fixture.open( 'r', encoding: 'binary' )
+	end
+
+end
+
+
+
+### Mock with RSpec
+RSpec.configure do |config|
+	config.run_all_when_everything_filtered = true
+	config.filter_run :focus
+	config.order = 'random'
+	config.mock_with( :rspec ) do |mock|
+		mock.syntax = :expect
+	end
+
+	config.include( Mongrel2::SpecHelpers )
+	config.include( Thingfish::SpecHelpers )
+	config.include( Loggability::SpecHelpers )
+	config.include( Thingfish::Processor::PDF::SpecHelpers )
+end
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spec/thingfish/processor/pdf_spec.rb	Wed Nov 16 13:11:39 2016 -0800
@@ -0,0 +1,39 @@
+#!/usr/bin/env ruby
+
+require_relative '../../spec_helper'
+
+require 'rspec'
+
+require 'thingfish'
+require 'thingfish/processor'
+
+require 'strelka'
+require 'strelka/httprequest/metadata'
+
+
+describe Thingfish::Processor, "PDF" do
+
+	before( :all ) do
+		Strelka::HTTPRequest.class_eval { include Strelka::HTTPRequest::Metadata }
+	end
+
+
+	let( :processor ) { described_class.create(:pdf) }
+
+	let( :factory ) do
+		Mongrel2::RequestFactory.new(
+			:route => '/',
+			:headers => {:accept => '*/*'})
+	end
+
+
+	it "extracts metadata from uploaded PDFs" do
+		req = factory.post( '/tf', fixture_data('hi.pdf'), 'Content-type' => 'application/pdf' )
+
+		processor.process_request( req )
+
+		expect( req.metadata ).to include( 'pdf:pagecount', 'pdf:producer' )
+	end
+end
+
+# vim: set nosta noet ts=4 sw=4 ft=rspec: