# HG changeset patch # User Mahlon E. Smith # Date 1479330699 28800 # Node ID 266fe36d11dd08ffe49ebca778b15a7bb7e06aaf Initial release. diff -r 000000000000 -r 266fe36d11dd .document --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.document Wed Nov 16 13:11:39 2016 -0800 @@ -0,0 +1,5 @@ +lib/**/*.rb +README.md +ChangeLog.md + +LICENSE.txt diff -r 000000000000 -r 266fe36d11dd .editorconfig --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.editorconfig Wed Nov 16 13:11:39 2016 -0800 @@ -0,0 +1,14 @@ +# http://EditorConfig.org + +# top-most EditorConfig file +root = true + +# Unix-style newlines with a newline ending every file +[*] +end_of_line = lf +insert_final_newline = true + +# Tab indentation +[**.*] +indent_style = tab + diff -r 000000000000 -r 266fe36d11dd .gems --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.gems Wed Nov 16 13:11:39 2016 -0800 @@ -0,0 +1,3 @@ +hoe-deveiate +thingfish +pdf-reader diff -r 000000000000 -r 266fe36d11dd .hgignore --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.hgignore Wed Nov 16 13:11:39 2016 -0800 @@ -0,0 +1,3 @@ +/html/ +^ChangeLog$ +^commit\-msg\.txt$ diff -r 000000000000 -r 266fe36d11dd .hgsigs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.hgsigs Wed Nov 16 13:11:39 2016 -0800 @@ -0,0 +1,1 @@ +a2b6126951333b0758029f9cd808895ca2aa7752 0 
iQIVAwUAWCywfAdBeh7mMHXSAQqWwg/6AtBGEYm/HSxYCaheF7taWl/V+mwqeNFs+w72tvAAAW4l1USRuq9iI5s/p9T7p87N/rVylKgYyq891+a5Ptwuy1/fnCbOHtQicWjdQBC/bAFHbh47iV9Rt35o+NQXli+eWNCw6GK1NyNTH1jLJRt+8rv/CZZZ9RsDzCRoM1uWDLNUuDmhcE5J95t3CGg7RZo5S5PAX8QtCkomUpXAO1o4ijnDPLvz8MeSrSF78+zzQDZsQjCH+WR9nI84a9l7WyJUCxr7WvWg464hf2ffk3tTqVatJhLUcmFyycsBqV/NollKb9Rtt2lPdG8FEQspnmivVScJ84O+I3z6WLtsNvugkfnseO13cg/JOiawmsnSr/xi8sp3S0+CuRKaXEgwedvvHmBU09uaoxHB5gE3+pmuF9hmEEtbthJvrY3fi/Lwa6oF6LUHwwyhS2HZHeZ6SEs42cwQrblBZ/DzPtn+OUa8NaEdEXrAfRnzIxdeRO3DkLpOis3Simx2g9Dngj1uy2GspDmtPJE7aXiYqEYGrUBkPqkZ6Ed+Ps/qdZYus9CT94b9XLyZ+idkdOtroLa0EKXv80lU7Az9s9UewuTYcPCEFgjvEVScTq3BS7KYB+xwua6i8xooA1xUEiy2IVLWTheH/b1/SwIGkFM8jOHoBfF0NPM0zm9+HUEhbpXB6Yb6bYM= diff -r 000000000000 -r 266fe36d11dd .hgtags --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.hgtags Wed Nov 16 13:11:39 2016 -0800 @@ -0,0 +1,1 @@ +dddd31f340945db872a412f90a9dc2c9d6406dd9 v0.1.0 diff -r 000000000000 -r 266fe36d11dd .pryrc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.pryrc Wed Nov 16 13:11:39 2016 -0800 @@ -0,0 +1,12 @@ +#!/usr/bin/ruby -*- ruby -*- + +$LOAD_PATH.unshift( 'lib' ) + +begin + require 'thingfish/processor/pdf' +rescue Exception => e + $stderr.puts "Ack! Libraries failed to load: #{e.message}\n\t" + + e.backtrace.join( "\n\t" ) +end + + diff -r 000000000000 -r 266fe36d11dd .rdoc_options --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.rdoc_options Wed Nov 16 13:11:39 2016 -0800 @@ -0,0 +1,16 @@ +--- !ruby/object:RDoc::Options +encoding: UTF-8 +static_path: [] +rdoc_include: + - . 
+charset: UTF-8 +exclude: +hyperlink_all: false +line_numbers: false +main_page: README.md +markup: markdown +show_hash: false +tab_width: 8 +title: Thingfish-Processor-PDF Documentation +visibility: :protected +webcvs: diff -r 000000000000 -r 266fe36d11dd .ruby-gemset --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.ruby-gemset Wed Nov 16 13:11:39 2016 -0800 @@ -0,0 +1,1 @@ +thingfish-processor-pdf diff -r 000000000000 -r 266fe36d11dd .ruby-version --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.ruby-version Wed Nov 16 13:11:39 2016 -0800 @@ -0,0 +1,1 @@ +2.3.1 diff -r 000000000000 -r 266fe36d11dd .simplecov --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.simplecov Wed Nov 16 13:11:39 2016 -0800 @@ -0,0 +1,9 @@ +# Simplecov config + +SimpleCov.start do + add_filter 'spec' + add_filter 'integration' + add_group "Needing tests" do |file| + file.covered_percent < 90 + end +end diff -r 000000000000 -r 266fe36d11dd Gemfile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Gemfile Wed Nov 16 13:11:39 2016 -0800 @@ -0,0 +1,2 @@ +source "https://rubygems.org/" +gemspec diff -r 000000000000 -r 266fe36d11dd History.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/History.md Wed Nov 16 13:11:39 2016 -0800 @@ -0,0 +1,4 @@ +## v0.1.0 [2016-11-16] Mahlon E. Smith + +Initial release. + diff -r 000000000000 -r 266fe36d11dd LICENSE.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/LICENSE.txt Wed Nov 16 13:11:39 2016 -0800 @@ -0,0 +1,20 @@ +Copyright (c) 2016 Mahlon E. 
Smith + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff -r 000000000000 -r 266fe36d11dd Manifest.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Manifest.txt Wed Nov 16 13:11:39 2016 -0800 @@ -0,0 +1,14 @@ +.document +.editorconfig +.rdoc_options +.simplecov +ChangeLog +History.md +LICENSE.txt +Manifest.txt +README.md +Rakefile +lib/thingfish/processor/pdf.rb +spec/data/hi.pdf +spec/spec_helper.rb +spec/thingfish/processor/pdf_spec.rb diff -r 000000000000 -r 266fe36d11dd README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.md Wed Nov 16 13:11:39 2016 -0800 @@ -0,0 +1,76 @@ +# Thingfish-Processor-PDF + +home +: https://bitbucket.org/mahlon/Thingfish-Processor-PDF + + +## Description + +This is a basic pdf processor plugin for the Thingfish digital asset +manager. It extracts PDF metadata from uploaded files. 
+ + +## Prerequisites + +* Ruby + + +## Installation + + $ gem install thingfish-processor-pdf + + +## Usage + +Simply enable this processor in the *Thingfish* section of your configuration: + + --- + thingfish: + processors: + - pdf + + +## Contributing + +You can check out the current development source with Mercurial via its +[project page](http://bitbucket.org/mahlon/thingfish-processor-pdf). + +After checking out the source, run: + + $ rake newb + +This task will install any missing dependencies, run the tests/specs, +and generate the API documentation. + + +## License + +Copyright (c) 2016, Mahlon E. Smith +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the author/s, nor the names of the project's + contributors may be used to endorse or promote products derived from this + software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + diff -r 000000000000 -r 266fe36d11dd Rakefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Rakefile Wed Nov 16 13:11:39 2016 -0800 @@ -0,0 +1,94 @@ +#!/usr/bin/env rake + +begin + require 'hoe' +rescue LoadError + abort "This Rakefile requires hoe (gem install hoe)" +end + +GEMSPEC = 'thingfish-processor-pdf.gemspec' + + +Hoe.plugin :mercurial +Hoe.plugin :signing +Hoe.plugin :deveiate + +Hoe.plugins.delete :rubyforge + +hoespec = Hoe.spec 'thingfish-processor-pdf' do |spec| + spec.readme_file = 'README.md' + spec.history_file = 'History.md' + spec.extra_rdoc_files = FileList[ '*.rdoc', '*.md' ] + spec.urls = { + home: 'https://bitbucket.org/mahlon/thingfish-processor-pdf' + } + + spec.extra_rdoc_files = FileList[ '*.rdoc', '*.md' ] + spec.license 'BSD-3-Clause' + + spec.developer 'Mahlon E. 
Smith', 'mahlon@martini.nu' + + spec.dependency 'thingfish', '~> 0.5' + spec.dependency 'pdf-reader', '~> 1.4' + spec.dependency 'loggability', '~> 0.11' + + spec.dependency 'hoe-deveiate', '~> 0.8', :developer + spec.dependency 'simplecov', '~> 0.12', :developer + spec.dependency 'rdoc-generator-fivefish', '~> 0.1', :developer + + spec.require_ruby_version( '>=2.3.1' ) + spec.hg_sign_tags = true if spec.respond_to?( :hg_sign_tags= ) + spec.check_history_on_release = true if spec.respond_to?( :check_history_on_release= ) +end + + +ENV['VERSION'] ||= hoespec.spec.version.to_s + +# Run the tests before checking in +task 'hg:precheckin' => [ :check_history, :check_manifest, :gemspec, :spec ] + +task :test => :spec + +# Rebuild the ChangeLog immediately before release +task :prerelease => 'ChangeLog' +CLOBBER.include( 'ChangeLog' ) + +desc "Build a coverage report" +task :coverage do + ENV["COVERAGE"] = 'yes' + Rake::Task[:spec].invoke +end +CLOBBER.include( 'coverage' ) + + +# Use the fivefish formatter for docs generated from development checkout +if File.directory?( '.hg' ) + require 'rdoc/task' + + Rake::Task[ 'docs' ].clear + RDoc::Task.new( 'docs' ) do |rdoc| + rdoc.main = "README.md" + rdoc.markup = 'markdown' + rdoc.rdoc_files.include( "*.rdoc", "*.md", "ChangeLog", "lib/**/*.rb" ) + rdoc.generator = :fivefish + rdoc.title = 'Thingfish-Processor-PDF' + rdoc.rdoc_dir = 'doc' + end +end + +task :gemspec => GEMSPEC +file GEMSPEC => __FILE__ +task GEMSPEC do |task| + spec = hoespec.spec + spec.files.delete( '.gemtest' ) + spec.signing_key = nil + spec.cert_chain = Rake::FileList[ 'certs/*.pem' ].to_a + spec.version = "#{spec.version.bump}.pre#{Time.now.strftime("%Y%m%d%H%M%S")}" + File.open( task.name, 'w' ) do |fh| + fh.write( spec.to_ruby ) + end +end +CLOBBER.include( GEMSPEC.to_s ) + +task :default => :gemspec + diff -r 000000000000 -r 266fe36d11dd lib/thingfish/processor/pdf.rb --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/thingfish/processor/pdf.rb Wed 
Nov 16 13:11:39 2016 -0800 @@ -0,0 +1,62 @@ +# -*- ruby -*- +#encoding: utf-8 + +require 'pdf-reader' + +require 'strelka' +require 'thingfish' unless defined?( Thingfish ) +require 'thingfish/processor' unless defined?( Thingfish::Processor ) + + +# Extract metadata from uploaded Adobe Portable Document Format (PDF) files. +class Thingfish::Processor::PDF < Thingfish::Processor + extend Loggability + + # Package version + VERSION = '0.1.0' + + # Version control revision + REVISION = %q$Revision$ + + + # Loggability API -- log to the :thingfish logger + log_to :thingfish + + # The list of handled types + handled_types 'application/pdf' + + + ### Synchronous processor API -- extract metadata from uploaded PDFs + ### + def on_request( request ) + reader = ::PDF::Reader.new( request.body ) + metadata = self.extract_pdf_metadata( reader ) + + request.add_metadata( metadata ) + end + + + ######### + protected + ######### + + ### Normalize metadata from the PDFReader object and return it as a hash. + ### + def extract_pdf_metadata( reader ) + self.log.debug "Extracting PDF metadata..." + + pdf_metadata = { + 'pdf:version' => reader.pdf_version, + 'pdf:pagecount' => reader.page_count, + }.reject {|_,v| v.nil? 
} + + ( reader.info || {} ).each_pair do |key, val| + pdf_metadata[ "pdf:#{key}" ] = val unless val.is_a?( ::PDF::Reader::Reference ) + end + + self.log.debug " raw PDF metadata: %p" % [ pdf_metadata ] + return pdf_metadata + end + +end # class Thingfish::Processor::PDF + diff -r 000000000000 -r 266fe36d11dd spec/data/hi.pdf Binary file spec/data/hi.pdf has changed diff -r 000000000000 -r 266fe36d11dd spec/spec_helper.rb --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/spec/spec_helper.rb Wed Nov 16 13:11:39 2016 -0800 @@ -0,0 +1,49 @@ +# -*- ruby -*- +#encoding: utf-8 + +require 'simplecov' if ENV['COVERAGE'] + +require 'rspec' + +require 'mongrel2' +require 'mongrel2/testing' + +require 'thingfish' +require 'thingfish/spechelpers' +require 'thingfish/processor/pdf' + +require 'loggability/spechelpers' + + +module Thingfish::Processor::PDF::SpecHelpers + + FIXTURE_DIR = Pathname( __FILE__ ).dirname + 'data' + + + ### Load and return the data from the fixture with the specified +filename+. + def fixture_data( filename ) + fixture = FIXTURE_DIR + filename + return fixture.open( 'r', encoding: 'binary' ) + end + +end + + + +### Mock with RSpec +RSpec.configure do |config| + config.run_all_when_everything_filtered = true + config.filter_run :focus + config.order = 'random' + config.mock_with( :rspec ) do |mock| + mock.syntax = :expect + end + + config.include( Mongrel2::SpecHelpers ) + config.include( Thingfish::SpecHelpers ) + config.include( Loggability::SpecHelpers ) + config.include( Thingfish::Processor::PDF::SpecHelpers ) +end + + + diff -r 000000000000 -r 266fe36d11dd spec/thingfish/processor/pdf_spec.rb --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/spec/thingfish/processor/pdf_spec.rb Wed Nov 16 13:11:39 2016 -0800 @@ -0,0 +1,39 @@ +#!/usr/bin/env ruby + +require_relative '../../spec_helper' + +require 'rspec' + +require 'thingfish' +require 'thingfish/processor' + +require 'strelka' +require 'strelka/httprequest/metadata' + + +describe Thingfish::Processor, 
"PDF" do + + before( :all ) do + Strelka::HTTPRequest.class_eval { include Strelka::HTTPRequest::Metadata } + end + + + let( :processor ) { described_class.create(:pdf) } + + let( :factory ) do + Mongrel2::RequestFactory.new( + :route => '/', + :headers => {:accept => '*/*'}) + end + + + it "extracts metadata from uploaded PDFs" do + req = factory.post( '/tf', fixture_data('hi.pdf'), 'Content-type' => 'application/pdf' ) + + processor.process_request( req ) + + expect( req.metadata ).to include( 'pdf:pagecount', 'pdf:producer' ) + end +end + +# vim: set nosta noet ts=4 sw=4 ft=rspec: