diff --git a/app/actors/hyrax/actors/file_actor.rb b/app/actors/hyrax/actors/file_actor.rb index e82d99b33d..a142ac93fd 100644 --- a/app/actors/hyrax/actors/file_actor.rb +++ b/app/actors/hyrax/actors/file_actor.rb @@ -29,7 +29,8 @@ def ingest_file(io) return false unless file_set.save repository_file = related_file Hyrax::VersioningService.create(repository_file, user) - CharacterizeJob.perform_later(file_set, repository_file.id, io.path) # path hint in case next worker is on same filesystem + pathhint = io.uploaded_file.uploader.path if io.uploaded_file # in case next worker is on same filesystem + CharacterizeJob.perform_later(file_set, repository_file.id, pathhint || io.path) end # Reverts file and spawns async job to characterize and create derivatives. @@ -43,9 +44,11 @@ def revert_to(revision_id) CharacterizeJob.perform_later(file_set, repository_file.id) end + # @note FileSet comparison is limited to IDs, but this should be sufficient, given that + # most operations here are on the other side of async retrieval in Jobs (based solely on ID). def ==(other) return false unless other.is_a?(self.class) - file_set == other.file_set && relation == other.relation && user == other.user + file_set.id == other.file_set.id && relation == other.relation && user == other.user end private diff --git a/app/jobs/characterize_job.rb b/app/jobs/characterize_job.rb index 8ccb95616c..b8142853ea 100644 --- a/app/jobs/characterize_job.rb +++ b/app/jobs/characterize_job.rb @@ -8,12 +8,12 @@ class CharacterizeJob < Hyrax::ApplicationJob # @param [String, NilClass] filepath the cached file within the Hyrax.config.working_path def perform(file_set, file_id, filepath = nil) raise "#{file_set.class.characterization_proxy} was not found for FileSet #{file_set.id}" unless file_set.characterization_proxy? - filename = Hyrax::WorkingDirectory.find_or_retrieve(file_id, file_set.id, filepath) - Hydra::Works::CharacterizationService.run(file_set.characterization_proxy, filename) + filepath = Hyrax::WorkingDirectory.find_or_retrieve(file_id, file_set.id) unless filepath && File.exist?(filepath) + Hydra::Works::CharacterizationService.run(file_set.characterization_proxy, filepath) Rails.logger.debug "Ran characterization on #{file_set.characterization_proxy.id} (#{file_set.characterization_proxy.mime_type})" file_set.characterization_proxy.save! file_set.update_index file_set.parent.in_collections.each(&:update_index) if file_set.parent - CreateDerivativesJob.perform_later(file_set, file_id, filename) + CreateDerivativesJob.perform_later(file_set, file_id, filepath) end end diff --git a/app/models/job_io_wrapper.rb b/app/models/job_io_wrapper.rb index 7cc61883a8..31cbbe489a 100644 --- a/app/models/job_io_wrapper.rb +++ b/app/models/job_io_wrapper.rb @@ -57,18 +57,17 @@ def extracted_mime_type end # The magic that switches *once* between local filepath and CarrierWave file - # @return [#read] File-like object ready to #read + # @return [File, StringIO, #read] File-like object ready to #read def file @file ||= (file_from_path || file_from_uploaded_file!) end - # @return [File] + # @return [File, StringIO] depending on CarrierWave configuration # @raise when uploaded_file *becomes* required but is missing def file_from_uploaded_file! raise("path '#{path}' was unusable and uploaded_file empty") unless uploaded_file - self.path = uploaded_file.uploader.file.file # old path useless now - # uploaded_file.uploader.file.to_file - uploaded_file.uploader + self.path = uploaded_file.uploader.file.path # old path useless now + uploaded_file.uploader.sanitized_file.file end # @return [File, nil] nil if the path doesn't exist on this (worker) system or can't be read diff --git a/spec/actors/hyrax/actors/file_actor_spec.rb b/spec/actors/hyrax/actors/file_actor_spec.rb index ccdaa15e1d..2e5963fbd8 100644 --- a/spec/actors/hyrax/actors/file_actor_spec.rb +++ b/spec/actors/hyrax/actors/file_actor_spec.rb @@ -6,12 +6,13 @@ let(:file_set) { create(:file_set) } let(:relation) { :original_file } let(:actor) { described_class.new(file_set, relation, user) } - let(:uploaded_file) { fixture_file_upload('/world.png', 'image/png') } - let(:io) { Hydra::Derivatives::IoDecorator.new(uploaded_file, uploaded_file.content_type, uploaded_file.original_filename) } + let(:fixture) { fixture_file_upload('/world.png', 'image/png') } + let(:huf) { Hyrax::UploadedFile.new(user: user, file_set_uri: file_set.uri, file: fixture) } + let(:io) { JobIoWrapper.new(file_set_id: file_set.id, user: user, uploaded_file: huf) } let(:pcdmfile) do Hydra::PCDM::File.new.tap do |f| - f.content = File.open(uploaded_file.path).read - f.original_name = uploaded_file.original_filename + f.content = File.open(fixture.path).read + f.original_name = fixture.original_filename f.save! end end @@ -33,40 +34,23 @@ class FileSetWithExtras < FileSet Object.send(:remove_const, :FileSetWithExtras) end it 'uses the relation from the actor' do - expect(CharacterizeJob).to receive(:perform_later).with(file_set, String, io.tempfile.path) + expect(CharacterizeJob).to receive(:perform_later).with(FileSetWithExtras, String, huf.uploader.path) actor.ingest_file(io) expect(file_set.reload.remastered.mime_type).to eq 'image/png' end end - context 'when given a mime_type' do - let(:uploaded_file) { fixture_file_upload('/world.png', 'image/gif') } - - it 'uses the provided mime_type' do - expect(CharacterizeJob).to receive(:perform_later).with(file_set, String, io.tempfile.path) - actor.ingest_file(io) - expect(file_set.reload.original_file.mime_type).to eq 'image/gif' - end - end - - context 'when not given a mime_type' do - before { allow(Hyrax::VersioningService).to receive(:create) } - it 'passes a decorated instance of the file with a nil mime_type' do - # The parameter versioning: false instructs the machinery in Hydra::Works to defer versioning - expect(Hydra::Works::AddFileToFileSet).to receive(:call).with( - file_set, - io, - relation, - versioning: false - ).and_call_original - expect(CharacterizeJob).to receive(:perform_later).with(file_set, String, io.tempfile.path) - actor.ingest_file(io) - end + it 'uses the provided mime_type' do + allow(fixture).to receive(:content_type).and_return('image/gif') + expect(CharacterizeJob).to receive(:perform_later).with(FileSet, String, huf.uploader.path) + actor.ingest_file(io) + expect(file_set.reload.original_file.mime_type).to eq 'image/gif' end context 'with two existing versions from different users' do - let(:uploaded_file2) { fixture_file_upload('/small_file.txt', 'text/plain') } - let(:io2) { Hydra::Derivatives::IoDecorator.new(uploaded_file2, uploaded_file2.content_type, uploaded_file2.original_filename) } + let(:fixture2) { fixture_file_upload('/small_file.txt', 'text/plain') } + let(:huf2) { Hyrax::UploadedFile.new(user: user2, file_set_uri: file_set.uri, file: fixture2) } + let(:io2) { JobIoWrapper.new(file_set_id: file_set.id, user: user2, uploaded_file: huf2) } let(:user2) { create(:user) } let(:actor2) { described_class.new(file_set, relation, user2) } let(:versions) { file_set.reload.original_file.versions } @@ -81,9 +65,9 @@ class FileSetWithExtras < FileSet expect(versions.all.count).to eq 2 # the current version expect(Hyrax::VersioningService.latest_version_of(file_set.reload.original_file).label).to eq 'version2' - expect(file_set.original_file.content).to eq uploaded_file2.open.read expect(file_set.original_file.mime_type).to eq 'text/plain' expect(file_set.original_file.original_name).to eq 'small_file.txt' + expect(file_set.original_file.content).to eq fixture2.open.read # the user for each version expect(Hyrax::VersionCommitter.where(version_id: versions.first.uri).pluck(:committer_login)).to eq [user.user_key] expect(Hyrax::VersionCommitter.where(version_id: versions.last.uri).pluck(:committer_login)).to eq [user2.user_key] @@ -97,8 +81,9 @@ class FileSetWithExtras < FileSet end it 'when the file is available' do allow(file_set).to receive(:save).and_return(true) - allow(file_set).to receive(:original_file).and_return(pcdmfile) - expect(CharacterizeJob).to receive(:perform_later).with(file_set, pcdmfile.id, io.tempfile.path) + allow(file_set).to receive(relation).and_return(pcdmfile) + expect(Hyrax::VersioningService).to receive(:create).with(pcdmfile, user) + expect(CharacterizeJob).to receive(:perform_later).with(FileSet, pcdmfile.id, huf.uploader.path) actor.ingest_file(io) end it 'returns false when save fails' do diff --git a/spec/jobs/characterize_job_spec.rb b/spec/jobs/characterize_job_spec.rb index eb2822ae57..fd70dbfb56 100644 --- a/spec/jobs/characterize_job_spec.rb +++ b/spec/jobs/characterize_job_spec.rb @@ -1,19 +1,36 @@ RSpec.describe CharacterizeJob do - let(:file_set) { FileSet.new(id: file_set_id) } let(:file_set_id) { 'abc12345' } - let(:file_path) { Rails.root + 'tmp' + 'uploads' + 'ab' + 'c1' + '23' + '45' + 'abc12345' + 'picture.png' } - let(:filename) { file_path.to_s } + let(:filename) { Rails.root.join('tmp', 'uploads', 'ab', 'c1', '23', '45', 'abc12345', 'picture.png').to_s } + let(:file_set) do + FileSet.new(id: file_set_id).tap do |fs| + allow(fs).to receive(:original_file).and_return(file) + allow(fs).to receive(:update_index) + end + end + # let(:io) { JobIoWrapper.new(file_set_id: file_set.id, user: create(:user), path: filename) } let(:file) do Hydra::PCDM::File.new.tap do |f| f.content = 'foo' f.original_name = 'picture.png' f.save! + allow(f).to receive(:save!) end end before do allow(FileSet).to receive(:find).with(file_set_id).and_return(file_set) - allow(file_set).to receive(:original_file).and_return(file) + allow(Hydra::Works::CharacterizationService).to receive(:run).with(file, filename) + allow(CreateDerivativesJob).to receive(:perform_later).with(file_set, file.id, filename) + end + + context 'with valid filepath param' do + let(:filename) { File.join(fixture_path, 'world.png') } + + it 'skips Hyrax::WorkingDirectory' do + expect(Hyrax::WorkingDirectory).not_to receive(:find_or_retrieve) + expect(Hydra::Works::CharacterizationService).to receive(:run).with(file, filename) + described_class.perform_now(file_set, file.id, filename) + end end context 'when the characterization proxy content is present' do @@ -40,10 +57,6 @@ before do allow(file_set).to receive(:parent).and_return(work) allow(work).to receive(:in_collections).and_return([collection]) - allow(Hydra::Works::CharacterizationService).to receive(:run).with(file, filename) - allow(file).to receive(:save!) - allow(file_set).to receive(:update_index) - allow(CreateDerivativesJob).to receive(:perform_later).with(file_set, file.id, filename) end it "reindexes the collection" do expect(collection).to receive(:update_index)