Skip to content

Commit

Permalink
Merge pull request #1413 from samvera/awsfile_ingest
Browse files Browse the repository at this point in the history
AWS S3 file ingest abstraction/testing
  • Loading branch information
mjgiarlo authored Aug 4, 2017
2 parents 4976fa6 + cc748d7 commit d9bdb70
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 51 deletions.
7 changes: 5 additions & 2 deletions app/actors/hyrax/actors/file_actor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ def ingest_file(io)
return false unless file_set.save
repository_file = related_file
Hyrax::VersioningService.create(repository_file, user)
CharacterizeJob.perform_later(file_set, repository_file.id, io.path) # path hint in case next worker is on same filesystem
pathhint = io.uploaded_file.uploader.path if io.uploaded_file # in case next worker is on same filesystem
CharacterizeJob.perform_later(file_set, repository_file.id, pathhint || io.path)
end

# Reverts file and spawns async job to characterize and create derivatives.
Expand All @@ -43,9 +44,11 @@ def revert_to(revision_id)
CharacterizeJob.perform_later(file_set, repository_file.id)
end

# @note FileSet comparison is limited to IDs, but this should be sufficient, given that
# most operations here are on the other side of async retrieval in Jobs (based solely on ID).
def ==(other)
return false unless other.is_a?(self.class)
file_set == other.file_set && relation == other.relation && user == other.user
file_set.id == other.file_set.id && relation == other.relation && user == other.user
end

private
Expand Down
6 changes: 3 additions & 3 deletions app/jobs/characterize_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@ class CharacterizeJob < Hyrax::ApplicationJob
# @param [String, NilClass] filepath the cached file within the Hyrax.config.working_path
def perform(file_set, file_id, filepath = nil)
raise "#{file_set.class.characterization_proxy} was not found for FileSet #{file_set.id}" unless file_set.characterization_proxy?
filename = Hyrax::WorkingDirectory.find_or_retrieve(file_id, file_set.id, filepath)
Hydra::Works::CharacterizationService.run(file_set.characterization_proxy, filename)
filepath = Hyrax::WorkingDirectory.find_or_retrieve(file_id, file_set.id) unless filepath && File.exist?(filepath)
Hydra::Works::CharacterizationService.run(file_set.characterization_proxy, filepath)
Rails.logger.debug "Ran characterization on #{file_set.characterization_proxy.id} (#{file_set.characterization_proxy.mime_type})"
file_set.characterization_proxy.save!
file_set.update_index
file_set.parent.in_collections.each(&:update_index) if file_set.parent
CreateDerivativesJob.perform_later(file_set, file_id, filename)
CreateDerivativesJob.perform_later(file_set, file_id, filepath)
end
end
9 changes: 4 additions & 5 deletions app/models/job_io_wrapper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -57,18 +57,17 @@ def extracted_mime_type
end

# The magic that switches *once* between local filepath and CarrierWave file
# @return [#read] File-like object ready to #read
# @return [File, StringIO, #read] File-like object ready to #read
def file
@file ||= (file_from_path || file_from_uploaded_file!)
end

# @return [File]
# @return [File, StringIO] depending on CarrierWave configuration
# @raise when uploaded_file *becomes* required but is missing
def file_from_uploaded_file!
raise("path '#{path}' was unusable and uploaded_file empty") unless uploaded_file
self.path = uploaded_file.uploader.file.file # old path useless now
# uploaded_file.uploader.file.to_file
uploaded_file.uploader
self.path = uploaded_file.uploader.file.path # old path useless now
uploaded_file.uploader.sanitized_file.file
end

# @return [File, nil] nil if the path doesn't exist on this (worker) system or can't be read
Expand Down
51 changes: 18 additions & 33 deletions spec/actors/hyrax/actors/file_actor_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,13 @@
let(:file_set) { create(:file_set) }
let(:relation) { :original_file }
let(:actor) { described_class.new(file_set, relation, user) }
let(:uploaded_file) { fixture_file_upload('/world.png', 'image/png') }
let(:io) { Hydra::Derivatives::IoDecorator.new(uploaded_file, uploaded_file.content_type, uploaded_file.original_filename) }
let(:fixture) { fixture_file_upload('/world.png', 'image/png') }
let(:huf) { Hyrax::UploadedFile.new(user: user, file_set_uri: file_set.uri, file: fixture) }
let(:io) { JobIoWrapper.new(file_set_id: file_set.id, user: user, uploaded_file: huf) }
let(:pcdmfile) do
Hydra::PCDM::File.new.tap do |f|
f.content = File.open(uploaded_file.path).read
f.original_name = uploaded_file.original_filename
f.content = File.open(fixture.path).read
f.original_name = fixture.original_filename
f.save!
end
end
Expand All @@ -33,40 +34,23 @@ class FileSetWithExtras < FileSet
Object.send(:remove_const, :FileSetWithExtras)
end
it 'uses the relation from the actor' do
expect(CharacterizeJob).to receive(:perform_later).with(file_set, String, io.tempfile.path)
expect(CharacterizeJob).to receive(:perform_later).with(FileSetWithExtras, String, huf.uploader.path)
actor.ingest_file(io)
expect(file_set.reload.remastered.mime_type).to eq 'image/png'
end
end

context 'when given a mime_type' do
let(:uploaded_file) { fixture_file_upload('/world.png', 'image/gif') }

it 'uses the provided mime_type' do
expect(CharacterizeJob).to receive(:perform_later).with(file_set, String, io.tempfile.path)
actor.ingest_file(io)
expect(file_set.reload.original_file.mime_type).to eq 'image/gif'
end
end

context 'when not given a mime_type' do
before { allow(Hyrax::VersioningService).to receive(:create) }
it 'passes a decorated instance of the file with a nil mime_type' do
# The parameter versioning: false instructs the machinery in Hydra::Works to defer versioning
expect(Hydra::Works::AddFileToFileSet).to receive(:call).with(
file_set,
io,
relation,
versioning: false
).and_call_original
expect(CharacterizeJob).to receive(:perform_later).with(file_set, String, io.tempfile.path)
actor.ingest_file(io)
end
it 'uses the provided mime_type' do
allow(fixture).to receive(:content_type).and_return('image/gif')
expect(CharacterizeJob).to receive(:perform_later).with(FileSet, String, huf.uploader.path)
actor.ingest_file(io)
expect(file_set.reload.original_file.mime_type).to eq 'image/gif'
end

context 'with two existing versions from different users' do
let(:uploaded_file2) { fixture_file_upload('/small_file.txt', 'text/plain') }
let(:io2) { Hydra::Derivatives::IoDecorator.new(uploaded_file2, uploaded_file2.content_type, uploaded_file2.original_filename) }
let(:fixture2) { fixture_file_upload('/small_file.txt', 'text/plain') }
let(:huf2) { Hyrax::UploadedFile.new(user: user2, file_set_uri: file_set.uri, file: fixture2) }
let(:io2) { JobIoWrapper.new(file_set_id: file_set.id, user: user2, uploaded_file: huf2) }
let(:user2) { create(:user) }
let(:actor2) { described_class.new(file_set, relation, user2) }
let(:versions) { file_set.reload.original_file.versions }
Expand All @@ -81,9 +65,9 @@ class FileSetWithExtras < FileSet
expect(versions.all.count).to eq 2
# the current version
expect(Hyrax::VersioningService.latest_version_of(file_set.reload.original_file).label).to eq 'version2'
expect(file_set.original_file.content).to eq uploaded_file2.open.read
expect(file_set.original_file.mime_type).to eq 'text/plain'
expect(file_set.original_file.original_name).to eq 'small_file.txt'
expect(file_set.original_file.content).to eq fixture2.open.read
# the user for each version
expect(Hyrax::VersionCommitter.where(version_id: versions.first.uri).pluck(:committer_login)).to eq [user.user_key]
expect(Hyrax::VersionCommitter.where(version_id: versions.last.uri).pluck(:committer_login)).to eq [user2.user_key]
Expand All @@ -97,8 +81,9 @@ class FileSetWithExtras < FileSet
end
it 'when the file is available' do
allow(file_set).to receive(:save).and_return(true)
allow(file_set).to receive(:original_file).and_return(pcdmfile)
expect(CharacterizeJob).to receive(:perform_later).with(file_set, pcdmfile.id, io.tempfile.path)
allow(file_set).to receive(relation).and_return(pcdmfile)
expect(Hyrax::VersioningService).to receive(:create).with(pcdmfile, user)
expect(CharacterizeJob).to receive(:perform_later).with(FileSet, pcdmfile.id, huf.uploader.path)
actor.ingest_file(io)
end
it 'returns false when save fails' do
Expand Down
29 changes: 21 additions & 8 deletions spec/jobs/characterize_job_spec.rb
Original file line number Diff line number Diff line change
@@ -1,19 +1,36 @@
RSpec.describe CharacterizeJob do
let(:file_set) { FileSet.new(id: file_set_id) }
let(:file_set_id) { 'abc12345' }
let(:file_path) { Rails.root + 'tmp' + 'uploads' + 'ab' + 'c1' + '23' + '45' + 'abc12345' + 'picture.png' }
let(:filename) { file_path.to_s }
let(:filename) { Rails.root.join('tmp', 'uploads', 'ab', 'c1', '23', '45', 'abc12345', 'picture.png').to_s }
let(:file_set) do
FileSet.new(id: file_set_id).tap do |fs|
allow(fs).to receive(:original_file).and_return(file)
allow(fs).to receive(:update_index)
end
end
# let(:io) { JobIoWrapper.new(file_set_id: file_set.id, user: create(:user), path: filename) }
let(:file) do
Hydra::PCDM::File.new.tap do |f|
f.content = 'foo'
f.original_name = 'picture.png'
f.save!
allow(f).to receive(:save!)
end
end

before do
allow(FileSet).to receive(:find).with(file_set_id).and_return(file_set)
allow(file_set).to receive(:original_file).and_return(file)
allow(Hydra::Works::CharacterizationService).to receive(:run).with(file, filename)
allow(CreateDerivativesJob).to receive(:perform_later).with(file_set, file.id, filename)
end

context 'with valid filepath param' do
let(:filename) { File.join(fixture_path, 'world.png') }

it 'skips Hyrax::WorkingDirectory' do
expect(Hyrax::WorkingDirectory).not_to receive(:find_or_retrieve)
expect(Hydra::Works::CharacterizationService).to receive(:run).with(file, filename)
described_class.perform_now(file_set, file.id, filename)
end
end

context 'when the characterization proxy content is present' do
Expand All @@ -40,10 +57,6 @@
before do
allow(file_set).to receive(:parent).and_return(work)
allow(work).to receive(:in_collections).and_return([collection])
allow(Hydra::Works::CharacterizationService).to receive(:run).with(file, filename)
allow(file).to receive(:save!)
allow(file_set).to receive(:update_index)
allow(CreateDerivativesJob).to receive(:perform_later).with(file_set, file.id, filename)
end
it "reindexes the collection" do
expect(collection).to receive(:update_index)
Expand Down

0 comments on commit d9bdb70

Please sign in to comment.