diff --git a/app/actors/hyrax/actors/create_with_remote_files_actor.rb b/app/actors/hyrax/actors/create_with_remote_files_actor.rb index 5b015e76ca..4e51ca3466 100644 --- a/app/actors/hyrax/actors/create_with_remote_files_actor.rb +++ b/app/actors/hyrax/actors/create_with_remote_files_actor.rb @@ -5,6 +5,9 @@ module Actors # attributes[:remote_files] = filenames.map do |name| # { url: "https://example.com/file/#{name}", file_name: name } # end + # + # Browse everything may also return a local file. And although it's in the + # url property, it may have spaces, and not be a valid URI. class CreateWithRemoteFilesActor < Hyrax::Actors::AbstractActor # @param [Hyrax::Actors::Environment] env # @return [Boolean] true if create was successful @@ -26,8 +29,8 @@ def whitelisted_ingest_dirs Hyrax.config.whitelisted_ingest_dirs end - def validate_remote_url(url) - uri = URI.parse(URI.encode(url)) + # @param uri [URI] the uri fo the resource to import + def validate_remote_url(uri) if uri.scheme == 'file' path = File.absolute_path(URI.decode(uri.path)) whitelisted_ingest_dirs.any? do |dir| @@ -46,26 +49,29 @@ def attach_files(env, remote_files) return true unless remote_files remote_files.each do |file_info| next if file_info.blank? || file_info[:url].blank? - unless validate_remote_url(file_info[:url]) + # Escape any space characters, so that this is a legal URI + uri = URI.parse(Addressable::URI.escape(file_info[:url])) + unless validate_remote_url(uri) Rails.logger.error "User #{env.user.user_key} attempted to ingest file from url #{file_info[:url]}, which doesn't pass validation" return false end - create_file_from_url(env, file_info[:url], file_info[:file_name]) + create_file_from_url(env, uri, file_info[:file_name]) end true end # Generic utility for creating FileSet from a URL # Used in to import files using URLs from a file picker like browse_everything - def create_file_from_url(env, url, file_name) - ::FileSet.new(import_url: url, label: file_name) do |fs| + def create_file_from_url(env, uri, file_name) + ::FileSet.new(import_url: uri.to_s, label: file_name) do |fs| actor = Hyrax::Actors::FileSetActor.new(fs, env.user) actor.create_metadata(visibility: env.curation_concern.visibility) actor.attach_to_work(env.curation_concern) fs.save! - uri = URI.parse(URI.encode(url)) if uri.scheme == 'file' - IngestLocalFileJob.perform_later(fs, URI.decode(uri.path), env.user) + # Turn any %20 into spaces. + file_path = CGI.unescape(uri.path) + IngestLocalFileJob.perform_later(fs, file_path, env.user) else ImportUrlJob.perform_later(fs, operation_for(user: actor.user)) end diff --git a/spec/actors/hyrax/actors/create_with_remote_files_actor_spec.rb b/spec/actors/hyrax/actors/create_with_remote_files_actor_spec.rb index 65e65179db..f4a25bc4e7 100644 --- a/spec/actors/hyrax/actors/create_with_remote_files_actor_spec.rb +++ b/spec/actors/hyrax/actors/create_with_remote_files_actor_spec.rb @@ -86,19 +86,19 @@ end it "accepts file: urls in whitelisted directories" do - expect(actor.send(:validate_remote_url, "file:///local/file/test.txt")).to be true - expect(actor.send(:validate_remote_url, "file:///local/file/subdirectory/test.txt")).to be true - expect(actor.send(:validate_remote_url, "file:///test/test.txt")).to be true + expect(actor.send(:validate_remote_url, URI('file:///local/file/test.txt'))).to be true + expect(actor.send(:validate_remote_url, URI('file:///local/file/subdirectory/test.txt'))).to be true + expect(actor.send(:validate_remote_url, URI('file:///test/test.txt'))).to be true end it "rejects file: urls outside whitelisted directories" do - expect(actor.send(:validate_remote_url, "file:///tmp/test.txt")).to be false - expect(actor.send(:validate_remote_url, "file:///test/../tmp/test.txt")).to be false - expect(actor.send(:validate_remote_url, "file:///test/")).to be false + expect(actor.send(:validate_remote_url, URI('file:///tmp/test.txt'))).to be false + expect(actor.send(:validate_remote_url, URI('file:///test/../tmp/test.txt'))).to be false + expect(actor.send(:validate_remote_url, URI('file:///test/'))).to be false end it "accepts other types of urls" do - expect(actor.send(:validate_remote_url, "https://example.com/test.txt")).to be true + expect(actor.send(:validate_remote_url, URI('https://example.com/test.txt'))).to be true end end end