Skip to content

Commit

Permalink
#4 added support for reading cff files
Browse files Browse the repository at this point in the history
  • Loading branch information
mfenner committed Dec 7, 2021
1 parent d2debb5 commit 1f6c3f0
Show file tree
Hide file tree
Showing 24 changed files with 841 additions and 27 deletions.
1 change: 1 addition & 0 deletions .tool-versions
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ruby 2.7.5
2 changes: 1 addition & 1 deletion Gemfile.lock
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
PATH
remote: .
specs:
briard (2.0.2)
briard (2.1)
activesupport (>= 4.2.5)
benchmark_methods (~> 0.7)
bibtex-ruby (>= 5.1.0)
Expand Down
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,13 @@ Briard reads and/or writes these metadata formats:
<td>Yes</td>
<td>Yes</td>
</tr>
<tr>
<td><a href='https://citation-file-format.github.io/'>CFF</a></td>
<td>citation file format (cff)</td>
<td>application/vnd.cff+yaml</td>
<td>Yes</td>
<td>No</td>
</tr>
<tr>
<td><a href='https://jats.nlm.nih.gov/'>JATS</a></td>
<td>jats</td>
Expand Down
4 changes: 2 additions & 2 deletions lib/briard/metadata.rb
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def initialize(options={})
elsif options[:input].present? && File.exist?(options[:input])
filename = File.basename(options[:input])
ext = File.extname(options[:input])
if %w(.bib .ris .xml .json).include?(ext)
if %w(.bib .ris .xml .json .cff).include?(ext)
hsh = {
"url" => options[:url],
"state" => options[:state],
Expand Down Expand Up @@ -83,7 +83,7 @@ def initialize(options={})
end

# make sure input is encoded as utf8
string = string.force_encoding("UTF-8") if string.present?
string = string.force_encoding("UTF-8") if string.present? && string.is_a?(String)
@string = string

# input options for citation formatting
Expand Down
4 changes: 4 additions & 0 deletions lib/briard/metadata_utils.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

require_relative 'readers/bibtex_reader'
require_relative 'readers/citeproc_reader'
require_relative 'readers/cff_reader'
require_relative 'readers/codemeta_reader'
require_relative 'readers/crosscite_reader'
require_relative 'readers/crossref_reader'
Expand All @@ -20,6 +21,7 @@
require_relative 'writers/bibtex_writer'
require_relative 'writers/citation_writer'
require_relative 'writers/citeproc_writer'
# require_relative 'writers/cff_writer'
require_relative 'writers/codemeta_writer'
require_relative 'writers/crosscite_writer'
require_relative 'writers/crossref_writer'
Expand All @@ -43,6 +45,7 @@ module MetadataUtils

include Briard::Readers::BibtexReader
include Briard::Readers::CiteprocReader
include Briard::Readers::CffReader
include Briard::Readers::CodemetaReader
include Briard::Readers::CrossciteReader
include Briard::Readers::CrossrefReader
Expand All @@ -55,6 +58,7 @@ module MetadataUtils
include Briard::Writers::BibtexWriter
include Briard::Writers::CitationWriter
include Briard::Writers::CiteprocWriter
# include Briard::Writers::CffWriter
include Briard::Writers::CodemetaWriter
include Briard::Writers::CrossciteWriter
include Briard::Writers::CrossrefWriter
Expand Down
107 changes: 107 additions & 0 deletions lib/briard/readers/cff_reader.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# frozen_string_literal: true

module Briard
  module Readers
    # Reader for Citation File Format (CITATION.cff) metadata.
    #
    # Relies on helpers that are mixed in from elsewhere in the project
    # (normalize_id, github_as_cff_url, parse_attributes, normalize_orcid,
    # get_identifier_type, name_to_fos, hsh_to_spdx, sanitize, doi_from_url).
    module CffReader
      # Downloads a CITATION.cff file and parses it as YAML.
      #
      # @param id [String, nil] identifier/URL of the repository or CITATION.cff file
      # @param options [Hash] accepted for interface parity with other readers; unused here
      # @return [Hash] { "string" => parsed YAML document } on success, or
      #   { "string" => nil, "state" => "not_found" } when there is no id or no content
      def get_cff(id: nil, **options)
        return { "string" => nil, "state" => "not_found" } unless id.present?

        id = normalize_id(id)
        response = Maremma.get(github_as_cff_url(id), accept: "json", raw: true)
        data = response.body.fetch("data", nil)

        # Without a raw document there is nothing to parse; Psych.safe_load(nil)
        # would raise instead of reporting a missing file.
        return { "string" => nil, "state" => "not_found" } unless data.is_a?(String) && data.present?

        # Dates are parsed to date object, need to convert to iso8601 later
        string = Psych.safe_load(data, permitted_classes: [Date])
        { "string" => string }
      end

      # Converts CFF metadata into the internal metadata hash.
      #
      # @param string [String, Hash, nil] raw YAML text, or an already parsed document
      #   (get_cff returns the parsed form)
      # @param options [Hash] reader options; :doi overrides the DOI found in the file,
      #   and every option except :doi, :id, :url, :sandbox, :validate, :ra is merged
      #   into the result
      # @return [Hash] metadata hash with string keys
      def read_cff(string: nil, **options)
        read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:doi, :id, :url, :sandbox, :validate, :ra))
        meta = string.is_a?(String) ? Psych.safe_load(string, permitted_classes: [Date]) : string
        # nil, empty or non-mapping input is treated as an empty document so the
        # lookups below do not raise; "state" then becomes "not_found".
        meta = {} unless meta.is_a?(Hash)

        raw_identifiers = Array.wrap(meta.fetch("identifiers", nil))

        # NOTE(review): Hash entries are read via "propertyID" here, while the DOI
        # lookup below matches on "type" - confirm which key CFF identifiers carry.
        identifiers = raw_identifiers.map do |r|
          r = normalize_id(r) if r.is_a?(String)
          if r.is_a?(String) && !r.start_with?("https://doi.org")
            { "identifierType" => "URL", "identifier" => r }
          elsif r.is_a?(Hash)
            { "identifierType" => get_identifier_type(r["propertyID"]), "identifier" => r["value"] }
          end
        end.compact.uniq

        # DOI precedence: explicit option, top-level "doi", then the first identifier
        # of type "doi". A missing identifier must yield nil rather than raise.
        doi_identifier = raw_identifiers.find { |i| i.is_a?(Hash) && i["type"] == "doi" }
        id = normalize_id(options[:doi] || meta.fetch("doi", nil) || doi_identifier.to_h.fetch("value", nil))
        url = normalize_id(meta.fetch("repository-code", nil))
        creators = cff_creators(Array.wrap(meta.fetch("authors", nil)))

        dates = []
        publication_year = nil
        date_released = meta.fetch("date-released", nil)
        if date_released.present?
          # Psych returns a Date for an unquoted YAML date but a String for a quoted one.
          issued = date_released.respond_to?(:iso8601) ? date_released.iso8601 : date_released.to_s
          dates << { "date" => issued, "dateType" => "Issued" }
          publication_year = issued[0..3]
        end

        publisher = url.to_s.start_with?("https://github.com") ? "GitHub" : nil
        state = meta.present? || read_options.present? ? "findable" : "not_found"
        types = {
          "resourceTypeGeneral" => "Software",
          "schemaOrg" => "SoftwareSourceCode",
          "citeproc" => "article-journal",
          "bibtex" => "misc",
          "ris" => "COMP"
        }
        subjects = Array.wrap(meta.fetch("keywords", nil)).flat_map { |subject| name_to_fos(subject) }

        titles = meta.fetch("title", nil).present? ? [{ "title" => meta.fetch("title", nil) }] : []
        rights_list = meta.fetch("license", nil).present? ? [hsh_to_spdx("rightsIdentifier" => meta.fetch("license"))] : nil
        descriptions = meta.fetch("abstract", nil).present? ? [{ "description" => sanitize(meta.fetch("abstract")), "descriptionType" => "Abstract" }] : nil

        { "id" => id,
          "types" => types,
          "identifiers" => identifiers,
          "doi" => doi_from_url(id),
          "url" => url,
          "titles" => titles,
          "creators" => creators,
          "publisher" => publisher,
          "dates" => dates,
          "publication_year" => publication_year,
          "descriptions" => descriptions,
          "rights_list" => rights_list,
          "version_info" => meta.fetch("version", nil),
          "subjects" => subjects,
          "state" => state
        }.merge(read_options)
      end

      # Maps CFF author entries to creator hashes.
      #
      # An entry with "given-names" or a valid ORCID becomes a "Personal" creator;
      # anything else becomes an "Organizational" creator named by "name".
      #
      # @param creators [Array<Hash>, Hash, nil] CFF "authors" entries
      # @return [Array<Hash>] creator hashes
      def cff_creators(creators)
        Array.wrap(creators).map do |creator|
          orcid = normalize_orcid(parse_attributes(creator["orcid"]))
          name_identifiers = orcid.present? ? [{ "nameIdentifier" => orcid, "nameIdentifierScheme" => "ORCID", "schemeUri"=>"https://orcid.org" }] : nil

          if creator["given-names"].present? || name_identifiers.present?
            given_name = parse_attributes(creator["given-names"])
            family_name = parse_attributes(creator["family-names"])

            # Hashes pass through unchanged, non-blank strings become { "name" => ... },
            # and everything else (blank strings, other types) is dropped.
            affiliation = Array.wrap(creator["affiliation"]).map do |aff|
              if aff.is_a?(Hash)
                aff
              elsif aff.is_a?(String) && aff.strip.present?
                { "name" => aff }
              end
            end.compact

            { "nameType" => "Personal",
              "nameIdentifiers" => name_identifiers,
              "name" => [family_name, given_name].compact.join(", "),
              "givenName" => given_name,
              "familyName" => family_name,
              "affiliation" => affiliation.presence }.compact
          else
            { "nameType" => "Organizational",
              "name" => creator["name"] || creator["__content__"] }
          end
        end
      end
    end
  end
end
6 changes: 3 additions & 3 deletions lib/briard/readers/crossref_reader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -264,12 +264,12 @@ def crossref_people(bibliographic_metadata, contributor_role)
given_name = parse_attributes(a["given_name"])
family_name = parse_attributes(a["surname"])
affiliation = Array.wrap(a["affiliation"]).map do |a|
if a.is_a?(Hash) && a.key?("__content__") && a["__content__"].strip.blank?
if a.is_a?(Hash)
a
elsif a.is_a?(Hash) && a.key?("__content__") && a["__content__"].strip.blank?
nil
elsif a.is_a?(Hash) && a.key?("__content__")
{ "name" => a["__content__"] }
elsif a.is_a?(Hash)
a
elsif a.strip.blank?
nil
elsif a.is_a?(String)
Expand Down
22 changes: 21 additions & 1 deletion lib/briard/utils.rb
Original file line number Diff line number Diff line change
Expand Up @@ -506,7 +506,9 @@ def find_from_format_by_id(id)
"orcid"
elsif /\A(http|https):\/(\/)?github\.com\/(.+)\/package.json\z/.match(id)
"npm"
elsif /\A(http|https):\/(\/)?github\.com\/(.+)\z/.match(id)
elsif /\A(http|https):\/(\/)?github\.com\/(.+)\/CITATION.cff\z/.match(id)
"cff"
elsif /\A(http|https):\/(\/)?github\.com\/(.+)\/codemeta.json\z/.match(id)
"codemeta"
else
"schema_org"
Expand All @@ -516,6 +518,8 @@ def find_from_format_by_id(id)
# Returns the reader format implied by a well-known metadata filename:
# "npm" for package.json, "cff" for CITATION.cff, and nil for anything else.
def find_from_format_by_filename(filename)
  case filename
  when "package.json" then "npm"
  when "CITATION.cff" then "cff"
  end
end

Expand All @@ -528,6 +532,8 @@ def find_from_format_by_ext(string, options={})
"crossref"
elsif options[:ext] == ".xml" && Nokogiri::XML(string, nil, 'UTF-8', &:noblanks).collect_namespaces.find { |k, v| v.start_with?("http://datacite.org/schema/kernel") }
"datacite"
elsif options[:ext] == ".cff"
"cff"
elsif options[:ext] == ".json" && Maremma.from_json(string).to_h.dig("@context").to_s.start_with?("http://schema.org", "https://schema.org")
"schema_org"
elsif options[:ext] == ".json" && Maremma.from_json(string).to_h.dig("@context") == ("https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld")
Expand Down Expand Up @@ -558,9 +564,13 @@ def find_from_format_by_string(string)
"citeproc"
elsif string.start_with?("TY - ")
"ris"
elsif YAML.load(string).to_h.fetch("cff-version", nil).present?
"cff"
elsif BibTeX.parse(string).first
"bibtex"
end
rescue Psych::SyntaxError => error
"bibtex"
rescue BibTeX::ParseError => error
nil
end
Expand Down Expand Up @@ -1079,6 +1089,16 @@ def github_as_codemeta_url(url)
end
end

# Builds the raw.githubusercontent.com URL of a repository's CITATION.cff.
#
# When the parsed path already ends in CITATION.cff, the release and path taken
# from the URL are used; otherwise, if an owner was parsed, the file is looked
# up at the root of the "main" branch. Returns nil when neither applies.
def github_as_cff_url(url)
  repo_parts = github_from_url(url)
  owner, repo, release, path = repo_parts.values_at(:owner, :repo, :release, :path)

  return "https://raw.githubusercontent.com/#{owner}/#{repo}/#{release}/#{path}" if path.to_s.end_with?("CITATION.cff")
  return unless owner.present?

  "https://raw.githubusercontent.com/#{owner}/#{repo}/main/CITATION.cff"
end

def get_date_parts(iso8601_time)
return { 'date-parts' => [[]] } if iso8601_time.nil?

Expand Down
2 changes: 1 addition & 1 deletion lib/briard/version.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
module Briard
VERSION = "2.0.2"
VERSION = "2.1"
end
29 changes: 29 additions & 0 deletions lib/briard/writers/cff_writer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# frozen_string_literal: true

# module Briard
# module Writers
# module CffWriter
# def cff
# return nil unless valid? || show_errors

# hsh = {
# "@context" => id.present? ? "https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld" : nil,
# "@type" => types.present? ? types["schemaOrg"] : nil,
# "@id" => normalize_doi(doi),
# "identifier" => to_schema_org_identifiers(identifiers),
# "codeRepository" => url,
# "name" => parse_attributes(titles, content: "title", first: true),
# "authors" => creators,
# "description" => parse_attributes(descriptions, content: "description", first: true),
# "version" => version_info,
# "tags" => subjects.present? ? Array.wrap(subjects).map { |k| parse_attributes(k, content: "subject", first: true) } : nil,
# "datePublished" => get_date(dates, "Issued") || publication_year,
# "dateModified" => get_date(dates, "Updated"),
# "publisher" => publisher,
# "license" => Array.wrap(rights_list).map { |l| l["rightsUri"] }.compact.unwrap,
# }.compact
# JSON.pretty_generate hsh.presence
# end
# end
# end
# end
17 changes: 16 additions & 1 deletion spec/find_from_format_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,13 @@
expect(subject.find_from_format_by_id(id)).to eq("op")
end

it "cff" do
  # A GitHub blob URL that ends in CITATION.cff is detected as the cff format.
  cff_url = "https://github.com/citation-file-format/ruby-cff/blob/main/CITATION.cff"
  detected = subject.find_from_format_by_id(cff_url)
  expect(detected).to eq("cff")
end

it "codemeta" do
id = "https://github.com/datacite/maremma"
id = "https://github.com/datacite/maremma/blob/master/codemeta.json"
expect(subject.find_from_format_by_id(id)).to eq("codemeta")
end

Expand All @@ -64,6 +69,11 @@
filename = "package.json"
expect(subject.find_from_format_by_filename(filename)).to eq("npm")
end

it "cff" do
  # The conventional CITATION.cff filename maps to the cff format.
  detected = subject.find_from_format_by_filename("CITATION.cff")
  expect(detected).to eq("cff")
end
end

context "find_from_format_by_string" do
Expand Down Expand Up @@ -91,6 +101,11 @@
expect(subject.find_from_format_by_string(string)).to eq("codemeta")
end

it "cff" do
  # The content of the CITATION.cff fixture is recognised as cff from the string alone.
  cff_source = IO.read(fixture_path + 'CITATION.cff').strip
  detected = subject.find_from_format_by_string(cff_source)
  expect(detected).to eq("cff")
end

it "schema_org" do
string = IO.read(fixture_path + 'schema_org_topmed.json').strip
expect(subject.find_from_format_by_string(string)).to eq("schema_org")
Expand Down
Loading

0 comments on commit 1f6c3f0

Please sign in to comment.