#4 added support for reading cff files

front-matter · Dec 7, 2021 · 1f6c3f0 · 1f6c3f0
1 parent d2debb5
commit 1f6c3f0
Show file tree

Hide file tree

Showing 24 changed files with 841 additions and 27 deletions.
diff --git a/.tool-versions b/.tool-versions
@@ -0,0 +1 @@
+ruby 2.7.5
diff --git a/Gemfile.lock b/Gemfile.lock
@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    briard (2.0.2)
+    briard (2.1)
       activesupport (>= 4.2.5)
       benchmark_methods (~> 0.7)
       bibtex-ruby (>= 5.1.0)

diff --git a/README.md b/README.md
@@ -83,6 +83,13 @@ Briard reads and/or writes these metadata formats:
       <td>Yes</td>
       <td>Yes</td>
     </tr>
+    <tr>
+      <td><a href='https://citation-file-format.github.io/'>CFF</a></td>
+      <td>citation file format (cff)</td>
+      <td>application/vnd.cff+yaml</td>
+      <td>Yes</td>
+      <td>No</td>
+    </tr>
     <tr>
       <td><a href='https://jats.nlm.nih.gov/'>JATS</a></td>
       <td>jats</td>

diff --git a/lib/briard/metadata.rb b/lib/briard/metadata.rb
@@ -43,7 +43,7 @@ def initialize(options={})
       elsif options[:input].present? && File.exist?(options[:input])
         filename = File.basename(options[:input])
         ext = File.extname(options[:input])
-        if %w(.bib .ris .xml .json).include?(ext)
+        if %w(.bib .ris .xml .json .cff).include?(ext)
           hsh = {
             "url" => options[:url],
             "state" => options[:state],
@@ -83,7 +83,7 @@ def initialize(options={})
       end
 
       # make sure input is encoded as utf8
-      string = string.force_encoding("UTF-8") if string.present?
+      string = string.force_encoding("UTF-8") if string.present? && string.is_a?(String)
       @string = string
 
       # input options for citation formatting

diff --git a/lib/briard/metadata_utils.rb b/lib/briard/metadata_utils.rb
@@ -8,6 +8,7 @@
 
 require_relative 'readers/bibtex_reader'
 require_relative 'readers/citeproc_reader'
+require_relative 'readers/cff_reader'
 require_relative 'readers/codemeta_reader'
 require_relative 'readers/crosscite_reader'
 require_relative 'readers/crossref_reader'
@@ -20,6 +21,7 @@
 require_relative 'writers/bibtex_writer'
 require_relative 'writers/citation_writer'
 require_relative 'writers/citeproc_writer'
+# require_relative 'writers/cff_writer'
 require_relative 'writers/codemeta_writer'
 require_relative 'writers/crosscite_writer'
 require_relative 'writers/crossref_writer'
@@ -43,6 +45,7 @@ module MetadataUtils
 
     include Briard::Readers::BibtexReader
     include Briard::Readers::CiteprocReader
+    include Briard::Readers::CffReader
     include Briard::Readers::CodemetaReader
     include Briard::Readers::CrossciteReader
     include Briard::Readers::CrossrefReader
@@ -55,6 +58,7 @@ module MetadataUtils
     include Briard::Writers::BibtexWriter
     include Briard::Writers::CitationWriter
     include Briard::Writers::CiteprocWriter
+    # include Briard::Writers::CffWriter
     include Briard::Writers::CodemetaWriter
     include Briard::Writers::CrossciteWriter
     include Briard::Writers::CrossrefWriter

diff --git a/lib/briard/readers/cff_reader.rb b/lib/briard/readers/cff_reader.rb
@@ -0,0 +1,107 @@
+# frozen_string_literal: true
+
+module Briard
+  module Readers
+    # Reader for Citation File Format (CFF) metadata, e.g. a repository's CITATION.cff.
+    module CffReader
+      # Fetches a CITATION.cff file from GitHub and parses it.
+      #
+      # @param id [String, nil] identifier of the repository or of the CITATION.cff file
+      # @param options [Hash] accepted for interface parity; not used by this method
+      # @return [Hash] "string" holds the parsed YAML document rather than the raw text;
+      #   "string" is nil and "state" is "not_found" when there is no id, no URL could
+      #   be built for it, or the response carries no "data" payload
+      def get_cff(id: nil, **options)
+        not_found = { "string" => nil, "state" => "not_found" }
+        return not_found unless id.present?
+
+        # github_as_cff_url returns nil when no repository owner can be determined
+        url = github_as_cff_url(normalize_id(id))
+        return not_found unless url.present?
+
+        response = Maremma.get(url, accept: "json", raw: true)
+        data = response.body.fetch("data", nil)
+        return not_found unless data.present?
+
+        # Dates are parsed to Date objects; read_cff converts them to ISO 8601
+        { "string" => Psych.safe_load(data, permitted_classes: [Date]) }
+      end
+
+      # Converts CFF metadata into the internal metadata hash.
+      #
+      # @param string [String, Hash, nil] raw YAML text or an already parsed document
+      # @param options [Hash] :doi takes precedence over any DOI found in the document;
+      #   all options except :doi, :id, :url, :sandbox, :validate and :ra are merged
+      #   into the result
+      # @return [Hash] metadata hash; "state" is "not_found" when both the document and
+      #   the merged options are empty, "findable" otherwise
+      def read_cff(string: nil, **options)
+        read_options = ActiveSupport::HashWithIndifferentAccess.new(options.except(:doi, :id, :url, :sandbox, :validate, :ra))
+        meta = string.is_a?(String) ? Psych.safe_load(string, permitted_classes: [Date]) : string
+        # nil input, or a document that is not a mapping, is handled as an empty record
+        meta = {} unless meta.is_a?(Hash)
+
+        identifier_entries = Array.wrap(meta.fetch("identifiers", nil))
+        identifiers = identifier_entries.map do |r|
+          r = normalize_id(r) if r.is_a?(String)
+          if r.is_a?(String) && !r.start_with?("https://doi.org")
+            { "identifierType" => "URL", "identifier" => r }
+          elsif r.is_a?(Hash)
+            # NOTE(review): the DOI lookup below reads the "type" key of these entries, while
+            # this line reads "propertyID" - confirm which key CFF identifiers really carry
+            { "identifierType" => get_identifier_type(r["propertyID"]), "identifier" => r["value"] }
+          end
+        end.compact.uniq
+
+        # A document without a DOI identifier must yield nil here, not a call on nil
+        doi_entry = identifier_entries.find { |i| i.is_a?(Hash) && i["type"] == "doi" }
+        id = normalize_id(options[:doi] || meta.fetch("doi", nil) || doi_entry.to_h.fetch("value", nil))
+        url = normalize_id(meta.fetch("repository-code", nil))
+        creators = cff_creators(Array.wrap(meta.fetch("authors", nil)))
+
+        # Parsed dates are Date objects; any other value (e.g. a quoted date) is used as text
+        released = meta.fetch("date-released", nil).presence
+        released = released.iso8601 if released.respond_to?(:iso8601)
+        dates = released ? [{ "date" => released.to_s, "dateType" => "Issued" }] : []
+        publication_year = released.to_s[0..3].presence
+
+        publisher = url.to_s.start_with?("https://github.com") ? "GitHub" : nil
+        state = meta.present? || read_options.present? ? "findable" : "not_found"
+        types = {
+          "resourceTypeGeneral" => "Software",
+          "schemaOrg" => "SoftwareSourceCode",
+          "citeproc" => "article-journal",
+          "bibtex" => "misc",
+          "ris" => "COMP"
+        }
+        subjects = Array.wrap(meta.fetch("keywords", nil)).flat_map { |subject| name_to_fos(subject) }
+
+        title = meta.fetch("title", nil)
+        license = meta.fetch("license", nil)
+        abstract = meta.fetch("abstract", nil)
+        titles = title.present? ? [{ "title" => title }] : []
+        rights_list = license.present? ? [hsh_to_spdx("rightsIdentifier" => license)] : nil
+        descriptions = abstract.present? ? [{ "description" => sanitize(abstract), "descriptionType" => "Abstract" }] : nil
+
+        { "id" => id,
+          "types" => types,
+          "identifiers" => identifiers,
+          "doi" => doi_from_url(id),
+          "url" => url,
+          "titles" => titles,
+          "creators" => creators,
+          "publisher" => publisher,
+          "dates" => dates,
+          "publication_year" => publication_year,
+          "descriptions" => descriptions,
+          "rights_list" => rights_list,
+          "version_info" => meta.fetch("version", nil),
+          "subjects" => subjects,
+          "state" => state
+        }.merge(read_options)
+      end
+
+      # Maps CFF author entries to creator hashes.
+      #
+      # An entry with given names, family names or an ORCID becomes a "Personal" creator;
+      # every other entry becomes an "Organizational" creator named after its "name"
+      # (or "__content__") value.
+      #
+      # @param creators [Array<Hash>, Hash, nil] author entries from the CFF document
+      # @return [Array<Hash>] one creator hash per entry
+      def cff_creators(creators)
+        Array.wrap(creators).map do |author|
+          orcid = normalize_orcid(parse_attributes(author["orcid"]))
+          name_identifiers = orcid.present? ? [{ "nameIdentifier" => orcid, "nameIdentifierScheme" => "ORCID", "schemeUri" => "https://orcid.org" }] : nil
+
+          if author["given-names"].present? || author["family-names"].present? || name_identifiers.present?
+            given_name = parse_attributes(author["given-names"])
+            family_name = parse_attributes(author["family-names"])
+            # Hashes pass through unchanged, non-blank strings become { "name" => ... },
+            # anything else is dropped
+            affiliation = Array.wrap(author["affiliation"]).map do |entry|
+              if entry.is_a?(Hash)
+                entry
+              elsif entry.is_a?(String) && entry.strip.present?
+                { "name" => entry }
+              end
+            end.compact
+
+            { "nameType" => "Personal",
+              "nameIdentifiers" => name_identifiers,
+              "name" => [family_name, given_name].compact.join(", "),
+              "givenName" => given_name,
+              "familyName" => family_name,
+              "affiliation" => affiliation.presence }.compact
+          else
+            { "nameType" => "Organizational",
+              "name" => author["name"] || author["__content__"] }
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/lib/briard/readers/crossref_reader.rb b/lib/briard/readers/crossref_reader.rb
@@ -264,12 +264,12 @@ def crossref_people(bibliographic_metadata, contributor_role)
             given_name = parse_attributes(a["given_name"])
             family_name = parse_attributes(a["surname"])
             affiliation = Array.wrap(a["affiliation"]).map do |a|
-              if a.is_a?(Hash) && a.key?("__content__") && a["__content__"].strip.blank?
+              if a.is_a?(Hash)
+                a
+              elsif a.is_a?(Hash) && a.key?("__content__") && a["__content__"].strip.blank?
                 nil
               elsif a.is_a?(Hash) && a.key?("__content__")
                 { "name" => a["__content__"] }
-              elsif a.is_a?(Hash)
-                a
               elsif a.strip.blank?
                 nil
               elsif a.is_a?(String)

diff --git a/lib/briard/utils.rb b/lib/briard/utils.rb
@@ -506,7 +506,9 @@ def find_from_format_by_id(id)
         "orcid"
       elsif /\A(http|https):\/(\/)?github\.com\/(.+)\/package.json\z/.match(id)
         "npm"
-      elsif /\A(http|https):\/(\/)?github\.com\/(.+)\z/.match(id)
+      elsif /\A(http|https):\/(\/)?github\.com\/(.+)\/CITATION.cff\z/.match(id)
+        "cff"
+      elsif /\A(http|https):\/(\/)?github\.com\/(.+)\/codemeta.json\z/.match(id)
         "codemeta"
       else
         "schema_org"
@@ -516,6 +518,8 @@ def find_from_format_by_id(id)
     def find_from_format_by_filename(filename)
       if filename == "package.json"
         "npm"
+      elsif filename == "CITATION.cff"
+        "cff"
       end
     end
 
@@ -528,6 +532,8 @@ def find_from_format_by_ext(string, options={})
         "crossref"
       elsif options[:ext] == ".xml" && Nokogiri::XML(string, nil, 'UTF-8', &:noblanks).collect_namespaces.find { |k, v| v.start_with?("http://datacite.org/schema/kernel") }
         "datacite"
+      elsif options[:ext] == ".cff"
+        "cff"
       elsif options[:ext] == ".json" && Maremma.from_json(string).to_h.dig("@context").to_s.start_with?("http://schema.org", "https://schema.org")
         "schema_org"
       elsif options[:ext] == ".json" && Maremma.from_json(string).to_h.dig("@context") == ("https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld")
@@ -558,9 +564,13 @@ def find_from_format_by_string(string)
         "citeproc"
       elsif string.start_with?("TY  - ")
         "ris"
+      elsif YAML.load(string).to_h.fetch("cff-version", nil).present?
+        "cff"
       elsif BibTeX.parse(string).first
         "bibtex"
       end
+    rescue Psych::SyntaxError => error
+      "bibtex"
     rescue BibTeX::ParseError => error
       nil
     end
@@ -1079,6 +1089,16 @@ def github_as_codemeta_url(url)
       end
     end
 
+    # Builds the raw.githubusercontent.com URL of a repository's CITATION.cff file.
+    #
+    # When the path parsed from +url+ already ends in "CITATION.cff", the URL is built
+    # from the parsed owner, repo, release and path. Otherwise, if an owner was parsed,
+    # the URL points at CITATION.cff on the "main" branch.
+    #
+    # @param url [String] GitHub URL, parsed with github_from_url
+    # @return [String, nil] the raw file URL, or nil when neither case applies
+    def github_as_cff_url(url)
+      parts = github_from_url(url)
+
+      if parts[:path].to_s.end_with?("CITATION.cff")
+        return "https://raw.githubusercontent.com/#{parts[:owner]}/#{parts[:repo]}/#{parts[:release]}/#{parts[:path]}"
+      end
+      return unless parts[:owner].present?
+
+      "https://raw.githubusercontent.com/#{parts[:owner]}/#{parts[:repo]}/main/CITATION.cff"
+    end
+
     def get_date_parts(iso8601_time)
       return { 'date-parts' => [[]] } if iso8601_time.nil?
 

diff --git a/lib/briard/version.rb b/lib/briard/version.rb
@@ -1,3 +1,3 @@
 module Briard
-  VERSION = "2.0.2"
+  VERSION = "2.1"
 end
diff --git a/lib/briard/writers/cff_writer.rb b/lib/briard/writers/cff_writer.rb
@@ -0,0 +1,29 @@
+# frozen_string_literal: true
+
+# module Briard
+#   module Writers
+#     module CffWriter
+#       def cff
+#         return nil unless valid? || show_errors
+
+#         hsh = {
+#           "@context" => id.present? ? "https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld" : nil,
+#           "@type" => types.present? ? types["schemaOrg"] : nil,
+#           "@id" => normalize_doi(doi),
+#           "identifier" => to_schema_org_identifiers(identifiers),
+#           "codeRepository" => url,
+#           "name" => parse_attributes(titles, content: "title", first: true),
+#           "authors" => creators,
+#           "description" => parse_attributes(descriptions, content: "description", first: true),
+#           "version" => version_info,
+#           "tags" => subjects.present? ? Array.wrap(subjects).map { |k| parse_attributes(k, content: "subject", first: true) } : nil,
+#           "datePublished" => get_date(dates, "Issued") || publication_year,
+#           "dateModified" => get_date(dates, "Updated"),
+#           "publisher" => publisher,
+#           "license" => Array.wrap(rights_list).map { |l| l["rightsUri"] }.compact.unwrap,
+#         }.compact
+#         JSON.pretty_generate hsh.presence
+#       end
+#     end
+#   end
+# end
diff --git a/spec/find_from_format_spec.rb b/spec/find_from_format_spec.rb
@@ -39,8 +39,13 @@
       expect(subject.find_from_format_by_id(id)).to eq("op")
     end
 
+    it "cff" do
+      id = "https://github.com/citation-file-format/ruby-cff/blob/main/CITATION.cff"
+      expect(subject.find_from_format_by_id(id)).to eq("cff")
+    end
+
     it "codemeta" do
-      id = "https://github.com/datacite/maremma"
+      id = "https://github.com/datacite/maremma/blob/master/codemeta.json"
       expect(subject.find_from_format_by_id(id)).to eq("codemeta")
     end
 
@@ -64,6 +69,11 @@
       filename = "package.json"
       expect(subject.find_from_format_by_filename(filename)).to eq("npm")
     end
+
+    it "cff" do
+      filename = "CITATION.cff"
+      expect(subject.find_from_format_by_filename(filename)).to eq("cff")
+    end
   end
 
   context "find_from_format_by_string" do
@@ -91,6 +101,11 @@
       expect(subject.find_from_format_by_string(string)).to eq("codemeta")
     end
 
+    it "cff" do
+      string = IO.read(fixture_path + 'CITATION.cff').strip
+      expect(subject.find_from_format_by_string(string)).to eq("cff")
+    end
+
     it "schema_org" do
       string = IO.read(fixture_path + 'schema_org_topmed.json').strip
       expect(subject.find_from_format_by_string(string)).to eq("schema_org")