Skip to content
This repository has been archived by the owner on Dec 2, 2021. It is now read-only.

Commit

Permalink
Avoid the ntriples step. Go from JSON-LD to SPARQL
Browse files Browse the repository at this point in the history
  • Loading branch information
jcoyne committed Aug 24, 2018
1 parent 9a42279 commit 3f88a5c
Show file tree
Hide file tree
Showing 20 changed files with 123 additions and 1,414 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ Or install it yourself as:
```
exe/extract call StanfordOrganizations > step1.json
exe/transform call StanfordOrganizationsToJsonList -i step1.json > step2.json
exe/transform call OrganizationsListToTriples -i step2.json > step3.nt
exe/load call Sparql -i step3.nt
exe/transform call OrganizationsListToJSONLD -i step2.json > step3.jsonld
exe/load call Sparql -i step3.jsonld
```


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,14 @@

require 'traject_plus'
require 'rialto/etl/readers/ndjson_reader'
require 'rialto/etl/writers/organization_ntriples_writer'

extend TrajectPlus::Macros
extend TrajectPlus::Macros::JSON

# This takes in the Newline Delimited JSON, transforms it to JSON-LD, and writes it out as newline-delimited JSON

settings do
provide 'writer_class_name', 'Rialto::Etl::Writers::OrganizationNtriplesWriter'
provide 'writer_class_name', 'Traject::JsonWriter'
provide 'reader_class_name', 'Rialto::Etl::Readers::NDJsonReader'
end

Expand Down
4 changes: 2 additions & 2 deletions lib/rialto/etl/configs/sparql.rb
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# frozen_string_literal: true

require 'rialto/etl/readers/rdf_reader'
require 'rialto/etl/readers/ndjsonld_reader'
require 'rialto/etl/writers/sparql_writer'
settings do
provide 'writer_class_name', 'Rialto::Etl::Writers::SparqlWriter'
provide 'reader_class_name', 'Rialto::Etl::Readers::RDFReader'
provide 'reader_class_name', 'Rialto::Etl::Readers::NDJsonLDReader'
provide 'sparql_writer.update_url', ::Settings.sparql_writer.update_url
end
15 changes: 6 additions & 9 deletions lib/rialto/etl/loaders/sparql.rb
Original file line number Diff line number Diff line change
@@ -1,30 +1,27 @@
# frozen_string_literal: true

# frozen_string_literal: true

require 'traject'
require 'rdf'
require 'rdf/ntriples'
require 'json/ld'

module Rialto
module Etl
module Loaders
# Loader that takes Ntriples and puts it in SPARQL
# Loader that takes newline-delimited JSON (with JSON-LD records) and puts it in SPARQL
class Sparql
# A valid file path
attr_reader :input

# Initialize a new instance of the loader
#
# @param input [String] valid file path of an Ntriples file
# @param input [String] valid file path of a newline delimited JSON-LD file
def initialize(input:)
@input = input
end

# Load a RDF file into a SPARQL endpoint, using Traject
# Load a JSON-LD file into a SPARQL endpoint, using Traject
def load
RDF::Reader.open(input) do |reader|
loader.process(reader)
File.open(input, 'r') do |stream|
loader.process(stream)
end
end

Expand Down
12 changes: 7 additions & 5 deletions lib/rialto/etl/readers/ndjson_reader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,15 @@ def each
return enum_for(:each) unless block_given?

@input_stream.each_with_index do |json, i|
begin
yield JSON.parse(json)
rescue JSON::ParserError => e
logger.error("Problem with JSON record on line #{i}: #{e.message}")
end
yield decode(json, i)
end
end

# Parse a single line of newline-delimited JSON.
#
# A malformed line is logged rather than raised, so one bad record does
# not abort the whole read.
#
# @param row [String] one line of the input stream
# @param line_number [Integer] index of the line, reported in the error log
# @return [Object, nil] the parsed JSON value, or nil when the line is not
#   valid JSON
def decode(row, line_number)
  JSON.parse(row)
rescue JSON::ParserError => e
  logger.error("Problem with JSON record on line #{line_number}: #{e.message}")
  # Return nil explicitly so callers never receive the logger's return
  # value in place of a record.
  nil
end
end
end
end
Expand Down
19 changes: 19 additions & 0 deletions lib/rialto/etl/readers/ndjsonld_reader.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# frozen_string_literal: true

require 'rialto/etl/readers/ndjson_reader'
# require 'rdf'

module Rialto
  module Etl
    module Readers
      # Read newline-delimited JSON file, where each line is a JSON-LD object,
      # and turn each line into an RDF graph.
      # UTF-8 encoding is required.
      class NDJsonLDReader < NDJsonReader
        # Convert one line of input into an RDF graph.
        #
        # Parsing is delegated to NDJsonReader#decode, which logs a
        # JSON::ParserError instead of raising it. In that case the parent
        # does not return a parsed document, so the value is checked before
        # it is handed to the JSON-LD API.
        #
        # @param row [String] one line of the input stream
        # @param line_number [Integer] index of the line, forwarded to the
        #   parent for its error message
        # @return [RDF::Graph] the statements described by the row; empty
        #   when the row did not parse to a JSON object or array
        def decode(row, line_number)
          document = super
          graph = RDF::Graph.new
          # A JSON-LD document is a JSON object or array; anything else means
          # the line could not be parsed (already logged by the parent) or is
          # not a document, so there is nothing to convert.
          return graph unless document.is_a?(Hash) || document.is_a?(Array)

          graph << JSON::LD::API.toRdf(document)
        end
      end
    end
  end
end
25 changes: 0 additions & 25 deletions lib/rialto/etl/readers/rdf_reader.rb

This file was deleted.

2 changes: 1 addition & 1 deletion lib/rialto/etl/transformers.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# frozen_string_literal: true

require 'rialto/etl/transformers/organizations_list_to_triples'
require 'rialto/etl/transformers/organizations_list_to_jsonld'
require 'rialto/etl/transformers/stanford_organizations_to_json_list'

module Rialto
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ module Rialto
module Etl
module Transformers
# Transformer turning Stanford org info into Vivo format
class OrganizationsListToTriples
class OrganizationsListToJSONLD
# A valid file path
attr_reader :input

Expand All @@ -28,7 +28,7 @@ def transform

def transformer
@transformer ||= Traject::Indexer.new.tap do |indexer|
indexer.load_config_file('lib/rialto/etl/configs/organizations_to_triples.rb')
indexer.load_config_file('lib/rialto/etl/configs/organizations_to_jsonld.rb')
end
end
end
Expand Down
25 changes: 0 additions & 25 deletions lib/rialto/etl/writers/organization_ntriples_writer.rb

This file was deleted.

2 changes: 1 addition & 1 deletion spec/cli/transform_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
allow(transformer).to receive(:say)
transformer.list
expect(transformer).to have_received(:say)
.with('Transformers supported: StanfordOrganizationsToJsonList, StanfordOrganizationsToVivo')
.with('Transformers supported: OrganizationsListToJSONLD, StanfordOrganizationsToJsonList')
end
end
end
Loading

0 comments on commit 3f88a5c

Please sign in to comment.