Skip to content
This repository has been archived by the owner on Oct 8, 2020. It is now read-only.

Commit

Permalink
Use RDF layer for loading/writing
Browse files Browse the repository at this point in the history
  • Loading branch information
LorenzBuehmann committed Jun 16, 2019
1 parent 69624cf commit 8719a31
Showing 1 changed file with 4 additions and 26 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,10 @@ package net.sansa_stack.inference.flink.data

import java.net.URI

import scala.collection.JavaConverters._
import scala.language.implicitConversions

import net.sansa_stack.rdf.flink.io.ntriples.NTriplesReader
import org.apache.flink.api.scala.{ExecutionEnvironment, _}
import org.apache.jena.riot.{Lang, RDFDataMgr}

import net.sansa_stack.rdf.benchmark.io.ReadableByteChannelFromIterator


/**
Expand All @@ -28,35 +25,16 @@ object RDFGraphLoader {
}

/**
  * Loads an RDF graph from the files at the given locations.
  *
  * Reading and parsing are delegated to `NTriplesReader.load` from the RDF layer; the resulting
  * data set is wrapped in an [[RDFGraph]].
  *
  * @param paths the locations of the files to load (presumably N-Triples, given the reader used)
  * @param env the Flink execution environment used to create the data set
  * @return the RDF graph wrapping the loaded triples
  */
def loadFromDisk(paths: Seq[URI], env: ExecutionEnvironment): RDFGraph = {
  // The former hand-rolled pipeline (readTextFile per path + union + Jena parsing per partition)
  // was redundant: its result was discarded, and `reduce(_ union _)` failed on an empty `paths`.
  // The RDF layer's reader accepts all paths at once, so it is the single source of the triples.
  RDFGraph(NTriplesReader.load(env, paths))
}

def main(args: Array[String]): Unit = {
if (args.length == 0) println("Usage: RDFGraphLoader <PATH_TO_FILE>")

val path = args(0)

val env = ExecutionEnvironment.getExecutionEnvironment
// val env = ExecutionEnvironment.getExecutionEnvironment
val env = ExecutionEnvironment.createLocalEnvironment(parallelism = 2)

val ds = RDFGraphLoader.loadFromDisk(path, env).triples

Expand Down

0 comments on commit 8719a31

Please sign in to comment.