Skip to content
This repository has been archived by the owner on Oct 8, 2020. It is now read-only.

Feature/flink jena #14

Merged
merged 28 commits into from
Jun 28, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
44067c4
Flink reasoning with Jena datastructures.
LorenzBuehmann Jun 26, 2018
24d641b
Merge branch 'develop' into feature/flink-jena
LorenzBuehmann Jun 27, 2018
2df2c0c
Cont. Flink RDF
LorenzBuehmann Jun 27, 2018
7d231ec
Minor Pom changes
LorenzBuehmann Dec 9, 2016
272f0e7
Flink reasoning with Jena datastructures.
LorenzBuehmann Jun 26, 2018
69624cf
Cont. Flink RDF
LorenzBuehmann Jun 27, 2018
8719a31
Use RDF layer for loading/writing
LorenzBuehmann Jun 16, 2019
ec65954
Merge branch 'feature/flink-jena' of github.com:SANSA-Stack/SANSA-Inf…
LorenzBuehmann Jun 16, 2019
4d99e4f
Key type wrapper.
LorenzBuehmann Jun 17, 2019
fc07298
Flink needs either key type or key selector function for join()
LorenzBuehmann Jun 17, 2019
be1bf6f
Minor changes in main entry class, e.g. always write to disk and bump…
LorenzBuehmann Jun 17, 2019
3a34c4a
Merge branch 'develop' into feature/flink-jena
LorenzBuehmann Jun 18, 2019
095b5a0
boolean function utils.
LorenzBuehmann Jun 27, 2019
04effc9
POM cleanup
LorenzBuehmann Jun 27, 2019
3dd33b9
Merge remote-tracking branch 'origin/feature/0.6.0-SNAPSHOT' into fea…
LorenzBuehmann Jun 27, 2019
d5e6b6c
Utils simplified
LorenzBuehmann Jun 27, 2019
7cf311c
Extended test output if test fails.
LorenzBuehmann Jun 27, 2019
f66a9e0
Simplified conversion from RDD[Triple] to Jena Model
LorenzBuehmann Jun 27, 2019
79e2e3a
minor changes in I/O
LorenzBuehmann Jun 27, 2019
9da7479
subtraction operation for Flink DataSet with Jena Triple
LorenzBuehmann Jun 27, 2019
2e84e5b
Improved Flink conformance test setup
LorenzBuehmann Jun 27, 2019
89394b0
log output of test base class
LorenzBuehmann Jun 27, 2019
af1ff94
Flink conformance test clean up
LorenzBuehmann Jun 28, 2019
8a4d6d3
Flink reasoning on Triple DataSet
LorenzBuehmann Jun 28, 2019
634099a
Added missing deps for dist package
LorenzBuehmann Jun 28, 2019
2ddb2f9
Minor
LorenzBuehmann Jun 28, 2019
b5e03b2
Scalastyle cleanup
LorenzBuehmann Jun 28, 2019
50260fe
Merge branch 'develop' into feature/flink-jena
GezimSejdiu Jun 28, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
minor changes in I/O
  • Loading branch information
LorenzBuehmann committed Jun 27, 2019
commit 79e2e3a17008bdcac977bc456c5aca1360153051
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ object RDFGraphLoader {
}

def loadFromDisk(paths: Seq[URI], env: ExecutionEnvironment): RDFGraph = {
RDFGraph(NTriplesReader.load(env, paths))
RDFGraph(NTriplesReader.load(env, paths).name("triples"))
}

def main(args: Array[String]): Unit = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ import java.nio.charset.StandardCharsets
import org.apache.flink.api.common.operators.Order
import org.apache.flink.api.scala._
import org.apache.flink.core.fs.FileSystem
import org.apache.jena.graph.GraphUtil
import org.apache.jena.rdf.model.{Model, ModelFactory}
import org.apache.jena.sparql.graph.GraphFactory
import org.apache.jena.sparql.util.TripleComparator
import org.slf4j.LoggerFactory

Expand Down Expand Up @@ -52,32 +54,36 @@ object RDFGraphWriter {

// sort triples if enabled
val tmp = if (sorted) {
graph.triples// .sortPartition(t => t, Order.ASCENDING) // map(t => (t, t)).sortPartition(1, Order.DESCENDING).map(_._1)
graph.triples.sortPartition(_.hashCode(), Order.ASCENDING)
} else {
graph.triples
}

if (singleFile) {
tmp.setParallelism(1)
}

tmp
val sink = tmp
.map(new JenaTripleToNTripleString()) // to N-TRIPLES string
.writeAsText(path.toString, writeMode = FileSystem.WriteMode.OVERWRITE)

// write to single file if enabled
if (singleFile) {
sink.setParallelism(1)
}

logger.info("finished writing triples to disk in " + (System.currentTimeMillis()-startTime) + "ms.")
}

/**
* Converts an RDF graph to an Apache Jena in-memory model.
*
* @note For large graphs this can be too expensive
* and lead to an OOM exception
*
* @param graph the RDF graph
*
* @return the in-memory Apache Jena model containing the triples
*/
def convertToModel(graph: RDFGraph) : Model = {
val modelString = graph.triples.map(new JenaTripleToNTripleString())
.collect().mkString("\n")

val model = ModelFactory.createDefaultModel()

if(!modelString.trim.isEmpty) {
model.read(new ByteArrayInputStream(modelString.getBytes(StandardCharsets.UTF_8)), null, "N-TRIPLES")
}

GraphUtil.add(model.getGraph, graph.triples.collect().toArray)
model
}
}