Skip to content
This repository has been archived by the owner on Oct 8, 2020. It is now read-only.

Commit

Permalink
Flink needs either key type or key selector function for join()
Browse files Browse the repository at this point in the history
and distinct() operators.
  • Loading branch information
LorenzBuehmann committed Jun 17, 2019
1 parent 4d99e4f commit fc07298
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ class ForwardRuleReasonerRDFS(env: ExecutionEnvironment) extends ForwardRuleReas
.union(
Seq(otherTriples, subClassOfTriplesTrans, subPropertyOfTriplesTrans, typeTriples, triplesRDFS7, triplesRDFS9)
)
.distinct()
.distinct(t => t.hashCode())

// we perform also additional rules if enabled
if (level != SIMPLE) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.api.scala.{DataSet, _}
import org.apache.flink.util.Collector
import org.apache.jena.graph.{Node, Triple}
import org.apache.jena.sparql.util.NodeComparator

import net.sansa_stack.inference.flink.utils.NodeKey
import net.sansa_stack.inference.utils.Profiler

/**
Expand Down Expand Up @@ -186,28 +188,65 @@ trait TransitiveReasoner extends Profiler{
* @return a DataSet containing the transitive closure of the triples
*/
def computeTransitiveClosureOptSemiNaive(triples: DataSet[Triple]): DataSet[Triple] = {
  // Computes the transitive closure with a Flink delta iteration over (subject, object) pairs.
  // All input triples are assumed to share one predicate; it is read from a single sample
  // triple and re-attached to every resulting edge at the end.
  //
  // NOTE: `collect()` executes a Flink job eagerly just to fetch that sample.
  // `headOption` keeps an empty input from failing with a NoSuchElementException.
  triples.first(1).collect().headOption match {
    case None =>
      // nothing to close over: the closure of an empty DataSet is the (empty) input itself
      triples

    case Some(sample) =>
      val pred = sample.getPredicate

      // The Jena Triple is not a Scala tuple, and the delta iteration needs key positions,
      // so we work on (subject, object) pairs whose nodes are wrapped in NodeKey.
      // The initial set of edges is used as input for both the workset and the solution set.
      val initialTC = triples.map(t => (NodeKey(t.getSubject), NodeKey(t.getObject)))

      // upper bound on the number of delta-iteration rounds; every round extends the paths
      // in the workset by one more edge of the initial graph
      val maxIterations = 10

      log.info("computing TC...")

      // One iteration step: for a workset path (a, b) and an initial edge (b, c) emit (a, c).
      // The explicit `joinResult => joinResult match` form is kept on purpose; the step does
      // not read the solution set `s`, it only has to match the signature iterateDelta expects.
      def iterate(s: DataSet[(NodeKey, NodeKey)], ws: DataSet[(NodeKey, NodeKey)])
      : (DataSet[(NodeKey, NodeKey)], DataSet[(NodeKey, NodeKey)]) = {
        val resolvedRedirects = initialTC.join(ws)
          .where(0)   // subject of the initial edge ...
          .equalTo(1) // ... equals the object of the workset path
          .map { joinResult =>
            joinResult match {
              case (redirect, link) => (link._1, redirect._2)
            }
          }
          .name("TC-From-Iteration")
        // the newly derived paths are both the solution-set delta and the next workset
        (resolvedRedirects, resolvedRedirects)
      }

      // Key the solution set on BOTH fields (subject and object). Solution-set entries that
      // share a key replace each other, so keying on the subject alone would keep only a
      // single outgoing edge per subject and silently drop the rest of the closure.
      val tc = initialTC
        .iterateDelta(initialTC, maxIterations, Array(0, 1))(iterate)
        .name("Final-TC")
      log.info("finished computing TC")

      // unwrap the NodeKeys and restore the shared predicate
      tc.map(t => Triple.create(t._1.node, pred, t._2.node))
  }
}


// /**
// * Computes the transitive closure on a DataSet of triples.
// * Note, that the assumption is that all triples do have the same predicate.
// * This implementation uses the Flink iterate operator (see
// * [[https://ci.apache.org/projects/flink/flink-docs-master/dev/batch/iterations.html]])
// *
// * @param triples the DataSet of triples
// * @return a DataSet containing the transitive closure of the triples
// */
// def computeTransitiveClosureOptSemiNaive(triples: DataSet[Triple]): DataSet[Triple] = {
// log.info("computing TC...")
// def iterate(s: DataSet[Triple], ws: DataSet[Triple]): (DataSet[Triple], DataSet[Triple]) = {
// val resolvedRedirects = triples.join(ws)
// .where { _.getSubject }
// .equalTo { _.getObject }
// .map { joinResult => joinResult match {
// case (redirect, link) =>
// Triple.create(link.getSubject, redirect.getPredicate, redirect.getObject)
// }
// }.name("TC-From-Iteration")
// (resolvedRedirects, resolvedRedirects)
// }
//
// val tc = triples
// .iterateDelta(triples, 10, Array("s", "o"))(iterate)
// .name("Final-TC")
// log.info("finished computing TC")
// // .map { cl => cl}
// // .name("Final-Redirect-Result")
// tc
// }

}

0 comments on commit fc07298

Please sign in to comment.