Skip to content

Commit

Permalink
Merge pull request apache#8 from Yongyao/SDAP-35
Browse files Browse the repository at this point in the history
SDAP-35 Overhaul MUDROD configuration
  • Loading branch information
lewismc authored Mar 13, 2018
2 parents 09babed + 11d053b commit ba9cd45
Show file tree
Hide file tree
Showing 69 changed files with 1,398 additions and 1,480 deletions.
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,8 @@ service/bin/
core/lib
service/lib
core/mudrod.log*
core/.classpath
core/.externalToolBuilders/Maven_Ant_Builder.launch
core/maven-eclipse.xml
service/.classpath
web/.classpath
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ public MudrodAbstract(Properties props, ESDriver es, SparkDriver spark) {
this.es = es;
this.spark = spark;

if (this.props != null) {
if (this.props != null && this.es!=null) {
this.initMudrod();
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,28 +41,28 @@ public OntologyDiscoveryEngine(Properties props, ESDriver es, SparkDriver spark)
* Method of preprocessing ontology
*/
public void preprocess() {
LOG.info("*****************Ontology preprocessing starts******************");
LOG.info("Ontology preprocessing starts");
startTime = System.currentTimeMillis();

DiscoveryStepAbstract at = new AggregateTriples(this.props, this.es, this.spark);
at.execute();

endTime = System.currentTimeMillis();
LOG.info("*****************Ontology preprocessing ends******************Took {}s", (endTime - startTime) / 1000);
LOG.info("Ontology preprocessing ends. Took {}s", (endTime - startTime) / 1000);
}

/**
* Method of processing ontology
*/
public void process() {
LOG.info("*****************Ontology processing starts******************");
LOG.info("Ontology processing starts.");
startTime = System.currentTimeMillis();

DiscoveryStepAbstract ol = new OntologyLinkCal(this.props, this.es, this.spark);
ol.execute();

endTime = System.currentTimeMillis();
LOG.info("*****************Ontology processing ends******************Took {}s", (endTime - startTime) / 1000);
LOG.info("Ontology processing ends. Took {}s", (endTime - startTime) / 1000);
}

public void output() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
import org.apache.sdap.mudrod.driver.SparkDriver;
import org.apache.sdap.mudrod.recommendation.pre.ImportMetadata;
import org.apache.sdap.mudrod.recommendation.pre.MetadataTFIDFGenerator;
import org.apache.sdap.mudrod.recommendation.pre.NormalizeVariables;
import org.apache.sdap.mudrod.recommendation.pre.NormalizeFeatures;
import org.apache.sdap.mudrod.recommendation.pre.SessionCooccurence;
import org.apache.sdap.mudrod.recommendation.process.AbstractBasedSimilarity;
import org.apache.sdap.mudrod.recommendation.process.VariableBasedSimilarity;
import org.apache.sdap.mudrod.recommendation.process.FeatureBasedSimilarity;
import org.apache.sdap.mudrod.recommendation.process.SessionBasedCF;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand All @@ -26,7 +26,7 @@ public RecommendEngine(Properties props, ESDriver es, SparkDriver spark) {

@Override
public void preprocess() {
LOG.info("*****************Recommendation preprocessing starts******************");
LOG.info("Recommendation preprocessing starts.");

startTime = System.currentTimeMillis();

Expand All @@ -39,38 +39,37 @@ public void preprocess() {
DiscoveryStepAbstract sessionMatrixGen = new SessionCooccurence(this.props, this.es, this.spark);
sessionMatrixGen.execute();

DiscoveryStepAbstract transformer = new NormalizeVariables(this.props, this.es, this.spark);
DiscoveryStepAbstract transformer = new NormalizeFeatures(this.props, this.es, this.spark);
transformer.execute();

endTime = System.currentTimeMillis();

LOG.info("*****************Recommendation preprocessing ends******************Took {}s {}", (endTime - startTime) / 1000);
LOG.info("Recommendation preprocessing ends. Took {}s {}", (endTime - startTime) / 1000);
}

@Override
public void process() {
// TODO Auto-generated method stub
LOG.info("*****************Recommendation processing starts******************");
LOG.info("Recommendation processing starts.");

startTime = System.currentTimeMillis();

DiscoveryStepAbstract tfCF = new AbstractBasedSimilarity(this.props, this.es, this.spark);
tfCF.execute();

DiscoveryStepAbstract cbCF = new VariableBasedSimilarity(this.props, this.es, this.spark);
DiscoveryStepAbstract cbCF = new FeatureBasedSimilarity(this.props, this.es, this.spark);
cbCF.execute();

DiscoveryStepAbstract sbCF = new SessionBasedCF(this.props, this.es, this.spark);
sbCF.execute();

endTime = System.currentTimeMillis();

LOG.info("*****************Recommendation processing ends******************Took {}s {}", (endTime - startTime) / 1000);
LOG.info("Recommendation processing ends. Took {}s {}", (endTime - startTime) / 1000);
}

@Override
public void output() {
// TODO Auto-generated method stub

}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,12 @@ public List<String> customAnalyzing(String indexName, List<String> list) throws
}

public void deleteAllByQuery(String index, String type, QueryBuilder query) {
ImmutableOpenMap<String, MappingMetaData> mappings = getClient().admin().cluster().prepareState().execute().actionGet()
.getState().metaData().index(index).getMappings();

//check if the type exists
if (!mappings.containsKey(type)) return;

createBulkProcessor();
SearchResponse scrollResp = getClient().prepareSearch(index).setSearchType(SearchType.QUERY_AND_FETCH).setTypes(type).setScroll(new TimeValue(60000)).setQuery(query).setSize(10000).execute()
.actionGet();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import org.apache.sdap.mudrod.discoveryengine.DiscoveryStepAbstract;
import org.apache.sdap.mudrod.driver.ESDriver;
import org.apache.sdap.mudrod.driver.SparkDriver;
import org.apache.sdap.mudrod.main.MudrodConstants;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
Expand All @@ -42,7 +43,7 @@ public class LinkageIntegration extends DiscoveryStepAbstract {
private static final long serialVersionUID = 1L;
transient List<LinkedTerm> termList = new ArrayList<>();
DecimalFormat df = new DecimalFormat("#.00");
private static final String INDEX_NAME = "indexName";
private static final String INDEX_NAME = MudrodConstants.ES_INDEX_NAME;
private static final String WEIGHT = "weight";

public LinkageIntegration(Properties props, ESDriver es, SparkDriver spark) {
Expand Down Expand Up @@ -108,13 +109,10 @@ public Map<String, Double> appyMajorRule(String input) {
}

double finalWeight = tmp + ((sumModelWeight - 2) * 0.05);
if (finalWeight < 0) {
finalWeight = 0;
}

if (finalWeight > 1) {
finalWeight = 1;
}

if (finalWeight < 0) finalWeight = 0;
if (finalWeight > 1) finalWeight = 1;

termsMap.put(entry.getKey(), Double.parseDouble(df.format(finalWeight)));
}

Expand All @@ -138,7 +136,7 @@ public String getIngeratedList(String input, int num) {
}
count++;
}
LOG.info("\n************************Integrated results***************************");
LOG.info("Integrated results:");
LOG.info(output);
return output;
}
Expand Down Expand Up @@ -173,32 +171,32 @@ public JsonObject getIngeratedListInJson(String input) {
* the similarities from different sources
*/
public Map<String, List<LinkedTerm>> aggregateRelatedTermsFromAllmodel(String input) {
aggregateRelatedTerms(input, props.getProperty("userHistoryLinkageType"));
aggregateRelatedTerms(input, props.getProperty("clickStreamLinkageType"));
aggregateRelatedTerms(input, props.getProperty("metadataLinkageType"));
aggregateRelatedTermsSWEET(input, props.getProperty("ontologyLinkageType"));
aggregateRelatedTerms(input, MudrodConstants.USE_HISTORY_LINKAGE_TYPE);
aggregateRelatedTerms(input, MudrodConstants.CLICK_STREAM_LINKAGE_TYPE);
aggregateRelatedTerms(input, MudrodConstants.METADATA_LINKAGE_TYPE);
aggregateRelatedTermsSWEET(input, MudrodConstants.ONTOLOGY_LINKAGE_TYPE);

return termList.stream().collect(Collectors.groupingBy(w -> w.term));
}

public int getModelweight(String model) {
if (model.equals(props.getProperty("userHistoryLinkageType"))) {
return Integer.parseInt(props.getProperty("userHistory_w"));
if (model.equals(MudrodConstants.USE_HISTORY_LINKAGE_TYPE)) {
return Integer.parseInt(props.getProperty(MudrodConstants.USER_HISTORY_W));
}

if (model.equals(props.getProperty("clickStreamLinkageType"))) {
return Integer.parseInt(props.getProperty("clickStream_w"));
if (model.equals(MudrodConstants.CLICK_STREAM_LINKAGE_TYPE)) {
return Integer.parseInt(props.getProperty(MudrodConstants.CLICKSTREAM_W));
}

if (model.equals(props.getProperty("metadataLinkageType"))) {
return Integer.parseInt(props.getProperty("metadata_w"));
if (model.equals(MudrodConstants.METADATA_LINKAGE_TYPE)) {
return Integer.parseInt(props.getProperty(MudrodConstants.METADATA_W));
}

if (model.equals(props.getProperty("ontologyLinkageType"))) {
return Integer.parseInt(props.getProperty("ontology_w"));
if (model.equals(MudrodConstants.ONTOLOGY_LINKAGE_TYPE)) {
return Integer.parseInt(props.getProperty(MudrodConstants.ONTOLOGY_W));
}

return 999999;
return Integer.MAX_VALUE;
}

/**
Expand Down Expand Up @@ -246,7 +244,7 @@ public void aggregateRelatedTerms(String input, String model) {
public void aggregateRelatedTermsSWEET(String input, String model) {
SearchResponse usrhis = es.getClient().prepareSearch(props.getProperty(INDEX_NAME)).setTypes(model).setQuery(QueryBuilders.termQuery("concept_A", input)).addSort(WEIGHT, SortOrder.DESC)
.setSize(11).execute().actionGet();
LOG.info("\n************************ {} results***************************", model);
LOG.info("{} results:", model);
for (SearchHit hit : usrhis.getHits().getHits()) {
Map<String, Object> result = hit.getSource();
String conceptB = (String) result.get("concept_B");
Expand Down
Loading

0 comments on commit ba9cd45

Please sign in to comment.