Merge pull request apache#5 from lewismc/SDAP-18
SDAP-18 Review code contribution from Sigee
lewismc authored Mar 7, 2018
2 parents 38b8277 + 0a6b88f commit 09babed
Showing 36 changed files with 314 additions and 389 deletions.
@@ -8,7 +8,7 @@
 import org.apache.sdap.mudrod.recommendation.pre.SessionCooccurence;
 import org.apache.sdap.mudrod.recommendation.process.AbstractBasedSimilarity;
 import org.apache.sdap.mudrod.recommendation.process.VariableBasedSimilarity;
-import org.apache.sdap.mudrod.recommendation.process.sessionBasedCF;
+import org.apache.sdap.mudrod.recommendation.process.SessionBasedCF;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

@@ -60,7 +60,7 @@ public void process() {
     DiscoveryStepAbstract cbCF = new VariableBasedSimilarity(this.props, this.es, this.spark);
     cbCF.execute();
 
-    DiscoveryStepAbstract sbCF = new sessionBasedCF(this.props, this.es, this.spark);
+    DiscoveryStepAbstract sbCF = new SessionBasedCF(this.props, this.es, this.spark);
     sbCF.execute();
 
     endTime = System.currentTimeMillis();
@@ -99,11 +99,11 @@ public void preprocess() {
 
     ArrayList<String> inputList = (ArrayList<String>) getFileList(props.getProperty(MudrodConstants.DATA_DIR));
 
-    for (int i = 0; i < inputList.size(); i++) {
-      timeSuffix = inputList.get(i);
+    for (String anInputList : inputList) {
+      timeSuffix = anInputList;
       props.put(MudrodConstants.TIME_SUFFIX, timeSuffix);
       startTime = System.currentTimeMillis();
-      LOG.info("Processing logs dated {}", inputList.get(i));
+      LOG.info("Processing logs dated {}", anInputList);
 
       DiscoveryStepAbstract im = new ImportLogFile(this.props, this.es, this.spark);
       im.execute();
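
Note: the recurring refactor in this commit swaps indexed loops for Java's enhanced for statement, which removes the index bookkeeping and the repeated get(i) calls without changing behavior. A minimal before/after sketch (class and data are hypothetical, for illustration only):

import java.util.Arrays;
import java.util.List;

public class LoopRefactorDemo {
  public static void main(String[] args) {
    List<String> inputList = Arrays.asList("201601", "201602");

    // Before: the index is used only to fetch the element
    for (int i = 0; i < inputList.size(); i++) {
      System.out.println("Processing logs dated " + inputList.get(i));
    }

    // After: the enhanced for statement, as applied throughout this commit
    for (String suffix : inputList) {
      System.out.println("Processing logs dated " + suffix);
    }
  }
}
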
@@ -122,7 +122,7 @@ public void preprocess() {
 
       endTime = System.currentTimeMillis();
 
-      LOG.info("Web log preprocessing for logs dated {} complete. Time elapsed {} seconds.", inputList.get(i), (endTime - startTime) / 1000);
+      LOG.info("Web log preprocessing for logs dated {} complete. Time elapsed {} seconds.", anInputList, (endTime - startTime) / 1000);
     }
 
     DiscoveryStepAbstract hg = new HistoryGenerator(this.props, this.es, this.spark);
@@ -140,8 +140,8 @@ public void preprocess() {
   public void logIngest() {
     LOG.info("Starting Web log ingest.");
     ArrayList<String> inputList = (ArrayList<String>) getFileList(props.getProperty(MudrodConstants.DATA_DIR));
-    for (int i = 0; i < inputList.size(); i++) {
-      timeSuffix = inputList.get(i);
+    for (String anInputList : inputList) {
+      timeSuffix = anInputList;
       props.put("TimeSuffix", timeSuffix);
       DiscoveryStepAbstract im = new ImportLogFile(this.props, this.es, this.spark);
       im.execute();
@@ -157,8 +157,8 @@ public void logIngest() {
   public void sessionRestruct() {
     LOG.info("Starting Session reconstruction.");
     ArrayList<String> inputList = (ArrayList<String>) getFileList(props.getProperty(MudrodConstants.DATA_DIR));
-    for (int i = 0; i < inputList.size(); i++) {
-      timeSuffix = inputList.get(i); // change timeSuffix dynamically
+    for (String anInputList : inputList) {
+      timeSuffix = anInputList; // change timeSuffix dynamically
       props.put(MudrodConstants.TIME_SUFFIX, timeSuffix);
       DiscoveryStepAbstract cd = new CrawlerDetection(this.props, this.es, this.spark);
       cd.execute();
@@ -168,10 +168,9 @@ public List<String> customAnalyzing(String indexName, List<String> list) throws
     if (list == null) {
       return list;
     }
-    int size = list.size();
     List<String> customlist = new ArrayList<>();
-    for (int i = 0; i < size; i++) {
-      customlist.add(this.customAnalyzing(indexName, list.get(i)));
+    for (String aList : list) {
+      customlist.add(this.customAnalyzing(indexName, aList));
     }
 
     return customlist;
@@ -224,9 +223,7 @@ public List<String> getIndexListWithPrefix(Object object) {
     String[] indices = client.admin().indices().getIndex(new GetIndexRequest()).actionGet().getIndices();
 
     ArrayList<String> indexList = new ArrayList<>();
-    int length = indices.length;
-    for (int i = 0; i < length; i++) {
-      String indexName = indices[i];
+    for (String indexName : indices) {
       if (indexName.startsWith(object.toString())) {
         indexList.add(indexName);
       }
32 changes: 0 additions & 32 deletions core/src/main/java/org/apache/sdap/mudrod/driver/SparkDriver.java
@@ -19,43 +19,11 @@
 import org.apache.spark.serializer.KryoSerializer;
 import org.apache.spark.sql.SQLContext;
 
-import java.io.File;
 import java.io.Serializable;
-import java.net.URISyntaxException;
 import java.util.Properties;
-//import org.apache.spark.sql.SparkSession;
 
 public class SparkDriver implements Serializable {
 
-  //TODO the commented out code below is the API uprgade
-  //for Spark 2.0.0. It requires a large upgrade and simplification
-  //across the mudrod codebase so should be done in an individual ticket.
-  //  /**
-  //  *
-  //  */
-  //  private static final long serialVersionUID = 1L;
-  //  private SparkSession builder;
-  //
-  //  public SparkDriver() {
-  //    builder = SparkSession.builder()
-  //        .master("local[2]")
-  //        .config("spark.hadoop.validateOutputSpecs", "false")
-  //        .config("spark.files.overwrite", "true")
-  //        .getOrCreate();
-  //  }
-  //
-  //  public SparkSession getBuilder() {
-  //    return builder;
-  //  }
-  //
-  //  public void setBuilder(SparkSession builder) {
-  //    this.builder = builder;
-  //  }
-  //
-  //  public void close() {
-  //    builder.stop();
-  //  }
-
   /**
    *
    */
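
Note: the deleted block above was a commented-out sketch of the Spark 2.0 upgrade, explicitly deferred to its own ticket. For reference, that sketch as a compilable class would look roughly like this (class name hypothetical; the master and config values are copied from the removed comment):

import java.io.Serializable;
import org.apache.spark.sql.SparkSession;

public class SparkSessionDriver implements Serializable {

  private final SparkSession session;

  public SparkSessionDriver() {
    // Mirrors the removed comment: local two-thread master plus the two
    // output/overwrite settings the old SparkConf-based driver relied on.
    session = SparkSession.builder()
        .master("local[2]")
        .config("spark.hadoop.validateOutputSpecs", "false")
        .config("spark.files.overwrite", "true")
        .getOrCreate();
  }

  public SparkSession getSession() {
    return session;
  }

  public void close() {
    session.stop();
  }
}
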
@@ -167,8 +167,7 @@ public Properties loadConfig() {
     Element rootNode = document.getRootElement();
     List<Element> paraList = rootNode.getChildren("para");
 
-    for (int i = 0; i < paraList.size(); i++) {
-      Element paraNode = paraList.get(i);
+    for (Element paraNode : paraList) {
       String attributeName = paraNode.getAttributeValue("name");
       if (MudrodConstants.SVM_SGD_MODEL.equals(attributeName)) {
         props.put(attributeName, decompressSVMWithSGDModel(paraNode.getTextTrim()));
@@ -148,7 +148,7 @@ private void harvestMetadatafromWeb() {
       int docId = startIndex + i;
       File itemfile = new File(props.getProperty(MudrodConstants.RAW_METADATA_PATH) + "/" + docId + ".json");
 
-      try (FileWriter fw = new FileWriter(itemfile.getAbsoluteFile()); BufferedWriter bw = new BufferedWriter(fw);) {
+      try (FileWriter fw = new FileWriter(itemfile.getAbsoluteFile()); BufferedWriter bw = new BufferedWriter(fw)) {
         itemfile.createNewFile();
         bw.write(item.toString());
       } catch (IOException e) {
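
Note: the fix here only removes a stray semicolon before the closing parenthesis of the resource list, but the construct deserves a comment: try-with-resources closes both writers automatically, in reverse declaration order, even when write() throws. A self-contained sketch (the temp-file path is arbitrary):

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;

public class TryWithResourcesDemo {
  public static void main(String[] args) {
    File itemfile = new File(System.getProperty("java.io.tmpdir"), "item.json");
    // bw is closed first, then fw; no finally block is needed.
    try (FileWriter fw = new FileWriter(itemfile); BufferedWriter bw = new BufferedWriter(fw)) {
      bw.write("{\"Dataset-ShortName\": \"demo\"}");
    } catch (IOException e) {
      e.printStackTrace();
    }
  }
}
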
@@ -319,15 +319,15 @@ private void splitString(String oristr, List<String> list) {
 
     String strs[] = oristr.trim().split(",");
     if (strs != null) {
-      for (int i = 0; i < strs.length; i++) {
-        String str = strs[i].trim();
+      for (String str1 : strs) {
+        String str = str1.trim();
         if (str.startsWith(",") || str.startsWith("\"")) {
           str = str.substring(1);
         }
         if (str.endsWith(",") || str.endsWith("\"")) {
           str = str.substring(0, str.length() - 1);
         }
-        if (str == "") {
+        if ("".equals(str)) {
           continue;
         }
         list.add(str);
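
Note: the last change in this hunk is a real bug fix, not a style fix. In Java, str == "" compares object references, so it is true only when both sides are the same interned String; "".equals(str) compares contents and also tolerates null. A small demonstration:

public class StringEqualityDemo {
  public static void main(String[] args) {
    String empty = new String("");         // same contents, different object
    System.out.println(empty == "");       // false: reference comparison
    System.out.println("".equals(empty));  // true: content comparison
    System.out.println("".equals(null));   // false, with no NullPointerException
  }
}
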
@@ -167,17 +167,17 @@ public Element findChild(String str, Element ele) {
   public void getAllClass() throws IOException {
     List<?> classElements = rootNode.getChildren("Class", Namespace.getNamespace("owl", owl_namespace));
 
-    for (int i = 0; i < classElements.size(); i++) {
-      Element classElement = (Element) classElements.get(i);
+    for (Object classElement1 : classElements) {
+      Element classElement = (Element) classElement1;
       String className = classElement.getAttributeValue("about", Namespace.getNamespace("rdf", rdf_namespace));
 
       if (className == null) {
         className = classElement.getAttributeValue("ID", Namespace.getNamespace("rdf", rdf_namespace));
       }
 
       List<?> subclassElements = classElement.getChildren("subClassOf", Namespace.getNamespace("rdfs", rdfs_namespace));
-      for (int j = 0; j < subclassElements.size(); j++) {
-        Element subclassElement = (Element) subclassElements.get(j);
+      for (Object subclassElement1 : subclassElements) {
+        Element subclassElement = (Element) subclassElement1;
         String subclassName = subclassElement.getAttributeValue("resource", Namespace.getNamespace("rdf", rdf_namespace));
         if (subclassName == null) {
           Element allValuesFromEle = findChild("allValuesFrom", subclassElement);
@@ -192,8 +192,8 @@ public void getAllClass() throws IOException {
       }
 
       List equalClassElements = classElement.getChildren("equivalentClass", Namespace.getNamespace("owl", owl_namespace));
-      for (int k = 0; k < equalClassElements.size(); k++) {
-        Element equalClassElement = (Element) equalClassElements.get(k);
+      for (Object equalClassElement1 : equalClassElements) {
+        Element equalClassElement = (Element) equalClassElement1;
         String equalClassElementName = equalClassElement.getAttributeValue("resource", Namespace.getNamespace("rdf", rdf_namespace));
 
         if (equalClassElementName != null) {
@@ -116,12 +116,10 @@ public void load() {
    */
   @Override
   public void load(String[] urls) {
-    for (int i = 0; i < urls.length; i++) {
-      String url = urls[i].trim();
-      if (!"".equals(url))
-        if (LOG.isInfoEnabled()) {
-          LOG.info("Reading and processing {}", url);
-        }
+    for (String url1 : urls) {
+      String url = url1.trim();
+      if (!"".equals(url) && LOG.isInfoEnabled())
+        LOG.info("Reading and processing {}", url);
       load(ontologyModel, url);
     }
     parser.parse(ontology, ontologyModel);
@@ -24,7 +24,6 @@
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
-import java.util.Set;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
@@ -98,7 +98,7 @@ private Double getVersionNum(String version) {
     if (version == null) {
       return 0.0;
     }
-    Double versionNum = 0.0;
+    Double versionNum;
     Pattern p = Pattern.compile(".*[a-zA-Z].*");
     if ("Operational/Near-Real-Time".equals(version)) {
       versionNum = 2.0;
@@ -197,7 +197,7 @@ public Double getProLevelNum(String pro) {
     if (pro == null) {
       return 1.0;
     }
-    Double proNum = 0.0;
+    Double proNum;
     Pattern p = Pattern.compile(".*[a-zA-Z].*");
     if (pro.matches("[0-9]{1}[a-zA-Z]{1}")) {
       proNum = Double.parseDouble(pro.substring(0, 1));
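
Note: dropping the "= 0.0" initializers in this file is safe because every branch that follows assigns the variable before it is read, and javac's definite-assignment analysis enforces that; if a branch were ever missed, the file would stop compiling instead of silently using 0.0. A toy version of the pattern (method and values hypothetical, loosely modeled on getVersionNum):

public class DefiniteAssignmentDemo {
  static double versionScore(String version) {
    double versionNum; // no dummy initializer needed
    if ("Operational/Near-Real-Time".equals(version)) {
      versionNum = 2.0;
    } else {
      versionNum = 1.0; // every path assigns before the read below
    }
    return versionNum;
  }

  public static void main(String[] args) {
    System.out.println(versionScore("Operational/Near-Real-Time")); // 2.0
  }
}
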
@@ -109,9 +109,7 @@ public JavaPairRDD<String, List<String>> removeRetiredDataset(ESDriver es, JavaP
       public Tuple2<String, List<String>> call(Tuple2<String, List<String>> arg0) throws Exception {
         List<String> oriDatasets = arg0._2;
         List<String> newDatasets = new ArrayList<>();
-        int size = oriDatasets.size();
-        for (int i = 0; i < size; i++) {
-          String name = oriDatasets.get(i);
+        for (String name : oriDatasets) {
           if (nameMap.containsKey(name)) {
             newDatasets.add(nameMap.get(name));
           }
@@ -29,12 +29,12 @@
 /**
  * ClassName: Recommend metedata based on session level co-occurrence
  */
-public class sessionBasedCF extends DiscoveryStepAbstract {
+public class SessionBasedCF extends DiscoveryStepAbstract {
 
-  private static final Logger LOG = LoggerFactory.getLogger(sessionBasedCF.class);
+  private static final Logger LOG = LoggerFactory.getLogger(SessionBasedCF.class);
 
   /**
-   * Creates a new instance of sessionBasedCF.
+   * Creates a new instance of SessionBasedCF.
    *
    * @param props
    *          the Mudrod configuration
@@ -43,7 +43,7 @@ public class sessionBasedCF extends DiscoveryStepAbstract {
    * @param spark
    *          the spark drive
    */
-  public sessionBasedCF(Properties props, ESDriver es, SparkDriver spark) {
+  public SessionBasedCF(Properties props, ESDriver es, SparkDriver spark) {
     super(props, es, spark);
   }
 
@@ -15,8 +15,11 @@
 
 import java.io.IOException;
 import java.io.Serializable;
-import java.text.DecimalFormat;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
 
 import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
 
@@ -29,13 +32,10 @@ public class VariableBasedSimilarity extends DiscoveryStepAbstract implements Se
 
   private static final Logger LOG = LoggerFactory.getLogger(VariableBasedSimilarity.class);
 
-  private DecimalFormat df = new DecimalFormat("#.000");
   // a map from variable to its type
   public Map<String, Integer> variableTypes;
   public Map<String, Integer> variableWeights;
 
-  private static final Integer VAR_SPATIAL = 1;
-  private static final Integer VAR_TEMPORAL = 2;
   private static final Integer VAR_CATEGORICAL = 3;
   private static final Integer VAR_ORDINAL = 4;
 
@@ -155,14 +155,10 @@ public void VariableBasedSimilarity(ESDriver es) {
       }
     }
 
-    int size = metadatas.size();
-    for (int i = 0; i < size; i++) {
-      Map<String, Object> metadataA = metadatas.get(i);
+    for (Map<String, Object> metadataA : metadatas) {
       String shortNameA = (String) metadataA.get("Dataset-ShortName");
 
-      for (int j = 0; j < size; j++) {
-        metadataA = metadatas.get(i);
-        Map<String, Object> metadataB = metadatas.get(j);
+      for (Map<String, Object> metadataB : metadatas) {
         String shortNameB = (String) metadataB.get("Dataset-ShortName");
 
         try {
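
Note: besides the for-each conversion, this hunk deletes the redundant re-read of metadataA inside the inner loop (the old "metadataA = metadatas.get(i);" line did nothing useful). What remains is a plain all-pairs walk over the metadata list; a reduced sketch (field name taken from the diff, data hypothetical):

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class AllPairsDemo {
  public static void main(String[] args) {
    List<Map<String, Object>> metadatas = new ArrayList<>();
    for (String name : new String[] { "A", "B", "C" }) {
      Map<String, Object> m = new HashMap<>();
      m.put("Dataset-ShortName", name);
      metadatas.add(m);
    }
    // All ordered pairs, including (x, x); the outer element is fixed per
    // iteration and never needs re-fetching in the inner loop.
    for (Map<String, Object> metadataA : metadatas) {
      String shortNameA = (String) metadataA.get("Dataset-ShortName");
      for (Map<String, Object> metadataB : metadatas) {
        String shortNameB = (String) metadataB.get("Dataset-ShortName");
        System.out.println(shortNameA + " vs " + shortNameB);
      }
    }
  }
}
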
@@ -230,10 +226,10 @@ public void spatialSimilarity(Map<String, Object> metadataA, Map<String, Object>
 
   public void temporalSimilarity(Map<String, Object> metadataA, Map<String, Object> metadataB, XContentBuilder contentBuilder) throws IOException {
 
-    double similarity = 0.0;
+    double similarity;
     double startTimeA = Double.parseDouble((String) metadataA.get("Dataset-DatasetCoverage-StartTimeLong"));
     String endTimeAStr = (String) metadataA.get("Dataset-DatasetCoverage-StopTimeLong");
-    double endTimeA = 0.0;
+    double endTimeA;
     if ("".equals(endTimeAStr)) {
       endTimeA = System.currentTimeMillis();
     } else {
@@ -243,15 +239,15 @@ public void temporalSimilarity(Map<String, Object> metadataA, Map<String, Object
 
     double startTimeB = Double.parseDouble((String) metadataB.get("Dataset-DatasetCoverage-StartTimeLong"));
     String endTimeBStr = (String) metadataB.get("Dataset-DatasetCoverage-StopTimeLong");
-    double endTimeB = 0.0;
+    double endTimeB;
     if ("".equals(endTimeBStr)) {
       endTimeB = System.currentTimeMillis();
     } else {
       endTimeB = Double.parseDouble(endTimeBStr);
     }
     double timespanB = endTimeB - startTimeB;
 
-    double intersect = 0.0;
+    double intersect;
     if (startTimeB >= endTimeA || endTimeB <= startTimeA) {
       intersect = 0.0;
     } else if (startTimeB >= startTimeA && endTimeB <= endTimeA) {
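
Note: the branches above compute the overlap of the two coverage intervals [startTimeA, endTimeA] and [startTimeB, endTimeB]; the hunk is cut off before the remaining cases. The visible cases agree with the usual closed form for interval intersection; a compact sketch (the final normalization is an assumption, since the tail of the method is not shown):

public class TemporalOverlapDemo {
  // Closed form equivalent to the branch cases visible in the diff:
  // overlap length of [startA, endA] and [startB, endB], clamped at 0.
  static double intersect(double startA, double endA, double startB, double endB) {
    return Math.max(0.0, Math.min(endA, endB) - Math.max(startA, startB));
  }

  public static void main(String[] args) {
    double startA = 0, endA = 100, startB = 40, endB = 160;
    double overlap = intersect(startA, endA, startB, endB); // 60.0
    // Hypothetical similarity: overlap relative to the longer timespan.
    double similarity = overlap / Math.max(endA - startA, endB - startB);
    System.out.println(similarity); // 0.5
  }
}
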
@@ -283,7 +279,6 @@ public void categoricalVariablesSimilarity(Map<String, Object> metadataA, Map<St
     if (aList != null && bList != null) {
 
       int lengthA = aList.size();
-      int lengthB = bList.size();
       List<String> newAList = new ArrayList<>(aList);
       List<String> newBList = new ArrayList<>(bList);
       newAList.retainAll(newBList);
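
Note: this method measures the overlap of two term lists with retainAll, which mutates its receiver; that is why both lists are copied into fresh ArrayLists first. A reduced sketch (the normalization by lengthA is an assumption, since the end of the method is cut off):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class CategoricalOverlapDemo {
  public static void main(String[] args) {
    List<String> aList = Arrays.asList("ocean", "wind", "salinity");
    List<String> bList = Arrays.asList("wind", "salinity", "ice");

    int lengthA = aList.size();
    // Copy before retainAll: the originals stay intact, and Arrays.asList
    // views are fixed-size anyway.
    List<String> newAList = new ArrayList<>(aList);
    List<String> newBList = new ArrayList<>(bList);
    newAList.retainAll(newBList); // common terms: [wind, salinity]

    double similarity = (double) newAList.size() / lengthA;
    System.out.println(similarity); // 0.666...
  }
}
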
@@ -183,9 +183,8 @@ public Map<String, Double> getRelatedData(String type, String input, int num) {
     Map<String, Double> sortedMap = new HashMap<>();
     try {
       List<LinkedTerm> links = getRelatedDataFromES(type, input, num);
-      int size = links.size();
-      for (int i = 0; i < size; i++) {
-        termsMap.put(links.get(i).term, links.get(i).weight);
+      for (LinkedTerm link : links) {
+        termsMap.put(link.term, link.weight);
       }
 
       sortedMap = sortMapByValue(termsMap); // terms_map will be empty
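
Note: sortMapByValue itself is not part of this diff. A common Java 7-era shape for such a helper sorts the entries by descending weight and preserves that order in a LinkedHashMap (a sketch under that assumption, not the project's actual implementation):

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class SortMapByValueDemo {
  static Map<String, Double> sortMapByValue(Map<String, Double> map) {
    List<Map.Entry<String, Double>> entries = new ArrayList<>(map.entrySet());
    Collections.sort(entries, new Comparator<Map.Entry<String, Double>>() {
      @Override
      public int compare(Map.Entry<String, Double> a, Map.Entry<String, Double> b) {
        return b.getValue().compareTo(a.getValue()); // descending weight
      }
    });
    Map<String, Double> sorted = new LinkedHashMap<>(); // keeps insertion order
    for (Map.Entry<String, Double> e : entries) {
      sorted.put(e.getKey(), e.getValue());
    }
    return sorted;
  }

  public static void main(String[] args) {
    Map<String, Double> termsMap = new HashMap<>();
    termsMap.put("sst", 0.7);
    termsMap.put("ocean wind", 0.9);
    System.out.println(sortMapByValue(termsMap)); // {ocean wind=0.9, sst=0.7}
  }
}
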
@@ -112,9 +112,7 @@ public List<Tuple2<String, String>> loadMetadataFromES(ESDriver es, List<String>
       String shortName = (String) result.get("Dataset-ShortName");
 
       String filedStr = "";
-      int size = variables.size();
-      for (int i = 0; i < size; i++) {
-        String filed = variables.get(i);
+      for (String filed : variables) {
         Object filedValue = result.get(filed);
 
         if (filedValue != null) {
(Diff truncated: the remaining changed files of the 36 were not loaded in this view.)
