diff --git a/Chatbot/QuestionAnswering/__init__.py b/Chatbot/QuestionAnswering/__init__.py index e69de29..4955e7e 100644 --- a/Chatbot/QuestionAnswering/__init__.py +++ b/Chatbot/QuestionAnswering/__init__.py @@ -0,0 +1,3 @@ +import os +import sys +sys.path.append(os.path.dirname(__file__)) diff --git a/Chatbot/QuestionAnswering/chat.py b/Chatbot/QuestionAnswering/chat.py index 33558a5..3dbd073 100644 --- a/Chatbot/QuestionAnswering/chat.py +++ b/Chatbot/QuestionAnswering/chat.py @@ -10,7 +10,7 @@ def main(): chatter = GossipBot() - #chatter.randomTalks() + #chatter.randomTalks(num=1000) chatter.chatTime() @@ -57,7 +57,11 @@ def getResponse(self,query,threshold=50): return reply def randomPick(self, answers): - return answers[random.randrange(0,len(answers))][0] + try: + answer = answers[random.randrange(0,len(answers))][0] + except: + answer = "沒有資料" + return answer def randomTalks(self, num=100): with open("data/Titles.txt",'r',encoding='utf-8') as data: @@ -65,7 +69,7 @@ def randomTalks(self, num=100): for i in range(0,num): query = titles[random.randrange(0,len(titles))] print("User: " + query) - print("MianBot: " +self.getResponse(query)) + print("MianBot: " +self.getResponse(query) + "\n") if __name__=="__main__": main() diff --git a/Chatbot/QuestionAnswering/match.py b/Chatbot/QuestionAnswering/match.py index ee9b1db..65821bc 100644 --- a/Chatbot/QuestionAnswering/match.py +++ b/Chatbot/QuestionAnswering/match.py @@ -2,13 +2,10 @@ import os import random -from responsesEvaluate import Evaluator -from Matcher.fuzzyMatcher import FuzzyMatcher -from Matcher.wordWeightMatcher import WordWeightMatcher -from Matcher.matcher import Matcher - -def main(): - matcherTesting("Fuzzy",removeStopWords=False) +from .responsesEvaluate import Evaluator +from .Matcher.fuzzyMatcher import FuzzyMatcher +from .Matcher.wordWeightMatcher import WordWeightMatcher +from .Matcher.matcher import Matcher def getMatcher(matcherType,removeStopWords=False): @@ -38,13 +35,15 @@ def getMatcher(matcherType,removeStopWords=False): def matcherTesting(matcherType,removeStopWords=False): matcher = getMatcher(matcherType,removeStopWords) + cur_path = os.path.dirname(__file__) + while True: query = input("隨便說些什麼吧: ") title,index = matcher.match(query) sim = matcher.getSimilarity() print("最為相似的標題是 %s ,相似度為 %d " % (title,sim)) - res = json.load(open(os.path.join("data/processed/reply/",str(int(index/1000))+'.json'),'r',encoding='utf-8')) + res = json.load(open(os.path.join(cur_path+"data/processed/reply/",str(int(index/1000))+'.json'),'r',encoding='utf-8')) targetId = index % 1000 #randomId = random.randrange(0,len(res[targetId])) @@ -63,8 +62,11 @@ def woreWeightMatch(): def fuzzyMatch(cleansw=False): + cur_dir = os.getcwd() + os.chdir(os.path.dirname(__file__)) fuzzyMatcher = FuzzyMatcher(segLib="Taiba",removeStopWords=cleansw) fuzzyMatcher.loadTitles(path="data/Titles.txt") + os.chdir(cur_dir) if cleansw: fuzzyMatcher.TitlesSegmentation(cleansw) @@ -79,7 +81,3 @@ def fuzzyMatch(cleansw=False): #fuzzyMatcher.loadStopWords(path="data/stopwords/chinese_sw.txt") #fuzzyMatcher.loadStopWords(path="data/stopwords/ptt_words.txt") #fuzzyMatcher.loadStopWords(path="data/stopwords/specialMarks.txt") - - -if __name__ == '__main__': - main() diff --git a/Chatbot/QuestionAnswering/qaBase.py b/Chatbot/QuestionAnswering/qaBase.py index 286de9d..b56cf5e 100644 --- a/Chatbot/QuestionAnswering/qaBase.py +++ b/Chatbot/QuestionAnswering/qaBase.py @@ -1,36 +1,61 @@ import json +import logging +import os from .match import * from .responsesEvaluate import Evaluator -class QABase(object): +class Answerer(object): - def __init__(self, data_path): + def __init__(self): - """ - Args: - data_path: 指出 data 資料夾位在哪個位置 - """ self.general_questions = [] - self.data_path = data_path + self.path = os.path.dirname(__file__) self.matcher = getMatcher(matcherType="Fuzzy") self.evaluator = Evaluator() + self.testSegment() + + def testSegment(self): + logging.info("測試斷詞模塊中") + try: + self.matcher.wordSegmentation("測試一下斷詞") + logging.info("測試成功") + except Exception as e: + logging.info(repr(e)) + logging.info("模塊載入失敗,請確認data與字典齊全") def getResponse(self, sentence, api_key=None): if api_key is not None: response = self.getCustomQA(sentence,api_key) else: - response = self._getGeneralQA(sentence) + response = self.getGeneralQA(sentence) return response - def getGeneralQA(self, sentence): + def getGeneralQA(self,query,threshold=50): - pass + title,index = self.matcher.match(query) + sim = self.matcher.getSimilarity() + if sim < threshold: + return None + else: + res = json.load(open(os.path.join(self.path+"/data/processed/reply/",str(int(index/1000))+'.json'), + 'r',encoding='utf-8')) + targetId = index % 1000 + candiates = self.evaluator.getBestResponse(res[targetId],topk=3) + reply = self.randomPick(candiates) + return reply + + def randomPick(self, answers): + try: + answer = answers[random.randrange(0,len(answers))][0] + except: + answer = None + return answer def getCustomQA(self, sentence, api_key): #TODO GET USER'S QA BY api_key #customqa_list = json.loads(getUserQA(api_key)) - pass + return None diff --git a/Chatbot/QuestionAnswering/responsesEvaluate.py b/Chatbot/QuestionAnswering/responsesEvaluate.py index 813d3f9..0d17d51 100644 --- a/Chatbot/QuestionAnswering/responsesEvaluate.py +++ b/Chatbot/QuestionAnswering/responsesEvaluate.py @@ -7,7 +7,7 @@ from gensim import corpora # 引入斷詞與停用詞的配置 -from Matcher.matcher import Matcher +from .Matcher.matcher import Matcher class Evaluator(Matcher): """ @@ -21,7 +21,8 @@ def __init__(self,segLib="Taiba"): self.segResponses = [] self.totalWords = 0 - self.debugLog = open("data/EvaluateLog.txt",'w',encoding="utf-8") + self.path = os.path.dirname(__file__) + self.debugLog = open(self.path + "/data/EvaluateLog.txt",'w',encoding="utf-8") self.filteredWords = set() # 必須濾除的回應 @@ -29,9 +30,9 @@ def __init__(self,segLib="Taiba"): self.tokenDictionary = None # 用於分配詞 id,與建置詞袋 # 中文停用詞與特殊符號加載 - self.loadStopWords(path="data/stopwords/chinese_sw.txt") - self.loadStopWords(path="data/stopwords/specialMarks.txt") - self.loadFilterdWord(path="data/stopwords/ptt_words.txt") + self.loadStopWords(path=self.path + "/data/stopwords/chinese_sw.txt") + self.loadStopWords(path=self.path + "/data/stopwords/specialMarks.txt") + self.loadFilterdWord(path=self.path + "/data/stopwords/ptt_words.txt") def cleanFormerResult(self): """ diff --git a/Chatbot/chatbot.py b/Chatbot/chatbot.py index 3d78fb0..9b7587a 100644 --- a/Chatbot/chatbot.py +++ b/Chatbot/chatbot.py @@ -5,6 +5,7 @@ import console import task_modules.module_switch as module_switch import RuleMatcher.customRuleBase as crb +import QuestionAnswering.qaBase as qa class Chatbot(object): @@ -26,11 +27,14 @@ def __init__(self, name="NCKU"): self.exception_log = open('log/exception.log','w',encoding='utf-8') os.chdir(cur_dir) + # For rule matching self.console = console.Console(model_path="model/ch-corpus-3sg.bin") - self.custom_rulebase = crb.CustomRuleBase() # for one time matching. self.custom_rulebase.model = self.console.rb.model # pass word2vec model + # For QA + self.answerer = qa.Answerer() + self.default_response = [ "是嗎?", "我不太明白你的意思", @@ -107,7 +111,7 @@ def listen(self, sentence, target=None, api_key=None): # We can only send back a default response. return self.getDefaultResponse(),None,None,None - #TODO + #TODO # Use generative model to solve this case def getResponseOnRootDomains(self, target=None): @@ -177,8 +181,7 @@ def getResponseForGeneralQA(self, sentence): Listen user's input and return a response which is based on our knowledge base. """ - #TODO 接上 QA bot - pass + return self.answerer.getResponse(sentence) def getResponseForCustomQA(self,sentence,api_key): @@ -188,9 +191,7 @@ def getResponseForCustomQA(self,sentence,api_key): """ if api_key is None: return None - - #TODO 接上 QA bot - return None + return self.answerer.getResponse(sentence,api_key) def getLoggerData(self): return [self.root_domain,