Finish general qa on some domain.
zake7749 committed Nov 3, 2016
1 parent b01872a commit 9e76fb4
Showing 6 changed files with 70 additions and 38 deletions.
3 changes: 3 additions & 0 deletions Chatbot/QuestionAnswering/__init__.py
@@ -0,0 +1,3 @@
import os
import sys
sys.path.append(os.path.dirname(__file__))
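
The new __init__.py puts the package directory itself on sys.path, so the flat, bare-name imports used between modules in this package keep resolving when the package is imported from the repository root. A minimal illustration of the mechanism, with a hypothetical directory name (not from this repo):

import os
import sys

# Hypothetical example: once a directory is appended to sys.path, any module
# file that lives directly inside it becomes importable by its bare name.
package_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "QuestionAnswering")
if package_dir not in sys.path:
    sys.path.append(package_dir)

# e.g. "import match" or "from responsesEvaluate import Evaluator" would now
# resolve from anywhere in the process, which is what the package relies on.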
10 changes: 7 additions & 3 deletions Chatbot/QuestionAnswering/chat.py
@@ -10,7 +10,7 @@
def main():

    chatter = GossipBot()
    #chatter.randomTalks()
    #chatter.randomTalks(num=1000)
    chatter.chatTime()


@@ -57,15 +57,19 @@ def getResponse(self,query,threshold=50):
        return reply

    def randomPick(self, answers):
        return answers[random.randrange(0,len(answers))][0]
        try:
            answer = answers[random.randrange(0,len(answers))][0]
        except:
            answer = "沒有資料"
        return answer

    def randomTalks(self, num=100):
        with open("data/Titles.txt",'r',encoding='utf-8') as data:
            titles = [line.strip('\n') for line in data]
        for i in range(0,num):
            query = titles[random.randrange(0,len(titles))]
            print("User: " + query)
            print("MianBot: " +self.getResponse(query))
            print("MianBot: " +self.getResponse(query) + "\n")

if __name__=="__main__":
    main()
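
randomPick now tolerates an empty candidate list by falling back to a canned reply instead of raising. A slightly tighter sketch of the same idea, assuming answers is a list of (text, score) tuples as elsewhere in this commit; the function and parameter names here are illustrative:

import random

def random_pick(answers, fallback="沒有資料"):
    # answers is assumed to be a list of (text, score) tuples; an empty list
    # falls back to the canned reply instead of raising IndexError/ValueError.
    if not answers:
        return fallback
    return random.choice(answers)[0]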
22 changes: 10 additions & 12 deletions Chatbot/QuestionAnswering/match.py
@@ -2,13 +2,10 @@
import os
import random

from responsesEvaluate import Evaluator
from Matcher.fuzzyMatcher import FuzzyMatcher
from Matcher.wordWeightMatcher import WordWeightMatcher
from Matcher.matcher import Matcher

def main():
matcherTesting("Fuzzy",removeStopWords=False)
from .responsesEvaluate import Evaluator
from .Matcher.fuzzyMatcher import FuzzyMatcher
from .Matcher.wordWeightMatcher import WordWeightMatcher
from .Matcher.matcher import Matcher

def getMatcher(matcherType,removeStopWords=False):

@@ -38,13 +35,15 @@ def getMatcher(matcherType,removeStopWords=False):
def matcherTesting(matcherType,removeStopWords=False):

    matcher = getMatcher(matcherType,removeStopWords)
    cur_path = os.path.dirname(__file__)

    while True:
        query = input("隨便說些什麼吧: ")
        title,index = matcher.match(query)
        sim = matcher.getSimilarity()
        print("最為相似的標題是 %s ,相似度為 %d " % (title,sim))

        res = json.load(open(os.path.join("data/processed/reply/",str(int(index/1000))+'.json'),'r',encoding='utf-8'))
        res = json.load(open(os.path.join(cur_path,"data/processed/reply/",str(int(index/1000))+'.json'),'r',encoding='utf-8'))
        targetId = index % 1000
        #randomId = random.randrange(0,len(res[targetId]))

@@ -63,8 +62,11 @@ def woreWeightMatch():

def fuzzyMatch(cleansw=False):

    cur_dir = os.getcwd()
    os.chdir(os.path.dirname(__file__))
    fuzzyMatcher = FuzzyMatcher(segLib="Taiba",removeStopWords=cleansw)
    fuzzyMatcher.loadTitles(path="data/Titles.txt")
    os.chdir(cur_dir)

    if cleansw:
        fuzzyMatcher.TitlesSegmentation(cleansw)
@@ -79,7 +81,3 @@
    #fuzzyMatcher.loadStopWords(path="data/stopwords/chinese_sw.txt")
    #fuzzyMatcher.loadStopWords(path="data/stopwords/ptt_words.txt")
    #fuzzyMatcher.loadStopWords(path="data/stopwords/specialMarks.txt")


if __name__ == '__main__':
    main()
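
matcherTesting and fuzzyMatch solve the same problem in two ways: the data files live next to the module rather than under the caller's working directory, so one builds paths from __file__ while the other temporarily chdirs into the module directory. A small sketch of the first approach as a reusable helper (the helper name is mine, not from the repo):

import os

def data_path(*parts):
    # Resolve a file under this module's data/ directory, independent of os.getcwd().
    return os.path.join(os.path.dirname(os.path.abspath(__file__)), "data", *parts)

# e.g. data_path("Titles.txt")
# e.g. data_path("processed", "reply", str(index // 1000) + ".json")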
47 changes: 36 additions & 11 deletions Chatbot/QuestionAnswering/qaBase.py
@@ -1,36 +1,61 @@
import json
import logging
import os

from .match import *
from .responsesEvaluate import Evaluator

class QABase(object):
class Answerer(object):

    def __init__(self, data_path):
    def __init__(self):

        """
        Args:
            data_path: where the data directory is located
        """
        self.general_questions = []
        self.data_path = data_path
        self.path = os.path.dirname(__file__)

        self.matcher = getMatcher(matcherType="Fuzzy")
        self.evaluator = Evaluator()
        self.testSegment()

    def testSegment(self):
        logging.info("測試斷詞模塊中")
        try:
            self.matcher.wordSegmentation("測試一下斷詞")
            logging.info("測試成功")
        except Exception as e:
            logging.info(repr(e))
            logging.info("模塊載入失敗,請確認data與字典齊全")

    def getResponse(self, sentence, api_key=None):

        if api_key is not None:
            response = self.getCustomQA(sentence,api_key)
        else:
            response = self._getGeneralQA(sentence)
            response = self.getGeneralQA(sentence)
        return response

    def getGeneralQA(self, sentence):
    def getGeneralQA(self,query,threshold=50):

        pass
        title,index = self.matcher.match(query)
        sim = self.matcher.getSimilarity()
        if sim < threshold:
            return None
        else:
            res = json.load(open(os.path.join(self.path+"/data/processed/reply/",str(int(index/1000))+'.json'),
                                 'r',encoding='utf-8'))
            targetId = index % 1000
            candidates = self.evaluator.getBestResponse(res[targetId],topk=3)
            reply = self.randomPick(candidates)
            return reply

    def randomPick(self, answers):
        try:
            answer = answers[random.randrange(0,len(answers))][0]
        except:
            answer = None
        return answer

    def getCustomQA(self, sentence, api_key):

        #TODO GET USER'S QA BY api_key
        #customqa_list = json.loads(getUserQA(api_key))
        pass
        return None
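
getGeneralQA reads replies from sharded JSON files: titles are stored 1000 per file, so index // 1000 names the shard and index % 1000 the slot inside it. A compact sketch of that lookup, assuming the same data layout (the function name is illustrative):

import json
import os

def load_reply_candidates(base_dir, index, shard_size=1000):
    # Replies are sharded 1000 titles per JSON file: shard id = index // shard_size,
    # position inside the shard = index % shard_size.
    shard_file = os.path.join(base_dir, "data", "processed", "reply",
                              str(index // shard_size) + ".json")
    with open(shard_file, "r", encoding="utf-8") as f:
        shard = json.load(f)
    return shard[index % shard_size]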
11 changes: 6 additions & 5 deletions Chatbot/QuestionAnswering/responsesEvaluate.py
@@ -7,7 +7,7 @@
from gensim import corpora

# Import the word segmentation and stopword configuration
from Matcher.matcher import Matcher
from .Matcher.matcher import Matcher

class Evaluator(Matcher):
"""
@@ -21,17 +21,18 @@ def __init__(self,segLib="Taiba"):
        self.segResponses = []
        self.totalWords = 0

        self.debugLog = open("data/EvaluateLog.txt",'w',encoding="utf-8")
        self.path = os.path.dirname(__file__)
        self.debugLog = open(self.path + "/data/EvaluateLog.txt",'w',encoding="utf-8")

        self.filteredWords = set() # responses that must be filtered out

        self.counterDictionary = defaultdict(int) # used to count word frequencies
        self.tokenDictionary = None # used to assign word ids and build the bag of words

        # load the Chinese stopwords and special marks
        self.loadStopWords(path="data/stopwords/chinese_sw.txt")
        self.loadStopWords(path="data/stopwords/specialMarks.txt")
        self.loadFilterdWord(path="data/stopwords/ptt_words.txt")
        self.loadStopWords(path=self.path + "/data/stopwords/chinese_sw.txt")
        self.loadStopWords(path=self.path + "/data/stopwords/specialMarks.txt")
        self.loadFilterdWord(path=self.path + "/data/stopwords/ptt_words.txt")

    def cleanFormerResult(self):
        """
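
The Evaluator change follows the same pattern as qaBase.py: cache os.path.dirname(__file__) once and anchor every resource path on it. The new paths are built by string concatenation, which assumes a "/" separator; a one-line equivalent with os.path.join, shown only as a portability note:

import os

# portable equivalent of self.path + "/data/EvaluateLog.txt"
debug_log_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data", "EvaluateLog.txt")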
15 changes: 8 additions & 7 deletions Chatbot/chatbot.py
@@ -5,6 +5,7 @@
import console
import task_modules.module_switch as module_switch
import RuleMatcher.customRuleBase as crb
import QuestionAnswering.qaBase as qa

class Chatbot(object):

@@ -26,11 +27,14 @@ def __init__(self, name="NCKU"):
        self.exception_log = open('log/exception.log','w',encoding='utf-8')
        os.chdir(cur_dir)

        # For rule matching
        self.console = console.Console(model_path="model/ch-corpus-3sg.bin")

        self.custom_rulebase = crb.CustomRuleBase() # for one time matching.
        self.custom_rulebase.model = self.console.rb.model # pass word2vec model

        # For QA
        self.answerer = qa.Answerer()

        self.default_response = [
            "是嗎?",
            "我不太明白你的意思",
@@ -107,7 +111,7 @@ def listen(self, sentence, target=None, api_key=None):
        # We can only send back a default response.
        return self.getDefaultResponse(),None,None,None

        #TODO
        #TODO
        # Use generative model to solve this case

    def getResponseOnRootDomains(self, target=None):
@@ -177,8 +181,7 @@ def getResponseForGeneralQA(self, sentence):
        Listen to the user's input and return a response based on our
        knowledge base.
        """
        #TODO hook up the QA bot
        pass
        return self.answerer.getResponse(sentence)

    def getResponseForCustomQA(self,sentence,api_key):

@@ -188,9 +191,7 @@ def getResponseForCustomQA(self,sentence,api_key):
"""
if api_key is None:
return None

#TODO 接上 QA bot
return None
return self.answerer.getResponse(sentence,api_key)

    def getLoggerData(self):
        return [self.root_domain,
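
With the new answerer member, both getResponseForGeneralQA and getResponseForCustomQA reduce to a delegation to QuestionAnswering.qaBase.Answerer. A hedged usage sketch of that flow, assuming it is run from the Chatbot/ directory with the repository's data and dictionaries in place; the query string is arbitrary:

from QuestionAnswering import qaBase

answerer = qaBase.Answerer()                 # loads the fuzzy matcher and evaluator, runs a segmentation self-test
reply = answerer.getResponse("今天天氣如何")  # general QA: fuzzy-match a title, rank and pick one of its replies
if reply is None:                            # similarity below threshold -> no answer
    reply = "我不太明白你的意思"               # caller-side fallback, e.g. one of Chatbot's default responses
print(reply)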
