-
Notifications
You must be signed in to change notification settings - Fork 0
/
whooshSQL.py
82 lines (72 loc) · 3.02 KB
/
whooshSQL.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
from whoosh.fields import Schema, ID, TEXT
from jieba.analyse import ChineseAnalyzer
from whoosh.filedb.filestore import FileStorage
import pymysql.cursors
# Shared MySQL connection used by the indexing code below.
# Host, user and password may be supplied through the TENDER_DB_HOST,
# TENDER_DB_USER and TENDER_DB_PASSWORD environment variables so that
# credentials do not have to live in the source file; the literals remain
# as fallbacks so existing setups keep working unchanged.
# The database name stays fixed at 'tender'.
connection = pymysql.connect(
    host=os.environ.get('TENDER_DB_HOST', 'localhost'),
    user=os.environ.get('TENDER_DB_USER', 'root'),
    password=os.environ.get('TENDER_DB_PASSWORD', 'siliver88'),
    db='tender',
    charset='utf8',
    # DictCursor: rows are looked up by column name elsewhere in this file.
    cursorclass=pymysql.cursors.DictCursor,
)
class CreateIndex(object):
    """Build a Whoosh full-text index from the tables of the MySQL database.

    Both methods talk to the database through the module-level
    ``connection`` object.
    """

    def Gettabs(self):
        """Return the names of all tables in the connected database.

        Returns:
            list: one table name per row returned by ``SHOW TABLES``.
        """
        with connection.cursor() as cursor:
            cursor.execute('''SHOW TABLES''')
            rows = cursor.fetchall()
        # Each row is a dict with a single "Tables_in_<database>" entry.
        # Taking the row's only value instead of indexing a hard-coded
        # 'Tables_in_tender' key keeps this working for any database name.
        return [next(iter(row.values())) for row in rows]

    def BuiltIndex(self):
        """Index every row of every table into the 'Hello' Whoosh index.

        The index lives in ``./whoosh_index``; it is created when missing
        and opened (and appended to) when it already exists. Every table
        reported by :meth:`Gettabs` is read, so each table is expected to
        provide all eight selected columns.

        Side effects:
            Prints the number of indexed documents on success and always
            closes the module-level ``connection``, so the method can be
            run only once per connection.

        Raises:
            Any database or index error is propagated; in that case the
            pending index changes are cancelled instead of committed.
        """
        analyzer = ChineseAnalyzer()
        # define schema: only the two free-text fields get the Chinese
        # analyzer, the rest use Whoosh's default TEXT analysis.
        schema = Schema(
            title=TEXT(sortable=True),
            zb_url=TEXT(sortable=True),
            ctime=TEXT(sortable=True),
            deadline=TEXT(sortable=True),
            bsdeadline=TEXT(sortable=True),
            dbtb=TEXT(sortable=True),
            content=TEXT(sortable=True, analyzer=analyzer),
            lettercard=TEXT(sortable=True, analyzer=analyzer),
        )
        fields = ('title', 'zb_url', 'ctime', 'deadline', 'bsdeadline',
                  'dbtb', 'content', 'lettercard')
        columns = ', '.join('`{}`'.format(name) for name in fields)

        dirname = './whoosh_index'
        indexname = 'Hello'
        storage = FileStorage(dirname)
        os.makedirs(dirname, exist_ok=True)

        num = 0
        try:
            # Decide by the presence of the index itself, not of the
            # directory: an existing but empty directory must still get a
            # freshly created index.
            if storage.index_exists(indexname=indexname):
                ix = storage.open_index(indexname=indexname)
            else:
                # create index file
                ix = storage.create_index(schema, indexname=indexname)

            tables = self.Gettabs()
            # The writer context manager commits on success and cancels on
            # error, so a failure never leaves the index write lock held.
            with ix.writer() as writer:
                with connection.cursor() as cursor:
                    for tbname in tables:
                        # fetch rows from DB; the table name is quoted so
                        # names with special characters stay valid SQL.
                        sql = 'SELECT {} FROM `{}`'.format(
                            columns, tbname.replace('`', '``'))
                        cursor.execute(sql)
                        # write the rows into indexes
                        for row in cursor.fetchall():
                            # NOTE(review): str() turns a None value into
                            # the text 'None' - confirm that is intended.
                            writer.add_document(
                                **{name: str(row[name]) for name in fields})
                            num += 1
        finally:
            connection.close()
        print("%d docs indexed!" % num)
# Script entry point: create the indexer and build the index right away.
# NOTE(review): there is no `if __name__ == "__main__":` guard, so these
# statements also run whenever this module is imported.
createindex = CreateIndex()
createindex.BuiltIndex()