forked from Breaddsmall/bookstore
-
Notifications
You must be signed in to change notification settings - Fork 0
/
initialize_book_split.py
77 lines (64 loc) · 2.04 KB
/
initialize_book_split.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
from datetime import datetime, time

import jieba
import psycopg2
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, String, Integer, ForeignKey, create_engine, PrimaryKeyConstraint, Text, DateTime, \
    Boolean, LargeBinary
from sqlalchemy import text
from sqlalchemy.orm import sessionmaker
# Connection settings for the PostgreSQL database used by this script.
# NOTE(review): credentials are hard-coded here; consider loading them from
# configuration or the environment instead of committing them to source.
user = 'postgres'
password = 'zxcvbnm'
host = 'localhost'
port = '5432'
db = 'bookstore'

# Build the SQLAlchemy connection URL from the settings above.
url = 'postgresql://{}:{}@{}:{}/{}'.format(user, password, host, port, db)

# Module-level engine shared by every function below.
# An earlier variant read the URL from a config helper instead:
# engine = create_engine(Conf.get_sql_conf('local'))
engine = create_engine(url)

# Declarative base; its metadata is passed to create_all() by the functions below.
Base = declarative_base()
def init():
    """Create the ``book_split`` table if it does not already exist.

    The table has an ``int4`` primary key ``id`` and a ``text`` column
    ``book_intro``. The statement is committed, and the session is closed
    whether or not the statement succeeds.
    """
    DBSession = sessionmaker(bind=engine)
    session = DBSession()
    try:
        # Raw SQL must be wrapped in text(): SQLAlchemy 2.0 rejects plain
        # strings in Session.execute(), and older releases accept text() too.
        session.execute(text(
            "CREATE TABLE IF NOT EXISTS book_split ("
            "id int4 PRIMARY KEY, book_intro text); "
        ))
        # Committing persists the change to the database.
        session.commit()
    finally:
        # Release the session even when execute/commit raised.
        session.close()
def split():
    """Populate ``book_split`` with word-segmented copies of the book intros.

    Reads every ``(id, book_intro)`` row from the ``book`` table, segments the
    intro with ``jieba.cut_for_search`` and inserts the tokens, joined by
    single spaces, into ``book_split`` under the same id. A row whose intro is
    ``None`` is stored with an empty string. All inserts are committed
    together at the end; the session is closed whether or not they succeed.
    """
    DBSession = sessionmaker(bind=engine)
    session = DBSession()
    # Bound parameters instead of string interpolation: segmented text that
    # contains quotes (or ':' sequences) can no longer break or inject into
    # the statement.
    insert_stmt = text(
        "INSERT INTO book_split(id, book_intro) VALUES (:id, :book_intro)"
    )
    try:
        Base.metadata.create_all(engine)
        rows = session.execute(
            text("SELECT id, book_intro FROM book;")
        ).fetchall()
        for row in rows:
            intro = row.book_intro
            segmented = ""
            if intro is not None:
                segmented = " ".join(jieba.cut_for_search(intro))
            session.execute(
                insert_stmt,
                {"id": int(row.id), "book_intro": segmented},
            )
        session.commit()
    finally:
        # Release the session even when a query or the commit raised.
        session.close()
def add_fts():
    """Add a weighted ``tsvector`` column to ``book_split`` and index it.

    Steps, all committed together:

    1. Drop ``fts_gin_index`` if it exists.
    2. Add the ``fts`` column (skipped when it is already there).
    3. Fill ``fts`` from ``book_intro`` with weight ``'A'``.
    4. Create a GIN index named ``fts_gin_index`` on ``fts``.

    The session is closed whether or not the statements succeed.
    """
    DBSession = sessionmaker(bind=engine)
    session = DBSession()
    try:
        Base.metadata.create_all(engine)
        session.execute(text("DROP INDEX IF EXISTS fts_gin_index;"))
        # IF NOT EXISTS keeps the function re-runnable, matching the
        # DROP INDEX IF EXISTS above; a plain ADD COLUMN fails on a second run.
        session.execute(text(
            "ALTER TABLE book_split ADD COLUMN IF NOT EXISTS fts tsvector;"
        ))
        # NOTE(review): the 'english' configuration is applied to text that
        # split() produced with jieba; confirm this is the intended
        # text-search configuration for that content.
        session.execute(text(
            "UPDATE book_split SET fts = "
            "setweight(to_tsvector('english', book_intro), 'A') ;"
        ))
        session.execute(text(
            "CREATE INDEX fts_gin_index ON book_split USING gin (fts);"
        ))
        session.commit()
    finally:
        # Release the session even when a statement or the commit raised.
        session.close()
if __name__ == "__main__":
    # Run the pipeline in order:
    #   init    - create the book_split table
    #   split   - segment the book intros into words
    #   add_fts - build the full-text search column and index
    for step in (init, split, add_fts):
        step()