Skip to content

Commit

Permalink
feat(editor): ChatExcel
Browse files Browse the repository at this point in the history
🔥ChatExcel Mode Operation Manual
  • Loading branch information
yhjun1026 committed Aug 29, 2023
1 parent d8ca59d commit 237992e
Show file tree
Hide file tree
Showing 9 changed files with 158 additions and 15 deletions.
Binary file added assets/chat_excel/chat_excel_1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added assets/chat_excel/chat_excel_2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added assets/chat_excel/chat_excel_3.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added assets/chat_excel/chat_excel_4.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added assets/chat_excel/chat_excel_5.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added assets/chat_excel/chat_excel_6.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added assets/chat_excel/chat_excel_7.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
26 changes: 26 additions & 0 deletions docs/getting_started/application/chatexcel/chatexcel.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
ChatExcel
==================================
ChatExcel uses natural language to analyze and query Excel data.

![db plugins demonstration](../../../../assets/chat_excel/chat_excel_1.png)

### 1. Select and Upload an Excel or CSV File
Select your Excel or CSV file to upload and start the conversation.
```{tip}
ChatExcel
The ChatExcel function supports files in Excel and CSV format. Select the file you want to use.
```
![db plugins demonstration](../../../../assets/chat_excel/chat_excel_2.png)
![db plugins demonstration](../../../../assets/chat_excel/chat_excel_3.png)

### 2.Wait for Data Processing
After the data is uploaded, ChatExcel first learns and processes the data structure and the meaning of each field.
![db plugins demonstration](../../../../assets/chat_excel/chat_excel_4.png)

### 3. Use Data Analysis and Calculation
Now you can use natural language to analyze and query data in the dialog box.
![db plugins demonstration](../../../../assets/chat_excel/chat_excel_5.png)
![db plugins demonstration](../../../../assets/chat_excel/chat_excel_6.png)
![db plugins demonstration](../../../../assets/chat_excel/chat_excel_7.png)



147 changes: 132 additions & 15 deletions pilot/scene/chat_data/chat_excel/excel_learning/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,38 +3,155 @@
import pandas as pd
import matplotlib
import seaborn as sns
import uuid
from pandas import DataFrame

import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
from matplotlib import font_manager
from matplotlib.font_manager import FontManager
matplotlib.use("Agg")
import time
from fsspec import filesystem
import spatial
import spatial

from pilot.scene.chat_data.chat_excel.excel_reader import ExcelReader


def data_pre_classification(df: DataFrame):
    """Pick default ``x``, ``y`` and ``hue`` columns for charting ``df``.

    A column is classified as numeric when every one of its values survives
    ``pd.to_numeric(..., errors="coerce")`` without turning into NaN; every
    other column is classified as non-numeric.

    Selection rules:

    * Only numeric columns: ``x`` is the first numeric column, ``hue`` is the
      numeric column with the fewest distinct values and ``y`` the one with
      the second fewest (ties keep the original column order).
    * Numeric and non-numeric columns: ``x`` is the first non-numeric column,
      ``y`` is the first numeric column, and ``hue`` is the remaining
      non-numeric column with the fewest distinct values, or ``None`` when
      there is no other non-numeric column.

    Args:
        df: The frame whose columns are classified.

    Returns:
        A ``(x_column, y_column, hue_column)`` tuple of column names;
        ``hue_column`` may be ``None``.

    Raises:
        ValueError: If ``df`` has no numeric column, or if it has only
            numeric columns but fewer than two of them.
    """
    columns = df.columns.tolist()

    number_columns = []
    non_numeric_colums = []
    # Number of distinct values per column, used to rank candidates.
    unique_counts = {}
    for column_name in columns:
        series = df[column_name]
        unique_counts[column_name] = len(series.unique())
        if pd.to_numeric(series, errors="coerce").notna().all():
            number_columns.append(column_name)
        else:
            non_numeric_colums.append(column_name)

    # Checked first so that a frame without any columns reports the same
    # error as a frame with only non-numeric columns.
    if not number_columns:
        raise ValueError("Have No numeric Column!")

    if not non_numeric_colums:
        if len(number_columns) < 2:
            raise ValueError(
                "Need at least two numeric columns when there is no "
                "non-numeric column!"
            )
        # sorted() is stable, so equal counts keep the original column order.
        by_cardinality = sorted(number_columns, key=unique_counts.__getitem__)
        x_column = number_columns[0]
        hue_column = by_cardinality[0]
        y_column = by_cardinality[1]
        return x_column, y_column, hue_column

    # Both numeric and non-numeric columns exist; extra numeric columns are
    # deliberately ignored.
    x_column = non_numeric_colums[0]
    y_column = number_columns[0]
    remaining = sorted(non_numeric_colums[1:], key=unique_counts.__getitem__)
    hue_column = remaining[0] if remaining else None
    return x_column, y_column, hue_column

if __name__ == "__main__":
# connect = duckdb.connect("/Users/tuyang.yhj/Downloads/example.xlsx")
#

# fonts = fm.findSystemFonts()
# for font in fonts:
# if 'Hei' in font:
# print(font)

# fm = FontManager()
# mat_fonts = set(f.name for f in fm.ttflist)
# for i in mat_fonts:
# print(i)
# print(len(mat_fonts))
# 获取系统中的默认中文字体名称
# default_font = fm.fontManager.defaultFontProperties.get_family()


#
excel_reader = ExcelReader("/Users/tuyang.yhj/Downloads/example.xlsx")
#
# # colunms, datas = excel_reader.run( "SELECT CONCAT(Year, '-', Quarter) AS QuarterYear, SUM(Sales) AS TotalSales FROM example GROUP BY QuarterYear ORDER BY QuarterYear")
# # colunms, datas = excel_reader.run( """ SELECT Year, SUM(Sales) AS Total_Sales FROM example GROUP BY Year ORDER BY Year; """)
df = excel_reader.get_df_by_sql_ex(""" SELECT Segment, Country, SUM(Sales) AS Total_Sales, SUM(Profit) AS Total_Profit FROM example GROUP BY Segment, Country """)

x,y,hue =data_pre_classification(df)
print(x, y, hue)

# colunms, datas = excel_reader.run( "SELECT CONCAT(Year, '-', Quarter) AS QuarterYear, SUM(Sales) AS TotalSales FROM example GROUP BY QuarterYear ORDER BY QuarterYear")
colunms, datas = excel_reader.run( """ SELECT Year, SUM(Sales) AS Total_Sales FROM example GROUP BY Year ORDER BY Year; """)
df = excel_reader.get_df_by_sql_ex("SELECT Country, SUM(Profit) AS Total_Profit FROM example GROUP BY Country;")
columns = df.columns.tolist()
plt.rcParams["font.family"] = ["sans-serif"]
rc = {"font.sans-serif": "SimHei", "axes.unicode_minus": False}
sns.set_style(rc={'font.sans-serif': "Microsoft Yahei"})
sns.set(context="notebook", style="ticks", color_codes=True, rc=rc)
font_names = ['Heiti TC', 'Songti SC', 'STHeiti Light', 'Microsoft YaHei', 'SimSun', 'SimHei', 'KaiTi']
fm = FontManager()
mat_fonts = set(f.name for f in fm.ttflist)
can_use_fonts = []
for font_name in font_names:
if font_name in mat_fonts:
can_use_fonts.append(font_name)
if len(can_use_fonts) > 0:
plt.rcParams['font.sans-serif'] = can_use_fonts

rc = {'font.sans-serif': can_use_fonts}
plt.rcParams['axes.unicode_minus'] = False # 解决无法显示符号的问题
sns.set(font='Heiti TC', font_scale=0.8) # 解决Seaborn中文显示问题
sns.set_palette("Set3") # 设置颜色主题
sns.set_style("dark")
sns.color_palette("hls", 10)
sns.hls_palette(8, l=.5, s=.7)
sns.set(context='notebook', style='ticks', rc=rc)
# sns.set_palette("Set3") # 设置颜色主题
# sns.set_style("dark")
# sns.color_palette("hls", 10)
# sns.hls_palette(8, l=.5, s=.7)
# sns.set(context='notebook', style='ticks', rc=rc)

# fig, ax = plt.pie(df[columns[1]], labels=df[columns[0]], autopct='%1.1f%%', startangle=90)
fig, ax = plt.subplots(figsize=(8, 5), dpi=100)
plt.subplots_adjust(top=0.9)
ax = df.plot(kind='pie', y=columns[1], ax=ax, labels=df[columns[0]].values, startangle=90, autopct='%1.1f%%')
# 手动设置 labels 的位置和大小
ax.legend(loc='center left', bbox_to_anchor=(-1, 0.5, 0,0), labels=None, fontsize=10)
plt.axis('equal') # 使饼图为正圆形
plt.show()
# plt.ticklabel_format(style='plain')
# ax = df.plot(kind='bar', ax=ax)
# sns.barplot(df, x=x, y=y, hue= "Country", ax=ax)
sns.catplot(data=df, x=x, y=y, hue='Country', kind='bar')
# 设置 y 轴刻度格式为普通数字格式
ax.yaxis.set_major_formatter(mtick.FuncFormatter(lambda x, _: '{:,.0f}'.format(x)))

# fonts = font_manager.findSystemFonts()
# font_path = ""
# for font in fonts:
# if "Heiti" in font:
# font_path = font
# my_font = font_manager.FontProperties(fname=font_path)
# plt.title("测试", fontproperties=my_font)
# plt.ylabel(columns[1], fontproperties=my_font)
# plt.xlabel(columns[0], fontproperties=my_font)



chart_name = "bar_" + str(uuid.uuid1()) + ".png"
chart_path = chart_name
plt.savefig(chart_path, bbox_inches='tight', dpi=100)



# sns.set(context="notebook", style="ticks", color_codes=True)
# sns.set_palette("Set3") # 设置颜色主题
#
# # fig, ax = plt.pie(df[columns[1]], labels=df[columns[0]], autopct='%1.1f%%', startangle=90)
# fig, ax = plt.subplots(figsize=(8, 5), dpi=100)
# plt.subplots_adjust(top=0.9)
# ax = df.plot(kind='pie', y=columns[1], ax=ax, labels=df[columns[0]].values, startangle=90, autopct='%1.1f%%')
# # 手动设置 labels 的位置和大小
# ax.legend(loc='center left', bbox_to_anchor=(-1, 0.5, 0,0), labels=None, fontsize=10)
# plt.axis('equal') # 使饼图为正圆形
# plt.show()

#
#
# def csv_colunm_foramt(val):
Expand Down

0 comments on commit 237992e

Please sign in to comment.