from pyecharts.charts import Pie, Bar, Line, Page
from pyecharts import options as opts
from pyecharts.globals import SymbolType

# Build the [name, value] pairs for the pie chart from `star_num`.
# NOTE(review): `star_num` is defined outside this snippet; it must expose
# `.index` and `.values.tolist()` (presumably a pandas Series of rating
# counts) -- confirm against the defining cell.
data_pair = [list(z) for z in zip(star_num.index, star_num.values.tolist())]

# Ring (donut) chart: `radius` gives the inner and outer radius.
pie1 = Pie(init_opts=opts.InitOpts(width='800px', height='400px'))
pie1.add('', data_pair, radius=['35%', '60%'])
pie1.set_global_opts(
    title_opts=opts.TitleOpts(title='豆瓣短评评分占比'),
    legend_opts=opts.LegendOpts(orient='vertical', pos_top='15%', pos_left='2%'),
)
# ECharts label template: {b} is the item name, {d} its percentage share.
pie1.set_series_opts(label_opts=opts.LabelOpts(formatter='{b}:{d}%'))
pie1.render_notebook()
import jieba
def get_cut_words(content_series):
    """Segment the given text with jieba and filter out unwanted tokens.

    Every entry of ``content_series`` is joined with '。' into a single
    string, segmented with ``jieba.lcut(..., cut_all=False)``, and the
    resulting tokens are filtered.

    Args:
        content_series: Object exposing ``.str.cat(sep=...)``; presumably a
            pandas Series of review strings (TODO confirm against caller).

    Returns:
        list: The tokens, in segmentation order, that are at least two
        characters long and are not stop words.

    Raises:
        OSError: If ``hit_stopwords.txt`` cannot be opened.
    """
    # Load the stop-word table (one word per line). A set gives O(1)
    # membership checks in the filter below instead of a list scan per token.
    with open(r"hit_stopwords.txt", 'r', encoding='utf-8') as f:
        stop_words = {line.strip() for line in f}

    # Add domain keywords to jieba's dictionary.
    for word in ('长津湖', '志愿军'):
        jieba.add_word(word)

    # Custom stop words on top of the file-based table. Note '长津湖' is both
    # a registered keyword and a stop word, so it is filtered out below.
    stop_words.update(['电影', "长津湖", "战争"])

    words = jieba.lcut(content_series.str.cat(sep='。'), cut_all=False)

    # Keep only tokens of length >= 2 that are not stop words.
    return [w for w in words if len(w) >= 2 and w not in stop_words]