编写程序,统计两会政府工作报告热词频率,并生成词云。

import jieba
from wordcloud import WordCloud
from collections import Counter
import matplotlib.pyplot as plt

# 读取文本文件
with open("2025政府工作报告.txt", "r", encoding="utf-8") as f:
t = f.read()

# 使用jieba进行中文分词
ls = jieba.lcut(t)

# 过滤掉单字词
filtered_words = [word for word in ls if len(word) > 1]

# 统计词频
word_counts = Counter(filtered_words)

# 创建WordCloud对象,设置词云的基本参数
w = WordCloud( \
width = 1000, height = 700,\
background_color = "white",
font_path = "msyh.ttc"
)

# 根据词频生成词云
w.generate_from_frequencies(word_counts)

# 显示词云图
plt.figure(figsize=(10, 7))
plt.imshow(w, interpolation='bilinear')
plt.axis('off') # 不显示坐标轴
plt.show()

# 将生成的词云图保存到文件
w.to_file("grwordcloude_filtered.png")

运行结果:
结果图