python分詞和生成詞雲圖

想念@思戀發表於2020-12-08

main.py

#coding: utf-8
'''
程式執行前,需要用詞雲圖資料夾下的wordcloud.py替換掉以下路徑中的wordcloud.py
Anaconda3\Lib\site-packages\wordcloud
以上路徑為pip install wordcloud下載包所在路徑,如果涉及虛擬環境,則進入以下路徑
Anaconda3\envs\***\Lib\site-packages\wordcloud
替換原因:原來的wordcloud.py無法返回生成的詞語和頻率
'''
import jieba.analyse
import jieba
import wordcloud
# import pandas as pd

def cut(text):
    """Segment *text* with jieba in full mode and return the tokens
    joined by single spaces (the format WordCloud.generate expects)."""
    # cut_all=True selects full-mode segmentation: every possible word
    # is emitted, not just the most probable partition.
    tokens = jieba.cut(text, cut_all=True)
    return " ".join(tokens)

if __name__ == '__main__':

    # Load the user dictionary so jieba recognizes custom terms
    # before any segmentation happens.
    jieba.load_userdict('dict.txt')

    src = 'text.txt'
    # Fix: the original opened the file without ever closing it; a
    # context manager guarantees the handle is released even on error.
    with open(src, 'r', encoding='utf-8') as f:
        text = f.read()
    # Space-separated tokens, the input format WordCloud.generate expects.
    text = cut(text)

    # msyh.ttc (Microsoft YaHei) is required so CJK glyphs render;
    # the default font would draw boxes for Chinese characters.
    w = wordcloud.WordCloud(font_path='msyh.ttc', width=1000, height=700,
                            background_color='white')
    w.generate(text)
    # return_words only exists in the patched wordcloud.py described in
    # the module docstring; it maps each drawn word to its frequency.
    words_dict = w.return_words  # dict: word -> frequency

    w.to_file('grwordcloud.png')

wordcloud.py（修改示意，「...」代表未改動的原始庫程式碼）

# Sketch of the patch applied to the library's wordcloud.py
# ("..." marks elided original library code, not runnable as-is).
# Purpose: expose the words actually laid out, plus their frequencies,
# via a new `return_words` attribute.
class WordCloud(object):
	def __init__(self,...)
		...
		# Added by the patch: accumulates word -> frequency entries.
		self.return_words = {}
		...
	def generate_from_frequencies(self, frequencies, max_font_size=None):

		...
		# NOTE(review): iterating `frequencies` directly assumes it is a
		# sequence of (word, freq) pairs at this point in the library —
		# confirm against the wordcloud version being patched.
		for word, freq in frequencies:
			...
			# Added by the patch: record this word and its frequency.
			self.return_words[word] = freq
			...
		...
		return self

相關文章