import requests
import wordcloud
import jieba
from bs4 import BeautifulSoup
from matplotlib import pyplot as plt
from pylab import mpl
# Use a Chinese-capable font so matplotlib labels render correctly
mpl.rcParams['font.sans-serif'] = ['SimHei']
mpl.rcParams['axes.unicode_minus'] = False
url = 'https://s.weibo.com/top/summary?Refer=top_hot&topnav=1&wvr=6'
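# NOTE: the header below is an assumption -- Weibo may refuse requests that
# lack a browser-like User-Agent, so send a minimal one just in case
headers = {'User-Agent': 'Mozilla/5.0'}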
try:
    # Fetch the hot-search page
    r = requests.get(url, headers=headers, timeout=10)
    r.raise_for_status()
    r.encoding = r.apparent_encoding
    soup = BeautifulSoup(r.text, 'html.parser')
    data = soup.find_all('a')
    d_list = []
    for item in data:
        d_list.append(item.text)
    # Keep only the hot-search entries; these slice bounds depend on the
    # page layout at the time of writing
    words = d_list[4:-11]
    # Chinese word segmentation
    result = list(jieba.cut(words[0]))
    for word in words[1:]:
        result.extend(jieba.cut(word))
    redata = []
    for it in result:
        if len(it) == 1:  # drop single-character tokens
            continue
        else:
            redata.append(it)
    result_str = ' '.join(redata)
    # Generate the word-cloud image
    font = r'C:\Windows\Fonts\simhei.ttf'
    w = wordcloud.WordCloud(font_path=font, width=600, height=400)
    w.generate(result_str)
    w.to_file('微博熱搜關(guān)鍵詞詞云.png')
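    # Optional: preview the cloud inline as well (standard wordcloud idiom;
    # uncomment if a GUI matplotlib backend is available):
    # plt.imshow(w, interpolation='bilinear')
    # plt.axis('off')
    # plt.show()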
    key = list(set(redata))
    x, y = [], []
    # Keep only words that occur more than once
    for st in key:
        count = redata.count(st)
        if count == 1:
            continue
        else:
            x.append(st)
            y.append(count)
    # Sort the word/count pairs together so each label stays matched to its
    # own count (sorting x and y separately would scramble the pairing)
    pairs = sorted(zip(x, y), key=lambda p: p[1])
    x, y = [p[0] for p in pairs], [p[1] for p in pairs]
    # Plot the frequency curve
    plt.plot(x, y)
    plt.show()
except Exception as e:
    print(e)
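One note on the filtering step above: redata.count(st) rescans the whole token list once per distinct word. A collections.Counter does all the counting in a single pass and keeps each word paired with its count. Here is a minimal sketch of that alternative; the sample token list is illustrative only, standing in for the redata built by the crawler:

from collections import Counter

# Sample tokens standing in for the segmented redata list above
redata = ['熱搜', '電影', '熱搜', '明星', '電影', '熱搜']

freq = Counter(redata)                              # count every token in one pass
pairs = [(w, c) for w, c in freq.items() if c > 1]  # drop words seen only once
pairs.sort(key=lambda p: p[1])                      # sort words and counts together
x = [w for w, _ in pairs]
y = [c for _, c in pairs]
print(x, y)                                         # ['電影', '熱搜'] [2, 3]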
This concludes the walkthrough of using a Python crawler to analyze Weibo hot-search keywords.