写了一个 Python 爬虫程序，用来定时爬取抖音的热门话题：
import requests
import pprint
import time
import pandas as pd
# Douyin hot-search ranking crawler.
#
# Fetches the current hot-search list once, extracts each entry's keyword and
# hot value, stamps the rows with the local crawl time, and appends them to a
# CSV file. Intended to be run repeatedly by a scheduler, so every run adds a
# new batch of rows to the same file.

# Douyin hot-search ranking endpoint.
hot_search = 'https://aweme-hl.snssdk.com/aweme/v1/hot/search/list/?detail_list=1'
headers = {"User-Agent":"Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Mobile Safari/537.36"}

# A timeout keeps an unattended run from hanging forever on a stalled
# connection; raise_for_status() turns an HTTP error page into a clear
# failure instead of an obscure JSON decoding error.
response = requests.get(hot_search, headers=headers, timeout=10)
response.raise_for_status()
hot_json = response.json()

# One timestamp per crawl: every row from this run belongs to the same snapshot.
ctime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

hot_list = [
    {
        'keyword': entry['word'],
        'hotvalue': entry['hot_value'],
        'time': ctime,
    }
    for entry in hot_json['data']['word_list']
]
print("OK!")

# The file is appended to without a header row, so the column order is pinned
# explicitly to keep every batch aligned with the previous ones.
data = pd.DataFrame(hot_list, columns=['keyword', 'hotvalue', 'time'])
data.to_csv(
    "/home/douyin_crawler/hot_topics_new.csv",
    encoding='utf-8',
    mode='a',
    index=False,
    header=False,
)
使用 crontab 服务编辑定时任务（例如执行 `crontab -e` 添加一条运行该脚本的任务），
就可以定时爬取热门话题，并把结果追加保存到 CSV 文件中了。