
使用python爬虫进行bilibili弹幕爬取的记录

爬取方法借鉴了https://blog.csdn.net/johnchang0201/article/details/103004229
爬取完数据之后将数据保存到了excel中

以下为代码:

import requests
from bs4 import BeautifulSoup
import xlwt


###获得某个bilibili视频的弹幕

#输入url,获得其页面的内容
def get_text(url):
    """Fetch *url* and return the decoded response body as text.

    Sends a minimal User-Agent header so the site accepts the request.
    Returns None (and prints a message) if the request fails for any
    network/HTTP reason.
    """
    headers = {
        "user-agent": "Mozilla/5.0",
    }
    try:
        # timeout keeps a dead connection from hanging the script forever
        r = requests.get(url, headers=headers, timeout=10)
        r.raise_for_status()
        # The danmaku XML's charset is often missing from the HTTP headers,
        # so guess the real encoding from the body itself.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        print("爬取失败")  # "crawl failed"
        return None


# #输入视频对应的url(从浏览器网址部分直接复制即可),输出对应的记录弹幕的xml页面
# def get_bullet_comments(BV_url):
# heart_beat_url =

def process_text(text_in):
    """Extract every danmaku (bullet comment) string from the raw XML.

    bilibili's comment XML stores one danmaku per <d> element; return
    their text contents as a plain list of strings.
    """
    parsed = BeautifulSoup(text_in, 'lxml')
    return [node.text for node in parsed.find_all('d')]

#将弹幕输出到excel中
def export_excel(comments):
    """Save the comment list to Comments.xls, one comment per row.

    Each comment goes in column 0 of a single worksheet; an empty list
    produces an empty (but valid) workbook.
    """
    workbook = xlwt.Workbook(encoding="utf-8")
    worksheet = workbook.add_sheet('My Worksheet')
    # enumerate replaces the manual range(0, len(...)) index loop
    for row, comment in enumerate(comments):
        worksheet.write(row, 0, comment)
    workbook.save("Comments.xls")



def main():
    """Download the danmaku XML for one video and export it to Excel."""
    # oid identifies the video's comment pool on bilibili's danmaku API.
    api_url = "https://api.bilibili.com/x/v1/dm/list.so?oid=136276727"
    raw_xml = get_text(api_url)
    danmaku = process_text(raw_xml)
    export_excel(danmaku)


if __name__ == '__main__':
    main()