
使用python爬虫进行bilibili弹幕爬取的记录

爬取方法借鉴了https://blog.csdn.net/johnchang0201/article/details/103004229
爬取完数据之后将数据保存到了excel中

以下为代码:

import requests
from bs4 import BeautifulSoup
import xlwt


###获得某个bilibili视频的弹幕

#输入url,获得其页面的内容
def get_text(url):
    """Fetch *url* and return the decoded response body as text.

    Sends a minimal User-Agent header so the site accepts the request.
    Returns None (and prints a message) if the request fails for any
    network/HTTP reason.
    """
    headers = {
        "user-agent": "Mozilla/5.0",
    }
    try:
        # timeout keeps a dead connection from hanging the script forever
        r = requests.get(url, headers=headers, timeout=10)
        r.raise_for_status()
        # The danmaku XML's charset is often missing from the HTTP headers,
        # so guess the real encoding from the body itself.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        print("爬取失败")  # "crawl failed"
        return None


# #输入视频对应的url(从浏览器网址部分直接复制即可),输出对应的记录弹幕的xml页面
# def get_bullet_comments(BV_url):
# heart_beat_url =

def process_text(text_in):
    """Extract every danmaku (bullet comment) string from the raw XML.

    bilibili's comment XML stores one danmaku per <d> element; return
    their text contents as a plain list of strings.
    """
    parsed = BeautifulSoup(text_in, 'lxml')
    return [node.text for node in parsed.find_all('d')]

#将弹幕输出到excel中
def export_excel(comments):
    """Save the comment list to Comments.xls, one comment per row.

    Each comment goes in column 0 of a single worksheet; an empty list
    produces an empty (but valid) workbook.
    """
    workbook = xlwt.Workbook(encoding="utf-8")
    worksheet = workbook.add_sheet('My Worksheet')
    # enumerate replaces the manual range(0, len(...)) index loop
    for row, comment in enumerate(comments):
        worksheet.write(row, 0, comment)
    workbook.save("Comments.xls")



def main():
    """Download the danmaku XML for one video and export it to Excel."""
    # oid identifies the video's comment pool on bilibili's danmaku API.
    api_url = "https://api.bilibili.com/x/v1/dm/list.so?oid=136276727"
    raw_xml = get_text(api_url)
    danmaku = process_text(raw_xml)
    export_excel(danmaku)


if __name__ == '__main__':
    main()