爬取方法借鉴了https://blog.csdn.net/johnchang0201/article/details/103004229
爬取完数据之后将数据保存到了excel中
以下为代码:
import requests
import xlwt
from bs4 import BeautifulSoup
 
 
 
 
  def get_text(url):     try:         kv = {             "user-agent": "Mozilla/5.0"         }         r = requests.get(url, headers=kv)         r.raise_for_status()         r.encoding = r.apparent_encoding                  return r.text     except:         print("爬取失败")         return 0
 
 
 
 
 
  def process_text(text_in):     soup = BeautifulSoup(text_in, 'lxml')     results = soup.find_all('d')       comments = [comment.text for comment in results]            return comments
 
  def export_excel(comments):     workbook = xlwt.Workbook(encoding="utf-8")     worksheet = workbook.add_sheet('My Worksheet')     RowNum = len(comments)     for i in range(0,RowNum):         worksheet.write(i, 0, comments[i])     workbook.save("Comments.xls")
 
 
  def main():     url = "https://api.bilibili.com/x/v1/dm/list.so?oid=136276727"     text = get_text(url)     comments = process_text(text)     export_excel(comments)
           
 
 
# Run the scraper only when executed as a script, not when imported.
if __name__ == '__main__':     main()