Python: scraping QQ Music chart data and storing it in a database

1. Application areas of web crawlers: search engines

2. Analyzing the site with Chrome DevTools: Elements, Console, Sources, Network

3. How to crawl the data: pick the target site and send a request (request method and request data); a minimal request sketch follows this list

4. Crawl the QQ Music chart data

5. Parse the data with BeautifulSoup

6. Persist the data to the database
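
As mentioned in step 3, it is worth first confirming that the chart page can be fetched at all. A minimal sketch (same URL and a cleaned-up User-Agent as in the full script below; the status-code check is an extra sanity step not present in the original):

import requests

url = "https://y.qq.com/n/ryqq/toplist/4"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/86.0.4240.198 Safari/537.36"
}
resp = requests.get(url, headers=headers)
print(resp.status_code)  # expect 200 when the page is reachable
print(len(resp.text))    # rough check that HTML actually came back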


import requests
from bs4 import BeautifulSoup
import mysql.connector


def send_requests():
    url = "https://y.qq.com/n/ryqq/toplist/4"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/86.0.4240.198 Safari/537.36"
    }
    resp = requests.get(url=url, headers=headers)
    # print(resp.text)
    parser_content(resp)


def parser_content(resp):
    html = resp.text
    bs = BeautifulSoup(html, 'html.parser')
    ul = bs.find('ul', class_='songlist__list')
    li_list = ul.find_all('li')
    lst = []
    count = 0
    for item in li_list:
        count += 1
        # the top three entries use a different class for the rank number
        if count <= 3:
            songlist_number = item.find('div', class_='songlist__number songlist__number--top').text
        else:
            songlist_number = item.find('div', class_='songlist__number').text
        # the song-name cell also contains the text '播放' (play); keep only the part before it
        songlist_songname = item.find('div', class_='songlist__songname').text
        songname = songlist_songname.split('播放')[0]
        songlist_artist = item.find('div', class_='songlist__artist').text
        songlist_time = item.find('div', class_='songlist__time').text
        lst.append([songlist_number, songname, songlist_artist, songlist_time])
    save_mysql(lst)


my_db = mysql.connector.connect(host='localhost', user='root', password='root', database='python_db',
                                auth_plugin='mysql_native_password')
my_cursor = my_db.cursor()


def save_mysql(lst):
    # SQL statement
    sql = 'insert into tbl_qqmusic (songlist_number, songname, songlist_artist, songlist_time) values (%s,%s,%s,%s)'
    # batch insert
    my_cursor.executemany(sql, lst)
    # commit the transaction
    my_db.commit()
    print("Saved to database")


if __name__ == '__main__':
    send_requests()
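
One fragile spot in parser_content is the song title: splitting on '播放' works because the song-name cell also carries a "播放" (play) label, but reading the link text inside the cell is arguably cleaner. A small alternative sketch, assuming the title sits in the first <a> inside the songlist__songname div (QQ Music's markup may change):

a_tag = item.find('div', class_='songlist__songname').find('a')
if a_tag is not None:
    songname = a_tag.get_text(strip=True)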


Note: MySQL basics:

Start the MySQL service
net start mysql

Log in to MySQL
mysql -u root -p
(the password is empty at first: just press Enter)

Change the password
alter user user() identified by "root";

Create a new database
create database python_lianjia;
(the QQ Music script above connects to a database named python_db, so create whichever name you actually use in the connection)

List all databases
show databases;

Select a database
use python_lianjia;

List all tables in the current database
show tables;

Drop a database
drop database python_lianjia;

Exit the MySQL client
quit

Stop the MySQL service
net stop mysql

Table created for this example (run inside the database):


create table tbl_qqmusic(
id int(4) primary key auto_increment,
songlist_number varchar(255),
songname varchar(255),
songlist_artist varchar(255),
songlist_time varchar(255)
);
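
If you'd rather create the table from Python than from the mysql client, a sketch like this should also work (it assumes the python_db database used by the script above already exists; "if not exists" makes re-running harmless):

import mysql.connector

my_db = mysql.connector.connect(host='localhost', user='root', password='root',
                                database='python_db', auth_plugin='mysql_native_password')
my_cursor = my_db.cursor()
my_cursor.execute("""
    create table if not exists tbl_qqmusic(
        id int primary key auto_increment,
        songlist_number varchar(255),
        songname varchar(255),
        songlist_artist varchar(255),
        songlist_time varchar(255)
    )
""")
# the table now exists; DDL statements are auto-committed by MySQL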

Database operations in Python:

my_db = mysql.connector.connect(host='localhost', user='root', password='root', database='python_lianjia',
                                auth_plugin='mysql_native_password')
my_cursor = my_db.cursor()

print(my_db)

# SQL statement (note: this snippet targets the tbl_lianjia table, not tbl_qqmusic)
sql = 'insert into tbl_lianjia (title,positionInfo,houseInfo,followInfo,totalPrice,unitPrice) values (%s,%s,%s,%s,%s,%s)'
# batch insert
my_cursor.executemany(sql, lst)
# commit the transaction
my_db.commit()
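
Neither the QQ Music script nor this snippet ever releases the connection; once the last commit is done it is good practice to close the cursor and the connection (a small addition, not in the original code):

# after the final commit
my_cursor.close()
my_db.close()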


Summary:

1. Parsing data with BeautifulSoup

html = resp.text
bs = BeautifulSoup(html, 'html.parser')
ul = bs.find('ul', class_='songlist__list')
li_list = ul.find_all('li')
# print(li_list)
lst = []
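
The same rows can also be grabbed with BeautifulSoup's CSS-selector interface; this is an alternative sketch rather than what the script above uses:

# one call selects every <li> inside <ul class="songlist__list">
li_list = bs.select('ul.songlist__list li')
for item in li_list:
    # get_text(strip=True) trims surrounding whitespace from each cell
    print(item.find('div', class_='songlist__artist').get_text(strip=True))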

2. Persisting the data to the database

import mysql.connector

my_db = mysql.connector.connect(host='localhost', user='root', password='root', database='python_db',
                                auth_plugin='mysql_native_password')
my_cursor = my_db.cursor()


def save_mysql(lst):
    # SQL statement
    sql = 'insert into tbl_qqmusic (songlist_number, songname, songlist_artist, songlist_time) values (%s,%s,%s,%s)'
    # batch insert
    my_cursor.executemany(sql, lst)
    # commit the transaction
    my_db.commit()
    print("Saved to database")

3. Creating the table (run inside MySQL)

create table tbl_qqmusic(
id int(4) primary key auto_increment,
songlist_number varchar(255),
songname varchar(255),
songlist_artist varchar(255),
songlist_time varchar(255)
);

4. Basic MySQL operations (run inside MySQL)

Log in to MySQL
mysql -u root -p
(the password is empty at first: just press Enter)

Change the password
alter user user() identified by "root";

Create a new database
create database python_lianjia;

List all databases
show databases;

Select a database
use python_lianjia;

List all tables in the current database
show tables;

Drop a database
drop database python_lianjia;

Exit the MySQL client
quit


