声明
此代码仅用于爬虫技术的学习交流,如有侵权,请联系站长。
引言
自己写的一个爬虫工具,我这个人比较喜欢离线的东西,当然,只有掌握在自己手心中的才会安心。这也催生出了这个小工具。后期可能还会加入其它网站的爬取操作。
i> 收费的也可以下载嗷~
直接上代码吧
# coding=gbk
import json
import os
import time
from urllib.parse import quote, unquote

import requests
class kugou:
    """Interactive search-and-download client for the bd.kuwo.cn web API.

    Constructing an instance runs the whole console workflow: request the
    search page to obtain a ``kw_token`` cookie, search for ``name``, print
    the result table, make sure the ``download`` directory exists, then
    prompt for the song to download.

    NOTE(review): the class name and the user-facing messages say "kugou",
    but every endpoint used here lives on bd.kuwo.cn -- confirm the naming.

    Args:
        name: Search keyword (song title).
        page: 1-based result page to show first.
    """

    # Seconds to wait on a single HTTP request before giving up.
    _TIMEOUT = 10
    # Cookie prefixes; the kw_token value is appended at request time so the
    # headers can be rebuilt any number of times without accumulating tokens.
    _SEARCH_COOKIE = '_ga=GA1.2.162059578.1616242395; _gid=GA1.2.1967165994.1616242395; _gat=1;'
    _DOWNLOAD_COOKIE = '_ga=GA1.2.162059578.1616242395; _gid=GA1.2.1967165994.1616242395; kw_token='
    # Characters replaced in file names because common file systems reject them.
    _UNSAFE_FILENAME_CHARS = '\\/:*?"<>|'

    def __init__(self, name, page=1):
        print("正在初始化")
        self.name = name
        self.reqId = "66732db1-8977-11eb-92ee-2b146622418c"
        self.page = page
        self.init()
        self.searchURL = self._search_url()
        self.songName = ""
        self.songList = {}
        self.songId = 0
        self.headers = {
            'Accept': 'application/json, text/plain, */*',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Connection': 'keep-alive',
            'Cookie': self._SEARCH_COOKIE,
            'DNT': '1',
            'Host': 'bd.kuwo.cn',
            'Referer': 'http://bd.kuwo.cn/search/list?key=%E9%BA%BB%E9%9B%80',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36',
        }
        self.headers2 = {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Cache-Control': 'max-age=0',
            'Connection': 'keep-alive',
            'Cookie': self._DOWNLOAD_COOKIE,
            'DNT': '1',
            'Host': 'bd.kuwo.cn',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36',
        }
        print("初始化结束...")
        self.search()
        self.show()
        self.check()
        self.getLink()

    @classmethod
    def _safe_filename(cls, text):
        """Return ``text`` with path separators and reserved characters replaced by ``_``."""
        return "".join("_" if ch in cls._UNSAFE_FILENAME_CHARS else ch for ch in str(text))

    def _search_url(self):
        """Build the search URL for the current keyword and page."""
        # The keyword is percent-encoded so '&', '#', spaces etc. cannot
        # corrupt the query string.
        return ("http://bd.kuwo.cn/api/www/search/searchMusicBykeyWord?key=" + quote(str(self.name))
                + "&pn=" + str(self.page) + "&rn=30&httpsStatus=1&reqId=" + self.reqId)

    def _token(self):
        """Return the ``kw_token`` cookie of the bootstrap response, or "" if absent."""
        return str(self.cookiesGetter.cookies.get("kw_token") or "")

    def _turn_page(self, page):
        """Switch to ``page``: refresh the token, search again and print the results."""
        self.page = page
        self.init()
        self.searchURL = self._search_url()
        self.search()
        self.show()

    def init(self):
        """Request the search page; its response cookies carry the ``kw_token`` used later."""
        self.cookiesGetter = requests.get(
            url="http://bd.kuwo.cn/search/list?key=" + quote(str(self.name)),
            timeout=self._TIMEOUT,
        )

    def search(self):
        """Query the search API and fill ``songList`` with the results.

        ``songList`` maps a 0-based row number to a
        ``(name, artist, duration, rid)`` tuple. On any failure a message
        is printed and ``songList`` is left empty.
        """
        print("正在检索...")
        token = self._token()
        self.headers["csrf"] = token
        # Rebuilt from the constant prefix so repeated searches stay idempotent.
        self.headers["Cookie"] = self._SEARCH_COOKIE + "kw_token=" + token
        self.songList = {}
        self.num = 0
        try:
            content = requests.get(url=self.searchURL, headers=self.headers, timeout=self._TIMEOUT).content
            self.searchResult = json.loads(content)["data"]["list"]
            self.songList = {
                index: (item["name"], item["artist"], item["songTimeMinutes"], item["rid"])
                for index, item in enumerate(self.searchResult)
            }
            self.num = len(self.songList)
            print("检索结束...共计查询结果" + str(self.num) + "条")
        except requests.exceptions.RequestException as e:
            print("检索时故障发生在连接阶段,可能酷狗音乐API变更或连接不稳定\n" + str(e))
        except (ValueError, KeyError, TypeError) as e:
            # ValueError covers malformed JSON; KeyError/TypeError cover a
            # payload that does not have the expected data/list layout.
            print("JSON 解析故障,服务器返回值不符合模板规范,可能API变更\n" + str(e))

    def show(self):
        """Print the current result page as a fixed-width table."""
        row_format = "{:<3}{:<20}{:<20}{:<8}{:<18}"
        self.num = 0
        print(row_format.format("编号", "名称", "歌手", "时长", "sid"))
        for key in self.songList:
            title, artist, duration, sid = self.songList[key]
            print(row_format.format(self.num, str(title), str(artist), str(duration), str(sid)), end="\n")
            self.num += 1

    def getLink(self):
        """Prompt for a row number (or ``+``/``-`` to change page) and download that song.

        The prompt repeats until a row number present in ``songList`` is
        entered. Paging reloads the list in place rather than re-running
        the constructor.
        """
        while True:
            print("当前页:" + str(self.page) + "下载歌曲序号(-上一页 +下一页):")
            s = input().strip()
            if s == '+':
                self._turn_page(self.page + 1)
                continue
            if s == '-':
                if self.page <= 1:
                    print("[!] 页码不合法!")
                else:
                    self._turn_page(self.page - 1)
                continue
            try:
                index = int(s)
            except ValueError:
                print("[!] 序号不合法!")
                continue
            if index not in self.songList:
                print("[!] 序号不合法!")
                continue
            break

        songName, songArtist, songTime, songSid = self.songList[index]
        self.songId = songSid
        token = self._token()
        self.headers2["csrf"] = token
        # The download cookie prefix already ends in "kw_token=".
        self.headers2["Cookie"] = self._DOWNLOAD_COOKIE + token
        self.downloadURL = ("http://bd.kuwo.cn/url?format=mp3&rid=" + str(self.songId)
                            + "&response=url&type=convert_url3&br=969kmp3&from=web&t="
                            + str(int(time.time())) + "000&httpsStatus=1"
                            + "&reqId=" + self.reqId)
        try:
            content = requests.get(self.downloadURL, headers=self.headers2, timeout=self._TIMEOUT).content
            files = json.loads(content)["url"]
        except requests.exceptions.RequestException as e:
            print("获取链接时故障发生在连接阶段,可能酷狗音乐API变更或连接不稳定\n" + str(e))
            return
        except (ValueError, KeyError, TypeError) as e:
            print("JSON 解析故障,服务器返回值不符合模板规范,可能API变更\n" + str(e))
            return
        print(
            "[#] 歌曲名称:" + str(songName) + "\n[#] 歌手:" + str(songArtist) + "\n[#] 歌曲时长" + str(songTime)
            + "\n[#] 歌曲ID" + str(songSid) + "\n[$]即将下载")
        self.download(songName, songArtist, files)

    def download(self, songName, songArtist, files):
        """Fetch the audio at URL ``files`` and save it as ``download/<name>-<artist>.mp3``.

        The body is downloaded completely before the file is opened, so a
        failed request does not leave an empty file behind.
        """
        target = os.path.join(
            "download",
            self._safe_filename(songName) + "-" + self._safe_filename(songArtist) + ".mp3",
        )
        try:
            response = requests.get(files, timeout=self._TIMEOUT)
            # Do not store an HTTP error page as if it were audio.
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            print("下载时发生故障,可能酷狗音乐进行校验或连接不稳定\n" + str(e))
            return
        try:
            self.check()
            with open(target, "wb") as f:
                f.write(response.content)
        except OSError as e:
            print("[!] 文件写入失败\n" + str(e))

    def check(self):
        """Create the ``download`` directory if it does not exist yet."""
        os.makedirs("download", exist_ok=True)
class _5Sing:
    """Search client for the search.5sing.kugou.com JSON endpoint.

    Only searching is implemented; the constructor performs no network
    access, it just prepares the URL and headers.

    Args:
        name: Search keyword.
        page: Result page number placed in the query string.
    """

    def __init__(self, name, page=1):
        self.name = name
        self.page = page
        # The keyword is percent-encoded and separated from "sort" by "&";
        # without the separator the two parameters merge into one value.
        self.searchURL = ("http://search.5sing.kugou.com/home/json?keyword=" + quote(str(name))
                          + "&sort=1&page=" + str(page) + "&filter=0&type=0")
        self.searchHeader = {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Connection': 'keep-alive',
            'Referer': 'http://search.5sing.kugou.com/home/index?keyword=',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36',
            'X-Requested-With': 'XMLHttpRequest',
        }

    def init(self):
        """Request the search page and keep the response in ``cookiesGetter``.

        Not called by the constructor; callers invoke it explicitly.
        """
        self.cookiesGetter = requests.get(
            url="http://search.5sing.kugou.com/home/index?keyword=" + quote(str(self.name)),
            timeout=10,
        )

    def getList(self):
        """Run the search and store the ``list`` entry of the reply in ``searchResult``.

        On failure a message is printed and ``searchResult`` is not updated.
        """
        print("正在检索...")
        try:
            response = requests.get(url=self.searchURL, headers=self.searchHeader, timeout=10)
            # json.loads resolves \uXXXX escapes itself; pre-decoding with
            # 'unicode_escape' would also unescape \" and break the document.
            self.searchResult = json.loads(response.content)["list"]
        except requests.exceptions.RequestException as e:
            print("获取链接时故障发生在连接阶段,可能酷狗音乐API变更或连接不稳定\n" + str(e))
        except (ValueError, KeyError, TypeError) as e:
            print("JSON 解析故障,服务器返回值不符合模板规范,可能API变更\n" + str(e))
if __name__ == '__main__':
    # Console entry point: ask for a song title and a backend, then hand
    # over to the chosen client class, which drives the rest of the dialog.
    try:
        print("歌曲名称:", end="")
        song = input()
        while song == "":
            print("[!] 歌曲名为空, 请重新输入歌曲名: ", end="")
            song = input()
        print("接口:1.酷狗")
        interfaces = input()
        # A one-element tuple is required here: ("1") is just the string
        # "1", which would turn `in` into a substring test.
        while interfaces not in ("1",):
            print("[!] 接口代号不匹配,请重新选择接口: ", end="")
            interfaces = input()
        if interfaces == "1":
            kugou(song)
        # NOTE: _5Sing is not offered in the menu yet; it only implements search.
    except Exception as e:
        # Last-resort boundary so the user sees a message instead of a traceback.
        print("存在故障:" + str(e))