接口爬虫编写

import requests

# Request headers shared by every requests.get call in this file.
# The User-Agent is a desktop Chrome string; presumably the site rejects
# the default python-requests agent -- TODO confirm.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/95.0.4638.69 Safari/537.36'
    ),
}

def get_track_list(albumId, pageNum, timeout=30):
    """Fetch one page of an album's track listing from the Ximalaya web API.

    Args:
        albumId: Album identifier, sent as the ``albumId`` query parameter.
        pageNum: Page number, sent as the ``pageNum`` query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of ``{'trackId': ..., 'title': ...}`` dicts, one per entry in
        the response's ``data.tracks`` array. The list is empty when the
        response's ``ret`` field is missing or is not 200.

    Raises:
        requests.exceptions.RequestException: On connection failure or
            timeout. ``resp.json()`` also raises if the body is not JSON.
    """
    url = 'https://www.ximalaya.com/revision/album/getTracksList'

    # Let requests build and URL-encode the query string.
    params = {'albumId': str(albumId), 'pageNum': str(pageNum)}

    # Without a timeout a stalled connection would block the script forever.
    resp = requests.get(url, params=params, headers=headers, timeout=timeout)
    result = resp.json()

    # The API reports success in the JSON body; anything but 200 (including
    # a body with no 'ret' at all) is treated as "no tracks".
    if result.get('ret') != 200:
        return []

    return [
        {'trackId': track['trackId'], 'title': track['title']}
        for track in result['data']['tracks']
    ]
def get_track_url(trackId, timeout=30):
    """Look up the audio source URL for a single track.

    Args:
        trackId: Track identifier, sent as the ``trackIds`` query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``src`` value of the first entry in the response's
        ``data.tracksForAudioPlay`` array, or ``None`` when the response's
        ``ret`` field is missing or not 200, or when that array is empty.
        NOTE(review): ``src`` itself may be empty for some tracks; the
        caller in this file checks truthiness before using it.

    Raises:
        requests.exceptions.RequestException: On connection failure or
            timeout. ``resp.json()`` also raises if the body is not JSON.
    """
    url = 'https://www.ximalaya.com/revision/play/tracks'

    # Without a timeout a stalled connection would block the script forever.
    resp = requests.get(
        url,
        params={'trackIds': str(trackId)},
        headers=headers,
        timeout=timeout,
    )
    result = resp.json()

    if result.get('ret') != 200:
        return None

    tracks_for_play = result['data']['tracksForAudioPlay']
    if not tracks_for_play:
        return None

    # Only one ID was requested, so the first entry is the one we want.
    return tracks_for_play[0]['src']
def download_track(url, file, timeout=30):
    """Stream the resource at *url* into the local file *file*.

    Args:
        url: Address to download from.
        file: Destination path; opened in binary write mode, so an existing
            file is overwritten.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.exceptions.HTTPError: If the server answers with a 4xx/5xx
            status.
        requests.exceptions.RequestException: On connection failure or
            timeout.
        OSError: If *file* cannot be opened or written.
    """
    # Using the response as a context manager releases the streamed
    # connection even when writing fails part-way through.
    with requests.get(url, headers=headers, stream=True, timeout=timeout) as resp:
        # Fail loudly instead of saving an HTTP error page as if it were audio.
        resp.raise_for_status()

        with open(file, 'wb') as f:
            for chunk in resp.iter_content(chunk_size=1024):
                # Skip empty chunks.
                if chunk:
                    f.write(chunk)
            
# Script entry point.
# This crawler pulls its data through the site's JSON API; reference article:
# cnblogs.com/wuliqv/p/9386143.html
if __name__ == '__main__':
    # Imported here so the entry point carries its own dependencies.
    import os
    import re
    from urllib.parse import urlparse

    albumId = 7620048
    pageNum = 1

    # Named save_dir rather than dir so the builtin dir() is not shadowed.
    save_dir = 'C:/Users/21at/Desktop/python/temp/'

    # open() does not create missing parent directories, so make sure the
    # target folder exists before the first download.
    os.makedirs(save_dir, exist_ok=True)

    # One page of {'trackId', 'title'} entries for the album.
    trackList = get_track_list(albumId, pageNum)
    print('trackList'+str(trackList))

    for track in trackList:
        # Resolve the track ID to its audio source URL.
        trackUrl = get_track_url(track['trackId'])
        if not trackUrl:
            continue

        # Take the extension from the URL's path component only, so a query
        # string or a dot in the host name cannot leak into the file name.
        ext = os.path.splitext(urlparse(trackUrl).path)[1]

        # Replace characters that Windows forbids in file names.
        safe_title = re.sub(r'[\\/:*?"<>|]', '_', str(track['title']))
        file_path = os.path.join(save_dir, safe_title + ext)

        print('正在下载'+trackUrl)
        download_track(trackUrl, file_path)
来做第一个评论吧!~

发送评论 编辑评论


				
|´・ω・)ノ
ヾ(≧∇≦*)ゝ
(☆ω☆)
(╯‵□′)╯︵┴─┴
 ̄﹃ ̄
(/ω\)
∠( ᐛ 」∠)_
(๑•̀ㅁ•́ฅ)
→_→
୧(๑•̀⌄•́๑)૭
٩(ˊᗜˋ*)و
(ノ°ο°)ノ
(´இ皿இ`)
⌇●﹏●⌇
(ฅ´ω`ฅ)
(╯°A°)╯︵○○○
φ( ̄∇ ̄o)
ヾ(´・ ・`。)ノ"
( ง ᵒ̌皿ᵒ̌)ง⁼³₌₃
(ó﹏ò。)
Σ(っ °Д °;)っ
( ,,´・ω・)ノ"(´っω・`。)
╮(╯▽╰)╭
o(*////▽////*)q
>﹏<
( ๑´•ω•) "(ㆆᴗㆆ)
😂
😀
😅
😊
🙂
🙃
😌
😍
😘
😜
😝
😏
😒
🙄
😳
😡
😔
😫
😱
😭
💩
👻
🙌
🖕
👍
👫
👬
👭
🌚
🌝
🙈
💊
😶
🙏
🍦
🍉
😣
Source: github.com/k4yt3x/flowerhd
颜文字
Emoji
小恐龙
花!
上一篇
下一篇