0%

获取网易云音乐歌单

  1. 0. 内容概述
  2. 1. 网络请求分析
    1. 1.1 找到请求地址
    2. 1.2 POST请求参数分析
      1. 1.2.1 请求内容简略
      2. 1.2.2 加密函数和加密算法分析
      3. 1.2.3 POST请求内容分析
  3. 2. Python实现参数加密和数据爬取

0. 内容概述

获取一个用户的所有歌单(包括创建的歌单和收藏的歌单),将歌单的信息(包括封面/名称/歌曲文件)下载到本地。

  • 可以选择下载一个用户的所有歌单所有歌曲
  • 也可以下载一个歌单内的所有歌曲

难点:解析网易云音乐获取歌单和音乐mp3文件时发送的POST请求,模拟其加密方式,发送自己想要的请求。

工具:

  1. Firefox浏览器
  2. Python 3.8.2

1. 网络请求分析

1.1 找到请求地址

登录网易云音乐网页版之后,进入“我的音乐”,打开开发者工具转到网络,清除掉所有消息后,点击一个自建的歌单,可以看到出现了几个请求,如下图所示。

image-20210210143234671

因为GET请求只获取到了两个图片信息,因此推断歌曲信息包含在了POST请求中。

在新标签页打开其中后缀为detail的POST请求:

https://music.163.com/weapi/v6/playlist/detail?csrf_token=e7cd35dfd29adb489a4d5b8c3b3ef8fa

发现歌单信息包含在了该请求的返回结果中,包括歌曲的ID,专辑信息等

image-20210210143819911

因此只需要模拟这个请求,就可以获取到歌单的内容。

1.2 POST请求参数分析

1.2.1 请求内容简略

在浏览器中查看POST请求参数,发现两道密文:

image-20210210144408449

将发送该请求的js文件下载下来,很明显是经过webpack打包过的,搜索encSecKey,找到如下代码:

image-20210210145308133

将包含这两个参数的完整函数提取出来,得到如下格式化结果:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
encryptFetch = function (e) { // Sends request e through n; when the request qualifies, its parameters are encrypted first.
var t = 1 < arguments.length && void 0 !== arguments[1] ? arguments[1] : {}, n = 2 < arguments.length && void 0 !== arguments[2] ? arguments[2] : initFetch, o = e, r = {}, a = _extends$1({}, defaultOptions, t), i = o.split("?"); // t: caller options, n: fetch function (default initFetch), o: URL, r: parameters to encrypt, a: merged options, i: URL split at "?"
void 0 === a.headers && (a.headers = {});
var s = a.headers, c = a.encrypt, l = a.paramstr, u = a.noEnc, d = a.serializedParam, f = a.whiteHost;
delete a.encrypt, delete a.paramstr, delete a.noEnc, delete a.serializedParam;
var p = c; // p: whether to encrypt; starts as the merged `encrypt` option
void 0 === t.encrypt && void 0 !== u && (p = !u), l && (a.data = obj2str(a.data)); // noEnc decides only when the caller did not pass `encrypt`
var _ = s[CT_TYPE] !== CT_FILE, h = !1; // _: content type is not CT_FILE; h: URL contains one of the whiteHost entries
if (f) {
var m = [];
"string" == typeof f ? m = [f] : isArray(f) && (m = m.concat(f)), h = m.some(function (e) { return -1 < o.indexOf(e) })
} var y = h || rMatchUrl.test(o); // y: URL matched whiteHost or the rMatchUrl pattern
if (p && _ && y) { // encrypted path
logReq(o, a), s[CT_TYPE] = FORM_TYPE, 2 === i.length && (r = query2obj(i[1])), o = i[0], KeyArr.forEach(function (e) { // the URL query string (if any) seeds r; the query string is then dropped from o
if (a[e]) {
var t = "string" == typeof a[e] ? query2obj(a[e]) : a[e]; // option values listed in KeyArr may be query strings or objects
r = _extends$1({}, r, t)
}
});
var g = getCookie("__csrf"); // CSRF token from the "__csrf" cookie; the next line adds it to r and to the URL, swaps the /api/ path segment for /weapi/ and forces POST
g && (r.csrf_token = g), o = o.replace(/\/api\//, "/weapi/") + (g ? "?" + obj2query({ csrf_token: r.csrf_token }) : ""), a.method = "post", delete a.query, delete a.data;

/****** Key code: encrypt the JSON-serialized parameter object r ******/
var v = encrypt.asrsea(JSON.stringify(r), enk.emj2code(["流泪", "强"]), enk.BASE_CODE, enk.emj2code(["爱心", "女孩", "惊恐", "大笑"]));
return a.body = obj2query({ params: v.encText, encSecKey: v.encSecKey }), n(o, a) // the body carries only the two ciphertext fields
}
if (void 0 === s[CT_TYPE] && (a.headers[CT_TYPE] = FORM_TYPE), d) { // unencrypted path: default the content type; d is the serializedParam option
var b = "", w = a.data;
isObject(w) ? b = obj2query(w) : "string" == typeof w && (b = w), delete a.data;
var S = a.method;
if ("string" == typeof S && (S = S.toLowerCase()), "post" === S) a.body = b; // POST: serialized data becomes the body
else if ("get" === S) { // GET: serialized data is appended to the URL
var C = -1 !== o.indexOf("?") ? "&" : "?";
o += "" + C + b
}
}
return n(o, a)
}

这段代码用到了大量的匿名变量,但是很显然,加密params和encSecKey这两个参数的代码由encrypt.asrsea函数完成,因此接下来按照数据流的方式,解析该函数的输入参数和构成。

1.2.2 加密函数和加密算法分析

第一个参数:JSON.stringify(r)

image-20210210154252941

函数encryptFetch意思是为请求加密,那么推测,其唯一参数e为网络请求的地址,在函数的第一行写到:o=e, i=o.split("?"),猜测是对请求地址中的参数进行提取;

在14行处,2===i.length && (r = query2obj(i[1]))很显然了,如果该请求存在参数,那么用?分离出来正是位于第二位,使用query2obj将其转换成一个JS对象;

在20行,在cookie中获取了一个跨域保护参数__csrf,并将其加入到JS对象r中;

在21行,对网络请求的地址进行了修改,在__csrf不为空的情况下,将其作为参数写入到请求地址o中去,这个o在加密时并没有用到

综上所述,JSON.stringify(r)是请求地址参数的字符串化结果,一个例子如下:

1
2
3
4
5
// encryptFetch('https://aaa.com/?mode=123')
r = {
mode: 123
}
JSON.stringify(r) // '{"mode":123}'

第二/第三/第四个参数均来自enk,在JS文件中搜索,找到这个变量的所有相关代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
// Tables recovered from the bundle. emjMap maps an emoji name to a short code
// fragment, md lists emoji names in a fixed order, and BASE_CODE is emj2code(md),
// i.e. the fragments of every name in md joined in that order.
// d$1 and e$1 are defined outside this excerpt.
var encrypt = { asrsea: d$1, ecnonasr: e$1 },
emjMap = { "色": "00e0b", "流感": "509f6", "这边": "259df", "弱": "8642d", "嘴唇": "bc356", "亲": "62901", "开心": "477df", "呲牙": "22677", "憨笑": "ec152", "猫": "b5ff6", "皱眉": "8ace6", "幽灵": "15bb7", "蛋糕": "b7251", "发怒": "52b3a", "大哭": "b17a8", "兔子": "76aea", "星星": "8a5aa", "钟情": "76d2e", "牵手": "41762", "公鸡": "9ec4e", "爱意": "e341f", "禁止": "56135", "狗": "fccf6", "亲亲": "95280", "叉": "104e0", "礼物": "312ec", "晕": "bda92", "呆": "557c9", "生病": "38701", "钻石": "14af6", "拜": "c9d05", "怒": "c4f7f", "示爱": "0c368", "汗": "5b7a4", "小鸡": "6bee2", "痛苦": "55932", "撇嘴": "575cc", "惶恐": "e10b4", "口罩": "24d81", "吐舌": "3cfe4", "心碎": "875d3", "生气": "e8204", "可爱": "7b97d", "鬼脸": "def52", "跳舞": "741d5", "男孩": "46b8e", "奸笑": "289dc", "猪": "6935b", "圈": "3ece0", "便便": "462db", "外星": "0a22b", "圣诞": "8e7", "流泪": "01000", "强": "1", "爱心": "0CoJU", "女孩": "m6Qyw", "惊恐": "8W8ju", "大笑": "d" },
md = ["色", "流感", "这边", "弱", "嘴唇", "亲", "开心", "呲牙", "憨笑", "猫", "皱眉", "幽灵", "蛋糕", "发怒", "大哭", "兔子", "星星", "钟情", "牵手", "公鸡", "爱意", "禁止", "狗", "亲亲", "叉", "礼物", "晕", "呆", "生病", "钻石", "拜", "怒", "示爱", "汗", "小鸡", "痛苦", "撇嘴", "惶恐", "口罩", "吐舌", "心碎", "生气", "可爱", "鬼脸", "跳舞", "男孩", "奸笑", "猪", "圈", "便便", "外星", "圣诞"],
BASE_CODE = emj2code(md);
/**
 * Turns a list of emoji names into a single code string.
 *
 * Each name is looked up in emjMap and the resulting fragments are joined
 * without a separator; a name missing from emjMap contributes nothing.
 *
 * @param {string[]} e - emoji names, in the order their fragments should appear
 * @returns {string} the concatenated fragments
 */
function emj2code(e) {
  const fragments = e.map((name) => emjMap[name]);
  return fragments.join("");
}
// enk bundles the encoder function with the precomputed BASE_CODE string.
var enk = { emj2code: emj2code, BASE_CODE: BASE_CODE }

/* Results of evaluating the three enk-derived arguments */
console.log(enk.emj2code(["流泪", "强"])) // 010001
console.log(enk.BASE_CODE) // 00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7
console.log(enk.emj2code(["爱心", "女孩", "惊恐", "大笑"])) // 0CoJUm6Qyw8W8jud

参数分析完毕,接下来在整个js文件中搜索asrsea,并将其相关的函数提取出来,得到如下代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
// encrypt.asrsea is the d$1 function below; e$1 is not part of this excerpt.
var encrypt = { asrsea: d$1, ecnonasr: e$1 }
/**
 * Entry point of the encryption (exposed as encrypt.asrsea).
 *
 * The text is passed through b() twice, first keyed with o and then with a
 * freshly generated 16-character string a; that same a is handed to c$1
 * together with t and n to produce encSecKey.
 *
 * @param {string} e - text to encrypt
 * @param {string} t - forwarded to c$1; the caller passes 010001
 * @param {string} n - forwarded to c$1; the caller passes enk.BASE_CODE
 * @param {string} o - key of the first b() pass; the caller passes 0CoJUm6Qyw8W8jud
 * @returns {{encText: *, encSecKey: *}} result of the two b() passes and of c$1
 */
function d$1(e, t, n, o) {
  const a = a$1(16);
  const firstPass = b(e, o);
  return {
    encText: b(firstPass, a),
    encSecKey: c$1(a, t, n),
  };
}
/**
 * Builds a random string of e characters drawn from a-z, A-Z and 0-9.
 *
 * Every character is chosen independently with Math.random().
 *
 * @param {number} e - number of characters to generate
 * @returns {string} the random string
 */
function a$1(e) {
  const alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
  let result = "";
  let produced = 0;
  while (produced < e) {
    const index = Math.floor(Math.random() * alphabet.length);
    result += alphabet.charAt(index);
    produced += 1;
  }
  return result;
}
/**
 * Encrypts e with key t through aes.AES.encrypt in CBC mode.
 *
 * Key, IV and message are each converted with aes.enc.Utf8.parse; the IV is
 * always the literal "0102030405060708".
 *
 * @param {string} e - message to encrypt
 * @param {string} t - key
 * @returns {string} the string form of the encryption result
 */
function b(e, t) {
  const utf8 = aes.enc.Utf8;
  const key = utf8.parse(t);
  const iv = utf8.parse("0102030405060708");
  const message = utf8.parse(e);
  const cipher = aes.AES.encrypt(message, key, { iv: iv, mode: aes.mode.CBC });
  return cipher.toString();
}
/**
 * Encrypts e with the RSA helper library.
 *
 * Sets the library's maximum digit count to 131, builds a key pair from t and
 * n (with an empty string as the middle argument) and encrypts e with it.
 *
 * @param {string} e - text to encrypt
 * @param {string} t - first argument of RSAKeyPair
 * @param {string} n - third argument of RSAKeyPair
 * @returns {*} whatever RSA.encryptedString returns
 */
function c$1(e, t, n) {
  RSA.setMaxDigits(131);
  const keyPair = new RSA.RSAKeyPair(t, "", n);
  return RSA.encryptedString(keyPair, e);
}

加密函数的入口是d$1函数,在该函数中,encText使用b函数进行了两层加密,encSecKey使用c$1函数进行了一层加密,两个参数之间的关联在于由a$1函数生成a变量,该变量就是一个由数字和字母构成的16位长度的随机字符串。

encText的加密过程为:使用enk.emj2code(["爱心", "女孩", "惊恐", "大笑"])生成的字符串0CoJUm6Qyw8W8jud作为密钥(KEY)、0102030405060708作为偏移量(IV)进行加密,接着再用随机字符串a作为密钥,同样的偏移量进行二次加密;

aes的加密由crypto-js实现

encSecKey的加密用到了RSA方法,其加密方法如下:

$$c = m^{e} \bmod n$$

其中,$m$是信息,$e$是次方,对应代码中的"010001"的10进制数65537,$n$是模大小,对应代码的enk.BASE_CODE,字符串长度258位,转为10进制为309位,$c$是加密后的密文。

总结一下加密过程:首先生成一个16位的随机字符串a,它与常量字符串0CoJUm6Qyw8W8jud一起,作为加密和解密POST参数的密钥,用到了对称的AES算法,密文放在params参数中;参数encSecKey存放的则是通过RSA算法加密后的字符串a。

1.2.3 POST请求内容分析

要分析请求内容,一般是通过在浏览器相关代码处打断点,查看变量实现的,或者极端一点,直接破解RSA算法加密的密文,反解出参数内容,然后破解成功第二天直接到网易报到上班。

获取歌单内容的请求链接为

https://music.163.com/weapi/v6/playlist/detail?csrf_token=e7cd35dfd29adb489a4d5b8c3b3ef8fa

发起者是名为musicfrontencryptvalidator.min.js的文件,在这个文件中找到encSecKey,并打上断点,再点击另一个歌单,发现歌单刷新了却还是无法命中断点,一开始还以为是浏览器不支持,骂了一通Firefox并装了个Chrome,后来发现Chrome也做不到还强行装在了C盘,又骂骂咧咧地把Chrome卸了重新回来找问题。

终于在刷新了一遍又一遍网页之后,注意到一个获取playlist的POST请求:

https://music.163.com/weapi/user/playlist?csrf_token=e7cd35dfd29adb489a4d5b8c3b3ef8fa

发起者是名为core_68ac1b3aadf40a20caba599a0ab2365d.js的文件,其请求参数也是params和encSecKey。

image-20210210224850304

在这个文件中找到encSecKey打上断点,切换歌单,终于看到了希望看到的参数:

image-20210210225204938

参数为:

1
2
3
4
5
6
7
8
{
csrf_token: "e7cd35dfd29adb489a4d5b8c3b3ef8fa"
id: "4914485933"
limit: "1000"
n: "1000"
offset: "0"
total: "true"
}

其中的id是歌单的id,那么好了,只需要获取到歌单的ID,就可以获取到其内容了。

2. Python实现参数加密和数据爬取

代码参考自https://github.com/Jack-Cherish/python-spider,用到了PyCrypto库作为AES的加密工具,使用pip安装时提示某个.h文件中存在语法错误,推测是C++的包出错导致的,参考了一个解决方案:https://blog.csdn.net/airconan/article/details/88386378,打开VS2019的安装目录,找到C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.27.29110\include\stdint.h,将其拷贝至C:\Program Files (x86)\Windows Kits\10\Include\10.0.18362.0\ucrt\stdint.h的同一目录下,并将这个文件中的#include <stdint.h>改为#include "stdint.h",重新pip安装,即可安装成功。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
import requests
import json
import binascii
import os
import base64
import click
import sys
import re
from http import cookiejar
from Crypto.Cipher import AES
import hashlib
import logging
import time


# Download log: the log file name embeds the start-up time in whole seconds.
t = int(time.time())
log_file_name = 'download-log-{:d}.log'.format(t)
# Global logging configuration: INFO and above, each record showing time, source path/line and level.
logging.basicConfig(level=logging.INFO,
format='%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s')
# The module-wide logger is named after the log file.
logger = logging.getLogger(log_file_name)


def init_file_logger(filename='./logs/' + log_file_name):
    """
    Attach a UTF-8 file handler (INFO and above) to the module logger.

    The directory part of ``filename`` is created when it does not exist yet;
    without that, ``logging.FileHandler`` fails on a fresh checkout because
    ``./logs/`` is missing.

    :params filename: path of the log file; the file is truncated on open.
    """
    log_dir = os.path.dirname(filename)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    fh = logging.FileHandler(filename=filename, mode='w', encoding='utf-8')
    fh.setLevel(logging.INFO)
    fh.setFormatter(logging.Formatter(
        '%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s'))
    # addHandler is the supported API: it takes the logging lock and ignores duplicates.
    logger.addHandler(fh)


class Encrypyed():
    """
    Builds the ``params`` / ``encSecKey`` form fields for a weapi request.

    The payload is AES-CBC encrypted twice (first with the fixed nonce, then
    with a random key) and the random key is encrypted with textbook RSA
    using the fixed exponent and modulus below.
    """

    def __init__(self):
        self.modulus = '00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7'
        self.nonce = '0CoJUm6Qyw8W8jud'
        self.pub_key = '010001'

    # Login encryption algorithm, based on the https://github.com/stkevintan/nw_musicbox script
    def encrypted_request(self, params):
        """
        Encrypt a request parameter dict.

        :params params: JSON-serializable request parameters.
        :return: dict with the two form fields ``params`` and ``encSecKey``.
        """
        text = json.dumps(params)
        sec_key = self.create_secret_key(16)
        first_pass = self.aes_encrypt(text, self.nonce)
        enc_text = self.aes_encrypt(first_pass, sec_key.decode('utf-8'))
        enc_sec_key = self.rsa_encrpt(sec_key, self.pub_key, self.modulus)
        return {'params': enc_text, 'encSecKey': enc_sec_key}

    def aes_encrypt(self, text, secKey):
        """
        AES-CBC encrypt ``text`` with a fixed IV and return base64 text.

        :params text: plaintext <str>.
        :params secKey: key <str>; it is UTF-8 encoded before use.
        :return: base64-encoded ciphertext <str>.
        """
        raw = text.encode('utf-8')
        # Pad on the encoded bytes: non-ASCII characters occupy several bytes,
        # so a padding length derived from len(text) would not reach a block boundary.
        pad = 16 - len(raw) % 16
        raw += bytes([pad]) * pad
        encryptor = AES.new(secKey.encode('utf-8'),
                            AES.MODE_CBC, b'0102030405060708')
        ciphertext = encryptor.encrypt(raw)
        return base64.b64encode(ciphertext).decode('utf-8')

    def rsa_encrpt(self, text, pubKey, modulus):
        """
        Unpadded RSA: ``int(reversed text) ** pubKey mod modulus``.

        :params text: message <bytes>; it is reversed before conversion.
        :params pubKey: exponent as a hex string.
        :params modulus: modulus as a hex string.
        :return: lowercase hex string, left-padded with zeros to 256 characters.
        """
        text = text[::-1]
        rs = pow(int(binascii.hexlify(text), 16),
                 int(pubKey, 16), int(modulus, 16))
        return format(rs, 'x').zfill(256)

    def create_secret_key(self, size):
        """
        Return ``size`` random hexadecimal characters.

        :params size: number of characters wanted.
        :return: <bytes> of length ``size``.
        """
        # hexlify doubles the length, so size random bytes are always enough.
        return binascii.hexlify(os.urandom(size))[:size]


class Crawler():
    """
    NetEase Cloud Music crawling API.

    Holds one session for encrypted API calls and a separate plain session
    for downloading the audio files.
    """

    def __init__(self, timeout=60, cookie_path='.'):
        """
        :params timeout: timeout in seconds applied to every HTTP request.
        :params cookie_path: file name given to the LWPCookieJar of the API session.
        """
        self.headers = {
            'Accept': '*/*',
            'Accept-Encoding': 'gzip,deflate,sdch',
            'Accept-Language': 'zh-CN,zh;q=0.8,gl;q=0.6,zh-TW;q=0.4',
            'Connection': 'keep-alive',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Host': 'music.163.com',
            'Referer': 'http://music.163.com/',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
            # NOTE(review): this is a login credential committed to source;
            # consider loading it from the environment or a config file.
            'Cookie': 'MUSIC_U=d33658da9213990dd11a7440ac3225d9b6d633afc3be6807f3afb748d63b3b1033a649814e309366'
        }
        self.session = requests.Session()
        self.session.headers.update(self.headers)
        self.session.cookies = cookiejar.LWPCookieJar(cookie_path)
        self.download_session = requests.Session()
        self.timeout = timeout
        self.ep = Encrypyed()

    def post_request(self, url, params):
        """
        Send an encrypted POST request.

        :params url: API address.
        :params params: request parameters <dict>; encrypted before sending.
        :return: the decoded JSON response <dict>, or None (after printing and
                 logging the parameters) when its ``code`` field is not 200.
        """
        data = self.ep.encrypted_request(params)
        resp = self.session.post(url, data=data, timeout=self.timeout)
        result = resp.json()
        if result['code'] != 200:
            message = 'post_request error, params={:s}'.format(json.dumps(params))
            print(message)
            logger.error(message)
            return None
        return result

    def get_song_url(self, song_id, bit_rate=320000):
        """
        Get the download address of a song.

        :params song_id: song ID <int>.
        :params bit_rate: {'MD 128k': 128000, 'HD 320k': 320000}
        :return: the download address, or None when the request failed or the
                 song has no address.
        """
        url = 'http://music.163.com/weapi/song/enhance/player/url?csrf_token='
        csrf = ''
        params = {'ids': [song_id], 'br': bit_rate, 'csrf_token': csrf}
        result = self.post_request(url, params)
        if result is None:
            # post_request has already reported the failure.
            return None

        # Download address of the song; an empty list counts as "no address".
        entries = result.get('data') or []
        song_url = entries[0].get('url') if entries else None

        # The song is not available
        if song_url is None:
            message = 'Song {} is not available due to copyright issue.'.format(song_id)
            print(message)
            logger.warning(message)
            return None
        return song_url

    def get_song_by_url(self, song_url, song_name, folder):
        """
        Download a song to the local disk; an existing file is left untouched.

        :params song_url: download address; None or empty means nothing to download.
        :params song_name: song name, used as the file name.
        :params folder: target directory, created when missing.
        :raises requests.HTTPError: when the server answers with an error status.
        """
        if not song_url:
            # get_song_url returns None for unavailable songs; nothing to fetch.
            return

        if not os.path.exists(folder):
            os.makedirs(folder)
        fpath = os.path.join(folder, song_name + '.mp3')
        if sys.platform == 'win32' or sys.platform == 'cygwin':
            valid_name = re.sub(r'[<>:"/\\|?*]', '', song_name)
            if valid_name != song_name:
                message = '{} will be saved as: {}.mp3'.format(song_name, valid_name)
                print(message)
                logger.warning(message)
                fpath = os.path.join(folder, valid_name + '.mp3')

        if os.path.exists(fpath):
            return

        resp = self.download_session.get(
            song_url, timeout=self.timeout, stream=True)
        # Fail before opening the file so an error page is never saved as an .mp3.
        resp.raise_for_status()

        # The header is optional (e.g. chunked transfer); without it no progress bar is shown.
        content_length = resp.headers.get('content-length')
        length = int(content_length) if content_length is not None else None
        chunks = (chunk for chunk in resp.iter_content(chunk_size=1024) if chunk)

        with open(fpath, 'wb') as song_file:
            if length is None:
                for chunk in chunks:
                    song_file.write(chunk)
                return

            label = 'Downloading {} {}kb'.format(song_name, int(length / 1024))
            with click.progressbar(length=length, label=label) as progressbar:
                for chunk in chunks:
                    song_file.write(chunk)
                    # Advance by the real chunk size; the last chunk is usually short.
                    progressbar.update(len(chunk))


if __name__ == '__main__':
    # Script entry point: download every song of every playlist created by user_id.

    # Create the crawler
    c = Crawler()

    # # Fetch a single song
    # url = c.get_song_url('1364973839')
    # c.get_song_by_url(url, 'DEJA VU (EXTENDED MIX)', '.')
    # exit()

    # Enable file logging
    init_file_logger()

    # Fetch all playlists of the user
    user_id = '350546638'
    url = 'https://music.163.com/weapi/user/playlist'
    r = c.post_request(url, {
        "uid": user_id,
        "limit": "1001",
        "offset": "0"
    })
    if r is None:
        # post_request returns None (and logs the cause) on a non-200 code.
        sys.exit('failed to fetch the playlists of user {:s}'.format(user_id))

    play_list = r['playlist']
    for p in play_list:
        # Skip playlists whose owner is a different user.
        if str(p['userId']) != user_id:
            continue

        music_list_id = str(p['id'])
        music_list_name = p['name']
        url = 'https://music.163.com/weapi/v6/playlist/detail'
        params = {
            "id": music_list_id,
            'limit': "1000",
            'n': "1000",
            'offset': "0",
            'total': "true"
        }
        detail = c.post_request(url, params)
        if detail is None:
            # Already reported by post_request; move on to the next playlist.
            continue
        # with open('music_list_{:s}.json'.format(music_list_id), 'w', encoding='utf-8') as f:
        #     o = json.dumps(detail, ensure_ascii=False)
        #     f.write(o)

        # with open('music_list_{:s}.json'.format(music_list_id), 'r', encoding='utf-8') as f:
        #     detail = json.load(f)

        music_list = detail['playlist']['tracks']
        for music in music_list:
            mu_name = music['name']
            mu_id = music['id']
            print('download {:s}'.format(mu_name))
            logger.info('download {:s}'.format(mu_name))

            try:
                song_url = c.get_song_url(mu_id)
                if song_url is None:
                    # Unavailable song or failed request; get_song_url has logged it.
                    continue
                # Strip characters that are not allowed in the file name
                mu_name = mu_name.replace('/', '')
                mu_name = mu_name.replace('.', '')
                c.get_song_by_url(
                    song_url, mu_name, './{:s}-{:s}'.format(music_list_name, music_list_id))

            except Exception:
                # Exception (not a bare except) keeps Ctrl-C working; one bad
                # song must not stop the batch, but the traceback is recorded.
                print('download_song_by_id error {:s}'.format(mu_name))
                logger.exception('download_song_by_id error {:s}'.format(mu_name))