目标:球队的基础数据
url:https://www.cbaleague.com/data/#/teamMain?teamId=29115
加密效果如下:
找到解密算法
首先可以通过堆栈调用找到 send
,然后找到 onreadystatechange
,这里就是这一次请求应答的接收函数:
一路跟踪调用栈,发现了熟悉的家伙:
/**
 * Response decryptor as found in the site's bundle (listed verbatim).
 *
 * Decrypts `e` with AES in ECB mode with PKCS7 padding, using the fixed
 * UTF-8 key below, then parses the decrypted text as JSON.
 * `Fv` is the bundle's name for the crypto library; the calls match the
 * crypto-js API.
 *
 * @param {string} e - Encrypted response body handed to `Fv.AES.decrypt`.
 * @returns {*} The parsed JSON value, or `e` unchanged if decoding or
 *   parsing the decrypted data throws.
 */
function a5e(e) {
const t = "uVayqL4ONKjFbVzQ";
// These are the same few crypto-js APIs that were seen in the NetEase case
var r = Fv.enc.Utf8.parse(t)
, n = Fv.AES.decrypt(e, r, {
mode: Fv.mode.ECB,
padding: Fv.pad.Pkcs7
});
try {
return JSON.parse(Fv.enc.Utf8.stringify(n))
} catch {
// Fall back to the raw input when the decrypted data is not valid UTF-8 / JSON
return e
}
}
接着复现这个解密过程:
var Fv = require("crypto-js")
/**
 * Decrypt an encrypted API response body and parse it as JSON.
 *
 * Uses the crypto-js module bound to `Fv`: AES, ECB mode, PKCS7 padding,
 * with the fixed key recovered from the site's bundle.
 *
 * @param {string} e - Encrypted response body handed to `Fv.AES.decrypt`.
 * @returns {*} The parsed JSON value, or `e` unchanged if decoding or
 *   parsing the decrypted data throws.
 */
function a5e(e) {
  const keyWords = Fv.enc.Utf8.parse("uVayqL4ONKjFbVzQ");
  const decrypted = Fv.AES.decrypt(e, keyWords, {
    mode: Fv.mode.ECB,
    padding: Fv.pad.Pkcs7,
  });

  let result;
  try {
    const plainText = Fv.enc.Utf8.stringify(decrypted);
    result = JSON.parse(plainText);
  } catch {
    // Best effort: anything that does not decrypt to valid JSON is
    // returned to the caller exactly as it came in.
    result = e;
  }
  return result;
}
直接秒杀,上爬虫。
爬取数据
import json
import requests
import execjs
# Endpoints on the CBA data server. Only `url2` is requested below; `url` is the
# target of the optional POST variant and `url1` is kept for reference.
url = "https://data-server.cbaleague.com/api/teams/basic-data"
url1 = "https://data-server.cbaleague.com/api/com-code-tables/getTeam?type=2&season=2024"
url2 = "https://data-server.cbaleague.com/api/teams/29115/seasons/2024/players"

# Seconds to wait for the server; without a timeout `requests` can block forever.
REQUEST_TIMEOUT = 10

# Browser-like headers copied from a captured request.
# NOTE(review): "Isencrypt" presumably asks the server for the encrypted payload — confirm.
headers = {
    "Accept": "application/json, text/plain, */*",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Cache-Control": "no-cache",
    "Cookie": "Hm_lvt_c61966058bd0c31ce4de210f2b4379fe=1740549272; HMACCOUNT=F6C1466DD9EC6555; Hm_lpvt_c61966058bd0c31ce4de210f2b4379fe=1740549300",
    "Isencrypt": "encrypt",
    "Origin": "https://www.cbaleague.com",
    "Pragma": "no-cache",
    "Referer": "https://www.cbaleague.com/data/",
    "Sec-Ch-Ua": "\"Chromium\";v=\"122\", \"Not(A:Brand\";v=\"24\", \"Google Chrome\";v=\"122\"",
    "Sec-Ch-Ua-Mobile": "?0",
    "Sec-Ch-Ua-Platform": "\"Windows\"",
    "Sec-Fetch-Dest": "empty",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Site": "same-site",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.6261.95 Safari/537.36"
}

# Headers for the POST variant: the GET headers plus a JSON content type.
# No fixed Content-Length is set; `requests` derives it from the actual body.
headers_post = {
    **headers,
    "Content-Type": "application/json;charset=UTF-8",
}

# JSON body for the POST to `url` (team basic data).
data = {
    "acrossTeamId": None,
    "isAsc": None,
    "matchTypeId": None,
    "season": "2024",
    "selfOrOther": "1",
    "sortStr": "winRateSort",
    "statisticalDimension": "1"
}

# Load the JS file that defines `a5e`; the context manager closes the handle.
with open("cba.js", "r", encoding="utf-8") as f:
    js_code = f.read()
js = execjs.compile(js_code)

# POST variant: `json=` serialises the body as JSON (keeping the None fields as
# null), which matches the declared Content-Type; `data=` would form-encode it.
# resp = requests.post(url, headers=headers_post, json=data, timeout=REQUEST_TIMEOUT)
resp = requests.get(url=url2, headers=headers, timeout=REQUEST_TIMEOUT)
# Fail loudly on HTTP errors instead of trying to decrypt an error page.
resp.raise_for_status()

# Strip the surrounding double quotes from the body before handing the
# ciphertext to the JS decryptor.
res_json = js.call("a5e", resp.text.strip('"'))
print(res_json)
爬取数据-方法 2
搜索拦截器关键词 interceptors
,得到结果如下:
有两个 resp 结果,依次对这两个地方打断点调试,即可找到 a5e
函数,即解密函数:
后面的步骤和第一种一样,相比第一种,第二种非常简单,但是吃经验。
:::info
为什么会想到拦截器:
在调用堆栈中出现了 Promise.then
,这是 Axios
这类基于 Promise 的请求库的典型特征,而拦截器(interceptors)正是 Axios
库提供的功能,所以属于是碰碰运气。
:::
评论