目标:球队的基础数据

url:https://www.cbaleague.com/data/#/teamMain?teamId=29115

加密效果如下:

找到解密算法

首先可以通过调用堆栈找到 send,然后找到 onreadystatechange,这里就是这一次请求应答的接收函数:

一路跟踪调用栈,发现了熟悉的家伙:

/**
 * Response decryptor as found in the site's front-end bundle (quoted verbatim).
 *
 * Decrypts `e` with AES (ECB mode, Pkcs7 padding) using a hard-coded key,
 * converts the result to UTF-8 text and parses it as JSON.
 *
 * @param e - encrypted response body; presumably a Base64 string (TODO confirm
 *            against the actual response).
 * @returns the parsed JSON value, or `e` unchanged when UTF-8 decoding or
 *          JSON parsing throws.
 */
function a5e(e) {
    // Hard-coded key string; converted to a crypto-js word array below.
    const t = "uVayqL4ONKjFbVzQ";
  // These are the same crypto-js APIs seen in the NetEase case
  // (Fv appears to be the bundled crypto-js object).
    var r = Fv.enc.Utf8.parse(t)
      , n = Fv.AES.decrypt(e, r, {
        mode: Fv.mode.ECB,
        padding: Fv.pad.Pkcs7
    });
    try {
        return JSON.parse(Fv.enc.Utf8.stringify(n))
    } catch {
        // Decoding/parsing failed: fall back to returning the input as-is.
        return e
    }
}

接着复现这个解密过程:

var Fv = require("crypto-js")

/**
 * Local reproduction of the site's response decryptor.
 *
 * Runs AES decryption (ECB mode, Pkcs7 padding) on `e` through the crypto-js
 * object `Fv`, decodes the result as UTF-8 and parses it as JSON.
 *
 * @param e - encrypted response body handed to `Fv.AES.decrypt`.
 * @returns the parsed JSON value, or `e` itself when decoding or parsing throws.
 */
function a5e(e) {
    // Key string copied from the site's own a5e implementation.
    const keyWords = Fv.enc.Utf8.parse("uVayqL4ONKjFbVzQ");
    const cipherOptions = {
        mode: Fv.mode.ECB,
        padding: Fv.pad.Pkcs7
    };
    const decrypted = Fv.AES.decrypt(e, keyWords, cipherOptions);

    let parsed;
    try {
        const plaintext = Fv.enc.Utf8.stringify(decrypted);
        parsed = JSON.parse(plaintext);
    } catch {
        // Not valid UTF-8 / JSON: hand the original input back untouched.
        return e;
    }
    return parsed;
}

直接秒杀,上爬虫。

爬取数据

import json
import requests
import execjs

# Endpoints on the CBA data server.
# `url` is the target of the (currently disabled) POST request below,
# `url1` is not used by this script, `url2` is the GET request actually sent.
url = "https://data-server.cbaleague.com/api/teams/basic-data"
url1 = "https://data-server.cbaleague.com/api/com-code-tables/getTeam?type=2&season=2024"
url2 = "https://data-server.cbaleague.com/api/teams/29115/seasons/2024/players"

# Browser-like headers sent with GET requests.
headers = {
    "Accept": "application/json, text/plain, */*",
    # "br" is deliberately not advertised: requests can only decode Brotli
    # when an optional brotli package is installed; otherwise resp.text would
    # be undecodable bytes.
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Cache-Control": "no-cache",
    "Cookie": "Hm_lvt_c61966058bd0c31ce4de210f2b4379fe=1740549272; HMACCOUNT=F6C1466DD9EC6555; Hm_lpvt_c61966058bd0c31ce4de210f2b4379fe=1740549300",
    "Isencrypt": "encrypt",
    "Origin": "https://www.cbaleague.com",
    "Pragma": "no-cache",
    "Referer": "https://www.cbaleague.com/data/",
    "Sec-Ch-Ua": "\"Chromium\";v=\"122\", \"Not(A:Brand\";v=\"24\", \"Google Chrome\";v=\"122\"",
    "Sec-Ch-Ua-Mobile": "?0",
    "Sec-Ch-Ua-Platform": "\"Windows\"",
    "Sec-Fetch-Dest": "empty",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Site": "same-site",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.6261.95 Safari/537.36"
}

# Headers for POST requests: identical to `headers` plus the JSON content type.
# Content-Length is intentionally omitted; requests computes it from the body.
headers_post = {
    **headers,
    "Content-Type": "application/json;charset=UTF-8",
}

# JSON body for the basic-data POST endpoint.
data = {
    "acrossTeamId": None,
    "isAsc": None,
    "matchTypeId": None,
    "season": "2024",
    "selfOrOther": "1",
    "sortStr": "winRateSort",
    "statisticalDimension": "1"
}

# Load the JavaScript decryptor (defines a5e); the context manager makes sure
# the file handle is closed once the source has been read.
with open("cba.js", "r", encoding="utf-8") as f:
    js_code = f.read()
js = execjs.compile(js_code)

# Alternative request against the basic-data endpoint. `json=` is used so the
# body is actually serialized as JSON, matching the declared Content-Type:
# resp = requests.post(url, headers=headers_post, json=data, timeout=10)
resp = requests.get(url=url2, headers=headers, timeout=10)
# Fail loudly on an HTTP error instead of trying to decrypt an error page.
resp.raise_for_status()

# The body is wrapped in double quotes; strip them before handing the
# ciphertext to the JavaScript a5e function.
res_json = js.call("a5e", resp.text.strip('"'))

print(res_json)

爬取数据-方法 2

搜索拦截器关键词 interceptors,得到结果如下:

有两个 resp 结果,依次对这两个地方打断点调试,即可找到 a5e 函数,即解密函数:

后面的步骤和第一种一样,相比第一种,第二种非常简单,但是吃经验。

:::info
为什么会想到拦截器:

在调用堆栈中出现了 Promise.then,而 Axios 正是基于 Promise 实现的,它的拦截器也是通过 Promise 链依次执行的,所以猜测项目使用了 Axios,属于是碰碰运气。
:::

关于拦截器的资料