这是一个实时票房数据网站,大多数数据都是明文的,只有综合票房、分账票房数据做了字体反爬!

视频教程

https://www.bilibili.com/video/BV1a34y1L7Ri/

完整代码

import time,random,hashlib,requests,jsonpath,re,ddddocr,io,uuid
from fontTools.ttLib import TTFont
from PIL import ImageFont, Image, ImageDraw

# Shared OCR engine, created once when the module is loaded; main() feeds it
# the rendered image of each obfuscated font glyph to recover the character.
ocr = ddddocr.DdddOcr()

def _recognize_glyphs(font_path, glyph_names):
    """Render each glyph of the font at *font_path* and OCR it.

    Args:
        font_path: Path of the downloaded font file.
        glyph_names: Glyph names of the form ``uniXXXX`` where ``XXXX`` is the
            hexadecimal code point the glyph is mapped to.

    Returns:
        A list, parallel to *glyph_names*, holding whatever text the
        module-level ``ocr`` engine recognised for each glyph.
    """
    font = ImageFont.truetype(font_path, 40)
    recognized = []
    for glyph_name in glyph_names:
        # Strip the "uni" prefix and turn the hex code point into a character.
        glyph_char = chr(int(glyph_name[3:], 16))
        canvas = Image.new(mode='RGB', size=(42, 40), color="white")
        ImageDraw.Draw(im=canvas).text(xy=(0, 0), text=glyph_char, fill=0, font=font)
        buffer = io.BytesIO()
        canvas.save(buffer, format='JPEG')
        recognized.append(ocr.classification(buffer.getvalue()))
    return recognized


def _decode_box_number(encoded, glyph_to_char):
    """Translate an obfuscated number into readable text.

    Args:
        encoded: ``;``-separated HTML hex entities such as
            ``&#xe8c1;.&#xf0a2;``; a ``.`` in front of an entity stands for a
            decimal point.
        glyph_to_char: Mapping from glyph name (``uniXXXX``) to the character
            recognised for that glyph.

    Returns:
        The decoded string. Entities with no matching glyph are skipped.
    """
    pieces = []
    for token in encoded.split(';'):
        if not token:
            continue
        glyph_name = 'uni' + token.replace('&#x', '', 1).replace('.', '').upper()
        char = glyph_to_char.get(glyph_name)
        if char is None:
            # Unknown glyph: contribute nothing, not even the decimal point.
            continue
        pieces.append('.' + char if '.' in token else char)
    return ''.join(pieces)


def _extract_field(payload, path):
    """Return every JSONPath match for *path* in *payload*.

    Raises:
        RuntimeError: If nothing matches (``jsonpath.jsonpath`` signals that by
            returning ``False``, which would otherwise fail later with an
            unhelpful ``TypeError``).
    """
    values = jsonpath.jsonpath(payload, path)
    if not values:
        raise RuntimeError(f'no values found for JSONPath {path!r}')
    return values


def main():
    """Fetch the Maoyan box-office dashboard and print one line per movie.

    Steps: build the signed dashboard URL, download the JSON payload, download
    the obfuscation font referenced by its ``fontStyle`` entry (saved as
    ``temp.woff`` in the working directory), OCR every glyph of that font to
    learn which character it draws, then decode each movie's obfuscated
    box-office figure and print it together with the plain-text fields.

    Raises:
        requests.HTTPError: If either HTTP request returns an error status.
        RuntimeError: If the font URL or an expected JSON field is missing.
    """
    uid = uuid.uuid4()
    # Integer milliseconds, like a browser timestamp; the bare float product
    # would leak a fractional part into both the URL and the signature.
    ts = int(time.time() * 1000)
    # NOTE(review): presumably the signing salt used by the site's own
    # JavaScript -- confirm if requests start being rejected.
    key = 'A013F70DB97834C0A5492378BD76C53A'
    # NOTE(review): looks like the base64 form of the User-Agent header sent
    # below -- keep the two in sync if either is changed.
    ua = 'TW96aWxsYS81LjAgKFdpbmRvd3MgTlQgMTAuMDsgV2luNjQ7IHg2NCkgQXBwbGVXZWJLaXQvNTM3LjM2IChLSFRNTCwgbGlrZSBHZWNrbykgQ2hyb21lLzEwMS4wLjQ5NTEuNjQgU2FmYXJpLzUzNy4zNiBFZGcvMTAxLjAuMTIxMC41Mw=='
    index = int(1000 * random.random() + 1)
    enstr = f'method=GET&timeStamp={ts}&User-Agent={ua}&index={index}&channelId=40009&sVersion=2&key={key}'
    signkey = hashlib.md5(enstr.encode(encoding='UTF-8')).hexdigest()
    url = f'https://piaofang.maoyan.com/dashboard-ajax?orderType=0&uuid={uid}&timeStamp={ts}&User-Agent={ua}&index={index}&channelId=40009&sVersion=2&signKey={signkey}'
    headers = {
        'Accept': 'application/json, text/plain, */*',
        'Referer': 'https://piaofang.maoyan.com/dashboard',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.64 Safari/537.36 Edg/101.0.1210.53'
    }
    # NOTE(security): verify=False disables TLS certificate checking. It is
    # kept to preserve existing behaviour; remove it unless it is really needed.
    response = requests.get(url, headers=headers, verify=False, timeout=10)
    response.raise_for_status()
    # Parse the body once and reuse it for every lookup below.
    payload = response.json()

    font_match = re.search(r'opentype"\),url\("(//.*?\.woff)"', payload['fontStyle'])
    if font_match is None:
        raise RuntimeError('could not find the .woff font URL in fontStyle')
    fonturl = 'http:' + font_match.group(1)
    font_response = requests.get(fonturl, timeout=10)
    font_response.raise_for_status()
    with open('temp.woff', 'wb') as f:
        f.write(font_response.content)

    tfont = TTFont("temp.woff")
    try:
        # The first two glyph-order entries are skipped; presumably they are
        # placeholder glyphs (e.g. ".notdef") rather than digits -- TODO confirm.
        uni_list = tfont.getGlyphOrder()[2:]
    finally:
        tfont.close()
    print('uni列表:', uni_list)

    # Draw each remaining glyph onto an image and OCR it to get its character.
    charList = _recognize_glyphs("temp.woff", uni_list)
    print('对应字符:', charList)
    glyph_to_char = dict(zip(uni_list, charList))

    # Pull the fields of interest out of the payload.
    moviename = _extract_field(payload, '$.movieList.data.list..movieInfo.movieName')
    movieInfo = _extract_field(payload, '$.movieList.data.list..movieInfo.releaseInfo')
    sumBoxDesc = _extract_field(payload, '$.movieList.data.list..sumBoxDesc')
    boxRate = _extract_field(payload, '$.movieList.data.list..boxRate')  # box-office share
    showCount = _extract_field(payload, '$.movieList.data.list..showCount')  # number of screenings
    enNum = _extract_field(payload, '$.movieList.data.list..boxSplitUnit.num')
    enNumDw = _extract_field(payload, '$.movieList.data.list..boxSplitUnit.unit')

    # Decode the obfuscated box-office figure of every movie and report it.
    for j, name in enumerate(moviename):
        rstr = _decode_box_number(enNum[j], glyph_to_char)
        print(f'{name}\t{movieInfo[j]}\t{sumBoxDesc[j]}\t综合票房:{rstr}{enNumDw[j]}\t票房占比:{boxRate[j]}\t排片场次:{showCount[j]}')

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

本站所有资源版权均属于原作者所有,这里所提供资源均只能用于参考学习使用,请在下载后24小时内删除,严禁商用。若由于商用引起版权纠纷,一切责任均由使用者承担。