DOTA2利雅得大师赛利用api多线程对选手数据和战队数据爬取与分析
时间:2022-11-19 13:00:00
首先,恭喜中国队LGD赢得利雅得大师赛冠军!
目录
数据的爬取
选手数据和战队数据爬取
数据分析
数据(因为原数据的英雄都是id,这里进行连接将英雄id替换为英雄名)
选手KDA前十名
参战率、参葬率、战死率
场均十分钟补刀前十,以及他们GPM,XPM
场均反眼数前十的选手
队伍场均治疗量
编辑
本次比赛英雄的上场数前十以及胜率
计算先摧毁对方优势路、中路、劣势路一塔的胜率
最后看看LGD的使用英雄以及胜率
数据的爬取
数据来源于opendota,对opendota的api调用获取数据,找到利雅得的所有比赛id,那就要先找到这个比赛,首先咱们来看看opendota的api页面,网站OpenDota API
打开match页面,可以看到请求数据的api介绍以及对应url返回的json数据
我们在利雅得大师赛中找到一场比赛,比如6676393091
利用api提供的网站打开看看
一大串json这样看数据不方便,按F12打开抓包工具,看看里面
可以看出这场比赛的数据有很多分组,各个分组是什么意思可以查看api文档。其中league中的leagueid就是利雅得大师赛的赛事id,为14391,这是dota2不同赛事之间的区别。然后通过leagueid获取到利雅得大师赛的所有比赛id,再进行数据爬取,下面我直接上代码
首先是导入所需的模块,爬取利雅得大师赛的所有比赛id,参数id是利雅得大师赛的赛事id 14391
import os.path
import threading
import time

import pandas as pd
import requests

# Every request in this script passes `headers`, but the snippet never defines
# it; a minimal User-Agent is supplied so the calls do not raise NameError.
headers = {'User-Agent': 'Mozilla/5.0'}


def pro_game(id):
    """Return the match ids of every match listed for an OpenDota league.

    Args:
        id: OpenDota league id (the caller passes 14391). The name shadows
            the builtin but is kept so existing callers keep working.

    Returns:
        A list with the ``match_id`` of each entry in the league's match list.

    Raises:
        requests.HTTPError: if the API answers with an error status.
    """
    pro_url = f'https://api.opendota.com/api/leagues/{id}/matches'
    response = requests.get(url=pro_url, headers=headers, timeout=30)
    # Fail with the HTTP error itself rather than a confusing error while
    # iterating an error payload.
    response.raise_for_status()
    pro_match_id = [item['match_id'] for item in response.json()]
    print(pro_match_id)
    return pro_match_id
获取到比赛id之后,我们需要爬取每场比赛的数据。爬取前,需要先用比赛id进行一次post请求,因为执行post之后,opendota的内置功能会进一步解析比赛录像,从而获得更多的数据,如推塔时间、团战爆发次数等只靠get得不到的数据,所以一定要先post请求一下
def jiexi(match_id):
    """Request a parse of a match, then download the match JSON.

    The POST to ``/request/{match_id}`` is sent first because, per the
    accompanying article, it makes OpenDota analyse the match further so that
    extra fields (tower kill times, teamfights) become available.

    Args:
        match_id: OpenDota match id.

    Returns:
        The decoded JSON body of ``/matches/{match_id}``.
    """
    post_url = f'https://api.opendota.com/api/request/{match_id}'
    # The response of the parse request is not needed, only its side effect.
    requests.post(url=post_url, headers=headers, timeout=30)
    match_url = f'https://api.opendota.com/api/matches/{match_id}'
    return requests.get(url=match_url, headers=headers, timeout=30).json()
get获得每场比赛的数据后,可以选择自己想要的数据进行爬取,选手数据的爬取和存储如下:
本来想看看能不能定位选手走的是哪条路,但是因为dota2玩法多样,选手每场的分路也不一样,导致分路数据不太准确,所以我在后面的数据分析中没有用到它
def role(lr):
    """Translate a numeric lane role into its Chinese lane name.

    Args:
        lr: lane role code; 1, 2 and 3 are recognised.

    Returns:
        '优势路' for 1, '中路' for 2, '劣势路' for 3, otherwise ``None``.
    """
    lane_names = {1: '优势路', 2: '中路', 3: '劣势路'}
    return lane_names.get(lr)
根据选手所在的一方以及哪一方获胜来判断选手是否赢得了这场比赛
选手数据和战队数据爬取
def xuanshou(match_text):
    """Append one statistics row per player of a match to the global ``row_list``.

    Args:
        match_text: decoded match JSON as returned by ``jiexi``.

    A player record that lacks an expected field is reported and still stored
    with the columns that were filled before the failure.
    """
    print('开始')
    for item in match_text['players']:
        row_dic = {}
        try:
            row_dic['match_id'] = item['match_id']
            row_dic['name'] = item['name']
            row_dic['ID'] = item['account_id']
            row_dic['英雄id'] = item['hero_id']
            row_dic['位置'] = role(item['lane_role'])
            row_dic['10分钟补刀'] = item['benchmarks']['lhten']['raw']
            row_dic['杀人数'] = item['kills']
            row_dic['死亡数'] = item['deaths']
            row_dic['助攻'] = item['assists']
            row_dic['反眼数'] = item['observer_kills']
            row_dic['总金钱'] = item['total_gold']
            row_dic['治疗量'] = item['hero_healing']
            row_dic['GPM'] = item['benchmarks']['gold_per_min']['raw']
            row_dic['XPM'] = item['benchmarks']['xp_per_min']['raw']
            row_dic['每分钟补刀数'] = item['benchmarks']['last_hits_per_min']['raw']
            row_dic['每分钟伤害'] = item['benchmarks']['hero_damage_per_min']['raw']
            row_dic['总伤害'] = item['hero_damage']
            # Column names mirror the ones paqu() uses for the team sheet.
            row_dic['天辉方战队'] = match_text['radiant_team']['name']
            row_dic['天辉杀人数'] = match_text['radiant_score']
            row_dic['夜宴方战队'] = match_text['dire_team']['name']
            row_dic['夜宴杀人数'] = match_text['dire_score']
            if item['isRadiant']:
                row_dic['所在方'] = '天辉'
                own_team = row_dic['天辉方战队']
            else:
                row_dic['所在方'] = '夜宴'
                own_team = row_dic['夜宴方战队']
            # The analysis section groups and filters by '战队', the player's
            # own team, so it has to be written here.
            row_dic['战队'] = own_team
            # 'win' is already relative to the player's side, so the result
            # does not depend on which side the player was on.
            if item['win'] == 1:
                row_dic['输赢'] = '赢'
            elif item['win'] == 0:
                row_dic['输赢'] = '输'
        except (KeyError, TypeError) as err:
            print('出错', repr(err))
        row_list.append(row_dic)
    # NOTE(review): the collapsed original does not show whether this pause
    # ran per player or per match; it is applied once per match here because
    # the function makes no network request of its own.
    time.sleep(0.5)
def paqu(match_text):
    """Append one match-level summary row to the global ``quanju_list``.

    The row holds the match id, the duration divided by 60, both team names
    and scores, the winning side, the number of teamfights and, for each
    tier-1 tower that was destroyed, the objective time divided by 60.

    Args:
        match_text: decoded match JSON as returned by ``jiexi``.
    """
    # Objective key -> output column. '夜宴中路1塔' is spelled with a digit,
    # unlike its siblings, because the analysis queries use exactly that name.
    tower_columns = {
        'npc_dota_goodguys_tower1_mid': '天辉中路一塔',
        'npc_dota_goodguys_tower1_top': '天辉劣势路一塔',
        'npc_dota_goodguys_tower1_bot': '天辉优势路一塔',
        'npc_dota_badguys_tower1_mid': '夜宴中路1塔',
        'npc_dota_badguys_tower1_top': '夜宴优势路一塔',
        'npc_dota_badguys_tower1_bot': '夜宴劣势路一塔',
    }
    teamfights = match_text['teamfights']
    dicq = {
        '比赛id': match_text['match_id'],
        '持续时间': match_text['duration'] / 60,
        '天辉方战队': match_text['radiant_team']['name'],
        '天辉杀人数': match_text['radiant_score'],
        '夜宴方战队': match_text['dire_team']['name'],
        '夜宴杀人数': match_text['dire_score'],
        '胜利方': '天辉' if match_text['radiant_win'] else '夜宴',
        # A missing teamfight list is recorded as unknown rather than crashing.
        '团战次数': len(teamfights) if teamfights is not None else None,
    }
    # `objectives` may be None when no parsed data is available for the match.
    for item in match_text['objectives'] or []:
        if item['type'] != 'building_kill':
            continue
        column = tower_columns.get(item['key'])
        if column is not None:
            dicq[column] = item['time'] / 60
    quanju_list.append(dicq)
    print(f"{dicq['天辉方战队']}vs{dicq['夜宴方战队']}完成")
主函数里面使用双线程进行数据的爬取
if __name__ == '__main__':
    # Ask where to store the workbook and build its full path.
    where = input('请输入文件存储地址:')
    name = 'DOTA2利雅得.xlsx'
    dizhi = os.path.join(where, name)

    match_ids = pro_game(14391)
    print(match_ids)

    # Shared result lists filled by xuanshou() and paqu().
    row_list = []
    quanju_list = []

    workers = []
    for mid in match_ids:
        shuju = jiexi(mid)
        for target in (xuanshou, paqu):
            worker = threading.Thread(target=target, args=(shuju,))
            worker.start()
            workers.append(worker)

    # Wait for every worker; otherwise the DataFrames below are built while
    # rows are still being appended and the saved workbook is incomplete.
    for worker in workers:
        worker.join()

    zd_d = pd.DataFrame(quanju_list)
    r_d = pd.DataFrame(row_list)
    # Write both sheets through one writer instead of create-then-append.
    with pd.ExcelWriter(dizhi, engine='openpyxl') as writer:
        r_d.to_excel(writer, sheet_name='选手数据')
        zd_d.to_excel(writer, sheet_name='战队数据')
运行程序后结果大概是这样
数据分析
import pandas as pd
import matplotlib.pyplot as plt
# Hard-coded workbook locations: the scraped tournament workbook and a second
# workbook holding the 'dota2英雄id' sheet.
lujing = r"E:/python文件/DOTA2利雅得.xlsx"
lujing1 = r"E:\python文件\DOTA2数据.xlsx"
shuju = pd.read_excel(lujing, sheet_name='选手数据')
shuju1 = pd.read_excel(lujing1, sheet_name='dota2英雄id')
# Join on the hero id so each player row gains the lookup sheet's columns.
shuju = shuju.merge(shuju1, on='英雄id')
shuju.head()
数据(因为原数据的英雄都是id,这里进行连接将英雄id替换为英雄名 )
选手KDA前十名
def kda(shuju):
    """Return the KDA of one player row.

    Args:
        shuju: mapping or row with the keys '杀人数', '助攻' and '死亡数'.

    Returns:
        (kills + assists) / deaths, or kills + assists when deaths is 0.
    """
    takedowns = shuju['杀人数'] + shuju['助攻']
    if shuju['死亡数'] == 0:
        # A deathless game would divide by zero; use the numerator alone.
        return takedowns
    return takedowns / shuju['死亡数']
# Per-row KDA, then the mean per (player, team), highest first.
shuju['KDA'] = shuju.apply(kda, axis=1)
kda_m = shuju.groupby(['name', '战队'])['KDA'].mean().sort_values(ascending=False)
kda_m.head(10)
选手参战率,参葬率,战死率
def canzhan(shuju):
    """Return a player's share of their own team's kills.

    Args:
        shuju: mapping or row with '所在方', '杀人数', '助攻', '天辉杀人数'
            and '夜宴杀人数'.

    Returns:
        (kills + assists) / kills of the player's side, or 0.0 when that side
        scored no kills.
    """
    team_kills = shuju['天辉杀人数'] if shuju['所在方'] == '天辉' else shuju['夜宴杀人数']
    if team_kills == 0:
        # No kills to take part in; avoid dividing by zero.
        return 0.0
    return (shuju['杀人数'] + shuju['助攻']) / team_kills
def canzang(shuju):
    """Return a player's share of the opposing team's kills.

    Args:
        shuju: mapping or row with '所在方', '死亡数', '天辉杀人数' and
            '夜宴杀人数'.

    Returns:
        deaths / kills of the opposing side, or 0.0 when the opposing side
        scored no kills.
    """
    enemy_kills = shuju['夜宴杀人数'] if shuju['所在方'] == '天辉' else shuju['天辉杀人数']
    if enemy_kills == 0:
        # The opponent scored nothing; avoid dividing by zero.
        return 0.0
    return shuju['死亡数'] / enemy_kills
# Per-row kill participation and death share.
shuju['参战率'] = shuju.apply(canzhan, axis=1)
shuju['参葬率'] = shuju.apply(canzang, axis=1)
# Mean of both rates per (player, team).
x = shuju.groupby(['name', '战队'])[['参葬率', '参战率']].mean()
# Ratio of death share to kill participation; the ten lowest are shown.
x['战死率'] = x['参葬率'] / x['参战率']
x.sort_values(by='战死率', ascending=True).head(10)
选手场均十分钟补刀数前十,以及他们的GPM,XPM
# Mean 10-minute last hits, GPM and XPM per (player, team), as flat columns.
budao = (
    shuju.groupby(['name', '战队'])[['10分钟补刀', 'GPM', 'XPM']]
    .mean()
    .reset_index(drop=False)
)
budao.sort_values(by='10分钟补刀', ascending=False).head(10)
选手场均反眼数前十
# Mean observer-ward kills per (player, team), highest first.
fanyan = shuju.groupby(['name', '战队'])['反眼数'].mean().reset_index()
fy = fanyan.sort_values(by='反眼数', ascending=False)
fy.head(10)
战队场均治疗量
# Mean hero healing per team, highest first, shown as a two-column table.
zhiliao = shuju.groupby(['战队'])['治疗量'].mean().sort_values(ascending=False)
x = zhiliao.reset_index(drop=False)
x
本次比赛英雄的上场数前十以及胜率
# Games played and games won per hero in one pass. Heroes that never won keep
# a row with 胜场 == 0 instead of being dropped by an inner merge against the
# '赢' counts, and no merge-suffix column names are involved.
hero = shuju.groupby('英雄名')['输赢'].agg(
    场数='count',
    胜场=lambda results: (results == '赢').sum(),
)
hero['胜率'] = hero['胜场'] / hero['场数']
# Ten most played heroes; ties are broken by number of wins.
y = hero.sort_values(['场数', '胜场'], ascending=False).head(10)
y
计算先摧毁对方优势路,中路,劣势路一塔的胜率
# The tower columns live in the '战队数据' sheet of the scraped workbook; load
# it here, because `x` otherwise still holds the healing table from above and
# the queries below would reference columns that do not exist.
x = pd.read_excel(lujing, sheet_name='战队数据')
# A tower that never fell has no timestamp; 100 acts as "later than any
# recorded time" so the comparisons below still work.
x.fillna(100, inplace=True)
# A side took the enemy mid tower first when its own mid tower fell later.
tiaojian1 = '天辉中路一塔>夜宴中路1塔 and 胜利方=="天辉"'
tiaojian2 = '天辉中路一塔<夜宴中路1塔 and 胜利方=="夜宴"'
y1 = x.query(tiaojian1)
y2 = x.query(tiaojian2)
# Share of matches won by the side that took the enemy mid tower first.
(len(y1) + len(y2)) / len(x)
0.722
# Matches where the winner's own safe-lane tier-1 tower fell later than the
# opponent's, as a share of all matches.
tiaojian1 = '天辉优势路一塔>夜宴优势路一塔 and 胜利方=="天辉"'
tiaojian2 = '天辉优势路一塔<夜宴优势路一塔 and 胜利方=="夜宴"'
y1, y2 = x.query(tiaojian1), x.query(tiaojian2)
(y1.shape[0] + y2.shape[0]) / x.shape[0]
0.592
# Matches where the winner's own off-lane tier-1 tower fell later than the
# opponent's, as a share of all matches.
tiaojian1 = '天辉劣势路一塔>夜宴劣势路一塔 and 胜利方=="天辉"'
tiaojian2 = '天辉劣势路一塔<夜宴劣势路一塔 and 胜利方=="夜宴"'
y1, y2 = x.query(tiaojian1), x.query(tiaojian2)
(y1.shape[0] + y2.shape[0]) / x.shape[0]
0.759
最后看看LGD的使用英雄以及胜率
# Restrict the player rows to PSG.LGD.
tiaojian = "战队=='PSG.LGD'"
sec = shuju.query(tiaojian)
# Games played and games won per hero in one pass. Heroes LGD never won with
# keep a row with 胜场 == 0 instead of being dropped by an inner merge.
hero = sec.groupby('英雄名')['输赢'].agg(
    场数='count',
    胜场=lambda results: (results == '赢').sum(),
)
hero['胜率'] = hero['胜场'] / hero['场数']
# Ten most played heroes; ties are broken by number of wins.
y = hero.sort_values(['场数', '胜场'], ascending=False).head(10)
y