import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
import cv2
import time
import random
import requests
import urllib.request
from nba_api.stats.static import teams, players
from nba_api.stats.endpoints import teamgamelog,leaguedashteamshotlocations,leaguedashteamstats,drafthistory,playerawards,leagueplayerondetails,teaminfocommon,alltimeleadersgrids,leagueleaders
from nba_api.stats.library.parameters import SeasonAll
from matplotlib.pyplot import suptitle
from matplotlib.patches import Circle, Rectangle, Arc
from matplotlib.offsetbox import  OffsetImage
from sklearn.linear_model import LogisticRegression
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
import colorama
from colorama import Fore
from skimage import io

# Global matplotlib theme for every figure in this script ('seaborn' is an alternative).
plt.style.use('fivethirtyeight')

# Set skip = True to avoid the long-running season-by-season download below.
skip = True

# Team and player lookup lists from nba_api's static module; teams are ordered
# by full name.  The original script ran these statements twice in a row, which
# produced the same values, so they are executed once here.
team_dict = sorted(teams.get_teams(), key=lambda team: team['full_name'])
player_dict = players.get_players()

def _record_season(year, fgm, fga, fg3m, fg3a):
    """Append one season's league-wide shooting totals to the module-level lists.

    Two-point makes/attempts are obtained by subtracting the three-point
    figures from the overall field-goal figures.  eFG% is
    ``(FGM + 0.5 * FG3M) / FGA`` as a percentage rounded to one decimal.
    The season label is an apostrophe followed by the last two digits of
    ``year`` (e.g. ``'82``).
    """
    FGM_season.append(fgm)
    FGA_season.append(fga)

    TwoPM_season.append(fgm - fg3m)
    TwoPA_season.append(fga - fg3a)

    ThreePM_season.append(fg3m)
    ThreePA_season.append(fg3a)

    eFG.append(round((fgm + .5 * fg3m) / fga * 100, 1))
    yrs.append("'" + str(year)[2:])


if not skip:
    eFG, TwoPM_season, TwoPA_season, FGM_season, FGA_season, ThreePM_season, ThreePA_season = [], [], [], [], [], [], []

    year = 1982
    yrs = []

    # Seasons 1982..1995: league totals are summed from each team's game log.
    # `<` (instead of the original `!=`) guarantees termination even if the
    # start year is ever moved past the bound.
    while year < 1996:
        FGM = FGA = FG3M = FG3A = 0
        for team in team_dict:
            team_log = teamgamelog.TeamGameLog(team_id=team['id'], season=str(year), season_type_all_star='Regular Season')
            time.sleep(.25)  # pause between consecutive API requests
            team_games = team_log.get_data_frames()[0]
            FGM += team_games['FGM'].sum()
            FGA += team_games['FGA'].sum()
            FG3M += team_games['FG3M'].sum()
            FG3A += team_games['FG3A'].sum()

        _record_season(year, FGM, FGA, FG3M, FG3A)
        year += 1

    # Seasons 1996..2021: one league-wide request per season ('1996-97' style id).
    while year < 2022:
        team_log = leaguedashteamstats.LeagueDashTeamStats(season=str(year)+'-'+str(year+1)[2:])
        time.sleep(.25)  # pause between consecutive API requests
        team_games = team_log.get_data_frames()[0]

        FGM = team_games['FGM'].sum()
        FGA = team_games['FGA'].sum()
        FG3M = team_games['FG3M'].sum()
        FG3A = team_games['FG3A'].sum()

        _record_season(year, FGM, FGA, FG3M, FG3A)
        year += 1

if not skip:
    def _season_frame(values, column):
        """Pair every season label in ``yrs`` with the matching entry of ``values``."""
        return pd.DataFrame(list(zip(yrs, values)), columns=['Year', column])

    # One two-column frame (season label, statistic) per collected series.
    eFG_df = _season_frame(eFG, 'eFG%')
    FGM_season_df = _season_frame(FGM_season, 'FGM')
    FGA_season_df = _season_frame(FGA_season, 'FGA')
    TwoPM_season_df = _season_frame(TwoPM_season, '2PM')
    TwoPA_season_df = _season_frame(TwoPA_season, '2PA')
    ThreePM_season_df = _season_frame(ThreePM_season, '3PM')
    ThreePA_season_df = _season_frame(ThreePA_season, '3PA')

    # The season labels on their own, as a single-column frame.
    yrs_df = pd.DataFrame(yrs, columns=['Year'])

# Folder holding the cached per-season CSV files.  The original literals were
# written as 'df\eFG_df': the backslash only acts as a directory separator on
# Windows and sequences such as '\e' are invalid string escapes.  A forward
# slash is accepted on Windows and POSIX alike, and saving and loading share
# the same prefix so they always point at the same files.
DATA_DIR = 'df'

if not skip:
    # Persist the freshly downloaded series so later runs can skip the download.
    eFG_df.to_csv(f'{DATA_DIR}/eFG_df', index=False)
    FGM_season_df.to_csv(f'{DATA_DIR}/FGM_season_df', index=False)
    FGA_season_df.to_csv(f'{DATA_DIR}/FGA_season_df', index=False)
    TwoPM_season_df.to_csv(f'{DATA_DIR}/TwoPM_season_df', index=False)
    TwoPA_season_df.to_csv(f'{DATA_DIR}/TwoPA_season_df', index=False)
    ThreePM_season_df.to_csv(f'{DATA_DIR}/ThreePM_season_df', index=False)
    ThreePA_season_df.to_csv(f'{DATA_DIR}/ThreePA_season_df', index=False)
    yrs_df.to_csv(f'{DATA_DIR}/Years_df', index=False)

# Always (re)load from disk, whether or not the download ran above.
eFG_df = pd.read_csv(f'{DATA_DIR}/eFG_df')
FGM_season_df = pd.read_csv(f'{DATA_DIR}/FGM_season_df')
FGA_season_df = pd.read_csv(f'{DATA_DIR}/FGA_season_df')
TwoPM_season_df = pd.read_csv(f'{DATA_DIR}/TwoPM_season_df')
TwoPA_season_df = pd.read_csv(f'{DATA_DIR}/TwoPA_season_df')
ThreePM_season_df = pd.read_csv(f'{DATA_DIR}/ThreePM_season_df')
ThreePA_season_df = pd.read_csv(f'{DATA_DIR}/ThreePA_season_df')
yrs_df = pd.read_csv(f'{DATA_DIR}/Years_df')

# Plain list of the season labels stored in the 'Year' column.
yrs = yrs_df['Year'].values.tolist()

def eFg_plot(eFG_df):
    """Show eFG% per season as a line chart with a dotted line at the mean.

    Parameters
    ----------
    eFG_df : frame with a 'Year' column (x axis) and an 'eFG%' column (y axis).
    """
    plt.figure(figsize=(28, 11))

    line_style = dict(linestyle='-', linewidth=1, marker='o', markersize=7)
    efg = eFG_df['eFG%']
    plt.plot(eFG_df['Year'], efg, color='turquoise', markerfacecolor='turquoise',
             label='eFG%', **line_style)

    # Horizontal reference line at the mean of the plotted values.
    plt.axhline(y=efg.mean(), color='gold', linestyle=':', label='Mean eFG%')

    plt.title('NBA eFG% throughout seasons (1982 - 2021)')
    plt.xlabel('Season')
    plt.ylabel('eFG%')
    plt.axis('tight')
    plt.legend(loc='lower right')
    plt.show()

eFg_plot(eFG_df)

                                             # Illustrative Code

eFG_ = []
years = ['2018-19', '2019-20', '2020-21', '2021-22']

for year in years:

    # Stats for all teams in the given season
    team_games_ = leaguedashteamstats.LeagueDashTeamStats(season=year).get_data_frames()[0]
    time.sleep(.25)

    # League totals: each shooting column summed over the rows of the frame
    FGM_, FGA_, FG3M_ = (team_games_[column].sum() for column in ('FGM', 'FGA', 'FG3M'))

    # eFG% weights three-point makes by an extra half; stored as a percentage with one decimal
    weighted_makes_ = FGM_ + 0.5 * FG3M_
    eFG_.append(round(weighted_makes_ / FGA_ * 100, 1))

eFG_

# Output: [52.4, 52.9, 53.8, 53.2]

def Fg_plot(FGM_season_df, FGA_season_df):
    """Plot field goals made and attempted per season on one line chart.

    Parameters
    ----------
    FGM_season_df : frame with 'Year' and 'FGM' columns.
    FGA_season_df : frame with 'Year' and 'FGA' columns.
    """
    plt.figure(figsize=(28, 11))

    # (frame, column that is both the y data and the legend label, colour)
    series = (
        (FGM_season_df, 'FGM', 'turquoise'),
        (FGA_season_df, 'FGA', 'gold'),
    )
    for frame, column, shade in series:
        plt.plot(frame['Year'], frame[column], color=shade, label=column, linestyle='-',
                 linewidth=1, marker='o', markerfacecolor=shade, markersize=7)

    plt.title('NBA FG throughout seasons (1982 - 2021)')
    plt.xlabel('Season')
    plt.ylabel('FG')
    plt.axis('tight')
    plt.legend(loc='lower right')
    plt.show()

Fg_plot(FGM_season_df, FGA_season_df)

                                                         # Illustrative Code

FGM_season_, FGA_season_ = [], []
years = ['2019-20', '2020-21', '2021-22']

for year in years:

    # Getting the stats for all teams in the season
    team_games_ = leaguedashteamstats.LeagueDashTeamStats(season=year).get_data_frames()[0]
    time.sleep(.25)

    # Selecting the stats of interest
    FGM_ = team_games_['FGM'].sum()
    FGA_ = team_games_['FGA'].sum()

    # Getting FGM and FGA for the season
    FGM_season_.append(FGM_)
    FGA_season_.append(FGA_)

# Colour-coded table: one header row, then one row per season.  Iterating the
# three lists together (instead of the original hard-coded range(3)) keeps the
# table in step with `years` if seasons are added or removed.
print(f"   Season  \t  {Fore.YELLOW} FGA \t\t  {Fore.CYAN} FGM \n {Fore.RESET}")
for season, attempts, makes in zip(years, FGA_season_, FGM_season_):
    print(f"  {season}: \t {Fore.YELLOW} {attempts} \t {Fore.CYAN} {makes}\n {Fore.RESET}")

# Output:
#    Season  	   FGA 		   FGM
#
#   2019-20: 	  188116 	  86550
#
#   2020-21: 	  190983 	  89020
#
#   2021-22: 	  216722 	  99930

def MissedFGpct_plot(shot_missed_pct):
    """Line chart of the 'Miss%' column against the 'Year' column.

    Parameters
    ----------
    shot_missed_pct : frame with 'Year' and 'Miss%' columns.
    """
    plt.figure(figsize=(28, 11))

    marker_style = dict(marker='o', markerfacecolor='teal', markersize=7)
    plt.plot(shot_missed_pct['Year'], shot_missed_pct['Miss%'], label='FG Missed%',
             color='teal', linestyle='-', linewidth=1, **marker_style)

    plt.title('NBA Missed FG% throughout seasons (1982 - 2021)')
    plt.xlabel('Season')
    plt.ylabel('Missed FG')
    plt.axis('tight')
    plt.legend(loc='lower right')
    plt.show()

# Missed field goals per season: attempts minus makes, both as a raw count
# and as a percentage of the attempts.
_missed_fg = FGA_season_df['FGA'] - FGM_season_df['FGM']
_missed_fg_share = _missed_fg / FGA_season_df['FGA'] * 100

shot_missed = pd.DataFrame(list(zip(yrs, _missed_fg)), columns=['Year', 'Miss'])
shot_missed_pct = pd.DataFrame(list(zip(yrs, _missed_fg_share)), columns=['Year', 'Miss%'])

def MissedFG_plot(shot_missed):
    """Line chart of the 'Miss' column against the 'Year' column.

    Parameters
    ----------
    shot_missed : frame with 'Year' and 'Miss' columns.
    """
    plt.figure(figsize=(28, 11))

    marker_style = dict(marker='o', markerfacecolor='teal', markersize=7)
    plt.plot(shot_missed['Year'], shot_missed['Miss'], label='FG Missed',
             color='teal', linestyle='-', linewidth=1, **marker_style)

    plt.title('NBA Missed FG throughout seasons (1982 - 2021)')
    plt.xlabel('Season')
    plt.ylabel('Missed FG')
    plt.axis('tight')
    plt.legend(loc='lower right')
    plt.show()

MissedFG_plot(shot_missed)

MissedFGpct_plot(shot_missed_pct)

def Fg_plot2(TwoPA_season_df, ThreePA_season_df):
    """Plot two-point and three-point attempts per season on one line chart.

    Parameters
    ----------
    TwoPA_season_df : frame with 'Year' and '2PA' columns.
    ThreePA_season_df : frame with 'Year' and '3PA' columns.
    """
    plt.figure(figsize=(20, 10))

    # (frame, column that is both the y data and the legend label, colour)
    for frame, column, shade in ((TwoPA_season_df, '2PA', 'gold'),
                                 (ThreePA_season_df, '3PA', 'teal')):
        plt.plot(frame['Year'], frame[column], color=shade, label=column, linestyle='-',
                 linewidth=1, marker='o', markerfacecolor=shade, markersize=7)

    plt.title('NBA FGA by Value throughout seasons (1982 - 2021)')
    plt.xlabel('Season')
    plt.ylabel('FG')
    plt.axis('tight')
    plt.legend(loc='lower right')
    plt.show()

def Fg_plot2_(TwoPM_season_df, ThreePM_season_df):
    """Plot two-point and three-point makes per season on one line chart.

    Parameters
    ----------
    TwoPM_season_df : frame with 'Year' and '2PM' columns.
    ThreePM_season_df : frame with 'Year' and '3PM' columns.
    """
    plt.figure(figsize=(20, 10))

    # (frame, column that is both the y data and the legend label, colour)
    for frame, column, shade in ((TwoPM_season_df, '2PM', 'gold'),
                                 (ThreePM_season_df, '3PM', 'teal')):
        plt.plot(frame['Year'], frame[column], color=shade, label=column, linestyle='-',
                 linewidth=1, marker='o', markerfacecolor=shade, markersize=7)

    plt.title('NBA FGM by Value throughout seasons (1982 - 2021)')
    plt.xlabel('Season')
    plt.ylabel('FG')
    plt.axis('tight')
    plt.legend(loc='lower right')
    plt.show()

Fg_plot2(TwoPA_season_df, ThreePA_season_df)

Fg_plot2_(TwoPM_season_df, ThreePM_season_df)

# Share of three-point attempts that were missed.  The first three entries of
# both the season labels and the series are left out (slice [3:]).
_three_att = ThreePA_season_df['3PA'][3:]
_three_made = ThreePM_season_df['3PM'][3:]
_three_miss_share = (_three_att - _three_made) / _three_att * 100

three_miss_pct = pd.DataFrame(list(zip(yrs[3:], _three_miss_share)), columns=['Year', 'Miss%'])
three_miss_pct;

def Three_miss_pct_plot(three_miss_pct):
    """Line chart of the missed three-point percentage per season.

    Parameters
    ----------
    three_miss_pct : frame with a 'Year' column (x axis) and a 'Miss%'
        column (y axis).

    The legend label and y-axis label previously read '3FG%' and 'FG', which
    described a make rate / raw count although the plotted column is 'Miss%';
    both now name the quantity that is actually drawn.
    """
    plt.figure(figsize=(28, 11))
    plt.plot(three_miss_pct['Year'], three_miss_pct['Miss%'], color='teal', label='Missed 3FG%', linestyle='-', linewidth = 1,
             marker='o', markerfacecolor='teal', markersize=7)

    plt.xlabel('Season')
    plt.ylabel('Missed 3FG%')
    # NOTE(review): the season range in the title is hard-coded; the frame
    # passed below is built from a [3:] slice, so it may start later than
    # 1982 -- confirm and adjust the title if so.
    plt.title('NBA Missed 3FG% throughout seasons (1982 - 2021)')
    plt.axis('tight')
    plt.legend(loc='lower right')
    plt.show()

Three_miss_pct_plot(three_miss_pct)

# Most efficient shot location, 2021-22 season
shotloc = leaguedashteamshotlocations.LeagueDashTeamShotLocations(season='2021-22').get_data_frames()[0]
# shotloc

# One sub-frame per shooting zone, selected by its top-level column label.
RA = shotloc['Restricted Area']
Paint = shotloc['In The Paint (Non-RA)']
Mid = shotloc['Mid-Range']
LeftCorner3 = shotloc['Left Corner 3']
RightCorner3 = shotloc['Right Corner 3']
AboveBreak3 = shotloc['Above the Break 3']
Back = shotloc['Backcourt']


def _zone_efg(zone, scale):
    """Mean of the zone's 'FG_PCT' column times ``scale``, rounded to one decimal."""
    return round(zone['FG_PCT'].mean()*scale, 1)


# Two-point zones are scaled by 100; the three-point and backcourt zones by 150.
RA_eFG = _zone_efg(RA, 100)
Paint_eFG = _zone_efg(Paint, 100)
Mid_eFG = _zone_efg(Mid, 100)
LeftCorner3_eFG = _zone_efg(LeftCorner3, 150)
RightCorner3_eFG = _zone_efg(RightCorner3, 150)
AboveBreak3_eFG = _zone_efg(AboveBreak3, 150)
Back_eFG = _zone_efg(Back, 150)

# (location, eFG%) pairs ordered from highest to lowest eFG%, then framed.
Efficiency_Shots = [
    ("RA", RA_eFG),
    ("Paint", Paint_eFG),
    ("Mid", Mid_eFG),
    ("LeftCorner3", LeftCorner3_eFG),
    ("RightCorner3", RightCorner3_eFG),
    ("AboveBreak3", AboveBreak3_eFG),
    ("Backcourt", Back_eFG),
]
Efficiency_Shots.sort(key=lambda pair: pair[1], reverse=True)
Efficiency_Shots = pd.DataFrame(Efficiency_Shots, columns=['Location', 'eFG%'])
# Efficiency_Shots

def draw_court_eFG(ax=None, color='black', lw=2, outer_lines=False):
    """Add a half-court diagram with coloured shooting zones to an axes.

    Parameters
    ----------
    ax : axes to draw on; the current axes (``plt.gca()``) is used when None.
    color : colour of the court markings.
    lw : line width used for the patches.
    outer_lines : when truthy, the rectangular court boundary is drawn too.

    Returns
    -------
    The axes the patches were added to.
    """
    if ax is None:
        ax = plt.gca()

    # Keyword arguments shared by all court markings.
    stroke = dict(linewidth=lw, color=color)

    # Patches are added in list order: zone backgrounds first, then the markings.
    patches = [
        # --- coloured shooting zones ---
        Rectangle((-250, 422.5), 500, 60, linewidth=lw, color='whitesmoke', fill=True),   # backcourt strip
        Rectangle((-250, -47.5), 500, 470, linewidth=lw, color='azure', fill=True),       # above-the-break area
        Circle((0, 0), radius=238, color='ivory', fill=True),                             # mid-range disc
        Rectangle((-220, -47.5), -30, 140, linewidth=lw, color='palegreen', fill=True),   # left corner three
        Rectangle((220, -47.5), 30, 140, linewidth=lw, color='lightcoral', fill=True),    # right corner three
        Rectangle((-80, -47.5), 160, 190, linewidth=lw, color='lightyellow', fill=True),  # paint
        Circle((0, 0), radius=40, color='lavenderblush', fill=True),                      # restricted area

        # --- hoop and backboard ---
        # An 18" hoop has a 9" radius, which is 7.5 in this coordinate system.
        Circle((0, 0), radius=7.5, fill=False, **stroke),
        Rectangle((-30, -7.5), 60, -1, **stroke),

        # --- the paint: outer box 16ft wide, inner box 12ft wide, both 19ft high ---
        Rectangle((-80, -47.5), 160, 190, fill=False, **stroke),
        Rectangle((-60, -47.5), 120, 190, fill=False, **stroke),

        # --- free-throw circle: solid top half, dashed bottom half ---
        Arc((0, 142.5), 120, 120, theta1=0, theta2=180, fill=False, **stroke),
        Arc((0, 142.5), 120, 120, theta1=180, theta2=0, linestyle='dashed', **stroke),

        # --- restricted zone: arc of 4ft radius around the hoop centre ---
        Arc((0, 0), 80, 80, theta1=0, theta2=180, **stroke),

        # --- three-point line: 14ft straight sides, then the arc centred on the hoop ---
        Rectangle((-220, -47.5), 0, 140, **stroke),
        Rectangle((220, -47.5), 0, 140, **stroke),
        # theta values were tuned by hand until the arc met the side lines
        Arc((0, 0), 475, 475, theta1=22, theta2=158, **stroke),

        # --- centre court: filled circles first, then the arcs ---
        Circle((0, 422.5), radius=60, color='tab:cyan', fill=True),
        Circle((0, 422.5), radius=20, color='tab:blue', fill=True),
        Arc((0, 422.5), 120, 120, theta1=180, theta2=0, **stroke),
        Arc((0, 422.5), 40, 40, theta1=180, theta2=0, **stroke),
        Arc((0, 422.5), -40, -40, theta1=180, theta2=0, **stroke),
        Arc((0, 422.5), -120, -120, theta1=180, theta2=0, **stroke),
    ]

    if outer_lines:
        # Half-court line, baseline and sidelines as one unfilled rectangle.
        patches.append(Rectangle((-250, -47.5), 500, 470, fill=False, **stroke))

    for patch in patches:
        ax.add_patch(patch)

    return ax

# Per-season accumulators filled by the loop below (one entry per season).
# Raw makes/attempts per shot type, plus league totals.
PaintFGM, PaintFGA, MidFGM, MidFGA, ThreeFGM, ThreeFGA, TotAttl, TotMadel = [], [], [], [], [], [], [], []

# Share of all attempts / all makes taken by each shot type (percent).
PaintAttPCT, PaintMadePCT, MidAttPCT, MidMadePCT, ThreeAttPCT, ThreeMadePCT = [], [], [], [], [], []

# Zone eFG% series (these names are rebound from scalars to lists here).
RA_eFG, Paint_eFG, Mid_eFG, LeftCorner3_eFG, RightCorner3_eFG, AboveBreak3_eFG, Back_eFG = [], [], [], [], [], [], []

# Makes per attempt for each shot type (percent).
PaintMadeRatio, MidMadeRatio, ThreeMadeRatio, ThreeMadeRatioEffective, TwoPointersEff = [], [], [], [], []

year = 1996
yrs = []
# `<` rather than `!=` so the loop cannot run forever if the start year changes.
while year < 2022:
    shotloc_log = leaguedashteamshotlocations.LeagueDashTeamShotLocations(season=str(year)+'-'+str(year+1)[2:])
    time.sleep(.50)  # pause between consecutive API requests
    shotloc = shotloc_log.get_data_frames()[0]
    RA = shotloc['Restricted Area']
    Paint = shotloc['In The Paint (Non-RA)']
    Mid = shotloc['Mid-Range']
    LeftCorner3 = shotloc['Left Corner 3']
    RightCorner3 = shotloc['Right Corner 3']
    AboveBreak3 = shotloc['Above the Break 3']
    Back = shotloc['Backcourt']

    # "Paint" combines the restricted area with the rest of the paint;
    # "Three" combines both corners, above the break and the backcourt.
    PaintMade = RA['FGM'].sum() + Paint['FGM'].sum()
    PaintAtt = RA['FGA'].sum() + Paint['FGA'].sum()
    MidMade = Mid['FGM'].sum()
    MidAtt = Mid['FGA'].sum()
    ThreeMade = LeftCorner3['FGM'].sum() + RightCorner3['FGM'].sum() + AboveBreak3['FGM'].sum() + Back['FGM'].sum()
    ThreeAtt = LeftCorner3['FGA'].sum() + RightCorner3['FGA'].sum() + AboveBreak3['FGA'].sum() + Back['FGA'].sum()
    TotMade = PaintMade + MidMade + ThreeMade
    TotAtt = PaintAtt + MidAtt + ThreeAtt

    PaintFGM.append(PaintMade)
    PaintFGA.append(PaintAtt)
    MidFGM.append(MidMade)
    MidFGA.append(MidAtt)
    ThreeFGM.append(ThreeMade)
    ThreeFGA.append(ThreeAtt)
    # Fixed: the original appended attempts to the "made" list and makes to
    # the "attempts" list.
    TotMadel.append(TotMade)
    TotAttl.append(TotAtt)

    PaintAttPCT.append(PaintAtt / TotAtt * 100)
    PaintMadePCT.append(PaintMade / TotMade * 100)
    MidAttPCT.append(MidAtt / TotAtt * 100)
    MidMadePCT.append(MidMade / TotMade * 100)
    ThreeAttPCT.append(ThreeAtt / TotAtt * 100)
    ThreeMadePCT.append(ThreeMade / TotMade * 100)

    PaintMadeRatio.append(PaintMade / PaintAtt * 100)
    MidMadeRatio.append(MidMade / MidAtt * 100)
    ThreeMadeRatio.append(ThreeMade / ThreeAtt * 100)
    # eFG%: three-point makes are weighted by 1.5
    ThreeMadeRatioEffective.append(ThreeMade*1.5 / ThreeAtt * 100)
    TwoPointersEff.append((PaintMade + MidMade) / (PaintAtt + MidAtt) * 100)

    # Zone eFG% from the mean of the 'FG_PCT' column: x100 for the two-point
    # zones, x150 for the three-point and backcourt zones.
    RA_eFG.append(round(RA['FG_PCT'].mean()*100, 1))
    Paint_eFG.append(round(Paint['FG_PCT'].mean()*100, 1))
    Mid_eFG.append(round(Mid['FG_PCT'].mean()*100, 1))
    LeftCorner3_eFG.append(round(LeftCorner3['FG_PCT'].mean()*150, 1))
    RightCorner3_eFG.append(round(RightCorner3['FG_PCT'].mean()*150, 1))
    AboveBreak3_eFG.append(round(AboveBreak3['FG_PCT'].mean()*150, 1))
    Back_eFG.append(round(Back['FG_PCT'].mean()*150, 1))

    # Season label: apostrophe plus the last two digits of the start year.
    yrs.append("'"+str(year)[2:])
    year += 1

def _efg_by_year(values):
    """Frame pairing each season label in ``yrs`` with the matching eFG% value."""
    return pd.DataFrame(list(zip(yrs, values)), columns=['Year', 'eFG%'])


# One ('Year', 'eFG%') frame per shooting zone.
RA_eFG_df = _efg_by_year(RA_eFG)
Paint_eFG_df = _efg_by_year(Paint_eFG)
Mid_eFG_df = _efg_by_year(Mid_eFG)
LeftCorner3_eFG_df = _efg_by_year(LeftCorner3_eFG)
RightCorner3_eFG_df = _efg_by_year(RightCorner3_eFG)
AboveBreak3_eFG_df = _efg_by_year(AboveBreak3_eFG)
Back_eFG_df = _efg_by_year(Back_eFG)

def shotplot21():
    """Draw the zone-coloured court, annotate it with fixed percentages and save it.

    The figure is written to "Imgs/ShotEfficiency_plot.png"; nothing is returned.
    """
    plt.figure(figsize=(12,11))
    draw_court_eFG(outer_lines=True)

    # (x, y, text, colour) for every annotation, in drawing order.
    annotations = (
        (-20, 25, "65.4%", 'limegreen'),
        (-250, 50, "57.6%", 'yellowgreen'),
        (220, 50, "57.1%", 'greenyellow'),
        (-20, 300, "51.9%", 'yellow'),
        (-20, 120, "42.6%", 'orange'),
        (120, 75, "40.1%", 'orangered'),
        (-150, 450, "3.7%", 'darkred'),
        (-50, -50, "2021-22 Shot Efficiency", 'darkcyan'),
    )
    for x_pos, y_pos, caption, shade in annotations:
        plt.text(x_pos, y_pos, caption, fontsize=20, color=shade, font='serif')

    plt.xlim(-265,265)
    # The upper limit is passed first, which inverts the y axis.
    plt.ylim(500, -47.5)
    plt.tick_params(labelbottom=False, labelleft=False)
    plt.savefig("Imgs/ShotEfficiency_plot.png")

# shotplot21();

                                                 # Illustrative Code

# 2021-22 stats (FGM, FGA, FG_PCT) grouped by zone: Restricted Area, In The Paint (Non-RA), Mid-Range...
shotloc_ = leaguedashteamshotlocations.LeagueDashTeamShotLocations(season='2021-22').get_data_frames()[0]

# One sub-frame per zone of interest
RA_, Paint_, Mid_ = (shotloc_[zone_name] for zone_name in
                     ('Restricted Area', 'In The Paint (Non-RA)', 'Mid-Range'))

# eFG% per zone: mean of 'FG_PCT' times 100, rounded to one decimal
RA_eFG_, Paint_eFG_, Mid_eFG_ = (round(zone['FG_PCT'].mean()*100, 1) for zone in (RA_, Paint_, Mid_))

RA_eFG_, Paint_eFG_, Mid_eFG_

# Output: (65.4, 42.6, 40.1)

def eFg_plot2(RA_eFG_df, Paint_eFG_df, Mid_eFG_df, LeftCorner3_eFG_df, RightCorner3_eFG_df, AboveBreak3_eFG_df, Back_eFG_df):
    """Plot eFG% per season for seven shooting zones, save the figure and show it.

    Every argument is a frame with 'Year' and 'eFG%' columns.  The figure is
    written to "Imgs/eFG%_plot.png" before being displayed.
    """
    plt.figure(figsize=(24, 10))

    # (frame, colour, legend label) in plotting order.
    zones = (
        (RA_eFG_df, 'teal', 'Restricted Area'),
        (Paint_eFG_df, 'silver', 'Paint'),
        (Mid_eFG_df, 'mediumspringgreen', 'Mid-Range'),
        (LeftCorner3_eFG_df, 'coral', 'Left Corner 3'),
        (RightCorner3_eFG_df, 'yellow', 'Right Corner 3'),
        (AboveBreak3_eFG_df, 'magenta', 'Above Break 3'),
        (Back_eFG_df, 'midnightblue', 'Backcourt'),
    )
    for frame, shade, zone_label in zones:
        plt.plot(frame['Year'], frame['eFG%'], color=shade, linestyle='-', linewidth=1,
                 marker='o', markerfacecolor=shade, markersize=7, label=zone_label)

    plt.title('NBA eFG% (by zone) throughout seasons (1996 - 2021)')
    plt.xlabel('Year')
    plt.ylabel('eFG%')
    plt.axis('tight')
    plt.legend(loc='lower left')
    plt.savefig("Imgs/eFG%_plot.png")
    plt.show()

eFg_plot2(RA_eFG_df, Paint_eFG_df, Mid_eFG_df, LeftCorner3_eFG_df, RightCorner3_eFG_df, AboveBreak3_eFG_df, Back_eFG_df)

                                                 # Illustrative Code
    
RA_eFG_, Paint_eFG_, Mid_eFG_ = [], [], []
years = ['2019-20', '2020-21', '2021-22']

for year in years:

    # Getting the season's stats: FGM, FGA, FG_PCT.  They are classified by zone: Restricted Area, In The Paint (Non-RA), Mid-Range...
    shotloc_ = leaguedashteamshotlocations.LeagueDashTeamShotLocations(season=year).get_data_frames()[0]

    # Dividing the main Dataframe into zone-based ones
    RA_ = shotloc_['Restricted Area']
    Paint_ = shotloc_['In The Paint (Non-RA)']
    Mid_ = shotloc_['Mid-Range']

    # Getting eFG% for each zone
    RA_eFG_.append(round(RA_['FG_PCT'].mean()*100, 1))
    Paint_eFG_.append(round(Paint_['FG_PCT'].mean()*100, 1))
    Mid_eFG_.append(round(Mid_['FG_PCT'].mean()*100, 1))

# Colour-coded table: header row, then one row per season.  The middle column
# holds the paint values, so its header reads "Paint" (it was mislabelled
# "FGM").  Iterating the lists together replaces the hard-coded range(3).
print(f"   Season  \t  {Fore.CYAN} RA \t {Fore.WHITE} Paint \t  {Fore.GREEN} Mid \n {Fore.RESET}")
for season, ra_value, paint_value, mid_value in zip(years, RA_eFG_, Paint_eFG_, Mid_eFG_):
    print(f"  {season}: \t {Fore.CYAN}{ra_value}  \t  {Fore.WHITE}{paint_value}     {Fore.GREEN}{mid_value} \n {Fore.RESET}")

# Output:
#    Season  	   RA 	  FGM 	   Mid
#
#   2019-20: 	 63.6  	  39.7     40.0
#
#   2020-21: 	 64.2  	  42.5     41.0
#
#   2021-22: 	 65.4  	  42.6     40.1

# Share of all makes / attempts that were two- or three-pointers, per season (percent).
TwoPM_SeasonPercent = TwoPM_season_df['2PM'] / FGM_season_df['FGM'] * 100
ThreePM_SeasonPercent = ThreePM_season_df['3PM'] / FGM_season_df['FGM'] * 100
TwoPA_SeasonPercent = TwoPA_season_df['2PA'] / FGA_season_df['FGA'] * 100
ThreePA_SeasonPercent = ThreePA_season_df['3PA'] / FGA_season_df['FGA'] * 100

# Season labels must come from the same CSV-backed data as the series above.
# The global `yrs` is rebuilt by the shot-location loop earlier in this file
# and by then only holds the labels from '96 onwards, so zipping it with these
# series would attach the wrong label to every value and silently drop the
# trailing seasons.  `yrs_df` still holds the labels loaded from disk.
_season_labels = yrs_df['Year'].values.tolist()

TwoPM_SeasonPercent_df = pd.DataFrame(list(zip(_season_labels, TwoPM_SeasonPercent)), columns=['Year', 'FG%'])
ThreePM_SeasonPercent_df = pd.DataFrame(list(zip(_season_labels, ThreePM_SeasonPercent)), columns=['Year', 'FG%'])
TwoPA_SeasonPercent_df = pd.DataFrame(list(zip(_season_labels, TwoPA_SeasonPercent)), columns=['Year', 'FG%'])
ThreePA_SeasonPercent_df = pd.DataFrame(list(zip(_season_labels, ThreePA_SeasonPercent)), columns=['Year', 'FG%'])

def FG_Percent_plot(TwoPM_SeasonPercent_df, ThreePM_SeasonPercent_df, TwoPA_SeasonPercent_df, ThreePA_SeasonPercent_df):
    """Plot four 'FG%' series (2PM%, 3PM%, 2PA%, 3PA%) per season on one line chart.

    Every argument is a frame with 'Year' and 'FG%' columns.
    """
    plt.figure(figsize=(28, 11))

    # (frame, colour, legend label) in plotting order.
    shares = (
        (TwoPM_SeasonPercent_df, 'orchid', '2PM%'),
        (ThreePM_SeasonPercent_df, 'gold', '3PM%'),
        (TwoPA_SeasonPercent_df, 'mediumspringgreen', '2PA%'),
        (ThreePA_SeasonPercent_df, 'aquamarine', '3PA%'),
    )
    for frame, shade, share_label in shares:
        plt.plot(frame['Year'], frame['FG%'], color=shade, label=share_label, linestyle='-',
                 linewidth=1, marker='o', markerfacecolor=shade, markersize=7)

    plt.title('NBA FG% over Total (by value) throughout seasons (1982 - 2021)')
    plt.xlabel('Season')
    plt.ylabel('FG%')
    plt.axis('tight')
    plt.legend(loc='lower right')
    plt.show()

FG_Percent_plot(TwoPM_SeasonPercent_df, ThreePM_SeasonPercent_df, TwoPA_SeasonPercent_df, ThreePA_SeasonPercent_df)

# Share of total attempts per shot type: three ('Year', 'FG%') frames built the same way.
PaintAtt_df, MidAtt_df, ThreeAtt_df = (
    pd.DataFrame(list(zip(yrs, share)), columns=['Year', 'FG%'])
    for share in (PaintAttPCT, MidAttPCT, ThreeAttPCT)
)

# Raw three-point volumes per season (attempts and makes).
_three_att_rows = list(zip(yrs, ThreeFGA))
_three_made_rows = list(zip(yrs, ThreeFGM))
ThreeAttVolume_df = pd.DataFrame(_three_att_rows, columns=['Year', 'FG3A'])
ThreeMadeVolume_df = pd.DataFrame(_three_made_rows, columns=['Year', 'FG3M'])

def Fga_plot(PaintAtt_df, MidAtt_df, ThreeAtt_df):
    """Plot each shot type's share of total attempts per season.

    Every argument is a frame with 'Year' and 'FG%' columns.  A dashed
    vertical marker labelled "Curry's I MVP" is drawn at the "'14" label, from
    0 up to the value found at row 18 of ``ThreeAtt_df``.
    """
    plt.figure(figsize=(28, 11))

    # (frame, colour, legend label) in plotting order.
    for frame, shade, kind in ((PaintAtt_df, 'teal', 'Paint'),
                               (MidAtt_df, 'gold', 'Mid-Range'),
                               (ThreeAtt_df, 'mediumspringgreen', '3-Point')):
        plt.plot(frame['Year'], frame['FG%'], color=shade, linestyle='-', linewidth=1,
                 marker='o', markerfacecolor=shade, markersize=7, label=kind)

    marker_top = ThreeAtt_df['FG%'].iloc[18]
    plt.vlines("'14", 0, marker_top, color='mediumorchid', linestyle='--', label="Curry's I MVP")

    plt.text(15, 0, "Curry's I MVP", fontsize=15, color='mediumorchid', font='serif')
    plt.xlabel('Year')
    plt.ylabel('FGA% of total FGA')
    plt.title('NBA FGA% of total FGA (by type) throughout seasons (1996 - 2021)')
    plt.axis('tight')
    plt.legend(loc='center left')
    plt.show()

# Share of total makes per shot type: three ('Year', 'FG%') frames built the same way.
PaintMade_df, MidMade_df, ThreeMade_df = (
    pd.DataFrame(list(zip(yrs, share)), columns=['Year', 'FG%'])
    for share in (PaintMadePCT, MidMadePCT, ThreeMadePCT)
)

def Fgm_plot(PaintMade_df, MidMade_df, ThreeMade_df):
    """Plot each shot type's share of total makes per season.

    Every argument is a frame with 'Year' and 'FG%' columns.  A dashed
    vertical marker labelled "Curry's I MVP" is drawn at the "'14" label, from
    0 up to the value found at row 18 of ``ThreeMade_df``.
    """
    plt.figure(figsize=(28, 11))

    # (frame, colour, legend label) in plotting order.
    for frame, shade, kind in ((PaintMade_df, 'teal', 'Paint'),
                               (MidMade_df, 'gold', 'Mid-Range'),
                               (ThreeMade_df, 'aquamarine', '3-Point')):
        plt.plot(frame['Year'], frame['FG%'], color=shade, linestyle='-', linewidth=1,
                 marker='o', markerfacecolor=shade, markersize=7, label=kind)

    marker_top = ThreeMade_df['FG%'].iloc[18]
    plt.vlines("'14", 0, marker_top, color='mediumorchid', linestyle='--', label="Curry's I MVP")
    plt.text(15, 0, "Curry's I MVP", fontsize=15, color='mediumorchid', font='serif')

    plt.xlabel('Year')
    plt.ylabel('FGM% of total FGM')
    plt.title('NBA FGM% of total FGM (by type) throughout seasons (1996 - 2021)')
    plt.axis('tight')
    plt.legend(loc='upper left')
    plt.show()

Fga_plot(PaintAtt_df, MidAtt_df, ThreeAtt_df)

Fgm_plot(PaintMade_df, MidMade_df, ThreeMade_df)

def ThreeAtt_plot(ThreeAttVolume_df):
    """Bar chart of three-point attempts per season with two annotated markers.

    Parameters
    ----------
    ThreeAttVolume_df : frame with 'Year' and 'FG3A' columns.

    Two dashed vertical markers are drawn at bar positions 18 and 15, each
    with a text caption above it.
    """
    ThreeAttVolume_df.plot.bar(x='Year', y='FG3A', color='mediumspringgreen', figsize=(28, 11),
                               title="Total Three-Pointer Attempts throughout seasons (1996 - 2021)", label='3-Pointer Attempts')

    plt.vlines(18, 55000, 75000, color='mediumorchid', linestyle='--', label="Curry's I MVP")
    plt.text(17, 78000, "Curry's I MVP", fontsize=25, color='mediumorchid', font='serif')

    # Fixed: this marker carried the copy-pasted label "Curry's I MVP" although
    # its caption reads "Curry's Draft".
    # NOTE(review): position 15 corresponds to the '11 label if the frame starts
    # at '96 -- confirm this is the intended season for the draft marker.
    plt.vlines(15, 36000, 56000, color='teal', linestyle='--', label="Curry's Draft")
    plt.text(14, 60000, "Curry's Draft", fontsize=25, color='teal', font='serif')

    plt.xlabel('Year')
    plt.ylabel('Three Pointer Attempts')
    plt.axis('tight')
    plt.show()

def ThreeMade_plot(ThreeMadeVolume_df):
    """Bar chart of three-pointers made per season with two annotated markers.

    Parameters
    ----------
    ThreeMadeVolume_df : frame with 'Year' and 'FG3M' columns.

    Two dashed vertical markers are drawn at bar positions 18 and 15, each
    with a text caption above it.
    """
    ThreeMadeVolume_df.plot.bar(x='Year', y='FG3M', color='aquamarine', figsize=(28, 11),
                                title="Total Three-Pointers Made throughout seasons (1996 - 2021)", label='3-Pointers Made')

    plt.vlines(18, 19000, 24000, color='mediumorchid', linestyle='--', label="Curry's I MVP")
    plt.text(17, 25000, "Curry's I MVP", fontsize=25, color='mediumorchid', font='serif')

    # Fixed: this marker carried the copy-pasted label "Curry's I MVP" although
    # its caption reads "Curry's Draft".
    # NOTE(review): position 15 corresponds to the '11 label if the frame starts
    # at '96 -- confirm this is the intended season for the draft marker.
    plt.vlines(15, 12500, 17500, color='teal', linestyle='--', label="Curry's Draft")
    plt.text(14, 18500, "Curry's Draft", fontsize=25, color='teal', font='serif')

    plt.xlabel('Year')
    plt.ylabel('Three Pointers Made')
    plt.axis('tight')
    plt.show()

ThreeAtt_plot(ThreeAttVolume_df)

ThreeMade_plot(ThreeMadeVolume_df)

# Fetch every all-time-leaders table, then display three columns of table 12
# (presumably the three-pointers-made leaderboard -- confirm the index).
_all_time_grids = alltimeleadersgrids.AllTimeLeadersGrids().get_data_frames()
_all_time_grids[12][["PLAYER_NAME", "FG3M", "FG3M_RANK"]]

# Same graphs as above, but combined into a single figure

def TotFg_plot(PaintAtt_df, PaintMade_df, MidAtt_df, MidMade_df, ThreeAtt_df, ThreeMade_df):
    """Plot attempt and make shares for paint, mid-range and three-point shots together.

    Every argument is a frame with 'Year' and 'FG%' columns.
    """
    plt.figure(figsize=(28, 11))

    # (frame, colour, legend label) in plotting order: attempts then makes per shot type.
    curves = (
        (PaintAtt_df, 'teal', 'Paint FGA'),
        (PaintMade_df, 'deepskyblue', 'Paint FGM'),
        (MidAtt_df, 'gold', 'Mid-Range FGA'),
        (MidMade_df, 'yellow', 'Mid-Range FGM'),
        (ThreeAtt_df, 'mediumspringgreen', '3-Point FGA'),
        (ThreeMade_df, 'aquamarine', '3-Point FGM'),
    )
    for frame, shade, curve_label in curves:
        plt.plot(frame['Year'], frame['FG%'], color=shade, linestyle='-', linewidth=1,
                 marker='o', markerfacecolor=shade, markersize=7, label=curve_label)

    plt.title('NBA FG% of total FG (by type) throughout seasons (1996 - 2021)')
    plt.xlabel('Year')
    plt.ylabel('FG% of total FG')
    plt.axis('tight')
    plt.legend(loc='upper left')
    plt.show()

TotFg_plot(PaintAtt_df, PaintMade_df, MidAtt_df, MidMade_df, ThreeAtt_df, ThreeMade_df)

# Makes per attempt (percent) for each shot type, plus the 1.5-weighted
# three-point variant: four ('Year', 'FG%') frames built the same way.
PaintMadeRatio_df, MidMadeRatio_df, ThreeMadeRatio_df, ThreeMadeRatioEffective_df = (
    pd.DataFrame(list(zip(yrs, ratio)), columns=['Year', 'FG%'])
    for ratio in (PaintMadeRatio, MidMadeRatio, ThreeMadeRatio, ThreeMadeRatioEffective)
)

def Fg_plot3(PaintMadeRatio_df, MidMadeRatio_df, ThreeMadeRatio_df, ThreeMadeRatioEffective_df):
    """Plot the per-season 'FG%' column of four shot categories on one figure.

    Every argument is a DataFrame with a 'Year' column (x axis) and an 'FG%'
    column (y axis). The figure is shown immediately; nothing is returned.
    """
    # (frame, colour, legend label), listed in drawing order
    series = (
        (PaintMadeRatio_df, 'teal', 'Paint'),
        (MidMadeRatio_df, 'gold', 'Mid-Range'),
        (ThreeMadeRatio_df, 'mediumspringgreen', '3-Point'),
        (ThreeMadeRatioEffective_df, 'orchid', 'Effective 3-Point'),
    )

    plt.figure(figsize=(28, 11))
    for frame, shade, caption in series:
        plt.plot(frame['Year'], frame['FG%'], color=shade, linestyle='-', linewidth=1,
                 marker='o', markerfacecolor=shade, markersize=7, label=caption)

    plt.xlabel('Year')
    # The plotted column and the title are FG%, so the axis is labelled FG% (it used to say 'FGA%').
    plt.ylabel('FG%')
    plt.title('NBA FG% throughout seasons (1996 - 2021)')
    plt.axis('tight')
    plt.legend(loc='center left')
    plt.show()

Fg_plot3(PaintMadeRatio_df, MidMadeRatio_df, ThreeMadeRatio_df, ThreeMadeRatioEffective_df)

# Rebind ThreeMadeRatioEffective_df with its value column named 'eFG%' (it was 'FG%'
# above) so it shares a column name with the two-pointer frame built next.
ThreeMadeRatioEffective_df = pd.DataFrame(list(zip(yrs, ThreeMadeRatioEffective)), columns=['Year', 'eFG%'])
TwoPointersEff_df = pd.DataFrame(list(zip(yrs, TwoPointersEff)), columns=['Year', 'eFG%'])

def eFg_plot3(ThreeMadeRatioEffective_df, TwoPointersEff_df):
    """Plot the per-season 'eFG%' column of the three-pointer and two-pointer frames.

    Both arguments are DataFrames with 'Year' and 'eFG%' columns. The figure is
    shown immediately; nothing is returned.
    """
    # (frame, colour, legend label), listed in drawing order
    series = (
        (ThreeMadeRatioEffective_df, 'teal', 'Effective Three-pointers'),
        (TwoPointersEff_df, 'gold', 'Effective Two-Pointers'),
    )

    plt.figure(figsize=(22, 8))
    for frame, shade, caption in series:
        plt.plot(frame['Year'], frame['eFG%'], color=shade, linestyle='-', linewidth=1,
                 marker='o', markerfacecolor=shade, markersize=7, label=caption)

    plt.xlabel('Year')
    plt.ylabel('eFG%')
    plt.title('NBA eFG% (by value) throughout seasons (1996 - 2021)')
    plt.axis('tight')
    plt.legend(loc='center left')
    plt.show()

eFg_plot3(ThreeMadeRatioEffective_df, TwoPointersEff_df)

# Load the triple-double table from the local cache. A forward slash is used (as for
# the other df/ files) because '\T' is an invalid escape sequence and a backslash is
# not a path separator outside Windows.
TD_df = pd.read_csv('df/Triple Double')
#TD_df

def TD_plot(TD_df):
    """Draw a bar chart of TD_df's 'TD' column per 'Year' with a Westbrook MVP marker.

    The marker is a dashed vertical line at bar position 16, running from the 'TD'
    value of row 16 up to 20 above the 'TD' value of row 17, so TD_df needs at
    least 18 rows. The figure is shown immediately; nothing is returned.
    """
    accent = 'mediumorchid'

    TD_df.plot.bar(
        x='Year',
        y='TD',
        color='mediumspringgreen',
        figsize=(28, 11),
        title="Westbrook's Effect on Triple Doubles",
        label='Triple Doubles Volume',
    )
    plt.text(14, 140, "Westbrook's 2016 MVP", fontsize=15, color=accent, font='serif')

    counts = TD_df['TD']
    line_bottom = counts.iloc[16]
    line_top = counts.iloc[17] + 20
    plt.vlines(16, line_bottom, line_top, color=accent, linestyle='--', label="Westbrook 2016 MVP")
    plt.show()

TD_plot(TD_df)

if not skip:
    # Rebuild the Rookie-of-the-Year table from the API: for every draft year, find
    # the drafted player(s) holding an 'NBA Rookie of the Year' award and record
    # their id, name, award season and overall pick.
    roty_history = pd.DataFrame([], columns=['PERSON_ID','FIRST_NAME','LAST_NAME','SEASON','OVERALL_PICK'])
    year = 1947
    positions = 60  # highest overall pick number inspected in each draft
    while year != 2022:
        yearstr = "'" + str(year)[2:4]  # apostrophe + two-digit year, e.g. "'16"
        draft = drafthistory.DraftHistory(season_year_nullable=str(year)).get_data_frames()[0]
        time.sleep(.25)  # pause after each request (presumably to respect API rate limits)
        for i in range(1, positions+1):
            # Skip pick numbers that do not occur in this draft
            if i in set(draft[draft['OVERALL_PICK'] == i]['OVERALL_PICK']):
                name = draft[draft['OVERALL_PICK'] == i]['PLAYER_NAME'].iloc[0]
                # NOTE(review): the id is resolved by player name (first match in this draft),
                # not from the pick row itself; `id` also shadows the builtin.
                id = draft[draft['PLAYER_NAME'] == name]['PERSON_ID'].iloc[0]
                awards = playerawards.PlayerAwards(id).get_data_frames()[0]        
                time.sleep(.25)

                if not awards[awards['DESCRIPTION'] == 'NBA Rookie of the Year'].empty:
                    idx = awards.index[awards['DESCRIPTION'] == 'NBA Rookie of the Year']
                    # Overwrites the WHOLE SEASON column with an apostrophe plus characters
                    # 2-3 of the first ROTY row's season string, so the row selected below
                    # carries the short season label.
                    awards['SEASON'] = "'" + awards['SEASON'][idx[0]][2:4]
                    roty = awards[awards['DESCRIPTION'] == 'NBA Rookie of the Year'][['PERSON_ID','FIRST_NAME','LAST_NAME','SEASON']]
                    info = draft[draft['OVERALL_PICK'] == i][['OVERALL_PICK','PERSON_ID']]
                    # merge() joins on the shared column (PERSON_ID), adding OVERALL_PICK to the award row
                    rotynfo = pd.merge(roty, info)
                    roty_history = pd.concat([roty_history, rotynfo])

        # No row labelled with this year so far: add a placeholder (no player, pick 0)
        # so that the season still appears in the table.
        if yearstr not in set(roty_history['SEASON']):
            emptyroty = pd.DataFrame([[None, None, None, 0]], columns=['PERSON_ID','FIRST_NAME','LAST_NAME','OVERALL_PICK'])
            emptynfo = emptyroty.join(pd.DataFrame(["'" + draft['SEASON'][0][2:4]], columns=['SEASON']))
            roty_history = pd.concat([roty_history, emptynfo])

        year += 1

# Either load the cached ROTY table, or sort and cache the one that was just built.
if skip:
    roty_history = pd.read_csv('df/ROTY')
    # roty_history
else:
    roty_history.sort_values(by=['SEASON'], inplace=True)
    roty_history.to_csv("df/ROTY", index=False)

# Bar chart with one bar per season showing the overall draft pick stored for that season.
roty_graph = roty_history[['SEASON', 'OVERALL_PICK']]

roty_graph.plot.bar(
    x='SEASON',
    y='OVERALL_PICK',
    color='paleturquoise',
    figsize=(22, 8),
    title="NBA ROY draft pick positions",
    label='ROY Draft Pick',
)
plt.xlabel('Year')
plt.ylabel('ROY Draft Pick')
plt.axis('tight')
plt.show()

if not skip:
    # Getting all-time players stats
    # Column layout of the accumulators: SEASON and ROTY first, then the endpoint's
    # columns with VS_PLAYER_ID / VS_PLAYER_NAME renamed to PERSON_ID / PERSON_NAME.
    detail_columns = [
        'SEASON', 'ROTY', 'GROUP_SET', 'TEAM_ID', 'TEAM_ABBREVIATION', 'TEAM_NAME',
        'PERSON_ID', 'PERSON_NAME', 'COURT_STATUS', 'GP', 'W', 'L', 'W_PCT', 'MIN',
        'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT',
        'OREB', 'DREB', 'REB', 'AST', 'TOV', 'STL', 'BLK', 'BLKA', 'PF', 'PFD', 'PTS',
        'PLUS_MINUS', 'GP_RANK', 'W_RANK', 'L_RANK', 'W_PCT_RANK', 'MIN_RANK',
        'FGM_RANK', 'FGA_RANK', 'FG_PCT_RANK', 'FG3M_RANK', 'FG3A_RANK',
        'FG3_PCT_RANK', 'FTM_RANK', 'FTA_RANK', 'FT_PCT_RANK', 'OREB_RANK',
        'DREB_RANK', 'REB_RANK', 'AST_RANK', 'TOV_RANK', 'STL_RANK', 'BLK_RANK',
        'BLKA_RANK', 'PF_RANK', 'PFD_RANK', 'PTS_RANK', 'PLUS_MINUS_RANK',
    ]
    alltime_details = pd.DataFrame([], columns=detail_columns)

    year = 2007 # First year with available data
    while year != 2022:
        season_tag = str(year)[2:]  # two-digit label stored in the SEASON column

        # The season's Rookie of the Year is the same for every team, so it is resolved
        # once per season. .iloc[0] extracts the scalar explicitly; calling int() on a
        # one-element Series is deprecated in pandas.
        roty_id = int(roty_history[roty_history['SEASON'] == "'" + season_tag]['PERSON_ID'].iloc[0])

        season_details = pd.DataFrame([], columns=detail_columns)
        for team in team_dict:
            team_details = leagueplayerondetails.LeaguePlayerOnDetails(season=str(year)+'-'+str(year+1)[2:], team_id=team['id']).get_data_frames()[0]
            time.sleep(.25)  # pause after each request
            team_details.rename(columns={'VS_PLAYER_ID': 'PERSON_ID', 'VS_PLAYER_NAME': 'PERSON_NAME'}, inplace=True)

            # Constant columns are assigned directly; the previous helper frame was named
            # `yrs` and overwrote the module-level list of seasons used by earlier plots.
            team_details['SEASON'] = season_tag
            team_details['ROTY'] = 0

            # Flag the first row that belongs to the season's Rookie of the Year, if present
            is_roty = team_details['PERSON_ID'] == roty_id
            if is_roty.any():
                team_details.at[team_details.index[is_roty][0], 'ROTY'] = 1

            season_details = pd.concat([season_details, team_details])

        alltime_details = pd.concat([alltime_details, season_details])
        year += 1

# Cache the freshly built player table, or load the cached copy when the download is skipped.
if not skip:
    # Was `alltime_detailsils`, an undefined name that raised NameError before anything was written.
    alltime_details.to_csv("df/AllTimeInfo", index=False)

if skip:
    alltime_details = pd.read_csv('df/AllTimeInfo')
    # alltime_details

# Getting all-time rookie stats
# For every draft year 2007-2021, keep the alltime_details rows of the players drafted
# that year that belong to the same season. Rows are appended in draft order.
alltime_rookie = pd.DataFrame([], columns=['SEASON','ROTY','GROUP_SET', 'TEAM_ID', 'TEAM_ABBREVIATION', 'TEAM_NAME', 'PERSON_ID', 'PERSON_NAME', 'COURT_STATUS', 'GP', 'W', 'L', 'W_PCT', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'TOV', 'STL', 'BLK', 'BLKA', 'PF', 'PFD', 'PTS', 'PLUS_MINUS', 'GP_RANK', 'W_RANK', 'L_RANK', 'W_PCT_RANK', 'MIN_RANK', 'FGM_RANK', 'FGA_RANK', 'FG_PCT_RANK', 'FG3M_RANK', 'FG3A_RANK', 'FG3_PCT_RANK', 'FTM_RANK', 'FTA_RANK', 'FT_PCT_RANK', 'OREB_RANK', 'DREB_RANK', 'REB_RANK', 'AST_RANK', 'TOV_RANK', 'STL_RANK', 'BLK_RANK', 'BLKA_RANK', 'PF_RANK', 'PFD_RANK', 'PTS_RANK', 'PLUS_MINUS_RANK'])

year = 2007
while year != 2022:
    # This request is made on every run, whatever the value of `skip`
    draft = drafthistory.DraftHistory(season_year_nullable=year).get_data_frames()[0]
    time.sleep(.25)
    # Per-year accumulator with the same column layout as alltime_rookie
    rookies = pd.DataFrame([], columns=['SEASON','ROTY','GROUP_SET', 'TEAM_ID', 'TEAM_ABBREVIATION', 'TEAM_NAME', 'PERSON_ID', 'PERSON_NAME', 'COURT_STATUS', 'GP', 'W', 'L', 'W_PCT', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'TOV', 'STL', 'BLK', 'BLKA', 'PF', 'PFD', 'PTS', 'PLUS_MINUS', 'GP_RANK', 'W_RANK', 'L_RANK', 'W_PCT_RANK', 'MIN_RANK', 'FGM_RANK', 'FGA_RANK', 'FG_PCT_RANK', 'FG3M_RANK', 'FG3A_RANK', 'FG3_PCT_RANK', 'FTM_RANK', 'FTA_RANK', 'FT_PCT_RANK', 'OREB_RANK', 'DREB_RANK', 'REB_RANK', 'AST_RANK', 'TOV_RANK', 'STL_RANK', 'BLK_RANK', 'BLKA_RANK', 'PF_RANK', 'PFD_RANK', 'PTS_RANK', 'PLUS_MINUS_RANK'])

    for index, row in draft.iterrows():
        # All stat rows of this draftee ...
        rookie_info = alltime_details[alltime_details['PERSON_ID'] == row['PERSON_ID']]      
        # ... restricted to the draft-year season. SEASON is compared with an int
        # (e.g. 7 for 2007), which is what read_csv yields for the cached table.
        # NOTE(review): the table built in memory when skip is False stores SEASON
        # as a two-character string - confirm this filter still matches on that path.
        rookie_info = rookie_info[rookie_info['SEASON'] == int(str(year)[2:])]

        rookies = pd.concat([rookies, rookie_info])
    
    alltime_rookie = pd.concat([alltime_rookie, rookies])
    year += 1

# alltime_rookie

# First definition of the column list (it includes SEASON and ROTY); it is replaced by
# the feature-only list a few lines below. NOTE: `vars` shadows the builtin of the same name.
vars = ['SEASON','ROTY','GP', 'W', 'L', 'W_PCT', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 
        'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'TOV',
        'STL', 'BLK', 'BLKA', 'PF', 'PFD', 'PTS', 'PLUS_MINUS'
        
       # ,'GP_RANK', 'W_RANK', 'L_RANK', 'W_PCT_RANK', 'MIN_RANK', 'FGM_RANK', 'FGA_RANK', 'FG_PCT_RANK', 
       # 'FG3M_RANK', 'FG3A_RANK', 'FG3_PCT_RANK', 'FTM_RANK', 'FTA_RANK', 'FT_PCT_RANK', 'OREB_RANK', 'DREB_RANK', 
       # 'REB_RANK', 'AST_RANK', 'TOV_RANK', 'STL_RANK', 'BLK_RANK', 'BLKA_RANK', 'PF_RANK', 'PFD_RANK', 'PTS_RANK', 'PLUS_MINUS_RANK'
        ]

# Notebook-style display of the first seven rookie rows (no effect when run as a script)
alltime_rookie[['PERSON_NAME', 'SEASON', 'ROTY']].head(7)

                                                        # Illustrative Code

# Feature columns used by the model (the *_RANK columns are left out)
vars = ['GP', 'W', 'L', 'W_PCT', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 
        'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'TOV', 'STL', 'BLK', 'BLKA', 'PF', 'PFD', 'PTS', 'PLUS_MINUS']

                                                        # Illustrative Code
# Simplified split: season 21 is held out as the test set. X, y and the four splits
# are all reassigned by the statements that follow this cell.
X = alltime_rookie[vars + ['SEASON']]
y = alltime_rookie[['ROTY', 'SEASON']]
                                            # Splitting the dataset into training and testing sets
X_train = X[X['SEASON'] != 21]
X_test = X[X['SEASON'] == 21]
y_train = y[y['SEASON'] != 21]
y_test = y[y['SEASON'] == 21]

# Feature matrix and label table, both still carrying SEASON so they can be split by season.
X = alltime_rookie[['SEASON', 'ROTY', *vars]]
y = alltime_rookie[['SEASON', 'ROTY']]

# Hold out one season as the test set and train on all the others.
# Only ROTY is removed from the features, so SEASON remains a model input.
season = 21
X_train = X.loc[X['SEASON'] != season].drop(columns=['ROTY'])
X_test = X.loc[X['SEASON'] == season].drop(columns=['ROTY'])
# Labels become flat integer arrays (1 = Rookie of the Year, 0 = not)
y_train = y.loc[y['SEASON'] != season, ['ROTY']].astype('int').to_numpy()[:, 0]
y_test = y.loc[y['SEASON'] == season, ['ROTY']].astype('int').to_numpy()[:, 0]

# Standardise with statistics learned from the training rows only
scaler = StandardScaler()
scaler.fit(X_train)

X_train_norm = scaler.transform(X_train)
X_test_norm = scaler.transform(X_test)

          # Illustrative Code
    
# Illustrative walk-through: create, fit and apply the classifier step by step.
# The same three steps are repeated further down in a single commented cell.
log_regression = LogisticRegression()

          # Illustrative Code
    
log_regression.fit(X_train_norm,y_train)

# Stray notebook output kept as code: builds an unused estimator and discards it
LogisticRegression()

          # Illustrative Code

# Hard 0/1 labels and per-class probabilities for the held-out season
y_pred = log_regression.predict(X_test_norm)
y_prob = log_regression.predict_proba(X_test_norm)

# Instantiating the model
log_regression = LogisticRegression()

# Fitting the model using the training data
log_regression.fit(X_train_norm,y_train)

# Using the model to make predictions on test data
y_pred = log_regression.predict(X_test_norm)
y_prob = log_regression.predict_proba(X_test_norm)

# Notebook-style display of the predicted labels (no effect when run as a script)
y_pred

array([0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

# Test-set positions that the classifier labelled as winner (1).
# NOTE: `filter` shadows the builtin of the same name.
filter = [position for position, label in enumerate(y_pred) if label == 1]

# NOTE(review): the positions index rows of X_test, but they are applied to the 2021
# draft table below - confirm that both are in the same order and of the same length.
draft = drafthistory.DraftHistory(season_year_nullable=2021).get_data_frames()[0]
print("Predicted Winner:")
print(draft.iloc[filter]['PLAYER_NAME'].values)
print("Real Winner:", roty_history[roty_history['SEASON'] == "'21"][["FIRST_NAME", "LAST_NAME"]].values[0])

Predicted Winner:
['Evan Mobley' 'Scottie Barnes']
Real Winner: ['Scottie' 'Barnes']

# Second column of predict_proba (the probability of class 1, i.e. ROTY), rounded to three decimals
prob = y_prob[:, 1].round(3)
# Notebook-style display (no effect when run as a script)
prob

array([0.128, 0.087, 0.539, 0.632, 0.015, 0.045, 0.002, 0.172, 0.045,
       0.033, 0.001, 0.004, 0.035, 0.001, 0.023, 0.005, 0.001, 0.005,
       0.001, 0.001, 0.002, 0.003, 0.002, 0.001, 0.002, 0.001, 0.004,
       0.003, 0.001, 0.002, 0.002, 0.001, 0.004, 0.417, 0.001, 0.001,
       0.077, 0.001, 0.001, 0.001, 0.004, 0.001, 0.007, 0.   , 0.002,
       0.001, 0.001, 0.001, 0.001, 0.   , 0.008, 0.003, 0.001, 0.002,
       0.005])

# Put draft names and predicted probabilities side by side, joined on the row position.
# NOTE(review): this assumes row k of `prob` belongs to row k of `draft` - confirm,
# since `prob` has one entry per test-set row rather than one per draftee.
names = pd.DataFrame(draft['PLAYER_NAME'].to_list(), columns=["Player"])
prob_df = pd.DataFrame(prob, columns=["Probability"])
Player_Prob = names.join(prob_df)

# Notebook-style display (no effect when run as a script)
Player_Prob.head(10)

# Pick the single most probable candidate instead of everyone labelled 1
i = prob.argmax()
print(f"Predicted Winner: {draft.iloc[i]['PLAYER_NAME']} 🏆")
print("Real Winner:", roty_history[roty_history['SEASON'] == "'21"][["FIRST_NAME", "LAST_NAME"]].values[0])

Predicted Winner: Scottie Barnes 🏆
Real Winner: ['Scottie' 'Barnes']

# 0/1 list that marks the entries equal to the highest probability. The maximum is
# computed once, in a vectorised comparison, instead of once per element.
prob_max = (prob == prob.max()).astype(int).tolist()

# Confusion matrix of the model's own 0/1 labels (y_pred); the cells are read as
# [true label][predicted label] by the captions printed below.
print(cnf_matrix := metrics.confusion_matrix(y_test, y_pred))
print(f"True Winners: {cnf_matrix[1][1]}")
print(f"True Losers: {cnf_matrix[0][0]}")
print(f"False Winner: {cnf_matrix[0][1]}")
print(f"False Losers: {cnf_matrix[1][0]}")

[[53  1]
 [ 0  1]]
True Winners: 1
True Losers: 53
False Winner: 1
False Losers: 0

# Percentage of held-out rows whose predicted label (y_pred) equals the true label
print("Accuracy:", metrics.accuracy_score(y_test, y_pred)*100, "%")

Accuracy: 98.18181818181819 %

# Same report as for y_pred, but scoring prob_max, where only the highest-probability
# entries are labelled 1. This rebinds cnf_matrix.
print(cnf_matrix := metrics.confusion_matrix(y_test, prob_max))
print(f"True Winners: {cnf_matrix[1][1]}")
print(f"True Losers: {cnf_matrix[0][0]}")
print(f"False Winner: {cnf_matrix[0][1]}")
print(f"False Losers: {cnf_matrix[1][0]}")

[[54  0]
 [ 0  1]]
True Winners: 1
True Losers: 54
False Winner: 0
False Losers: 0

# Prediction accuracy (in percent) when only the top-probability entries count as winners
print("Accuracy:", metrics.accuracy_score(y_test, prob_max) * 100, "%")

Accuracy: 100.0 %

def simulation():
    """Interactively ask the user for two teams and a season.

    Returns:
        A tuple (team1, team2, year): the two team records returned by
        teams.find_team_by_abbreviation and the season as an int with
        1946 < year < 2022. If the user enters 'Q' at any prompt the
        tuple (None, None, None) is returned instead.
    """
    print("To leave insert 'Q' at any time!")
    abbr_df = pd.DataFrame([team['abbreviation'] for team in team_dict], columns=['Team'])
    known_abbrs = set(abbr_df['Team'])

    def ask_team(prompt):
        """Prompt until a known abbreviation is entered; return None when the user quits."""
        while True:
            answer = input(prompt).upper()
            if answer == 'Q':
                return None
            if answer in known_abbrs:
                return answer
            # Shown for both prompts; the second prompt used to re-check the first
            # team's abbreviation, so this message never appeared for it.
            print("Insert a valid team (Abbreviation)!")

    show = ""
    while show != "Y" and show != "N":
        show = input("Do you want to see the teams before choosing? (Y/N) ").upper()
        if show == 'Q':
            return None, None, None
        if show == "Y":
            print(abbr_df)

    abbr1 = ask_team("Insert First Team (Abbreviation) - ")
    if abbr1 is None:
        return None, None, None
    team1 = teams.find_team_by_abbreviation(abbr1)
    print(f"First Team selected: {team1['full_name']}")

    abbr2 = ask_team("Insert Second Team (Abbreviation) - ")
    if abbr2 is None:
        return None, None, None
    team2 = teams.find_team_by_abbreviation(abbr2)
    print(f"Second Team selected: {team2['full_name']}")

    year = 0
    while not 1946 < year < 2022:
        answer = input("Insert Season (yyyy) - ").strip()
        # Accept 'q' as well as 'Q', like the other prompts do
        if answer.upper() == 'Q':
            return None, None, None
        try:
            year = int(answer)
        except ValueError:
            # Non-numeric input is treated as an invalid year instead of crashing
            year = 0
        if not 1946 < year < 2022:
            print("Insert a valid year!")
        else:
            print(f"Season selected: {year}")
    return team1, team2, year

                                        #    ⚠ Code to Run ⚠
team1, team2, year = simulation()

To leave insert 'Q' at any time!
Do you want to see the teams before choosing? (Y/N) n
Insert First Team (Abbreviation) - gsw
First Team selected: Golden State Warriors
Insert Second Team (Abbreviation) - bos
Second Team selected: Boston Celtics
Insert Season (yyyy) - 2021
Season selected: 2021

'''
for team in team_dict:
    team_log = teamgamelog.TeamGameLog(team_id=team['id'], season=year, season_type_all_star='Regular Season').get_data_frames()[0]
    time.sleep(.25)
    team_log.to_csv(f"df/Season2021/{team['abbreviation']}")
''';

def game_log(first_team, second_team, year):
    """Collect the points two teams scored and allowed against every team in team_dict.

    Args:
        first_team: abbreviation of the first team.
        second_team: abbreviation of the second team.
        year: season; 2021 is read from the cached files under df/Season2021/,
            any other value is downloaded with teamgamelog.TeamGameLog.

    Returns:
        An 8-tuple (team1pts, team1ptsallowed, team2pts, team2ptsallowed,
        Team1pts, Team2pts, Team1ptsallowed, Team2ptsallowed): the first four
        are lists of per-game points, the last four are the same data as
        DataFrames with a single 'Points' column (note the different order).
    """
    team1 = teams.find_team_by_abbreviation(first_team)
    team2 = teams.find_team_by_abbreviation(second_team)
    abbr1 = first_team
    abbr2 = second_team

    def load_log(abbr, team):
        """Return one team's regular-season game log (cached CSV for 2021, API otherwise)."""
        if year == 2021:
            return pd.read_csv(f"df/Season2021/{abbr}")
        log = teamgamelog.TeamGameLog(team_id=team['id'], season=year, season_type_all_star='Regular Season').get_data_frames()[0]
        time.sleep(.25)  # pause after each request
        return log

    def points_versus(log, own, rival):
        """Points `own` scored against `rival` according to `log`: away games first, then home games."""
        away = log[log['MATCHUP'] == f'{own} @ {rival}']
        home = log[log['MATCHUP'] == f'{own} vs. {rival}']
        return pd.concat([away, home])['PTS'].to_list()

    team1pts, team1ptsallowed = [], []
    team2pts, team2ptsallowed = [], []

    # The logs of the two selected teams do not depend on the opponent, so they are
    # loaded once here instead of once per loop iteration.
    team_log1 = load_log(abbr1, team1)
    team_log2 = load_log(abbr2, team2)

    for team in team_dict:
        abbr0 = team['abbreviation']
        team_log0 = load_log(abbr0, team)

        # Team 1: what this opponent scored against it, and what it scored in return
        team1ptsallowed += points_versus(team_log0, abbr0, abbr1)
        team1pts += points_versus(team_log1, abbr1, abbr0)

        # Team 2: same two quantities
        team2ptsallowed += points_versus(team_log0, abbr0, abbr2)
        team2pts += points_versus(team_log2, abbr2, abbr0)

    # Games between the two selected teams need no separate pass: the loop above
    # reaches them when `team` is the other selected team.

    Team1pts = pd.DataFrame(team1pts, columns=['Points'])
    Team2pts = pd.DataFrame(team2pts, columns=['Points'])
    Team1ptsallowed = pd.DataFrame(team1ptsallowed, columns=['Points'])
    Team2ptsallowed = pd.DataFrame(team2ptsallowed, columns=['Points'])

    return  team1pts,team1ptsallowed,team2pts,team2ptsallowed,Team1pts,Team2pts,Team1ptsallowed,Team2ptsallowed

                                        #    ⚠ Code to Run ⚠
# Build the scored/allowed point samples for the two teams chosen in simulation().
# team1 and team2 are None if the user quit, in which case the subscripts below fail.
team1pts,team1ptsallowed,team2pts,team2ptsallowed,Team1pts,Team2pts,Team1ptsallowed,Team2ptsallowed \
= game_log(team1['abbreviation'], team2['abbreviation'], year)
# Notebook-style display (no effect when run as a script)
Team1pts.head(7)

# Ask for a new selection only when the long-running path is enabled.
# NOTE(review): the samples above are not rebuilt after this prompt - confirm that is intended.
if not skip:
    team1, team2, year = simulation()

# team1pts,team1ptsallowed,team2pts,team2ptsallowed,Team1pts,Team2pts,Team1ptsallowed,Team2ptsallowed = game_log()

def hist_pts_scored(Team1pts, Team2pts):
    """Show side-by-side 10-bin histograms of the points each team scored.

    Panel titles and the figure title use the module-level team1, team2 and year.
    """
    fig, axes = plt.subplots(1, 2, figsize=(20,8))
    for panel in axes:
        panel.set_xlabel("Points")
        panel.set_ylabel("Frequency")

    # (data, bar colour, team record) for the left and the right panel
    layout = ((Team1pts, "orchid", team1), (Team2pts, "aquamarine", team2))
    for panel, (points, shade, club) in zip(axes, layout):
        points.hist(bins=10, color=shade, ax=panel)
        panel.title.set_text(f"{club['full_name']}")

    fig.suptitle(f"Team Points Made Frequency | {year}-{str(year+1)[2:]} Season")
    plt.show()

                                        #    ⚠ Code to Run ⚠
hist_pts_scored(Team1pts, Team2pts)

def hist_pts_allowed(Team1ptsallowed, Team2ptsallowed):
    """Show side-by-side 10-bin histograms of the points each team allowed.

    Panel titles and the figure title use the module-level team1, team2 and year.
    """
    fig, axes = plt.subplots(1, 2, figsize=(20,8))
    for panel in axes:
        panel.set_xlabel("Points")
        panel.set_ylabel("Frequency")

    # (data, bar colour, team record) for the left and the right panel
    layout = ((Team1ptsallowed, "turquoise", team1), (Team2ptsallowed, "springgreen", team2))
    for panel, (points, shade, club) in zip(axes, layout):
        points.hist(bins=10, color=shade, ax=panel)
        panel.title.set_text(f"{club['full_name']}")

    fig.suptitle(f"Team Points Allowed Frequency | {year}-{str(year+1)[2:]} Season")
    plt.show()

                                        #    ⚠ Code to Run ⚠
hist_pts_allowed(Team1ptsallowed, Team2ptsallowed)

# Bootstrap estimate of the match-up: every replicate resamples (with replacement) the
# points a team scored plus the points its opponent allowed, averages the sums, and
# compares the two rounded averages.
B = 10000
win, tie = 0, 0

for b in range(B):
    team1scores = np.random.choice(team1pts, len(team1pts)) + np.random.choice(team2ptsallowed, len(team1pts))
    team2scores = np.random.choice(team2pts, len(team2pts)) + np.random.choice(team1ptsallowed, len(team2pts))
    team1score = round(np.mean(team1scores))
    team2score = round(np.mean(team2scores))
    # Losses are not counted directly; they are derived as B - win - tie below
    if team1score == team2score:
        tie += 1
    elif team1score > team2score:
        win += 1

print(f"{team1['full_name']} win rate against {team2['full_name']}: {round(win/B*100, 2)} %")
print(f"{team1['full_name']} lose rate against {team2['full_name']}: {round((B-win-tie)/B*100, 2)} %")
print(f"{team1['full_name']} tie rate against {team2['full_name']}: {round(tie/B*100, 2)} %")

Golden State Warriors win rate against Boston Celtics: 20.92 %
Golden State Warriors lose rate against Boston Celtics: 67.05 %
Golden State Warriors tie rate against Boston Celtics: 12.03 %

# Mean and standard deviation of each points sample, rounded to two decimals.
# DataFrame.mean()/std() return a Series labelled by column name, so the single value
# is taken with .iloc[0]; indexing it with [0] relies on a deprecated positional fallback.

# Team 1
print(f"{team1['full_name']} points scored mean is", team1ptsmean := round(Team1pts.mean().iloc[0], 2))
print(f"{team1['full_name']} points scored standard deviation is", team1ptssd := round(Team1pts.std().iloc[0], 2), "\n", "- "*33)

print(f"{team1['full_name']} points allowed mean is", team1ptsallowedmean := round(Team1ptsallowed.mean().iloc[0], 2))
print(f"{team1['full_name']} points allowed standard deviation is", team1ptsallowedsd := round(Team1ptsallowed.std().iloc[0], 2), "\n", "- "*33)

# Team2
print(f"{team2['full_name']} points scored mean is", team2ptsmean := round(Team2pts.mean().iloc[0], 2))
print(f"{team2['full_name']} points scored standard deviation is", team2ptssd := round(Team2pts.std().iloc[0], 2), "\n", "- "*33)

print(f"{team2['full_name']} points allowed mean is", team2ptsallowedmean := round(Team2ptsallowed.mean().iloc[0], 2))
print(f"{team2['full_name']} points allowed standard deviation is", team2ptsallowedsd := round(Team2ptsallowed.std().iloc[0], 2))

Golden State Warriors points scored mean is 111.0
Golden State Warriors points scored standard deviation is 11.78 
 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
Golden State Warriors points allowed mean is 105.46
Golden State Warriors points allowed standard deviation is 11.13 
 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
Boston Celtics points scored mean is 111.76
Boston Celtics points scored standard deviation is 14.15 
 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
Boston Celtics points allowed mean is 104.48
Boston Celtics points allowed standard deviation is 13.39

def game_simulation(Team1pts,Team1ptsallowed,Team2pts,Team2ptsallowed, B=10000):
    """Estimate team 1's win rate against team 2 by simulating single games.

    Each simulated score is the rounded sum of two normal draws: one from the
    team's points-scored distribution and one from the opponent's points-allowed
    distribution. The mean and standard deviation of each distribution are taken
    from the first column of the matching frame, rounded to two decimals.

    Args:
        Team1pts, Team1ptsallowed, Team2pts, Team2ptsallowed: single-column
            DataFrames of per-game points.
        B: number of simulated games (default 10000).

    Returns:
        Team 1's win share plus half of the tie share; both shares are rounded
        to two decimals before being combined.
    """
    def mean_sd(frame):
        """Rounded mean and standard deviation of the frame's first column."""
        # .iloc[0] picks the value by position; [0] on the label-indexed result
        # relies on a deprecated positional fallback.
        return round(frame.mean().iloc[0], 2), round(frame.std().iloc[0], 2)

    team1ptsmean, team1ptssd = mean_sd(Team1pts)
    team1ptsallowedmean, team1ptsallowedsd = mean_sd(Team1ptsallowed)
    team2ptsmean, team2ptssd = mean_sd(Team2pts)
    team2ptsallowedmean, team2ptsallowedsd = mean_sd(Team2ptsallowed)

    win = 0
    tie = 0

    for _ in range(B):
        team1score = round(np.random.normal(loc=team1ptsmean, scale=team1ptssd) + np.random.normal(loc=team2ptsallowedmean, scale=team2ptsallowedsd))
        team2score = round(np.random.normal(loc=team2ptsmean, scale=team2ptssd) + np.random.normal(loc=team1ptsallowedmean, scale=team1ptsallowedsd))
        if team1score > team2score:
            win += 1
        elif team1score == team2score:
            tie += 1

    win_rate = round(win/B, 2) + round(tie/B, 2) / 2 # Winning rate + half Tying rate

    return win_rate

                    # Illustrative Code
# Illustrative formula only: one simulated score is a draw from the team's scoring
# distribution plus a draw from the opponent's points-allowed distribution.
# NOTE(review): `normal`, `ptsmean1`, `ptssd1`, `ptsallwdmean2` and `ptsallowedsd2` are
# not defined in the visible part of the file, so this branch would raise NameError
# when skip is False - confirm.
if not skip:
    team1score = \
    normal(loc=ptsmean1, scale=ptssd1) + \
    normal(loc=ptsallwdmean2, scale=ptsallowedsd2)

# Estimated single-game win rate of team1 against team2 (wins plus half of the ties)
win_rate = game_simulation(Team1pts,Team1ptsallowed, \
                           Team2pts,Team2ptsallowed)
print(f"{team1['full_name']} win rate against {team2['full_name']}: {round(win_rate * 100, 2)} %")

Golden State Warriors win rate against Boston Celtics: 48.0 %

# Team rankings
def season(year):
    """Return the top-eight teams of each conference for a season.

    For 2021 the two tables are read from the cached CSV files. For any other
    year every team in team_dict is queried with teaminfocommon.TeamInfoCommon
    and kept when its conference rank is 8 or better.

    Returns:
        (East_conference, West_conference). For years other than 2021 these are
        DataFrames with 'Rank' and 'Team' (abbreviation) columns.
    """
    if year == 2021:
        East_conference = pd.read_csv("df/EastConference2021")
        West_conference = pd.read_csv("df/WestConference2021")
        return East_conference,West_conference

    east_conference = []
    west_conference = []
    for team in team_dict:
        team_rank = teaminfocommon.TeamInfoCommon(team_id=team['id'], season_nullable=year).get_data_frames()[0]
        time.sleep(.25)
        rank = team_rank['CONF_RANK'][0]
        if rank > 8:
            continue
        # Any conference other than "East" is filed under the West
        bucket = east_conference if team_rank['TEAM_CONFERENCE'][0] == "East" else west_conference
        bucket.append([rank, team['abbreviation']])

    West_conference = pd.DataFrame(west_conference, columns=['Rank','Team'])
    East_conference = pd.DataFrame(east_conference, columns=['Rank','Team'])
    return East_conference,West_conference

def series_simulation(Team1pts,Team2pts,Team1ptsallowed,Team2ptsallowed):
    """Simulate a best-of-seven series and report whether the first team wins it.

    The four frames are handed to game_simulation positionally, in exactly the
    order they are received, to obtain the first team's single-game win rate.
    Callers should therefore pass them in the order game_simulation expects,
    whatever the parameter names here suggest.

    Returns:
        1 if the first team reaches four wins first, otherwise 0.
    """
    win_rate = game_simulation(Team1pts,Team2pts,Team1ptsallowed,Team2ptsallowed)
    # The losing weight is the plain complement of the win rate. It used to be passed
    # through round(), which turned it into 0 or 1 and distorted every game's odds
    # (e.g. a 60 % favourite could never lose). Clamped at 0 because win_rate is built
    # from two separately rounded shares and can marginally exceed 1.
    lose_rate = max(0.0, 1 - win_rate)

    t1 = 0
    t2 = 0
    # Play until one side has four wins; that takes at most seven games
    while t1 < 4 and t2 < 4:
        if random.choices([0, 1], weights=[lose_rate, win_rate], k=1)[0]:
            t1 += 1
        else:
            t2 += 1

    return 1 if t1 > t2 else 0
'''MIA = io.imread("https://drive.google.com/uc?id=1ZNPGmb6S3efC0eN30zScYoGEmf6nbQ2Q")[:, :, ::-1]
ATL = io.imread("https://drive.google.com/uc?id=1a6QCkhCu-QUJ74pFR-8bbMqlFOj1zRY_")[:, :, ::-1]
PHI = io.imread("https://drive.google.com/uc?id=1-V8iR5ctywsVrHoiLGX16RXD0u_i9xMB")[:, :, ::-1]
TOR = io.imread("https://drive.google.com/uc?id=1JJ_vACPX8sU7QUfasVKJVGb5hoXxH1wu")[:, :, ::-1]
MIL = io.imread("https://drive.google.com/uc?id=14NmPAo_7ebziRdvSaLhN7XU_RLuR_f62")[:, :, ::-1]
CHI = io.imread("https://drive.google.com/uc?id=1gb13wGtGyKDBAW3u9IJwjsFtMNv5kbfH")[:, :, ::-1]
BOS = io.imread("https://drive.google.com/uc?id=1XsrU5eAwaupHeduUAiSrnBfkxJXWei-h")[:, :, ::-1]
BKN = io.imread("https://drive.google.com/uc?id=1BNmK8CE3SLxoBuc497euaM5vxa0_LHuD")[:, :, ::-1]
PHX = io.imread("https://drive.google.com/uc?id=1SwLeExrR_xT34lMhyYJ1IASZ15oa56t9")[:, :, ::-1]
NOP = io.imread("https://drive.google.com/uc?id=1U3AdUaUOMYZAJa4mnVqeswwiNXno27oT")[:, :, ::-1]
DAL = io.imread("https://drive.google.com/uc?id=11g0jh7vqODLH9KrnsB1wu-yhAO9q2OX0")[:, :, ::-1]
UTA = io.imread("https://drive.google.com/uc?id=1V2bmV4GULmpFcKld7iWbegMAW6jm-Tm3")[:, :, ::-1]
GSW = io.imread("https://drive.google.com/uc?id=1F4l7ixktz2SChQlfvpGHSG1HYzd9NmCy")[:, :, ::-1]
DEN = io.imread("https://drive.google.com/uc?id=1fwoE2Cl4qwRyhVAhSLfEl3i8UnA3n-6K")[:, :, ::-1]
MEM = io.imread("https://drive.google.com/uc?id=1e4qFREBRDUNHuSi8Mu1K-WZa059l1CRB")[:, :, ::-1]
MIN = io.imread("https://drive.google.com/uc?id=1khu-mMYeXezVG4KWkBvRg-ucMbYtqVlT")[:, :, ::-1]'''

# Lower-case team abbreviation -> logo image.
# NOTE(review): the statements that load MIA ... MIN sit inside the string literal
# directly above and never run; unless those names are defined elsewhere in the
# file, building this dict raises NameError - confirm.
logos = {'mia':MIA, 'atl':ATL, 'phi':PHI, 'tor':TOR, 'mil':MIL, 'chi':CHI, 'bos':BOS, 'bkn':BKN, \
         'phx':PHX, 'nop':NOP, 'dal':DAL, 'uta':UTA, 'gsw':GSW, 'den':DEN, 'mem':MEM, 'min':MIN}

# Side length, in pixels, of every logo tile pasted onto the bracket image.
_LOGO_SIZE = 50

# (seed, top pixel row) of each first-round slot, in bracket order
# 1-8, 4-5, 3-6, 2-7.  The two lowest West slots sit 10 px lower than in the East.
_EAST_SEED_ROWS = ((1, 60), (8, 120), (4, 180), (5, 240), (3, 310), (6, 370), (2, 430), (7, 490))
_WEST_SEED_ROWS = ((1, 60), (8, 120), (4, 180), (5, 240), (3, 310), (6, 370), (2, 440), (7, 500))

# First-round pairings expressed as (higher seed, lower seed).
_FIRST_ROUND_MATCHUPS = ((1, 8), (4, 5), (3, 6), (2, 7))


def _paste_logo(bracket, team, top, left):
    """Paste the logo stored at ``Imgs/<team>.png`` with its top-left corner at (top, left)."""
    logo_path = f"Imgs/{team}.png"
    logo = cv2.imread(logo_path)
    if logo is None:
        # cv2.imread signals an unreadable file by returning None instead of raising.
        raise FileNotFoundError(f"Cannot read logo image: {logo_path}")
    bracket[top:top + _LOGO_SIZE, left:left + _LOGO_SIZE] = logo


def _play_series(bracket, team1, team2, year, top, left):
    """Simulate ``team1`` vs ``team2``, report the result and paste the winner's logo."""
    # Only the last four values returned by game_log feed the simulation.
    _, _, _, _, team1_pts, team2_pts, team1_pts_allowed, team2_pts_allowed = game_log(team1, team2, year)
    # A truthy result means team1 won; any falsy result advances team2.
    team1_won = series_simulation(team1_pts, team1_pts_allowed, team2_pts, team2_pts_allowed)
    winner = team1 if team1_won else team2
    print(team1, " - ", team2, " wins ", winner)
    _paste_logo(bracket, winner, top, left)
    return winner


def _first_round(bracket, conference, year, seed_rows, seed_left, winner_rows, winner_left):
    """Draw the eight seeds of one conference and return its four first-round winners."""
    seeds = {
        rank: conference[conference['Rank'] == rank]['Team'].iloc[0]
        for rank in range(1, 9)
    }
    for rank, top in seed_rows:
        _paste_logo(bracket, seeds[rank], top, seed_left)
    return [
        _play_series(bracket, seeds[high], seeds[low], year, top, winner_left)
        for (high, low), top in zip(_FIRST_ROUND_MATCHUPS, winner_rows)
    ]


def playoffs(year=2021):
    """Simulate a full playoff bracket and return it as an RGB image.

    Parameters
    ----------
    year : int, default 2021
        Season to simulate.  Values outside 2004-2021 make the function
        prompt on stdin until a valid season is entered.

    Returns
    -------
    The bracket template ``Imgs/Playoffs.png`` with the logos of all seeds
    and series winners pasted in, converted from BGR to RGB.

    Raises
    ------
    FileNotFoundError
        If the bracket template or a team logo cannot be read.

    Notes
    -----
    Relies on the module-level callables ``season``, ``game_log`` and
    ``series_simulation``.  NOTE(review): later top-level code rebinds the
    name ``season`` to a plain value, so calling this function after that
    point would fail.
    """
    while not 2004 <= year <= 2021:
        year = int(input("Insert Season"))
        if not 2004 <= year <= 2021:
            print("Insert a valid Season")

    east_conference, west_conference = season(year)
    bracket = cv2.imread("Imgs/Playoffs.png")
    if bracket is None:
        raise FileNotFoundError("Cannot read bracket template: Imgs/Playoffs.png")

    # Round 1 (East first, then West, to keep the original simulation order)
    r1e1, r1e2, r1e3, r1e4 = _first_round(
        bracket, east_conference, year, _EAST_SEED_ROWS, 720, (90, 210, 340, 470), 610)
    r1w1, r1w2, r1w3, r1w4 = _first_round(
        bracket, west_conference, year, _WEST_SEED_ROWS, 60, (90, 210, 340, 460), 190)

    # Eastern Conference Semifinals
    r2e1 = _play_series(bracket, r1e1, r1e2, year, 150, 500)
    r2e2 = _play_series(bracket, r1e3, r1e4, year, 400, 500)

    # Western Conference Semifinals
    r2w1 = _play_series(bracket, r1w1, r1w2, year, 150, 300)
    r2w2 = _play_series(bracket, r1w3, r1w4, year, 400, 300)

    # Conference Finals
    r3e = _play_series(bracket, r2e1, r2e2, year, 277, 510)
    r3w = _play_series(bracket, r2w1, r2w2, year, 277, 280)

    # Finals: the champion is one of the two finalists.  The previous code
    # picked between the two Western semifinal winners, so an Eastern team
    # could never be drawn as champion.
    _play_series(bracket, r3w, r3e, year, 277, 395)

    return cv2.cvtColor(bracket, cv2.COLOR_BGR2RGB)

# Run the bracket simulation once and keep the returned RGB image for plotting.
playoffs_bracket = playoffs()

MIA  -  ATL  wins  MIA
PHI  -  TOR  wins  PHI
MIL  -  CHI  wins  MIL
BOS  -  BKN  wins  BOS
PHX  -  NOP  wins  PHX
DAL  -  UTA  wins  UTA
GSW  -  DEN  wins  GSW
MEM  -  MIN  wins  MEM
MIA  -  PHI  wins  MIA
MIL  -  BOS  wins  BOS
PHX  -  UTA  wins  PHX
GSW  -  MEM  wins  GSW
MIA  -  BOS  wins  BOS
PHX  -  GSW  wins  PHX
PHX  -  BOS  wins  PHX

# Show the simulated bracket on a large figure without axes or ticks.
plt.figure(figsize=(15,14))
plt.axis('off')
plt.imshow(playoffs_bracket)
plt.show()

# Hand-built bracket drawn on the same template, using the already-loaded
# team logo images (MIA, ATL, ...).
#playoffs2021 = io.imread("https://drive.google.com/uc?id=1KGBNvKylJwilDQyKE5srciuPR26RdKyz")[:, :, ::-1]
playoffs2021 = cv2.imread("Imgs/Playoffs.png")

# First-round seeds: (top row, logo), East column first, then West column.
_east_seed_slots = ((60, MIA), (120, ATL), (180, PHI), (240, TOR),
                    (310, MIL), (370, CHI), (430, BOS), (490, BKN))
for _top, _logo in _east_seed_slots:
    playoffs2021[_top:_top + 50, 720:770] = _logo

_west_seed_slots = ((60, PHX), (120, NOP), (180, DAL), (240, UTA),
                    (310, GSW), (370, DEN), (440, MEM), (500, MIN))
for _top, _logo in _west_seed_slots:
    playoffs2021[_top:_top + 50, 60:110] = _logo

# Winners of each round (names kept because they are module-level globals).
logor1e1, logor1e2, logor1e3, logor1e4 = MIA, PHI, MIL, BOS
logor1w1, logor1w2, logor1w3, logor1w4 = PHX, DAL, GSW, MEM
logor2e1, logor2e2 = MIA, BOS
logor2w1, logor2w2 = DAL, GSW
logor3e, logor3w = BOS, GSW
logochamps = GSW

# (top row, left column, logo) for every slot after the first round.
_winner_slots = (
    (90, 610, logor1e1), (210, 610, logor1e2), (340, 610, logor1e3), (470, 610, logor1e4),
    (90, 190, logor1w1), (210, 190, logor1w2), (340, 190, logor1w3), (460, 190, logor1w4),
    (150, 500, logor2e1), (400, 500, logor2e2),
    (150, 300, logor2w1), (400, 300, logor2w2),
    (277, 510, logor3e), (277, 280, logor3w),
    (277, 395, logochamps),
)
for _top, _left, _logo in _winner_slots:
    playoffs2021[_top:_top + 50, _left:_left + 50] = _logo

# OpenCV works in BGR; matplotlib expects RGB.
playoffs2021 = cv2.cvtColor(playoffs2021, cv2.COLOR_BGR2RGB)
plt.figure(figsize=(15,14))
plt.axis('off')
plt.imshow(playoffs2021)
plt.show()

# Season totals of the league-leaders table (one row per player).
# NOTE: this rebinds `players`, which the file header imports from
# nba_api.stats.static; the name is kept because later cells read it.
players = leagueleaders.LeagueLeaders().get_data_frames()[0]
# Keep players with at least 500 total minutes (MIN is still a season total
# here; it is converted to a per-game value below).
players = players[players['MIN'] >= 500]
# ['PLAYER_ID', 'RANK', 'PLAYER', 'TEAM', 'GP', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS', 'EFF', 'AST_TOV', 'STL_TOV']

# Identifier, percentage and ratio columns that must not be divided by games played.
var_excluded = {'PLAYER_ID', 'RANK', 'PLAYER', 'TEAM', 'GP', 'FG_PCT', 'FG3_PCT', 'FT_PCT', 'EFF', 'AST_TOV', 'STL_TOV'}

# From total stats to per-game stats: divide every remaining column by GP in
# one vectorised step instead of indexing each cell individually.
per_game_columns = [column for column in players.columns if column not in var_excluded]
# A fresh 0..n-1 index keeps the frame row-aligned with the cluster labels
# that are concatenated to it later.
players_avg_df = players.reset_index(drop=True)
players_avg_df[per_game_columns] = (
    players_avg_df[per_game_columns].div(players_avg_df['GP'], axis=0).round(2)
)

# vars = ['FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB','DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS']
# Features used for clustering (the name shadows the builtin `vars`, but
# later cells depend on it).
vars = ['PTS', 'AST', 'REB', 'BLK', 'STL', 'FG3_PCT']

X = players_avg_df[vars]

                                            # Illustrative Code
    
# Presentation copy of the data-preparation cell: it downloads the
# league-leaders table again and rebinds `players`.
players = leagueleaders.LeagueLeaders().get_data_frames()[0]

# Keep only players with at least 500 total minutes
players = players[players['MIN'] >= 500]

                                # Illustrative Code

# Notebook display of the first per-game rows (no effect when run as a script).
players_avg_df.head()

                                # Illustrative Code
    
# Same clustering features and feature matrix as defined above, repeated for display.
vars = ['PTS', 'AST', 'REB', 'BLK', 'STL', 'FG3_PCT']
X = players_avg_df[vars]
X.head(10)

# Standardise every feature so that no single statistic dominates the distances.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Partition the players into five groups; the fixed seed makes the labels reproducible.
kmeans_model = KMeans(n_clusters=5, random_state=0)
clusters = pd.DataFrame(kmeans_model.fit_predict(X_scaled), columns=['CLUSTER'])

# Attach each player's cluster label next to the per-game statistics.
players_roles = pd.concat((players_avg_df, clusters), axis=1)
players_roles[['PLAYER', 'CLUSTER', *vars]].head()

# Columns that carry no box-score information and are dropped before averaging.
_non_stat_columns = ['PLAYER_ID', 'RANK', 'PLAYER', 'TEAM', 'GP', 'MIN', 'EFF', 'AST_TOV', 'STL_TOV']

# Rows belonging to each of the five clusters.
cluster0, cluster1, cluster2, cluster3, cluster4 = (
    players_roles[players_roles['CLUSTER'] == label] for label in range(5)
)

# The same rows restricted to the statistic columns.
c0, c1, c2, c3, c4 = (
    members.drop(_non_stat_columns, axis=1)
    for members in (cluster0, cluster1, cluster2, cluster3, cluster4)
)

# One-row frames holding the rounded mean of each clustering feature.
c0_avg, c1_avg, c2_avg, c3_avg, c4_avg = (
    pd.DataFrame([round(stats.mean(), 2)], columns=vars)
    for stats in (c0, c1, c2, c3, c4)
)

# One row per cluster: its label (as a string) followed by the feature means.
clusters_plot = pd.DataFrame(
    [
        [str(label)] + averages.values[0].tolist()
        for label, averages in enumerate((c0_avg, c1_avg, c2_avg, c3_avg, c4_avg))
    ],
    columns=['CLUSTER'] + vars,
)

# Transposed view: one row per statistic, one column per cluster.
stats_plot = pd.DataFrame(
    [
        [stat] + clusters_plot[stat].values.tolist()
        for stat in clusters_plot.columns.tolist()[1:]
    ],
    columns=["STAT", "0", "1", "2", "3", "4"],
)

# Notebook display of both summary tables.
clusters_plot

stats_plot

# --- Chart 1: for every statistic, one bar per cluster -----------------------
plt.style.use('seaborn') # seaborn / fivethirtyeight
plt.figure(figsize=(21,8))

# One tick every 7 units leaves room for five unit-wide bars plus a gap.
x = np.arange(0, len(vars)*7, 7)
w = 1

_cluster_bars = (
    (-2, '0', 'tab:blue'),
    (-1, '1', 'tab:orange'),
    (0, '2', 'tab:green'),
    (1, '3', 'tab:red'),
    (2, '4', 'tab:purple'),
)
for _shift, _cluster, _colour in _cluster_bars:
    plt.bar(x + _shift, stats_plot[_cluster].tolist(), width=w, color=_colour)

plt.xticks(x, vars)
plt.xlabel("Box Score Statistic")
plt.ylabel("Value")
plt.legend(["0", "1", "2", "3", "4"])
plt.title("Similar Players by Box Score Statistic")

plt.show()

# --- Chart 2: for every cluster, one bar per statistic -----------------------
plt.style.use('seaborn') # seaborn / fivethirtyeight
plt.figure(figsize=(21,8))

colors = ['tab:blue', 'tab:orange', 'tab:green', 'tab:red', 'tab:purple', 'tab:brown', 'tab:pink', 
          'tab:gray', 'tab:olive', 'tab:cyan', 'b', 'g', 'r', 'c', 'm', 'k', 'w', 'gold']
_group_width = len(vars) + 2
x = np.arange(0, _group_width * 5, _group_width)
w = 1
# Horizontal offset of each statistic's bar inside a cluster group.
l = np.arange(-len(vars)/2, len(vars)/2)

for _offset, _stat, _colour in zip(l, vars, colors):
    plt.bar(x + _offset, clusters_plot[_stat].tolist(), width=w, color=_colour)

plt.xticks(x, ["0", "1", "2", "3", "4"])
plt.xlabel("Cluster")
plt.ylabel("Value")
plt.legend(vars)

plt.show()

# Report how many players fell into each cluster.
for _label, _members in enumerate((c0, c1, c2, c3, c4)):
    print(f"Members of Cluster {_label} :", len(_members))

Members of Cluster 0 : 117
Members of Cluster 1 : 148
Members of Cluster 2 : 23
Members of Cluster 3 : 37
Members of Cluster 4 : 50

# Two-column table showing the cluster assigned to a few well-known players.
print("       Player       |       Clusters       ")
_showcase_players = (
    'Stephen Curry', 'LeBron James', 'Kevin Durant', 'Nikola Jokic',
    'Klay Thompson', 'Steven Adams', 'Russell Westbrook', 'Rudy Gobert',
)
for _player_name in _showcase_players:
    _label = players_roles[players_roles['PLAYER'] == _player_name]['CLUSTER'].values[0]
    # The name is padded to 20 characters so the separator column lines up.
    print(_player_name.ljust(20) + "|" + " " * 8, _label)

       Player       |       Clusters       
Stephen Curry       |         4
LeBron James        |         4
Kevin Durant        |         4
Nikola Jokic        |         4
Klay Thompson       |         0
Steven Adams        |         2
Russell Westbrook   |         4
Rudy Gobert         |         2

# Clustering features plus the assigned label; the first three columns of
# `vars` are the plotted axes.
cleaned_players = players_roles[vars + ['CLUSTER']]

plt.style.use('fivethirtyeight') # seaborn / fivethirtyeight

fig = plt.figure(figsize=(7,11))
ax = fig.add_subplot(111, projection='3d')
# ax.grid(False)
# One colour per cluster (the name shadows the builtin `map`; kept as in the notebook).
map = ['tab:blue', 'tab:orange', 'tab:green', 'tab:red', 'tab:purple']

_groups = cleaned_players.groupby('CLUSTER').groups
for i, (grp_name, grp_idx) in enumerate(_groups.items()):
    _rows = cleaned_players.iloc[grp_idx]
    x, y, z = _rows.iloc[:, 0], _rows.iloc[:, 1], _rows.iloc[:, 2]  # PTS, AST, REB
    # Per-cluster means, only needed by the optional centroid marker below.
    x_mean, y_mean, z_mean = x.mean(), y.mean(), z.mean()
    ax.scatter(x, y, z, c=map[i], label=grp_name)
    # ax.scatter(x_mean, y_mean, z_mean, c="k", marker="X") # Centroids

ax.set_xlabel("PTS")
ax.set_ylabel("AST")
ax.set_zlabel("REB")
ax.set_title("Players Clusters")
ax.legend()
plt.show()

%matplotlib notebook

# Second copy of the 3-D cluster scatter, placed after the `%matplotlib notebook`
# magic above.
plt.style.use('fivethirtyeight') # seaborn / fivethirtyeight

ax = plt.figure(figsize=(7,11)).add_subplot(111, projection='3d')
# ax.grid(False)
# One colour per cluster (shadows the builtin `map`).
map = ['tab:blue', 'tab:orange', 'tab:green', 'tab:red', 'tab:purple']
i = 0
# NOTE: this selection depends on the current value of the global `vars`.
# The traceback recorded below shows it raising KeyError when `vars`
# contained 'SEASON' and 'MVP', columns that `players_roles` does not have.
cleaned_players = players_roles[vars + ['CLUSTER']]

# `groups` maps each cluster label to the index labels of its rows; they are
# used positionally with iloc here.
for grp_name, grp_idx in cleaned_players.groupby('CLUSTER').groups.items():
    x = cleaned_players.iloc[grp_idx, 0] # PTS
    x_mean = cleaned_players.iloc[grp_idx, 0].mean() # PTS mean
    y = cleaned_players.iloc[grp_idx, 1] # AST
    y_mean = cleaned_players.iloc[grp_idx, 1].mean() # AST mean
    z = cleaned_players.iloc[grp_idx, 2] # REB
    z_mean = cleaned_players.iloc[grp_idx, 2].mean() # REB
    ax.scatter(x, y, z, c=map[i], label=grp_name)
    # ax.scatter(x_mean, y_mean, z_mean, c="k", marker="X") # Centroids
    i += 1

ax.set_xlabel("PTS")
ax.set_ylabel("AST")
ax.set_zlabel("REB")
ax.set_title("Players Clusters")
ax.legend()
plt.show()

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_22104/3656988517.py in <module>
      5 map = ['tab:blue', 'tab:orange', 'tab:green', 'tab:red', 'tab:purple']
      6 i = 0
----> 7 cleaned_players = players_roles[vars + ['CLUSTER']]
      8 
      9 for grp_name, grp_idx in cleaned_players.groupby('CLUSTER').groups.items():

~\anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
   3462             if is_iterator(key):
   3463                 key = list(key)
-> 3464             indexer = self.loc._get_listlike_indexer(key, axis=1)[1]
   3465 
   3466         # take() does not accept boolean indexers

~\anaconda3\lib\site-packages\pandas\core\indexing.py in _get_listlike_indexer(self, key, axis)
   1312             keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr)
   1313 
-> 1314         self._validate_read_indexer(keyarr, indexer, axis)
   1315 
   1316         if needs_i8_conversion(ax.dtype) or isinstance(

~\anaconda3\lib\site-packages\pandas\core\indexing.py in _validate_read_indexer(self, key, indexer, axis)
   1375 
   1376             not_found = list(ensure_index(key)[missing_mask.nonzero()[0]].unique())
-> 1377             raise KeyError(f"{not_found} not in index")
   1378 
   1379 

KeyError: "['SEASON', 'MVP'] not in index"

%matplotlib inline

# Every column of the league-leaders table.
tot_vars = ['PLAYER_ID', 'RANK', 'PLAYER', 'TEAM', 'GP', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS', 'EFF', 'AST_TOV', 'STL_TOV']
var_excluded = ['PLAYER_ID', 'RANK', 'PLAYER', 'TEAM', 'GP', 'FG_PCT', 'FG3_PCT', 'FT_PCT', 'EFF', 'AST_TOV', 'STL_TOV']

# From here on `vars` holds the columns used by the MVP model (season, label
# and box-score features) instead of the clustering features.
vars = ['SEASON', 'MVP', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB','DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS']
#vars = ['PTS', 'AST', 'REB', 'BLK', 'STL', 'FG3_PCT']

if not skip:
    # Build one frame per season, flag that season's MVP, and concatenate once
    # at the end (repeated pd.concat inside the loop re-copies all earlier rows).
    # The empty template keeps the original column layout.
    season_frames = [pd.DataFrame([], columns=tot_vars + ['MVP'])]

    for year in range(1979, 2022):  # 1979: first year without any null stat
        season_label = f"{year}-{str(year + 1)[2:]}"  # e.g. 1999 -> "1999-00"
        players = leagueleaders.LeagueLeaders(season=season_label).get_data_frames()[0]
        time.sleep(.25)  # pause between requests (presumably to respect API rate limits)

        players['MVP'] = 0
        players['SEASON'] = year

        # Walk the table until the player holding this season's MVP award is found.
        for player_id in players['PLAYER_ID'].values:
            awards = playerawards.PlayerAwards(player_id).get_data_frames()[0]
            time.sleep(.25)

            mvps = awards[awards['DESCRIPTION'] == 'NBA Most Valuable Player']
            if season_label in mvps['SEASON'].values:
                players.loc[players['PLAYER_ID'] == player_id, 'MVP'] = 1
                break  # the remaining players need no lookup

        season_frames.append(players)

    alltime_players = pd.concat(season_frames)

# Persist the freshly collected data set only when the slow collection step ran.
if not skip:
    alltime_players.to_csv("df/AllTimePlayersMVP", index=False)

# Always work from the cached file, so the rest of the script runs when skip is True.
alltime_players = pd.read_csv("df/AllTimePlayersMVP")

# Number of rows flagged as MVP (notebook display; the recorded output follows).
alltime_players['MVP'].sum()

43

# Features (still including the SEASON and MVP columns) and labels.
X = alltime_players[vars]
y = alltime_players[['SEASON', 'MVP']]

# Hold out one season for testing and train on every other season.
season = 2021
X_train = X[X['SEASON'] != season].drop(['MVP'], axis=1)
X_test = X[X['SEASON'] == season].drop(['MVP'], axis=1)
y_train = y[y['SEASON'] != season].drop(['SEASON'], axis=1).astype('int').to_numpy()[:,0]
y_test = y[y['SEASON'] == season].drop(['SEASON'], axis=1).astype('int').to_numpy()[:,0]

# Fit the scaler on the training rows only and apply the same scaling to both sets.
scaler = StandardScaler()
scaler.fit(X_train)

X_train_norm = scaler.transform(X_train)
X_test_norm = scaler.transform(X_test)

# Instantiating the model
log_regression = LogisticRegression()

# Fitting the model using the training data
log_regression.fit(X_train_norm,y_train)

# Using the model to make predictions on test data
y_pred = log_regression.predict(X_test_norm)
y_prob = log_regression.predict_proba(X_test_norm)

# y_pred

prob = y_prob[:, 1].round(3)
# prob

# The predicted MVP is the held-out player with the highest MVP probability.
# Taking argmax of the hard 0/1 predictions would return row 0 whenever no
# player crosses the 0.5 threshold.
i = y_prob[:, 1].argmax()
# Read the name from the held-out rows themselves: they are row-aligned with
# X_test by construction, unlike a freshly downloaded table.
held_out_players = alltime_players[alltime_players['SEASON'] == season]
print("Predicted Winner:", held_out_players.iloc[i]['PLAYER'])
print("Real Winner:     ", alltime_players.loc[(alltime_players['SEASON'] == season) & (alltime_players['MVP'] == 1)]['PLAYER'].values[0])

Predicted Winner: Nikola Jokic
Real Winner:      Nikola Jokic

# Share of held-out rows whose predicted 0/1 label equals the true label, as a percentage.
print("Accuracy:", metrics.accuracy_score(y_test, y_pred)*100, "%")

Accuracy: 100.0 %

                                                # Illustrative Code    
# Illustrative Code
# Columns used by the MVP model: season, label and box-score features.
vars = ['SEASON', 'MVP', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA',
        'FT_PCT', 'OREB','DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS']

# Illustrative Code
X = alltime_players[vars]
y = alltime_players[['SEASON', 'MVP']]

# Illustrative Code
# `season` is the held-out season chosen earlier in the script.  The label
# column is removed from the features and the target is reduced to a 1-D
# array so that the fit below can actually run.
X_train = X[X['SEASON'] != season].drop(['MVP'], axis=1)
X_test = X[X['SEASON'] == season].drop(['MVP'], axis=1)
y_train = y[y['SEASON'] != season]['MVP'].astype('int').to_numpy()
y_test = y[y['SEASON'] == season]['MVP'].astype('int').to_numpy()

# Illustrative Code
scaler = StandardScaler()
scaler.fit(X_train)
X_train_norm = scaler.transform(X_train)
X_test_norm = scaler.transform(X_test)

# Illustrative Code
log_regression = LogisticRegression(max_iter=10000)
log_regression.fit(X_train_norm,y_train)

# Leave-one-season-out backtest over 1980-2021: train on every other season,
# then check whether the most probable player of the held-out season is its MVP.
X = alltime_players[vars]
y = alltime_players[['SEASON', 'MVP']]
correct_pred,tot_pred = 0,0

season = 1980
while season != 2022:
    _train_rows = X['SEASON'] != season
    _test_rows = X['SEASON'] == season

    X_train = X.loc[_train_rows].drop(['MVP'], axis=1)
    X_test = X.loc[_test_rows].drop(['MVP'], axis=1)
    y_train = y.loc[_train_rows, 'MVP'].astype('int').to_numpy()
    y_test = y.loc[_test_rows, 'MVP'].astype('int').to_numpy()

    # Scaling parameters come from the training seasons only.
    scaler = StandardScaler().fit(X_train)
    X_train_norm = scaler.transform(X_train)
    X_test_norm = scaler.transform(X_test)

    log_regression = LogisticRegression(max_iter=10000).fit(X_train_norm, y_train)

    # Flag the player(s) holding the highest rounded MVP probability.
    y_prob = log_regression.predict_proba(X_test_norm)
    prob = y_prob[:, 1].round(3)
    _top_prob = prob.max()
    prob_max = [1 if p == _top_prob else 0 for p in prob]

    # Cell [1][1] counts flagged players who really were the MVP.
    cnf_matrix = metrics.confusion_matrix(y_test, prob_max)
    correct_pred += cnf_matrix[1][1]
    tot_pred += 1

    season += 1
print("Accuracy:", round(correct_pred / tot_pred * 100, 2), "%")

Accuracy: 45.24 %

# Same leave-one-season-out backtest, restricted to held-out seasons 2010-2021.
X = alltime_players[vars]
y = alltime_players[['SEASON', 'MVP']]
correct_pred,tot_pred = 0,0

season = 2010
while season != 2022:
    _train_rows = X['SEASON'] != season
    _test_rows = X['SEASON'] == season

    X_train = X.loc[_train_rows].drop(['MVP'], axis=1)
    X_test = X.loc[_test_rows].drop(['MVP'], axis=1)
    y_train = y.loc[_train_rows, 'MVP'].astype('int').to_numpy()
    y_test = y.loc[_test_rows, 'MVP'].astype('int').to_numpy()

    # Scaling parameters come from the training seasons only.
    scaler = StandardScaler().fit(X_train)
    X_train_norm = scaler.transform(X_train)
    X_test_norm = scaler.transform(X_test)

    log_regression = LogisticRegression(max_iter=10000).fit(X_train_norm, y_train)

    # Flag the player(s) holding the highest rounded MVP probability.
    y_prob = log_regression.predict_proba(X_test_norm)
    prob = y_prob[:, 1].round(3)
    _top_prob = prob.max()
    prob_max = [1 if p == _top_prob else 0 for p in prob]

    # Cell [1][1] counts flagged players who really were the MVP.
    cnf_matrix = metrics.confusion_matrix(y_test, prob_max)
    correct_pred += cnf_matrix[1][1]
    tot_pred += 1

    season += 1
print("Accuracy:", round(correct_pred / tot_pred * 100, 2), "%")

Accuracy: 58.33 %

	PLAYER_NAME	FG3M	FG3M_RANK
0	Stephen Curry	3117	1
1	Ray Allen	2973	2
2	James Harden	2593	3
3	Reggie Miller	2560	4
4	Kyle Korver	2450	5
5	Vince Carter	2290	6
6	Jason Terry	2282	7
7	Jamal Crawford	2221	8
8	Damian Lillard	2143	9
9	Paul Pierce	2143	9

	PERSON_NAME	SEASON	ROTY
362	Durant, Kevin	7	1
2	Horford, Al	7	0
262	Conley, Mike	7	0
366	Green, Jeff	7	0
312	Yi Jianlian	7	0
313	Brewer, Corey	7	0
181	Wright, Brandan	7	0

	PLAYER_ID	RANK	PLAYER	TEAM	GP	MIN	FGM	FGA	FG_PCT	FG3M	...	REB	AST	STL	BLK	TOV	PF	PTS	EFF	AST_TOV	STL_TOV
0	1629027	1	Trae Young	ATL	76	34.89	9.36	20.32	0.460	3.07	...	3.74	9.70	0.95	0.09	3.99	1.68	28.36	2066	2.43	0.24
1	201942	2	DeMar DeRozan	CHI	76	36.09	10.18	20.20	0.504	0.66	...	5.16	4.92	0.89	0.32	2.38	2.34	27.87	1961	2.07	0.38
2	203954	3	Joel Embiid	PHI	68	33.76	9.79	19.62	0.499	1.37	...	11.71	4.18	1.13	1.46	3.15	2.66	30.57	2304	1.33	0.36
3	1628369	4	Jayson Tatum	BOS	76	35.93	9.32	20.58	0.453	3.03	...	8.01	4.39	0.99	0.64	2.86	2.29	26.92	1971	1.54	0.35
4	203999	5	Nikola Jokic	DEN	74	33.46	10.32	17.72	0.583	1.31	...	13.77	7.89	1.47	0.85	3.80	2.58	27.08	2862	2.08	0.39

	PTS	AST	REB	BLK	STL	FG3_PCT
0	28.36	9.70	3.74	0.09	0.95	0.382
1	27.87	4.92	5.16	0.32	0.89	0.352
2	30.57	4.18	11.71	1.46	1.13	0.371
3	26.92	4.39	8.01	0.64	0.99	0.353
4	27.08	7.89	13.77	0.85	1.47	0.337
5	29.88	5.79	11.61	1.36	1.07	0.293
6	28.42	8.74	9.12	0.55	1.15	0.353
7	26.79	4.84	5.03	0.38	1.13	0.383
8	24.57	3.64	9.82	1.12	0.97	0.410
9	25.87	5.34	4.21	0.18	1.48	0.355

	PLAYER	CLUSTER	PTS	AST	REB	BLK	STL	FG3_PCT
0	Trae Young	4	28.36	9.70	3.74	0.09	0.95	0.382
1	DeMar DeRozan	4	27.87	4.92	5.16	0.32	0.89	0.352
2	Joel Embiid	3	30.57	4.18	11.71	1.46	1.13	0.371
3	Jayson Tatum	4	26.92	4.39	8.01	0.64	0.99	0.353
4	Nikola Jokic	4	27.08	7.89	13.77	0.85	1.47	0.337

	Player	Probability
0	Cade Cunningham	0.128
1	Jalen Green	0.087
2	Evan Mobley	0.539
3	Scottie Barnes	0.632
4	Jalen Suggs	0.015
5	Josh Giddey	0.045
6	Jonathan Kuminga	0.002
7	Franz Wagner	0.172
8	Davion Mitchell	0.045
9	Ziaire Williams	0.033

	CLUSTER	PTS	AST	REB	BLK	STL	FG3_PCT
0	0	11.68	2.71	3.76	0.37	0.94	0.36
1	1	7.07	1.37	3.10	0.30	0.50	0.33
2	2	8.17	1.36	7.58	1.05	0.68	0.01
3	3	14.34	2.10	7.65	1.17	0.75	0.35
4	4	21.25	6.32	6.05	0.48	1.23	0.35

	Points
0	110
1	127
2	111
3	88
4	117
5	110
6	102

NBA All-Stats Project 🏀

Group 24 🛠

Main Targets of the Project 🔍

Main Targets of Basketball 📌

Points per Possession

Main Targets of the Project 🔍

. . . What is a Field Goal? 💭

Field Goals Efficiency Criterion

Effective Field Goal Percentage

The Golden Age 📈

Total Field Goal Attempted and Made

Total Field Goal Missed

Total Field Goal Missed %

The Dark Age 📉

FGA (1982 - 2021)

FGM (1982 - 2021)

Total Three-Pointers Missed Percentage

The Future?

Goodbye Two-Pointers 👋🏻

Not so Fast ✋🏻

Main Targets of the Project 🔍

. . . What was the shot efficiency from the different zones last season? 💭

2021-22 Season Field Goal Efficiency by Zone

. . . What was the shot efficiency from the different zones in NBA history? 💭

Effective Field Goal Percentage by Zone

We have a clear winner 🎉

Backcourt Threes 🥇

Most Efficient shots 🎯

Data Transforming the NBA

Main Targets of the Project 🔍

Percentage of Field Goals over Total (by Value)

Percentage of Field Goal Attempts over Total

Percentage of Field Goal Made over Total

Curry's Effect on Three-Pointers (1996 - 2021)

Three Point Attempts

Three Points Made

Percentage of Field Goal over Total (by Type)

Evolution of the Game

Field Goal Percentage (by Type)

Evolution of the Game

Effective Field Goal Percentage

Main Targets of the Project 🔍

. . . What is a Triple Double? 💭

Triple Double Boxscore Categories

Triple Doubles

Main Targets of the Project 🔍

. . . What is the Rookie of the Year award? 💭

. . . What does 'Best Rookie' mean? 💭

Draft Position in which each ROY has been picked

Rookie of the Year Prediction

Data Preparation

Training and Testing Datasets

Model Prediction

Results

Confusion Matrix

Main Targets of the Project 🔍

Data Collection

Points Made

Points Allowed

Bootstrap

*Non-Parametric Bootstrap*

*Parametric Bootstrap*

Main Targets of the Project 🔍

. . . What are the Playoffs? 💭

Simulation with Parametric Bootstrap

2021 Playoffs

Main Targets of the Project 🔍

KMeans for Similar Players

Data Cleaning

Data Cleaning

Clusters

Box Score Statistic Barplot

Clusters Barplot

Clusters Dimensions & Info

Main Targets of the Project 🔍

. . . What is the Most Valuable Player award? 💭

MVP Prediction

MVP Prediction

Main Targets of the Project 🔍

The End

Non-Parametric Bootstrap

Parametric Bootstrap