Use bayesian mmrs

sickboyyy · Feb 10, 2021 · 04c7299 · 04c7299
2 parents 70bf4f1 + e8c17a1
commit 04c7299
Show file tree

Hide file tree

Showing 8 changed files with 398 additions and 202 deletions.
diff --git a/Dockerfile.build b/Dockerfile.build
@@ -0,0 +1,10 @@
+FROM ubuntu:20.04
+
+WORKDIR /w3champions-mmr-service
+
+COPY . .
+
+RUN pip install pipenv
+RUN pipenv install
+
+CMD pipenv run python main.py
diff --git a/Pipfile b/Pipfile
@@ -4,9 +4,11 @@ url = "https://pypi.org/simple"
 verify_ssl = true
 
 [dev-packages]
+pytest = "*"
 
 [packages]
 glicko2 = "*"
 scipy = "*"
 uvicorn = "*"
 fastapi = "*"
+requests = "*"
diff --git a/Pipfile.lock b/Pipfile.lock
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
@@ -7,9 +7,9 @@ pool:
 steps:
 - script: |
     python3 -m pip install --upgrade pip setuptools wheel
-    python3 -m pip install pipenv pytest
-    python3 -m pipenv install --system
-    python3 -m pytest
+    python3 -m pip install pipenv
+    python3 -m pipenv install --dev
+    python3 -m pipenv run pytest
   displayName: 'Run fastapi tests'
 
 - task: PublishTestResults@2

diff --git a/main.py b/main.py
@@ -1,13 +1,28 @@
+import numpy as np
 import uvicorn
 from fastapi import FastAPI
 
-from mmr.update_mmr import UpdateMmrRequestBody, update_after_game, UpdateMmrResponseBody
+from mmr.bayesian_rating_w3c import UpdateMmrRequestBody, update_after_game, UpdateMmrResponseBody
+from teambalance.balance import BalanceTeamResponseBody, BalanceTeamRequestBody, find_best_game
 
 app = FastAPI()
 
 
 @app.post("/mmr/update")
 async def update_mmr(body: UpdateMmrRequestBody) -> UpdateMmrResponseBody:
+    for i, rd in enumerate(body.rds_list):
+        if rd < 80:
+            body.rds_list[i] *= 80/60.25
+
+    for i, rating in enumerate(body.ratings_list):
+        if rating < 0:
+            body.ratings_list[i] = 0
+
+    return update_after_game(body.ratings_list, body.rds_list, body.winning_team, body.number_of_teams)
+
+
+@app.post("/team/balance")
+async def update_mmr(body: BalanceTeamRequestBody) -> BalanceTeamResponseBody:
     for i, rd in enumerate(body.rds_list):
         if rd < 60.25:
             body.rds_list[i] = 60.25
@@ -16,7 +31,7 @@ async def update_mmr(body: UpdateMmrRequestBody) -> UpdateMmrResponseBody:
         if rating < 0:
             body.ratings_list[i] = 0
 
-    return update_after_game(body.ratings_list, body.rds_list, 1 if body.t1_won else 0)
+    return find_best_game(np.array(body.ratings_list), np.array(body.rds_list), body.gamemode)
 
 
 if __name__ == "__main__":

diff --git a/mmr/bayesian_rating_w3c.py b/mmr/bayesian_rating_w3c.py
@@ -1,103 +1,108 @@
+import numpy as np
+from pydantic import BaseModel
 from scipy import integrate
 from scipy import optimize
-from pydantic import BaseModel
-import numpy as np
+
 
 class UpdateMmrRequestBody(BaseModel):
     ratings_list: list
     rds_list: list
-    T_won: int
-    T: int
+    winning_team: int
+    number_of_teams: int
 
 
 class UpdateMmrResponseBody(BaseModel):
     ratings_list: list
     rds_list: list
 
-#ratings_list: ratings of all players in the game
-#rds_list: rating deviations of all players in the game
-#T_won: index of winning team so in [0,1] for solo/RT and in [0,1,2,3] for FFA
-#T: number of teams in the game (2 for solo/RT, 4 for FFA)
 
-def update_after_game(ratings_list, rds_list, T_won, T):
+# ratings_list: ratings of all players in the game
+# rds_list: rating deviations of all players in the game
+# winning_team: index of winning team so in [0,1] for solo/RT and in [0,1,2,3] for FFA
+# number_of_teams: number of teams in the game (2 for solo/RT, 4 for FFA)
+
+def update_after_game(ratings_list, rds_list, winning_team, number_of_teams):
     ratings_G = np.array(ratings_list)
     rds_G = np.array(rds_list)
-    
-    #the whole rating system has 3 parameters:
-    #(mu_0, RD_0): starting rating and deviation of (1500, 350), as we currently use - handled in the matchmaking app
-    #beta: encodes performance uncertainty (this is game dependent - similar to the volatility param in Glicko2)
-    #rd_min: a minimum rating deviation to prevent staleness
+
+    # the whole rating system has 3 parameters:
+    # (mu_0, RD_0): starting rating and deviation of (1500, 350), as we currently use - handled in the matchmaking app
+    # beta: encodes performance uncertainty (this is game dependent - similar to the volatility param in Glicko2)
+    # rd_min: a minimum rating deviation to prevent staleness
     beta = 215
     rd_min = 80
-    
-    #N: number of players in the game
+
+    # N: number of players in the game
     N = int(len(ratings_G))
-    #maximum a posteriori to compute new ratings
-    opt = optimize.minimize(lambda x: -posterior_pdf(x, ratings_G, rds_G, beta, T_won, T),
-                            x0= [ratings_G])
-    #updated ratings
+    # maximum a posteriori to compute new ratings
+    opt = optimize.minimize(lambda x: -posterior_pdf(x, ratings_G, rds_G, beta, winning_team, number_of_teams),
+                            x0=[ratings_G], tol=0.00000000001)
+    # updated ratings
     ratings_G_u = opt.x
     rds_G_u = []
     for p in range(N):
-        #compute the normalization constant by fixing other ratings to their updated values
-        #(slight approximation but alternative is a nasty (N dimensional) integration step, not feasible)
+        # compute the normalization constant by fixing other ratings to their updated values
+        # (slight approximation but alternative is a nasty (N dimensional) integration step, not feasible)
         C_int, _ = integrate.quad(lambda x: np.exp(
-            posterior_pdf(np.concatenate([ratings_G_u[:p], np.array(x),ratings_G_u[p+1:]], axis=None),
-                          ratings_G, rds_G, beta, T_won, T, p)),
-        #integration bounds a -> b
-                                  a=0,b=5000)
-        #compute second moment of posterior to get new rating deviation
-        #integral of p(x)*(x-mu)**2/C_int over the domain
-        rd_G_u_p = np.sqrt(integrate.quad(lambda x: (x-ratings_G_u[p])**2/C_int*np.exp(posterior_pdf(np.concatenate(
-            [ratings_G_u[:p], np.array(x), ratings_G_u[p+1:]], axis=None), ratings_G, rds_G, beta, T_won, T, p)),
-                                          a=0, b=5000)[0])
-        #floor rating deviation to prevent rating staleness
+            posterior_pdf(np.concatenate([ratings_G_u[:p], np.array(x), ratings_G_u[p + 1:]], axis=None),
+                          ratings_G, rds_G, beta, winning_team, number_of_teams, p)),
+                                  # integration bounds a -> b
+                                  a=0, b=5000)
+        # compute second moment of posterior to get new rating deviation
+        # integral of p(x)*(x-mu)**2/C_int over the domain
+        rd_G_u_p = np.sqrt(
+            integrate.quad(lambda x: (x - ratings_G_u[p]) ** 2 / C_int * np.exp(posterior_pdf(np.concatenate(
+                [ratings_G_u[:p], np.array(x), ratings_G_u[p + 1:]], axis=None), ratings_G, rds_G, beta, winning_team,
+                number_of_teams, p)),
+                           a=0, b=5000)[0])
+        # floor rating deviation to prevent rating staleness
         rd_G_u_p = max(rd_min, rd_G_u_p)
-        #updated rating deviations
+        # updated rating deviations
         rds_G_u.append(rd_G_u_p)
-    return UpdateMmrResponseBody(ratings_list=ratings_G_u.tolist(), rds_list=rds_G_u.tolist())
+    return UpdateMmrResponseBody(ratings_list=ratings_G_u.tolist(), rds_list=rds_G_u)
 
 
-#this is faster than using the scipy.stats implementation
-#(it's always the case on every project I've ever worked with, no surprises)
+# this is faster than using the scipy.stats implementation
+# (it's always the case on every project I've ever worked with, no surprises)
 def logistic_pdf(x, mu, s):
-    return np.exp((x-mu)/s)/(s*(1+np.exp((x-mu)/s))**2)
+    return np.exp((x - mu) / s) / (s * (1 + np.exp((x - mu) / s)) ** 2)
+
 
-#this is the posterior probabiliy density function
-#ratings_G_o: prior mean
-#rds_G_o: prior deviation
-#beta: performance uncertainty
-#T_won: index of winning team
-#T: number of teams in the game
-#m: marginlization variable for integration purposes
-def posterior_pdf(ratings_G_u, ratings_G_o, rds_G_o, beta, T_won, T, m = None):
-    #N: number of players in the game
+# this is the posterior probability density function
+# ratings_G_o: prior mean
+# rds_G_o: prior deviation
+# beta: performance uncertainty
+# T_won: index of winning team
+# T: number of teams in the game
+# m: marginalization variable for integration purposes
+def posterior_pdf(ratings_G_u, ratings_G_o, rds_G_o, beta, T_won, T, m=None):
+    # N: number of players in the game
     N = int(len(ratings_G_o))
-    #P: number of players per team
-    P = int(float(N)/float(T))
-    #constant to go from Mu/Standard deviation representation of Logistic distr. to Mu/Scale representation 
-    #https://en.wikipedia.org/wiki/Logistic_distribution
+    # P: number of players per team
+    P = int(float(N) / float(T))
+    # constant to go from Mu/Standard deviation representation of Logistic distr. to Mu/Scale representation
+    # https://en.wikipedia.org/wiki/Logistic_distribution
     C_sd = 0.551328895
-    #the game's collective rating deviation
-    #each player's rating deviation is inflated by beta, which quantifies performance uncertainty
-    #see https://jmlr.csail.mit.edu/papers/volume12/weng11a/weng11a.pdf
-    #section 3.5, that's where I found this idea :)
-    rd_G = np.sqrt(np.sum(np.power(rds_G_o,2)) + N*beta**2)
-    #each team's collective rating as the geometric mean of ratings
-    ratings_T_u = np.array([(np.prod(np.power(ratings_G_u[t*P:(t+1)*P], 1/float(P)))) for t in range(T)])
-    #Bradley-Terry model which handles both 1 team vs 1 team and FFA
-    #differs from usual BT as we have:
-    #1) different rating deviation for each team
-    #2) performance uncertainty with beta
-    #3) multiple players by team
-    s_p = np.exp((P*ratings_T_u[T_won])/(C_sd*rd_G))
-    s_G =  np.sum(np.exp((P*ratings_T_u)/(C_sd*rd_G)))
-    #s_p/s_G = win probability for the winning team
-    #this is the evidence for the observed result
-    loglikelihood = np.log(s_p/s_G)
+    # the game's collective rating deviation
+    # each player's rating deviation is inflated by beta, which quantifies performance uncertainty
+    # see https://jmlr.csail.mit.edu/papers/volume12/weng11a/weng11a.pdf
+    # section 3.5, that's where I found this idea :)
+    rd_G = np.sqrt(np.sum(np.power(rds_G_o, 2)) + N * beta ** 2)
+    # each team's collective rating as the geometric mean of ratings
+    ratings_T_u = np.array([(np.prod(np.power(ratings_G_u[t * P:(t + 1) * P], 1 / float(P)))) for t in range(T)])
+    # Bradley-Terry model which handles both 1 team vs 1 team and FFA
+    # differs from usual BT as we have:
+    # 1) different rating deviation for each team
+    # 2) performance uncertainty with beta
+    # 3) multiple players by team
+    s_p = np.exp((P * ratings_T_u[T_won]) / (C_sd * rd_G))
+    s_G = np.sum(np.exp((P * ratings_T_u) / (C_sd * rd_G)))
+    # s_p/s_G = win probability for the winning team
+    # this is the evidence for the observed result
+    loglikelihood = np.log(s_p / s_G)
     for n in range(N):
-        #trick for the marginalization step in the integral
+        # trick for the marginalization step in the integral
         if m == None or m == n:
-            #this is the evidence for each player's updated rating under the prior
-            loglikelihood += np.log(logistic_pdf(ratings_G_u[n], ratings_G_o[n], C_sd*rds_G_o[n]))
+            # this is the evidence for each player's updated rating under the prior
+            loglikelihood += np.log(logistic_pdf(ratings_G_u[n], ratings_G_o[n], C_sd * rds_G_o[n]))
     return loglikelihood
diff --git a/teambalance/balance.py b/teambalance/balance.py
@@ -1,28 +1,42 @@
 import numpy as np
 from itertools import combinations
 
+from pydantic import BaseModel
+
 C_sd = 0.551328895
 
-#this constructs the set of unique team configurations
-#doing this divides by 24 from the "brute force" method that tries all possible combinations
-#for footmen frenzy (3v3v3v3)
-#as we go from 15400 possibilities to 369600 - it's really worth doing it as runtime goes from ~1 sec to 30 secs
-#I generalized this to make it for any number of teams & number of players on the team
-#this only needs to be done "once" ever - so need to make sure it's not recalculated needlessly all the time
 
+class BalanceTeamRequestBody(BaseModel):
+    ratings_list: list
+    rds_list: list
+    gamemode: str
+
+
+class BalanceTeamResponseBody(BaseModel):
+    ratings_list: list
+    rds_list: list
+
+
+# this constructs the set of unique team configurations
+# doing this divides by 24 from the "brute force" method that tries all possible combinations
+# for footmen frenzy (3v3v3v3)
+# as we go from 369600 possibilities to 15400 - it's really worth doing it as runtime goes from ~30 secs to ~1 sec
+# I generalized this to make it for any number of teams & number of players on the team
+# this only needs to be done "once" ever - so need to make sure it's not recalculated needlessly all the time
 
-def generate_superset_recursive(T,P):
-    superset = set()
-    set_players = set(i for i in range(T*P))
+
+def generate_superset_recursive(T, P):
+    set_players = set(i for i in range(T * P))
     potential_games = []
     for c in combinations(set_players, P):
         potential_games.append([frozenset(c)])
     L = 1
-    while L<T:
+    while L < T:
         potential_games = recursion(set_players, potential_games, P)
-        L+=1
+        L += 1
     return set(frozenset(game) for game in potential_games)
 
+
 def recursion(set_players, potential_games, P):
     potential_G_next = []
     for G in potential_games:
@@ -34,33 +48,34 @@ def recursion(set_players, potential_games, P):
             potential_G_next.append(G_T)
     return potential_G_next
 
-#this gives the winning odds for each team for configuration of the game
+
+# this gives the winning odds for each team for configuration of the game
 def game_odds(ratings_G, rds_G, T, P):
-    #should be hardcoded to the same value as in the python mmr service
+    # should be hardcoded to the same value as in the python mmr service
     beta = 215
-    #number of players per game
+    # number of players per game
     N = len(rds_G)
-    rd_G = np.sqrt(np.sum(rds_G**2)+N*beta**2)
-    ratings_T = np.array([(np.prod(np.power(ratings_G[t*P:(t+1)*P], 1/float(P)))) for t in range(T)])
-    odds = np.exp((P*ratings_T)/(C_sd*rd_G))/np.sum(np.exp((P*ratings_T)/(C_sd*rd_G)))
+    rd_G = np.sqrt(np.sum(rds_G ** 2) + N * beta ** 2)
+    ratings_T = np.array([(np.prod(np.power(ratings_G[t * P:(t + 1) * P], 1 / float(P)))) for t in range(T)])
+    odds = np.exp((P * ratings_T) / (C_sd * rd_G)) / np.sum(np.exp((P * ratings_T) / (C_sd * rd_G)))
     return odds
 
 
-#gamemode should be of the form "PvPvP" or "PonPonP"
-#number of teams is occurences of "v"+1
+# gamemode should be of the form "PvPvP" or "PonPonP"
+# number of teams is occurrences of "v"+1
 
 def find_best_game(ratings, rds, gamemode):
-    #that part should be refactored by someone who understands
-    #what variables will remain in memory on the live service
-    #the point is that generaate_super_recursive should only be called once per gamemode
-    #whenever we restart the service
-    #and its output be available in memory at any time later
+    # that part should be refactored by someone who understands
+    # what variables will remain in memory on the live service
+    # the point is that generate_superset_recursive should only be called once per gamemode
+    # whenever we restart the service
+    # and its output be available in memory at any time later
     T = gamemode.count(gamemode[0])
     P = int(gamemode[0])
     if 'superset' not in globals():
         global superset
         superset = {}
-        superset[gamemode] =generate_superset_recursive(T, P)
+        superset[gamemode] = generate_superset_recursive(T, P)
     else:
         if gamemode not in superset.keys():
             superset[gamemode] = generate_superset_recursive(T, P)
@@ -69,23 +84,21 @@ def find_best_game(ratings, rds, gamemode):
     for Game in gamemode_set:
         potential_G = [p for Team in Game for p in Team]
         ratings_G = ratings[potential_G]
-        probas = game_odds(ratings_G, rds_G, T, P)
-        #that's helpstone's metric for a fair game
+        probas = game_odds(ratings_G, rds, T, P)
+        # that's helpstone's metric for a fair game
         fairness_G = np.max(probas) - np.min(probas)
         if fairness_G < most_fair:
             best_G = potential_G
-            best_ratings = ratings_G
             most_fair = fairness_G
-    return [int(np.ceil((best_G.index(p)+1)/P)) for p in range(T*P)]
+    return [int(np.ceil((best_G.index(p) + 1) / P)) for p in range(T * P)]
 
 # #Example usage
-# ratings_G = np.round(np.random.normal(1500, 300, 12),0)
+# ratings_G = np.round(np.random.normal(1500, 300, 12), 0).tolist()
 # print(ratings_G)
-# rds_G = np.array([90]*12)
+# rds_G = np.array([90] * 12).tolist()
 # teams_footies = find_best_game(ratings_G, rds_G, '3v3v3v3')
 # print(teams_footies)
 
-
 # ratings_G = np.round(np.random.normal(1500, 300, 8),0)
 # print(ratings_G)
 # rds_G = np.array([90]*8)