-- From Wilfred.Hansen@LOAN1.SP.CS.CMU.EDU Fri Dec 18 08:27:40 1992

-- Pseudo-code for a ratings algorithm

-- This algorithm computes p.rating, the rating for each player, p,
-- assuming that the player's initial rating, p.seed, is within a few 
-- stones of the correct value.  
-- The approach is to measure the likelihood of each player's rating 
-- and the likelihood of the outcome of each game given the difference
-- in the opponent's ratings.  Multiplying all these likelihoods gives 
-- likelihood of the set of game outcomes for the given set of ratings.
--
-- Each iteration step adjusts each player's rating by +delta, -delta,
-- or not at all, depending on which increases the likelihood of the
-- outcomes for the player's games.  Then the likelihood for the entire
-- collection of ratings and game outcomes is recomputed and the change 
-- by the current delta is accepted if the likelihood increased.  The 
-- iteration repeats with diminishing values of delta until the desired
-- significance is achieved.

-- For general discussion of the statistics, see: 
--	Elo, Arpad E.,  Ratings of Chessplayers, past and present.  
--		NY: Arco Pub., 1986
--	Joe, Harry, "Rating systems based on paired comparison models", 
--		Statistics and Probability Letters v. 11, 1991 p. 343-347
--	David, H. A., "The method of paired comparisons"


-- In this algorithm, a difference of one in rating is intended to
-- indicate a difference of one stone in strength.  There is no built-in
-- normalization, so there is no way to know if a rating of 1.00
-- corresponds to Shodan;  however, relative values should be a 
-- fair approximation of relative strength.  
-- 
-- Strictly speaking, the algorithm has no notion of a difference of one 
-- stone;  the ratings are based on the players announced ratings.  If 
-- players adopt consistent ratings that differ by, say, half a stone,
-- the computed ratings will be based on that measure.  
--
-- When entering rating values, dan values are entered as the corresponding 
-- positive number and the kyu value k is entered as 1-k (so 1 kyu is zero, 
-- 2 kyu is -1, and so on).

-- Two special data types, Rating and Likelihood, appear below.
-- 	Both are implemented as floating values.  
-- Rating is a player rating as defined just above.  
-- Likelihood is similar to a probability.  It is a mapping of a rating 
--	or game result into [0,1] such that higher values correspond
--	to more likely ratings or results.

-- Here are parameters as used for the AGA rating system:

p.seed: Rating  -- Players's initial rating.  
	The latest rating at which the player played in a tournament.
p.sigma: Rating = 
	.5 for players rated at least twice before
	.8 for players rated once before
	1.0 for previously unrated players 10 kyu and above
	2.0 for others
px_sigma: Rating = 1.04	  -- see description of game_po, below

handicapeqv: Rating	  -- Rating point equivalents of handicaps
		-- Handicap is Nstones stones on board
		-- and a komi of additional points for white
		--   Nstones is 0 or 2...9
		--   -20 <= komi <= 20
	=== if Nstones == 0  then  .5 - .1 * komi else Nstones - .1*komi

maxdelta: Rating = 4	  -- In successive passes, adjust ratings by 
		--  4.  2.  1.  0.5  0.25  0.125  0.0625  
		--  0.03125  0.015625  0.0078125  0.00390625

EPSILON: Number = 1e-15   -- Very small, positive non-zero value
BIG: Number = 4		  -- number of standard deviations of ratings change
			  --   after which to use EPSILON as likelihood

-- information for each game
--
Game:::
	handicapeqv: Rating	-- see above
	po: Likelihood		-- likelihood of the outcome given the 
				-- current rating difference of the players
	oldpo: Likelihood 		-- po from prior iteration
	white: Player		-- refers to player info for white player
	black: Player		--     ditto for black player
	whitewins: Boolean	-- TRUE if W wins 

-- information for each player
--
Player:::
	rating: Rating	-- current rating estimate 
	pr: Likelihood 	-- what is the likelihood of the current rating?
	oldpr: Likelihood	-- pr from prior iteration
	seed: Rating	-- initial rating 
	sigma: Rating	-- standard deviation of curve of likelihood of
			-- ratings in the neighborhood of seed.
	direction: {UP, DOWN, NONE}  -- direction to adust rating
	-- also: a list of all games the player has played in


-- "pr" is the likelihood that a given rating is correct
-- "po" similarly the likelihood that a game outcome is correct
-- Multiplying all pr and pr values computes the total likelihood,
-- "pt", that is, the likelihood of all game outcomes given a 
-- particular set of ratings for the players.


-- player_pr(p: Player): Likelihood
-- 	Compute the "prior distribution", pr, of the rating for player 'p'.
-- 	Assume the "correct" rating is in a sample space normally
--	distributed about p.seed with standard deviation p.sigma.
--	Compute z as the corresponding value in a normal
--	distribution with mean of 0 and standard deviation of 1.
--	Then compute the likelihood that z is the correct rating
--	by evaluating the normal(0,1) density function at z.
--
player_pr(p: Player): Likelihood ===
	z = ((p.rating - p.seed) / p.sigma)
	return   exp(-z*z/2) / sqrt(2*pi)	-- normal density function
	-- exp(...)/sqrt(...) is the function for the normal curve,
	-- that is, a probability density function with mean 0 and s.d. 1.


-- game_po(g: Game): Likelihood
--	Compute the likelihood, po, of the outcome of game 'g'.
--	The likelihood that white wins is the value of the normal
--	distribution function evaluated at the ratings difference, 
--	as adjusted by the handicap stones and normalized by px_sigma.
--	With px_sigma of 1.04:
--		ratings		likelihood
--		difference	white wins
--		    0		    .5
--		    1		    .83
--		    2		    .97
--	That is, if the handicap is two stones too low, white will
--	win 97 games out of a hundred.
--	The likelihood of a black win is (1 - likelihood white wins).
--
game_po(g: Game): Likelihood ===
	rd = g.white.rating - g.black.rating - g.handicapeqv
	p = .5 + erf((rd/px_sigma) / sqrt(2)) / 2
	if g.whitewins return p else return 1-p
	--
	-- erf(x) = [2/sqrt(pi)] * integral from 0 to x of exp(-t*t) dt
	-- Some Unix systems have erf.  In the expression given, it computes
	-- the normal distribution function, that is, the integral of the 
	-- normal density function from negative infinity
	-- to rd/px_sigma, the value whose likelihood we want.

	-- Ralston, Anthony, A First Course in Numerical Analysis, 
	-- McGraw-Hill (New York, 1965), p. 21:
	-- "the normal distribution function corresponding to the 
	-- normal density function with zero mean and variance n/12
	-- is given by 0.5 + 0.5 * erf(sqrt(6/n)x)".  
	-- In the algorithm, the variance is 1, so n = 12.


-- propose_ratings(delta: Rating)
--	For a ratings change of 'delta', adjust all players
--	according to their directions and recompute po for each game.
--
propose_ratings(delta: Rating) === 
	for each player, p
		case p.direction
			UP:  	p.rating += delta
			DOWN:  	p.rating -= delta
			NONE:
		p.oldpr = p.pr
		p.pr = player_pr(p.rating, p.seed)
	for each game, g
		g.oldpo = g.po
		g.po = game_po(g)


--revert_ratings(delta: Rating)
--	For a ratings change of 'delta', revert to prior ratings.
--
revert_ratings(delta: Rating) === 
	for each player, p
		case p.direction
			UP:  	p.rating -= delta
			DOWN:  	p.rating += delta
		p.pr = p.oldpr
	for each game, g
		g.po = g.oldpo


-- compute_pt(): Likelihood
--	Compute the likelihood of the combination of all current ratings
--	and the outcome of all games.
-- In practice, repeated multiplication of probabilities can lead to 
-- floating underflow.  It is preferable to use the logarithms of these
-- values and use addition instead of multiplication.  Indeed, the entire
-- algorithm can be rewritten to utilize logarithms of Likelihood values.
--
compute_pt(): Likelihood === 
	pt = 1
	for each player, p
		pt *= p.pr
	for each game, g
		pt *= g.po
	return pt


--compute_player_impact(p: Player, delta: Rating): Number
--	Compute the ratio ptNew/pt, where 
--		pt is total likelihood given the current assignment of ratings 
--		and ptNew is the likelihood resulting from 
--		changing the rating for player 'p' by 'delta'.
--	If this value is greater than one, the new rating is more accurate.
--	The tests against BIG are because after four or five sd's, 
--	the estimated rating has zero likelihood.  Therefore we give
--	the likelihood a little slope back toward the seed to
--	minimize the possibility of a plateau in the search.
--
compute_player_impact(p: Player, delta: Rating): Number ===
	r_save = p.rating
	p.rating += delta	-- temporarily change rating

	before = abs(r_save - p.seed) / p.sigma
	after = abs(p.rating - p.seed) / p.sigma
	if before > BIG and after > BIG
		if after < before
			pfactor = 1 + EPSILON         -- slightly better
		else
			pfactor = 1 / (1 + EPSILON)   -- slightly worse
	else if after > BIG
		pfactor = EPSILON / p.pr    -- off the edge - very unlikely
	else
		pfactor = player_pr(p) / p.pr
		for each game, g, that p has played
			pfactor *= game_po(g) / g.po

	p.rating = r_save
	return pfactor


-- estimate_ratings(): Likelihood
--	Estimate ratings simultaneously for all players and games.
--	Returns pt, the likelihood of the outcome, given the new ratings.
--
estimate_ratings(): Likelihood ===

	-- set ratings to seed values 
	for each player, p
		p.rating = p.seed
		p.direction = NONE

	-- calculate initial values of all pt components 
	propose_ratings(0)		-- compute initial po and pr
	best_pt = compute_pt()		-- compute resulting pt

	-- search for best new rating values 
	delta = max_delta
	while delta >= .002	    -- ensure two decimal places of accuracy
		for each player, p
			-- decide whether rating should go up or down
			chng_plus  = compute_player_impact(p,  delta)
			chng_minus = compute_player_impact(p,  -delta)
			if chng_plus < 1 and chng_minus < 1 then
				p.direction = NONE
			else if chng_plus > chng_minus
				p.direction = UP
			else -- chng_minus > chng_plus
				p.direction = DOWN

		-- change ratings and repeat with same or smaller delta
		propose_ratings(delta)
		new_pt = compute_pt()
		if new_pt > best_pt + EPSILON
			best_pt = new_pt
			-- do next cycle with same delta
		else if new_pt >= best_pt
			-- pt is no worse, but not much better; decrease delta
			delta /= 2
		else 
			-- pt is no better;  undo the change & decrease delta
			revert_ratings(delta)
			delta /= 2

	return pt