optimal bayesian ranking ch1

import%20marimo%0A%0A__generated_with%20%3D%20%220.10.7%22%0Aapp%20%3D%20marimo.App(app_title%3D%22optimal%20bayesian%20ranking%20ch1%22)%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(%0A%20%20%20%20%20%20%20%20r%22%22%22%0A%20%20%20%20%20%20%20%20%23%20optimal%20bayesian%20ranking%0A%20%20%20%20%20%20%20%20%3Ch3%20align%3D'center'%3Echapter%201%3A%20bernoulli%20to%20beta%3C%2Fh3%3E%0A%20%20%20%20%20%20%20%20%3Cp%20align%3D'center'%3Eby%20miraia%20s.%20chiou%20%C2%A9%202024%3C%2Fp%3E%0A%0A%20%20%20%20%20%20%20%20%23%23%20introduction%0A%0A%20%20%20%20%20%20%20%20**fuz**%20can%20be%20used%20to%20improve%20on%20basic%20bayesian%20ranking%20by%20taking%20into%20account%20the%20shape%20of%20distributions%20rather%20than%20just%20the%20means.%20the%20optimal%20method%20is%20to%20use%20mode-parameterized%20beta%20or%20dirichlet%20distributions%2C%20but%20using%20a%20prior%20obtained%20by%20multiplicative%20pooling%20also%20improves%20upon%20traditional%20%5Bbayesian%20averaging%2Franking%5D(https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FBayesian_average)%0A%0A%20%20%20%20%20%20%20%20if%20you%20already%20know%20how%20to%20derive%20the%20%5Brule%20of%20succession%5D(https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FRule_of_succession)%2C%20skip%20to%20%5Bclaims%5D(%23claims).%20note%20that%20this%20chapter%20covers%20known%20principles.%20the%20next%20chapters%20cover%20research%20that%20could%20be%20considered%20novel%20(disclaimer%3A%20i%20am%20an%20amateur%20in%20this%20field).%0A%20%20%20%20%20%20%20%20%22%22%22%0A%20%20%20%20)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(%0A%20%20%20%20%20%20%20%20r%22%22%22%0A%20%20%20%20%20%20%20%20%23%23%20the%20bernoulli%20distribution%0A%0A%20%20%20%20%20%20%20%20to%20understand%20optimal%20bayesian%20ranking%2C%20start%20with%20the%20bernoulli%20distribution.%20this%20can%20be%20thought%20of%20as%20coin%20flips%2C%20upvotes%20and%20downvotes%2C%20yes%2Fno%20rat
ings%2C%20or%20anything%20representable%20as%20binary.%20as%20an%20example%20the%20video%20game%20service%20_steam_%20uses%20yes%2Fno%20(thumbs%20up%20or%20thumbs%20down).%20in%20this%20case%2C%20a%20game%20with%206%20thumbs%20up%20and%202%20thumbs%20down%20has%20a%20bernoulli%20score%20distribution%20with%20%24p%3D0.75%24.%0A%20%20%20%20%20%20%20%20%22%22%22%0A%20%20%20%20)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_()%3A%0A%20%20%20%20from%20scipy.stats%20import%20bernoulli%0A%20%20%20%20return%20(bernoulli%2C)%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20wintro_score%20%3D%20mo.ui.slider(%0A%20%20%20%20%20%20%20%200%2C%201%2C%200.01%2C%20value%3D0.75%2C%20show_value%3DTrue%2C%20label%3D'choose%20a%20score'%2C%20full_width%3DTrue%0A%20%20%20%20)%0A%20%20%20%20mo.callout(wintro_score%2C%20kind%3D'success')%0A%20%20%20%20return%20(wintro_score%2C)%0A%0A%0A%40app.cell%0Adef%20_(bernoulli%2C%20mo%2C%20plot_bernoulli%2C%20wintro_score)%3A%0A%20%20%20%20intronoulli%20%3D%20bernoulli(wintro_score.value)%0A%20%20%20%20intronoulli_p%20%3D%20%5Bintronoulli.pmf(0)%2C%20intronoulli.pmf(1)%5D%0A%20%20%20%20_chart%20%3D%20(%0A%20%20%20%20%20%20%20%20plot_bernoulli(intronoulli_p)%0A%20%20%20%20%20%20%20%20.properties(width%3D'container')%0A%20%20%20%20%20%20%20%20.configure_mark(color%3D'darkgreen')%0A%20%20%20%20)%0A%20%20%20%20mo.ui.altair_chart(_chart)%0A%20%20%20%20return%20intronoulli%2C%20intronoulli_p%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22this%20distribution%20represents%20the%20sample%20mean.%20however%2C%20we%20have%20some%20additional%20information%20we%20want%20to%20represent%20-%20the%20number%20of%20ratings%20(count).%20we%20can%20do%20this%20with%20the%20binomial%20distribution.%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%23%23%20bernoulli%20to%20binomial%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3
DTrue)%0Adef%20_()%3A%0A%20%20%20%20from%20scipy.stats%20import%20binom%0A%20%20%20%20return%20(binom%2C)%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20wintro_no%20%3D%20mo.ui.slider(%0A%20%20%20%20%20%20%20%200%2C%2010%2C%20value%3D2%2C%20show_value%3DTrue%2C%20label%3D'no.%20of%20no%2Ftails%2Fdownvotes'%2C%20full_width%3DTrue%0A%20%20%20%20)%0A%20%20%20%20wintro_yes%20%3D%20mo.ui.slider(%0A%20%20%20%20%20%20%20%200%2C%2010%2C%20value%3D6%2C%20show_value%3DTrue%2C%20label%3D'no.%20of%20yes%2Fheads%2Fupvotes'%2C%20full_width%3DTrue%0A%20%20%20%20)%0A%20%20%20%20mo.callout(mo.hstack(%5Bwintro_no%2C%20wintro_yes%5D%2C%20widths%3D'equal'%2C%20align%3D'center')%2C%20kind%3D'info')%0A%20%20%20%20return%20wintro_no%2C%20wintro_yes%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(flog%2C%20mo%2C%20np%2C%20wintro_no%2C%20wintro_yes)%3A%0A%20%20%20%20intro2_weights%20%3D%20np.array(%5Bwintro_no.value%2C%20wintro_yes.value%5D)%0A%20%20%20%20intro2_p%20%3D%20flog.norm(intro2_weights)%0A%20%20%20%20intro2_n%20%3D%20intro2_weights.sum()%0A%20%20%20%20mo.md(f%22%22%22%0A%20%20%20%20here%20i%20introduce%20%60fuz.log.lnorm%60%20which%20can%20stably%20normalize%20weights%20in%20logarithmic%20space%20(log%20-%3E%20log)%2C%20handling%20%60nan%60s%20and%20complex%20numbers.%20a%20little%20excessive%20for%20our%20example%20here%2C%20but%20works%20well%20with%20very%20small%20probabilities.%20if%20you%20want%20to%20move%20out%20of%20log%20space%2C%20a%20convenience%20function%2C%20%60norm%60%2C%20does%20log%20conversion%20and%20exponentiation%20for%20you.%0A%0A%20%20%20%20%60%60%60python%0A%20%20%20%20import%20fuz.log%20as%20flog%0A%20%20%20%20import%20numpy%20as%20np%0A%0A%20%20%20%20flog.norm(%7Bintro2_weights%7D)%20%23%20%7Bintro2_p%7D%0A%20%20%20%20%23%20this%20is%20equivalent%20to%3A%0A%20%20%20%20np.exp(flog.lnorm(np.log(%7Bintro2_weights%7D)))%20%23%20%7Bintro2_p%7D%0A%20%20%20%20%60%60%60%0A%0A%20%20%20%20%22%22%22)%0A%20%20%20%20return%20intro2_n%2C%20intro
2_p%2C%20intro2_weights%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(%0A%20%20%20%20alt%2C%0A%20%20%20%20binom%2C%0A%20%20%20%20intro2_n%2C%0A%20%20%20%20intro2_p%2C%0A%20%20%20%20plot_bernoulli%2C%0A%20%20%20%20plot_binomial%2C%0A%20%20%20%20wintro_no%2C%0A%20%20%20%20wintro_yes%2C%0A)%3A%0A%20%20%20%20_noulli_chart%20%3D%20plot_bernoulli(intro2_p).properties(title%3D'bernoulli%20pmf'%2C%20width%3D100)%0A%0A%0A%20%20%20%20intronomial%20%3D%20binom(intro2_n%2C%20intro2_p%5B1%5D)%0A%20%20%20%20_nomial_chart%20%3D%20plot_binomial(intronomial).properties(%0A%20%20%20%20%20%20%20%20title%3Dalt.TitleParams(%0A%20%20%20%20%20%20%20%20%20%20%20%20text%3D'binomial%20pmf'%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20subtitle%3Df'n%3D%7Bintro2_n%7D%2C%20heads%3D%7Bwintro_yes.value%7D%2C%20tails%3D%7Bwintro_no.value%7D'%2C%0A%20%20%20%20%20%20%20%20)%2C%0A%20%20%20%20%20%20%20%20width%3D300%2C%0A%20%20%20%20)%0A%20%20%20%20(_nomial_chart%20%7C%20_noulli_chart).interactive()%0A%20%20%20%20return%20(intronomial%2C)%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo%2C%20wintro_no%2C%20wintro_yes)%3A%0A%20%20%20%20mo.md(rf%22%22%22%0A%20%20%20%20this%20is%20how%20the%20binomial%20distribution%20is%20typically%20first%20taught.%20the%20pmf%20shows%20the%20probability%20of%20%24h%24%20successes%2C%20given%20%24n%24%20trials.%20however%2C%20for%20bayesian%20ranking%2C%20this%20isn't%20what%20we%20need.%20%0A%0A%20%20%20%20what%20we%20really%20want%20is%20to%20quantify%20the%20uncertainty%20around%20possible%20true%20scores.%20i.e.%2C%20given%20%7Bwintro_yes.value%7D%20heads%20and%20%7Bwintro_no.value%7D%20tails%2C%20what's%20the%20probability%20that%20once%20we%20have%20%24%5Cinfty%24%20ratings%2C%20the%20score%20will%20be%20%24x%24%3F%0A%0A%20%20%20%20good%20news%20-%20we%20can%20still%20start%20with%20the%20binomial%20distribution!%0A%0A%20%20%20%20we%20use%20the%20formula%20of%20the%20binomial%20pmf%20to%20hold%20%24h%24%20and%20%24n%24%20constant%2C%20sweeping%20%24x%24%20to%20ge
t%20probabilities%20and%20creating%20a%20chart.%0A%20%20%20%20%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%23%23%20binomial%20to%20beta%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20w_xlen%20%3D%20mo.ui.slider(5%2C%2050%2C%20value%3D5%2C%20full_width%3DTrue%2C%20label%3D'slide%20to%20sample!')%0A%20%20%20%20mo.callout(w_xlen%2C%20kind%3D'danger')%0A%20%20%20%20return%20(w_xlen%2C)%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(np%2C%20w_xlen)%3A%0A%20%20%20%20x_interactive%20%3D%20np.linspace(0%2C%201%2C%20w_xlen.value)%0A%20%20%20%20return%20(x_interactive%2C)%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(alt%2C%20binom%2C%20intro2_n%2C%20pd%2C%20wintro_no%2C%20wintro_yes%2C%20x_interactive)%3A%0A%20%20%20%20_df%20%3D%20pd.DataFrame(%0A%20%20%20%20%20%20%20%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20'true%20mean'%3A%20x_interactive%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20'probability'%3A%20binom.pmf(wintro_yes.value%2C%20intro2_n%2C%20x_interactive)%2C%0A%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20)%0A%20%20%20%20_base%20%3D%20alt.Chart(%0A%20%20%20%20%20%20%20%20_df%2C%0A%20%20%20%20%20%20%20%20title%3Dalt.TitleParams(%0A%20%20%20%20%20%20%20%20%20%20%20%20text%3D'probability%20of%20potential%20true%20means'%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20subtitle%3Df'given%20%7Bwintro_yes.value%7D%20heads%20and%20%7Bwintro_no.value%7D%20tails'%2C%0A%20%20%20%20%20%20%20%20)%2C%0A%20%20%20%20).encode(alt.X('true%20mean')%2C%20alt.Y('probability'))%0A%20%20%20%20_line%20%3D%20_base.mark_line(color%3D'maroon')%0A%20%20%20%20_point%20%3D%20_base.mark_point(color%3D'maroon')%0A%20%20%20%20_chart%20%3D%20_line%20%2B%20_point%0A%20%20%20%20_chart.properties(width%3D600)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(%0A%20%20%20%20%20%20%20%20r%22%22%22%0A%20%20%20%20%20%20%20%20what%20do%20you%20noti
ce%20here%3F%20%0A%0A%20%20%20%20%20%20%20%201.%20this%20is%20not%20a%20probability%20density%20function%20(pdf)%20yet.%20to%20make%20it%20a%20pdf%2C%20we'll%20divide%20the%20probability%20by%20the%20integral%20to%20get%20the%20density.%0A%20%20%20%20%20%20%20%201.%20the%20mode%20(peak)%20is%20%24p%24.%0A%20%20%20%20%20%20%20%201.%20the%20mean%20of%20the%20possible%20true%20means%20is%20different%20from%20the%20mode.%0A%20%20%20%20%20%20%20%201.%20after%20turning%20this%20into%20a%20pdf%2C%20this%20is%20a%20beta%20distribution.%0A%0A%20%20%20%20%20%20%20%20let's%20turn%20this%20into%20a%20pdf%20and%20see%20which%20beta%20distributions%20might%20match.%20fortunately%20**fuz**%20allows%20you%20to%20make%20beta%20distributions%20in%20a%20variety%20of%20ways.%0A%20%20%20%20%20%20%20%20%22%22%22%0A%20%20%20%20)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_()%3A%0A%20%20%20%20from%20scipy.integrate%20import%20quad%0A%0A%20%20%20%20import%20fuz.dists%20as%20fd%0A%20%20%20%20return%20fd%2C%20quad%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(np)%3A%0A%20%20%20%20x_small%20%3D%20np.linspace(0%2C%201%2C%20257)%0A%20%20%20%20return%20(x_small%2C)%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(Callable%2C%20Sequence%2C%20alt%2C%20np%2C%20pd)%3A%0A%20%20%20%20def%20plot_betas(x%3A%20np.ndarray%2C%20pdfs%3A%20Sequence%5BCallable%5D%2C%20names%3A%20Sequence%5Bstr%5D)%20-%3E%20alt.Chart%3A%0A%20%20%20%20%20%20%20%20dfs%20%3D%20%5B%5D%0A%20%20%20%20%20%20%20%20for%20pdf%2C%20name%20in%20zip(pdfs%2C%20names%2C%20strict%3DTrue)%3A%0A%20%20%20%20%20%20%20%20%20%20%20%20bdf%20%3D%20pd.DataFrame(%7B'x'%3A%20x%2C%20'pdf'%3A%20pdf(x)%7D)%0A%20%20%20%20%20%20%20%20%20%20%20%20bdf%5B'name'%5D%20%3D%20name%0A%20%20%20%20%20%20%20%20%20%20%20%20dfs.append(bdf)%0A%20%20%20%20%20%20%20%20df%20%3D%20pd.concat(dfs)%0A%20%20%20%20%20%20%20%20base%20%3D%20alt.Chart(df%2C%20title%3Dname).encode(%0A%20%20%20%20%20%20%20%20%20%20%20%20alt.X('x')%2C%20alt.Y('pdf')%2C%20alt
.Color('name')%2C%20alt.StrokeDash('name')%0A%20%20%20%20%20%20%20%20)%0A%20%20%20%20%20%20%20%20return%20base.mark_line(opacity%3D0.5%2C%20strokeWidth%3D9)%0A%20%20%20%20return%20(plot_betas%2C)%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(%0A%20%20%20%20alt%2C%0A%20%20%20%20binom%2C%0A%20%20%20%20fd%2C%0A%20%20%20%20intro2_n%2C%0A%20%20%20%20intro2_p%2C%0A%20%20%20%20mo%2C%0A%20%20%20%20pd%2C%0A%20%20%20%20plot_betas%2C%0A%20%20%20%20quad%2C%0A%20%20%20%20wintro_yes%2C%0A%20%20%20%20x_small%2C%0A)%3A%0A%20%20%20%20pre_pdf%20%3D%20lambda%20x%3A%20binom.pmf(wintro_yes.value%2C%20intro2_n%2C%20x)%0A%20%20%20%20bin_pdf%20%3D%20lambda%20x%3A%20pre_pdf(x)%20%2F%20quad(pre_pdf%2C%200%2C%201)%5B0%5D%0A%20%20%20%20_bin_df%20%3D%20pd.DataFrame(%7B'x'%3A%20x_small%2C%20'pdf'%3A%20bin_pdf(x_small)%7D)%0A%20%20%20%20_bin_df%5B'name'%5D%20%3D%20'binomial-derived'%0A%20%20%20%20_bin_fig%20%3D%20(%0A%20%20%20%20%20%20%20%20alt.Chart(_bin_df%2C%20title%3D'binomial-derived%20vs%20potential%20betas')%0A%20%20%20%20%20%20%20%20.mark_line()%0A%20%20%20%20%20%20%20%20.encode(alt.X('x')%2C%20alt.Y('pdf')%2C%20alt.Color('name')%2C%20alt.StrokeDash('name'))%0A%20%20%20%20)%0A%0A%20%20%20%20_mo%20%3D%20intro2_p%5B1%5D%0A%20%20%20%20b_mo_t%20%3D%20fd.beta_from_mode_trials(_mo%2C%20intro2_n)%0A%20%20%20%20b_mo_k1%20%3D%20fd.beta_from_mode_k(_mo%2C%20intro2_n)%0A%20%20%20%20b_mu_k1%20%3D%20fd.beta_from_mu_k(_mo%2C%20intro2_n)%0A%20%20%20%20_beta_fig%20%3D%20plot_betas(%0A%20%20%20%20%20%20%20%20x_small%2C%20(b_mo_t.pdf%2C%20b_mo_k1.pdf%2C%20b_mu_k1.pdf)%2C%20('%CE%B2%20mode%20trials'%2C%20'%CE%B2%20mode%20k'%2C%20'%CE%B2%20%CE%BC%20k')%0A%20%20%20%20)%0A%0A%20%20%20%20mo.ui.altair_chart(_bin_fig%20%2B%20_beta_fig)%0A%20%20%20%20return%20b_mo_k1%2C%20b_mo_t%2C%20b_mu_k1%2C%20bin_pdf%2C%20pre_pdf%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(%0A%20%20%20%20%20%20%20%20r%22%22%22%0A%20%20%20%20%20%20%20%20from%20the%20chart%2C%20it's%20clear%20that%20the%20beta%20
distribution%20parameterized%20by%20mode%20and%20number%20of%20trials%20matches%20the%20binomial-derived%20pdf.%0A%0A%20%20%20%20%20%20%20%20what%20are%20the%20implications%3F%20moving%20from%20bernoulli%20to%20binomial%20to%20beta%20is%20a%20well-known%20fundamental%20concept%20in%20statistics%2C%20so%20what's%20new%20here%3F%20let's%20set%20things%20up%20to%20understand.%0A%20%20%20%20%20%20%20%20%22%22%22%0A%20%20%20%20)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(%0A%20%20%20%20%20%20%20%20r%22%22%22%0A%20%20%20%20%20%20%20%20%23%23%20setup%0A%0A%20%20%20%20%20%20%20%20first%2C%20for%20the%20purposes%20of%20this%20chapter%2C%20let's%20define%3A%0A%0A%20%20%20%20%20%20%20%20%24%24%0A%20%20%20%20%20%20%20%20%5Cbegin%7Baligned%7D%0A%20%20%20%20%20%20%20%20%5Cmu_s%20%26%3D%20%5Ctext%7Bsample%20mean%7D%20%5C%5C%0A%20%20%20%20%20%20%20%20%5Cmu_%5Ctop%20%26%3D%20%5Ctext%7Bthe%20true%20mean%7D%20%5C%5C%0A%20%20%20%20%20%20%20%20%5Cmu_%5Cdiamond%20%26%3D%20%5Ctext%7Ba%20possible%20true%20mean%7D%20%5C%5C%0A%20%20%20%20%20%20%20%20%5CMu%20%26%3D%20%5Ctext%7Bthe%20distribution%20of%20possible%20true%20means%7D%20%5C%5C%0A%20%20%20%20%20%20%20%20%5Cmathrm%7BE%7D%5BM%5D%20%3D%20%5Cmu_%5CMu%20%26%3D%20%5Ctext%7Bthe%20mean%20of%20possible%20true%20means%7D%20%5C%5C%0A%20%20%20%20%20%20%20%20%5Chat%7B%5CMu%7D%20%26%3D%20%5Ctext%7Bthe%20mode%20of%20the%20possible%20true%20mean%20distribution%7D%20%5C%5C%0A%20%20%20%20%20%20%20%20%5Chat%7B%5CBeta%7D%20%26%3D%20%5Ctext%7Bthe%20mode%20of%20a%20beta%20distribution%7D%20%5C%5C%0A%20%20%20%20%20%20%20%20%5Cend%7Baligned%7D%0A%20%20%20%20%20%20%20%20%24%24%0A%0A%20%20%20%20%20%20%20%20in%20addition%2C%20we%20use%20standard%20notation%20for%20the%20%5Bbeta%20distribution%20from%20wikipedia%5D(https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FBeta_distribution).%0A%0A%20%20%20%20%20%20%20%20here%20are%20some%20insights%3A%0A%0A%20%20%20%20%20%20%20%201.%20the%20parameterization%20of%20the%20beta%
20by%20mode%20and%20no.%20of%20trials%20%24t%24%2C%20where%20%24t%20%3D%20k-2%24%2C%20matches%20best.%0A%20%20%20%20%20%20%20%201.%20the%20mean%20of%20the%20beta%20%24%5Cmu_B%24%20corresponds%20to%20the%20mean%20of%20possible%20true%20means%20%24%5Cmu_%5CMu%24%0A%20%20%20%20%20%20%20%20%22%22%22%0A%20%20%20%20)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(%0A%20%20%20%20%20%20%20%20r%22%22%22%0A%20%20%20%20%20%20%20%20%23%23%20the%20mean%20of%20possible%20true%20means%0A%0A%20%20%20%20%20%20%20%20first%2C%20what's%20the%20best%20way%20to%20calculate%20%24%5Cmu_%5CMu%24%3F%20just%20a%20bit%20of%20algebra%3A%0A%0A%20%20%20%20%20%20%20%20%24%24%0A%20%20%20%20%20%20%20%20%5Cbegin%7Baligned%7D%0A%20%20%20%20%20%20%20%20t%20%26%3D%20k-2%20%5C%5C%0A%20%20%20%20%20%20%20%20k%20%26%3D%20%5Calpha%20%2B%20%5Cbeta%20%5C%5C%0A%20%20%20%20%20%20%20%20%5Chat%7B%5CBeta%7D%20%26%5Ccoloneqq%20%5Cmu_s%5C%5C%0A%20%20%20%20%20%20%20%20%5Chat%7B%5CBeta%7D%20%26%3D%20%5Cfrac%7B%5Calpha-1%7D%7B%5Calpha%2B%5Cbeta-2%7D%20%3D%20%5Cfrac%7B%5Calpha-1%7D%7Bt%7D%20%5C%5C%0A%20%20%20%20%20%20%20%20%5CMu%20%26%3D%20%5CBeta(%5Calpha%2C%5Cbeta)%20%5C%5C%0A%20%20%20%20%20%20%20%20%5Cmu_%5CMu%20%26%3D%20%5Cfrac%7B%5Calpha%7D%7Bk%7D%20%5C%5C%0A%20%20%20%20%20%20%20%20%5Calpha%20%26%3D%20k%5Cmu_%5CMu%20%3D%20t%5Chat%7B%5CBeta%7D%20%2B%201%20%5C%5C%0A%20%20%20%20%20%20%20%20%5Cmu_%5CMu%20%26%3D%20%5Cfrac%7Bt%20%5Cmu_s%20%2B%201%7D%7Bk%7D%20%5C%5C%0A%20%20%20%20%20%20%20%20%5Cend%7Baligned%7D%0A%20%20%20%20%20%20%20%20%24%24%0A%0A%20%20%20%20%20%20%20%20thus%2C%20to%20find%20%24%5Cmu_%5CMu%24%20given%20number%20of%20ratings%20%24t%24%20and%20mean%20%24%5Cmu_s%24%3A%0A%20%20%20%20%20%20%20%20%22%22%22%0A%20%20%20%20)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.callout(%0A%20%20%20%20%20%20%20%20mo.md(r%22%22%22%0A%20%20%20%20%24%24%0A%20%20%20%20%5Cmu_%5CMu%20%3D%20%5Cfrac%7Bt%20%5Cmu_s%20%2B%201%7D%7Bt%2B2%7
D%0A%20%20%20%20%24%24%0A%20%20%20%20%22%22%22)%2C%0A%20%20%20%20%20%20%20%20kind%3D'success'%2C%0A%20%20%20%20)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22we%20have%20just%20derived%20the%20%5Brule%20of%20succession%5D(https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FRule_of_succession)%2C%20aka%20the%20%5Bbayes%20estimator%5D(https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FBinomial_distribution%23Estimation_of_parameters)%20given%20a%20uniform%20prior.%20from%20here%20on%20i%20will%20refer%20to%20this%20estimator%20as%20%24%5Cmu_%5CMu%24%20to%20reflect%20its%20meaning%20in%20the%20context%20of%20bayesian%20ranking.%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%23%23%20claims%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.callout(%0A%20%20%20%20%20%20%20%20mo.md(r%22%22%22%0A%20%20%20%201.%20in%20a%20bayesian%20ranking%20context%2C%20taking%20%24%5Cmu_%5CMu%24%20is%20equivalent%20to%20finding%20the%20posterior%2C%20where%20the%20prior%20consists%20of%20the%20other%20items'%20possible%20true%20mean%20distributions%20%24M_i%24.%0A%20%20%20%201.%20this%20is%20extensible%20to%20other%20rating%20systems%20like%203-star%2C%205-star%2C%20out-of-10%2C%20and%20even%20continuous%20(floating-point)%20systems%2C%20by%20using%20dirichlet%20distributions.%0A%20%20%20%201.%20given%20the%20same%20information%2C%20finding%20%24%5Cmu_%5CMu%24%20is%20optimal%20and%20outperforms%20other%20bayesian%20ranking%20algorithms%20%5B%5Esame-level%5D%0A%0A%20%20%20%20%5B%5Esame-level%5D%3A%20caveat%20being%20algorithms%20on%20the%20same%20level%3B%20i'm%20not%20comparing%20to%20complex%20recommender%20systems%2C%20although%20it%20could%20act%20as%20a%20basis%20for%20a%20better%20recommender%20system%20since%20incorporating%20weights%20is%20simple.%0A%20%20%20%20%22%22%22)%2C%0A%20%20%20%20%20%20%20%20kind%3D'warn'%2C%0A%20%20%20%20)%0A%20%20%20%2
0return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(%0A%20%20%20%20%20%20%20%20r%22%22%22%0A%20%20%20%20%20%20%20%20i%20will%20address%20these%20claims%20in%20the%20following%20chapters.%0A%0A%20%20%20%20%20%20%20%20although%20the%20rule%20of%20succession%20has%20been%20known%20since%20the%2018th%20century%2C%20i%20believe%20this%20is%20a%20novel%20angle%20to%20look%20at%20the%20problem%20of%20bayesian%20ranking.%0A%20%20%20%20%20%20%20%20%22%22%22%0A%20%20%20%20)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%23%23%20code%20navigation%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%23%23%23%20widgets%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%23%23%23%23%20initial%20parameters%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20w_n_items%20%3D%20mo.ui.slider(%0A%20%20%20%20%20%20%20%20steps%3D%5B5%2C%2010%2C%2050%2C%20100%2C%20500%2C%201000%5D%2C%0A%20%20%20%20%20%20%20%20debounce%3DTrue%2C%0A%20%20%20%20%20%20%20%20label%3D'no.%20of%20scored%20items'%2C%0A%20%20%20%20%20%20%20%20full_width%3DTrue%2C%0A%20%20%20%20%20%20%20%20show_value%3DTrue%2C%0A%20%20%20%20%20%20%20%20value%3D10%2C%0A%20%20%20%20)%0A%20%20%20%20w_init_possible_means%20%3D%20mo.ui.range_slider(%0A%20%20%20%20%20%20%20%200.01%2C%0A%20%20%20%20%20%20%20%200.99%2C%0A%20%20%20%20%20%20%20%200.01%2C%0A%20%20%20%20%20%20%20%20value%3D(0.05%2C%200.95)%2C%0A%20%20%20%20%20%20%20%20debounce%3DTrue%2C%0A%20%20%20%20%20%20%20%20show_value%3DTrue%2C%0A%20%20%20%20%20%20%20%20label%3D'possible%20mean%20scores'%2C%0A%20%20%20%20%20%20%20%20full_width%3DTrue%2C%0A%20%20%20%20)%0A%20%20%20%20w_init_n_scores%20%3D%20mo.ui.range_slider(%0A%20%20%20%20%20%20%20%202%2C%0A%20%20%20%20%20%20%20%20200%2C%0A%20%20%20%20%20%20%20%201%2C%0A%20%
20%20%20%20%20%20%20value%3D(3%2C%2036)%2C%0A%20%20%20%20%20%20%20%20debounce%3DTrue%2C%0A%20%20%20%20%20%20%20%20label%3D'possible%20no.%20of%20scores'%2C%0A%20%20%20%20%20%20%20%20full_width%3DTrue%2C%0A%20%20%20%20%20%20%20%20show_value%3DTrue%2C%0A%20%20%20%20)%0A%20%20%20%20w_xax_resolution%20%3D%20mo.ui.slider(%0A%20%20%20%20%20%20%20%20steps%3D%5B129%2C%20257%2C%20513%2C%201025%2C%202049%2C%204097%2C%208193%5D%2C%0A%20%20%20%20%20%20%20%20debounce%3DTrue%2C%0A%20%20%20%20%20%20%20%20label%3D'x%20axis%20resolution'%2C%0A%20%20%20%20%20%20%20%20full_width%3DTrue%2C%0A%20%20%20%20%20%20%20%20show_value%3DTrue%2C%0A%20%20%20%20%20%20%20%20value%3D1025%2C%0A%20%20%20%20)%0A%20%20%20%20return%20(%0A%20%20%20%20%20%20%20%20w_init_n_scores%2C%0A%20%20%20%20%20%20%20%20w_init_possible_means%2C%0A%20%20%20%20%20%20%20%20w_n_items%2C%0A%20%20%20%20%20%20%20%20w_xax_resolution%2C%0A%20%20%20%20)%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%23%23%23%20plotting%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(Sequence%2C%20alt%2C%20np%2C%20pd)%3A%0A%20%20%20%20def%20plot_bernoulli(p%3A%20Sequence%5Bfloat%2C%20float%5D%20%7C%20np.ndarray)%20-%3E%20alt.Chart%3A%0A%20%20%20%20%20%20%20%20_df%20%3D%20pd.DataFrame(%7B'x'%3A%20%5B'negative'%2C%20'positive'%5D%2C%20'probability'%3A%20p%7D)%0A%20%20%20%20%20%20%20%20_base%20%3D%20alt.Chart(_df%2C%20title%3D'bernoulli%20probability%20mass%20function%20(pmf)').encode(%0A%20%20%20%20%20%20%20%20%20%20%20%20alt.X('x'%2C%20axis%3Dalt.Axis(labelAngle%3D0))%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20alt.Y('probability'%2C%20scale%3Dalt.Scale(domain%3D%5B0%2C%201%5D))%2C%0A%20%20%20%20%20%20%20%20)%0A%20%20%20%20%20%20%20%20_bar%20%3D%20_base.mark_bar()%0A%20%20%20%20%20%20%20%20_text%20%3D%20_base.mark_text(dy%3D-10).encode(alt.Text('probability'%2C%20format%3D'.2f'))%0A%20%20%20%20%20%20%20%20return%20_bar%20%2B%20_text%0A%20%20%20%20return%20(plot_bernoulli%2C)%0A%0A%
0A%40app.cell(hide_code%3DTrue)%0Adef%20_(alt%2C%20np%2C%20pd%2C%20rv_discrete_frozen)%3A%0A%20%20%20%20def%20plot_binomial(dist%3A%20rv_discrete_frozen%2C%20max_n%3A%20int%20%3D%2020)%20-%3E%20alt.Chart%3A%0A%20%20%20%20%20%20%20%20x%20%3D%20np.arange(max_n%20%2B%201)%0A%20%20%20%20%20%20%20%20df%20%3D%20pd.DataFrame(%7B'successes'%3A%20x%2C%20'probability'%3A%20dist.pmf(x)%7D)%0A%20%20%20%20%20%20%20%20base%20%3D%20alt.Chart(df%2C%20title%3D'binomial%20pmf').encode(%0A%20%20%20%20%20%20%20%20%20%20%20%20alt.X('successes')%2C%20alt.Y('probability'%2C%20scale%3Dalt.Scale(domain%3D%5B0%2C%201%5D))%0A%20%20%20%20%20%20%20%20)%0A%20%20%20%20%20%20%20%20line%20%3D%20base.mark_line()%0A%20%20%20%20%20%20%20%20point%20%3D%20base.mark_point()%0A%20%20%20%20%20%20%20%20return%20line%20%2B%20point%0A%20%20%20%20return%20(plot_binomial%2C)%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%23%23%23%20imports%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_()%3A%0A%20%20%20%20import%20altair%20as%20alt%0A%20%20%20%20import%20marimo%20as%20mo%0A%20%20%20%20import%20numpy%20as%20np%0A%20%20%20%20import%20pandas%20as%20pd%0A%20%20%20%20from%20scipy.stats._distn_infrastructure%20import%20rv_continuous_frozen%2C%20rv_discrete_frozen%0A%0A%20%20%20%20import%20fuz.log%20as%20flog%0A%20%20%20%20return%20alt%2C%20flog%2C%20mo%2C%20np%2C%20pd%2C%20rv_continuous_frozen%2C%20rv_discrete_frozen%0A%0A%0Aif%20__name__%20%3D%3D%20%22__main__%22%3A%0A%20%20%20%20app.run()%0A