From 211253becbd5e7e07b0aca17bdf5259d1a096d69 Mon Sep 17 00:00:00 2001 From: Nad Alaba <37968805+NadAlaba@users.noreply.github.com> Date: Mon, 24 Jun 2024 15:08:47 +0300 Subject: fix(zipf): improve approximation of zipf distribution (@NadAlaba) (#5515) * fix(zipf): improve approximation of zipf distribution * rename --------- Co-authored-by: Miodec --- frontend/src/ts/test/wordset.ts | 4 ++-- frontend/src/ts/utils/misc.ts | 20 +++++++++++++------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/frontend/src/ts/test/wordset.ts b/frontend/src/ts/test/wordset.ts index 60d61a224..377f919df 100644 --- a/frontend/src/ts/test/wordset.ts +++ b/frontend/src/ts/test/wordset.ts @@ -1,5 +1,5 @@ import * as FunboxList from "./funbox/funbox-list"; -import { dreymarIndex } from "../utils/misc"; +import { zipfyRandomArrayIndex } from "../utils/misc"; import { randomElementFromArray, shuffle } from "../utils/arrays"; import Config from "../config"; @@ -25,7 +25,7 @@ export class Wordset implements MonkeyTypes.Wordset { randomWord(mode: MonkeyTypes.FunboxWordsFrequency): string { if (mode === "zipf") { - return this.words[dreymarIndex(this.words.length)] as string; + return this.words[zipfyRandomArrayIndex(this.words.length)] as string; } else { return randomElementFromArray(this.words); } diff --git a/frontend/src/ts/utils/misc.ts b/frontend/src/ts/utils/misc.ts index 5d45e5f47..e81a25985 100644 --- a/frontend/src/ts/utils/misc.ts +++ b/frontend/src/ts/utils/misc.ts @@ -579,14 +579,20 @@ export function isDevEnvironment(): boolean { return envConfig.isDevelopment; } -export function dreymarIndex(arrayLength: number): number { - const n = arrayLength; - const g = 0.5772156649; - const M = Math.log(n) + g; +export function zipfyRandomArrayIndex(dictLength: number): number { + /** + * get random index based on probability distribution of Zipf's law, + * where PMF is (1/n)/H_N, + * where H_N is the Harmonic number of (N), where N is dictLength + * and the harmonic number is approximated using the formula: + * H_n = ln(n + 0.5) + gamma + */ + const gamma = 0.5772156649015329; // Euler–Mascheroni constant + const H_N = Math.log(dictLength + 0.5) + gamma; // approximation of H_N const r = Math.random(); - const h = Math.exp(r * M - g); - const W = Math.ceil(h); - return W - 1; + /* inverse of CDF where CDF is H_n/H_N */ + const inverseCDF = Math.exp(r * H_N - gamma) - 0.5; + return Math.floor(inverseCDF); } export async function checkIfLanguageSupportsZipf( -- cgit v1.2.3