diff options
author | Nad Alaba <[email protected]> | 2024-06-24 15:08:47 +0300 |
---|---|---|
committer | GitHub <[email protected]> | 2024-06-24 14:08:47 +0200 |
commit | 211253becbd5e7e07b0aca17bdf5259d1a096d69 (patch) | |
tree | 72dbe384e69ebbf395033891a92838c414c2546c | |
parent | d12da37050a7430b01ccb23dd1a6384580a2bda0 (diff) | |
download | monkeytype-211253becbd5e7e07b0aca17bdf5259d1a096d69.tar.gz monkeytype-211253becbd5e7e07b0aca17bdf5259d1a096d69.zip |
fix(zipf): improve approximation of zipf distribution (@NadAlaba) (#5515)
* fix(zipf): improve approximation of zipf distribution
* rename
---------
Co-authored-by: Miodec <[email protected]>
-rw-r--r-- | frontend/src/ts/test/wordset.ts | 4 | ||||
-rw-r--r-- | frontend/src/ts/utils/misc.ts | 20 |
2 files changed, 15 insertions, 9 deletions
diff --git a/frontend/src/ts/test/wordset.ts b/frontend/src/ts/test/wordset.ts index 60d61a224..377f919df 100644 --- a/frontend/src/ts/test/wordset.ts +++ b/frontend/src/ts/test/wordset.ts @@ -1,5 +1,5 @@ import * as FunboxList from "./funbox/funbox-list"; -import { dreymarIndex } from "../utils/misc"; +import { zipfyRandomArrayIndex } from "../utils/misc"; import { randomElementFromArray, shuffle } from "../utils/arrays"; import Config from "../config"; @@ -25,7 +25,7 @@ export class Wordset implements MonkeyTypes.Wordset { randomWord(mode: MonkeyTypes.FunboxWordsFrequency): string { if (mode === "zipf") { - return this.words[dreymarIndex(this.words.length)] as string; + return this.words[zipfyRandomArrayIndex(this.words.length)] as string; } else { return randomElementFromArray(this.words); } diff --git a/frontend/src/ts/utils/misc.ts b/frontend/src/ts/utils/misc.ts index 5d45e5f47..e81a25985 100644 --- a/frontend/src/ts/utils/misc.ts +++ b/frontend/src/ts/utils/misc.ts @@ -579,14 +579,20 @@ export function isDevEnvironment(): boolean { return envConfig.isDevelopment; } -export function dreymarIndex(arrayLength: number): number { - const n = arrayLength; - const g = 0.5772156649; - const M = Math.log(n) + g; +export function zipfyRandomArrayIndex(dictLength: number): number { + /** + * get random index based on probability distribution of Zipf's law, + * where PMF is (1/n)/H_N, + * where H_N is the Harmonic number of (N), where N is dictLength + * and the harmonic number is approximated using the formula: + * H_n = ln(n + 0.5) + gamma + */ + const gamma = 0.5772156649015329; // Euler–Mascheroni constant + const H_N = Math.log(dictLength + 0.5) + gamma; // approximation of H_N const r = Math.random(); - const h = Math.exp(r * M - g); - const W = Math.ceil(h); - return W - 1; + /* inverse of CDF where CDF is H_n/H_N */ + const inverseCDF = Math.exp(r * H_N - gamma) - 0.5; + return Math.floor(inverseCDF); } export async function checkIfLanguageSupportsZipf( |