aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorNad Alaba <[email protected]>2024-06-24 15:08:47 +0300
committerGitHub <[email protected]>2024-06-24 14:08:47 +0200
commit211253becbd5e7e07b0aca17bdf5259d1a096d69 (patch)
tree72dbe384e69ebbf395033891a92838c414c2546c
parentd12da37050a7430b01ccb23dd1a6384580a2bda0 (diff)
downloadmonkeytype-211253becbd5e7e07b0aca17bdf5259d1a096d69.tar.gz
monkeytype-211253becbd5e7e07b0aca17bdf5259d1a096d69.zip
fix(zipf): improve approximation of zipf distribution (@NadAlaba) (#5515)
* fix(zipf): improve approximation of zipf distribution * rename --------- Co-authored-by: Miodec <[email protected]>
-rw-r--r--frontend/src/ts/test/wordset.ts4
-rw-r--r--frontend/src/ts/utils/misc.ts20
2 files changed, 15 insertions, 9 deletions
diff --git a/frontend/src/ts/test/wordset.ts b/frontend/src/ts/test/wordset.ts
index 60d61a224..377f919df 100644
--- a/frontend/src/ts/test/wordset.ts
+++ b/frontend/src/ts/test/wordset.ts
@@ -1,5 +1,5 @@
import * as FunboxList from "./funbox/funbox-list";
-import { dreymarIndex } from "../utils/misc";
+import { zipfyRandomArrayIndex } from "../utils/misc";
import { randomElementFromArray, shuffle } from "../utils/arrays";
import Config from "../config";
@@ -25,7 +25,7 @@ export class Wordset implements MonkeyTypes.Wordset {
randomWord(mode: MonkeyTypes.FunboxWordsFrequency): string {
if (mode === "zipf") {
- return this.words[dreymarIndex(this.words.length)] as string;
+ return this.words[zipfyRandomArrayIndex(this.words.length)] as string;
} else {
return randomElementFromArray(this.words);
}
diff --git a/frontend/src/ts/utils/misc.ts b/frontend/src/ts/utils/misc.ts
index 5d45e5f47..e81a25985 100644
--- a/frontend/src/ts/utils/misc.ts
+++ b/frontend/src/ts/utils/misc.ts
@@ -579,14 +579,20 @@ export function isDevEnvironment(): boolean {
return envConfig.isDevelopment;
}
-export function dreymarIndex(arrayLength: number): number {
- const n = arrayLength;
- const g = 0.5772156649;
- const M = Math.log(n) + g;
+export function zipfyRandomArrayIndex(dictLength: number): number {
+ /**
+ * get random index based on probability distribution of Zipf's law,
+ * where PMF is (1/n)/H_N,
+ * where H_N is the Harmonic number of (N), where N is dictLength
+ * and the harmonic number is approximated using the formula:
+ * H_n = ln(n + 0.5) + gamma
+ */
+ const gamma = 0.5772156649015329; // Euler–Mascheroni constant
+ const H_N = Math.log(dictLength + 0.5) + gamma; // approximation of H_N
const r = Math.random();
- const h = Math.exp(r * M - g);
- const W = Math.ceil(h);
- return W - 1;
+ /* inverse of CDF where CDF is H_n/H_N */
+ const inverseCDF = Math.exp(r * H_N - gamma) - 0.5;
+ return Math.floor(inverseCDF);
}
export async function checkIfLanguageSupportsZipf(