aboutsummaryrefslogtreecommitdiffhomepage
path: root/tests/rsqrt_test.cpp
blob: 7adbe39e123b25a891051ad4fb1815fe807c3ffe (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
/* This file is part of the dynarmic project.
 * Copyright (c) 2021 MerryMage
 * SPDX-License-Identifier: 0BSD
 */

#include <catch2/benchmark/catch_benchmark.hpp>
#include <catch2/catch_test_macros.hpp>
#include <fmt/printf.h>
#include <mcl/stdint.hpp>

#include "dynarmic/common/fp/fpcr.h"
#include "dynarmic/common/fp/fpsr.h"
#include "dynarmic/common/fp/op/FPRSqrtEstimate.h"

// Reciprocal-square-root implementations exercised by this file. They are only
// declared here (with C linkage); their definitions live outside this file.
// Each one maps a u32 input word to a u32 result word.
// NOTE(review): the words are presumably IEEE-754 binary32 bit patterns and the
// definitions presumably hand-written assembly — confirm against the build.
extern "C" u32 rsqrt_inaccurate(u32);
extern "C" u32 rsqrt_full(u32);
extern "C" u32 rsqrt_full_gpr(u32);
extern "C" u32 rsqrt_full_nb(u32);
extern "C" u32 rsqrt_full_nb2(u32);
extern "C" u32 rsqrt_full_nb_gpr(u32);
extern "C" u32 rsqrt_newton(u32);
extern "C" u32 rsqrt_hack(u32);

using namespace Dynarmic;

// Reference implementation: runs FP::FPRSqrtEstimate on `value` using a
// default-constructed FPCR and FPSR, and returns whatever it produces.
// Exposed with C linkage, so it is reachable under its unmangled name.
extern "C" u32 rsqrt_fallback(u32 value) {
    FP::FPCR control;
    FP::FPSR status;
    const u32 estimate = FP::FPRSqrtEstimate(value, control, status);
    return estimate;
}
// Underscore-prefixed C-linkage entry point that forwards straight to
// rsqrt_fallback and returns its result unchanged.
// NOTE(review): presumably provided so externally defined code can reference
// the symbol on platforms with a different C symbol prefix — confirm.
extern "C" u32 _rsqrt_fallback(u32 value) {
    const u32 forwarded = rsqrt_fallback(value);
    return forwarded;
}

// Checks a single input word: the results of rsqrt_full, rsqrt_full_gpr,
// rsqrt_newton and rsqrt_hack must all equal FP::FPRSqrtEstimate's result.
//
// When every result agrees the function returns without evaluating any Catch2
// assertion. On a mismatch it prints the input followed by all five results
// (hex, zero-padded to 8 digits) and then runs the REQUIREs so the failing
// comparison is reported.
void Test(u32 value) {
    FP::FPCR control;
    FP::FPSR status;

    // These names appear verbatim in the REQUIRE expressions below, and Catch2
    // echoes the expression text in its failure report, so they are kept stable.
    const u32 expect = FP::FPRSqrtEstimate(value, control, status);
    const u32 full = rsqrt_full(value);
    const u32 full_gpr = rsqrt_full_gpr(value);
    const u32 newton = rsqrt_newton(value);
    const u32 hack = rsqrt_hack(value);

    const bool all_agree = expect == full
                        && expect == full_gpr
                        && expect == newton
                        && expect == hack;
    if (all_agree) {
        return;
    }

    fmt::print("{:08x} = {:08x} : {:08x} : {:08x} : {:08x} : {:08x}\n", value, expect, full, full_gpr, newton, hack);

    REQUIRE(expect == full);
    REQUIRE(expect == full_gpr);
    REQUIRE(expect == newton);
    REQUIRE(expect == hack);
}

// Runs Test() on a short list of hand-picked words, then on every 32-bit value
// in ascending order. The "[.]" tag marks the case as hidden in Catch2, so it
// only runs when selected explicitly.
TEST_CASE("RSqrt Tests", "[fp][.]") {
    // Hand-picked inputs, checked first so a problem with them surfaces before
    // the long sweep starts.
    // NOTE(review): read as binary32 these look like zero, infinity and NaN
    // encodings of both signs — confirm.
    constexpr u32 spot_checks[] = {
        0x00000000,
        0x80000000,
        0x7f8b7201,
        0x7f800000,
        0x7fc00000,
        0xff800000,
        0xffc00000,
        0xff800001,
    };
    for (const u32 word : spot_checks) {
        Test(word);
    }

    // Exhaustive sweep. The counter is 64 bits wide so that it can reach the
    // one-past-the-end value 2^32 and terminate the loop.
    constexpr u64 sweep_end = 0x1'0000'0000;
    u64 counter = 0;
    while (counter < sweep_end) {
        Test(static_cast<u32>(counter));
        ++counter;
    }
}

TEST_CASE("Benchmark RSqrt", "[fp][.]") {
    BENCHMARK("Inaccurate") {
        u64 total = 0;
        for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
            const u32 value = static_cast<u32>(i);
            total += rsqrt_inaccurate(value);
        }
        return total;
    };

    BENCHMARK("Full divss") {
        u64 total = 0;
        for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
            const u32 value = static_cast<u32>(i);
            total += rsqrt_full(value);
        }
        return total;
    };

    BENCHMARK("Full divss (GPR)") {
        u64 total = 0;
        for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
            const u32 value = static_cast<u32>(i);
            total += rsqrt_full_gpr(value);
        }
        return total;
    };

    BENCHMARK("Full divss (NB)") {
        u64 total = 0;
        for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
            const u32 value = static_cast<u32>(i);
            total += rsqrt_full_nb(value);
        }
        return total;
    };

    BENCHMARK("Full divss (NB2)") {
        u64 total = 0;
        for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
            const u32 value = static_cast<u32>(i);
            total += rsqrt_full_nb2(value);
        }
        return total;
    };

    BENCHMARK("Full divss (NB + GPR)") {
        u64 total = 0;
        for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
            const u32 value = static_cast<u32>(i);
            total += rsqrt_full_nb_gpr(value);
        }
        return total;
    };

    BENCHMARK("One Newton iteration") {
        u64 total = 0;
        for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
            const u32 value = static_cast<u32>(i);
            total += rsqrt_newton(value);
        }
        return total;
    };

    BENCHMARK("Ugly Hack") {
        u64 total = 0;
        for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
            const u32 value = static_cast<u32>(i);
            total += rsqrt_hack(value);
        }
        return total;
    };

    BENCHMARK("Softfloat") {
        u64 total = 0;
        for (u64 i = 0; i < 0x1'0000'0000; i += 0x1234) {
            const u32 value = static_cast<u32>(i);
            total += rsqrt_fallback(value);
        }
        return total;
    };
}