diff options
Diffstat (limited to 'zluda_rt')
59 files changed, 15209 insertions, 0 deletions
diff --git a/zluda_rt/Cargo.toml b/zluda_rt/Cargo.toml new file mode 100644 index 0000000..263b693 --- /dev/null +++ b/zluda_rt/Cargo.toml @@ -0,0 +1,47 @@ +[package] +name = "zluda_rt" +version = "0.0.0" +authors = ["Andrzej Janik <[email protected]>"] +edition = "2018" + +[lib] +name = "nvoptix" +crate-type = ["cdylib"] + +[dependencies] +comgr = { path = "../comgr" } +hip_common = { path = "../hip_common" } +hip_runtime-sys = { path = "../hip_runtime-sys" } +hiprt-sys = { path = "../hiprt-sys" } +optix_base = { path = "../optix_base" } +optix_types = { path = "../optix_types" } +ptx = { path = "../ptx" } +rustc-hash = "1.1" +paste = "1.0" +winapi = { version = "0.3", features = ["libloaderapi", "std"] } +libloading = "0.8" +nougat = "0.2.4" +glam = "0.22" +dirs = "4.0.0" +sha2 = "0.10.2" +generic-array = "0.14.5" +typenum = "1.15.0" +data-encoding = "2.3.3" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +serde_with = "2.1.0" +static_assertions = "1.1.0" +rusqlite = { version = "0.28.0", features = ["bundled", "serde_json"] } + +[dev-dependencies] +float-cmp = "0.9.0" + +[build-dependencies] +vergen = { version = "7.5.1", default-features = false, features = ["git"] } +# We don't use time crate, but this coerces vergen to not use newer version that requires +# higher minimum rust version +time = "=0.3.23" + +[package.metadata.zluda] +broken = true +linux_names = ["liboptix.so.6.5.0", "liboptix.so.6.6.0"] diff --git a/zluda_rt/bin/liboptix.so.6.5.0 b/zluda_rt/bin/liboptix.so.6.5.0 Binary files differnew file mode 100644 index 0000000..2d188c0 --- /dev/null +++ b/zluda_rt/bin/liboptix.so.6.5.0 diff --git a/zluda_rt/bin/optix.6.5.0.dll b/zluda_rt/bin/optix.6.5.0.dll Binary files differnew file mode 100644 index 0000000..90cf28e --- /dev/null +++ b/zluda_rt/bin/optix.6.5.0.dll diff --git a/zluda_rt/build.rs b/zluda_rt/build.rs new file mode 100644 index 0000000..9d7f95d --- /dev/null +++ b/zluda_rt/build.rs @@ -0,0 +1,5 @@ +use 
vergen::{Config, vergen};
+
+fn main() {
+ vergen(Config::default()).unwrap()
+}
\ No newline at end of file diff --git a/zluda_rt/optix.xmi b/zluda_rt/optix.xmi new file mode 100644 index 0000000..5f28feb --- /dev/null +++ b/zluda_rt/optix.xmi @@ -0,0 +1,325 @@ +<?xml version="1.0" encoding="UTF-8"?> +<XMI verified="false" xmi.version="1.2" timestamp="2022-08-18T17:58:56" xmlns:UML="http://schema.omg.org/spec/UML/1.4"> + <XMI.header> + <XMI.documentation> + <XMI.exporter>umbrello uml modeller http://umbrello.kde.org</XMI.exporter> + <XMI.exporterVersion>1.6.18</XMI.exporterVersion> + <XMI.exporterEncoding>UnicodeUTF8</XMI.exporterEncoding> + </XMI.documentation> + <XMI.metamodel xmi.version="1.4" href="UML.xml" xmi.name="UML"/> + </XMI.header> + <XMI.content> + <UML:Model isSpecification="false" isAbstract="false" isLeaf="false" xmi.id="m1" isRoot="false" name="UML Model"> + <UML:Namespace.ownedElement> + <UML:Stereotype visibility="public" isSpecification="false" namespace="m1" isAbstract="false" isLeaf="false" isRoot="false" xmi.id="folder" name="folder"/> + <UML:Stereotype visibility="public" isSpecification="false" namespace="m1" isAbstract="false" isLeaf="false" isRoot="false" xmi.id="interface" name="interface"/> + <UML:Model visibility="public" isSpecification="false" namespace="m1" isAbstract="false" isLeaf="false" isRoot="false" xmi.id="Logical_View" name="Logical View"> + <UML:Namespace.ownedElement> + <UML:Package stereotype="folder" visibility="public" isSpecification="false" namespace="Logical_View" isAbstract="false" isLeaf="false" isRoot="false" xmi.id="Datatypes" name="Datatypes"> + <UML:Namespace.ownedElement> + <UML:DataType visibility="public" isSpecification="false" namespace="Datatypes" isAbstract="false" isLeaf="false" isRoot="false" xmi.id="uMbpjW1xDQC9k" name="char"/> + <UML:DataType visibility="public" isSpecification="false" namespace="Datatypes" isAbstract="false" isLeaf="false" isRoot="false" xmi.id="uwDFPKBYx4UKI" name="int"/> + <UML:DataType visibility="public" isSpecification="false" namespace="Datatypes" 
isAbstract="false" isLeaf="false" isRoot="false" xmi.id="uOoodsG4cpKdS" name="float"/> + <UML:DataType visibility="public" isSpecification="false" namespace="Datatypes" isAbstract="false" isLeaf="false" isRoot="false" xmi.id="uPzy2V8JzqWIM" name="double"/> + <UML:DataType visibility="public" isSpecification="false" namespace="Datatypes" isAbstract="false" isLeaf="false" isRoot="false" xmi.id="urkyyLXLOw5TT" name="bool"/> + <UML:DataType visibility="public" isSpecification="false" namespace="Datatypes" isAbstract="false" isLeaf="false" isRoot="false" xmi.id="uHSpplYNTixid" name="string"/> + <UML:DataType visibility="public" isSpecification="false" namespace="Datatypes" isAbstract="false" isLeaf="false" isRoot="false" xmi.id="uUSXACVg6nABi" name="unsigned char"/> + <UML:DataType visibility="public" isSpecification="false" namespace="Datatypes" isAbstract="false" isLeaf="false" isRoot="false" xmi.id="uuUhSpiZb8vSR" name="signed char"/> + <UML:DataType visibility="public" isSpecification="false" namespace="Datatypes" isAbstract="false" isLeaf="false" isRoot="false" xmi.id="u87Jbv7DzhwXd" name="unsigned int"/> + <UML:DataType visibility="public" isSpecification="false" namespace="Datatypes" isAbstract="false" isLeaf="false" isRoot="false" xmi.id="uF3r31sQTNHFR" name="signed int"/> + <UML:DataType visibility="public" isSpecification="false" namespace="Datatypes" isAbstract="false" isLeaf="false" isRoot="false" xmi.id="upyPDwCvXGBsU" name="short int"/> + <UML:DataType visibility="public" isSpecification="false" namespace="Datatypes" isAbstract="false" isLeaf="false" isRoot="false" xmi.id="u7lCVcN6gpMPY" name="unsigned short int"/> + <UML:DataType visibility="public" isSpecification="false" namespace="Datatypes" isAbstract="false" isLeaf="false" isRoot="false" xmi.id="uHrtmL1b948Dd" name="signed short int"/> + <UML:DataType visibility="public" isSpecification="false" namespace="Datatypes" isAbstract="false" isLeaf="false" isRoot="false" xmi.id="uL5S9FX3x2Scx" name="long 
int"/> + <UML:DataType visibility="public" isSpecification="false" namespace="Datatypes" isAbstract="false" isLeaf="false" isRoot="false" xmi.id="uzYTDoL9j56IG" name="signed long int"/> + <UML:DataType visibility="public" isSpecification="false" namespace="Datatypes" isAbstract="false" isLeaf="false" isRoot="false" xmi.id="uaDZMyWmNVCUA" name="unsigned long int"/> + <UML:DataType visibility="public" isSpecification="false" namespace="Datatypes" isAbstract="false" isLeaf="false" isRoot="false" xmi.id="uZPgTO2b0RIpn" name="long double"/> + <UML:DataType visibility="public" isSpecification="false" namespace="Datatypes" isAbstract="false" isLeaf="false" isRoot="false" xmi.id="uBi5EKcjBO3ee" name="wchar_t"/> + <UML:DataType visibility="public" isSpecification="false" namespace="Datatypes" isAbstract="false" isLeaf="false" isRoot="false" xmi.id="uNhglXIPgP3OZ" name="asdas"/> + </UML:Namespace.ownedElement> + </UML:Package> + <UML:Class visibility="public" isSpecification="false" namespace="Logical_View" isAbstract="false" isLeaf="false" isRoot="false" xmi.id="uT8T3XgBcOE49" name="Material"/> + <UML:Interface stereotype="interface" visibility="public" isSpecification="false" namespace="Logical_View" isAbstract="true" isLeaf="false" isRoot="false" xmi.id="uWQoz0qQSrNhL" name="new_interface"/> + <UML:Class visibility="public" isSpecification="false" namespace="Logical_View" isAbstract="false" isLeaf="false" isRoot="false" xmi.id="u8GfhQMSTWw6X" name="GeometryInstance"/> + <UML:Class visibility="public" isSpecification="false" namespace="Logical_View" isAbstract="false" isLeaf="false" isRoot="false" xmi.id="uIQE8ZtQZcVqQ" name="AnyHitProgram"/> + <UML:Class visibility="public" isSpecification="false" namespace="Logical_View" isAbstract="false" isLeaf="false" isRoot="false" xmi.id="uvEmnEk3IKScK" name="ClosestHitProgram"/> + <UML:Association visibility="public" isSpecification="false" namespace="Logical_View" xmi.id="u9ZXRmyhsB43f" name="ray type"> + 
<UML:Association.connection> + <UML:AssociationEnd changeability="changeable" visibility="public" isNavigable="true" isSpecification="false" multiplicity="*" xmi.id="ucQc7nu3LZk9W" type="uT8T3XgBcOE49" name="" aggregation="none"/> + <UML:AssociationEnd changeability="changeable" visibility="public" isNavigable="true" isSpecification="false" multiplicity="*" xmi.id="uTuccGPNRXNyV" type="uvEmnEk3IKScK" name="" aggregation="none"/> + </UML:Association.connection> + </UML:Association> + <UML:Association visibility="public" isSpecification="false" namespace="Logical_View" xmi.id="utrP0dPU1I45r" name="ray type"> + <UML:Association.connection> + <UML:AssociationEnd changeability="changeable" visibility="public" isNavigable="true" isSpecification="false" multiplicity="*" xmi.id="u94Jc7FqyNwbZ" type="uT8T3XgBcOE49" name="" aggregation="none"/> + <UML:AssociationEnd changeability="changeable" visibility="public" isNavigable="true" isSpecification="false" multiplicity="*" xmi.id="uiJ6J4xiol4SY" type="uIQE8ZtQZcVqQ" name="" aggregation="none"/> + </UML:Association.connection> + </UML:Association> + <UML:Association visibility="public" isSpecification="false" namespace="Logical_View" xmi.id="uSR4d7VATIHFm" name=""> + <UML:Association.connection> + <UML:AssociationEnd changeability="changeable" visibility="public" isNavigable="true" isSpecification="false" multiplicity="*" xmi.id="u3PsNSnWD9Uvh" type="u8GfhQMSTWw6X" name="" aggregation="none"/> + <UML:AssociationEnd changeability="changeable" visibility="public" isNavigable="true" isSpecification="false" multiplicity="*" xmi.id="uFeZZ7Rma3Nvk" type="uT8T3XgBcOE49" name="" aggregation="none"/> + </UML:Association.connection> + </UML:Association> + <UML:Class visibility="public" isSpecification="false" namespace="Logical_View" isAbstract="false" isLeaf="false" isRoot="false" xmi.id="uBkpGZVV3XFYp" name="Geometry"/> + <UML:Class visibility="public" isSpecification="false" namespace="Logical_View" isAbstract="false" isLeaf="false" 
isRoot="false" xmi.id="ugcTKlWPxCkmw" name="GeometryGroup"/> + <UML:Class visibility="public" isSpecification="false" namespace="Logical_View" isAbstract="false" isLeaf="false" isRoot="false" xmi.id="uv4oPTpSbxLsV" name="GeometryTriangles"/> + <UML:Association visibility="public" isSpecification="false" namespace="Logical_View" xmi.id="upGO08TSdwn4u" name=""> + <UML:Association.connection> + <UML:AssociationEnd changeability="changeable" visibility="public" isNavigable="true" isSpecification="false" multiplicity="1" xmi.id="u2waZUn5UIcdL" type="ugcTKlWPxCkmw" name="" aggregation="none"/> + <UML:AssociationEnd changeability="changeable" visibility="public" isNavigable="true" isSpecification="false" multiplicity="*" xmi.id="uDt9qkrSruw1L" type="u8GfhQMSTWw6X" name="" aggregation="none"/> + </UML:Association.connection> + </UML:Association> + <UML:Association visibility="public" isSpecification="false" namespace="Logical_View" xmi.id="uk8oAK1mM7ozL" name=""> + <UML:Association.connection> + <UML:AssociationEnd changeability="changeable" visibility="public" isNavigable="true" isSpecification="false" multiplicity="*" xmi.id="uE8qGwefZ7P1s" type="u8GfhQMSTWw6X" name="" aggregation="none"/> + <UML:AssociationEnd changeability="changeable" visibility="public" isNavigable="true" isSpecification="false" multiplicity="1" xmi.id="uXQuguJqc24Ey" type="uBkpGZVV3XFYp" name="" aggregation="none"/> + </UML:Association.connection> + </UML:Association> + <UML:Association visibility="public" isSpecification="false" namespace="Logical_View" xmi.id="uDzHbQozO8QPz" name=""> + <UML:Association.connection> + <UML:AssociationEnd changeability="changeable" visibility="public" isNavigable="true" isSpecification="false" multiplicity="*" xmi.id="uPEVNW9pwyhDM" type="u8GfhQMSTWw6X" name="" aggregation="none"/> + <UML:AssociationEnd changeability="changeable" visibility="public" isNavigable="true" isSpecification="false" multiplicity="1" xmi.id="uXH8xPDQvKcW6" type="uv4oPTpSbxLsV" name="" 
aggregation="none"/> + </UML:Association.connection> + </UML:Association> + <UML:Class visibility="public" isSpecification="false" namespace="Logical_View" isAbstract="false" isLeaf="false" isRoot="false" xmi.id="uaUkcqQH5jm0M" name="Group"/> + <UML:Class visibility="public" isSpecification="false" namespace="Logical_View" isAbstract="false" isLeaf="false" isRoot="false" xmi.id="ubXw40UTKntCF" name="Acceleration_GG"/> + <UML:Association visibility="public" isSpecification="false" namespace="Logical_View" xmi.id="upcGcuivVZTGb" name=""> + <UML:Association.connection> + <UML:AssociationEnd changeability="changeable" visibility="public" isNavigable="true" isSpecification="false" multiplicity="1" xmi.id="upWdGTOngeGTN" type="uaUkcqQH5jm0M" name="" aggregation="none"/> + <UML:AssociationEnd changeability="changeable" visibility="public" isNavigable="true" isSpecification="false" multiplicity="*" xmi.id="uoYqKmT5bKXvY" type="ugcTKlWPxCkmw" name="" aggregation="none"/> + </UML:Association.connection> + </UML:Association> + <UML:Association visibility="public" isSpecification="false" namespace="Logical_View" xmi.id="uPLXgkXflHdRn" name=""> + <UML:Association.connection> + <UML:AssociationEnd changeability="changeable" visibility="public" isNavigable="true" isSpecification="false" multiplicity="1" xmi.id="uJBAIMDCE5jpF" type="ubXw40UTKntCF" name="" aggregation="none"/> + <UML:AssociationEnd changeability="changeable" visibility="public" isNavigable="true" isSpecification="false" multiplicity="1" xmi.id="uAVIx7Bo5I5hV" type="ugcTKlWPxCkmw" name="" aggregation="none"/> + </UML:Association.connection> + </UML:Association> + <UML:Association visibility="public" isSpecification="false" namespace="Logical_View" xmi.id="u29ibEnQQMsG7" name=""> + <UML:Association.connection> + <UML:AssociationEnd changeability="changeable" visibility="public" isNavigable="true" isSpecification="false" xmi.id="u5dh0xnGsMRkW" type="ubXw40UTKntCF" name="" aggregation="none"/> + <UML:AssociationEnd 
changeability="changeable" visibility="public" isNavigable="true" isSpecification="false" xmi.id="uRBgveRHtFLXI" type="uaUkcqQH5jm0M" name="" aggregation="none"/> + </UML:Association.connection> + </UML:Association> + <UML:Class visibility="public" isSpecification="false" namespace="Logical_View" isAbstract="false" isLeaf="false" isRoot="false" xmi.id="ujiTNZOfc0exh" name="Acceleration_G"/> + <UML:Association visibility="public" isSpecification="false" namespace="Logical_View" xmi.id="uhM4v5YYXposu" name=""> + <UML:Association.connection> + <UML:AssociationEnd changeability="changeable" visibility="public" isNavigable="true" isSpecification="false" multiplicity="1" xmi.id="ukjmPoxGsKZAN" type="uaUkcqQH5jm0M" name="" aggregation="none"/> + <UML:AssociationEnd changeability="changeable" visibility="public" isNavigable="true" isSpecification="false" multiplicity="1" xmi.id="ubZei59VK3j0K" type="ujiTNZOfc0exh" name="" aggregation="none"/> + </UML:Association.connection> + </UML:Association> + <UML:Association visibility="public" isSpecification="false" namespace="Logical_View" xmi.id="uAf8nXusYMTVV" name=""> + <UML:Association.connection> + <UML:AssociationEnd changeability="changeable" visibility="public" isNavigable="true" isSpecification="false" xmi.id="ulvXSVq0AaDy9" type="uaUkcqQH5jm0M" name="" aggregation="none"/> + <UML:AssociationEnd changeability="changeable" visibility="public" isNavigable="true" isSpecification="false" xmi.id="uGR9rlM5otiME" type="uaUkcqQH5jm0M" name="" aggregation="none"/> + </UML:Association.connection> + </UML:Association> + </UML:Namespace.ownedElement> + <XMI.extension xmi.extender="umbrello"> + <diagrams resolution="96"> + <diagram showopsig="1" linecolor="#ff0000" snapx="25" showattribassocs="1" snapy="25" linewidth="0" showattsig="1" textcolor="#000000" isopen="1" showpackage="1" showpubliconly="0" showstereotype="1" name="class diagram" font="Sans Serif,9,-1,0,50,0,0,0,0,0" canvasheight="854" canvaswidth="1489" localid="-1" 
snapcsgrid="0" showgrid="0" showops="1" griddotcolor="#d3d3d3" backgroundcolor="#ffffff" usefillcolor="1" fillcolor="#ffff00" zoom="100" xmi.id="ucudBR6CdC2x5" documentation="" showscope="1" snapgrid="0" showatts="1" type="1"> + <widgets> + <classwidget linecolor="#ff0000" usesdiagramfillcolor="0" linewidth="0" textcolor="#000000" showoperations="1" usesdiagramusefillcolor="0" showpubliconly="0" showpackage="1" x="-31482.7" showattsigs="601" showstereotype="1" y="-3741.09" showattributes="1" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="ugFIMnQFA5yzd" width="162" isinstance="0" usefillcolor="1" fillcolor="#ffff00" xmi.id="uT8T3XgBcOE49" autoresize="0" showscope="1" height="91" showopsigs="601"/> + <classwidget linecolor="#ff0000" usesdiagramfillcolor="0" linewidth="0" textcolor="#000000" showoperations="1" usesdiagramusefillcolor="0" showpubliconly="0" showpackage="1" x="-31925.7" showattsigs="601" showstereotype="1" y="-3744.09" showattributes="1" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="uOgvqZPr1Hh7p" width="229" isinstance="0" usefillcolor="1" fillcolor="#ffff00" xmi.id="u8GfhQMSTWw6X" autoresize="0" showscope="1" height="92" showopsigs="601"/> + <classwidget linecolor="#ff0000" usesdiagramfillcolor="0" linewidth="0" textcolor="#000000" showoperations="1" usesdiagramusefillcolor="0" showpubliconly="0" showpackage="1" x="-31026.7" showattsigs="601" showstereotype="1" y="-3717.09" showattributes="1" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="ufqrE6hhUUGtC" width="97" isinstance="0" usefillcolor="1" fillcolor="#ffff00" xmi.id="uIQE8ZtQZcVqQ" autoresize="1" showscope="1" height="29" showopsigs="601"/> + <floatingtext linecolor="#ff0000" usesdiagramfillcolor="1" linewidth="0" textcolor="none" usesdiagramusefillcolor="1" x="-30451" showstereotype="1" y="-3370" text="1" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="upt6qyenwwKoD" pretext="" role="703" width="14" isinstance="0" posttext="" usefillcolor="1" fillcolor="none" xmi.id="u1dKNJRYjhPlW" 
autoresize="1" height="17"/> + <floatingtext linecolor="#ff0000" usesdiagramfillcolor="1" linewidth="0" textcolor="none" usesdiagramusefillcolor="1" x="-31336.7" showstereotype="1" y="-4004.09" text="*" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="ucaGHJM7OePDS" pretext="" role="702" width="14" isinstance="0" posttext="" usefillcolor="1" fillcolor="none" xmi.id="uhZeaRyfIt3KS" autoresize="1" height="17"/> + <floatingtext linecolor="#ff0000" usesdiagramfillcolor="1" linewidth="0" textcolor="none" usesdiagramusefillcolor="1" x="-31390.7" showstereotype="1" y="-3881.09" text="1" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="uPMy3eCL4XpDH" pretext="" role="701" width="14" isinstance="0" posttext="" usefillcolor="1" fillcolor="none" xmi.id="uaVtdAv2hzbV6" autoresize="1" height="17"/> + <classwidget linecolor="#ff0000" usesdiagramfillcolor="0" linewidth="0" textcolor="#000000" showoperations="1" usesdiagramusefillcolor="0" showpubliconly="0" showpackage="1" x="-31027" showattsigs="601" showstereotype="1" y="-3668" showattributes="1" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="ujZmkNCE0Dd6n" width="118" isinstance="0" usefillcolor="1" fillcolor="#ffff00" xmi.id="uvEmnEk3IKScK" autoresize="1" showscope="1" height="29" showopsigs="601"/> + <notewidget linecolor="#ff0000" usesdiagramfillcolor="0" linewidth="0" textcolor="#000000" usesdiagramusefillcolor="0" x="-31292" showstereotype="1" y="-3829" text="Material owns closest&any hit programs.
Note that materials are assigned with ray type, so single material can have multiple anyhit programs for different ray types" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="uYPfEXvJLxMw5" width="304" isinstance="0" noteType="0" usefillcolor="1" fillcolor="#ffff00" xmi.id="uDQXD91IkDJNF" autoresize="0" height="86"/> + <notewidget linecolor="#ff0000" usesdiagramfillcolor="0" linewidth="0" textcolor="#000000" usesdiagramusefillcolor="0" x="-31730" showstereotype="1" y="-3827" text="GeometryInstance has multiple materials attached" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="uyz2ob3FnoF0i" width="133" isinstance="0" noteType="0" usefillcolor="1" fillcolor="#ffff00" xmi.id="uaiaBvSGOhwLk" autoresize="0" height="63"/> + <classwidget linecolor="#ff0000" usesdiagramfillcolor="0" linewidth="0" textcolor="#000000" showoperations="1" usesdiagramusefillcolor="0" showpubliconly="0" showpackage="1" x="-31926" showattsigs="601" showstereotype="1" y="-3444" showattributes="1" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="ucSaBIXsap0ss" width="183" isinstance="0" usefillcolor="1" fillcolor="#ffff00" xmi.id="uBkpGZVV3XFYp" autoresize="0" showscope="1" height="79" showopsigs="601"/> + <classwidget linecolor="#ff0000" usesdiagramfillcolor="0" linewidth="0" textcolor="#000000" showoperations="1" usesdiagramusefillcolor="0" showpubliconly="0" showpackage="1" x="-31917" showattsigs="601" showstereotype="1" y="-4066" showattributes="1" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="ujfOYbLDWVPgg" width="214" isinstance="0" usefillcolor="1" fillcolor="#ffff00" xmi.id="ugcTKlWPxCkmw" autoresize="0" showscope="1" height="97" showopsigs="601"/> + <classwidget linecolor="#ff0000" usesdiagramfillcolor="0" linewidth="0" textcolor="#000000" showoperations="1" usesdiagramusefillcolor="0" showpubliconly="0" showpackage="1" x="-31653" showattsigs="601" showstereotype="1" y="-3447" showattributes="1" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="uahPfTgq1nPfO" width="174" isinstance="0" 
usefillcolor="1" fillcolor="#ffff00" xmi.id="uv4oPTpSbxLsV" autoresize="0" showscope="1" height="71" showopsigs="601"/> + <notewidget linecolor="#ff0000" usesdiagramfillcolor="0" linewidth="0" textcolor="#000000" usesdiagramusefillcolor="0" x="-31925" showstereotype="1" y="-3337" text="This is custom geometry, contains multiple geometries with multiple object&bounding box&intersection fn" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="uCNEtKQYi276g" width="244" isinstance="0" noteType="0" usefillcolor="1" fillcolor="#ffff00" xmi.id="u9ooy1tw1SdvR" autoresize="0" height="79"/> + <notewidget linecolor="#ff0000" usesdiagramfillcolor="0" linewidth="0" textcolor="#000000" usesdiagramusefillcolor="0" x="-31650" showstereotype="1" y="-3343" text="This is for builtin geometry (list of triangles)" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="usrgmdPUL9jba" width="179" isinstance="0" noteType="0" usefillcolor="1" fillcolor="#ffff00" xmi.id="uYlWdxITChmZM" autoresize="0" height="44"/> + <classwidget linecolor="#ff0000" usesdiagramfillcolor="0" linewidth="0" textcolor="#000000" showoperations="1" usesdiagramusefillcolor="0" showpubliconly="0" showpackage="1" x="-31332" showattsigs="601" showstereotype="1" y="-4057" showattributes="1" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="uOTq4K1Cqi4vR" width="235" isinstance="0" usefillcolor="1" fillcolor="#ffff00" xmi.id="uaUkcqQH5jm0M" autoresize="0" showscope="1" height="111" showopsigs="601"/> + <classwidget linecolor="#ff0000" usesdiagramfillcolor="0" linewidth="0" textcolor="#000000" showoperations="1" usesdiagramusefillcolor="0" showpubliconly="0" showpackage="1" x="-31621" showattsigs="601" showstereotype="1" y="-3946" showattributes="1" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="ueNGI2MLluAxZ" width="208" isinstance="0" usefillcolor="1" fillcolor="#ffff00" xmi.id="ubXw40UTKntCF" autoresize="0" showscope="1" height="94" showopsigs="601"/> + <classwidget linecolor="#ff0000" usesdiagramfillcolor="0" linewidth="0" 
textcolor="#000000" showoperations="1" usesdiagramusefillcolor="0" showpubliconly="0" showpackage="1" x="-30984" showattsigs="601" showstereotype="1" y="-4046" showattributes="1" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="uLANlbplsfs75" width="208" isinstance="0" usefillcolor="1" fillcolor="#ffff00" xmi.id="ujiTNZOfc0exh" autoresize="0" showscope="1" height="94" showopsigs="601"/> + <notewidget linecolor="#ff0000" usesdiagramfillcolor="0" linewidth="0" textcolor="#000000" usesdiagramusefillcolor="0" x="-31336" showstereotype="1" y="-4112" text="A group can also contain other groups" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="uYHQLFRm1ch8z" width="242" isinstance="0" noteType="0" usefillcolor="1" fillcolor="#ffff00" xmi.id="ukVyQoIsikAEc" autoresize="0" height="34"/> + </widgets> + <messages/> + <associations> + <assocwidget indexa="1" linecolor="#ff0000" usesdiagramfillcolor="1" widgetbid="uIQE8ZtQZcVqQ" indexb="1" linewidth="0" seqnum="" textcolor="#000000" usesdiagramusefillcolor="1" totalcounta="2" totalcountb="2" widgetaid="uT8T3XgBcOE49" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="u6SRxWhAuxNLC" usefillcolor="1" fillcolor="none" xmi.id="utrP0dPU1I45r" autoresize="1" type="503"> + <linepath layout="Direct"> + <startpoint startx="-31320.7" starty="-3717.09"/> + <endpoint endx="-31026.7" endy="-3717.09"/> + </linepath> + <floatingtext linecolor="#ff0000" usesdiagramfillcolor="1" linewidth="0" textcolor="none" usesdiagramusefillcolor="1" x="-31173.7" showstereotype="1" y="-3717.09" text="ray type" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="uOhYTZUIREdwh" pretext="" role="703" width="49" isinstance="0" posttext="" usefillcolor="1" fillcolor="none" xmi.id="udVJnfC7SBCDp" autoresize="1" height="19"/> + <floatingtext linecolor="#ff0000" usesdiagramfillcolor="1" linewidth="0" textcolor="none" usesdiagramusefillcolor="1" x="-31317" showstereotype="1" y="-3742" text="*" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="u13NMBPWpv7Om" pretext="" role="701" 
width="14" isinstance="0" posttext="" usefillcolor="1" fillcolor="none" xmi.id="uTPP0XUbLxORk" autoresize="1" height="19"/> + <floatingtext linecolor="#ff0000" usesdiagramfillcolor="1" linewidth="0" textcolor="none" usesdiagramusefillcolor="1" x="-31042" showstereotype="1" y="-3741" text="*" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="u4SAz6RZOZ4XF" pretext="" role="702" width="14" isinstance="0" posttext="" usefillcolor="1" fillcolor="none" xmi.id="ubkLWYh6niODt" autoresize="1" height="19"/> + </assocwidget> + <assocwidget indexa="1" linecolor="#ff0000" usesdiagramfillcolor="1" widgetbid="uvEmnEk3IKScK" indexb="1" linewidth="0" seqnum="" textcolor="#000000" usesdiagramusefillcolor="1" totalcounta="2" totalcountb="2" widgetaid="uT8T3XgBcOE49" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="uMMwDbUyYaEF9" usefillcolor="1" fillcolor="none" xmi.id="u9ZXRmyhsB43f" autoresize="1" type="503"> + <linepath layout="Direct"> + <startpoint startx="-31320.7" starty="-3668"/> + <endpoint endx="-31027" endy="-3668"/> + </linepath> + <floatingtext linecolor="#ff0000" usesdiagramfillcolor="1" linewidth="0" textcolor="none" usesdiagramusefillcolor="1" x="-31173.8" showstereotype="1" y="-3668" text="ray type" font="MS Shell Dlg 2,8.2,-1,5,50,0,0,0,0,0" localid="u5SQxmsRrnsJo" pretext="" role="703" width="49" isinstance="0" posttext="" usefillcolor="1" fillcolor="none" xmi.id="ua5B7s43RKYq2" autoresize="1" height="17"/> + <floatingtext linecolor="#ff0000" usesdiagramfillcolor="1" linewidth="0" textcolor="none" usesdiagramusefillcolor="1" x="-31316.8" showstereotype="1" y="-3686.81" text="*" font="MS Shell Dlg 2,8.2,-1,5,50,0,0,0,0,0" localid="u1IDcVL6lOqZt" pretext="" role="701" width="14" isinstance="0" posttext="" usefillcolor="1" fillcolor="none" xmi.id="uzrXOxEntMPwq" autoresize="1" height="17"/> + <floatingtext linecolor="#ff0000" usesdiagramfillcolor="1" linewidth="0" textcolor="none" usesdiagramusefillcolor="1" x="-31035.9" showstereotype="1" y="-3684.27" text="*" font="MS 
Shell Dlg 2,8.2,-1,5,50,0,0,0,0,0" localid="uVGYWrAWvuQwh" pretext="" role="702" width="14" isinstance="0" posttext="" usefillcolor="1" fillcolor="none" xmi.id="uss8cubDc7rDD" autoresize="1" height="17"/> + </assocwidget> + <assocwidget indexa="1" linecolor="#ff0000" usesdiagramfillcolor="1" widgetbid="uT8T3XgBcOE49" indexb="1" linewidth="0" seqnum="" textcolor="#000000" usesdiagramusefillcolor="1" totalcounta="2" totalcountb="2" widgetaid="u8GfhQMSTWw6X" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="uIh03iTRXilrj" usefillcolor="1" fillcolor="none" xmi.id="uSR4d7VATIHFm" autoresize="1" type="503"> + <linepath layout="Direct"> + <startpoint startx="-31696.7" starty="-3725.09"/> + <endpoint endx="-31482.7" endy="-3725.09"/> + </linepath> + <floatingtext linecolor="#ff0000" usesdiagramfillcolor="1" linewidth="0" textcolor="none" usesdiagramusefillcolor="1" x="-31694" showstereotype="1" y="-3731" text="*" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="uYCNyMxBAr9bl" pretext="" role="701" width="14" isinstance="0" posttext="" usefillcolor="1" fillcolor="none" xmi.id="ulN6At6UoXiNi" autoresize="1" height="19"/> + <floatingtext linecolor="#ff0000" usesdiagramfillcolor="1" linewidth="0" textcolor="none" usesdiagramusefillcolor="1" x="-31496" showstereotype="1" y="-3745" text="*" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="uwsohbegLLKLg" pretext="" role="702" width="14" isinstance="0" posttext="" usefillcolor="1" fillcolor="none" xmi.id="uCEYNIoPOQAEu" autoresize="1" height="19"/> + </assocwidget> + <assocwidget linecolor="#ff0000" roleBdoc="" indexa="1" usesdiagramfillcolor="1" widgetbid="u8GfhQMSTWw6X" indexb="1" linewidth="0" seqnum="" textcolor="none" usesdiagramusefillcolor="1" totalcounta="2" totalcountb="2" roleAdoc="" widgetaid="uaiaBvSGOhwLk" font="Sans Serif,9,-1,0,50,0,0,0,0,0" visibilityA="0" visibilityB="0" localid="uvQ8LUs5WDMP3" usefillcolor="1" fillcolor="none" changeabilityA="900" xmi.id="u9XbSEZyaXmex" changeabilityB="900" documentation="" 
autoresize="1" type="513"> + <linepath layout="Direct"> + <startpoint startx="-31696.7" starty="-3764"/> + <endpoint endx="-31696.7" endy="-3744.09"/> + </linepath> + </assocwidget> + <assocwidget linecolor="#ff0000" roleBdoc="" indexa="1" usesdiagramfillcolor="1" widgetbid="uT8T3XgBcOE49" indexb="1" linewidth="0" seqnum="" textcolor="none" usesdiagramusefillcolor="1" totalcounta="2" totalcountb="2" roleAdoc="" widgetaid="uDQXD91IkDJNF" font="Sans Serif,9,-1,0,50,0,0,0,0,0" visibilityA="0" visibilityB="0" localid="uH2crv812FIaZ" usefillcolor="1" fillcolor="none" changeabilityA="900" xmi.id="uEdSbCM8sYgTd" changeabilityB="900" documentation="" autoresize="1" type="513"> + <linepath layout="Direct"> + <startpoint startx="-31292" starty="-3743"/> + <endpoint endx="-31320.7" endy="-3741.09"/> + </linepath> + </assocwidget> + <assocwidget linecolor="#ff0000" roleBdoc="" indexa="1" usesdiagramfillcolor="1" widgetbid="uBkpGZVV3XFYp" indexb="1" linewidth="0" seqnum="" textcolor="none" usesdiagramusefillcolor="1" totalcounta="2" totalcountb="2" roleAdoc="" widgetaid="u9ooy1tw1SdvR" font="Sans Serif,9,-1,0,50,0,0,0,0,0" visibilityA="0" visibilityB="0" localid="uKMPftHRyBUys" usefillcolor="1" fillcolor="none" changeabilityA="900" xmi.id="uAcfP9xctqAuf" changeabilityB="900" documentation="" autoresize="1" type="513"> + <linepath layout="Direct"> + <startpoint startx="-31834.5" starty="-3337"/> + <endpoint endx="-31834.5" endy="-3365"/> + </linepath> + </assocwidget> + <assocwidget linecolor="#ff0000" roleBdoc="" indexa="1" usesdiagramfillcolor="1" widgetbid="uv4oPTpSbxLsV" indexb="1" linewidth="0" seqnum="" textcolor="none" usesdiagramusefillcolor="1" totalcounta="2" totalcountb="2" roleAdoc="" widgetaid="uYlWdxITChmZM" font="Sans Serif,9,-1,0,50,0,0,0,0,0" visibilityA="0" visibilityB="0" localid="ujQzfzv7X2ani" usefillcolor="1" fillcolor="none" changeabilityA="900" xmi.id="u8wNbPBCGKX5l" changeabilityB="900" documentation="" autoresize="1" type="513"> + <linepath 
layout="Direct"> + <startpoint startx="-31565" starty="-3343"/> + <endpoint endx="-31565" endy="-3376"/> + </linepath> + </assocwidget> + <assocwidget indexa="1" linecolor="#ff0000" usesdiagramfillcolor="1" widgetbid="u8GfhQMSTWw6X" indexb="1" linewidth="0" seqnum="" textcolor="#000000" usesdiagramusefillcolor="1" totalcounta="2" totalcountb="2" widgetaid="ugcTKlWPxCkmw" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="uwhQPG9po72GG" usefillcolor="1" fillcolor="none" xmi.id="upGO08TSdwn4u" autoresize="1" type="503"> + <linepath layout="Direct"> + <startpoint startx="-31811.2" starty="-3969"/> + <endpoint endx="-31811.2" endy="-3744.09"/> + </linepath> + <floatingtext linecolor="#ff0000" usesdiagramfillcolor="1" linewidth="0" textcolor="none" usesdiagramusefillcolor="1" x="-31809" showstereotype="1" y="-3967" text="1" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="uFow5R3DSWqhz" pretext="" role="701" width="15" isinstance="0" posttext="" usefillcolor="1" fillcolor="none" xmi.id="uqsZFsZgStiur" autoresize="1" height="19"/> + <floatingtext linecolor="#ff0000" usesdiagramfillcolor="1" linewidth="0" textcolor="none" usesdiagramusefillcolor="1" x="-31809" showstereotype="1" y="-3763" text="*" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="uuw9LWRnFlLaj" pretext="" role="702" width="14" isinstance="0" posttext="" usefillcolor="1" fillcolor="none" xmi.id="u1MjOTmGFhmvr" autoresize="1" height="19"/> + </assocwidget> + <assocwidget indexa="1" linecolor="#ff0000" usesdiagramfillcolor="1" widgetbid="uBkpGZVV3XFYp" indexb="1" linewidth="0" seqnum="" textcolor="#000000" usesdiagramusefillcolor="1" totalcounta="2" totalcountb="2" widgetaid="u8GfhQMSTWw6X" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="uhOM5ihbPWFAA" usefillcolor="1" fillcolor="none" xmi.id="uk8oAK1mM7ozL" autoresize="1" type="503"> + <linepath layout="Direct"> + <startpoint startx="-31834.5" starty="-3652.09"/> + <endpoint endx="-31834.5" endy="-3444"/> + </linepath> + <floatingtext linecolor="#ff0000" 
usesdiagramfillcolor="1" linewidth="0" textcolor="none" usesdiagramusefillcolor="1" x="-31832" showstereotype="1" y="-3650" text="*" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="uK0NXsmdhVFUa" pretext="" role="701" width="13" isinstance="0" posttext="" usefillcolor="1" fillcolor="none" xmi.id="uBny9Vb5hsVcB" autoresize="1" height="19"/> + <floatingtext linecolor="#ff0000" usesdiagramfillcolor="1" linewidth="0" textcolor="none" usesdiagramusefillcolor="1" x="-31832" showstereotype="1" y="-3463" text="1" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="uummQKk5HJxaP" pretext="" role="702" width="15" isinstance="0" posttext="" usefillcolor="1" fillcolor="none" xmi.id="uAkR1b1HykWkL" autoresize="1" height="19"/> + </assocwidget> + <assocwidget indexa="1" linecolor="#ff0000" usesdiagramfillcolor="1" widgetbid="uv4oPTpSbxLsV" indexb="1" linewidth="0" seqnum="" textcolor="#000000" usesdiagramusefillcolor="1" totalcounta="2" totalcountb="2" widgetaid="u8GfhQMSTWw6X" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="uUWMM0TuiqUHK" usefillcolor="1" fillcolor="none" xmi.id="uDzHbQozO8QPz" autoresize="1" type="503"> + <linepath layout="Direct"> + <startpoint startx="-31696.7" starty="-3652.09"/> + <endpoint endx="-31653" endy="-3447"/> + </linepath> + <floatingtext linecolor="#ff0000" usesdiagramfillcolor="1" linewidth="0" textcolor="none" usesdiagramusefillcolor="1" x="-31712" showstereotype="1" y="-3650" text="*" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="uJnQ4sEaBMukn" pretext="" role="701" width="13" isinstance="0" posttext="" usefillcolor="1" fillcolor="none" xmi.id="uVlblMnMmhnEM" autoresize="1" height="19"/> + <floatingtext linecolor="#ff0000" usesdiagramfillcolor="1" linewidth="0" textcolor="none" usesdiagramusefillcolor="1" x="-31651" showstereotype="1" y="-3466" text="1" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="u3jp0vP0lffxd" pretext="" role="702" width="15" isinstance="0" posttext="" usefillcolor="1" fillcolor="none" xmi.id="uZQKoDKDOs76u" autoresize="1" 
height="19"/> + </assocwidget> + <assocwidget indexa="1" linecolor="#ff0000" usesdiagramfillcolor="1" widgetbid="ugcTKlWPxCkmw" indexb="1" linewidth="0" seqnum="" textcolor="#000000" usesdiagramusefillcolor="1" totalcounta="2" totalcountb="2" widgetaid="uaUkcqQH5jm0M" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="uVCihITAenYUr" usefillcolor="1" fillcolor="none" xmi.id="upcGcuivVZTGb" autoresize="1" type="503"> + <linepath layout="Direct"> + <startpoint startx="-31332" starty="-4017.5"/> + <endpoint endx="-31703" endy="-4017.5"/> + </linepath> + <floatingtext linecolor="#ff0000" usesdiagramfillcolor="1" linewidth="0" textcolor="none" usesdiagramusefillcolor="1" x="-31348" showstereotype="1" y="-4036" text="1" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="uDRSsAGCXSApw" pretext="" role="701" width="15" isinstance="0" posttext="" usefillcolor="1" fillcolor="none" xmi.id="uNVgbMqUCo8Wd" autoresize="1" height="19"/> + <floatingtext linecolor="#ff0000" usesdiagramfillcolor="1" linewidth="0" textcolor="none" usesdiagramusefillcolor="1" x="-31701" showstereotype="1" y="-4036" text="*" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="ulvkFwcrXU0hp" pretext="" role="702" width="14" isinstance="0" posttext="" usefillcolor="1" fillcolor="none" xmi.id="uLNiM5joWC7sz" autoresize="1" height="19"/> + </assocwidget> + <assocwidget indexa="1" linecolor="#ff0000" usesdiagramfillcolor="1" widgetbid="ugcTKlWPxCkmw" indexb="1" linewidth="0" seqnum="" textcolor="#000000" usesdiagramusefillcolor="1" totalcounta="2" totalcountb="2" widgetaid="ubXw40UTKntCF" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="uJMU22GbUetYv" usefillcolor="1" fillcolor="none" xmi.id="uPLXgkXflHdRn" autoresize="1" type="503"> + <linepath layout="Direct"> + <startpoint startx="-31621" starty="-3946"/> + <endpoint endx="-31703" endy="-3969"/> + </linepath> + <floatingtext linecolor="#ff0000" usesdiagramfillcolor="1" linewidth="0" textcolor="none" usesdiagramusefillcolor="1" x="-31627.5" showstereotype="1" 
y="-3962.25" text="1" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="uPnd1o1IIPZ0q" pretext="" role="701" width="15" isinstance="0" posttext="" usefillcolor="1" fillcolor="none" xmi.id="uAldbEdj6TU4G" autoresize="1" height="19"/> + <floatingtext linecolor="#ff0000" usesdiagramfillcolor="1" linewidth="0" textcolor="none" usesdiagramusefillcolor="1" x="-31710.5" showstereotype="1" y="-3968.75" text="1" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="uxQja7WF862Ac" pretext="" role="702" width="15" isinstance="0" posttext="" usefillcolor="1" fillcolor="none" xmi.id="u5E9nAT27IkZ1" autoresize="1" height="19"/> + </assocwidget> + <assocwidget indexa="1" linecolor="#ff0000" usesdiagramfillcolor="1" widgetbid="ujiTNZOfc0exh" indexb="1" linewidth="0" seqnum="" textcolor="#000000" usesdiagramusefillcolor="1" totalcounta="2" totalcountb="2" widgetaid="uaUkcqQH5jm0M" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="uu36PKSvcOf7P" usefillcolor="1" fillcolor="none" xmi.id="uhM4v5YYXposu" autoresize="1" type="503"> + <linepath layout="Direct"> + <startpoint startx="-31097" starty="-3952"/> + <endpoint endx="-30984" endy="-3952"/> + </linepath> + <floatingtext linecolor="#ff0000" usesdiagramfillcolor="1" linewidth="0" textcolor="none" usesdiagramusefillcolor="1" x="-31095" showstereotype="1" y="-3971" text="1" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="uADPUpMH3Aa21" pretext="" role="701" width="15" isinstance="0" posttext="" usefillcolor="1" fillcolor="none" xmi.id="ulFH27CQ8E8cO" autoresize="1" height="19"/> + <floatingtext linecolor="#ff0000" usesdiagramfillcolor="1" linewidth="0" textcolor="none" usesdiagramusefillcolor="1" x="-31000" showstereotype="1" y="-3971" text="1" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="uGZt6uSANKhDX" pretext="" role="702" width="15" isinstance="0" posttext="" usefillcolor="1" fillcolor="none" xmi.id="ugheDDYuQXwPt" autoresize="1" height="19"/> + </assocwidget> + <assocwidget indexa="0" linecolor="#ff0000" usesdiagramfillcolor="1" 
widgetbid="uaUkcqQH5jm0M" indexb="0" linewidth="0" seqnum="" textcolor="none" usesdiagramusefillcolor="1" totalcounta="0" totalcountb="0" widgetaid="uaUkcqQH5jm0M" font="Sans Serif,9,-1,0,50,0,0,0,0,0" localid="u1v0dtPk1jdCF" usefillcolor="1" fillcolor="none" xmi.id="uAf8nXusYMTVV" autoresize="1" type="504"> + <linepath layout="Direct"> + <startpoint startx="-31273.2" starty="-3946"/> + <endpoint endx="-31155.8" endy="-3946"/> + <point x="-31273.2" y="-3896"/> + <point x="-31155.8" y="-3896"/> + </linepath> + </assocwidget> + <assocwidget linecolor="#ff0000" roleBdoc="" indexa="1" usesdiagramfillcolor="1" widgetbid="uaUkcqQH5jm0M" indexb="1" linewidth="0" seqnum="" textcolor="none" usesdiagramusefillcolor="1" totalcounta="2" totalcountb="2" roleAdoc="" widgetaid="ukVyQoIsikAEc" font="Sans Serif,9,-1,0,50,0,0,0,0,0" visibilityA="0" visibilityB="0" localid="uzQ4mQay3ffLO" usefillcolor="1" fillcolor="none" changeabilityA="900" xmi.id="ut2wsGkOxNfPM" changeabilityB="900" documentation="" autoresize="1" type="513"> + <linepath layout="Direct"> + <startpoint startx="-31214.5" starty="-4078"/> + <endpoint endx="-31214.5" endy="-4057"/> + </linepath> + </assocwidget> + </associations> + </diagram> + </diagrams> + </XMI.extension> + </UML:Model> + <UML:Model visibility="public" isSpecification="false" namespace="m1" isAbstract="false" isLeaf="false" isRoot="false" xmi.id="Use_Case_View" name="Use Case View"> + <UML:Namespace.ownedElement/> + </UML:Model> + <UML:Model visibility="public" isSpecification="false" namespace="m1" isAbstract="false" isLeaf="false" isRoot="false" xmi.id="Component_View" name="Component View"> + <UML:Namespace.ownedElement/> + </UML:Model> + <UML:Model visibility="public" isSpecification="false" namespace="m1" isAbstract="false" isLeaf="false" isRoot="false" xmi.id="Deployment_View" name="Deployment View"> + <UML:Namespace.ownedElement/> + </UML:Model> + <UML:Model visibility="public" isSpecification="false" namespace="m1" isAbstract="false" 
isLeaf="false" isRoot="false" xmi.id="Entity_Relationship_Model" name="Entity Relationship Model"> + <UML:Namespace.ownedElement/> + </UML:Model> + </UML:Namespace.ownedElement> + </UML:Model> + </XMI.content> + <XMI.extensions xmi.extender="umbrello"> + <docsettings viewid="ucudBR6CdC2x5" uniqueid="u5QNyv34FaCVq" documentation=""/> + <listview> + <listitem open="1" type="800" id="Views"> + <listitem open="1" type="821" id="Component_View"/> + <listitem open="1" type="827" id="Deployment_View"/> + <listitem open="1" type="836" id="Entity_Relationship_Model"/> + <listitem open="1" type="801" id="Logical_View"> + <listitem open="1" type="813" id="ujiTNZOfc0exh"/> + <listitem open="1" type="813" id="ubXw40UTKntCF"/> + <listitem open="1" type="813" id="uIQE8ZtQZcVqQ"/> + <listitem open="0" type="807" id="ucudBR6CdC2x5" label="class diagram"/> + <listitem open="1" type="813" id="uvEmnEk3IKScK"/> + <listitem open="0" type="830" id="Datatypes"> + <listitem open="1" type="829" id="uNhglXIPgP3OZ"/> + <listitem open="0" type="829" id="urkyyLXLOw5TT"/> + <listitem open="0" type="829" id="uMbpjW1xDQC9k"/> + <listitem open="0" type="829" id="uPzy2V8JzqWIM"/> + <listitem open="0" type="829" id="uOoodsG4cpKdS"/> + <listitem open="0" type="829" id="uwDFPKBYx4UKI"/> + <listitem open="0" type="829" id="uZPgTO2b0RIpn"/> + <listitem open="0" type="829" id="uL5S9FX3x2Scx"/> + <listitem open="0" type="829" id="upyPDwCvXGBsU"/> + <listitem open="0" type="829" id="uuUhSpiZb8vSR"/> + <listitem open="0" type="829" id="uF3r31sQTNHFR"/> + <listitem open="0" type="829" id="uzYTDoL9j56IG"/> + <listitem open="0" type="829" id="uHrtmL1b948Dd"/> + <listitem open="0" type="829" id="uHSpplYNTixid"/> + <listitem open="0" type="829" id="uUSXACVg6nABi"/> + <listitem open="0" type="829" id="u87Jbv7DzhwXd"/> + <listitem open="0" type="829" id="uaDZMyWmNVCUA"/> + <listitem open="0" type="829" id="u7lCVcN6gpMPY"/> + <listitem open="0" type="829" id="uBi5EKcjBO3ee"/> + </listitem> + <listitem open="1" 
type="813" id="uBkpGZVV3XFYp"/> + <listitem open="1" type="813" id="ugcTKlWPxCkmw"/> + <listitem open="1" type="813" id="u8GfhQMSTWw6X"/> + <listitem open="1" type="813" id="uv4oPTpSbxLsV"/> + <listitem open="1" type="813" id="uaUkcqQH5jm0M"/> + <listitem open="1" type="813" id="uT8T3XgBcOE49"/> + <listitem open="1" type="817" id="uWQoz0qQSrNhL"/> + </listitem> + <listitem open="1" type="802" id="Use_Case_View"/> + </listitem> + </listview> + <codegeneration> + <codegenerator language="C++"/> + </codegeneration> + </XMI.extensions> +</XMI> diff --git a/zluda_rt/src/acceleration.rs b/zluda_rt/src/acceleration.rs new file mode 100644 index 0000000..371b747 --- /dev/null +++ b/zluda_rt/src/acceleration.rs @@ -0,0 +1,139 @@ +use crate::{
+ context::{self, Context, ContextData},
+ geometry_group::GeometryGroupData,
+ group::GroupData,
+ null_check, null_unwrap, MaybeWeakRefMut, OptixCell, OptixObjectData, TypeTag,
+};
+use hiprt_sys::hiprtBuildFlagBits;
+use optix_types::*;
+use std::ffi::CStr;
+use std::{
+ ffi::CString,
+ rc::{Rc, Weak},
+};
+
+ pub(crate) type Acceleration = *const OptixCell<AccelerationData>; // Raw handle to an acceleration object; `create` writes one of these to the caller's out-pointer.
+
+ pub(crate) struct AccelerationData { // State stored behind an `Acceleration` handle.
+ pub(crate) context: Weak<OptixCell<ContextData>>, // Weak link to the context this object was created in.
+ pub(crate) owner: Option<AccelerationOwner>, // Group or geometry group recorded as owner; `None` right after creation.
+ builder: CString, // Builder name set by `set_builder`; empty at creation and translated by `to_hiprt`.
+ }
+
+ impl AccelerationData {
+     /// Builds the initial state: no owner and an empty builder name.
+     fn new(weak_context: Weak<OptixCell<ContextData>>, _: &mut ContextData) -> Self {
+         let builder = CString::default();
+         Self {
+             builder,
+             owner: None,
+             context: weak_context,
+         }
+     }
+
+     /// Records a freshly created acceleration object in its context.
+     fn register(this: Rc<OptixCell<Self>>, context: &mut ContextData) {
+         context.accelerations.insert(this);
+     }
+
+     /// Creates an acceleration object as a sub-object of `context`.
+     unsafe fn create(context: Context) -> Result<Acceleration, RTresult> {
+         context::create_subobject(context, Self::new, Self::register)
+     }
+
+     /// Translates the builder name into hiprt build flags.
+     ///
+     /// Returns `None` when the name is not one of the recognized builders,
+     /// which includes the empty default name.
+     pub(crate) fn to_hiprt(&self) -> Option<hiprtBuildFlagBits> {
+         match self.builder.as_bytes() {
+             b"NoAccel" => Some(hiprtBuildFlagBits::hiprtBuildFlagBitPreferFastBuild),
+             // As of version 1.2.0 high quality gives crashes, so "Sbvh" and
+             // "Trbvh" get the same balanced build as "Bvh"
+             b"Bvh" | b"Sbvh" | b"Trbvh" => {
+                 Some(hiprtBuildFlagBits::hiprtBuildFlagBitPreferBalancedBuild)
+             }
+             _ => None,
+         }
+     }
+ }
+
+ impl OptixObjectData for AccelerationData {
+     const TYPE: TypeTag = TypeTag::Acceleration;
+
+     /// Removes this object from its context's set of accelerations.
+     ///
+     /// Succeeds without doing anything when the context is already gone;
+     /// an error is returned only if borrowing the context fails.
+     fn deregister(&mut self, this: &Rc<OptixCell<Self>>) -> Result<(), RTresult> {
+         match self.context.upgrade() {
+             None => Ok(()),
+             Some(strong_context) => {
+                 let mut context_data = (*strong_context).borrow_mut()?;
+                 context_data.accelerations.remove(this);
+                 Ok(())
+             }
+         }
+     }
+
+     /// Hands out the weak link to the owning context.
+     fn context<'a>(&'a mut self) -> MaybeWeakRefMut<'a, ContextData> {
+         MaybeWeakRefMut::Weak(&self.context)
+     }
+ }
+
+ pub(crate) enum AccelerationOwner { // Kinds of objects that can be recorded in `AccelerationData::owner`.
+ Group(Weak<OptixCell<GroupData>>), // Weak link to a group.
+ GeometryGroup(Weak<OptixCell<GeometryGroupData>>), // Weak link to a geometry group.
+ }
+
+ /// Creates a new acceleration object in `context` and stores its handle in `*acceleration`.
+ ///
+ /// Both pointers are null-checked first; nothing is written on failure.
+ pub(crate) unsafe fn create(
+     context: Context,
+     acceleration: *mut Acceleration,
+ ) -> Result<(), RTresult> {
+     null_check(context)?;
+     null_check(acceleration)?;
+     let handle = AccelerationData::create(context)?;
+     *acceleration = handle;
+     Ok(())
+ }
+
+ /// Replaces the builder name of `acceleration` with a copy of the C string `builder`.
+ ///
+ /// `builder` must point to a NUL-terminated string; only its null-ness is checked here.
+ pub(crate) unsafe fn set_builder(
+     acceleration: *const OptixCell<AccelerationData>,
+     builder: *const i8,
+ ) -> Result<(), RTresult> {
+     null_check(builder)?;
+     let cell = null_unwrap(acceleration)?;
+     let mut data = cell.borrow_mut()?;
+     let name: CString = CStr::from_ptr(builder).into();
+     data.builder = name;
+     Ok(())
+ }
+
+ /// Invalidates the context that `acceleration` belongs to.
+ ///
+ /// Returns `RT_ERROR_INVALID_CONTEXT` when that context no longer exists.
+ pub(crate) unsafe fn mark_dirty(acceleration: Acceleration) -> Result<(), RTresult> {
+     let cell = null_unwrap(acceleration)?;
+     let data = cell.borrow()?;
+     let strong_context = match data.context.upgrade() {
+         Some(strong_context) => strong_context,
+         None => return Err(RTresult::RT_ERROR_INVALID_CONTEXT),
+     };
+     let mut context_data = strong_context.borrow_mut()?;
+     context_data.invalidate();
+     Ok(())
+ }
+
+ pub(crate) fn destroy(_acceleration: Acceleration) -> Result<(), RTresult> { // Stub: ignores the handle and reports success.
+ // TODO: implement
+ Ok(())
+ }
+
+ /// Writes the raw pointer of the context that `acceleration` belongs to into `*context`.
+ ///
+ /// Both `acceleration` and the `context` out-pointer are null-checked before
+ /// anything is written, matching the checks `create` performs on its out-pointer.
+ pub(crate) unsafe fn get_context(
+     acceleration: *const OptixCell<AccelerationData>,
+     context: *mut *const OptixCell<ContextData>,
+ ) -> Result<(), RTresult> {
+     // Reject a null out-pointer instead of dereferencing it below
+     null_check(context)?;
+     let acceleration = null_unwrap(acceleration)?;
+     let acceleration = acceleration.borrow()?;
+     *context = acceleration.context.as_ptr();
+     Ok(())
+ }
+
+ #[cfg(test)]
+ mod tests {
+ use crate::optix_test;
+ use crate::test_common::OptixFns;
+ use std::{ffi::CStr, ptr};
+
+ optix_test!(default_acceleration); // Hands the function below to the `optix_test!` macro (defined elsewhere in the crate).
+
+ unsafe fn default_acceleration<Optix: OptixFns>(o: Optix) { // Checks that a freshly created acceleration reports an empty builder name.
+ let mut ctx = ptr::null_mut();
+ o.rtContextCreate(&mut ctx); // The return codes of the rt* calls are not inspected in this test.
+ let mut accel = ptr::null_mut();
+ o.rtAccelerationCreate(ctx, &mut accel);
+ let mut builder = ptr::null();
+ o.rtAccelerationGetBuilder(accel, &mut builder);
+ let builder_name = CStr::from_ptr(builder);
+ assert_eq!(builder_name.to_str().unwrap(), "");
+ o.rtContextDestroy(ctx);
+ }
+ }
diff --git a/zluda_rt/src/buffer.rs b/zluda_rt/src/buffer.rs new file mode 100644 index 0000000..6727fde --- /dev/null +++ b/zluda_rt/src/buffer.rs @@ -0,0 +1,972 @@ +use crate::{
+ context::{self, Context, ContextData},
+ hip, null_check, null_unwrap, null_unwrap_mut, AlignedBuffer, MaybeWeakRefMut, OptixCell,
+ OptixObjectData, TypeTag,
+};
+use hip_runtime_sys::*;
+use optix_types::*;
+use std::{
+ alloc::Layout,
+ ffi::c_void,
+ mem, ptr,
+ rc::{Rc, Weak},
+};
+
+ pub(crate) type Buffer = *const OptixCell<BufferData>; // Raw handle to a buffer object; also the type passed to on-demand callbacks.
+
+ #[repr(C)]
+ pub(crate) struct DeviceBuffer { // C-layout description of a buffer: a device pointer plus its extents.
+ pub(crate) pointer: hipDeviceptr_t,
+ pub(crate) width: u64,
+ pub(crate) height: u64,
+ }
+
+ struct BufferAllocation { // One device allocation plus its optional host-side mapping; the device memory is freed on drop.
+ size: u64, // Logical size in bytes (`BufferMetadata::byte_size`), not the padded size that is actually allocated.
+ pointer: hipDeviceptr_t, // Device pointer; null when nothing is allocated.
+ host_buffer: Option<AlignedBuffer>, // Host-side buffer handed out by `map`; `Some` while mapped.
+ }
+
+ impl Drop for BufferAllocation {
+     /// Releases the device memory, if there is any; the result of `hipFree` is ignored.
+     #[allow(unused_must_use)]
+     fn drop(&mut self) {
+         if self.pointer.0.is_null() {
+             return;
+         }
+         unsafe { hipFree(self.pointer.0) };
+     }
+ }
+
+ impl BufferAllocation {
+     /// Allocation that owns no device memory and is not mapped.
+     fn empty() -> Self {
+         BufferAllocation {
+             size: 0,
+             pointer: hipDeviceptr_t(ptr::null_mut()),
+             host_buffer: None,
+         }
+     }
+
+     /// Returns an allocation matching `meta`, reusing `this` when its byte size is unchanged.
+     ///
+     /// The boolean is `true` when fresh, zero-filled device memory was allocated and
+     /// `false` when `this` was returned untouched. A `this` that is not reused is
+     /// dropped, which frees its device memory.
+     fn new(this: Option<Self>, meta: BufferMetadata) -> Result<(Self, bool), RTresult> {
+         let size = meta.byte_size();
+         if let Some(this) = this {
+             if size == this.size {
+                 return Ok((this, false));
+             }
+         }
+         let pointer = Self::hip_allocate(meta.allocation_size())?;
+         Ok((
+             Self {
+                 size,
+                 pointer,
+                 host_buffer: None,
+             },
+             true,
+         ))
+     }
+
+     /// Allocates `size` bytes of zero-filled device memory; a `size` of 0 yields a null pointer.
+     ///
+     /// Any failure is reported as `RT_ERROR_MEMORY_ALLOCATION_FAILED`.
+     fn hip_allocate(size: u64) -> Result<hipDeviceptr_t, RTresult> {
+         if size == 0 {
+             return Ok(hipDeviceptr_t(ptr::null_mut()));
+         }
+         let dev_ptr = hip::malloc(size as usize)
+             .map_err(|_| RTresult::RT_ERROR_MEMORY_ALLOCATION_FAILED)?;
+         if hip::zero_fill(dev_ptr, size as usize).is_err() {
+             // The memory was never handed out, so release it here instead of leaking it
+             let _ = unsafe { hipFree(dev_ptr.0) };
+             return Err(RTresult::RT_ERROR_MEMORY_ALLOCATION_FAILED);
+         }
+         Ok(dev_ptr)
+     }
+
+     /// Maps the allocation for host access and returns the host pointer.
+     ///
+     /// Fails with `RT_ERROR_ALREADY_MAPPED` if a mapping is still active. When device
+     /// memory exists the host buffer is created from it through `AlignedBuffer::from_hip`;
+     /// otherwise a minimal host buffer is handed out.
+     fn map(&mut self, meta: BufferMetadata) -> Result<*mut c_void, RTresult> {
+         if self.host_buffer.is_some() {
+             return Err(RTresult::RT_ERROR_ALREADY_MAPPED);
+         }
+         let alignment = BufferMetadata::alignment(meta.format)?;
+         // SAFETY: `BufferMetadata::alignment` only yields 1, 2, 4 or 8, all non-zero
+         // powers of two. NOTE(review): assumes `self.size` stays below `isize::MAX` - confirm
+         let layout = unsafe { Layout::from_size_align_unchecked(self.size as usize, alignment) };
+         let buffer = if self.pointer.0.is_null() {
+             AlignedBuffer::new(Layout::new::<u8>())
+         } else {
+             AlignedBuffer::from_hip(layout, self.pointer)?
+         };
+         let result = buffer.as_ptr();
+         self.host_buffer = Some(buffer);
+         Ok(result as _)
+     }
+
+     /// Ends a mapping: copies the host buffer back to device memory (if any) and drops it.
+     ///
+     /// Fails with `RT_ERROR_INVALID_VALUE` when the allocation is not mapped.
+     fn unmap(&mut self) -> Result<(), RTresult> {
+         match &self.host_buffer {
+             None => Err(RTresult::RT_ERROR_INVALID_VALUE),
+             Some(buffer) => {
+                 if !self.pointer.0.is_null() {
+                     hip! {hipMemcpyHtoD(self.pointer, buffer.as_ptr(), buffer.size), RT_ERROR_MEMORY_ALLOCATION_FAILED};
+                 }
+                 self.host_buffer = None;
+                 Ok(())
+             }
+         }
+     }
+ }
+
+ #[derive(Copy, Clone)]
+ pub(crate) struct BufferMetadata { // Format and extents describing a buffer's contents.
+ pub(crate) format: RTformatSafe,
+ pub(crate) element_size: u64, // Size of one element in bytes (multiplied into `byte_size`).
+ pub(crate) width: u64,
+ pub(crate) height: u64, // 0 counts as a single row in `byte_size` and as one-dimensional in `dimensions`.
+ }
+
+impl BufferMetadata {
+ /// Number of bytes covered by the buffer contents; a height of 0 counts as one row.
+ pub(crate) fn byte_size(&self) -> u64 {
+     let rows = self.height.max(1);
+     self.width * rows * self.element_size
+ }
+
+ /// Number of bytes to allocate on the device: like `byte_size`, but with the
+ /// width padded by one extra element.
+ ///
+ /// The padding exists for Arnold: Arnold 7.1.4.1 has a buggy binary search
+ /// which returns an element one past the end.
+ pub(crate) fn allocation_size(&self) -> u64 {
+     let padded_width = self.width + 1;
+     let rows = self.height.max(1);
+     padded_width * rows * self.element_size
+ }
+
+ /// Dimensionality of the buffer: 1 when `height` is 0, otherwise 2.
+ fn dimensions(&self) -> u32 {
+     match self.height {
+         0 => 1,
+         _ => 2,
+     }
+ }
+
+ /// Extent along dimension `dim`: the width for 0, the height for 1 and 0 for anything else.
+ fn size(&self, dim: u32) -> u64 {
+     if dim == 0 {
+         self.width
+     } else if dim == 1 {
+         self.height
+     } else {
+         0
+     }
+ }
+
+ pub(crate) fn depth(&self) -> usize { // Always 0: this metadata carries no third extent.
+ 0
+ }
+
+ fn element_size(format: RTformatSafe) -> Result<usize, RTresult> { // Size in bytes of one element of `format`; block-compressed (BC*) formats yield RT_ERROR_NOT_SUPPORTED.
+ Ok(match format {
+ RTformatSafe::RT_FORMAT_UNKNOWN => 0,
+ RTformatSafe::RT_FORMAT_USER => 0, // UNKNOWN and USER have no size derivable from the format alone, so 0 is reported.
+ RTformatSafe::RT_FORMAT_FLOAT => mem::size_of::<f32>(),
+ RTformatSafe::RT_FORMAT_FLOAT2 => mem::size_of::<f32>() * 2,
+ RTformatSafe::RT_FORMAT_FLOAT3 => mem::size_of::<f32>() * 3,
+ RTformatSafe::RT_FORMAT_FLOAT4 => mem::size_of::<f32>() * 4,
+ RTformatSafe::RT_FORMAT_BYTE => mem::size_of::<i8>(),
+ RTformatSafe::RT_FORMAT_BYTE2 => mem::size_of::<i8>() * 2,
+ RTformatSafe::RT_FORMAT_BYTE3 => mem::size_of::<i8>() * 3,
+ RTformatSafe::RT_FORMAT_BYTE4 => mem::size_of::<i8>() * 4,
+ RTformatSafe::RT_FORMAT_UNSIGNED_BYTE => mem::size_of::<u8>(),
+ RTformatSafe::RT_FORMAT_UNSIGNED_BYTE2 => mem::size_of::<u8>() * 2,
+ RTformatSafe::RT_FORMAT_UNSIGNED_BYTE3 => mem::size_of::<u8>() * 3,
+ RTformatSafe::RT_FORMAT_UNSIGNED_BYTE4 => mem::size_of::<u8>() * 4,
+ RTformatSafe::RT_FORMAT_SHORT => mem::size_of::<i16>(),
+ RTformatSafe::RT_FORMAT_SHORT2 => mem::size_of::<i16>() * 2,
+ RTformatSafe::RT_FORMAT_SHORT3 => mem::size_of::<i16>() * 3,
+ RTformatSafe::RT_FORMAT_SHORT4 => mem::size_of::<i16>() * 4,
+ RTformatSafe::RT_FORMAT_UNSIGNED_SHORT => mem::size_of::<u16>(),
+ RTformatSafe::RT_FORMAT_UNSIGNED_SHORT2 => mem::size_of::<u16>() * 2,
+ RTformatSafe::RT_FORMAT_UNSIGNED_SHORT3 => mem::size_of::<u16>() * 3,
+ RTformatSafe::RT_FORMAT_UNSIGNED_SHORT4 => mem::size_of::<u16>() * 4,
+ RTformatSafe::RT_FORMAT_INT => mem::size_of::<i32>(),
+ RTformatSafe::RT_FORMAT_INT2 => mem::size_of::<i32>() * 2,
+ RTformatSafe::RT_FORMAT_INT3 => mem::size_of::<i32>() * 3,
+ RTformatSafe::RT_FORMAT_INT4 => mem::size_of::<i32>() * 4,
+ RTformatSafe::RT_FORMAT_UNSIGNED_INT => mem::size_of::<u32>(),
+ RTformatSafe::RT_FORMAT_UNSIGNED_INT2 => mem::size_of::<u32>() * 2,
+ RTformatSafe::RT_FORMAT_UNSIGNED_INT3 => mem::size_of::<u32>() * 3,
+ RTformatSafe::RT_FORMAT_UNSIGNED_INT4 => mem::size_of::<u32>() * 4,
+ RTformatSafe::RT_FORMAT_HALF => 2, // Half-precision components are counted as 2 bytes each.
+ RTformatSafe::RT_FORMAT_HALF2 => 2 * 2,
+ RTformatSafe::RT_FORMAT_HALF3 => 2 * 3,
+ RTformatSafe::RT_FORMAT_HALF4 => 2 * 4,
+ RTformatSafe::RT_FORMAT_LONG_LONG => mem::size_of::<i64>(),
+ RTformatSafe::RT_FORMAT_LONG_LONG2 => mem::size_of::<i64>() * 2,
+ RTformatSafe::RT_FORMAT_LONG_LONG3 => mem::size_of::<i64>() * 3,
+ RTformatSafe::RT_FORMAT_LONG_LONG4 => mem::size_of::<i64>() * 4,
+ RTformatSafe::RT_FORMAT_UNSIGNED_LONG_LONG => mem::size_of::<u64>(),
+ RTformatSafe::RT_FORMAT_UNSIGNED_LONG_LONG2 => mem::size_of::<u64>() * 2,
+ RTformatSafe::RT_FORMAT_UNSIGNED_LONG_LONG3 => mem::size_of::<u64>() * 3,
+ RTformatSafe::RT_FORMAT_UNSIGNED_LONG_LONG4 => mem::size_of::<u64>() * 4,
+ RTformatSafe::RT_FORMAT_BUFFER_ID => 4, // Buffer and program ids are counted as 4 bytes.
+ RTformatSafe::RT_FORMAT_PROGRAM_ID => 4,
+ RTformatSafe::RT_FORMAT_UNSIGNED_BC1 => return Err(RTresult::RT_ERROR_NOT_SUPPORTED),
+ RTformatSafe::RT_FORMAT_UNSIGNED_BC2 => return Err(RTresult::RT_ERROR_NOT_SUPPORTED),
+ RTformatSafe::RT_FORMAT_UNSIGNED_BC3 => return Err(RTresult::RT_ERROR_NOT_SUPPORTED),
+ RTformatSafe::RT_FORMAT_UNSIGNED_BC4 => return Err(RTresult::RT_ERROR_NOT_SUPPORTED),
+ RTformatSafe::RT_FORMAT_BC4 => return Err(RTresult::RT_ERROR_NOT_SUPPORTED),
+ RTformatSafe::RT_FORMAT_UNSIGNED_BC5 => return Err(RTresult::RT_ERROR_NOT_SUPPORTED),
+ RTformatSafe::RT_FORMAT_BC5 => return Err(RTresult::RT_ERROR_NOT_SUPPORTED),
+ RTformatSafe::RT_FORMAT_UNSIGNED_BC6H => return Err(RTresult::RT_ERROR_NOT_SUPPORTED),
+ RTformatSafe::RT_FORMAT_BC6H => return Err(RTresult::RT_ERROR_NOT_SUPPORTED),
+ RTformatSafe::RT_FORMAT_UNSIGNED_BC7 => return Err(RTresult::RT_ERROR_NOT_SUPPORTED),
+ })
+ }
+
+ fn alignment(format: RTformatSafe) -> Result<usize, RTresult> { // Alignment in bytes used for host-side layouts of `format`; block-compressed (BC*) formats yield RT_ERROR_NOT_SUPPORTED.
+ Ok(match format {
+ RTformatSafe::RT_FORMAT_UNKNOWN => 1,
+ RTformatSafe::RT_FORMAT_USER => 1,
+ RTformatSafe::RT_FORMAT_BYTE
+ | RTformatSafe::RT_FORMAT_BYTE2
+ | RTformatSafe::RT_FORMAT_BYTE3
+ | RTformatSafe::RT_FORMAT_BYTE4
+ | RTformatSafe::RT_FORMAT_UNSIGNED_BYTE
+ | RTformatSafe::RT_FORMAT_UNSIGNED_BYTE2
+ | RTformatSafe::RT_FORMAT_UNSIGNED_BYTE3
+ | RTformatSafe::RT_FORMAT_UNSIGNED_BYTE4 => 1, // Vector formats share the alignment of their scalar component.
+ RTformatSafe::RT_FORMAT_SHORT
+ | RTformatSafe::RT_FORMAT_SHORT2
+ | RTformatSafe::RT_FORMAT_SHORT3
+ | RTformatSafe::RT_FORMAT_SHORT4
+ | RTformatSafe::RT_FORMAT_UNSIGNED_SHORT
+ | RTformatSafe::RT_FORMAT_UNSIGNED_SHORT2
+ | RTformatSafe::RT_FORMAT_UNSIGNED_SHORT3
+ | RTformatSafe::RT_FORMAT_UNSIGNED_SHORT4
+ | RTformatSafe::RT_FORMAT_HALF
+ | RTformatSafe::RT_FORMAT_HALF2
+ | RTformatSafe::RT_FORMAT_HALF3
+ | RTformatSafe::RT_FORMAT_HALF4 => 2,
+ RTformatSafe::RT_FORMAT_INT
+ | RTformatSafe::RT_FORMAT_INT2
+ | RTformatSafe::RT_FORMAT_INT3
+ | RTformatSafe::RT_FORMAT_INT4
+ | RTformatSafe::RT_FORMAT_UNSIGNED_INT
+ | RTformatSafe::RT_FORMAT_UNSIGNED_INT2
+ | RTformatSafe::RT_FORMAT_UNSIGNED_INT3
+ | RTformatSafe::RT_FORMAT_UNSIGNED_INT4
+ | RTformatSafe::RT_FORMAT_FLOAT
+ | RTformatSafe::RT_FORMAT_FLOAT2
+ | RTformatSafe::RT_FORMAT_FLOAT3
+ | RTformatSafe::RT_FORMAT_FLOAT4 => 4,
+ RTformatSafe::RT_FORMAT_LONG_LONG
+ | RTformatSafe::RT_FORMAT_LONG_LONG2
+ | RTformatSafe::RT_FORMAT_LONG_LONG3
+ | RTformatSafe::RT_FORMAT_LONG_LONG4
+ | RTformatSafe::RT_FORMAT_UNSIGNED_LONG_LONG
+ | RTformatSafe::RT_FORMAT_UNSIGNED_LONG_LONG2
+ | RTformatSafe::RT_FORMAT_UNSIGNED_LONG_LONG3
+ | RTformatSafe::RT_FORMAT_UNSIGNED_LONG_LONG4 => 8,
+ RTformatSafe::RT_FORMAT_BUFFER_ID => 4,
+ RTformatSafe::RT_FORMAT_PROGRAM_ID => 4,
+ RTformatSafe::RT_FORMAT_UNSIGNED_BC1 => return Err(RTresult::RT_ERROR_NOT_SUPPORTED),
+ RTformatSafe::RT_FORMAT_UNSIGNED_BC2 => return Err(RTresult::RT_ERROR_NOT_SUPPORTED),
+ RTformatSafe::RT_FORMAT_UNSIGNED_BC3 => return Err(RTresult::RT_ERROR_NOT_SUPPORTED),
+ RTformatSafe::RT_FORMAT_UNSIGNED_BC4 => return Err(RTresult::RT_ERROR_NOT_SUPPORTED),
+ RTformatSafe::RT_FORMAT_BC4 => return Err(RTresult::RT_ERROR_NOT_SUPPORTED),
+ RTformatSafe::RT_FORMAT_UNSIGNED_BC5 => return Err(RTresult::RT_ERROR_NOT_SUPPORTED),
+ RTformatSafe::RT_FORMAT_BC5 => return Err(RTresult::RT_ERROR_NOT_SUPPORTED),
+ RTformatSafe::RT_FORMAT_UNSIGNED_BC6H => return Err(RTresult::RT_ERROR_NOT_SUPPORTED),
+ RTformatSafe::RT_FORMAT_BC6H => return Err(RTresult::RT_ERROR_NOT_SUPPORTED),
+ RTformatSafe::RT_FORMAT_UNSIGNED_BC7 => return Err(RTresult::RT_ERROR_NOT_SUPPORTED),
+ })
+ }
+
+ pub(crate) fn array_format(&self) -> Result<hipArray_Format, RTresult> { // HIP array format matching the scalar component of `self.format`; errors for formats without a mapping.
+ Ok(match self.format {
+ RTformatSafe::RT_FORMAT_FLOAT
+ | RTformatSafe::RT_FORMAT_FLOAT2
+ | RTformatSafe::RT_FORMAT_FLOAT3
+ | RTformatSafe::RT_FORMAT_FLOAT4 => hipArray_Format::HIP_AD_FORMAT_FLOAT,
+ RTformatSafe::RT_FORMAT_BYTE
+ | RTformatSafe::RT_FORMAT_BYTE2
+ | RTformatSafe::RT_FORMAT_BYTE3
+ | RTformatSafe::RT_FORMAT_BYTE4 => hipArray_Format::HIP_AD_FORMAT_SIGNED_INT8,
+ RTformatSafe::RT_FORMAT_UNSIGNED_BYTE
+ | RTformatSafe::RT_FORMAT_UNSIGNED_BYTE2
+ | RTformatSafe::RT_FORMAT_UNSIGNED_BYTE3
+ | RTformatSafe::RT_FORMAT_UNSIGNED_BYTE4 => {
+ hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT8
+ }
+ RTformatSafe::RT_FORMAT_SHORT
+ | RTformatSafe::RT_FORMAT_SHORT2
+ | RTformatSafe::RT_FORMAT_SHORT3
+ | RTformatSafe::RT_FORMAT_SHORT4 => hipArray_Format::HIP_AD_FORMAT_SIGNED_INT16,
+ RTformatSafe::RT_FORMAT_UNSIGNED_SHORT
+ | RTformatSafe::RT_FORMAT_UNSIGNED_SHORT2
+ | RTformatSafe::RT_FORMAT_UNSIGNED_SHORT3
+ | RTformatSafe::RT_FORMAT_UNSIGNED_SHORT4 => {
+ hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT16
+ }
+ RTformatSafe::RT_FORMAT_INT
+ | RTformatSafe::RT_FORMAT_INT2
+ | RTformatSafe::RT_FORMAT_INT3
+ | RTformatSafe::RT_FORMAT_INT4 => hipArray_Format::HIP_AD_FORMAT_SIGNED_INT32,
+ RTformatSafe::RT_FORMAT_UNSIGNED_INT
+ | RTformatSafe::RT_FORMAT_UNSIGNED_INT2
+ | RTformatSafe::RT_FORMAT_UNSIGNED_INT3
+ | RTformatSafe::RT_FORMAT_UNSIGNED_INT4 => {
+ hipArray_Format::HIP_AD_FORMAT_UNSIGNED_INT32
+ }
+ RTformatSafe::RT_FORMAT_HALF
+ | RTformatSafe::RT_FORMAT_HALF2
+ | RTformatSafe::RT_FORMAT_HALF3
+ | RTformatSafe::RT_FORMAT_HALF4 => hipArray_Format::HIP_AD_FORMAT_HALF,
+ RTformatSafe::RT_FORMAT_LONG_LONG
+ | RTformatSafe::RT_FORMAT_LONG_LONG2
+ | RTformatSafe::RT_FORMAT_LONG_LONG3
+ | RTformatSafe::RT_FORMAT_LONG_LONG4
+ | RTformatSafe::RT_FORMAT_UNSIGNED_LONG_LONG
+ | RTformatSafe::RT_FORMAT_UNSIGNED_LONG_LONG2
+ | RTformatSafe::RT_FORMAT_UNSIGNED_LONG_LONG3
+ | RTformatSafe::RT_FORMAT_UNSIGNED_LONG_LONG4 => {
+ return Err(RTresult::RT_ERROR_NOT_SUPPORTED) // 64-bit component formats are not mapped to any HIP array format here.
+ }
+ RTformatSafe::RT_FORMAT_UNKNOWN
+ | RTformatSafe::RT_FORMAT_USER
+ | RTformatSafe::RT_FORMAT_BUFFER_ID
+ | RTformatSafe::RT_FORMAT_PROGRAM_ID => return Err(RTresult::RT_ERROR_INVALID_CONTEXT), // NOTE(review): INVALID_CONTEXT is an unusual code for an unsuitable format - confirm it is intended.
+ RTformatSafe::RT_FORMAT_UNSIGNED_BC1
+ | RTformatSafe::RT_FORMAT_UNSIGNED_BC2
+ | RTformatSafe::RT_FORMAT_UNSIGNED_BC3
+ | RTformatSafe::RT_FORMAT_UNSIGNED_BC4
+ | RTformatSafe::RT_FORMAT_BC4
+ | RTformatSafe::RT_FORMAT_UNSIGNED_BC5
+ | RTformatSafe::RT_FORMAT_BC5
+ | RTformatSafe::RT_FORMAT_UNSIGNED_BC6H
+ | RTformatSafe::RT_FORMAT_BC6H
+ | RTformatSafe::RT_FORMAT_UNSIGNED_BC7 => return Err(RTresult::RT_ERROR_NOT_SUPPORTED),
+ })
+ }
+
+ pub(crate) fn channels(&self) -> Result<u32, RTresult> { // Number of scalar components per element of `self.format`; block-compressed (BC*) formats yield RT_ERROR_NOT_SUPPORTED.
+ Ok(match self.format {
+ RTformatSafe::RT_FORMAT_UNKNOWN
+ | RTformatSafe::RT_FORMAT_USER
+ | RTformatSafe::RT_FORMAT_BUFFER_ID
+ | RTformatSafe::RT_FORMAT_PROGRAM_ID
+ | RTformatSafe::RT_FORMAT_FLOAT
+ | RTformatSafe::RT_FORMAT_BYTE
+ | RTformatSafe::RT_FORMAT_UNSIGNED_BYTE
+ | RTformatSafe::RT_FORMAT_SHORT
+ | RTformatSafe::RT_FORMAT_UNSIGNED_SHORT
+ | RTformatSafe::RT_FORMAT_INT
+ | RTformatSafe::RT_FORMAT_UNSIGNED_INT
+ | RTformatSafe::RT_FORMAT_HALF
+ | RTformatSafe::RT_FORMAT_LONG_LONG
+ | RTformatSafe::RT_FORMAT_UNSIGNED_LONG_LONG => 1, // Scalar formats, plus UNKNOWN, USER and the id formats, count as one channel.
+ RTformatSafe::RT_FORMAT_FLOAT2
+ | RTformatSafe::RT_FORMAT_BYTE2
+ | RTformatSafe::RT_FORMAT_UNSIGNED_BYTE2
+ | RTformatSafe::RT_FORMAT_SHORT2
+ | RTformatSafe::RT_FORMAT_UNSIGNED_SHORT2
+ | RTformatSafe::RT_FORMAT_INT2
+ | RTformatSafe::RT_FORMAT_UNSIGNED_INT2
+ | RTformatSafe::RT_FORMAT_HALF2
+ | RTformatSafe::RT_FORMAT_LONG_LONG2
+ | RTformatSafe::RT_FORMAT_UNSIGNED_LONG_LONG2 => 2,
+ RTformatSafe::RT_FORMAT_FLOAT3
+ | RTformatSafe::RT_FORMAT_BYTE3
+ | RTformatSafe::RT_FORMAT_UNSIGNED_BYTE3
+ | RTformatSafe::RT_FORMAT_SHORT3
+ | RTformatSafe::RT_FORMAT_UNSIGNED_SHORT3
+ | RTformatSafe::RT_FORMAT_INT3
+ | RTformatSafe::RT_FORMAT_UNSIGNED_INT3
+ | RTformatSafe::RT_FORMAT_HALF3
+ | RTformatSafe::RT_FORMAT_LONG_LONG3
+ | RTformatSafe::RT_FORMAT_UNSIGNED_LONG_LONG3 => 3,
+ RTformatSafe::RT_FORMAT_FLOAT4
+ | RTformatSafe::RT_FORMAT_BYTE4
+ | RTformatSafe::RT_FORMAT_UNSIGNED_BYTE4
+ | RTformatSafe::RT_FORMAT_SHORT4
+ | RTformatSafe::RT_FORMAT_UNSIGNED_SHORT4
+ | RTformatSafe::RT_FORMAT_INT4
+ | RTformatSafe::RT_FORMAT_UNSIGNED_INT4
+ | RTformatSafe::RT_FORMAT_HALF4
+ | RTformatSafe::RT_FORMAT_LONG_LONG4
+ | RTformatSafe::RT_FORMAT_UNSIGNED_LONG_LONG4 => 4,
+ RTformatSafe::RT_FORMAT_UNSIGNED_BC1
+ | RTformatSafe::RT_FORMAT_UNSIGNED_BC2
+ | RTformatSafe::RT_FORMAT_UNSIGNED_BC3
+ | RTformatSafe::RT_FORMAT_UNSIGNED_BC4
+ | RTformatSafe::RT_FORMAT_BC4
+ | RTformatSafe::RT_FORMAT_UNSIGNED_BC5
+ | RTformatSafe::RT_FORMAT_BC5
+ | RTformatSafe::RT_FORMAT_UNSIGNED_BC6H
+ | RTformatSafe::RT_FORMAT_BC6H
+ | RTformatSafe::RT_FORMAT_UNSIGNED_BC7 => return Err(RTresult::RT_ERROR_NOT_SUPPORTED),
+ })
+ }
+
+ /// Metadata of the next smaller mip level: both extents halved, never below 1.
+ fn next_mip(self) -> Self {
+     let halve = |extent: u64| (extent / 2).max(1);
+     Self {
+         width: halve(self.width),
+         height: halve(self.height),
+         ..self
+     }
+ }
+
+ /// Endless sequence of mip metadata: `self` first, then each successive `next_mip`.
+ fn generate_mips(self) -> impl Iterator<Item = Self> {
+     std::iter::successors(Some(self), |level| Some(level.next_mip()))
+ }
+}
+
+ struct OnDemandAllocation { // Storage for a buffer whose contents are produced by a user-supplied callback.
+ mip_levels: u32,
+ alloc: BufferAllocation,
+ callback_data: *mut ::std::os::raw::c_void, // Opaque pointer handed back to `callback` as its first argument.
+ callback: unsafe extern "C" fn(
+ callback_data: *mut ::std::os::raw::c_void,
+ buffer: Buffer,
+ block: *mut RTmemoryblock,
+ ) -> ::std::os::raw::c_int,
+ }
+
+ enum BufferStorage { // Backing storage of a buffer.
+ Normal(Vec<BufferAllocation>), // One allocation per mip level; index 0 is the base level.
+ FromCallback(OnDemandAllocation), // Single allocation whose contents come from a callback.
+ }
+
+ pub(crate) struct BufferData { // State stored behind a `Buffer` handle.
+ pub(crate) context: Weak<OptixCell<ContextData>>, // Weak link to the context this buffer was created in.
+ pub(crate) metadata: BufferMetadata,
+ alloc: BufferStorage,
+ pub(crate) index: u32, // Value of the context's `buffers_counter` at the time the buffer was created.
+ }
+
+ impl OptixObjectData for BufferData {
+     const TYPE: TypeTag = TypeTag::Buffer;
+
+     /// Removes this buffer from its context's set of buffers.
+     ///
+     /// Succeeds without doing anything when the context is already gone;
+     /// an error is returned only if borrowing the context fails.
+     fn deregister(&mut self, this: &Rc<OptixCell<Self>>) -> Result<(), RTresult> {
+         match self.context.upgrade() {
+             None => Ok(()),
+             Some(strong_context) => {
+                 let mut context_data = (*strong_context).borrow_mut()?;
+                 context_data.buffers.remove(this);
+                 Ok(())
+             }
+         }
+     }
+
+     /// Hands out the weak link to the owning context.
+     fn context<'a>(&'a mut self) -> MaybeWeakRefMut<'a, ContextData> {
+         MaybeWeakRefMut::Weak(&self.context)
+     }
+ }
+
+impl BufferData {
+ /// Creates an empty buffer with a single level: byte format, element size 1, zero extents.
+ ///
+ /// Bumps the context's buffer counter and uses the new value as the buffer index.
+ fn new(weak_context: Weak<OptixCell<ContextData>>, context: &mut ContextData) -> Self {
+     context.buffers_counter += 1;
+     let index = context.buffers_counter;
+     Self {
+         context: weak_context,
+         index,
+         metadata: BufferMetadata {
+             format: RTformatSafe::RT_FORMAT_BYTE,
+             element_size: 1,
+             width: 0,
+             height: 0,
+         },
+         alloc: BufferStorage::Normal(vec![BufferAllocation::empty()]),
+     }
+ }
+
+ /// Creates an empty callback-backed buffer: byte format, element size 1, zero extents.
+ ///
+ /// `callback` and `callback_data` are stored as given; the context's buffer
+ /// counter is bumped and its new value becomes the buffer index.
+ fn new_from_callback(
+     weak_context: Weak<OptixCell<ContextData>>,
+     context: &mut ContextData,
+     callback: unsafe extern "C" fn(
+         callback_data: *mut ::std::os::raw::c_void,
+         buffer: Buffer,
+         block: *mut RTmemoryblock,
+     ) -> i32,
+     callback_data: *mut ::std::os::raw::c_void,
+ ) -> Self {
+     context.buffers_counter += 1;
+     let index = context.buffers_counter;
+     let on_demand = OnDemandAllocation {
+         mip_levels: 1,
+         alloc: BufferAllocation::empty(),
+         callback_data,
+         callback,
+     };
+     Self {
+         context: weak_context,
+         index,
+         metadata: BufferMetadata {
+             format: RTformatSafe::RT_FORMAT_BYTE,
+             element_size: 1,
+             width: 0,
+             height: 0,
+         },
+         alloc: BufferStorage::FromCallback(on_demand),
+     }
+ }
+
+ /// Base-level allocation, whichever kind of storage backs the buffer.
+ fn alloc0(&self) -> &BufferAllocation {
+     match self.alloc {
+         BufferStorage::FromCallback(ref on_demand) => &on_demand.alloc,
+         BufferStorage::Normal(ref levels) => &levels[0],
+     }
+ }
+
+ fn propagate_size_change(&mut self) -> Result<(), RTresult> {
+ match self.alloc {
+ BufferStorage::Normal(ref mut alloc) => {
+ for (alloc, metadata) in alloc.iter_mut().zip(self.metadata.generate_mips()) {
+ let alloc_copy = mem::replace(alloc, BufferAllocation::empty());
+ *alloc = BufferAllocation::new(Some(alloc_copy), metadata)?.0;
+ }
+ }
+ BufferStorage::FromCallback(_) => {}
+ }
+ Ok(())
+ }
+
+ // We do all this dance because callback function can and does call various buffer functions (eg rtBufferGetSize2D)
+ pub(crate) fn load_from_callback(this: &Rc<OptixCell<Self>>) -> Result<(), RTresult> {
+ let needs_copy = {
+ let mut this = this.borrow_mut_no_invalidate()?;
+ let metadata = this.metadata;
+ match this.alloc {
+ BufferStorage::Normal(_) => return Ok(()),
+ BufferStorage::FromCallback(ref mut alloc) => {
+ let alloc_copy = mem::replace(&mut alloc.alloc, BufferAllocation::empty());
+ let (new_alloc, needs_copy) =
+ BufferAllocation::new(Some(alloc_copy), metadata)?;
+ alloc.alloc = new_alloc;
+ if needs_copy {
+ Some((alloc.callback, alloc.callback_data, metadata))
+ } else {
+ None
+ }
+ }
+ }
+ };
+ if let Some((callback, callback_data, metadata)) = needs_copy {
+ let layout = unsafe {
+ Layout::from_size_align_unchecked(
+ metadata.byte_size() as usize,
+ BufferMetadata::alignment(metadata.format)?,
+ )
+ };
+ let mut buffer = AlignedBuffer::new(layout);
+ let mut block = RTmemoryblock {
+ format: metadata.format.into(),
+ baseAddress: buffer.as_bytes_mut().as_mut_ptr().cast(),
+ mipLevel: 0u32,
+ x: 0,
+ y: 0,
+ z: 0,
+ width: metadata.width as u32,
+ height: metadata.height.max(1) as u32,
+ depth: metadata.depth().max(1) as u32,
+ rowPitch: (metadata.width * metadata.element_size) as u32,
+ planePitch: 0,
+ };
+ if unsafe { (callback)(callback_data, Rc::as_ptr(this), &mut block) } == 0 {
+ return Err(RTresult::RT_ERROR_UNKNOWN);
+ }
+ let mut this = this.borrow_mut()?;
+ let alloc = match this.alloc {
+ BufferStorage::Normal(_) => return Err(RTresult::RT_ERROR_UNKNOWN),
+ BufferStorage::FromCallback(ref mut alloc) => alloc,
+ };
+ hip! { hipMemcpyHtoD(alloc.alloc.pointer, buffer.as_ptr(), metadata.byte_size() as usize), RT_ERROR_UNKNOWN };
+ }
+ Ok(())
+ }
+
+ fn register(this: Rc<OptixCell<Self>>, context: &mut ContextData) {
+ context.buffers.insert(this);
+ }
+
+ fn map_ex(&mut self, level: u32) -> Result<*mut c_void, RTresult> {
+ match self.alloc {
+ BufferStorage::Normal(ref mut alloc) => {
+ let alloc = alloc
+ .get_mut(level as usize)
+ .ok_or(RTresult::RT_ERROR_INVALID_VALUE)?;
+ alloc.map(self.metadata)
+ }
+ BufferStorage::FromCallback(_) => Err(RTresult::RT_ERROR_UNKNOWN),
+ }
+ }
+
+ fn unmap_ex(&mut self, level: u32) -> Result<(), RTresult> {
+ match self.alloc {
+ BufferStorage::Normal(ref mut alloc) => {
+ let alloc = alloc
+ .get_mut(level as usize)
+ .ok_or(RTresult::RT_ERROR_INVALID_VALUE)?;
+ alloc.unmap()
+ }
+ BufferStorage::FromCallback(_) => Err(RTresult::RT_ERROR_UNKNOWN),
+ }
+ }
+
+ unsafe fn create(context: Context) -> Result<Buffer, RTresult> {
+ context::create_subobject(context, BufferData::new, BufferData::register)
+ }
+
+ unsafe fn create_from_callback(
+ context: Context,
+ callback: unsafe extern "C" fn(
+ callback_data: *mut ::std::os::raw::c_void,
+ buffer: Buffer,
+ block: *mut RTmemoryblock,
+ ) -> i32,
+ callback_data: *mut ::std::os::raw::c_void,
+ ) -> Result<Buffer, RTresult> {
+ context::create_subobject(
+ context,
+ |weak_context, context| {
+ BufferData::new_from_callback(weak_context, context, callback, callback_data)
+ },
+ BufferData::register,
+ )
+ }
+
+ pub(crate) fn get_device_mip0(&self) -> DeviceBuffer {
+ let alloc = &self.alloc0();
+ DeviceBuffer {
+ pointer: alloc.pointer,
+ width: self.metadata.width,
+ height: self.metadata.height,
+ }
+ }
+
+ pub(crate) fn pointer_mip0(&self) -> hipDeviceptr_t {
+ self.alloc0().pointer
+ }
+}
+
+ /// Creates a regular buffer in `context` and writes its handle to `*buffer`.
+ ///
+ /// The buffer description flags are currently ignored.
+ pub(crate) unsafe fn create(
+     context: Context,
+     _bufferdesc: u32,
+     buffer: *mut Buffer,
+ ) -> Result<(), RTresult> {
+     null_check(context)?;
+     null_check(buffer)?;
+     let handle = BufferData::create(context)?;
+     *buffer = handle;
+     Ok(())
+ }
+
+ /// Creates a callback-backed buffer in `context` and writes its handle to `*buffer`.
+ ///
+ /// A missing callback is rejected with `RT_ERROR_INVALID_VALUE`. The buffer
+ /// description flags are currently ignored.
+ pub(crate) unsafe fn create_from_callback(
+     context: Context,
+     _bufferdesc: ::std::os::raw::c_uint,
+     callback: Option<
+         unsafe extern "C" fn(
+             callbackData: *mut ::std::os::raw::c_void,
+             buffer: Buffer,
+             block: *mut RTmemoryblock,
+         ) -> ::std::os::raw::c_int,
+     >,
+     callback_data: *mut ::std::os::raw::c_void,
+     buffer: *mut Buffer,
+ ) -> Result<(), RTresult> {
+     null_check(context)?;
+     null_check(buffer)?;
+     let callback = match callback {
+         Some(callback) => callback,
+         None => return Err(RTresult::RT_ERROR_INVALID_VALUE),
+     };
+     let handle = BufferData::create_from_callback(context, callback, callback_data)?;
+     *buffer = handle;
+     Ok(())
+ }
+
+ /// Destroys the buffer behind `buffer` by delegating to `OptixCell::destroy`.
+ pub(crate) unsafe fn destroy(buffer: Buffer) -> Result<(), RTresult> {
+ OptixCell::destroy(buffer)
+ }
+
+ /// Writes the number of dimensions of `buffer` to `*dimensionality`.
+ pub(crate) unsafe fn get_dimensionality(
+     buffer: Buffer,
+     dimensionality: *mut u32,
+ ) -> Result<(), RTresult> {
+     let cell = null_unwrap(buffer)?;
+     let out = null_unwrap_mut(dimensionality)?;
+     let data = cell.borrow()?;
+     let dimensions = data.metadata.dimensions();
+     *out = dimensions;
+     Ok(())
+ }
+
+ /// Writes the element format of `buffer` to `*format`.
+ pub(crate) unsafe fn get_format(buffer: Buffer, format: *mut RTformat) -> Result<(), RTresult> {
+     let cell = null_unwrap(buffer)?;
+     let out = null_unwrap_mut(format)?;
+     let data = cell.borrow()?;
+     let raw_format = data.metadata.format as u32;
+     *out = RTformat(raw_format);
+     Ok(())
+ }
+
+ /// Writes the extent of the first dimension of `buffer` to `*width`.
+ pub(crate) unsafe fn get_size1d(buffer: Buffer, width: *mut u64) -> Result<(), RTresult> {
+     let cell = null_unwrap(buffer)?;
+     let out = null_unwrap_mut(width)?;
+     let data = cell.borrow()?;
+     let first_dimension = data.metadata.size(0);
+     *out = first_dimension;
+     Ok(())
+ }
+
+ /// Changes the element format of a buffer, derives the matching element size
+ /// and re-creates the buffer's allocations.
+ ///
+ /// An unknown `format` is rejected with `RT_ERROR_INVALID_VALUE`.
+ pub(crate) unsafe fn set_format(buffer_ptr: Buffer, format: RTformat) -> Result<(), RTresult> {
+     let cell = null_unwrap(buffer_ptr)?;
+     let mut data = cell.borrow_mut()?;
+     let safe_format = match RTformatSafe::new(format) {
+         Some(safe_format) => safe_format,
+         None => return Err(RTresult::RT_ERROR_INVALID_VALUE),
+     };
+     // The format is stored before its element size is looked up, as callers may rely on it.
+     data.metadata.format = safe_format;
+     let element_size = BufferMetadata::element_size(data.metadata.format)?;
+     data.metadata.element_size = element_size as u64;
+     data.propagate_size_change()
+ }
+
+ /// Makes the buffer one-dimensional with `width` elements.
+ ///
+ /// Equivalent to a two-dimensional resize with a height of zero.
+ pub(crate) unsafe fn set_size1d(buffer_ptr: Buffer, width: u64) -> Result<(), RTresult> {
+     set_size2d(buffer_ptr, width, 0)
+ }
+
+ /// Sets the extent of the buffer to `width` x `height` and re-creates its allocations.
+ pub(crate) unsafe fn set_size2d(
+     buffer_ptr: Buffer,
+     width: u64,
+     height: u64,
+ ) -> Result<(), RTresult> {
+     let cell = null_unwrap(buffer_ptr)?;
+     let mut data = cell.borrow_mut()?;
+     {
+         let metadata = &mut data.metadata;
+         metadata.width = width;
+         metadata.height = height;
+     }
+     data.propagate_size_change()
+ }
+
+ /// Writes the handle of the context that owns `buffer` to `*context`.
+ pub(crate) unsafe fn get_context(buffer: Buffer, context: *mut Context) -> Result<(), RTresult> {
+     null_check(context)?;
+     let cell = null_unwrap(buffer)?;
+     let data = cell.borrow()?;
+     let owner = Weak::as_ptr(&data.context);
+     *context = owner;
+     Ok(())
+ }
+
+ /// Writes the stored width and height of `buffer` to `*width` and `*height`.
+ pub(crate) unsafe fn get_size2d(
+     buffer: Buffer,
+     width: *mut u64,
+     height: *mut u64,
+ ) -> Result<(), RTresult> {
+     null_check(width)?;
+     null_check(height)?;
+     let cell = null_unwrap(buffer)?;
+     let data = cell.borrow()?;
+     let (stored_width, stored_height) = (data.metadata.width, data.metadata.height);
+     *width = stored_width;
+     *height = stored_height;
+     Ok(())
+ }
+
+ /// Maps the base MIP level of `buffer` for reading and writing.
+ ///
+ /// Shorthand for `map_ex` with level 0 and no user-owned memory.
+ pub(crate) unsafe fn map(buffer: Buffer, user_pointer: *mut *mut c_void) -> Result<(), RTresult> {
+     let flags = RTbuffermapflag::RT_BUFFER_MAP_READ_WRITE.0;
+     let no_user_memory = ptr::null_mut();
+     map_ex(buffer, flags, 0, no_user_memory, user_pointer)
+ }
+
+ /// Unmaps the base MIP level (level 0) of `buffer`.
+ pub(crate) unsafe fn unmap(buffer: Buffer) -> Result<(), RTresult> {
+ unmap_ex(buffer, 0)
+ }
+
+ /// Always reports an OpenGL buffer object id of 0 through `*glid`.
+ pub(crate) unsafe fn get_glboid(_buffer: Buffer, glid: *mut u32) -> Result<(), RTresult> {
+     null_check(glid)?;
+     glid.write(0);
+     Ok(())
+ }
+
+ /// Maps MIP level `level` of `buffer` and writes the mapped pointer to `*optix_owned`.
+ ///
+ /// Mapping into user-owned memory is not supported: a non-null `user_owned`
+ /// yields `RT_ERROR_INVALID_VALUE`. The map flags are ignored.
+ pub(crate) unsafe fn map_ex(
+     buffer: Buffer,
+     _map_flags: u32,
+     level: u32,
+     user_owned: *mut c_void,
+     optix_owned: *mut *mut c_void,
+ ) -> Result<(), RTresult> {
+     if !user_owned.is_null() {
+         return Err(RTresult::RT_ERROR_INVALID_VALUE);
+     }
+     null_check(optix_owned)?;
+     let cell = null_unwrap(buffer)?;
+     let mut data = cell.borrow_mut_no_invalidate()?;
+     let mapped = data.map_ex(level)?;
+     *optix_owned = mapped;
+     Ok(())
+ }
+
+ /// Unmaps MIP level `level` of `buffer`.
+ pub(crate) unsafe fn unmap_ex(buffer: Buffer, level: u32) -> Result<(), RTresult> {
+     let cell = null_unwrap(buffer)?;
+     let mut data = cell.borrow_mut_no_invalidate()?;
+     let outcome = data.unmap_ex(level);
+     outcome
+ }
+
+ /// Writes the stored element size of `buffer` to `*element_size`.
+ pub(crate) unsafe fn get_element_size(
+     buffer: Buffer,
+     element_size: *mut u64,
+ ) -> Result<(), RTresult> {
+     null_check(element_size)?;
+     let cell = null_unwrap(buffer)?;
+     let data = cell.borrow()?;
+     let stored = data.metadata.element_size;
+     *element_size = stored;
+     Ok(())
+ }
+
+ /// Sets the element size of a buffer whose format is `RT_FORMAT_USER`.
+ ///
+ /// A size of zero is rejected with `RT_ERROR_INVALID_VALUE`. For any other
+ /// format the call succeeds without changing anything.
+ pub(crate) unsafe fn set_element_size(
+     buffer_ptr: Buffer,
+     element_size: u64,
+ ) -> Result<(), RTresult> {
+     if element_size == 0 {
+         return Err(RTresult::RT_ERROR_INVALID_VALUE);
+     }
+     let cell = null_unwrap(buffer_ptr)?;
+     let mut data = cell.borrow_mut()?;
+     if data.metadata.format == RTformatSafe::RT_FORMAT_USER {
+         data.metadata.element_size = element_size;
+         data.propagate_size_change()?;
+     }
+     Ok(())
+ }
+
+ /// Writes the buffer's index, cast to `i32`, to `*buffer_id`.
+ pub(crate) unsafe fn get_id(buffer: Buffer, buffer_id: *mut i32) -> Result<(), RTresult> {
+     null_check(buffer_id)?;
+     let cell = null_unwrap(buffer)?;
+     let data = cell.borrow()?;
+     let id = data.index as i32;
+     *buffer_id = id;
+     Ok(())
+ }
+
+ /// Writes the number of MIP levels of `buffer` to `*level`.
+ ///
+ /// Regular buffers report how many level allocations they hold; callback-backed
+ /// buffers report the stored `mip_levels` value.
+ pub(crate) unsafe fn get_miplevel_count(buffer: Buffer, level: *mut u32) -> Result<(), RTresult> {
+     null_check(level)?;
+     let cell = null_unwrap(buffer)?;
+     let data = cell.borrow()?;
+     *level = match data.alloc {
+         BufferStorage::Normal(ref levels) => levels.len() as u32,
+         BufferStorage::FromCallback(ref on_demand) => on_demand.mip_levels,
+     };
+     Ok(())
+ }
+
+ /// Writes the extent of the first `dimensionality` dimensions of `buffer` to `dims`.
+ ///
+ /// Dimension 0 is the width, dimension 1 the height; every further dimension is
+ /// reported as 0. `dims` must have room for `dimensionality` values.
+ pub(crate) unsafe fn get_sizev(
+     buffer: Buffer,
+     dimensionality: u32,
+     dims: *mut u64,
+ ) -> Result<(), RTresult> {
+     null_check(dims)?;
+     let cell = null_unwrap(buffer)?;
+     let data = cell.borrow()?;
+     let known_sizes = [data.metadata.width, data.metadata.height];
+     for dimension in 0..dimensionality as usize {
+         let size = known_sizes.get(dimension).copied().unwrap_or(0);
+         *dims.add(dimension) = size;
+     }
+     Ok(())
+ }
+
+ /// Writes the device pointer of the base MIP level to `*device_pointer`.
+ ///
+ /// The device ordinal is ignored.
+ pub(crate) unsafe fn get_device_pointer(
+     buffer: Buffer,
+     _optix_device_ordinal: i32,
+     device_pointer: *mut *mut c_void,
+ ) -> Result<(), RTresult> {
+     null_check(device_pointer)?;
+     let cell = null_unwrap(buffer)?;
+     let data = cell.borrow()?;
+     let base_level = data.pointer_mip0();
+     *device_pointer = base_level.0;
+     Ok(())
+ }
+
+ /// Sets the number of MIP levels of `buffer`.
+ ///
+ /// Regular buffers drop surplus level allocations or create the missing ones.
+ /// Callback-backed buffers only record the requested count.
+ ///
+ /// # Errors
+ ///
+ /// Returns `RT_ERROR_INVALID_VALUE` when `levels` is zero, or when the MIP
+ /// generator cannot supply metadata for a requested level. Allocation errors
+ /// are propagated unchanged.
+ pub(crate) unsafe fn set_mip_level_count(buffer: Buffer, levels: u32) -> Result<(), RTresult> {
+     let buffer = null_unwrap(buffer)?;
+     // The base level must always exist: `alloc0` indexes level 0 unconditionally,
+     // so an empty level list would panic on the next pointer lookup.
+     if levels == 0 {
+         return Err(RTresult::RT_ERROR_INVALID_VALUE);
+     }
+     let mut buffer = buffer.borrow_mut()?;
+     let levels = levels as usize;
+     let metadata = buffer.metadata;
+     let alloc = match &mut buffer.alloc {
+         BufferStorage::Normal(alloc) => alloc,
+         // TODO: implement MIP levels when mipmapped textures work in HIP
+         BufferStorage::FromCallback(alloc) => {
+             alloc.mip_levels = levels as u32;
+             return Ok(());
+         }
+     };
+     if levels <= alloc.len() {
+         alloc.truncate(levels);
+         return Ok(());
+     }
+     let mut meta_generator = metadata.generate_mips().skip(alloc.len());
+     for _ in alloc.len()..levels {
+         // Report an exhausted generator as an error instead of panicking across the C API.
+         let metadata = meta_generator
+             .next()
+             .ok_or(RTresult::RT_ERROR_INVALID_VALUE)?;
+         alloc.push(BufferAllocation::new(None, metadata)?.0);
+     }
+     Ok(())
+ }
+
+ /// Writes the width and height of MIP level `level` to `*width` and `*height`.
+ ///
+ /// Only level 0 is supported; any other level yields `RT_ERROR_NOT_SUPPORTED`.
+ pub(crate) unsafe fn get_miplevel_size2d(
+     buffer: Buffer,
+     level: u32,
+     width: *mut u64,
+     height: *mut u64,
+ ) -> Result<(), RTresult> {
+     match level {
+         0 => get_size2d(buffer, width, height),
+         _ => Err(RTresult::RT_ERROR_NOT_SUPPORTED),
+     }
+ }
+
+ #[cfg(test)]
+ mod tests {
+     use crate::optix_test;
+     use crate::test_common::OptixFns;
+     use optix_types::*;
+     use std::{mem, ptr};
+
+     optix_test!(new_buffer);
+
+     /// A freshly created buffer is one-dimensional, byte-formatted and empty.
+     unsafe fn new_buffer<Optix: OptixFns>(o: Optix) {
+         let mut context = ptr::null_mut();
+         o.rtContextCreate(&mut context);
+         let mut handle = ptr::null_mut();
+         o.rtBufferCreate(context, RTbuffertype::RT_BUFFER_OUTPUT.0, &mut handle);
+
+         let mut dimensionality = mem::zeroed();
+         o.rtBufferGetDimensionality(handle, &mut dimensionality);
+         assert_eq!(dimensionality, 1);
+
+         let mut element_format = mem::zeroed();
+         o.rtBufferGetFormat(handle, &mut element_format);
+         assert_eq!(element_format, RTformat::RT_FORMAT_BYTE);
+
+         let mut width = mem::zeroed();
+         o.rtBufferGetSize1D(handle, &mut width);
+         assert_eq!(width, 0);
+
+         // Pre-filled with non-zero values to prove every slot gets overwritten.
+         let mut extents = [164, 2, 3];
+         o.rtBufferGetSizev(handle, 3, extents.as_mut_ptr());
+         assert_eq!(extents, [0, 0, 0]);
+
+         o.rtBufferDestroy(handle);
+         o.rtContextDestroy(context);
+     }
+
+     optix_test!(empty_buffer_can_be_mapped);
+
+     /// Mapping a zero-sized buffer still yields a non-null host pointer.
+     unsafe fn empty_buffer_can_be_mapped<Optix: OptixFns>(o: Optix) {
+         let mut context = ptr::null_mut();
+         o.rtContextCreate(&mut context);
+         let mut handle = ptr::null_mut();
+         o.rtBufferCreate(context, RTbuffertype::RT_BUFFER_INPUT.0, &mut handle);
+         o.rtBufferSetFormat(handle, RTformat::RT_FORMAT_PROGRAM_ID);
+
+         let mut dimensionality = mem::zeroed();
+         o.rtBufferGetDimensionality(handle, &mut dimensionality);
+         assert_eq!(dimensionality, 1);
+
+         let mut width = mem::zeroed();
+         o.rtBufferGetSize1D(handle, &mut width);
+         assert_eq!(width, 0);
+
+         let mut mapped = ptr::null_mut();
+         let flags = RTbuffermapflag::RT_BUFFER_MAP_READ_WRITE.0;
+         o.rtBufferMapEx(handle, flags, 0, ptr::null_mut(), &mut mapped);
+         assert_ne!(mapped, ptr::null_mut());
+
+         o.rtBufferUnmapEx(handle, 0);
+         o.rtBufferDestroy(handle);
+         o.rtContextDestroy(context);
+     }
+ }
diff --git a/zluda_rt/src/cache.rs b/zluda_rt/src/cache.rs new file mode 100644 index 0000000..f9876a9 --- /dev/null +++ b/zluda_rt/src/cache.rs @@ -0,0 +1,281 @@ +use crate::context::ContextData; +use crate::{OptixCell, ProgramData}; +use data_encoding::HEXLOWER; +use hip_common::raytracing::VariablesBlock; +use hip_common::unwrap_or_return; +use rustc_hash::FxHashMap; +use sha2::{Digest, Sha512}; +use std::collections::BTreeMap; +use std::ffi::{CStr, CString}; +use std::path::{Path, PathBuf}; +use std::rc::Weak; +use std::time::{self, SystemTime}; + +pub(crate) struct KernelRepository(hip_common::cache::KernelRepository<RaytracingDataExtension>); + +impl KernelRepository { + pub(crate) fn new(cache_file: PathBuf) -> rusqlite::Result<Self> { + Ok(Self(hip_common::cache::KernelRepository::new(Some( + cache_file, + ))?)) + } + + #[cfg(test)] + pub(crate) fn new_in_memory() -> rusqlite::Result<Self> { + Ok(Self(hip_common::cache::KernelRepository::new(None)?)) + } + + pub(crate) fn save_program( + &mut self, + now: i64, + program_name: &CStr, + hash: &str, + compiler_version: &str, + git_hash: &str, + device: &CStr, + binary: &[u8], + input_attributes: &str, + hiprt_version: &str, + ) -> rusqlite::Result<()> { + self.0.save_program( + now, + hash, + compiler_version, + git_hash, + device, + binary, + rusqlite::params![ + hip_common::cache::SqlCStrRef(program_name), + input_attributes, + hiprt_version + ], + ) + } + + fn try_load_program( + &mut self, + now: i64, + program_name: &CStr, + hash: &str, + compiler_version: &str, + git_hash: &str, + device: &CStr, + input_attributes: &str, + hiprt_version: &str, + ) -> rusqlite::Result<Option<Vec<u8>>> { + self.0.try_load_program( + now, + hash, + compiler_version, + git_hash, + device, + rusqlite::params![ + hip_common::cache::SqlCStrRef(program_name), + input_attributes, + hiprt_version + ], + ) + } +} + +pub(crate) struct KernelCache(KernelRepository); + +impl KernelCache { + pub(crate) const OPTIX6_CACHE_FILE: &'static 
str = "zluda_optix6.db"; + + pub(crate) fn new(cache_dir: &Path) -> Option<Self> { + let mut file = cache_dir.to_path_buf(); + file.push(Self::OPTIX6_CACHE_FILE); + Some(Self(KernelRepository::new(file).ok()?)) + } + + pub(crate) fn save_program( + &mut self, + compiler_version: &str, + hiprt_version: &str, + isa: &CStr, + program_name: &CStr, + ptx: &str, + prog: &ProgramData, + input_attributes: &VariablesBlock, + ) { + let now = unwrap_or_return!(SystemTime::now().duration_since(time::UNIX_EPOCH)).as_millis() + as i64; + let mut hasher = Sha512::new(); + hasher.update(ptx); + let hash = hasher.finalize(); + let hash = HEXLOWER.encode(&hash[..]); + let git_hash = env!("VERGEN_GIT_SHA"); + let attributes = unwrap_or_return!(Self::serialize_input_attributes( + &input_attributes.variables + )); + self.0 + .save_program( + now, + program_name, + &hash, + compiler_version, + git_hash, + isa, + &prog.shared.binary, + &attributes, + hiprt_version, + ) + .ok(); + } + + pub(crate) fn try_load_program( + &mut self, + weak_context: Weak<OptixCell<ContextData>>, + compiler_version: &str, + hiprt_version: &str, + isa: &CStr, + program_name: &CStr, + ptx: &str, + input_attributes: &VariablesBlock, + ) -> Option<(ProgramData, VariablesBlock)> { + let now = SystemTime::now() + .duration_since(time::UNIX_EPOCH) + .ok()? 
+ .as_millis() as i64; + let mut hasher = Sha512::new(); + hasher.update(ptx); + let hash = hasher.finalize(); + let hash = HEXLOWER.encode(&hash[..]); + let git_hash = env!("VERGEN_GIT_SHA"); + let attributes = Self::serialize_input_attributes(&input_attributes.variables).ok()?; + let binary = self + .0 + .try_load_program( + now, + program_name, + &hash, + compiler_version, + git_hash, + isa, + &attributes, + hiprt_version, + ) + .ok()??; + ProgramData::try_from_binary(weak_context, binary) + } + + fn serialize_input_attributes( + attributes: &FxHashMap<CString, hip_common::raytracing::Variable>, + ) -> serde_json::Result<String> { + let sorted_attrbutes = attributes.iter().collect::<BTreeMap<_, _>>(); + serde_json::to_string(&serialize::VariablesMapSerialize2 { + variables: sorted_attrbutes, + }) + } +} + +impl Drop for KernelCache { + fn drop(&mut self) { + if let Ok(connection) = self.0 .0.connect() { + connection.execute_batch("VACUUM;").ok(); + } + } +} + +struct RaytracingDataExtension; + +impl hip_common::cache::KernelExtendedData for RaytracingDataExtension { + const INPUT_COLUMNS: &'static [[&'static str; 2]] = &[ + ["name", "TEXT NOT NULL"], + ["input_attributes", "TEXT NOT NULL"], + ["hiprt_version", "TEXT NOT NULL"], + ]; +} + +pub(crate) mod serialize { + use serde::{Deserialize, Serialize}; + use serde_with::{serde_as, SerializeAs}; + use std::collections::BTreeMap; + use std::ffi::CString; + + #[serde_as] + #[derive(serde::Serialize)] + #[serde(transparent)] + pub(crate) struct VariablesMapSerialize2<'a> { + #[serde_as(as = "BTreeMap<AsString, &Variable>")] + pub(crate) variables: BTreeMap<&'a CString, &'a hip_common::raytracing::Variable>, + } + + struct AsString; + + impl SerializeAs<&CString> for AsString { + fn serialize_as<S>(value: &&CString, serializer: S) -> Result<S::Ok, S::Error> + where + S: serde::Serializer, + { + serializer.serialize_str(value.to_str().unwrap()) + } + } + + #[derive(Serialize, Deserialize)] + #[serde(remote = 
"hip_common::raytracing::Variable")] + pub(crate) struct Variable { + pub size: u32, + pub offset: u32, + pub default_value: Vec<u8>, + } + + impl SerializeAs<hip_common::raytracing::Variable> for Variable { + fn serialize_as<S>( + value: &hip_common::raytracing::Variable, + serializer: S, + ) -> Result<S::Ok, S::Error> + where + S: serde::Serializer, + { + Variable::serialize(value, serializer) + } + } +} + +#[cfg(test)] +mod tests { + use super::KernelRepository; + use std::ffi::CString; + + #[test] + fn kernel_insert_select() { + let mut cache = KernelRepository::new_in_memory().unwrap(); + let input_attributes = "{TEST}"; + cache + .save_program( + 1, + &*CString::new("start").unwrap(), + "FFFF", + "Clang 15", + "EEEE", + &*CString::new("gfx1030").unwrap(), + &vec![0x11, 0x12, 0x13, 0x14], + &input_attributes, + "1.2", + ) + .unwrap(); + assert_eq!(get_time(&mut cache.0.connect().unwrap()), 1); + let binary = cache + .try_load_program( + 2, + CString::new("start").unwrap().as_c_str(), + "FFFF", + "Clang 15", + "EEEE", + &*CString::new("gfx1030").unwrap(), + input_attributes, + "1.2", + ) + .unwrap() + .unwrap(); + assert_eq!(binary, vec![0x11, 0x12, 0x13, 0x14]); + } + + fn get_time(connection: &mut rusqlite::Connection) -> i64 { + connection + .query_row("SELECT last_used FROM kernels", [], |row| row.get(0)) + .unwrap() + } +} diff --git a/zluda_rt/src/context.rs b/zluda_rt/src/context.rs new file mode 100644 index 0000000..6973751 --- /dev/null +++ b/zluda_rt/src/context.rs @@ -0,0 +1,698 @@ +use crate::{
+ acceleration::AccelerationData,
+ buffer::{Buffer, BufferData, DeviceBuffer},
+ cache::KernelCache,
+ definitions, div_positive_round_up,
+ geometry::GeometryData,
+ geometry_group::GeometryGroupData,
+ geometry_instance::GeometryInstanceData,
+ geometry_triangles::GeometryTrianglesData,
+ group::GroupData,
+ hip, hiprt,
+ material::MaterialData,
+ null_check, null_unwrap, null_unwrap_mut,
+ program::{self, Program, ProgramData},
+ repr_gpu::{self, Scene, TrivialHIPAllocator},
+ texture_sampler::TextureSamplerData,
+ transform::TransformData,
+ variable::{Variable, VariableData},
+ MaybeWeakRefMut, OptixCell, OptixObjectData, RcHashSet, TypeTag,
+};
+use comgr::Comgr;
+use hip_common::raytracing::VariablesBlock;
+use hip_runtime_sys::*;
+use hiprt_sys::*;
+use optix_types::*;
+use rustc_hash::FxHashMap;
+use std::{
+ ffi::{CStr, CString},
+ fs,
+ mem::{self, ManuallyDrop},
+ path::PathBuf,
+ ptr,
+ rc::{Rc, Weak},
+};
+
+ /// Raw context handle handed to API callers: produced with `Rc::into_raw` in
+ /// `create` and reclaimed with `Rc::from_raw` in `destroy`.
+ pub(crate) type Context = *const OptixCell<ContextData>;
+
+ /// State behind a context handle: HIP/HIPRT handles, every registered
+ /// sub-object, the kernel cache and the scene built for launches.
+ pub(crate) struct ContextData {
+ // HIP context created in `new` and destroyed on drop.
+ pub(crate) hip_context: hipCtx_t,
+ pub(crate) hiprt: Rc<HipRt>,
+ // Set by `set_ray_type_count`; `miss_programs` has this many slots.
+ pub(crate) ray_type_count: u32,
+ pub(crate) geometry_group_count: u32,
+ // HIPRT context created in `new` and destroyed on drop.
+ pub(crate) context: hiprtContext,
+ pub(crate) comgr: Rc<Comgr>,
+ pub(crate) isa: CString,
+ pub(crate) buffers: RcHashSet<OptixCell<BufferData>>,
+ // Incremented for every buffer created; the new value becomes that buffer's index.
+ pub(crate) buffers_counter: u32,
+ // Variables declared on the context, keyed by name.
+ pub(crate) variables: FxHashMap<CString, Rc<OptixCell<VariableData>>>,
+ pub(crate) programs: RcHashSet<OptixCell<ProgramData>>,
+ pub(crate) callable_program_counter: u32,
+ pub(crate) cumulative_attributes: VariablesBlock,
+ pub(crate) materials: RcHashSet<OptixCell<MaterialData>>,
+ pub(crate) geometry: RcHashSet<OptixCell<GeometryData>>,
+ pub(crate) geometry_triangles: RcHashSet<OptixCell<GeometryTrianglesData>>,
+ pub(crate) geometry_instances: RcHashSet<OptixCell<GeometryInstanceData>>,
+ pub(crate) geometry_groups: RcHashSet<OptixCell<GeometryGroupData>>,
+ pub(crate) groups: RcHashSet<OptixCell<GroupData>>,
+ pub(crate) accelerations: RcHashSet<OptixCell<AccelerationData>>,
+ pub(crate) texture_samplers: RcHashSet<OptixCell<TextureSamplerData>>,
+ pub(crate) transforms: RcHashSet<OptixCell<TransformData>>,
+ pub(crate) texture_counter: u32,
+ // One slot per entry point; resized by `set_entry_point_count`.
+ pub(crate) entry_points: Vec<Option<Rc<OptixCell<ProgramData>>>>,
+ // One slot per entry point; resized together with `entry_points`.
+ pub(crate) exception_programs: Vec<Option<Rc<OptixCell<ProgramData>>>>,
+ // One slot per ray type; resized by `set_ray_type_count`.
+ pub(crate) miss_programs: Vec<Option<Rc<OptixCell<ProgramData>>>>,
+ disk_cache_location: PathBuf,
+ pub(crate) optix_salt: [u8; 32],
+ pub(crate) vendor_salt: [u8; 32],
+ pub(crate) public_vendor_key: Vec<u8>,
+ pub(crate) cache: Option<KernelCache>,
+ pub(crate) compiler_version: String,
+ pub(crate) hiprt_version: String,
+ // Set by `invalidate`; starts out `true`.
+ pub(crate) scene_rebuild_pending: bool,
+ pub(crate) scene: Scene,
+ pub(crate) global_stack: GlobalStack,
+ }
+
+ /// Stack size limit requested from HIP in `ContextData::new` when the current
+ /// limit is lower.
+ // 16kB is the maximum allowed in rocm 5.4 and seems to be enough for Arnold
+ const MAX_GPU_STACK: usize = 16 * 1_024;
+
+ impl ContextData {
+ /// Initializes HIP, loads comgr and HIPRT, creates the HIP and HIPRT contexts
+ /// and opens the on-disk kernel cache under `<cache dir>/ZLUDA/OptixCache`.
+ ///
+ /// Every failure in this sequence is reported as `RT_ERROR_CONTEXT_CREATION_FAILED`.
+ // NOTE(review): the HIP context (and later the HIPRT context) is not released
+ // when a subsequent step fails, because `Drop` only runs on a fully built value.
+ pub(crate) fn new() -> Result<Self, RTresult> {
+ hip! { hipInit(0), RT_ERROR_CONTEXT_CREATION_FAILED };
+ let comgr = comgr::Comgr::find_and_load()
+ .map_err(|_| RTresult::RT_ERROR_CONTEXT_CREATION_FAILED)?;
+ // Raise the stack limit to MAX_GPU_STACK if the current limit is lower.
+ let mut stack_size = 0;
+ hip! { hipDeviceGetLimit(&mut stack_size, hipLimit_t::hipLimitStackSize), RT_ERROR_CONTEXT_CREATION_FAILED };
+ if stack_size < MAX_GPU_STACK {
+ hip! { hipDeviceSetLimit(hipLimit_t::hipLimitStackSize, MAX_GPU_STACK), RT_ERROR_CONTEXT_CREATION_FAILED };
+ }
+ let mut hip_context = ptr::null_mut();
+ hip! { hipCtxCreate(&mut hip_context, 0, 0), RT_ERROR_CONTEXT_CREATION_FAILED };
+ let mut context_input = hiprtContextCreationInput {
+ ctxt: hip_context as _,
+ device: 0,
+ deviceType: hiprtDeviceType::hiprtDeviceAMD,
+ };
+ let hiprt =
+ unsafe { HipRt::load() }.map_err(|_| RTresult::RT_ERROR_CONTEXT_CREATION_FAILED)?;
+ let mut context = ptr::null_mut();
+ hiprt! {
+ hiprt.hiprtCreateContext(HIPRT_API_VERSION, &mut context_input, &mut context),
+ RT_ERROR_CONTEXT_CREATION_FAILED
+ };
+ let isa = unsafe {
+ hip_common::comgr_isa(0).map_err(|_| RTresult::RT_ERROR_CONTEXT_CREATION_FAILED)?
+ };
+ let hiprt_version = hiprt::HIPRT_API_VERSION.to_string();
+ let compiler_version = comgr
+ .version()
+ .map_err(|_| RTresult::RT_ERROR_CONTEXT_CREATION_FAILED)?;
+ let mut disk_cache_location =
+ dirs::cache_dir().ok_or(RTresult::RT_ERROR_CONTEXT_CREATION_FAILED)?;
+ disk_cache_location.push("ZLUDA");
+ disk_cache_location.push("OptixCache");
+ fs::create_dir_all(&disk_cache_location)
+ .map_err(|_| RTresult::RT_ERROR_CONTEXT_CREATION_FAILED)?;
+ let cache = KernelCache::new(&disk_cache_location)
+ .ok_or(RTresult::RT_ERROR_CONTEXT_CREATION_FAILED)?;
+ Ok(ContextData {
+ hip_context,
+ hiprt: Rc::new(hiprt),
+ context,
+ ray_type_count: 0,
+ geometry_group_count: 0,
+ comgr: Rc::new(comgr),
+ buffers: RcHashSet::new(),
+ buffers_counter: 0,
+ variables: FxHashMap::default(),
+ programs: RcHashSet::new(),
+ callable_program_counter: 0,
+ cumulative_attributes: VariablesBlock::empty(),
+ materials: RcHashSet::new(),
+ geometry: RcHashSet::new(),
+ geometry_triangles: RcHashSet::new(),
+ geometry_instances: RcHashSet::new(),
+ geometry_groups: RcHashSet::new(),
+ transforms: RcHashSet::new(),
+ groups: RcHashSet::new(),
+ accelerations: RcHashSet::new(),
+ texture_samplers: RcHashSet::new(),
+ texture_counter: 0,
+ entry_points: Vec::new(),
+ exception_programs: Vec::new(),
+ miss_programs: Vec::new(),
+ isa,
+ disk_cache_location,
+ optix_salt: [0; 32],
+ vendor_salt: [0; 32],
+ public_vendor_key: Vec::new(),
+ cache: Some(cache),
+ compiler_version,
+ hiprt_version,
+ scene_rebuild_pending: true,
+ scene: Scene::empty(),
+ global_stack: GlobalStack::empty(),
+ })
+ }
+
+ /// Returns `(size, align)` of the cumulative attribute block, each divided by
+ /// `size_of::<u32>()` via `div_positive_round_up` and then cast to `u16`.
+ pub fn attributes_layout(&self) -> (u16, u16) {
+ let size = div_positive_round_up(
+ self.cumulative_attributes.layout.size() as u64,
+ mem::size_of::<u32>() as u64,
+ );
+ let align = div_positive_round_up(
+ self.cumulative_attributes.layout.align() as u64,
+ mem::size_of::<u32>() as u64,
+ );
+ (size as u16, align as u16)
+ }
+
+ /// Lays out the miss programs, copies them to memory obtained from `allocator`
+ /// and returns the device pointer.
+ pub fn allocate_miss_programs(
+ &self,
+ allocator: &mut TrivialHIPAllocator,
+ ) -> Result<hipDeviceptr_t, RTresult> {
+ let call_chain_visitor = repr_gpu::MissProgramsVisitCallChain {
+ context: self,
+ miss_programs: &self.miss_programs,
+ };
+ let chain_layout = repr_gpu::get_layout(0, &call_chain_visitor)?;
+ let chain_on_gpu = allocator.allocate(chain_layout.layout.size())?;
+ repr_gpu::copy_to_gpu(0, &call_chain_visitor, &chain_layout, chain_on_gpu)?;
+ Ok(chain_on_gpu)
+ }
+
+ /// Lays out the callable programs, copies them to memory obtained from
+ /// `allocator` and returns the device pointer.
+ pub fn allocate_callable_programs(
+ &self,
+ allocator: &mut TrivialHIPAllocator,
+ ) -> Result<hipDeviceptr_t, RTresult> {
+ let visitor = repr_gpu::CallableProgramsVisitor::new(self)?;
+ let chain_layout = repr_gpu::get_layout(self.ray_type_count, &visitor)?;
+ let dev_ptr = allocator.allocate(chain_layout.layout.size())?;
+ repr_gpu::copy_to_gpu(self.ray_type_count, &visitor, &chain_layout, dev_ptr)?;
+ Ok(dev_ptr)
+ }
+
+ /// Builds the table of `DeviceBuffer` entries indexed by buffer index and
+ /// copies it to the device. Entry 0 points at a zero-filled page.
+ pub fn allocate_buffers(
+ &self,
+ allocator: &mut TrivialHIPAllocator,
+ ) -> Result<hipDeviceptr_t, RTresult> {
+ // Buffer indices start at 1, hence `buffers_counter + 1` entries.
+ let mut buffers = (0..self.buffers_counter + 1)
+ .into_iter()
+ .map(|_| unsafe { mem::zeroed::<DeviceBuffer>() })
+ .collect::<Vec<_>>();
+ // We allocate this additional buffer for bug compatibility with OptiX.
+ // Arnold has a bug where it tries to access the buffer with id = 0.
+ // On OptiX this returns a NULL pointer. Furthermore, in OptiX, dereferencing
+ // an invalid pointer returns zeros. We emulate this behavior by returning
+ // this zero-filled buffer. It actually works well enough to run Arnold.
+ let page_zero = allocator.allocate(1024 * 4)?;
+ hip! { hipMemset(page_zero.0, 0, 1024 * 4), RT_ERROR_MEMORY_ALLOCATION_FAILED };
+ buffers[0] = DeviceBuffer {
+ pointer: page_zero,
+ width: 1024,
+ height: 0,
+ };
+ for buffer in self.buffers.iter() {
+ let buffer = buffer.borrow()?;
+ buffers[buffer.index as usize] = buffer.get_device_mip0();
+ }
+ allocator.copy_to_device(&buffers[..])
+ }
+
+ /// Returns the offset of the `rtTriangleBarycentrics` attribute inside the
+ /// cumulative attribute block, or `!0u32` when no such attribute exists.
+ pub fn get_uv_offset(&self) -> Result<u32, RTresult> {
+ let uv_offset = self
+ .cumulative_attributes
+ .variables
+ .get(unsafe { CStr::from_bytes_with_nul_unchecked(b"rtTriangleBarycentrics\0") })
+ .map(|var| var.offset)
+ .unwrap_or(!0u32);
+ Ok(uv_offset)
+ }
+
+ /// Marks the scene as needing a rebuild.
+ pub fn invalidate(&mut self) {
+ self.scene_rebuild_pending = true;
+ }
+
+ /// Runs `BufferData::load_from_callback` on every registered buffer, stopping
+ /// at the first error.
+ pub(crate) fn buffers_load_from_callback(&mut self) -> Result<(), RTresult> {
+ for buffer_rc in self.buffers.iter() {
+ BufferData::load_from_callback(buffer_rc)?;
+ }
+ Ok(())
+ }
+ }
+
+ impl Drop for ContextData {
+     /// Tears down the HIPRT context first, then the HIP context it was created on.
+     fn drop(&mut self) {
+         // Errors cannot be reported from `drop`; both results are discarded on purpose.
+         unsafe {
+             let _ = self.hiprt.hiprtDestroyContext(self.context);
+             let _ = hipCtxDestroy(self.hip_context);
+         }
+     }
+ }
+
+ impl OptixObjectData for ContextData {
+ const TYPE: TypeTag = TypeTag::Context;
+
+ // A context is not registered inside another object, so deregistering it
+ // always fails with RT_ERROR_UNKNOWN.
+ fn deregister(&mut self, _this: &Rc<OptixCell<Self>>) -> Result<(), RTresult> {
+ Err(RTresult::RT_ERROR_UNKNOWN)
+ }
+
+ // The context is its own context: hand out a direct reference.
+ fn context<'a>(&'a mut self) -> crate::MaybeWeakRefMut<'a, ContextData> {
+ MaybeWeakRefMut::Ref(self)
+ }
+ }
+
+ /// Device memory block used as stack space for launched threads.
+ pub(crate) struct GlobalStack {
+ // Device allocation; null until the first `reallocate`.
+ pointer: hipDeviceptr_t,
+ // Number of threads the current allocation was sized for.
+ thread_space: usize,
+ }
+
+ impl GlobalStack {
+     /// Number of `u32` slots reserved for each thread.
+     pub(crate) const THREAD_STACK_DEPTH: u16 = 512;
+
+     /// A stack with no device memory behind it.
+     fn empty() -> GlobalStack {
+         GlobalStack {
+             pointer: hipDeviceptr_t(ptr::null_mut()),
+             thread_space: 0,
+         }
+     }
+
+     /// Makes sure the stack is large enough for a `width` x `height` launch.
+     ///
+     /// The allocation only ever grows: when the current block already covers the
+     /// required number of threads nothing happens.
+     ///
+     /// # Errors
+     ///
+     /// Returns `RT_ERROR_MEMORY_ALLOCATION_FAILED` when freeing the old block or
+     /// allocating the new one fails. After a failed allocation the stack is left
+     /// empty, so a later call starts from a clean state.
+     fn reallocate(&mut self, width: u32, height: u32) -> Result<(), RTresult> {
+         let (grid_dim_x, block_dim_x) = program::get_launch_dimensions_x(width)?;
+         let thread_space = grid_dim_x as usize * block_dim_x as usize * height as usize;
+         if thread_space <= self.thread_space {
+             return Ok(());
+         }
+         hip::free(self.pointer).map_err(|_| RTresult::RT_ERROR_MEMORY_ALLOCATION_FAILED)?;
+         // The old block is gone. Forget it before allocating, otherwise a failed
+         // `malloc` would leave a dangling pointer that the next call frees again.
+         self.pointer = hipDeviceptr_t(ptr::null_mut());
+         self.thread_space = 0;
+         let byte_size =
+             thread_space * Self::THREAD_STACK_DEPTH as usize * mem::size_of::<u32>();
+         self.pointer =
+             hip::malloc(byte_size).map_err(|_| RTresult::RT_ERROR_MEMORY_ALLOCATION_FAILED)?;
+         self.thread_space = thread_space;
+         Ok(())
+     }
+ }
+
+ /// Writes a pointer to an empty, NUL-terminated string to `*string_return`.
+ ///
+ /// Detailed error messages are not implemented; the context and the error code
+ /// are ignored. A null `string_return` is ignored as well, because this
+ /// function has no way to report an error.
+ pub(crate) unsafe fn get_error_string(
+     _context: Context,
+     _code: RTresult,
+     string_return: *mut *const i8,
+ ) {
+     if string_return.is_null() {
+         return;
+     }
+     *string_return = b"\0".as_ptr() as _;
+ }
+
+ /// Resizes the ray generation and exception program tables to `count` slots.
+ ///
+ /// New slots start out empty; surplus slots are dropped.
+ pub(crate) unsafe fn set_entry_point_count(context: Context, count: u32) -> Result<(), RTresult> {
+     let cell = null_unwrap(context)?;
+     let mut data = cell.borrow_mut()?;
+     let slots = count as usize;
+     data.entry_points.resize(slots, None);
+     data.exception_programs.resize(slots, None);
+     Ok(())
+ }
+
+ /// Stores the number of ray types and resizes the miss program table to match.
+ pub(crate) unsafe fn set_ray_type_count(
+     context: Context,
+     ray_type_count: u32,
+ ) -> Result<(), RTresult> {
+     let cell = null_unwrap(context)?;
+     let mut data = cell.borrow_mut()?;
+     data.ray_type_count = ray_type_count;
+     let slots = ray_type_count as usize;
+     data.miss_programs.resize(slots, None);
+     Ok(())
+ }
+
+ /// Creates a new context and writes its handle to `*context`.
+ ///
+ /// The handle owns one strong reference, which `destroy` releases.
+ pub(crate) unsafe fn create(context: *mut Context) -> Result<(), RTresult> {
+     null_check(context)?;
+     let data = ContextData::new()?;
+     let shared = Rc::new(OptixCell::new(data));
+     *context = Rc::into_raw(shared) as *mut _;
+     Ok(())
+ }
+
+ /// Releases the strong reference held by the `context` handle.
+ pub(crate) unsafe fn destroy(context: Context) -> Result<(), RTresult> {
+     null_check(context)?;
+     // Rebuild the `Rc` that `create` leaked and let it go out of scope right away.
+     drop(Rc::from_raw(context));
+     Ok(())
+ }
+
+ /// Declares a variable called `name` on the context and writes its handle to `*v`.
+ ///
+ /// The context keeps the variable alive through its `variables` map. An entry
+ /// that already exists under `name` is replaced.
+ pub(crate) unsafe fn declare_variable(
+     context_ptr: Context,
+     name: *const i8,
+     v: *mut Variable,
+ ) -> Result<(), RTresult> {
+     null_check(name)?;
+     let out = null_unwrap_mut(v)?;
+     let cell = null_unwrap(context_ptr)?;
+     let variable = VariableData::new_with_context(cell)?;
+     let mut data = cell.borrow_mut()?;
+     let key = CStr::from_ptr(name as _).to_owned();
+     // Take the raw handle before the `Rc` moves into the map.
+     let handle = Rc::as_ptr(&variable);
+     data.variables.insert(key, variable);
+     *out = handle;
+     Ok(())
+ }
+
+ /// Context validation stub: performs no checks and always succeeds.
+ // TODO: implement
+ pub(crate) fn validate(_context: Context) -> Result<(), RTresult> {
+ Ok(())
+ }
+
+ /// Installs `program` as the ray generation program of entry point `entry_point_index`.
+ pub(crate) unsafe fn set_ray_generation_program(
+     context: Context,
+     entry_point_index: u32,
+     program: Program,
+ ) -> Result<(), RTresult> {
+     let table = |data: &mut ContextData| &mut data.entry_points;
+     set_program(context, entry_point_index, program, table)
+ }
+
+ /// Installs `program` as the exception program of entry point `entry_point_index`.
+ pub(crate) unsafe fn set_exception_program(
+     context: Context,
+     entry_point_index: u32,
+     program: Program,
+ ) -> Result<(), RTresult> {
+     let table = |data: &mut ContextData| &mut data.exception_programs;
+     set_program(context, entry_point_index, program, table)
+ }
+
+/// Shared implementation of the context-level program setters.
+///
+/// `setter` selects which program table of the context is updated; the slot at
+/// `entry_point_index` then receives a new strong reference to `program`.
+///
+/// # Errors
+///
+/// * whatever `null_check` / `null_unwrap` report for a null `program` or
+///   `context`,
+/// * `RT_ERROR_INVALID_VALUE` when `entry_point_index` is outside the table.
+unsafe fn set_program<F>(
+    context: Context,
+    entry_point_index: u32,
+    program: Program,
+    setter: F,
+) -> Result<(), RTresult>
+where
+    F: for<'a> FnOnce(&'a mut ContextData) -> &'a mut Vec<Option<Rc<OptixCell<ProgramData>>>>,
+{
+    // `Rc::from_raw` on a null pointer is undefined behaviour, so reject it
+    // before touching the reference count.
+    null_check(program)?;
+    let context = null_unwrap(context)?;
+    let mut context = (*context).borrow_mut()?;
+    let slot = setter(&mut context)
+        .get_mut(entry_point_index as usize)
+        .ok_or(RTresult::RT_ERROR_INVALID_VALUE)?;
+    // The caller keeps its own reference: wrap the temporary `Rc` in
+    // `ManuallyDrop` so only the clone stored in the table owns a new count.
+    let program = ManuallyDrop::new(Rc::from_raw(program));
+    *slot = Some((*program).clone());
+    Ok(())
+}
+
+/// Stores `program` in the `miss_programs` slot at `ray_type_index`.
+///
+/// Delegates to `set_program`, which fails with `RT_ERROR_INVALID_VALUE` when
+/// the index is outside the current table.
+pub(crate) unsafe fn set_miss_program(
+ context: Context,
+ ray_type_index: u32,
+ program: Program,
+) -> Result<(), RTresult> {
+ set_program(context, ray_type_index, program, |ctx| {
+ &mut ctx.miss_programs
+ })
+}
+
+/// Launches the entry point `entry_point_index` over a `width` x `height` grid.
+///
+/// Steps, in order:
+/// 1. reject dimensions that do not fit in `u32` (`RT_ERROR_NOT_SUPPORTED`),
+/// 2. run `buffers_load_from_callback`,
+/// 3. rebuild the scene if `scene_rebuild_pending` is set,
+/// 4. return early for an empty grid,
+/// 5. reallocate the global stack for the grid size,
+/// 6. look up the ray generation program (required) and the exception
+///    program (optional) and hand both to `Scene::launch_2d`.
+///
+/// A missing or unset entry point yields `RT_ERROR_INVALID_VALUE`.
+pub(crate) unsafe fn launch_2d(
+ context: Context,
+ entry_point_index: u32,
+ width: u64,
+ height: u64,
+) -> Result<(), RTresult> {
+ if width > u32::MAX as u64 || height > u32::MAX as u64 {
+ return Err(RTresult::RT_ERROR_NOT_SUPPORTED);
+ }
+ let context = null_unwrap(context)?;
+ let mut context = (context).borrow_mut_no_invalidate()?;
+ context.buffers_load_from_callback()?;
+ if context.scene_rebuild_pending {
+ context.scene = Scene::new(&context)?;
+ context.scene_rebuild_pending = false;
+ }
+ // Note: an empty launch still performs the buffer load and scene rebuild above.
+ if width == 0 || height == 0 {
+ return Ok(());
+ }
+ context
+ .global_stack
+ .reallocate(width as u32, height as u32)?;
+ let main_program = context
+ .entry_points
+ .get(entry_point_index as usize)
+ .ok_or(RTresult::RT_ERROR_INVALID_VALUE)?
+ .as_ref()
+ .ok_or(RTresult::RT_ERROR_INVALID_VALUE)?;
+ let main_program = main_program.borrow()?;
+ // The exception program is optional: an out-of-range index or an empty slot gives `None`.
+ let exception_program = context
+ .exception_programs
+ .get(entry_point_index as usize)
+ .map(Option::as_ref)
+ .flatten()
+ .map(Rc::as_ref)
+ .map(OptixCell::borrow)
+ .transpose()?;
+ context.scene.launch_2d(
+ entry_point_index,
+ &main_program,
+ exception_program,
+ width as u32,
+ height as u32,
+ context.global_stack.pointer,
+ )
+}
+
+/// Creates an object of type `T` that belongs to `context`.
+///
+/// `constructor` receives a weak reference to the context plus mutable access
+/// to its data and builds the object; `register` then takes ownership of the
+/// new `Rc` (normally by storing it in the context). The returned raw pointer
+/// does not own a reference count of its own.
+///
+/// `context` is not null-checked here; every caller visible in this file
+/// checks it before calling.
+pub(crate) unsafe fn create_subobject<T: OptixObjectData>(
+ context: Context,
+ constructor: impl FnOnce(Weak<OptixCell<ContextData>>, &mut ContextData) -> T,
+ register: impl FnOnce(Rc<OptixCell<T>>, &mut ContextData),
+) -> Result<*const OptixCell<T>, RTresult> {
+ // `ManuallyDrop` keeps the caller's strong reference from being released here.
+ let context = ManuallyDrop::new(Rc::from_raw(context));
+ let weak_context = Rc::downgrade(&context);
+ let mut context = (**context).borrow_mut()?;
+ let object = Rc::new(OptixCell::new(constructor(weak_context, &mut context)));
+ let result = Rc::as_ptr(&object);
+ register(object, &mut *context);
+ Ok(result)
+}
+
+/// Accepted for API compatibility; the requested stack size is ignored.
+pub(crate) fn set_stack_size(_context: Context, _bytes: u64) -> Result<(), RTresult> {
+ Ok(())
+}
+
+/// Accepted for API compatibility; the requested maximum depth is ignored.
+pub(crate) fn set_max_depth(_context: Context, _max_depth: u32) -> Result<(), RTresult> {
+ Ok(())
+}
+
+/// Looks up the context-level variable `name` and writes its handle to `*v`.
+///
+/// A missing variable is not an error: `*v` is set to null and `Ok(())` is
+/// returned.
+pub(crate) unsafe fn query_variable(
+ context: Context,
+ name: *const i8,
+ v: *mut Variable,
+) -> Result<(), RTresult> {
+ null_check(name)?;
+ null_check(v)?;
+ let context = null_unwrap(context)?;
+ let context = (context).borrow()?;
+ *v = context
+ .variables
+ .get(CStr::from_ptr(name))
+ .map(|variable| Rc::as_ptr(variable))
+ .unwrap_or(ptr::null_mut());
+ Ok(())
+}
+
+/// Sets the context attribute `attrib` from the `size` bytes at `p`.
+///
+/// Handling per attribute:
+/// * `DISK_CACHE_LOCATION`: `p` is read as a C string path; a `KernelCache`
+///   is opened there and both the path and the cache are stored.
+/// * `DISK_CACHE_ENABLED`: `p` points to a `u32`; `0` drops the cache, any
+///   other value (re)opens it at the stored location.
+/// * `PUBLIC_VENDOR_KEY`: the `size` bytes are copied into the context.
+/// * `VENDOR_SALT` / `OPTIX_SALT`: exactly 32 bytes are copied.
+/// * `PREFER_FAST_RECOMPILES`, `PREFER_WATERTIGHT_TRAVERSAL`,
+///   `DISK_CACHE_MEMORY_LIMITS`: accepted and ignored.
+/// * raw values 15, 16, 17 and 33554454: `RT_ERROR_NOT_SUPPORTED`.
+/// * anything else: `definitions::unimplemented()`.
+///
+/// `p` must be non-null for every attribute, including the ignored ones.
+pub(crate) unsafe fn set_attribute(
+ context: Context,
+ attrib: RTcontextattribute,
+ size: u64,
+ p: *const std::ffi::c_void,
+) -> Result<(), RTresult> {
+ null_check(p)?;
+ match attrib {
+ RTcontextattribute::RT_CONTEXT_ATTRIBUTE_DISK_CACHE_LOCATION => {
+ let context = null_unwrap(context)?;
+ let mut context = OptixCell::borrow_mut_no_invalidate(context)?;
+ // `size` is not consulted here; the path length comes from the NUL terminator.
+ let cache_location = CStr::from_ptr(p as _);
+ let cache_location = cache_location
+ .to_str()
+ .map_err(|_| RTresult::RT_ERROR_UNKNOWN)?;
+ let cache_location = PathBuf::from(cache_location);
+ let cache = KernelCache::new(&cache_location).ok_or(RTresult::RT_ERROR_UNKNOWN)?;
+ context.disk_cache_location = cache_location;
+ context.cache = Some(cache);
+ }
+ RTcontextattribute::RT_CONTEXT_ATTRIBUTE_DISK_CACHE_ENABLED => {
+ if size != mem::size_of::<u32>() as u64 {
+ return Err(RTresult::RT_ERROR_INVALID_VALUE);
+ }
+ let value = p as *const u32;
+ let context = null_unwrap(context)?;
+ let mut context = OptixCell::borrow_mut_no_invalidate(context)?;
+ if *value == 0 {
+ context.cache = None;
+ } else {
+ context.cache = Some(
+ KernelCache::new(&context.disk_cache_location)
+ .ok_or(RTresult::RT_ERROR_UNKNOWN)?,
+ );
+ }
+ }
+ RTcontextattribute::RT_CONTEXT_ATTRIBUTE_PREFER_FAST_RECOMPILES => {}
+ RTcontextattribute::RT_CONTEXT_ATTRIBUTE_PREFER_WATERTIGHT_TRAVERSAL => {}
+ RTcontextattribute::RT_CONTEXT_ATTRIBUTE_PUBLIC_VENDOR_KEY => {
+ let context = null_unwrap(context)?;
+ let mut context = OptixCell::borrow_mut_no_invalidate(context)?;
+ context.public_vendor_key =
+ std::slice::from_raw_parts(p as *const u8, size as usize).to_vec();
+ }
+ RTcontextattribute::RT_CONTEXT_ATTRIBUTE_VENDOR_SALT => {
+ if size != 32 {
+ return Err(RTresult::RT_ERROR_NOT_SUPPORTED);
+ }
+ let context = null_unwrap(context)?;
+ let mut context = OptixCell::borrow_mut_no_invalidate(context)?;
+ context.vendor_salt = *(p as *const [u8; 32]);
+ }
+ RTcontextattribute::RT_CONTEXT_ATTRIBUTE_OPTIX_SALT => {
+ if size != 32 {
+ return Err(RTresult::RT_ERROR_NOT_SUPPORTED);
+ }
+ let context = null_unwrap(context)?;
+ let mut context = OptixCell::borrow_mut_no_invalidate(context)?;
+ context.optix_salt = *(p as *const [u8; 32]);
+ }
+ // TODO: implement
+ RTcontextattribute::RT_CONTEXT_ATTRIBUTE_DISK_CACHE_MEMORY_LIMITS => {}
+ // TODO: reverse
+ RTcontextattribute(15)
+ | RTcontextattribute(16)
+ | RTcontextattribute(17)
+ | RTcontextattribute(33554454) => return Err(RTresult::RT_ERROR_NOT_SUPPORTED),
+ _ => return Err(definitions::unimplemented()),
+ }
+ Ok(())
+}
+
+/// Accepted for API compatibility; the depth is ignored and `RT_SUCCESS` is
+/// always returned. Unlike its siblings this returns a bare `RTresult`.
+pub(crate) fn set_max_callable_program_depth(_context: Context, _max_depth: u32) -> RTresult {
+ RTresult::RT_SUCCESS
+}
+
+/// Reads the context attribute `attrib` into the buffer at `p`.
+///
+/// Only `DISK_CACHE_LOCATION` and `OPTIX_SALT` are handled; every other
+/// attribute yields `definitions::unimplemented()`.
+pub(crate) unsafe fn get_attribute(
+ context: Context,
+ attrib: RTcontextattribute,
+ size: u64,
+ p: *mut std::ffi::c_void,
+) -> Result<(), RTresult> {
+ null_check(p)?;
+ match attrib {
+ RTcontextattribute::RT_CONTEXT_ATTRIBUTE_DISK_CACHE_LOCATION => {
+ let context = null_unwrap(context)?;
+ let context = OptixCell::borrow(context)?;
+ let cache = context
+ .disk_cache_location
+ .to_str()
+ .ok_or(RTresult::RT_ERROR_UNKNOWN)?;
+ // NOTE(review): this stores a pointer to the path's UTF-8 bytes, which are
+ // owned by the context and are not NUL-terminated; `size` is not checked in
+ // this arm. A caller reading the result as a C string may read past the end.
+ *(p as *mut _) = cache.as_ptr();
+ }
+ RTcontextattribute::RT_CONTEXT_ATTRIBUTE_OPTIX_SALT => {
+ if size != 32 {
+ return Err(RTresult::RT_ERROR_NOT_SUPPORTED);
+ }
+ let context = null_unwrap(context)?;
+ let context = OptixCell::borrow(context)?;
+ *(p as *mut [u8; 32]) = context.optix_salt;
+ }
+ _ => return Err(definitions::unimplemented()),
+ }
+ Ok(())
+}
+
+/// Selects the devices used by `context`.
+///
+/// Only the single-device configuration `[0]` is supported; any other count
+/// or device ordinal yields `definitions::unimplemented()`. Nothing is stored.
+///
+/// `devices` is null-checked before it is read.
+pub(crate) unsafe fn set_devices(
+    context: Context,
+    count: u32,
+    devices: *const i32,
+) -> Result<(), RTresult> {
+    null_check(context)?;
+    if count != 1 {
+        return Err(definitions::unimplemented());
+    }
+    // Guard the dereference below: `devices` comes straight from the caller.
+    null_check(devices)?;
+    if *devices != 0 {
+        return Err(definitions::unimplemented());
+    }
+    Ok(())
+}
+
+/// Accepted for API compatibility: only `context` is null-checked, the
+/// exception kind and the enabled flag are ignored.
+pub(crate) fn set_exception_enabled(
+ context: Context,
+ _exception: RTexception,
+ _enabled: i32,
+) -> Result<(), RTresult> {
+ null_check(context)?;
+ Ok(())
+}
+
+/// Accepted for API compatibility: only `context` is null-checked, the
+/// enabled flag is ignored.
+pub(crate) fn set_print_enabled(
+ context: *const OptixCell<ContextData>,
+ _enabled: i32,
+) -> Result<(), RTresult> {
+ null_check(context)?;
+ Ok(())
+}
+
+/// Accepted for API compatibility: only `context` is null-checked; the
+/// callback, verbosity and user data are ignored and the callback is never
+/// invoked from here.
+pub(crate) fn set_usage_report_callback(
+ context: *const OptixCell<ContextData>,
+ _callback: RTusagereportcallback,
+ _verbosity: i32,
+ _cbdata: *mut std::ffi::c_void,
+) -> Result<(), RTresult> {
+ null_check(context)?;
+ Ok(())
+}
+
+/// Accepted for API compatibility: only `context` is null-checked, the launch
+/// index is ignored.
+pub(crate) fn set_print_launch_index(
+ context: *const OptixCell<ContextData>,
+ _x: i32,
+ _y: i32,
+ _z: i32,
+) -> Result<(), RTresult> {
+ null_check(context)?;
+ Ok(())
+}
+
+/// Reports the number of devices used by `context`, which is always 1.
+///
+/// Both `context` and the output pointer `count` are null-checked.
+pub(crate) unsafe fn get_device_count(context: Context, count: *mut u32) -> Result<(), RTresult> {
+    null_check(context)?;
+    null_check(count)?;
+    *count = 1;
+    Ok(())
+}
+
+/// Reports the devices used by `context`: a single entry, device ordinal 0.
+///
+/// Both `context` and the output pointer `devices` are null-checked; exactly
+/// one `i32` is written.
+pub(crate) unsafe fn get_devices(context: Context, devices: *mut i32) -> Result<(), RTresult> {
+    null_check(context)?;
+    null_check(devices)?;
+    *devices = 0;
+    Ok(())
+}
+
+/// Finds the buffer whose `index` equals `buffer_id` and writes its handle to
+/// `*buffer_output`.
+///
+/// The lookup is a linear scan over `context.buffers`. When no buffer matches,
+/// `*buffer_output` is set to null and `RT_ERROR_INVALID_VALUE` is returned.
+// Used only during Arnold deinitialization
+pub(crate) unsafe fn get_buffer_from_id(
+ context: Context,
+ buffer_id: i32,
+ buffer_output: *mut Buffer,
+) -> Result<(), RTresult> {
+ null_check(buffer_output)?;
+ let context = null_unwrap(context)?;
+ let context = context.borrow()?;
+ for buffer_rc in context.buffers.iter() {
+ let buffer = buffer_rc.borrow()?;
+ // The signed id is reinterpreted as `u32` for the comparison.
+ if buffer.index == (buffer_id as u32) {
+ *buffer_output = Rc::as_ptr(buffer_rc);
+ return Ok(());
+ }
+ }
+ *buffer_output = ptr::null_mut();
+ Err(RTresult::RT_ERROR_INVALID_VALUE)
+}
+
+// Smoke test for context lifetime, instantiated through the `optix_test!` macro.
+#[cfg(test)]
+mod tests {
+ use crate::optix_test;
+ use crate::test_common::OptixFns;
+ use std::ptr;
+
+ optix_test!(create_destroy_context);
+
+ // Creates a context and destroys it again; return codes are not inspected.
+ unsafe fn create_destroy_context<Optix: OptixFns>(o: Optix) {
+ let mut ctx = ptr::null_mut();
+ o.rtContextCreate(&mut ctx);
+ o.rtContextDestroy(ctx);
+ }
+}
diff --git a/zluda_rt/src/eptx.rs b/zluda_rt/src/eptx.rs new file mode 100644 index 0000000..85d4d55 --- /dev/null +++ b/zluda_rt/src/eptx.rs @@ -0,0 +1,109 @@ +use generic_array::arr;
+use generic_array::GenericArray;
+use sha2::Digest;
+use sha2::Sha256;
+use std::convert::TryInto;
+use std::io::Write;
+use std::mem;
+use std::u8;
+use typenum::{U16, U2, U32};
+
+// Fixed key material appended to the vendor key before hashing in `decode_ptx`.
+const OPTIX_KEY: &'static [u8] =
+ b"-3343556356fgfgfdessss-(--9355-489795-2333354:[]}}{[]523552%GWEf";
+
+// XOR mask applied by `decode_remainder` to the bytes left over after the last full 8-byte block.
+const REMAINDER_KEY: [u8; 7] = [164, 195, 147, 255, 203, 161, 184];
+
+/// Decodes an encrypted PTX blob in place and returns the decoded prefix of
+/// `content`.
+///
+/// Key derivation, as implemented below:
+/// 1. `hexed_key = hex(sha256(vendor_key || OPTIX_KEY))`
+/// 2. `session = sha256(optix_salt || hexed_key || vendor_salt)`
+/// 3. the 32-byte session hash is folded to 16 bytes by `reduce_key`.
+///
+/// The payload is first un-escaped by `normalize`, then full 8-byte blocks are
+/// decrypted with the reduced key and the trailing bytes are XOR-ed with
+/// `REMAINDER_KEY`.
+pub(crate) fn decode_ptx<'a>(
+ content: &'a mut [u8],
+ optix_salt: &[u8],
+ vendor_salt: &[u8],
+ vendor_key: &[u8],
+) -> &'a mut [u8] {
+ let content = normalize(content);
+ let concatenated_key = [&vendor_key[..], &OPTIX_KEY[..]].concat();
+ let hashed_key = sha256(&concatenated_key[..]);
+ let hexed_key = to_hex_string(hashed_key);
+ let session_key_input = [&optix_salt[..], &hexed_key[..], &vendor_salt[..]].concat();
+ let mut hashed_session_key = sha256(&session_key_input[..]);
+ let reduced_key = reduce_key(&mut hashed_session_key);
+ decode(content, &*reduced_key);
+ decode_remainder(content);
+ content
+}
+
+/// Decrypts every complete 8-byte block of `content` in place.
+///
+/// Trailing bytes that do not fill a whole block are left untouched.
+fn decode(content: &mut [u8], reduced_key: &GenericArray<u8, U16>) {
+    for chunk in content.chunks_exact_mut(8) {
+        // `chunks_exact_mut(8)` only yields 8-byte slices, so the conversion cannot fail.
+        let block: &mut [u8; 8] = chunk.try_into().unwrap();
+        unsafe { decrypt_block(block, reduced_key) };
+    }
+}
+
+/// XORs the bytes after the last complete 8-byte block with `REMAINDER_KEY`.
+///
+/// At most 7 bytes remain, which matches the length of the key.
+fn decode_remainder(content: &mut [u8]) {
+    let tail_start = content.len() - content.len() % 8;
+    let tail = &mut content[tail_start..];
+    for (byte, mask) in tail.iter_mut().zip(REMAINDER_KEY.iter()) {
+        *byte ^= *mask;
+    }
+}
+
+/// Returns the SHA-256 digest of `content`.
+fn sha256(content: &[u8]) -> GenericArray<u8, U32> {
+ let mut hasher = Sha256::new();
+ hasher.update(content);
+ hasher.finalize()
+}
+
+/// Formats a 32-byte hash as 64 lower-case ASCII hex digits.
+fn to_hex_string(hash: GenericArray<u8, U32>) -> [u8; 64] {
+ let mut result = [0u8; 64];
+ for (idx, c) in hash.into_iter().enumerate() {
+ // Each byte fills exactly its own two-byte window, so the write cannot come up short.
+ write!(&mut result[idx * 2..idx * 2 + 2], "{:02x}", c).unwrap();
+ }
+ result
+}
+
+/// Folds a 32-byte hash into a 16-byte key, in place.
+///
+/// Byte `i` becomes `content[i] + content[i + 16]` (wrapping); the returned
+/// reference aliases the first 16 bytes of `content`.
+fn reduce_key<'a>(content: &'a mut GenericArray<u8, U32>) -> &'a mut GenericArray<u8, U16> {
+ for i in 0usize..16 {
+ content[i] = content[i].wrapping_add(content[i + 16]);
+ }
+ GenericArray::from_mut_slice(&mut content.as_mut_slice()[..16])
+}
+
+/// Decrypts one 8-byte block in place with a 16-byte key.
+///
+/// The round function has the shape of TEA decryption, run for 16 rounds:
+/// `sum` starts at `16 * delta` (mod 2^32) and is decreased by `delta` after
+/// every round. Block and key words are taken in native byte order.
+///
+/// # Safety
+///
+/// The function performs raw reads of the key as `u32` words. The key is a
+/// byte array with alignment 1, so the words are read with `read_unaligned`;
+/// all four reads stay inside the 16 key bytes.
+unsafe fn decrypt_block(block: &mut [u8; 8], key: &GenericArray<u8, U16>) {
+    let delta: u32 = 0x9E3779B9;
+    let mut sum: u32 = 0xE3779B90;
+    // By-value transmute: copies the bytes, so block alignment is irrelevant.
+    let v = mem::transmute::<[u8; 8], [u32; 2]>(*block);
+    let mut v0 = v[0];
+    let mut v1 = v[1];
+    // A plain `*ptr` dereference would require 4-byte alignment that a
+    // `GenericArray<u8, _>` does not guarantee.
+    let key = key.as_ptr() as *const u32;
+    let k0 = key.add(0).read_unaligned();
+    let k1 = key.add(1).read_unaligned();
+    let k2 = key.add(2).read_unaligned();
+    let k3 = key.add(3).read_unaligned();
+    for _ in 0..16 {
+        v1 = v1.wrapping_sub(
+            ((v0 << 4).wrapping_add(k2)) ^ (v0.wrapping_add(sum)) ^ ((v0 >> 5).wrapping_add(k3)),
+        );
+        v0 = v0.wrapping_sub(
+            ((v1 << 4).wrapping_add(k0)) ^ (v1.wrapping_add(sum)) ^ ((v1 >> 5).wrapping_add(k1)),
+        );
+        sum = sum.wrapping_sub(delta)
+    }
+    *block = std::mem::transmute::<GenericArray<u32, U2>, [u8; 8]>(arr![u32; v0, v1]);
+}
+
+/// Strips the 8-byte header and resolves escape sequences, in place.
+///
+/// Starting at offset 8, every byte is copied to the front of `content`. The
+/// byte `1` is an escape marker: it is dropped and the byte that follows is
+/// emitted decremented by one (wrapping). The returned slice is the compacted
+/// prefix of `content`.
+///
+/// Malformed input is tolerated instead of panicking:
+/// * input shorter than the header yields an empty slice,
+/// * an escape marker with nothing after it ends the output,
+/// * an escaped `0` wraps around to `255`.
+fn normalize<'a>(content: &'a mut [u8]) -> &'a mut [u8] {
+    const HEADER_LEN: usize = 8;
+    const ESCAPE: u8 = 1;
+    let mut to = 0;
+    let mut from = HEADER_LEN;
+    while from < content.len() {
+        let mut c = content[from];
+        if c == ESCAPE {
+            from += 1;
+            match content.get(from) {
+                Some(&escaped) => c = escaped.wrapping_sub(1),
+                // Truncated escape sequence: nothing left to decode.
+                None => break,
+            }
+        }
+        // `to` always trails `from`, so the write never clobbers unread input.
+        content[to] = c;
+        from += 1;
+        to += 1;
+    }
+    &mut content[..to]
+}
diff --git a/zluda_rt/src/geometry.rs b/zluda_rt/src/geometry.rs new file mode 100644 index 0000000..04437fb --- /dev/null +++ b/zluda_rt/src/geometry.rs @@ -0,0 +1,147 @@ +use crate::{
+ context::{self, Context, ContextData},
+ null_check, null_unwrap,
+ program::{Program, ProgramData},
+ variable::{Variable, VariableData},
+ MaybeWeakRefMut, OptixCell, OptixObjectData, TypeTag,
+};
+use optix_types::*;
+use rustc_hash::FxHashMap;
+use std::{
+ ffi::{CStr, CString},
+ ptr,
+ rc::{Rc, Weak},
+};
+
+/// Raw handle to a geometry object, as passed across the C API.
+pub(crate) type Geometry = *const OptixCell<GeometryData>;
+
+/// State of a custom (program-defined) geometry.
+pub(crate) struct GeometryData {
+ // Owning context, held weakly.
+ pub(crate) context: Weak<OptixCell<ContextData>>,
+ // Intersection and bounding box programs are held weakly.
+ pub(crate) program_intersection: Option<Weak<OptixCell<ProgramData>>>,
+ pub(crate) program_bounding_box: Option<Weak<OptixCell<ProgramData>>>,
+ // Variables declared on this geometry, keyed by name.
+ pub(crate) variables: FxHashMap<CString, Rc<OptixCell<VariableData>>>,
+ pub(crate) primitive_count: u32,
+}
+
+impl GeometryData {
+    /// Builds an empty geometry bound to `weak_context`: no programs, no
+    /// variables and a primitive count of zero. The context data itself is
+    /// not consulted.
+    fn new(weak_context: Weak<OptixCell<ContextData>>, _: &mut ContextData) -> Self {
+        Self {
+            context: weak_context,
+            program_intersection: None,
+            program_bounding_box: None,
+            variables: FxHashMap::default(),
+            primitive_count: 0,
+        }
+    }
+
+    /// Hands ownership of the new geometry to the context's `geometry` set.
+    fn register(this: Rc<OptixCell<Self>>, context: &mut ContextData) {
+        context.geometry.insert(this);
+    }
+
+    /// Creates a geometry inside `context` and returns its raw handle.
+    ///
+    /// `context` must be a valid, non-null context handle.
+    unsafe fn create(context: Context) -> Result<Geometry, RTresult> {
+        context::create_subobject(context, Self::new, Self::register)
+    }
+}
+
+impl OptixObjectData for GeometryData {
+ const TYPE: TypeTag = TypeTag::Geometry;
+
+ // Removes this geometry from the owning context's set; a context that is
+ // already gone is not an error.
+ fn deregister(&mut self, this: &Rc<OptixCell<Self>>) -> Result<(), RTresult> {
+ if let Some(context) = self.context.upgrade() {
+ let mut context = (*context).borrow_mut()?;
+ context.geometry.remove(this);
+ }
+ Ok(())
+ }
+
+ // Exposes the weak back-reference to the owning context.
+ fn context<'a>(&'a mut self) -> crate::MaybeWeakRefMut<'a, ContextData> {
+ MaybeWeakRefMut::Weak(&self.context)
+ }
+}
+
+/// Creates a geometry in `context` and writes its handle to `*geometry`.
+pub(crate) unsafe fn create(context: Context, geometry: *mut Geometry) -> Result<(), RTresult> {
+ null_check(context)?;
+ null_check(geometry)?;
+ *geometry = GeometryData::create(context)?;
+ Ok(())
+}
+
+/// Stores the number of primitives of `geometry`.
+pub(crate) unsafe fn set_primitive_count(
+ geometry: Geometry,
+ primitive_count: u32,
+) -> Result<(), RTresult> {
+ let geometry = null_unwrap(geometry)?;
+ let mut geometry = geometry.borrow_mut()?;
+ geometry.primitive_count = primitive_count;
+ Ok(())
+}
+
+/// Sets the bounding box program of `geometry`.
+///
+/// Only a weak reference to `program` is stored.
+pub(crate) unsafe fn set_bounding_box_program(
+ geometry: Geometry,
+ program: Program,
+) -> Result<(), RTresult> {
+ null_check(program)?;
+ let geometry = null_unwrap(geometry)?;
+ let mut geometry = geometry.borrow_mut()?;
+ geometry.program_bounding_box = Some(OptixCell::clone_weak(program));
+ Ok(())
+}
+
+/// Sets the intersection program of `geometry`.
+///
+/// Only a weak reference to `program` is stored.
+pub(crate) unsafe fn set_intersection_program(
+ geometry: Geometry,
+ program: Program,
+) -> Result<(), RTresult> {
+ null_check(program)?;
+ let geometry = null_unwrap(geometry)?;
+ let mut geometry = geometry.borrow_mut()?;
+ geometry.program_intersection = Some(OptixCell::clone_weak(program));
+ Ok(())
+}
+
+/// Declares a variable called `name` on `geometry` and returns its handle in
+/// `*v`.
+///
+/// The variable is stored in the geometry's `variables` map under an owned
+/// copy of `name`; an existing entry with the same name is replaced by the
+/// insert. The handle written to `*v` is a non-owning pointer into the stored
+/// `Rc`.
+///
+/// `name`, `v` and `geometry` are all null-checked before use.
+pub(crate) unsafe fn declare_variable(
+    geometry: Geometry,
+    name: *const i8,
+    v: *mut Variable,
+) -> Result<(), RTresult> {
+    // `CStr::from_ptr` must never see a null pointer.
+    null_check(name)?;
+    null_check(v)?;
+    let geometry = null_unwrap(geometry)?;
+    let mut geometry = geometry.borrow_mut()?;
+    let variable = VariableData::new(&mut *geometry)?;
+    let name = CStr::from_ptr(name).to_owned();
+    let result = Rc::as_ptr(&variable);
+    geometry.variables.insert(name, variable);
+    *v = result;
+    Ok(())
+}
+
+/// Looks up the variable `name` on `geometry` and writes its handle to `*v`.
+///
+/// A missing variable is not an error: `*v` is set to null and `Ok(())` is
+/// returned.
+pub(crate) unsafe fn query_variable(
+ geometry: Geometry,
+ name: *const i8,
+ v: *mut Variable,
+) -> Result<(), RTresult> {
+ null_check(name)?;
+ null_check(v)?;
+ let geometry = null_unwrap(geometry)?;
+ let geometry = (geometry).borrow()?;
+ *v = geometry
+ .variables
+ .get(CStr::from_ptr(name))
+ .map(|variable| Rc::as_ptr(variable))
+ .unwrap_or(ptr::null_mut());
+ Ok(())
+}
+
+/// Destruction stub: does nothing and always succeeds, so the geometry stays
+/// registered in its context.
+pub(crate) fn destroy(_geometry: Geometry) -> Result<(), RTresult> {
+ // TODO: implement
+ Ok(())
+}
+
+/// Writes the handle of the context that owns `geometry` to `*context`.
+///
+/// The handle is taken from the geometry's weak back-reference and does not
+/// carry a reference count of its own. Both the output pointer and `geometry`
+/// are null-checked.
+pub(crate) unsafe fn get_context(
+    geometry: *const OptixCell<GeometryData>,
+    context: *mut *const OptixCell<ContextData>,
+) -> Result<(), RTresult> {
+    null_check(context)?;
+    let geometry = null_unwrap(geometry)?;
+    let geometry = geometry.borrow()?;
+    *context = geometry.context.as_ptr();
+    Ok(())
+}
diff --git a/zluda_rt/src/geometry_group.rs b/zluda_rt/src/geometry_group.rs new file mode 100644 index 0000000..2e127ac --- /dev/null +++ b/zluda_rt/src/geometry_group.rs @@ -0,0 +1,316 @@ +use crate::{
+ acceleration::{Acceleration, AccelerationData, AccelerationOwner},
+ context::{self, Context, ContextData},
+ geometry_instance::{DeviceGeometryInstance, GeometryInstance, GeometryInstanceData},
+ hip, hiprt, null_check, null_unwrap,
+ repr_gpu::{self, TrivialHIPAllocator},
+ MaybeWeakRefMut, OptixCell, OptixObjectData, TypeTag,
+};
+use hip_runtime_sys::*;
+use hiprt_sys::*;
+use optix_types::*;
+use std::{
+ alloc::Layout,
+ ptr,
+ rc::{Rc, Weak},
+};
+
+/// Raw handle to a geometry group, as passed across the C API.
+pub(crate) type GeometryGroup = *const OptixCell<GeometryGroupData>;
+
+/// State of a geometry group: an acceleration structure plus a list of
+/// geometry instance slots.
+pub(crate) struct GeometryGroupData {
+ // Owning context, held weakly.
+ pub(crate) context: Weak<OptixCell<ContextData>>,
+ pub(crate) acceleration: Option<Rc<OptixCell<AccelerationData>>>,
+ // Child slots; `None` marks a slot that has not been assigned yet.
+ pub(crate) children: Vec<Option<Rc<OptixCell<GeometryInstanceData>>>>,
+ // Taken from the context's `geometry_group_count` at creation time.
+ pub(crate) index: u32,
+ // Instance mask applied to every child; defaults to all bits set.
+ pub(crate) visibility: u32,
+}
+
+impl GeometryGroupData {
+ /// Builds an empty group and assigns it the next index from the context's
+ /// `geometry_group_count`, which is incremented as a side effect.
+ fn new(weak_context: Weak<OptixCell<ContextData>>, context: &mut ContextData) -> Self {
+ let index = context.geometry_group_count;
+ context.geometry_group_count += 1;
+ Self {
+ context: weak_context,
+ acceleration: None,
+ children: Vec::new(),
+ index,
+ visibility: !0u32,
+ }
+ }
+
+ /// Hands ownership of the new group to the context's `geometry_groups` set.
+ fn register(this: Rc<OptixCell<Self>>, context: &mut ContextData) {
+ context.geometry_groups.insert(this);
+ }
+
+ /// Creates a geometry group inside `context` and returns its raw handle.
+ unsafe fn create(context: Context) -> Result<GeometryGroup, RTresult> {
+ context::create_subobject(context, Self::new, Self::register)
+ }
+
+ /// Builds the device-side data for this group and returns the handles in a
+ /// `BvhDetails`.
+ ///
+ /// In order, this:
+ /// 1. requires an acceleration to be set and derives the build flags from it,
+ /// 2. allocates a custom function table and uploads the attribute call chain,
+ /// 3. uploads one hit chain per ray type (a `HitProgramChain` prologue
+ ///    followed by the any-hit and the closest-hit chains),
+ /// 4. prepares every child instance and registers the custom function set of
+ ///    each custom-geometry child,
+ /// 5. uploads per-instance geometries, frames and visibility masks and builds
+ ///    the scene.
+ ///
+ /// A missing acceleration or an unassigned child slot yields
+ /// `RT_ERROR_UNKNOWN`.
+ pub(crate) fn prepare_globals(
+ &self,
+ allocator: &mut TrivialHIPAllocator,
+ ctx: &ContextData,
+ global_state: &mut repr_gpu::GlobalState,
+ ) -> Result<BvhDetails, RTresult> {
+ let ray_type_count = ctx.ray_type_count;
+ let acceleration = self
+ .acceleration
+ .as_ref()
+ .ok_or(RTresult::RT_ERROR_UNKNOWN)?;
+ let acceleration = acceleration.borrow()?;
+ let build_flags = acceleration.to_hiprt().ok_or(RTresult::RT_ERROR_UNKNOWN)?;
+ let mut hiprt_instances_host = Vec::with_capacity(self.children.len());
+ let custom_func_table = allocator.new_func_table()?;
+ // Counts custom-geometry children only; it is the index into the function table.
+ let mut custom_func_set_counter = 0u32;
+ let attribute_visitor = repr_gpu::AttributesVisitCallChain {
+ context: ctx,
+ children: &*self.children,
+ };
+ let attribute_chain_layout = repr_gpu::get_layout(ray_type_count, &attribute_visitor)?;
+ let attribute_call_chain = allocator.allocate(attribute_chain_layout.layout.size())?;
+ repr_gpu::copy_to_gpu(
+ ray_type_count,
+ &attribute_visitor,
+ &attribute_chain_layout,
+ attribute_call_chain,
+ )?;
+ let hit_chains = (0..ray_type_count).map(|ray| {
+ let prologue_layout = Layout::new::<repr_gpu::HitProgramChain>();
+ let any_hit_visitor = repr_gpu::HitProgramsVisitCallChain {
+ closest_hit: false,
+ ray,
+ children: &self.children,
+ context: ctx,
+ };
+ let closest_hit_visitor = repr_gpu::HitProgramsVisitCallChain {
+ closest_hit: true,
+ ray,
+ children: &self.children,
+ context: ctx,
+ };
+ let any_hit_chain_layout = repr_gpu::get_layout(ray_type_count, &any_hit_visitor)?;
+ let closest_hit_chain_layout = repr_gpu::get_layout(ray_type_count, &closest_hit_visitor)?;
+ // `Layout::extend` returns (combined layout, offset of the appended part).
+ let with_any_hit = prologue_layout
+ .extend(any_hit_chain_layout.layout)
+ .map_err(|_| RTresult::RT_ERROR_UNKNOWN)?;
+ let with_closest_hit = with_any_hit
+ .0
+ .extend(closest_hit_chain_layout.layout)
+ .map_err(|_| RTresult::RT_ERROR_UNKNOWN)?;
+ let hit_chain_gpu = allocator.allocate(with_closest_hit.0.size())?;
+ let prolog = repr_gpu::HitProgramChain {
+ any_hit_start: with_any_hit.1 as u32,
+ closest_hit_start: with_closest_hit.1 as u32,
+ };
+ hip! { hipMemcpyHtoD(hit_chain_gpu, &prolog as *const _ as _, prologue_layout.size()), RT_ERROR_UNKNOWN };
+ repr_gpu::copy_to_gpu(
+ ray_type_count,
+ &any_hit_visitor,
+ &any_hit_chain_layout,
+ hipDeviceptr_t(unsafe { hit_chain_gpu.0.cast::<u8>().add(with_any_hit.1).cast() }),
+ )?;
+ repr_gpu::copy_to_gpu(
+ ray_type_count,
+ &closest_hit_visitor,
+ &closest_hit_chain_layout,
+ hipDeviceptr_t(unsafe {
+ hit_chain_gpu.0.cast::<u8>().add(with_closest_hit.1).cast()
+ }),
+ )?;
+ Ok(hit_chain_gpu)
+ }).collect::<Result<Vec<_>, _>>()?;
+ for instance in self.children.iter() {
+ let instance = instance.as_ref().ok_or(RTresult::RT_ERROR_UNKNOWN)?;
+ let instance = instance.borrow()?;
+ // Children of a geometry group get a null transform block.
+ match instance.prepare_globals(
+ allocator,
+ ctx,
+ build_flags,
+ ray_type_count,
+ custom_func_set_counter,
+ global_state,
+ hipDeviceptr_t(ptr::null_mut()),
+ )? {
+ DeviceGeometryInstance::Geometry {
+ custom_func_set,
+ hiprt_geometry,
+ } => {
+ hiprt_instances_host.push(hiprt_geometry);
+ hiprt! { ctx.hiprt.hiprtSetCustomFuncTable(ctx.context, custom_func_table, custom_func_set_counter, custom_func_set), RT_ERROR_UNKNOWN };
+ custom_func_set_counter += 1;
+ }
+ DeviceGeometryInstance::GeometryTriangles { hiprt_geometry } => {
+ hiprt_instances_host.push(hiprt_geometry);
+ }
+ };
+ }
+ // One frame per child: zero translation, unit scale, rotation (0, 0, 1, 0).
+ // NOTE(review): presumably an identity transform (axis z, angle 0) — confirm
+ // against the hiprtFrame definition.
+ let instance_frames_host = vec![
+ hiprtFrame {
+ translation: hiprtFloat3 {
+ x: 0f32,
+ y: 0f32,
+ z: 0f32,
+ },
+ scale: hiprtFloat3 {
+ x: 1f32,
+ y: 1f32,
+ z: 1f32,
+ },
+ rotation: hiprtFloat4 {
+ x: 0f32,
+ y: 0f32,
+ z: 1.0f32,
+ w: 0f32,
+ },
+ time: 0f32,
+ pad: 0,
+ };
+ self.children.len()
+ ];
+ let instance_geometries = allocator.copy_to_device(&hiprt_instances_host[..])?;
+ let instance_frames = allocator.copy_to_device(&instance_frames_host[..])?;
+ // Every child shares the group's visibility mask.
+ let instance_mask_host = vec![self.visibility; self.children.len()];
+ let instance_masks = allocator.copy_to_device(&instance_mask_host[..])?;
+ let scene_input = hiprtSceneBuildInput {
+ instanceCount: self.children.len() as u32,
+ instanceGeometries: instance_geometries.0,
+ frameCount: self.children.len() as u32,
+ instanceFrames: instance_frames.0,
+ nodes: ptr::null_mut(),
+ instanceTransformHeaders: ptr::null_mut(),
+ instanceMasks: instance_masks.0,
+ };
+ let build_options = hiprtBuildOptions {
+ buildFlags: build_flags.0,
+ };
+ let transform_blocks = vec![hipDeviceptr_t(ptr::null_mut()); self.children.len()];
+ let transform_blocks = allocator.copy_to_device(&transform_blocks)?;
+ let hit_chains = allocator.copy_to_device(&hit_chains)?;
+ let scene = allocator.new_scene(scene_input, build_options)?;
+ Ok(BvhDetails {
+ scene,
+ attribute_call_chain,
+ func_set: custom_func_table,
+ transform_blocks,
+ hit_chains,
+ })
+ }
+}
+
+impl OptixObjectData for GeometryGroupData {
+ const TYPE: TypeTag = TypeTag::GeometryGroup;
+
+ // Removes this group from the owning context's set; a context that is
+ // already gone is not an error.
+ fn deregister(&mut self, this: &Rc<OptixCell<Self>>) -> Result<(), RTresult> {
+ if let Some(context) = self.context.upgrade() {
+ let mut context = (*context).borrow_mut()?;
+ context.geometry_groups.remove(this);
+ }
+ Ok(())
+ }
+
+ // Exposes the weak back-reference to the owning context.
+ fn context<'a>(&'a mut self) -> crate::MaybeWeakRefMut<'a, ContextData> {
+ MaybeWeakRefMut::Weak(&self.context)
+ }
+}
+
+/// Handles produced by `GeometryGroupData::prepare_globals`.
+///
+/// The struct is `#[repr(C)]`, so the field order below is part of its layout.
+#[repr(C)]
+pub(crate) struct BvhDetails {
+ pub(crate) scene: hiprtScene,
+ pub(crate) func_set: hiprtCustomFuncTable,
+ pub(crate) attribute_call_chain: hipDeviceptr_t,
+ pub(crate) transform_blocks: hipDeviceptr_t,
+ pub(crate) hit_chains: hipDeviceptr_t,
+}
+
+/// Creates a geometry group in `context` and writes its handle to
+/// `*geometry_group`.
+pub(crate) unsafe fn create(
+ context: Context,
+ geometry_group: *mut GeometryGroup,
+) -> Result<(), RTresult> {
+ null_check(context)?;
+ null_check(geometry_group)?;
+ *geometry_group = GeometryGroupData::create(context)?;
+ Ok(())
+}
+
+/// Attaches `acceleration` to `geometry_group` and records the group as the
+/// acceleration's owner.
+///
+/// The group holds a strong reference to the acceleration, while the
+/// acceleration refers back to the group weakly. The two objects are borrowed
+/// mutably one after the other, never at the same time.
+pub(crate) unsafe fn set_acceleration(
+ geometry_group: GeometryGroup,
+ acceleration: Acceleration,
+) -> Result<(), RTresult> {
+ null_check(acceleration)?;
+ let geometry_group = null_unwrap(geometry_group)?;
+ let acceleration = null_unwrap(acceleration)?;
+ {
+ let mut geometry_group = geometry_group.borrow_mut()?;
+ geometry_group.acceleration = Some(OptixCell::clone_rc(acceleration));
+ }
+ {
+ let mut acceleration = acceleration.borrow_mut()?;
+ acceleration.owner = Some(AccelerationOwner::GeometryGroup(OptixCell::clone_weak(
+ geometry_group,
+ )));
+ }
+ Ok(())
+}
+
+/// Stores `geometry_instance` in child slot `index` of `geometry_group`.
+///
+/// The slot must already exist (see `set_child_count`); an out-of-range index
+/// yields `RT_ERROR_INVALID_VALUE`. The group takes a strong reference.
+pub(crate) unsafe fn set_child(
+ geometry_group: GeometryGroup,
+ index: u32,
+ geometry_instance: GeometryInstance,
+) -> Result<(), RTresult> {
+ null_check(geometry_instance)?;
+ let geometry_group = null_unwrap(geometry_group)?;
+ let mut geometry_group = geometry_group.borrow_mut()?;
+ match geometry_group.children.get_mut(index as usize) {
+ Some(instance_slot) => {
+ *instance_slot = Some(OptixCell::clone_rc(geometry_instance));
+ Ok(())
+ }
+ None => Err(RTresult::RT_ERROR_INVALID_VALUE),
+ }
+}
+
+/// Resizes the child list of `geometry_group` to `count` slots.
+///
+/// Slots added by growing are `None`; slots past `count` are dropped.
+pub(crate) unsafe fn set_child_count(
+ geometry_group: GeometryGroup,
+ count: u32,
+) -> Result<(), RTresult> {
+ let geometry_group = null_unwrap(geometry_group)?;
+ let mut geometry_group = geometry_group.borrow_mut()?;
+ geometry_group.children.resize(count as usize, None);
+ Ok(())
+}
+
+/// Writes the number of child slots of `geometry_group` to `*count`.
+///
+/// Unassigned (`None`) slots are included in the count.
+pub(crate) unsafe fn get_child_count(
+ geometry_group: GeometryGroup,
+ count: *mut u32,
+) -> Result<(), RTresult> {
+ null_check(count)?;
+ let geometry_group = null_unwrap(geometry_group)?;
+ let geometry_group = geometry_group.borrow()?;
+ *count = geometry_group.children.len() as u32;
+ Ok(())
+}
+
+/// Stores the visibility mask of `geometry_group`.
+///
+/// The mask is applied to every child instance when the scene is built in
+/// `prepare_globals`.
+pub(crate) unsafe fn set_visibility_mask(
+ geometry_group: *const OptixCell<GeometryGroupData>,
+ mask: u32,
+) -> Result<(), RTresult> {
+ let geometry_group = null_unwrap(geometry_group)?;
+ let mut geometry_group = geometry_group.borrow_mut()?;
+ geometry_group.visibility = mask;
+ Ok(())
+}
+
+/// Writes the handle of the context that owns `geometrygroup` to `*context`.
+///
+/// The handle is taken from the group's weak back-reference and does not carry
+/// a reference count of its own. Both the output pointer and `geometrygroup`
+/// are null-checked.
+pub(crate) unsafe fn get_context(
+    geometrygroup: GeometryGroup,
+    context: *mut Context,
+) -> Result<(), RTresult> {
+    null_check(context)?;
+    let geometrygroup = null_unwrap(geometrygroup)?;
+    let geometrygroup = geometrygroup.borrow()?;
+    *context = geometrygroup.context.as_ptr();
+    Ok(())
+}
+
+/// Destruction stub: does nothing and always succeeds, so the group stays
+/// registered in its context.
+pub(crate) fn destroy(_geometrygroup: GeometryGroup) -> Result<(), RTresult> {
+ // TODO: implement
+ Ok(())
+}
diff --git a/zluda_rt/src/geometry_instance.rs b/zluda_rt/src/geometry_instance.rs new file mode 100644 index 0000000..a748c9d --- /dev/null +++ b/zluda_rt/src/geometry_instance.rs @@ -0,0 +1,411 @@ +use crate::{
+ context::{self, Context, ContextData},
+ geometry::{Geometry, GeometryData},
+ geometry_triangles::{GeometryTriangles, GeometryTrianglesData},
+ hip,
+ material::{Material, MaterialData},
+ null_check, null_unwrap,
+ program::ProgramData,
+ repr_gpu::{self, TrivialHIPAllocator},
+ variable::{Variable, VariableData},
+ MaybeWeakRefMut, OptixCell, OptixObjectData, TypeTag,
+};
+use hip_runtime_sys::*;
+use hiprt_sys::*;
+use optix_types::*;
+use rustc_hash::FxHashMap;
+use std::{
+ alloc::{Layout, LayoutError},
+ ffi::{c_void, CStr, CString},
+ mem, ptr,
+ rc::{Rc, Weak},
+};
+
+/// Raw handle to a geometry instance, as passed across the C API.
+pub(crate) type GeometryInstance = *const OptixCell<GeometryInstanceData>;
+
+/// State of a geometry instance: one geometry child, its material slots and
+/// the variables declared on the instance.
+pub(crate) struct GeometryInstanceData {
+ // Owning context, held weakly.
+ pub(crate) context: Weak<OptixCell<ContextData>>,
+ // Custom geometry, triangles, or `None` while unassigned.
+ pub(crate) child: GeometryInstanceChild,
+ pub(crate) materials: Vec<Option<Rc<OptixCell<MaterialData>>>>,
+ pub(crate) variables: FxHashMap<CString, Rc<OptixCell<VariableData>>>,
+}
+
+impl GeometryInstanceData {
+ /// Builds an empty instance bound to `weak_context`: no child, no
+ /// materials, no variables. The context data itself is not consulted.
+ fn new(weak_context: Weak<OptixCell<ContextData>>, _: &mut ContextData) -> Self {
+ Self {
+ context: weak_context,
+ child: GeometryInstanceChild::None,
+ materials: Vec::new(),
+ variables: FxHashMap::default(),
+ }
+ }
+
+ /// Hands ownership of the new instance to the context's
+ /// `geometry_instances` set.
+ fn register(this: Rc<OptixCell<Self>>, context: &mut ContextData) {
+ context.geometry_instances.insert(this);
+ }
+
+ /// Creates a geometry instance inside `context` and returns its raw handle.
+ unsafe fn create(context: Context) -> Result<GeometryInstance, RTresult> {
+ context::create_subobject(context, Self::new, Self::register)
+ }
+
+ /// Builds the device-side geometry for this instance.
+ ///
+ /// Dispatches on the child kind: custom geometry goes through
+ /// `prepare_custom_geometry`, triangles through `prepare_triangles`. An
+ /// instance without a child yields `RT_ERROR_INVALID_CONTEXT`.
+ pub(crate) fn prepare_globals(
+ &self,
+ allocator: &mut TrivialHIPAllocator,
+ context: &ContextData,
+ build_flags: hiprtBuildFlagBits,
+ ray_type_count: u32,
+ custom_func_set_counter: u32,
+ global_state: &mut repr_gpu::GlobalState,
+ transform_block: hipDeviceptr_t,
+ ) -> Result<DeviceGeometryInstance, RTresult> {
+ match self.child {
+ GeometryInstanceChild::None => return Err(RTresult::RT_ERROR_INVALID_CONTEXT),
+ GeometryInstanceChild::Geometry(ref geometry) => self.prepare_custom_geometry(
+ allocator,
+ context,
+ build_flags,
+ ray_type_count,
+ custom_func_set_counter,
+ geometry,
+ transform_block,
+ global_state
+ ),
+ // The triangle path ignores the context, ray type count and transform block.
+ GeometryInstanceChild::GeometryTriangles(ref triangles) => {
+ self.prepare_triangles(allocator, build_flags, triangles)
+ }
+ }
+ }
+
+ /// Builds a hiprt AABB-list geometry for a custom (program-defined) geometry.
+ ///
+ /// In order, this:
+ /// 1. resolves the intersection and bounding box programs (both required;
+ ///    a missing or dropped program yields `RT_ERROR_INVALID_CONTEXT`),
+ /// 2. prepares the variable block for the bounding box program and the
+ ///    input block for the intersection program,
+ /// 3. launches the bounding box kernel once per primitive to fill a device
+ ///    buffer that reserves 8 `f32` per primitive,
+ /// 4. builds the hiprt geometry from that buffer, tagged with
+ ///    `custom_func_set_counter` as its custom type.
+ ///
+ /// Returns the geometry together with the custom function set that the
+ /// caller has to register under the same index.
+ fn prepare_custom_geometry(
+ &self,
+ allocator: &mut TrivialHIPAllocator,
+ context: &ContextData,
+ build_flags: hiprtBuildFlagBits,
+ ray_type_count: u32,
+ custom_func_set_counter: u32,
+ geometry: &Rc<OptixCell<GeometryData>>,
+ transform_block: hipDeviceptr_t,
+ global_state: &mut repr_gpu::GlobalState
+ ) -> Result<DeviceGeometryInstance, RTresult> {
+ let geometry = geometry.borrow()?;
+ let program_intersection = geometry
+ .program_intersection
+ .as_ref()
+ .ok_or(RTresult::RT_ERROR_INVALID_CONTEXT)?;
+ let program_intersection = program_intersection
+ .upgrade()
+ .ok_or(RTresult::RT_ERROR_INVALID_CONTEXT)?;
+ let program_intersection = program_intersection.borrow()?;
+ let program_bounding_box = geometry
+ .program_bounding_box
+ .as_ref()
+ .ok_or(RTresult::RT_ERROR_INVALID_CONTEXT)?;
+ let program_bounding_box = program_bounding_box
+ .upgrade()
+ .ok_or(RTresult::RT_ERROR_INVALID_CONTEXT)?;
+ let program_bounding_box = program_bounding_box.borrow()?;
+ let mut bb_variable_block = program_bounding_box
+ .prepare_variable_block_for_function_non_hit(allocator, self, &geometry, context)?;
+ let intersection_input = self.prepare_intersection_input(
+ allocator,
+ context,
+ ray_type_count,
+ &*program_intersection,
+ &*geometry,
+ transform_block,
+ )?;
+ let intersect_func = program_intersection.get_function()?;
+ let mut bounding_box_fn = program_bounding_box.get_function()?;
+ let custom_func_set = hiprtCustomFuncSet {
+ intersectFunc: unsafe { mem::transmute(intersect_func) },
+ intersectFuncData: intersection_input.0,
+ };
+ let mut bounding_box_primitive_count = geometry.primitive_count;
+ let kernel_bounding_box = program_bounding_box
+ .shared
+ .module
+ .get_function(ProgramData::KERNEL_BOUNDING_BOX_NAME)
+ .map_err(|_| RTresult::RT_ERROR_UNKNOWN)?;
+ // 8 floats are reserved per primitive; the same value is used as the AABB stride below.
+ let mut bounding_boxes_device = allocator
+ .allocate(mem::size_of::<f32>() * 8 * bounding_box_primitive_count as usize)?;
+ // Kernel arguments are passed as an array of pointers to the argument values.
+ let mut params = [
+ global_state as *mut repr_gpu::GlobalState as *mut c_void,
+ &mut bounding_box_fn as *mut _ as *mut c_void,
+ &mut bounding_box_primitive_count as *mut _ as _,
+ &mut bounding_boxes_device.0 as *mut _ as _,
+ &mut bb_variable_block as *mut _ as _,
+ ];
+ program_bounding_box
+ .shared
+ .module
+ .launch_kernel_1d(
+ kernel_bounding_box,
+ bounding_box_primitive_count,
+ 0,
+ ptr::null_mut(),
+ params.as_mut_ptr(),
+ )
+ .map_err(|_| RTresult::RT_ERROR_UNKNOWN)?;
+ let mut aabb_list = hiprtAABBListPrimitive {
+ aabbs: bounding_boxes_device.0,
+ aabbCount: bounding_box_primitive_count,
+ aabbStride: (mem::size_of::<f32>() * 8) as u32,
+ };
+ // `geometry_input` points at the local `aabb_list`, which stays alive until
+ // `new_geometry` below has returned.
+ let geometry_input = hiprtGeometryBuildInput {
+ type_: hiprtPrimitiveType::hiprtPrimitiveTypeAABBList,
+ __bindgen_anon_1: hiprtGeometryBuildInput__bindgen_ty_1 {
+ aabbList: hiprtGeometryBuildInput__bindgen_ty_1__bindgen_ty_2 {
+ primitive: &mut aabb_list,
+ customType: custom_func_set_counter,
+ },
+ },
+ nodes: ptr::null_mut(),
+ };
+ let build_options = hiprtBuildOptions {
+ buildFlags: build_flags.0,
+ };
+ let hiprt_geometry = allocator.new_geometry(geometry_input, build_options)?;
+ Ok(DeviceGeometryInstance::Geometry {
+ hiprt_geometry: hipDeviceptr_t(hiprt_geometry),
+ custom_func_set,
+ })
+ }
+
+ /// Builds the device buffer handed to the custom intersection function.
+ ///
+ /// The buffer holds the intersection program's variable block, whose first
+ /// bytes are overwritten with a `repr_gpu::IntersectionInput` prologue,
+ /// followed by the call chain described by `repr_gpu::IntersectVisitCallChain`.
+ /// `materials_start` in the prologue is the byte offset of that call chain.
+ ///
+ /// Returns the device pointer to the start of the buffer. Fails with
+ /// `RT_ERROR_UNKNOWN` if the layouts cannot be combined, if the variable block
+ /// is too small to hold the prologue, or if the host-to-device copy fails;
+ /// errors from the allocator and from `repr_gpu` are propagated.
+ fn prepare_intersection_input(
+     &self,
+     allocator: &mut TrivialHIPAllocator,
+     context: &ContextData,
+     ray_type_count: u32,
+     program: &ProgramData,
+     geometry: &GeometryData,
+     transform_block: hipDeviceptr_t,
+ ) -> Result<hipDeviceptr_t, RTresult> {
+     let visitor = repr_gpu::IntersectVisitCallChain {
+         context,
+         geometry_instance: self,
+     };
+     let layout_var_block = program.variables_block.layout;
+     let call_chain_layout = repr_gpu::get_layout(ray_type_count, &visitor)?;
+     // offsets[0] is the variable block, offsets[1] is the call chain.
+     let (total_layout, offsets) =
+         Self::layout_of_struct([layout_var_block, call_chain_layout.layout].iter().copied())
+             .map_err(|_| RTresult::RT_ERROR_UNKNOWN)?;
+     let intersection_input = allocator.allocate(total_layout.size())?;
+     let prologue = repr_gpu::IntersectionInput {
+         transform_block,
+         materials_start: offsets[1] as u32,
+     };
+     let mut staging_var_block = vec![0u8; layout_var_block.size()];
+     let prologue_size = mem::size_of::<repr_gpu::IntersectionInput>();
+     // Refuse to write past the end of the staging buffer.
+     if staging_var_block.len() < prologue_size {
+         return Err(RTresult::RT_ERROR_UNKNOWN);
+     }
+     // SAFETY: the source is a live, initialized `IntersectionInput`, the
+     // destination was just checked to hold at least `prologue_size` bytes, and
+     // the two do not overlap. Copying as bytes avoids requiring the `Vec<u8>`
+     // storage to be aligned for `IntersectionInput`.
+     unsafe {
+         ptr::copy_nonoverlapping(
+             &prologue as *const repr_gpu::IntersectionInput as *const u8,
+             staging_var_block.as_mut_ptr(),
+             prologue_size,
+         )
+     };
+     ProgramData::copy_variable_block(
+         &program.variables_block,
+         &mut staging_var_block,
+         |var_name| program.get_variable_for_function_non_hit(self, geometry, context, var_name),
+     )?;
+     hip! { hipMemcpyHtoD(intersection_input, staging_var_block.as_mut_ptr() as _, layout_var_block.size()), RT_ERROR_UNKNOWN };
+     // The call chain is written right behind the variable block.
+     repr_gpu::copy_to_gpu(
+         ray_type_count,
+         &visitor,
+         &call_chain_layout,
+         hipDeviceptr_t(unsafe { intersection_input.0.cast::<u8>().add(offsets[1]).cast() }),
+     )?;
+     Ok(intersection_input)
+ }
+
+ /// Lays `fields` out one after another, each aligned via `Layout::extend`.
+ ///
+ /// Returns the combined layout together with the byte offset of every field,
+ /// in input order. The combined layout is not padded to its own alignment.
+ /// Fails with `LayoutError` if extending the layout overflows.
+ fn layout_of_struct(
+     mut fields: impl Iterator<Item = Layout>,
+ ) -> Result<(Layout, Vec<usize>), LayoutError> {
+     let mut combined = Layout::new::<()>();
+     let mut field_offsets = Vec::new();
+     for field in &mut fields {
+         let (grown, offset) = combined.extend(field)?;
+         combined = grown;
+         field_offsets.push(offset);
+     }
+     Ok((combined, field_offsets))
+ }
+
+ /// Builds a hiprt triangle-mesh geometry from `triangles`.
+ ///
+ /// Returns `DeviceGeometryInstance::GeometryTriangles` wrapping the pointer
+ /// returned by the allocator's `new_geometry`. Errors from borrowing the
+ /// triangle data, from `to_hiprt` and from `new_geometry` are propagated.
+ fn prepare_triangles(
+     &self,
+     allocator: &mut TrivialHIPAllocator,
+     build_flags: hiprtBuildFlagBits,
+     triangles: &Rc<OptixCell<GeometryTrianglesData>>,
+ ) -> Result<DeviceGeometryInstance, RTresult> {
+     let mesh_data = triangles.borrow()?;
+     // The build input only stores a raw pointer to `mesh`, so it has to stay
+     // alive until `new_geometry` has returned.
+     let mut mesh = mesh_data.to_hiprt()?;
+     let input = hiprtGeometryBuildInput {
+         type_: hiprtPrimitiveType::hiprtPrimitiveTypeTriangleMesh,
+         __bindgen_anon_1: hiprtGeometryBuildInput__bindgen_ty_1 {
+             triangleMesh: hiprtGeometryBuildInput__bindgen_ty_1__bindgen_ty_1 {
+                 primitive: &mut mesh,
+             },
+         },
+         nodes: ptr::null_mut(),
+     };
+     let options = hiprtBuildOptions {
+         buildFlags: build_flags.0,
+     };
+     let built = allocator.new_geometry(input, options)?;
+     Ok(DeviceGeometryInstance::GeometryTriangles {
+         hiprt_geometry: hipDeviceptr_t(built),
+     })
+ }
+}
+
+ /// Hooks `GeometryInstanceData` into the generic OptiX object handling.
+ impl OptixObjectData for GeometryInstanceData {
+     const TYPE: TypeTag = TypeTag::GeometryInstance;
+
+     /// Removes `this` from the owning context's `geometry_instances`.
+     /// Does nothing when the context is already gone.
+     fn deregister(&mut self, this: &Rc<OptixCell<Self>>) -> Result<(), RTresult> {
+         match self.context.upgrade() {
+             Some(owner) => {
+                 let mut owner_data = (*owner).borrow_mut()?;
+                 owner_data.geometry_instances.remove(this);
+                 Ok(())
+             }
+             None => Ok(()),
+         }
+     }
+
+     /// Exposes the weak reference to the owning context.
+     fn context<'a>(&'a mut self) -> crate::MaybeWeakRefMut<'a, ContextData> {
+         MaybeWeakRefMut::Weak(&self.context)
+     }
+ }
+
+ pub(crate) enum DeviceGeometryInstance { // Device-side result of preparing one geometry instance for a hiprt build.
+ Geometry { // Custom-primitive geometry; carries the intersection function set that goes with it.
+ hiprt_geometry: hipDeviceptr_t,
+ custom_func_set: hiprtCustomFuncSet,
+ },
+ GeometryTriangles { // Triangle-mesh geometry; carries no custom function set.
+ hiprt_geometry: hipDeviceptr_t,
+ },
+ }
+
+ pub(crate) enum GeometryInstanceChild { // The geometry object attached to a geometry instance.
+ None, // Nothing attached — presumably the initial state; confirm against the constructor.
+ Geometry(Rc<OptixCell<GeometryData>>), // Stored by `set_geometry`.
+ GeometryTriangles(Rc<OptixCell<GeometryTrianglesData>>), // Stored by `set_geometry_triangles`.
+ }
+
+ #[repr(C)] // NOTE(review): C layout presumably because the struct is copied to the GPU as-is — confirm in repr_gpu.
+ #[derive(Clone)]
+ pub(crate) struct DeviceProgram { // A pair of device pointers describing one program.
+ pub(crate) function: hipDeviceptr_t, // Presumably the device function to call — verify against users.
+ pub(crate) variable_block: hipDeviceptr_t, // Presumably that program's variable block on the device — verify.
+ }
+
+ /// Creates a geometry instance in `context` and writes its handle to the
+ /// out-pointer `material`.
+ ///
+ /// Both pointers are validated with `null_check` first; nothing is written if
+ /// creation fails.
+ pub(crate) unsafe fn create(
+     context: Context,
+     material: *mut GeometryInstance,
+ ) -> Result<(), RTresult> {
+     null_check(context)?;
+     null_check(material)?;
+     let handle = GeometryInstanceData::create(context)?;
+     *material = handle;
+     Ok(())
+ }
+
+ /// Makes `triangles` the child of `instance`, replacing any previous child.
+ ///
+ /// The instance takes a strong reference to the triangle object.
+ pub(crate) unsafe fn set_geometry_triangles(
+     instance: GeometryInstance,
+     triangles: GeometryTriangles,
+ ) -> Result<(), RTresult> {
+     null_check(triangles)?;
+     let cell = null_unwrap(instance)?;
+     let mut data = cell.borrow_mut()?;
+     data.child = GeometryInstanceChild::GeometryTriangles(OptixCell::clone_rc(triangles));
+     Ok(())
+ }
+
+ /// Makes `geometry` the child of `instance`, replacing any previous child.
+ ///
+ /// The instance takes a strong reference to the geometry object.
+ pub(crate) unsafe fn set_geometry(
+     instance: GeometryInstance,
+     geometry: Geometry,
+ ) -> Result<(), RTresult> {
+     null_check(geometry)?;
+     let cell = null_unwrap(instance)?;
+     let mut data = cell.borrow_mut()?;
+     let geometry_rc = OptixCell::clone_rc(geometry);
+     data.child = GeometryInstanceChild::Geometry(geometry_rc);
+     Ok(())
+ }
+
+ /// Stores `material` in slot `index` of `instance`.
+ ///
+ /// Returns `RT_ERROR_INVALID_VALUE` when `index` is outside the range set up
+ /// by `set_material_count`.
+ pub(crate) unsafe fn set_material(
+     instance: GeometryInstance,
+     index: u32,
+     material: Material,
+ ) -> Result<(), RTresult> {
+     null_check(material)?;
+     let cell = null_unwrap(instance)?;
+     let mut data = cell.borrow_mut()?;
+     let slot = data
+         .materials
+         .get_mut(index as usize)
+         .ok_or(RTresult::RT_ERROR_INVALID_VALUE)?;
+     // Only take a reference to the material once the slot is known to exist.
+     *slot = Some(OptixCell::clone_rc(material));
+     Ok(())
+ }
+
+ /// Resizes the material slot list of `instance` to `count` entries.
+ ///
+ /// New slots start empty; slots beyond `count` are dropped.
+ pub(crate) unsafe fn set_material_count(
+     instance: GeometryInstance,
+     count: u32,
+ ) -> Result<(), RTresult> {
+     let cell = null_unwrap(instance)?;
+     let mut data = cell.borrow_mut()?;
+     let slot_count = count as usize;
+     data.materials.resize(slot_count, None);
+     Ok(())
+ }
+
+ /// Declares a variable called `name` on the geometry instance and writes its
+ /// handle to the out-pointer `v`.
+ ///
+ /// `name` must point to a NUL-terminated string. Both `name` and `v` are
+ /// validated with `null_check` before use, matching `query_variable`.
+ /// An existing variable with the same name is replaced in the map.
+ pub(crate) unsafe fn declare_variable(
+     instance_ptr: GeometryInstance,
+     name: *const i8,
+     v: *mut Variable,
+ ) -> Result<(), RTresult> {
+     // `CStr::from_ptr` on a null pointer is undefined behaviour, so reject it up front.
+     null_check(name)?;
+     null_check(v)?;
+     let instance = null_unwrap(instance_ptr)?;
+     let mut instance = instance.borrow_mut()?;
+     let variable = VariableData::new(&mut *instance)?;
+     let name = CStr::from_ptr(name).to_owned();
+     // Take the raw handle before the `Rc` is moved into the map.
+     let result = Rc::as_ptr(&variable);
+     instance.variables.insert(name, variable);
+     *v = result;
+     Ok(())
+ }
+
+ /// Writes the number of material slots of `geometryinstance` to `count`.
+ pub(crate) unsafe fn get_material_count(
+     geometryinstance: GeometryInstance,
+     count: *mut u32,
+ ) -> Result<(), RTresult> {
+     null_check(count)?;
+     let cell = null_unwrap(geometryinstance)?;
+     let data = cell.borrow()?;
+     let slot_count = data.materials.len();
+     *count = slot_count as u32;
+     Ok(())
+ }
+
+ /// Looks up the variable called `name` on the geometry instance.
+ ///
+ /// Writes the variable's handle to `v`, or a null handle when no variable
+ /// with that name has been declared; both cases return `Ok`.
+ pub(crate) unsafe fn query_variable(
+     geometryinstance: GeometryInstance,
+     name: *const i8,
+     v: *mut Variable,
+ ) -> Result<(), RTresult> {
+     null_check(name)?;
+     null_check(v)?;
+     let cell = null_unwrap(geometryinstance)?;
+     let data = cell.borrow()?;
+     let found = data.variables.get(CStr::from_ptr(name)).map(Rc::as_ptr);
+     *v = found.unwrap_or(ptr::null_mut());
+     Ok(())
+ }
+
+ pub(crate) fn destroy(_geometryinstance: GeometryInstance) -> Result<(), RTresult> { // Placeholder: ignores its argument and reports success without releasing anything.
+ // TODO: implement
+ Ok(())
+ }
+
+ /// Writes the handle of the context owning `geometryinstance` to the
+ /// out-pointer `context`.
+ ///
+ /// The handle is taken from the instance's weak context reference without
+ /// upgrading it. `context` is validated with `null_check` before it is
+ /// written, matching `geometry_triangles::get_context`.
+ pub(crate) unsafe fn get_context(
+     geometryinstance: *const OptixCell<GeometryInstanceData>,
+     context: *mut *const OptixCell<ContextData>,
+ ) -> Result<(), RTresult> {
+     let geometryinstance = null_unwrap(geometryinstance)?;
+     // Writing through a null out-pointer would be undefined behaviour.
+     null_check(context)?;
+     let geometryinstance = geometryinstance.borrow()?;
+     *context = geometryinstance.context.as_ptr();
+     Ok(())
+ }
diff --git a/zluda_rt/src/geometry_triangles.rs b/zluda_rt/src/geometry_triangles.rs new file mode 100644 index 0000000..bafa53a --- /dev/null +++ b/zluda_rt/src/geometry_triangles.rs @@ -0,0 +1,269 @@ +use crate::{
+ buffer::{Buffer, BufferData},
+ context::{self, Context, ContextData},
+ null_check, null_unwrap,
+ program::{Program, ProgramData},
+ variable::{Variable, VariableData},
+ MaybeWeakRefMut, OptixCell, OptixObjectData, TypeTag,
+};
+use hiprt_sys::hiprtTriangleMeshPrimitive;
+use optix_types::*;
+use rustc_hash::FxHashMap;
+use std::{
+ ffi::{c_void, CStr, CString},
+ ptr,
+ rc::{Rc, Weak},
+};
+
+ pub(crate) type GeometryTriangles = *const OptixCell<GeometryTrianglesData>; // Raw handle to a triangle geometry object, as passed in and out of the functions below.
+
+ pub(crate) struct GeometryTrianglesData { // State of one triangle geometry object.
+ pub(crate) context: Weak<OptixCell<ContextData>>, // Owning context, held weakly.
+ pub(crate) attribute_program: Option<Weak<OptixCell<ProgramData>>>, // Stored by `set_attribute`.
+ pub(crate) variables: FxHashMap<CString, Rc<OptixCell<VariableData>>>, // Variables declared on this object, keyed by name.
+ triangle_count: u32, // Stored by `set_primitive_count`; becomes hiprt `triangleCount`.
+ indices: Option<Indices>, // Stored by `set_triangle_indices`; `to_hiprt` fails without it.
+ vertices: Option<Vertices>, // Stored by `set_vertices`; `to_hiprt` fails without it.
+ flags: FxHashMap<u32, RTgeometryflags>, // Per-material flags, OR-ed together by `set_flags_per_material`.
+ }
+
+ impl GeometryTrianglesData {
+     /// Creates an empty triangle geometry owned by `context`.
+     fn new(context: Weak<OptixCell<ContextData>>, _: &mut ContextData) -> Self {
+         Self {
+             context,
+             attribute_program: None,
+             variables: FxHashMap::default(),
+             triangle_count: 0,
+             indices: None,
+             vertices: None,
+             flags: FxHashMap::default(),
+         }
+     }
+
+     /// Adds `this` to the context's `geometry_triangles` collection.
+     fn register(this: Rc<OptixCell<Self>>, context: &mut ContextData) {
+         context.geometry_triangles.insert(this);
+     }
+
+     /// Creates and registers a new triangle geometry in `context`.
+     unsafe fn create(context: Context) -> Result<GeometryTriangles, RTresult> {
+         context::create_subobject(context, Self::new, Self::register)
+     }
+
+     /// Narrows a byte stride to the `u32` hiprt expects.
+     ///
+     /// Returns `RT_ERROR_INVALID_VALUE` instead of silently truncating a
+     /// stride that does not fit.
+     fn stride_to_u32(stride: u64) -> Result<u32, RTresult> {
+         if stride > u64::from(u32::MAX) {
+             Err(RTresult::RT_ERROR_INVALID_VALUE)
+         } else {
+             Ok(stride as u32)
+         }
+     }
+
+     /// Describes this geometry as a hiprt triangle mesh.
+     ///
+     /// Returns `RT_ERROR_UNKNOWN` when vertices or indices have not been set
+     /// (non-indexed meshes are not implemented) or when one of the backing
+     /// buffers no longer exists, and `RT_ERROR_INVALID_VALUE` when a stride
+     /// does not fit in `u32`.
+     pub(crate) fn to_hiprt(&self) -> Result<hiprtTriangleMeshPrimitive, RTresult> {
+         let vertices = self.vertices.as_ref().ok_or(RTresult::RT_ERROR_UNKNOWN)?;
+         let vertex_buffer = vertices
+             .vertex_buffer
+             .upgrade()
+             .ok_or(RTresult::RT_ERROR_UNKNOWN)?;
+         let vertex_buffer = vertex_buffer.borrow()?;
+         let mut result = hiprtTriangleMeshPrimitive {
+             // Base pointer of the buffer advanced by the user-supplied byte offset.
+             vertices: unsafe {
+                 (vertex_buffer.pointer_mip0().0 as *mut u8)
+                     .add(vertices.vertex_buffer_byte_offset as usize)
+             } as *mut c_void,
+             vertexCount: vertices.vertex_count,
+             vertexStride: Self::stride_to_u32(vertices.vertex_byte_stride)?,
+             triangleIndices: ptr::null_mut(),
+             triangleCount: 0,
+             triangleStride: 0,
+         };
+         if let Some(ref vertex_indices) = self.indices {
+             let index_buffer = vertex_indices
+                 .index_buffer
+                 .upgrade()
+                 .ok_or(RTresult::RT_ERROR_UNKNOWN)?;
+             let index_buffer = index_buffer.borrow()?;
+             result.triangleIndices = unsafe {
+                 (index_buffer.pointer_mip0().0 as *mut u8)
+                     .add(vertex_indices.index_buffer_byte_offset as usize)
+             } as *mut c_void;
+             result.triangleCount = self.triangle_count;
+             result.triangleStride = Self::stride_to_u32(vertex_indices.tri_indices_byte_stride)?;
+         } else {
+             // TODO: implement
+             return Err(RTresult::RT_ERROR_UNKNOWN);
+         }
+         Ok(result)
+     }
+ }
+
+ /// Hooks `GeometryTrianglesData` into the generic OptiX object handling.
+ impl OptixObjectData for GeometryTrianglesData {
+     const TYPE: TypeTag = TypeTag::GeometryTriangles;
+
+     /// Removes `this` from the owning context's `geometry_triangles`.
+     /// Does nothing when the context is already gone.
+     fn deregister(&mut self, this: &Rc<OptixCell<Self>>) -> Result<(), RTresult> {
+         match self.context.upgrade() {
+             Some(owner) => {
+                 let mut owner_data = (*owner).borrow_mut()?;
+                 owner_data.geometry_triangles.remove(this);
+                 Ok(())
+             }
+             None => Ok(()),
+         }
+     }
+
+     /// Exposes the weak reference to the owning context.
+     fn context<'a>(&'a mut self) -> crate::MaybeWeakRefMut<'a, ContextData> {
+         MaybeWeakRefMut::Weak(&self.context)
+     }
+ }
+
+ struct Indices { // Index data recorded by `set_triangle_indices`.
+ index_buffer: Weak<OptixCell<BufferData>>, // Buffer holding the indices, held weakly.
+ index_buffer_byte_offset: u64, // Added to the buffer's base pointer in `to_hiprt`.
+ tri_indices_byte_stride: u64, // Becomes hiprt `triangleStride`.
+ }
+
+ struct Vertices { // Vertex data recorded by `set_vertices`.
+ vertex_count: ::std::os::raw::c_uint, // Becomes hiprt `vertexCount`.
+ vertex_buffer: Weak<OptixCell<BufferData>>, // Buffer holding the vertices, held weakly.
+ vertex_buffer_byte_offset: u64, // Added to the buffer's base pointer in `to_hiprt`.
+ vertex_byte_stride: u64, // Becomes hiprt `vertexStride`.
+ }
+
+ /// Creates a triangle geometry in `context` and writes its handle to the
+ /// out-pointer `triangles`.
+ ///
+ /// Both pointers are validated with `null_check` first; nothing is written if
+ /// creation fails.
+ pub(crate) unsafe fn create(
+     context: Context,
+     triangles: *mut GeometryTriangles,
+ ) -> Result<(), RTresult> {
+     null_check(context)?;
+     null_check(triangles)?;
+     let handle = GeometryTrianglesData::create(context)?;
+     *triangles = handle;
+     Ok(())
+ }
+
+ /// Records the number of triangles of the geometry.
+ pub(crate) unsafe fn set_primitive_count(
+     triangles: GeometryTriangles,
+     triangle_count: u32,
+ ) -> Result<(), RTresult> {
+     let cell = null_unwrap(triangles)?;
+     let mut data = cell.borrow_mut()?;
+     data.triangle_count = triangle_count;
+     Ok(())
+ }
+
+ /// Records the index buffer of the geometry.
+ ///
+ /// Only `RT_FORMAT_UNSIGNED_INT3` indices are accepted; any other format
+ /// yields `RT_ERROR_NOT_SUPPORTED` before the handle is even inspected.
+ /// The buffer is referenced weakly.
+ pub(crate) unsafe fn set_triangle_indices(
+     triangles: GeometryTriangles,
+     index_buffer: Buffer,
+     index_buffer_byte_offset: u64,
+     tri_indices_byte_stride: u64,
+     tri_indices_format: RTformat,
+ ) -> Result<(), RTresult> {
+     if tri_indices_format == RTformat::RT_FORMAT_UNSIGNED_INT3 {
+         let cell = null_unwrap(triangles)?;
+         let mut data = cell.borrow_mut()?;
+         let indices = Indices {
+             index_buffer: OptixCell::clone_weak(index_buffer),
+             index_buffer_byte_offset,
+             tri_indices_byte_stride,
+         };
+         data.indices = Some(indices);
+         Ok(())
+     } else {
+         Err(RTresult::RT_ERROR_NOT_SUPPORTED)
+     }
+ }
+
+ /// Records the vertex buffer of the geometry.
+ ///
+ /// Only `RT_FORMAT_FLOAT3` positions are accepted; any other format yields
+ /// `RT_ERROR_NOT_SUPPORTED` before the handle is even inspected.
+ /// The buffer is referenced weakly.
+ pub(crate) unsafe fn set_vertices(
+     triangles: GeometryTriangles,
+     vertex_count: ::std::os::raw::c_uint,
+     vertex_buffer: Buffer,
+     vertex_buffer_byte_offset: u64,
+     vertex_byte_stride: u64,
+     position_format: RTformat,
+ ) -> Result<(), RTresult> {
+     if position_format == RTformat::RT_FORMAT_FLOAT3 {
+         let cell = null_unwrap(triangles)?;
+         let mut data = cell.borrow_mut()?;
+         let vertices = Vertices {
+             vertex_count,
+             vertex_buffer: OptixCell::clone_weak(vertex_buffer),
+             vertex_buffer_byte_offset,
+             vertex_byte_stride,
+         };
+         data.vertices = Some(vertices);
+         Ok(())
+     } else {
+         Err(RTresult::RT_ERROR_NOT_SUPPORTED)
+     }
+ }
+
+ pub(crate) fn validate(_geometrytriangles: GeometryTriangles) -> Result<(), RTresult> { // Placeholder: performs no validation and always reports success.
+ // TODO: implement
+ Ok(())
+ }
+
+ /// Records `program` as the attribute program of the geometry.
+ ///
+ /// The program is referenced weakly.
+ pub(crate) unsafe fn set_attribute(
+     triangles: GeometryTriangles,
+     program: Program,
+ ) -> Result<(), RTresult> {
+     let cell = null_unwrap(triangles)?;
+     let mut data = cell.borrow_mut()?;
+     let program_weak = OptixCell::clone_weak(program);
+     data.attribute_program = Some(program_weak);
+     Ok(())
+ }
+
+ /// ORs `flags` into the flags stored for `material_index`.
+ ///
+ /// A material without an entry starts from zero, so flags accumulate across
+ /// calls rather than being replaced.
+ pub(crate) unsafe fn set_flags_per_material(
+     triangles: GeometryTriangles,
+     material_index: u32,
+     flags: RTgeometryflags,
+ ) -> Result<(), RTresult> {
+     let cell = null_unwrap(triangles)?;
+     let mut data = cell.borrow_mut()?;
+     data.flags
+         .entry(material_index)
+         .or_insert(RTgeometryflags(0))
+         .0 |= flags.0;
+     Ok(())
+ }
+
+ /// Writes the handle of the context owning `geometrytriangles` to
+ /// `context_ptr`.
+ ///
+ /// The handle is taken from the weak context reference without upgrading it.
+ pub(crate) unsafe fn get_context(
+     geometrytriangles: GeometryTriangles,
+     context_ptr: *mut Context,
+ ) -> Result<(), RTresult> {
+     let cell = null_unwrap(geometrytriangles)?;
+     null_check(context_ptr)?;
+     let data = cell.borrow()?;
+     *context_ptr = data.context.as_ptr();
+     Ok(())
+ }
+
+ /// Declares a variable called `name` on the triangle geometry and writes its
+ /// handle to the out-pointer `v`.
+ ///
+ /// `name` must point to a NUL-terminated string. Both `name` and `v` are
+ /// validated with `null_check` before use, matching `query_variable`.
+ /// An existing variable with the same name is replaced in the map.
+ pub(crate) unsafe fn declare_variable(
+     geometrytriangles: GeometryTriangles,
+     name: *const i8,
+     v: *mut Variable,
+ ) -> Result<(), RTresult> {
+     // `CStr::from_ptr` on a null pointer is undefined behaviour, so reject it up front.
+     null_check(name)?;
+     null_check(v)?;
+     let geometrytriangles = null_unwrap(geometrytriangles)?;
+     let mut geometrytriangles = geometrytriangles.borrow_mut()?;
+     let variable = VariableData::new(&mut *geometrytriangles)?;
+     let name = CStr::from_ptr(name).to_owned();
+     // Take the raw handle before the `Rc` is moved into the map.
+     let result = Rc::as_ptr(&variable);
+     geometrytriangles.variables.insert(name, variable);
+     *v = result;
+     Ok(())
+ }
+
+ /// Looks up the variable called `name` on the triangle geometry.
+ ///
+ /// Writes the variable's handle to `v`, or a null handle when no variable
+ /// with that name has been declared; both cases return `Ok`.
+ pub(crate) unsafe fn query_variable(
+     geometrytriangles: GeometryTriangles,
+     name: *const i8,
+     v: *mut Variable,
+ ) -> Result<(), RTresult> {
+     null_check(name)?;
+     null_check(v)?;
+     let cell = null_unwrap(geometrytriangles)?;
+     let data = cell.borrow()?;
+     let found = data.variables.get(CStr::from_ptr(name)).map(Rc::as_ptr);
+     *v = found.unwrap_or(ptr::null_mut());
+     Ok(())
+ }
+
+ /// Accepts only `RT_GEOMETRY_BUILD_FLAG_NONE`; any other build flag yields
+ /// `RT_ERROR_NOT_SUPPORTED`. Nothing is stored on the geometry.
+ pub(crate) unsafe fn set_build_flags(
+     _geometrytriangles: GeometryTriangles,
+     _build_flags: RTgeometrybuildflags,
+ ) -> Result<(), RTresult> {
+     // TODO: implement
+     if _build_flags == RTgeometrybuildflags::RT_GEOMETRY_BUILD_FLAG_NONE {
+         Ok(())
+     } else {
+         Err(RTresult::RT_ERROR_NOT_SUPPORTED)
+     }
+ }
+
+ pub(crate) unsafe fn destroy(_geometrytriangles: GeometryTriangles) -> Result<(), RTresult> { // Placeholder: ignores its argument and reports success without releasing anything.
+ // TODO: implement
+ Ok(())
+ }
diff --git a/zluda_rt/src/group.rs b/zluda_rt/src/group.rs new file mode 100644 index 0000000..e43388c --- /dev/null +++ b/zluda_rt/src/group.rs @@ -0,0 +1,411 @@ +use crate::{
+ acceleration::{Acceleration, AccelerationData, AccelerationOwner},
+ context::{self, Context, ContextData},
+ geometry_group::{BvhDetails, GeometryGroupData},
+ geometry_instance::DeviceGeometryInstance,
+ hip, hiprt, null_check, null_unwrap,
+ repr_gpu::{self, TrivialHIPAllocator},
+ transform::TransformData,
+ MaybeWeakRefMut, OptixCell, OptixObjectData, TypeTag, TypedObjectWeak, UntypedObject,
+};
+use hip_runtime_sys::*;
+use hiprt_sys::*;
+use optix_types::*;
+use std::{
+ alloc::Layout,
+ ptr,
+ rc::{Rc, Weak},
+};
+
+ pub(crate) type Group = *const OptixCell<GroupData>; // Raw handle to a group object, as passed in and out of the functions below.
+
+ pub(crate) struct GroupData { // State of one group object.
+ pub(crate) context: Weak<OptixCell<ContextData>>, // Owning context, held weakly.
+ acceleration: Option<Rc<OptixCell<AccelerationData>>>, // Stored by `set_acceleration`; required by `prepare_globals`.
+ subgroups: Vec<Option<GroupChild>>, // Child slots; sized by `set_child_count`, filled by `set_child`.
+ pub(crate) index: u32, // Taken from the context's `geometry_group_count` at creation time.
+ }
+
+ impl GroupData {
+ fn new(weak_context: Weak<OptixCell<ContextData>>, context: &mut ContextData) -> Self { // Creates an empty group and assigns it the next index from the context's counter.
+ let index = context.geometry_group_count; // The counter is shared under the name `geometry_group_count`.
+ context.geometry_group_count += 1;
+ Self {
+ context: weak_context,
+ acceleration: None,
+ subgroups: Vec::new(),
+ index,
+ }
+ }
+
+ fn register(this: Rc<OptixCell<Self>>, context: &mut ContextData) { // Adds `this` to the context's `groups` collection.
+ context.groups.insert(this);
+ }
+
+ unsafe fn create(context: Context) -> Result<Group, RTresult> { // Creates and registers a new group in `context`.
+ context::create_subobject(context, Self::new, Self::register)
+ }
+
+ pub(crate) fn prepare_globals( // Flattens all child geometry groups into one hiprt scene and uploads the call chains that go with it.
+ &self,
+ allocator: &mut TrivialHIPAllocator,
+ ctx: &ContextData,
+ global_state: &mut repr_gpu::GlobalState,
+ ) -> Result<BvhDetails, RTresult> {
+ let ray_type_count = ctx.ray_type_count;
+ let mut transforms = Vec::new(); // One entry per flattened geometry instance, parallel to `children`.
+ let mut children = Vec::new();
+ let acceleration = self
+ .acceleration
+ .as_ref()
+ .ok_or(RTresult::RT_ERROR_UNKNOWN)?; // A group without an acceleration cannot be built.
+ let acceleration = acceleration.borrow()?;
+ let mut build_flags = acceleration.to_hiprt().ok_or(RTresult::RT_ERROR_UNKNOWN)?;
+ for child in self.subgroups.iter() { // Every slot must be filled; an empty slot is an error.
+ let child = child.as_ref().ok_or(RTresult::RT_ERROR_INVALID_CONTEXT)?;
+ match child {
+ GroupChild::GeometryGroup(group) => { // Direct geometry group: its instances get no transform.
+ let group = Weak::upgrade(&group).ok_or(RTresult::RT_ERROR_INVALID_CONTEXT)?;
+ let group = group.borrow()?;
+ for _ in 0..group.children.len() {
+ transforms.push(None);
+ }
+ children.extend_from_slice(&group.children[..]);
+ let child_acceleration = group
+ .acceleration
+ .as_ref()
+ .ok_or(RTresult::RT_ERROR_UNKNOWN)?
+ .borrow()?;
+ let child_build_flags = child_acceleration
+ .to_hiprt()
+ .ok_or(RTresult::RT_ERROR_UNKNOWN)?;
+ build_flags = Self::join_build_flags(build_flags, child_build_flags); // The scene is built with the combined flags of all accelerations.
+ }
+ GroupChild::Transform(transform) => { // Transform node: every instance of its child group shares the same transform.
+ let transform_rc =
+ Weak::upgrade(&transform).ok_or(RTresult::RT_ERROR_UNKNOWN)?;
+ let transform = transform_rc.borrow()?;
+ let dev_transform = transform.allocate(allocator)?;
+ let group = transform
+ .child
+ .as_ref()
+ .ok_or(RTresult::RT_ERROR_INVALID_CONTEXT)?;
+ let group = Weak::upgrade(group).ok_or(RTresult::RT_ERROR_UNKNOWN)?;
+ let group = group.borrow()?;
+ for _ in 0..group.children.len() {
+ transforms.push(Some((transform_rc.clone(), dev_transform)));
+ }
+ children.extend_from_slice(&group.children[..]);
+ let child_acceleration = group
+ .acceleration
+ .as_ref()
+ .ok_or(RTresult::RT_ERROR_UNKNOWN)?
+ .borrow()?;
+ let child_build_flags = child_acceleration
+ .to_hiprt()
+ .ok_or(RTresult::RT_ERROR_UNKNOWN)?;
+ build_flags = Self::join_build_flags(build_flags, child_build_flags);
+ }
+ }
+ }
+ let mut hiprt_instances_host = Vec::with_capacity(children.len());
+ let custom_func_table = allocator.new_func_table()?;
+ let mut custom_func_set_counter = 0u32; // Next free slot in `custom_func_table`.
+ let attribute_visitor = repr_gpu::AttributesVisitCallChain {
+ context: ctx,
+ children: &*children,
+ };
+ let attribute_chain_layout = repr_gpu::get_layout(ray_type_count, &attribute_visitor)?;
+ let attribute_call_chain = allocator.allocate(attribute_chain_layout.layout.size())?;
+ repr_gpu::copy_to_gpu(
+ ray_type_count,
+ &attribute_visitor,
+ &attribute_chain_layout,
+ attribute_call_chain,
+ )?;
+ let hit_chains = (0..ray_type_count).map(|ray| { // One device buffer per ray type: prologue, then any-hit chain, then closest-hit chain.
+ let prologue_layout = Layout::new::<repr_gpu::HitProgramChain>();
+ let any_hit_visitor = repr_gpu::HitProgramsVisitCallChain {
+ closest_hit: false,
+ ray,
+ children: &children,
+ context: ctx,
+ };
+ let closest_hit_visitor = repr_gpu::HitProgramsVisitCallChain {
+ closest_hit: true,
+ ray,
+ children: &children,
+ context: ctx,
+ };
+ let any_hit_chain_layout = repr_gpu::get_layout(ray_type_count, &any_hit_visitor)?;
+ let closest_hit_chain_layout = repr_gpu::get_layout(ray_type_count, &closest_hit_visitor)?;
+ let with_any_hit = prologue_layout
+ .extend(any_hit_chain_layout.layout)
+ .map_err(|_| RTresult::RT_ERROR_UNKNOWN)?; // `.0` is the grown layout, `.1` the offset of the any-hit chain.
+ let with_closest_hit = with_any_hit
+ .0
+ .extend(closest_hit_chain_layout.layout)
+ .map_err(|_| RTresult::RT_ERROR_UNKNOWN)?;
+ let hit_chain_gpu = allocator.allocate(with_closest_hit.0.size())?;
+ let prolog = repr_gpu::HitProgramChain { // The prologue stores the byte offsets of both chains.
+ any_hit_start: with_any_hit.1 as u32,
+ closest_hit_start: with_closest_hit.1 as u32,
+ };
+ hip! { hipMemcpyHtoD(hit_chain_gpu, &prolog as *const _ as _, prologue_layout.size()), RT_ERROR_UNKNOWN };
+ repr_gpu::copy_to_gpu(
+ ray_type_count,
+ &any_hit_visitor,
+ &any_hit_chain_layout,
+ hipDeviceptr_t(unsafe { hit_chain_gpu.0.cast::<u8>().add(with_any_hit.1).cast() }),
+ )?;
+ repr_gpu::copy_to_gpu(
+ ray_type_count,
+ &closest_hit_visitor,
+ &closest_hit_chain_layout,
+ hipDeviceptr_t(unsafe {
+ hit_chain_gpu.0.cast::<u8>().add(with_closest_hit.1).cast()
+ }),
+ )?;
+ Ok(hit_chain_gpu)
+ }).collect::<Result<Vec<_>, _>>()?;
+ for (instance_index, instance) in children.iter().enumerate() { // Build one hiprt geometry per flattened instance.
+ let instance = instance.as_ref().ok_or(RTresult::RT_ERROR_UNKNOWN)?;
+ let instance = instance.borrow()?;
+ match instance.prepare_globals(
+ allocator,
+ ctx,
+ build_flags,
+ ray_type_count,
+ custom_func_set_counter,
+ global_state,
+ transforms[instance_index]
+ .as_ref()
+ .map(|(_, dev_ptr)| *dev_ptr)
+ .unwrap_or(hipDeviceptr_t(ptr::null_mut())), // Null device pointer when the instance has no transform.
+ )? {
+ DeviceGeometryInstance::Geometry {
+ custom_func_set,
+ hiprt_geometry,
+ } => {
+ hiprt_instances_host.push(hiprt_geometry);
+ hiprt! { ctx.hiprt.hiprtSetCustomFuncTable(ctx.context, custom_func_table, custom_func_set_counter, custom_func_set), RT_ERROR_UNKNOWN };
+ custom_func_set_counter += 1; // Only custom geometries consume a function-table slot.
+ }
+ DeviceGeometryInstance::GeometryTriangles { hiprt_geometry } => {
+ hiprt_instances_host.push(hiprt_geometry);
+ }
+ };
+ }
+ let instance_frames_host = transforms
+ .iter()
+ .map(|transform| {
+ Ok::<_, RTresult>(match transform {
+ Some((transform, _)) => {
+ let transform = transform.borrow()?;
+ transform.to_hiprt()
+ }
+ None => hiprtFrame { // Frame for untransformed instances: zero translation, unit scale — NOTE(review): rotation is presumably axis-angle with angle 0; confirm against hiprt.
+ translation: hiprtFloat3 {
+ x: 0f32,
+ y: 0f32,
+ z: 0f32,
+ },
+ scale: hiprtFloat3 {
+ x: 1f32,
+ y: 1f32,
+ z: 1f32,
+ },
+ rotation: hiprtFloat4 {
+ x: 0f32,
+ y: 0f32,
+ z: 1.0f32,
+ w: 0f32,
+ },
+ time: 0f32,
+ pad: 0,
+ },
+ })
+ })
+ .collect::<Result<Vec<_>, _>>()?;
+ let instance_geometries = allocator.copy_to_device(&hiprt_instances_host[..])?;
+ let instance_frames = allocator.copy_to_device(&instance_frames_host[..])?;
+ let scene_input = hiprtSceneBuildInput {
+ instanceCount: children.len() as u32,
+ instanceGeometries: instance_geometries.0,
+ frameCount: children.len() as u32, // One frame per instance.
+ instanceFrames: instance_frames.0,
+ nodes: ptr::null_mut(),
+ instanceTransformHeaders: ptr::null_mut(),
+ instanceMasks: ptr::null_mut(),
+ };
+ let build_options = hiprtBuildOptions {
+ buildFlags: build_flags.0,
+ };
+ let transform_blocks = transforms // Per-instance device transform pointers, null where there is no transform.
+ .iter()
+ .map(|maybe_transform| {
+ maybe_transform
+ .as_ref()
+ .map_or(hipDeviceptr_t(ptr::null_mut()), |(_, ptr)| *ptr)
+ })
+ .collect::<Vec<_>>();
+ let transform_blocks = allocator.copy_to_device(&transform_blocks)?;
+ let hit_chains = allocator.copy_to_device(&hit_chains)?;
+ let scene = allocator.new_scene(scene_input, build_options)?;
+ Ok(BvhDetails {
+ scene,
+ func_set: custom_func_table,
+ attribute_call_chain,
+ transform_blocks,
+ hit_chains,
+ })
+ }
+
+ // We assume lower 2 bits are operator, higher bits are flags
+ fn join_build_flags(f1: hiprtBuildFlagBits, f2: hiprtBuildFlagBits) -> hiprtBuildFlagBits { // Combines two flag sets: the larger operator value wins, the flag bits are OR-ed.
+ let op = u32::max(f1.0 & 3, f2.0 & 3);
+ let flags = ((f1.0 >> 2) | (f2.0 >> 2)) << 2;
+ return hiprtBuildFlagBits(op | flags);
+ }
+ }
+
+ /// Hooks `GroupData` into the generic OptiX object handling.
+ impl OptixObjectData for GroupData {
+     const TYPE: TypeTag = TypeTag::Group;
+
+     /// Removes `this` from the owning context's `groups`.
+     /// Does nothing when the context is already gone.
+     fn deregister(&mut self, this: &Rc<OptixCell<Self>>) -> Result<(), RTresult> {
+         match self.context.upgrade() {
+             Some(owner) => {
+                 let mut owner_data = (*owner).borrow_mut()?;
+                 owner_data.groups.remove(this);
+                 Ok(())
+             }
+             None => Ok(()),
+         }
+     }
+
+     /// Exposes the weak reference to the owning context.
+     fn context<'a>(&'a mut self) -> crate::MaybeWeakRefMut<'a, ContextData> {
+         MaybeWeakRefMut::Weak(&self.context)
+     }
+ }
+
+ #[derive(Clone)] // `Clone` is needed because child slots are created with `Vec::resize`.
+ enum GroupChild { // The kinds of object a group slot can hold; both are referenced weakly.
+ GeometryGroup(Weak<OptixCell<GeometryGroupData>>),
+ Transform(Weak<OptixCell<TransformData>>),
+ }
+
+ /// Creates a group in `context` and writes its handle to the out-pointer
+ /// `group`.
+ ///
+ /// Both pointers are validated with `null_check` first; nothing is written if
+ /// creation fails.
+ pub(crate) unsafe fn create(context: Context, group: *mut Group) -> Result<(), RTresult> {
+     null_check(context)?;
+     null_check(group)?;
+     let handle = GroupData::create(context)?;
+     *group = handle;
+     Ok(())
+ }
+
+ /// Attaches `acceleration` to `group` and records the group as the
+ /// acceleration's owner.
+ ///
+ /// The group holds the acceleration strongly; the acceleration refers back to
+ /// the group weakly.
+ pub(crate) unsafe fn set_acceleration(
+     group: Group,
+     acceleration: Acceleration,
+ ) -> Result<(), RTresult> {
+     null_check(acceleration)?;
+     let group_cell = null_unwrap(group)?;
+     let acceleration_cell = null_unwrap(acceleration)?;
+     let mut group_data = group_cell.borrow_mut()?;
+     group_data.acceleration = Some(OptixCell::clone_rc(acceleration_cell));
+     // Release the group borrow before borrowing the acceleration.
+     drop(group_data);
+     let mut acceleration_data = acceleration_cell.borrow_mut()?;
+     acceleration_data.owner = Some(AccelerationOwner::Group(OptixCell::clone_weak(group_cell)));
+     drop(acceleration_data);
+     Ok(())
+ }
+
+ /// Resizes the child slot list of `group` to `count` entries.
+ ///
+ /// New slots start empty; slots beyond `count` are dropped.
+ pub(crate) unsafe fn set_child_count(group: Group, count: u32) -> Result<(), RTresult> {
+     let cell = null_unwrap(group)?;
+     let mut data = cell.borrow_mut()?;
+     let slot_count = count as usize;
+     data.subgroups.resize(slot_count, None);
+     Ok(())
+ }
+
+ /// Writes the child stored in slot `index` of `group` to the out-pointer
+ /// `child`.
+ ///
+ /// An empty slot yields a null handle and `Ok`. An out-of-range `index`
+ /// writes a null handle and returns `RT_ERROR_INVALID_VALUE`. A child that
+ /// no longer exists returns `RT_ERROR_INVALID_CONTEXT` and leaves `*child`
+ /// untouched. `child` is validated with `null_check` before any write.
+ pub(crate) unsafe fn get_child(
+     group: Group,
+     index: u32,
+     child: *mut UntypedObject,
+ ) -> Result<(), RTresult> {
+     // Every path below writes through `child`, so it must not be null.
+     null_check(child)?;
+     let group = null_unwrap(group)?;
+     let group = group.borrow()?;
+     let slot = match group.subgroups.get(index as usize) {
+         Some(slot) => slot,
+         None => {
+             *child = ptr::null_mut();
+             return Err(RTresult::RT_ERROR_INVALID_VALUE);
+         }
+     };
+     let result = match slot {
+         Some(GroupChild::GeometryGroup(geo)) => {
+             let geo = Weak::upgrade(geo).ok_or(RTresult::RT_ERROR_INVALID_CONTEXT)?;
+             OptixCell::as_untyped(&*geo)
+         }
+         Some(GroupChild::Transform(transform)) => {
+             let transform = Weak::upgrade(transform).ok_or(RTresult::RT_ERROR_INVALID_CONTEXT)?;
+             OptixCell::as_untyped(&*transform)
+         }
+         None => ptr::null_mut(),
+     };
+     *child = result;
+     Ok(())
+ }
+
+ /// Writes the number of child slots of `group` to `count`.
+ pub(crate) unsafe fn get_child_count(group: Group, count: *mut u32) -> Result<(), RTresult> {
+     null_check(count)?;
+     let cell = null_unwrap(group)?;
+     let data = cell.borrow()?;
+     let slot_count = data.subgroups.len();
+     *count = slot_count as u32;
+     Ok(())
+ }
+
+ /// Stores `child` in slot `index` of `group`.
+ ///
+ /// Only geometry groups and transforms are accepted; other object types
+ /// yield `RT_ERROR_NOT_SUPPORTED`. An out-of-range `index` yields
+ /// `RT_ERROR_INVALID_VALUE` before the child's type is inspected.
+ pub(crate) unsafe fn set_child(
+     group: Group,
+     index: u32,
+     child: UntypedObject,
+ ) -> Result<(), RTresult> {
+     null_check(child)?;
+     let cell = null_unwrap(group)?;
+     let mut data = cell.borrow_mut()?;
+     let slot = data
+         .subgroups
+         .get_mut(index as usize)
+         .ok_or(RTresult::RT_ERROR_INVALID_VALUE)?;
+     let typed_child = TypedObjectWeak::clone_from(child)?;
+     let new_child = match typed_child {
+         TypedObjectWeak::GeometryGroup(geometry_group) => GroupChild::GeometryGroup(geometry_group),
+         TypedObjectWeak::Transform(transform) => GroupChild::Transform(transform),
+         _ => return Err(RTresult::RT_ERROR_NOT_SUPPORTED),
+     };
+     *slot = Some(new_child);
+     Ok(())
+ }
+
+ /// Writes the handle of the acceleration attached to `group` to
+ /// `acceleration`, or a null handle when none is attached.
+ pub(crate) unsafe fn get_acceleration(
+     group: Group,
+     acceleration: *mut Acceleration,
+ ) -> Result<(), RTresult> {
+     null_check(acceleration)?;
+     let cell = null_unwrap(group)?;
+     let data = cell.borrow()?;
+     *acceleration = match &data.acceleration {
+         Some(attached) => Rc::as_ptr(attached),
+         None => ptr::null(),
+     };
+     Ok(())
+ }
+
+ pub(crate) fn destroy(_group: Group) -> Result<(), RTresult> { // Placeholder: ignores its argument and reports success without releasing anything.
+ // TODO: implement
+ Ok(())
+ }
+
+ /// Writes the handle of the context owning `group` to the out-pointer
+ /// `context`.
+ ///
+ /// The handle is taken from the group's weak context reference without
+ /// upgrading it. `context` is validated with `null_check` before it is
+ /// written.
+ pub(crate) unsafe fn get_context(
+     group: *const OptixCell<GroupData>,
+     context: *mut *const OptixCell<ContextData>,
+ ) -> Result<(), RTresult> {
+     let group = null_unwrap(group)?;
+     // Writing through a null out-pointer would be undefined behaviour.
+     null_check(context)?;
+     let group = group.borrow()?;
+     *context = group.context.as_ptr();
+     Ok(())
+ }
diff --git a/zluda_rt/src/hip.rs b/zluda_rt/src/hip.rs new file mode 100644 index 0000000..647be3c --- /dev/null +++ b/zluda_rt/src/hip.rs @@ -0,0 +1,103 @@ +use hip_runtime_sys::*;
+use optix_types::RTresult;
+use std::{
+ ffi::{c_void, CStr},
+ mem, ptr,
+};
+
+use crate::div_positive_round_up;
+
+ macro_rules! hip { // Evaluates a HIP call and returns `Err(code)` from the enclosing function unless the call yields `hipSuccess`.
+ ($expr:expr) => {
+ #[allow(unused_unsafe)] // The expansion may already sit inside an unsafe context.
+ {
+ let err = unsafe { $expr };
+ if err != hip_runtime_sys::hipError_t::hipSuccess {
+ return Result::Err(err); // The raw HIP error code is passed through unchanged.
+ }
+ }
+ };
+ }
+
+ #[repr(transparent)] // Same layout as the wrapped `hipModule_t`.
+ pub(crate) struct Module(pub hipModule_t); // Owning wrapper around a HIP module handle; the module is unloaded on drop.
+
+ impl Module {
+     /// Launches kernel `f` on a one-dimensional grid of 32-thread blocks.
+     ///
+     /// The number of blocks is `size` divided by 32, rounded by
+     /// `div_positive_round_up`. `params` is passed through as the kernel
+     /// parameter array.
+     pub(crate) fn launch_kernel_1d(
+         &self,
+         f: hipFunction_t,
+         size: u32,
+         shared_mem: u32,
+         stream: hipStream_t,
+         params: *mut *mut c_void,
+     ) -> Result<(), hipError_t> {
+         const BLOCK_SIZE: u32 = 32;
+         let block_count = div_positive_round_up(size as u64, BLOCK_SIZE as u64);
+         hip! { hipModuleLaunchKernel(f, block_count as u32, 1, 1, BLOCK_SIZE, 1, 1, shared_mem, stream, params, ptr::null_mut()) };
+         Ok(())
+     }
+
+     /// Loads a module from the in-memory image `binary`.
+     pub(crate) fn load_data(binary: &[u8]) -> Result<Self, hipError_t> {
+         let mut handle = ptr::null_mut();
+         hip! { hipModuleLoadData(&mut handle, binary.as_ptr() as _) };
+         Ok(Module(handle))
+     }
+
+     /// Looks up the kernel called `kernel_name` in this module.
+     pub(crate) fn get_function(&self, kernel_name: &CStr) -> Result<hipFunction_t, hipError_t> {
+         let mut kernel = ptr::null_mut();
+         hip! { hipModuleGetFunction(&mut kernel, self.0, kernel_name.as_ptr() as _) };
+         Ok(kernel)
+     }
+
+     /// Reads the module global `name` back to the host as a `T`.
+     ///
+     /// Returns `hipErrorInvalidSymbol` when the global's size differs from
+     /// `size_of::<T>()`.
+     ///
+     /// # Safety
+     /// The bytes of the global must form a valid `T`, and the all-zero bit
+     /// pattern must be valid for `T` as well.
+     pub(crate) unsafe fn get_global<T>(&self, name: &CStr) -> Result<T, hipError_t> {
+         let (device_ptr, byte_count) = self.get_pointer_to_global(name)?;
+         if byte_count != mem::size_of::<T>() {
+             return Err(hipError_t::hipErrorInvalidSymbol);
+         }
+         let mut value = mem::zeroed::<T>();
+         hip! { hipMemcpyDtoH(&mut value as *mut T as _, device_ptr, byte_count) };
+         Ok(value)
+     }
+
+     /// Returns the device address and size in bytes of the module global `name`.
+     pub(crate) fn get_pointer_to_global(
+         &self,
+         name: &CStr,
+     ) -> Result<(hipDeviceptr_t, usize), hipError_t> {
+         let mut device_ptr = unsafe { mem::zeroed() };
+         let mut byte_count = 0;
+         hip! { hipModuleGetGlobal(&mut device_ptr, &mut byte_count, self.0, name.as_ptr() as _) };
+         Ok((device_ptr, byte_count))
+     }
+ }
+
+ /// Unloads the wrapped HIP module when the wrapper goes out of scope.
+ impl Drop for Module {
+     fn drop(&mut self) {
+         // A failure to unload cannot be reported from `drop`, so the status is discarded.
+         let _ = unsafe { hipModuleUnload(self.0) };
+     }
+ }
+
+ /// Allocates device memory for `slice` and copies its contents there.
+ ///
+ /// Any HIP failure, from the allocation or from the copy, is reported as
+ /// `RT_ERROR_MEMORY_ALLOCATION_FAILED`.
+ pub(crate) fn copy_to_device<T>(slice: &[T]) -> Result<hipDeviceptr_t, RTresult> {
+     match copy_to_device_impl(slice) {
+         Ok(device_ptr) => Ok(device_ptr),
+         Err(_) => Err(RTresult::RT_ERROR_MEMORY_ALLOCATION_FAILED),
+     }
+ }
+
+ /// Allocates device memory for `slice` and copies its contents there,
+ /// returning the raw HIP error on failure.
+ fn copy_to_device_impl<T>(slice: &[T]) -> Result<hipDeviceptr_t, hipError_t> {
+     // Size of the whole slice in bytes, i.e. `len * size_of::<T>()`.
+     let byte_len = mem::size_of_val(slice);
+     let device_ptr = malloc(byte_len)?;
+     hip! { hipMemcpyHtoD(device_ptr, slice.as_ptr() as _, byte_len) };
+     Ok(device_ptr)
+ }
+
+ /// Allocates `size` bytes of device memory with `hipMalloc`.
+ pub(crate) fn malloc(size: usize) -> Result<hipDeviceptr_t, hipError_t> {
+     let mut raw = ptr::null_mut();
+     let status = unsafe { hipMalloc(&mut raw, size) };
+     if status != hipError_t::hipSuccess {
+         return Err(status);
+     }
+     Ok(hipDeviceptr_t(raw))
+ }
+
+ /// Releases device memory with `hipFree`.
+ pub(crate) fn free(ptr: hipDeviceptr_t) -> Result<(), hipError_t> {
+     let status = unsafe { hipFree(ptr.0) };
+     if status != hipError_t::hipSuccess {
+         return Err(status);
+     }
+     Ok(())
+ }
+
+ /// Sets `size` bytes of device memory starting at `ptr` to zero.
+ pub(crate) fn zero_fill(ptr: hipDeviceptr_t, size: usize) -> Result<(), hipError_t> {
+     let status = unsafe { hipMemsetD8(ptr, 0, size) };
+     if status != hipError_t::hipSuccess {
+         return Err(status);
+     }
+     Ok(())
+ }
diff --git a/zluda_rt/src/lib.rs b/zluda_rt/src/lib.rs new file mode 100644 index 0000000..985dbee --- /dev/null +++ b/zluda_rt/src/lib.rs @@ -0,0 +1,1884 @@ +#[macro_use]
+extern crate nougat;
+
+mod acceleration;
+mod buffer;
+mod cache;
+mod context;
+mod eptx;
+mod geometry;
+mod geometry_group;
+mod geometry_instance;
+mod geometry_triangles;
+mod group;
+mod hip;
+mod material;
+mod program;
+mod repr_gpu;
+#[cfg(test)]
+mod test_common;
+#[cfg(test)]
+mod tests;
+mod texture_sampler;
+mod transform;
+mod variable;
+
+use crate::texture_sampler::{TextureSampler, TextureSamplerData};
+use acceleration::{Acceleration, AccelerationData};
+use buffer::{Buffer, BufferData};
+use context::{Context, ContextData};
+use geometry::{Geometry, GeometryData};
+use geometry_group::{GeometryGroup, GeometryGroupData};
+use geometry_instance::{GeometryInstance, GeometryInstanceData};
+use geometry_triangles::{GeometryTriangles, GeometryTrianglesData};
+use group::{Group, GroupData};
+use hip_runtime_sys::{
+ hipDeviceAttribute_t, hipDeviceGetAttribute, hipDeviceTotalMem, hipDeviceptr_t, hipMemcpyDtoH,
+};
+use material::{Material, MaterialData};
+use optix_types::*;
+use paste::paste;
+use program::{Program, ProgramData};
+use std::{
+ alloc::Layout,
+ cell::{Ref, RefCell, RefMut},
+ collections::HashSet,
+ ffi::c_void,
+ hash::{BuildHasherDefault, Hash, Hasher},
+ mem::{self, ManuallyDrop},
+ os::raw::c_char,
+ ptr::{self, NonNull},
+ rc::{Rc, Weak},
+};
+use transform::{Transform, TransformData};
+use variable::{Variable, VariableData};
+
+ macro_rules! optix6_unimplemented { // For each listed signature, emits an exported stub that forwards to `definitions::unimplemented()`.
+ ($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* $(,)? ) -> $ret_type:ty);*) => {
+ $(
+ #[no_mangle] // Exported under the exact name given in the signature.
+ unsafe extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
+ definitions::unimplemented() // The arguments are ignored.
+ }
+ )*
+ };
+ }
+
+macro_rules! optix6_fn { // Generates exported entry points that delegate to the same-named function in `definitions`.
+ ($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* $(,)? ) -> $ret_type:ty);*) => { // input: `;`-separated foreign function signatures
+ $(
+ #[no_mangle]
+ unsafe extern $abi fn $fn_name ( $( $arg_id : $arg_type),* ) -> $ret_type {
+ IntoOptix::<$ret_type>::into_optix(definitions::$fn_name( $( InternalRepresentation::to_internal($arg_id) ),* )) // each argument is converted with `to_internal`, the result is converted back with `into_optix`
+ }
+ )*
+ };
+}
+
+/// Yields the value inside `Some`, or jumps to the next iteration of the enclosing loop
+/// when the expression is `None`.
+#[macro_export]
+macro_rules! unwrap_or_continue {
+    ($option:expr) => {
+        if let Some(value) = $option {
+            value
+        } else {
+            continue
+        }
+    };
+}
+
+#[macro_export]
+macro_rules! hip { // Evaluates a HIP call and leaves the enclosing function with `Err(RTresult::$err)` if it did not succeed.
+ ($expr:expr, $err:ident) => { // `$expr`: the HIP call, `$err`: name of the `RTresult` variant reported on failure
+ #[allow(unused_unsafe)]
+ {
+ let err = unsafe { $expr }; // wrapped in `unsafe` so call sites need no block of their own
+ if err != hip_runtime_sys::hipError_t::hipSuccess {
+ return Result::Err(RTresult::$err); // the HIP error code itself is discarded
+ }
+ }
+ };
+}
+
+#[macro_export]
+macro_rules! hiprt { // Evaluates a HIPRT call and leaves the enclosing function with `Err(RTresult::$err)` if it did not succeed.
+ ($expr:expr, $err:ident) => { // `$expr`: the HIPRT call, `$err`: name of the `RTresult` variant reported on failure
+ #[allow(unused_unsafe)]
+ {
+ let err = unsafe { $expr }; // wrapped in `unsafe` so call sites need no block of their own
+ if err != hiprt_sys::hiprtError::hiprtSuccess {
+ return Result::Err(RTresult::$err); // the HIPRT error code itself is discarded
+ }
+ }
+ };
+}
+
+optix_base::optix6_function_declarations!( // Instantiates the exported OptiX 6 entry points from the two generator macros defined in this file.
+ optix6_unimplemented,
+ optix6_fn,
+ [ // NOTE(review): presumably the names below are generated through `optix6_fn` and all remaining API functions through `optix6_unimplemented`; the macro lives in `optix_base` - confirm there
+ rtAccelerationCreate,
+ rtAccelerationDestroy,
+ rtAccelerationGetContext,
+ rtAccelerationSetBuilder,
+ rtAccelerationMarkDirty,
+ rtBufferCreate,
+ rtBufferCreateFromCallback,
+ rtBufferDestroy,
+ rtBufferSetElementSize,
+ rtBufferSetFormat,
+ rtBufferSetMipLevelCount,
+ rtBufferSetSize1D,
+ rtBufferSetSize2D,
+ rtBufferGetContext,
+ rtBufferGetDevicePointer,
+ rtBufferGetDimensionality,
+ rtBufferGetElementSize,
+ rtBufferGetFormat,
+ rtBufferGetGLBOId,
+ rtBufferGetId,
+ rtBufferGetMipLevelCount,
+ rtBufferGetMipLevelSize2D,
+ rtBufferGetSize1D,
+ rtBufferGetSize2D,
+ rtBufferGetSizev,
+ rtBufferMap,
+ rtBufferMapEx,
+ rtBufferUnmap,
+ rtBufferUnmapEx,
+ rtContextCreate,
+ rtContextDeclareVariable,
+ rtContextDestroy,
+ rtContextGetAttribute,
+ rtContextGetBufferFromId,
+ rtContextGetDeviceCount,
+ rtContextGetDevices,
+ rtContextGetErrorString,
+ rtContextLaunch2D,
+ rtContextQueryVariable,
+ rtContextSetAttribute,
+ rtContextSetDevices,
+ rtContextSetEntryPointCount,
+ rtContextSetExceptionEnabled,
+ rtContextSetExceptionProgram,
+ rtContextSetMaxCallableProgramDepth,
+ rtContextSetMaxTraceDepth,
+ rtContextSetMissProgram,
+ rtContextSetPrintEnabled,
+ rtContextSetPrintLaunchIndex,
+ rtContextSetRayGenerationProgram,
+ rtContextSetRayTypeCount,
+ rtContextSetStackSize,
+ rtContextSetUsageReportCallback,
+ rtContextValidate,
+ rtDeviceGetAttribute,
+ rtDeviceGetDeviceCount,
+ rtGeometryCreate,
+ rtGeometryDeclareVariable,
+ rtGeometryDestroy,
+ rtGeometryGetContext,
+ rtGeometryQueryVariable,
+ rtGeometrySetPrimitiveCount,
+ rtGeometrySetBoundingBoxProgram,
+ rtGeometrySetIntersectionProgram,
+ rtGeometryGroupCreate,
+ rtGeometryGroupDestroy,
+ rtGeometryGroupGetChildCount,
+ rtGeometryGroupGetContext,
+ rtGeometryGroupSetAcceleration,
+ rtGeometryGroupSetChild,
+ rtGeometryGroupSetChildCount,
+ rtGeometryGroupSetVisibilityMask,
+ rtGeometryInstanceCreate,
+ rtGeometryInstanceDeclareVariable,
+ rtGeometryInstanceDestroy,
+ rtGeometryInstanceQueryVariable,
+ rtGeometryInstanceGetContext,
+ rtGeometryInstanceGetMaterialCount,
+ rtGeometryInstanceSetGeometry,
+ rtGeometryInstanceSetGeometryTriangles,
+ rtGeometryInstanceSetMaterial,
+ rtGeometryInstanceSetMaterialCount,
+ rtGeometryTrianglesCreate,
+ rtGeometryTrianglesDeclareVariable,
+ rtGeometryTrianglesDestroy,
+ rtGeometryTrianglesGetContext,
+ rtGeometryTrianglesSetAttributeProgram,
+ rtGeometryTrianglesSetBuildFlags,
+ rtGeometryTrianglesSetFlagsPerMaterial,
+ rtGeometryTrianglesSetPrimitiveCount,
+ rtGeometryTrianglesSetTriangleIndices,
+ rtGeometryTrianglesSetVertices,
+ rtGeometryTrianglesQueryVariable,
+ rtGeometryTrianglesValidate,
+ rtGetVersion,
+ rtGlobalGetAttribute,
+ rtGlobalSetAttribute,
+ rtGroupCreate,
+ rtGroupDestroy,
+ rtGroupGetAcceleration,
+ rtGroupGetChild,
+ rtGroupGetChildCount,
+ rtGroupGetContext,
+ rtGroupSetAcceleration,
+ rtGroupSetChild,
+ rtGroupSetChildCount,
+ rtMaterialCreate,
+ rtMaterialDeclareVariable,
+ rtMaterialDestroy,
+ rtMaterialGetContext,
+ rtMaterialQueryVariable,
+ rtMaterialSetAnyHitProgram,
+ rtMaterialSetClosestHitProgram,
+ rtProgramCreateFromProgram,
+ rtProgramCreateFromPTXFile,
+ rtProgramCreateFromPTXString,
+ rtProgramDeclareVariable,
+ rtProgramDestroy,
+ rtProgramGetContext,
+ rtProgramGetId,
+ rtProgramQueryVariable,
+ rtProgramValidate,
+ rtTextureSamplerCreate,
+ rtTextureSamplerDestroy,
+ rtTextureSamplerGetBuffer,
+ rtTextureSamplerGetContext,
+ rtTextureSamplerGetId,
+ rtTextureSamplerSetArraySize,
+ rtTextureSamplerSetBuffer,
+ rtTextureSamplerSetFilteringModes,
+ rtTextureSamplerSetIndexingMode,
+ rtTextureSamplerSetMaxAnisotropy,
+ rtTextureSamplerSetMipLevelCount,
+ rtTextureSamplerSetReadMode,
+ rtTextureSamplerSetWrapMode,
+ rtTransformCreate,
+ rtTransformDestroy,
+ rtTransformGetContext,
+ rtTransformGetMotionKeyCount,
+ rtTransformSetChild,
+ rtTransformSetMatrix,
+ rtVariableSet1f,
+ rtVariableSet1i,
+ rtVariableSet1ui,
+ rtVariableSet1ull,
+ rtVariableSet3f,
+ rtVariableSet3fv,
+ rtVariableSet4f,
+ rtVariableSet4fv,
+ rtVariableSetObject,
+ rtVariableGetObject,
+ rtVariableSetUserData
+ ]
+);
+
+/// Conversion from a type of the public OptiX API (`T`) into the type this crate uses for
+/// the same value.
+///
+/// The provided method is a plain bit copy. An implementation is therefore only sound when
+/// both types have the same size and every bit pattern of `T` is a valid `Self`.
+pub(crate) trait InternalRepresentation<T: Sized>: Sized {
+    /// Reinterprets the bits of `t` as `Self`.
+    fn to_internal(t: T) -> Self {
+        // `transmute_copy` reads `size_of::<Self>()` bytes without comparing sizes; catch a
+        // mismatched impl in debug builds instead of silently reading past `t`.
+        debug_assert_eq!(mem::size_of::<T>(), mem::size_of::<Self>());
+        // SAFETY: implementors pair layout-compatible types (size checked above in debug builds)
+        unsafe { mem::transmute_copy(&t) }
+    }
+}
+
+impl<From, Into: InternalRepresentation<From>> InternalRepresentation<*mut From> for *mut Into {} // a pointer converts whenever its pointee does
+impl<From, Into: InternalRepresentation<From>> InternalRepresentation<*const From> for *const Into {}
+
+// Unchanged: the public type is used as-is inside this crate
+impl InternalRepresentation<RTgeometrybuildflags> for RTgeometrybuildflags {}
+impl InternalRepresentation<RTresult> for RTresult {}
+impl InternalRepresentation<RTformat> for RTformat {}
+impl InternalRepresentation<RTwrapmode> for RTwrapmode {}
+impl InternalRepresentation<RTgeometryflags> for RTgeometryflags {}
+impl InternalRepresentation<RTusagereportcallback> for RTusagereportcallback {}
+impl InternalRepresentation<RTexception> for RTexception {}
+impl InternalRepresentation<RTcontextattribute> for RTcontextattribute {}
+impl InternalRepresentation<RTglobalattribute> for RTglobalattribute {}
+impl InternalRepresentation<RTtextureindexmode> for RTtextureindexmode {}
+impl InternalRepresentation<RTdeviceattribute> for RTdeviceattribute {}
+impl InternalRepresentation<RTtexturereadmode> for RTtexturereadmode {}
+impl InternalRepresentation<RTfiltermode> for RTfiltermode {}
+impl InternalRepresentation<c_char> for c_char {}
+impl InternalRepresentation<f32> for f32 {}
+impl InternalRepresentation<i32> for i32 {}
+impl InternalRepresentation<u32> for u32 {}
+impl InternalRepresentation<u64> for u64 {}
+impl InternalRepresentation<c_void> for c_void {}
+// ZLUDA: public types that map to a type declared in this crate
+impl InternalRepresentation<RTobject> for UntypedObject {}
+impl InternalRepresentation<RTbuffercallback> // the callback keeps its shape, only its `buffer` parameter is spelled with this crate's `Buffer`
+ for Option<
+ unsafe extern "C" fn(
+ callbackData: *mut ::std::os::raw::c_void,
+ buffer: Buffer,
+ block: *mut RTmemoryblock,
+ ) -> ::std::os::raw::c_int,
+ >
+{
+}
+
+#[repr(C)]
+struct OptixCell<T: OptixObjectData> { // Container for every API object: a type tag followed by the interior-mutable payload.
+ tag: TypeTag, // first field of a `repr(C)` struct, so a cell pointer can be read as `*const TypeTag` (see `UntypedObject`)
+ data: RefCell<T>, // borrow rules are checked at run time
+}
+
+impl<T: OptixObjectData> OptixCell<T> {
+    /// Produces an additional strong handle for `cell` while leaving the reference that
+    /// `cell` itself stands for untouched.
+    ///
+    /// `cell` is handed to `Rc::from_raw` and has to satisfy that function's requirements.
+    unsafe fn clone_rc(cell: *const OptixCell<T>) -> Rc<OptixCell<T>> {
+        // `ManuallyDrop` keeps the reconstructed `Rc` from decrementing the count on scope exit
+        let borrowed = ManuallyDrop::new(Rc::from_raw(cell));
+        Rc::clone(&*borrowed)
+    }
+
+    /// Produces a weak handle for `cell`; the pointer is reinterpreted with `Weak::from_raw`
+    /// and the temporary created that way is never dropped.
+    unsafe fn clone_weak(cell: *const OptixCell<T>) -> Weak<OptixCell<T>> {
+        let borrowed = ManuallyDrop::new(Weak::from_raw(cell));
+        Weak::clone(&*borrowed)
+    }
+
+    /// Erases the payload type, leaving a pointer to the leading `TypeTag`.
+    unsafe fn as_untyped(cell: *const OptixCell<T>) -> UntypedObject {
+        cell.cast::<TypeTag>()
+    }
+}
+
+struct RcHashSet<T: Sized>(HashSet<PtrEq<T>, BuildHasherDefault<WritethroughHasher<T>>>); // Set of `Rc<T>` handles keyed by pointer identity (`PtrEq`) and hashed by `WritethroughHasher`.
+
+/// `Rc` wrapper whose equality and hash depend on the allocation address, not on the
+/// pointed-to value.
+///
+/// `repr(transparent)` is relied upon by `RcHashSet`, which reinterprets `&Rc<T>` as
+/// `&PtrEq<T>`.
+#[repr(transparent)]
+struct PtrEq<T: Sized>(Rc<T>);
+
+impl<T: Sized> PartialEq for PtrEq<T> {
+    fn eq(&self, other: &Self) -> bool {
+        Rc::ptr_eq(&self.0, &other.0)
+    }
+}
+
+impl<T: Sized> Eq for PtrEq<T> {}
+
+impl<T: Sized> Hash for PtrEq<T> {
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        // Hash the address that `Rc::ptr_eq` compares. This keeps `Hash` in lockstep with
+        // `Eq` and avoids assuming that `Rc<T>` is laid out as a single `usize`.
+        state.write_usize(Rc::as_ptr(&self.0) as usize)
+    }
+}
+/// Hasher that does no mixing: it remembers the single `usize` written to it and returns
+/// that value from `finish`.
+///
+/// Intended for keys that hash themselves with exactly one `write_usize` call (as `PtrEq`
+/// does); any byte-oriented write panics.
+struct WritethroughHasher<T: Sized>(*const T);
+
+impl<T: Sized> Default for WritethroughHasher<T> {
+    fn default() -> Self {
+        Self(ptr::null())
+    }
+}
+
+impl<T> Hasher for WritethroughHasher<T> {
+    fn finish(&self) -> u64 {
+        // Integer casts instead of `transmute`: a pointer-to-`u64` transmute only compiles
+        // where pointers are exactly 64 bits wide.
+        self.0 as usize as u64
+    }
+
+    fn write(&mut self, _bytes: &[u8]) {
+        unreachable!()
+    }
+
+    fn write_usize(&mut self, i: usize) {
+        self.0 = i as *const T;
+    }
+}
+
+impl<T> RcHashSet<T> {
+    /// Creates an empty set.
+    fn new() -> Self {
+        let inner = HashSet::default();
+        Self(inner)
+    }
+
+    /// Stores `value`, keyed by its address; the result is the one `HashSet::insert` reports.
+    fn insert(&mut self, value: Rc<T>) -> bool {
+        let key = PtrEq(value);
+        self.0.insert(key)
+    }
+
+    /// Removes the entry that shares its allocation with `value`; the result is the one
+    /// `HashSet::remove` reports.
+    fn remove(&mut self, value: &Rc<T>) -> bool {
+        // SAFETY: `PtrEq<T>` is `repr(transparent)` over `Rc<T>`, both references share a layout
+        let key: &PtrEq<T> = unsafe { &*(value as *const Rc<T>).cast::<PtrEq<T>>() };
+        self.0.remove(key)
+    }
+
+    /// Iterates over the stored handles as plain `Rc<T>` references.
+    fn iter(&self) -> std::collections::hash_set::Iter<'_, Rc<T>> {
+        let wrapped = self.0.iter();
+        // NOTE(review): relies on the iterator over `PtrEq<T>` having the same layout as the
+        // iterator over `Rc<T>`; the standard library does not document that guarantee.
+        unsafe { mem::transmute(wrapped) }
+    }
+}
+
+impl<T: OptixObjectData> OptixCell<T> {
+ fn new(t: T) -> Self { // Wraps `t` and tags the cell with the payload's `TYPE` constant.
+ Self {
+ tag: T::TYPE,
+ data: RefCell::new(t),
+ }
+ }
+
+ fn borrow(&self) -> Result<Ref<'_, T>, RTresult> { // Shared borrow; a conflicting borrow becomes `RT_ERROR_UNKNOWN` instead of a panic.
+ self.data
+ .try_borrow()
+ .map_err(|_| RTresult::RT_ERROR_UNKNOWN)
+ }
+
+ fn borrow_mut(&self) -> Result<RefMut<'_, T>, RTresult> { // Exclusive borrow that additionally calls `invalidate()` on the object's context.
+ let mut this = self
+ .data
+ .try_borrow_mut()
+ .map_err(|_| RTresult::RT_ERROR_UNKNOWN)?;
+ match this.context() {
+ MaybeWeakRefMut::Weak(weak_ctx) => {
+ let ctx = weak_ctx.upgrade().ok_or(RTresult::RT_ERROR_UNKNOWN)?; // fails once the context is gone
+ if let Ok(mut ctx) = ctx.borrow_mut() { // a context that cannot be borrowed right now is skipped, not reported
+ // We might be called from within launch2d, where ctx is already mutably borrowed
+ ctx.invalidate();
+ }
+ drop(ctx);
+ }
+ MaybeWeakRefMut::Ref(ctx) => ctx.invalidate(),
+ }
+ Ok(this)
+ }
+
+ fn borrow_mut_no_invalidate(&self) -> Result<RefMut<'_, T>, RTresult> { // Exclusive borrow that leaves the context untouched.
+ self.data
+ .try_borrow_mut()
+ .map_err(|_| RTresult::RT_ERROR_UNKNOWN)
+ }
+
+ unsafe fn destroy(ptr: *const OptixCell<T>) -> Result<(), RTresult> { // Runs the payload's `deregister`; a null `ptr` yields `RT_ERROR_INVALID_VALUE`.
+ let obj = null_unwrap(ptr)?;
+ let mut this = obj.borrow_mut()?;
+ let rc_ptr = ManuallyDrop::new(Rc::from_raw(obj)); // temporary `Rc` view of the caller's pointer; `ManuallyDrop` keeps it from decrementing the strong count
+ this.deregister(&*rc_ptr)
+ }
+}
+
+type UntypedObject = *const TypeTag; // Pointer to an object of unknown kind; the pointed-to tag is read to recover the kind (see `TypedObjectWeak::clone_from`).
+
+macro_rules! optix_types { // Generates `TypeTag`, `TypedObjectWeak` and the `InternalRepresentation` impls from a list of object kinds.
+ ($ctx:ident, [$($type_:ident),+]) => { // entry arm: the context kind is named separately from the rest
+ #[repr(u8)]
+ enum TypeTag {
+ $ctx = 1, // numbering starts at 1, so 0 is never a valid tag
+ $(
+ $type_,
+ )+
+ }
+
+ optix_types!(@ $ctx, $($type_),+); // the internal arm treats the context like any other kind
+ };
+ (@ $($type_:ident),+) => {
+ $(
+ impl InternalRepresentation<paste!{ [<RT $type_:lower>] }> for $type_ {} // e.g. `RTbuffer` converts to `Buffer`
+ )+
+
+ #[derive(Clone)]
+ enum TypedObjectWeak { // weak handle to an object of any kind, one variant per kind
+ $(
+ $type_(Weak<OptixCell< paste! { [<$type_ Data>] } >>),
+ )+
+ }
+
+ impl TypedObjectWeak {
+ fn as_untyped(&self) -> UntypedObject { // address of the referenced cell, with the kind erased
+ match self {
+ $(
+ TypedObjectWeak::$type_(weak) => {
+ let result: $type_ = Weak::as_ptr(weak);
+ result as UntypedObject
+ },
+ )+
+ }
+ }
+
+
+ unsafe fn clone_from(obj: UntypedObject) -> Result<TypedObjectWeak, RTresult> { // recovers the kind from the tag and creates a weak handle; null is rejected by `null_check`
+ null_check(obj)?;
+ Ok(match *obj { // reads the tag stored at the start of the cell
+ $(
+ TypeTag::$type_ => {
+ let fake_strong = ManuallyDrop::new(Rc::from_raw(mem::transmute::<_, $type_>(obj))); // never dropped, so the strong count is left alone
+ TypedObjectWeak::$type_(Rc::downgrade(&fake_strong))
+ }
+ )+
+ })
+ }
+
+ $(
+ paste!{
+ #[allow(dead_code)]
+ #[allow(non_snake_case)]
+ unsafe fn [<clone_from_ $type_:snake>] ($type_: $type_) -> TypedObjectWeak { // per-kind variant of `clone_from` for callers that already hold a typed pointer; no null check
+ let fake_strong = ManuallyDrop::new(Rc::from_raw($type_));
+ TypedObjectWeak::$type_(Rc::downgrade(&fake_strong))
+ }
+ }
+ )+
+ }
+ };
+}
+
+optix_types!( // Declares the object kinds of this crate; the order fixes the numeric `TypeTag` values.
+ Context, // receives tag value 1
+ [
+ Buffer,
+ Variable,
+ Program,
+ Material,
+ Geometry,
+ GeometryTriangles,
+ GeometryGroup,
+ Group,
+ Acceleration,
+ GeometryInstance,
+ TextureSampler,
+ Transform
+ ]
+);
+
+/// Host allocation with an explicit size and alignment that is released on drop.
+///
+/// A buffer of size zero owns no allocation: `ptr` is then a well-aligned dangling
+/// pointer and `Drop` does not call the allocator.
+struct AlignedBuffer {
+    size: usize,
+    pub(crate) ptr: NonNull<u8>,
+    align: u32,
+}
+
+impl AlignedBuffer {
+    /// Allocates uninitialized storage described by `layout`.
+    ///
+    /// Allocation failure is reported through `std::alloc::handle_alloc_error`.
+    fn new(layout: Layout) -> Self {
+        let size = layout.size();
+        // Narrowed to the width of the `align` field
+        let align = layout.align() as u32;
+        let ptr = if size == 0 {
+            // The global allocator must not be asked for zero bytes; use the alignment as
+            // the address instead.
+            // SAFETY: an alignment is never zero, so the address is non-null
+            unsafe { NonNull::new_unchecked(layout.align() as *mut u8) }
+        } else {
+            // SAFETY: `layout` has a non-zero size
+            let raw = unsafe { std::alloc::alloc(layout) };
+            NonNull::new(raw).unwrap_or_else(|| std::alloc::handle_alloc_error(layout))
+        };
+        Self { size, align, ptr }
+    }
+
+    /// Size of the buffer in bytes.
+    fn len(&self) -> usize {
+        self.size
+    }
+
+    /// Allocates a buffer described by `layout` and fills it with `layout.size()` bytes read
+    /// from device memory at `dev_data`.
+    ///
+    /// A failed copy is reported as `RT_ERROR_MEMORY_ALLOCATION_FAILED`.
+    fn from_hip(layout: Layout, dev_data: hipDeviceptr_t) -> Result<Self, RTresult> {
+        // Owning the allocation before the copy means the early return inside `hip!` drops
+        // the buffer and frees the memory instead of leaking it.
+        let buffer = Self::new(layout);
+        hip! { hipMemcpyDtoH(buffer.ptr.as_ptr() as _, dev_data, layout.size()), RT_ERROR_MEMORY_ALLOCATION_FAILED };
+        Ok(buffer)
+    }
+
+    /// Start of the buffer as an untyped pointer.
+    fn as_ptr(&self) -> *mut c_void {
+        self.ptr.as_ptr() as _
+    }
+
+    /// The whole buffer as a byte slice.
+    fn as_bytes(&self) -> &[u8] {
+        // SAFETY: `ptr` is non-null, aligned and valid for `size` bytes for the lifetime of `self`
+        unsafe { std::slice::from_raw_parts(self.ptr.as_ptr(), self.size) }
+    }
+
+    /// The whole buffer as a mutable byte slice.
+    fn as_bytes_mut(&mut self) -> &mut [u8] {
+        // SAFETY: as in `as_bytes`; `&mut self` guarantees exclusive access
+        unsafe { std::slice::from_raw_parts_mut(self.ptr.as_ptr(), self.size) }
+    }
+}
+
+impl Clone for AlignedBuffer {
+    /// Allocates a buffer with the same size and alignment and copies the contents.
+    fn clone(&self) -> Self {
+        // SAFETY: size and alignment were taken from a valid `Layout` at construction
+        let layout = unsafe { Layout::from_size_align_unchecked(self.size, self.align as usize) };
+        let mut buffer = AlignedBuffer::new(layout);
+        buffer.as_bytes_mut().copy_from_slice(self.as_bytes());
+        buffer
+    }
+}
+
+impl Drop for AlignedBuffer {
+    fn drop(&mut self) {
+        if self.size == 0 {
+            // Nothing was allocated for an empty buffer
+            return;
+        }
+        // SAFETY: `ptr` came from `std::alloc::alloc` with this size and alignment
+        unsafe {
+            std::alloc::dealloc(
+                self.ptr.as_ptr(),
+                Layout::from_size_align_unchecked(self.size, self.align as usize),
+            )
+        }
+    }
+}
+
+/// Divides `dividend` by `divisor` and rounds the quotient up to the next integer.
+///
+/// Panics when `divisor` is zero, like any integer division.
+fn div_positive_round_up(dividend: u64, divisor: u64) -> u64 {
+    let quotient = dividend / divisor;
+    let has_remainder = dividend % divisor != 0;
+    quotient + u64::from(has_remainder)
+}
+
+/// Reinterprets the start of `from` as a slice of `count` values of type `U`.
+///
+/// # Panics
+/// When `count` values of `U` do not fit into the bytes of `from`, including the case where
+/// the requested byte count overflows `usize`.
+///
+/// # Safety
+/// The memory of `from` must be suitably aligned for `U` (checked in debug builds only) and
+/// must hold valid `U` values.
+unsafe fn slice_cast_mut<'a, T: Sized, U: Sized>(from: &'a mut [T], count: usize) -> &'a mut [U] {
+    // `checked_mul` stops an oversized request from wrapping around to a small byte count
+    // that would slip past the bounds assertion.
+    let requested_bytes = count
+        .checked_mul(mem::size_of::<U>())
+        .expect("requested slice size overflows usize");
+    assert!(mem::size_of_val(&*from) >= requested_bytes);
+    debug_assert_eq!(from.as_ptr() as usize % mem::align_of::<U>(), 0);
+    std::slice::from_raw_parts_mut(from.as_mut_ptr() as *mut U, count)
+}
+
+trait OptixObjectData: Sized { // Behaviour required from a payload type stored inside an `OptixCell`.
+ const TYPE: TypeTag; // tag value that `OptixCell::new` stores next to the payload
+
+ fn deregister(&mut self, this: &Rc<OptixCell<Self>>) -> Result<(), RTresult>; // called by `OptixCell::destroy`; NOTE(review): presumably detaches the object from its owner(s) - implementations are outside this view
+ fn context<'a>(&'a mut self) -> MaybeWeakRefMut<'a, ContextData>; // used by `OptixCell::borrow_mut` to reach the context it invalidates
+}
+
+pub(crate) enum MaybeWeakRefMut<'a, T: OptixObjectData> { // Either a weak handle to a cell holding `T` or direct mutable access to a `T`.
+ Weak(&'a Weak<OptixCell<T>>), // must be upgraded and borrowed before the value can be used
+ Ref(&'a mut T), // already mutably accessible; NOTE(review): presumably returned when the object is the context itself - confirm against the `context()` impls
+}
+
+/// Conversion of a value returned by `definitions` into the return type of the exported
+/// API function.
+pub(crate) trait IntoOptix<T> {
+    fn into_optix(self) -> T;
+}
+
+// API functions without a return value: nothing to convert.
+impl IntoOptix<()> for () {
+    fn into_optix(self) {}
+}
+
+// A unit value reported as a status code means success.
+impl IntoOptix<RTresult> for () {
+    fn into_optix(self) -> RTresult {
+        RTresult::RT_SUCCESS
+    }
+}
+
+// A status code is passed through untouched.
+impl IntoOptix<RTresult> for RTresult {
+    fn into_optix(self) -> RTresult {
+        self
+    }
+}
+
+// `Ok` collapses to the success code, `Err` carries the code to report.
+impl IntoOptix<RTresult> for Result<(), RTresult> {
+    fn into_optix(self) -> RTresult {
+        match self {
+            Ok(()) => RTresult::RT_SUCCESS,
+            Err(code) => code,
+        }
+    }
+}
+
+/// Pointer types that have a null value to compare against (see `null_check`).
+trait NullablePointer {
+    /// The null pointer of this type.
+    fn null() -> Self;
+}
+
+impl<T> NullablePointer for *mut T {
+    fn null() -> Self {
+        ptr::null_mut()
+    }
+}
+
+impl<T> NullablePointer for *const T {
+    fn null() -> Self {
+        // `ptr::null` yields a `*const T` directly instead of coercing a `*mut T`
+        ptr::null()
+    }
+}
+
+/// Returns `Err(RT_ERROR_INVALID_VALUE)` for a null pointer and `Ok(())` for anything else.
+#[must_use]
+fn null_check<T: NullablePointer + PartialEq>(ptr: T) -> Result<(), RTresult> {
+    match ptr == T::null() {
+        true => Err(RTresult::RT_ERROR_INVALID_VALUE),
+        false => Ok(()),
+    }
+}
+
+/// Turns a raw pointer into a shared reference; null becomes `RT_ERROR_INVALID_VALUE`.
+///
+/// The pointer is dereferenced through `as_ref`, so a non-null `t` must point to a valid `T`
+/// for the lifetime `'a`.
+#[must_use]
+unsafe fn null_unwrap<'a, T>(t: *const T) -> Result<&'a T, RTresult> {
+    match t.as_ref() {
+        Some(reference) => Ok(reference),
+        None => Err(RTresult::RT_ERROR_INVALID_VALUE),
+    }
+}
+
+/// Turns a raw pointer into an exclusive reference; null becomes `RT_ERROR_INVALID_VALUE`.
+///
+/// The pointer is dereferenced through `as_mut`, so a non-null `t` must point to a valid `T`
+/// that nothing else accesses during `'a`.
+#[must_use]
+unsafe fn null_unwrap_mut<'a, T>(t: *mut T) -> Result<&'a mut T, RTresult> {
+    match t.as_mut() {
+        Some(reference) => Ok(reference),
+        None => Err(RTresult::RT_ERROR_INVALID_VALUE),
+    }
+}
+
+#[allow(non_snake_case)]
+mod definitions {
+ use crate::{
+ acceleration::{self, Acceleration},
+ buffer::{self, Buffer},
+ context::{self, Context},
+ geometry::{self, Geometry},
+ geometry_group::{self, GeometryGroup},
+ geometry_instance::{self, GeometryInstance},
+ geometry_triangles::{self, GeometryTriangles},
+ group::{self, Group},
+ material::{self, Material},
+ program::{self, Program},
+ texture_sampler::{self, TextureSampler},
+ transform::{self, Transform},
+ variable::{self, Variable},
+ UntypedObject,
+ };
+ use optix_types::*;
+ use std::os::raw::{c_int, c_uint};
+
+ #[cfg(debug_assertions)]
+ pub(crate) fn unimplemented() -> RTresult { // Debug builds: panic, so a call into an unsupported API function is noticed immediately.
+ unimplemented!()
+ }
+
+ #[cfg(not(debug_assertions))]
+ pub(crate) fn unimplemented() -> RTresult { // Release builds: report the unsupported API function through an ordinary error code.
+ RTresult::RT_ERROR_NOT_SUPPORTED
+ }
+
+ pub(crate) unsafe fn rtAccelerationCreate( // Thin forwarder to `acceleration::create`; arguments and result pass through unchanged.
+ context: Context,
+ acceleration: *mut Acceleration,
+ ) -> Result<(), RTresult> {
+ acceleration::create(context, acceleration)
+ }
+
+ pub(crate) unsafe fn rtAccelerationDestroy(acceleration: Acceleration) -> Result<(), RTresult> { // Thin forwarder to `acceleration::destroy`.
+ acceleration::destroy(acceleration)
+ }
+
+ pub(crate) unsafe fn rtAccelerationGetContext( // Thin forwarder to `acceleration::get_context`; arguments and result pass through unchanged.
+ acceleration: Acceleration,
+ context: *mut Context,
+ ) -> Result<(), RTresult> {
+ acceleration::get_context(acceleration, context)
+ }
+
+ pub(crate) unsafe fn rtAccelerationSetBuilder( // Thin forwarder to `acceleration::set_builder`; arguments and result pass through unchanged.
+ acceleration: Acceleration,
+ builder: *const ::std::os::raw::c_char,
+ ) -> Result<(), RTresult> {
+ acceleration::set_builder(acceleration, builder)
+ }
+
+ pub(crate) unsafe fn rtAccelerationMarkDirty( // Thin forwarder to `acceleration::mark_dirty`.
+ acceleration: Acceleration,
+ ) -> Result<(), RTresult> {
+ acceleration::mark_dirty(acceleration)
+ }
+
+ pub(crate) unsafe fn rtBufferCreate( // Thin forwarder to `buffer::create`; arguments and result pass through unchanged.
+ context: Context,
+ bufferdesc: ::std::os::raw::c_uint,
+ buffer: *mut Buffer,
+ ) -> Result<(), RTresult> {
+ buffer::create(context, bufferdesc, buffer)
+ }
+
+ pub(crate) unsafe fn rtBufferCreateFromCallback( // Thin forwarder to `buffer::create_from_callback`; arguments and result pass through unchanged.
+ context: Context,
+ bufferdesc: ::std::os::raw::c_uint,
+ callback: Option< // same function type as the `InternalRepresentation<RTbuffercallback>` impl at crate level
+ unsafe extern "C" fn(
+ callbackData: *mut ::std::os::raw::c_void,
+ buffer: Buffer,
+ block: *mut RTmemoryblock,
+ ) -> ::std::os::raw::c_int,
+ >,
+ callback_data: *mut ::std::os::raw::c_void,
+ buffer: *mut Buffer,
+ ) -> Result<(), RTresult> {
+ buffer::create_from_callback(context, bufferdesc, callback, callback_data, buffer)
+ }
+
+ pub(crate) unsafe fn rtBufferDestroy(buffer: Buffer) -> Result<(), RTresult> { // Thin forwarder to `buffer::destroy`.
+ buffer::destroy(buffer)
+ }
+
+ pub(crate) unsafe fn rtBufferSetElementSize( // Thin forwarder to `buffer::set_element_size`; arguments and result pass through unchanged.
+ buffer: Buffer,
+ element_size: u64,
+ ) -> Result<(), RTresult> {
+ buffer::set_element_size(buffer, element_size)
+ }
+
+ pub(crate) unsafe fn rtBufferSetFormat( // Thin forwarder to `buffer::set_format`; arguments and result pass through unchanged.
+ buffer: Buffer,
+ format: RTformat,
+ ) -> Result<(), RTresult> {
+ buffer::set_format(buffer, format)
+ }
+
+ pub(crate) unsafe fn rtBufferSetMipLevelCount( // Thin forwarder to `buffer::set_mip_level_count`; arguments and result pass through unchanged.
+ buffer: Buffer,
+ levels: ::std::os::raw::c_uint,
+ ) -> Result<(), RTresult> {
+ buffer::set_mip_level_count(buffer, levels)
+ }
+
+ #[allow(non_snake_case)]
+ pub(crate) unsafe fn rtBufferSetSize1D(buffer: Buffer, width: RTsize) -> Result<(), RTresult> { // Thin forwarder to `buffer::set_size1d`.
+ buffer::set_size1d(buffer, width)
+ }
+
+ #[allow(non_snake_case)]
+ pub(crate) unsafe fn rtBufferSetSize2D( // Thin forwarder to `buffer::set_size2d`; arguments and result pass through unchanged.
+ buffer: Buffer,
+ width: RTsize,
+ height: RTsize,
+ ) -> Result<(), RTresult> {
+ buffer::set_size2d(buffer, width, height)
+ }
+
+ pub(crate) unsafe fn rtBufferGetContext( // Thin forwarder to `buffer::get_context`; arguments and result pass through unchanged.
+ buffer: Buffer,
+ context: *mut Context,
+ ) -> Result<(), RTresult> {
+ buffer::get_context(buffer, context)
+ }
+
+ pub(crate) unsafe fn rtBufferGetDevicePointer( // Thin forwarder to `buffer::get_device_pointer`; arguments and result pass through unchanged.
+ buffer: Buffer,
+ optix_device_ordinal: ::std::os::raw::c_int,
+ device_pointer: *mut *mut ::std::os::raw::c_void,
+ ) -> Result<(), RTresult> {
+ buffer::get_device_pointer(buffer, optix_device_ordinal, device_pointer)
+ }
+
+ #[allow(non_snake_case)]
+ pub(crate) unsafe fn rtBufferGetDimensionality( // Thin forwarder to `buffer::get_dimensionality`; arguments and result pass through unchanged.
+ buffer: Buffer,
+ dimensionality: *mut c_uint,
+ ) -> Result<(), RTresult> {
+ buffer::get_dimensionality(buffer, dimensionality)
+ }
+
+ #[allow(non_snake_case)]
+ pub(crate) unsafe fn rtBufferGetElementSize( // Thin forwarder to `buffer::get_element_size`; arguments and result pass through unchanged.
+ buffer: Buffer,
+ elementSize: *mut u64,
+ ) -> Result<(), RTresult> {
+ buffer::get_element_size(buffer, elementSize)
+ }
+
+ #[allow(non_snake_case)]
+ pub(crate) unsafe fn rtBufferGetGLBOId( // Thin forwarder to `buffer::get_glboid`; arguments and result pass through unchanged.
+ buffer: Buffer,
+ glid: *mut c_uint,
+ ) -> Result<(), RTresult> {
+ buffer::get_glboid(buffer, glid)
+ }
+
+ #[allow(non_snake_case)]
+ pub(crate) unsafe fn rtBufferGetFormat( // Thin forwarder to `buffer::get_format`; arguments and result pass through unchanged.
+ buffer: Buffer,
+ format: *mut RTformat,
+ ) -> Result<(), RTresult> {
+ buffer::get_format(buffer, format)
+ }
+
+ pub(crate) unsafe fn rtBufferGetId( // Thin forwarder to `buffer::get_id`; arguments and result pass through unchanged.
+ buffer: Buffer,
+ bufferId: *mut ::std::os::raw::c_int,
+ ) -> Result<(), RTresult> {
+ buffer::get_id(buffer, bufferId)
+ }
+
+ pub(crate) unsafe fn rtBufferGetMipLevelCount( // Thin forwarder to `buffer::get_miplevel_count`; arguments and result pass through unchanged.
+ buffer: Buffer,
+ level: *mut ::std::os::raw::c_uint,
+ ) -> Result<(), RTresult> {
+ buffer::get_miplevel_count(buffer, level)
+ }
+
+ pub(crate) unsafe fn rtBufferGetMipLevelSize2D( // Thin forwarder to `buffer::get_miplevel_size2d`; arguments and result pass through unchanged.
+ buffer: Buffer,
+ level: ::std::os::raw::c_uint,
+ width: *mut RTsize,
+ height: *mut RTsize,
+ ) -> Result<(), RTresult> {
+ buffer::get_miplevel_size2d(buffer, level, width, height)
+ }
+
+ pub(crate) unsafe fn rtBufferGetSize1D( // Thin forwarder to `buffer::get_size1d`; arguments and result pass through unchanged.
+ buffer: Buffer,
+ width: *mut RTsize,
+ ) -> Result<(), RTresult> {
+ buffer::get_size1d(buffer, width)
+ }
+
+ #[allow(non_snake_case)]
+ pub(crate) unsafe fn rtBufferGetSize2D( // Thin forwarder to `buffer::get_size2d`; arguments and result pass through unchanged.
+ buffer: Buffer,
+ width: *mut RTsize,
+ height: *mut RTsize,
+ ) -> Result<(), RTresult> {
+ buffer::get_size2d(buffer, width, height)
+ }
+
+ pub(crate) unsafe fn rtBufferGetSizev( // Thin forwarder to `buffer::get_sizev`; arguments and result pass through unchanged.
+ buffer: Buffer,
+ dimensionality: ::std::os::raw::c_uint,
+ dims: *mut RTsize,
+ ) -> Result<(), RTresult> {
+ buffer::get_sizev(buffer, dimensionality, dims)
+ }
+
+ #[allow(non_snake_case)]
+ pub(crate) unsafe fn rtBufferMap( // Thin forwarder to `buffer::map`; arguments and result pass through unchanged.
+ buffer: Buffer,
+ userPointer: *mut *mut ::std::os::raw::c_void,
+ ) -> Result<(), RTresult> {
+ buffer::map(buffer, userPointer)
+ }
+
+ #[allow(non_snake_case)]
+ pub(crate) unsafe fn rtBufferMapEx( // Thin forwarder to `buffer::map_ex`; arguments and result pass through unchanged.
+ buffer: Buffer,
+ mapFlags: ::std::os::raw::c_uint,
+ level: ::std::os::raw::c_uint,
+ userOwned: *mut ::std::os::raw::c_void,
+ optixOwned: *mut *mut ::std::os::raw::c_void,
+ ) -> Result<(), RTresult> {
+ buffer::map_ex(buffer, mapFlags, level, userOwned, optixOwned)
+ }
+
+ #[allow(non_snake_case)]
+ pub(crate) unsafe fn rtBufferUnmap(buffer: Buffer) -> Result<(), RTresult> { // Thin forwarder to `buffer::unmap`.
+ buffer::unmap(buffer)
+ }
+
+ #[allow(non_snake_case)]
+ pub(crate) unsafe fn rtBufferUnmapEx( // Thin forwarder to `buffer::unmap_ex`; arguments and result pass through unchanged.
+ buffer: Buffer,
+ level: ::std::os::raw::c_uint,
+ ) -> Result<(), RTresult> {
+ buffer::unmap_ex(buffer, level)
+ }
+
+ #[allow(non_snake_case)]
+ pub(crate) unsafe fn rtContextCreate(context: *mut Context) -> Result<(), RTresult> { // Thin forwarder to `context::create`.
+ context::create(context)
+ }
+
+ #[allow(non_snake_case)]
+ pub(crate) unsafe fn rtContextDeclareVariable( // Thin forwarder to `context::declare_variable`; arguments and result pass through unchanged.
+ context: Context,
+ name: *const ::std::os::raw::c_char,
+ v: *mut Variable,
+ ) -> Result<(), RTresult> {
+ context::declare_variable(context, name, v)
+ }
+
+ pub(crate) unsafe fn rtContextGetAttribute( // Thin forwarder to `context::get_attribute`; arguments and result pass through unchanged.
+ context: Context,
+ attrib: RTcontextattribute,
+ size: RTsize,
+ p: *mut ::std::os::raw::c_void,
+ ) -> Result<(), RTresult> {
+ context::get_attribute(context, attrib, size, p)
+ }
+
+ pub(crate) unsafe fn rtContextGetBufferFromId( // Thin forwarder to `context::get_buffer_from_id`; arguments and result pass through unchanged.
+ context: Context,
+ bufferId: ::std::os::raw::c_int,
+ buffer: *mut Buffer,
+ ) -> Result<(), RTresult> {
+ context::get_buffer_from_id(context, bufferId, buffer)
+ }
+
+ pub(crate) unsafe fn rtContextGetDeviceCount( // Thin forwarder to `context::get_device_count`; arguments and result pass through unchanged.
+ context: Context,
+ count: *mut ::std::os::raw::c_uint,
+ ) -> Result<(), RTresult> {
+ context::get_device_count(context, count)
+ }
+
+ pub(crate) unsafe fn rtContextGetDevices( // Thin forwarder to `context::get_devices`; arguments and result pass through unchanged.
+ context: Context,
+ devices: *mut ::std::os::raw::c_int,
+ ) -> Result<(), RTresult> {
+ context::get_devices(context, devices)
+ }
+
+ #[allow(non_snake_case)]
+ pub(crate) unsafe fn rtContextDestroy(context: Context) -> Result<(), RTresult> { // Thin forwarder to `context::destroy`.
+ context::destroy(context)
+ }
+
+ #[allow(non_snake_case)]
+ pub(crate) unsafe fn rtContextGetErrorString( // Thin forwarder to `context::get_error_string`; unlike its siblings it has no return value.
+ ctx: Context,
+ code: RTresult,
+ string_return: *mut *const ::std::os::raw::c_char,
+ ) {
+ context::get_error_string(ctx, code, string_return)
+ }
+
+ pub(crate) unsafe fn rtContextLaunch2D( // Thin forwarder to `context::launch_2d`; arguments and result pass through unchanged.
+ context: Context,
+ entry_point_index: ::std::os::raw::c_uint,
+ width: u64,
+ height: u64,
+ ) -> Result<(), RTresult> {
+ context::launch_2d(context, entry_point_index, width, height)
+ }
+
+ pub(crate) unsafe fn rtContextQueryVariable( // Thin forwarder to `context::query_variable`; arguments and result pass through unchanged.
+ context: Context,
+ name: *const ::std::os::raw::c_char,
+ v: *mut Variable,
+ ) -> Result<(), RTresult> {
+ context::query_variable(context, name, v)
+ }
+
+ pub(crate) unsafe fn rtContextSetAttribute( // Thin forwarder to `context::set_attribute`; arguments and result pass through unchanged.
+ context: Context,
+ attrib: RTcontextattribute,
+ size: RTsize,
+ p: *const ::std::os::raw::c_void,
+ ) -> Result<(), RTresult> {
+ context::set_attribute(context, attrib, size, p)
+ }
+
+ pub(crate) unsafe fn rtContextSetDevices( // Thin forwarder to `context::set_devices`; arguments and result pass through unchanged.
+ context: Context,
+ count: ::std::os::raw::c_uint,
+ devices: *const ::std::os::raw::c_int,
+ ) -> Result<(), RTresult> {
+ context::set_devices(context, count, devices)
+ }
+
+ pub(crate) unsafe fn rtContextSetEntryPointCount( // Thin forwarder to `context::set_entry_point_count`; arguments and result pass through unchanged.
+ context: Context,
+ count: c_uint,
+ ) -> Result<(), RTresult> {
+ context::set_entry_point_count(context, count)
+ }
+
+ pub(crate) fn rtContextSetExceptionEnabled( // Thin forwarder to `context::set_exception_enabled`; note that this wrapper is a safe `fn`.
+ context: Context,
+ exception: RTexception,
+ enabled: ::std::os::raw::c_int,
+ ) -> Result<(), RTresult> {
+ context::set_exception_enabled(context, exception, enabled)
+ }
+
+ pub(crate) unsafe fn rtContextSetExceptionProgram( // Thin forwarder to `context::set_exception_program`; arguments and result pass through unchanged.
+ context: Context,
+ entry_point_index: ::std::os::raw::c_uint,
+ program: Program,
+ ) -> Result<(), RTresult> {
+ context::set_exception_program(context, entry_point_index, program)
+ }
+
+ pub(crate) unsafe fn rtContextSetMaxCallableProgramDepth( // Thin forwarder to `context::set_max_callable_program_depth`; returns a bare `RTresult` rather than a `Result`.
+ context: Context,
+ maxDepth: ::std::os::raw::c_uint,
+ ) -> RTresult {
+ context::set_max_callable_program_depth(context, maxDepth)
+ }
+
+ pub(crate) unsafe fn rtContextSetMaxTraceDepth( // Thin forwarder; the target is named `context::set_max_depth` (no `trace` in its name).
+ context: Context,
+ max_depth: u32,
+ ) -> Result<(), RTresult> {
+ context::set_max_depth(context, max_depth)
+ }
+
+ pub(crate) unsafe fn rtContextSetMissProgram( // Thin forwarder to `context::set_miss_program`; arguments and result pass through unchanged.
+ context: Context,
+ ray_type_index: ::std::os::raw::c_uint,
+ program: Program,
+ ) -> Result<(), RTresult> {
+ context::set_miss_program(context, ray_type_index, program)
+ }
+
+ pub(crate) unsafe fn rtContextSetPrintEnabled( // Thin forwarder to `context::set_print_enabled`; arguments and result pass through unchanged.
+ context: Context,
+ enabled: ::std::os::raw::c_int,
+ ) -> Result<(), RTresult> {
+ context::set_print_enabled(context, enabled)
+ }
+
+ pub(crate) unsafe fn rtContextSetPrintLaunchIndex( // Thin forwarder to `context::set_print_launch_index`; arguments and result pass through unchanged.
+ context: Context,
+ x: ::std::os::raw::c_int,
+ y: ::std::os::raw::c_int,
+ z: ::std::os::raw::c_int,
+ ) -> Result<(), RTresult> {
+ context::set_print_launch_index(context, x, y, z)
+ }
+
+ pub(crate) unsafe fn rtContextSetRayGenerationProgram( // Thin forwarder to `context::set_ray_generation_program`; arguments and result pass through unchanged.
+ context: Context,
+ entry_point_index: ::std::os::raw::c_uint,
+ program: Program,
+ ) -> Result<(), RTresult> {
+ context::set_ray_generation_program(context, entry_point_index, program)
+ }
+
+ pub(crate) unsafe fn rtContextSetRayTypeCount( // Thin forwarder to `context::set_ray_type_count`; arguments and result pass through unchanged.
+ context: Context,
+ ray_type_count: c_uint,
+ ) -> Result<(), RTresult> {
+ context::set_ray_type_count(context, ray_type_count)
+ }
+
+ pub(crate) unsafe fn rtContextSetStackSize( // Thin forwarder to `context::set_stack_size`; arguments and result pass through unchanged.
+ context: Context,
+ bytes: u64,
+ ) -> Result<(), RTresult> {
+ context::set_stack_size(context, bytes)
+ }
+
+ pub(crate) fn rtContextSetUsageReportCallback( // Thin forwarder to `context::set_usage_report_callback`; note that this wrapper is a safe `fn`.
+ context: Context,
+ callback: RTusagereportcallback,
+ verbosity: ::std::os::raw::c_int,
+ cbdata: *mut ::std::os::raw::c_void,
+ ) -> Result<(), RTresult> {
+ context::set_usage_report_callback(context, callback, verbosity, cbdata)
+ }
+
+ pub(crate) unsafe fn rtContextValidate(context: Context) -> Result<(), RTresult> { // Thin forwarder to `context::validate`.
+ context::validate(context)
+ }
+
+ pub(crate) unsafe fn rtDeviceGetAttribute( // Thin forwarder to `device_get_attribute` in the parent module.
+ ordinal: ::std::os::raw::c_int,
+ attrib: RTdeviceattribute,
+ size: RTsize,
+ p: *mut ::std::os::raw::c_void,
+ ) -> Result<(), RTresult> {
+ super::device_get_attribute(ordinal, attrib, size, p)
+ }
+
+ pub(crate) unsafe fn rtDeviceGetDeviceCount(device_count: *mut u32) -> RTresult { // Thin forwarder to `device_get_count` in the parent module; returns a bare `RTresult`.
+ super::device_get_count(device_count)
+ }
+
+ pub(crate) unsafe fn rtGeometryCreate( // Thin forwarder to `geometry::create`; arguments and result pass through unchanged.
+ context: Context,
+ geometry: *mut Geometry,
+ ) -> Result<(), RTresult> {
+ geometry::create(context, geometry)
+ }
+
+ pub(crate) unsafe fn rtGeometryDeclareVariable( // Thin forwarder to `geometry::declare_variable`; arguments and result pass through unchanged.
+ geometry: Geometry,
+ name: *const ::std::os::raw::c_char,
+ v: *mut Variable,
+ ) -> Result<(), RTresult> {
+ geometry::declare_variable(geometry, name, v)
+ }
+
+ pub(crate) unsafe fn rtGeometryDestroy(geometry: Geometry) -> Result<(), RTresult> { // Thin forwarder to `geometry::destroy`.
+ geometry::destroy(geometry)
+ }
+
+ pub(crate) unsafe fn rtGeometryGetContext(geometry: Geometry, context: *mut Context) -> Result<(), RTresult> { // Thin forwarder to `geometry::get_context`.
+ geometry::get_context(geometry, context)
+ }
+
+ pub(crate) unsafe fn rtGeometryQueryVariable( // Thin forwarder to `geometry::query_variable`; arguments and result pass through unchanged.
+ geometry: Geometry,
+ name: *const ::std::os::raw::c_char,
+ v: *mut Variable,
+ ) -> Result<(), RTresult> {
+ geometry::query_variable(geometry, name, v)
+ }
+
+ pub(crate) unsafe fn rtGeometrySetPrimitiveCount( // Thin forwarder to `geometry::set_primitive_count`; arguments and result pass through unchanged.
+ geometry: Geometry,
+ primitiveCount: ::std::os::raw::c_uint,
+ ) -> Result<(), RTresult> {
+ geometry::set_primitive_count(geometry, primitiveCount)
+ }
+
+ pub(crate) unsafe fn rtGeometrySetBoundingBoxProgram( // Thin forwarder to `geometry::set_bounding_box_program`; arguments and result pass through unchanged.
+ geometry: Geometry,
+ program: Program,
+ ) -> Result<(), RTresult> {
+ geometry::set_bounding_box_program(geometry, program)
+ }
+
+ pub(crate) unsafe fn rtGeometrySetIntersectionProgram( // Thin forwarder to `geometry::set_intersection_program`; arguments and result pass through unchanged.
+ geometry: Geometry,
+ program: Program,
+ ) -> Result<(), RTresult> {
+ geometry::set_intersection_program(geometry, program)
+ }
+
+ pub(crate) unsafe fn rtGeometryGroupCreate( // Thin forwarder to `geometry_group::create`; arguments and result pass through unchanged.
+ context: Context,
+ geometrygroup: *mut GeometryGroup,
+ ) -> Result<(), RTresult> {
+ geometry_group::create(context, geometrygroup)
+ }
+
+ pub(crate) unsafe fn rtGeometryGroupDestroy( // Thin forwarder to `geometry_group::destroy`.
+ geometrygroup: GeometryGroup,
+ ) -> Result<(), RTresult> {
+ geometry_group::destroy(geometrygroup)
+ }
+
+ pub(crate) unsafe fn rtGeometryGroupGetChildCount( // Thin forwarder to `geometry_group::get_child_count`; arguments and result pass through unchanged.
+ geometrygroup: GeometryGroup,
+ count: *mut ::std::os::raw::c_uint,
+ ) -> Result<(), RTresult> {
+ geometry_group::get_child_count(geometrygroup, count)
+ }
+
+ pub(crate) unsafe fn rtGeometryGroupGetContext( // Thin forwarder to `geometry_group::get_context`; arguments and result pass through unchanged.
+ geometrygroup: GeometryGroup,
+ context: *mut Context,
+ ) -> Result<(), RTresult> {
+ geometry_group::get_context(geometrygroup, context)
+ }
+
+ pub(crate) unsafe fn rtGeometryGroupSetAcceleration( // Thin forwarder to `geometry_group::set_acceleration`; arguments and result pass through unchanged.
+ geometrygroup: GeometryGroup,
+ acceleration: Acceleration,
+ ) -> Result<(), RTresult> {
+ geometry_group::set_acceleration(geometrygroup, acceleration)
+ }
+
+ pub(crate) unsafe fn rtGeometryGroupSetChild( // Thin forwarder to `geometry_group::set_child`; arguments and result pass through unchanged.
+ geometrygroup: GeometryGroup,
+ index: ::std::os::raw::c_uint,
+ geometryinstance: GeometryInstance,
+ ) -> Result<(), RTresult> {
+ geometry_group::set_child(geometrygroup, index, geometryinstance)
+ }
+
+ pub(crate) unsafe fn rtGeometryGroupSetChildCount( // Thin forwarder to `geometry_group::set_child_count`; arguments and result pass through unchanged.
+ geometrygroup: GeometryGroup,
+ count: ::std::os::raw::c_uint,
+ ) -> Result<(), RTresult> {
+ geometry_group::set_child_count(geometrygroup, count)
+ }
+
+ pub(crate) unsafe fn rtGeometryGroupSetVisibilityMask( // Thin forwarder to `geometry_group::set_visibility_mask`; arguments and result pass through unchanged.
+ geometrygroup: GeometryGroup,
+ mask: RTvisibilitymask,
+ ) -> Result<(), RTresult> {
+ geometry_group::set_visibility_mask(geometrygroup, mask)
+ }
+
+ pub(crate) unsafe fn rtGeometryInstanceCreate( // Thin forwarder to `geometry_instance::create`; arguments and result pass through unchanged.
+ context: Context,
+ geometryinstance: *mut GeometryInstance,
+ ) -> Result<(), RTresult> {
+ geometry_instance::create(context, geometryinstance)
+ }
+
+ pub(crate) unsafe fn rtGeometryInstanceDeclareVariable( // Thin forwarder to `geometry_instance::declare_variable`; arguments and result pass through unchanged.
+ geometryinstance: GeometryInstance,
+ name: *const ::std::os::raw::c_char,
+ v: *mut Variable,
+ ) -> Result<(), RTresult> {
+ geometry_instance::declare_variable(geometryinstance, name, v)
+ }
+
+ pub(crate) unsafe fn rtGeometryInstanceDestroy( // Thin forwarder to `geometry_instance::destroy`.
+ geometryinstance: GeometryInstance,
+ ) -> Result<(), RTresult> {
+ geometry_instance::destroy(geometryinstance)
+ }
+
+ pub(crate) unsafe fn rtGeometryInstanceQueryVariable( // Thin forwarder to `geometry_instance::query_variable`; arguments and result pass through unchanged.
+ geometryinstance: GeometryInstance,
+ name: *const ::std::os::raw::c_char,
+ v: *mut Variable,
+ ) -> Result<(), RTresult> {
+ geometry_instance::query_variable(geometryinstance, name, v)
+ }
+
+ pub(crate) unsafe fn rtGeometryInstanceGetContext( // Thin forwarder to `geometry_instance::get_context`; arguments and result pass through unchanged.
+ geometryinstance: GeometryInstance,
+ context: *mut Context,
+ ) -> Result<(), RTresult> {
+ geometry_instance::get_context(geometryinstance, context)
+ }
+
+ pub(crate) unsafe fn rtGeometryInstanceGetMaterialCount( // Thin forwarder to `geometry_instance::get_material_count`; arguments and result pass through unchanged.
+ geometryinstance: GeometryInstance,
+ count: *mut ::std::os::raw::c_uint,
+ ) -> Result<(), RTresult> {
+ geometry_instance::get_material_count(geometryinstance, count)
+ }
+
+ pub(crate) unsafe fn rtGeometryInstanceSetGeometry( // Thin forwarder to `geometry_instance::set_geometry`; arguments and result pass through unchanged.
+ geometryinstance: GeometryInstance,
+ geometry: Geometry,
+ ) -> Result<(), RTresult> {
+ geometry_instance::set_geometry(geometryinstance, geometry)
+ }
+
+ pub(crate) unsafe fn rtGeometryInstanceSetGeometryTriangles( // Thin forwarder to `geometry_instance::set_geometry_triangles`; arguments and result pass through unchanged.
+ geometryinstance: GeometryInstance,
+ geometrytriangles: GeometryTriangles,
+ ) -> Result<(), RTresult> {
+ geometry_instance::set_geometry_triangles(geometryinstance, geometrytriangles)
+ }
+
+ /// Forwards to `geometry_instance::set_material` with all arguments unchanged.
+ pub(crate) unsafe fn rtGeometryInstanceSetMaterial(
+ geometryinstance: GeometryInstance,
+ index: ::std::os::raw::c_uint,
+ material: Material,
+ ) -> Result<(), RTresult> {
+ geometry_instance::set_material(geometryinstance, index, material)
+ }
+
+ /// Forwards to `geometry_instance::set_material_count` with both arguments unchanged.
+ pub(crate) unsafe fn rtGeometryInstanceSetMaterialCount(
+ geometryinstance: GeometryInstance,
+ count: ::std::os::raw::c_uint,
+ ) -> Result<(), RTresult> {
+ geometry_instance::set_material_count(geometryinstance, count)
+ }
+
+ /// Forwards to `geometry_triangles::create` with both arguments unchanged.
+ pub(crate) unsafe fn rtGeometryTrianglesCreate(
+ context: Context,
+ geometrytriangles: *mut GeometryTriangles,
+ ) -> Result<(), RTresult> {
+ geometry_triangles::create(context, geometrytriangles)
+ }
+
+ /// Forwards to `geometry_triangles::declare_variable` with all arguments unchanged.
+ pub(crate) unsafe fn rtGeometryTrianglesDeclareVariable(
+ geometrytriangles: GeometryTriangles,
+ name: *const ::std::os::raw::c_char,
+ v: *mut Variable,
+ ) -> Result<(), RTresult> {
+ geometry_triangles::declare_variable(geometrytriangles, name, v)
+ }
+
+ /// Forwards to `geometry_triangles::destroy`.
+ pub(crate) unsafe fn rtGeometryTrianglesDestroy(
+ geometrytriangles: GeometryTriangles,
+ ) -> Result<(), RTresult> {
+ geometry_triangles::destroy(geometrytriangles)
+ }
+
+ /// Forwards to `geometry_triangles::get_context` with both arguments unchanged.
+ pub(crate) unsafe fn rtGeometryTrianglesGetContext(
+ geometrytriangles: GeometryTriangles,
+ context: *mut Context,
+ ) -> Result<(), RTresult> {
+ geometry_triangles::get_context(geometrytriangles, context)
+ }
+
+ /// Forwards to `geometry_triangles::set_attribute` with both arguments unchanged.
+ pub(crate) unsafe fn rtGeometryTrianglesSetAttributeProgram(
+ geometrytriangles: GeometryTriangles,
+ program: Program,
+ ) -> Result<(), RTresult> {
+ geometry_triangles::set_attribute(geometrytriangles, program)
+ }
+
+ /// Forwards to `geometry_triangles::set_build_flags` with both arguments unchanged.
+ /// Note: `_buildFlags` is passed on despite its underscore prefix.
+ pub(crate) unsafe fn rtGeometryTrianglesSetBuildFlags(
+ geometrytriangles: GeometryTriangles,
+ _buildFlags: RTgeometrybuildflags,
+ ) -> Result<(), RTresult> {
+ geometry_triangles::set_build_flags(geometrytriangles, _buildFlags)
+ }
+
+ /// Forwards to `geometry_triangles::set_flags_per_material` with all arguments unchanged.
+ pub(crate) unsafe fn rtGeometryTrianglesSetFlagsPerMaterial(
+ geometrytriangles: GeometryTriangles,
+ materialIndex: ::std::os::raw::c_uint,
+ flags: RTgeometryflags,
+ ) -> Result<(), RTresult> {
+ geometry_triangles::set_flags_per_material(geometrytriangles, materialIndex, flags)
+ }
+
+ /// Forwards to `geometry_triangles::set_primitive_count` with both arguments unchanged.
+ #[allow(non_snake_case)]
+ pub(crate) unsafe fn rtGeometryTrianglesSetPrimitiveCount(
+ geometrytriangles: GeometryTriangles,
+ triangle_count: c_uint,
+ ) -> Result<(), RTresult> {
+ geometry_triangles::set_primitive_count(geometrytriangles, triangle_count)
+ }
+
+ /// Forwards to `geometry_triangles::set_triangle_indices` with all arguments unchanged
+ /// and in the same order.
+ pub(crate) unsafe fn rtGeometryTrianglesSetTriangleIndices(
+ geometrytriangles: GeometryTriangles,
+ indexBuffer: Buffer,
+ indexBufferByteOffset: u64,
+ triIndicesByteStride: u64,
+ triIndicesFormat: RTformat,
+ ) -> Result<(), RTresult> {
+ geometry_triangles::set_triangle_indices(
+ geometrytriangles,
+ indexBuffer,
+ indexBufferByteOffset,
+ triIndicesByteStride,
+ triIndicesFormat,
+ )
+ }
+
+ /// Forwards to `geometry_triangles::set_vertices` with all arguments unchanged
+ /// and in the same order.
+ #[allow(non_snake_case)]
+ pub(crate) unsafe fn rtGeometryTrianglesSetVertices(
+ geometrytriangles: GeometryTriangles,
+ vertexCount: ::std::os::raw::c_uint,
+ vertexBuffer: Buffer,
+ vertexBufferByteOffset: u64,
+ vertexByteStride: u64,
+ positionFormat: RTformat,
+ ) -> Result<(), RTresult> {
+ geometry_triangles::set_vertices(
+ geometrytriangles,
+ vertexCount,
+ vertexBuffer,
+ vertexBufferByteOffset,
+ vertexByteStride,
+ positionFormat,
+ )
+ }
+
+ /// Forwards to `geometry_triangles::query_variable` with all arguments unchanged.
+ pub(crate) unsafe fn rtGeometryTrianglesQueryVariable(
+ geometrytriangles: GeometryTriangles,
+ name: *const ::std::os::raw::c_char,
+ v: *mut Variable,
+ ) -> Result<(), RTresult> {
+ geometry_triangles::query_variable(geometrytriangles, name, v)
+ }
+
+ /// Forwards to `geometry_triangles::validate`.
+ pub(crate) unsafe fn rtGeometryTrianglesValidate(
+ geometrytriangles: GeometryTriangles,
+ ) -> Result<(), RTresult> {
+ geometry_triangles::validate(geometrytriangles)
+ }
+
+ /// Forwards to `get_version` in the parent module and returns its `RTresult` directly.
+ pub(crate) unsafe fn rtGetVersion(version: *mut u32) -> RTresult {
+ super::get_version(version)
+ }
+
+ /// Forwards to `global_get_attribute` in the parent module and returns its `RTresult` directly.
+ pub(crate) unsafe fn rtGlobalGetAttribute(
+ attrib: RTglobalattribute,
+ size: RTsize,
+ p: *mut ::std::os::raw::c_void,
+ ) -> RTresult {
+ super::global_get_attribute(attrib, size, p)
+ }
+
+ /// Forwards to `global_set_attribute` in the parent module and returns its `RTresult` directly.
+ pub(crate) unsafe fn rtGlobalSetAttribute(
+ attrib: RTglobalattribute,
+ size: RTsize,
+ p: *const ::std::os::raw::c_void,
+ ) -> RTresult {
+ super::global_set_attribute(attrib, size, p)
+ }
+
+ /// Forwards to `group::create` with both arguments unchanged.
+ pub(crate) unsafe fn rtGroupCreate(
+ context: Context,
+ group: *mut Group,
+ ) -> Result<(), RTresult> {
+ group::create(context, group)
+ }
+
+ /// Forwards to `group::destroy`.
+ pub(crate) unsafe fn rtGroupDestroy(group: Group) -> Result<(), RTresult> {
+ group::destroy(group)
+ }
+
+ /// Forwards to `group::get_acceleration` with both arguments unchanged.
+ pub(crate) unsafe fn rtGroupGetAcceleration(
+ group: Group,
+ acceleration: *mut Acceleration,
+ ) -> Result<(), RTresult> {
+ group::get_acceleration(group, acceleration)
+ }
+
+ /// Forwards to `group::get_child` with all arguments unchanged.
+ pub(crate) unsafe fn rtGroupGetChild(
+ group: Group,
+ index: ::std::os::raw::c_uint,
+ child: *mut UntypedObject,
+ ) -> Result<(), RTresult> {
+ group::get_child(group, index, child)
+ }
+
+ /// Forwards to `group::get_child_count` with both arguments unchanged.
+ pub(crate) unsafe fn rtGroupGetChildCount(
+ group: Group,
+ count: *mut ::std::os::raw::c_uint,
+ ) -> Result<(), RTresult> {
+ group::get_child_count(group, count)
+ }
+
+ /// Forwards to `group::get_context` with both arguments unchanged.
+ pub(crate) unsafe fn rtGroupGetContext(
+ group: Group,
+ context: *mut Context,
+ ) -> Result<(), RTresult> {
+ group::get_context(group, context)
+ }
+
+ /// Forwards to `group::set_acceleration` with both arguments unchanged.
+ pub(crate) unsafe fn rtGroupSetAcceleration(
+ group: Group,
+ acceleration: Acceleration,
+ ) -> Result<(), RTresult> {
+ group::set_acceleration(group, acceleration)
+ }
+
+ /// Forwards to `group::set_child` with all arguments unchanged.
+ pub(crate) unsafe fn rtGroupSetChild(
+ group: Group,
+ index: ::std::os::raw::c_uint,
+ child: UntypedObject,
+ ) -> Result<(), RTresult> {
+ group::set_child(group, index, child)
+ }
+
+ /// Forwards to `group::set_child_count` with both arguments unchanged.
+ pub(crate) unsafe fn rtGroupSetChildCount(
+ group: Group,
+ count: ::std::os::raw::c_uint,
+ ) -> Result<(), RTresult> {
+ group::set_child_count(group, count)
+ }
+
+ /// Forwards to `material::create` with both arguments unchanged.
+ #[allow(non_snake_case)]
+ pub(crate) unsafe fn rtMaterialCreate(
+ context: Context,
+ material: *mut Material,
+ ) -> Result<(), RTresult> {
+ material::create(context, material)
+ }
+
+ /// Forwards to `material::declare_variable` with all arguments unchanged.
+ pub(crate) unsafe fn rtMaterialDeclareVariable(
+ material: Material,
+ name: *const ::std::os::raw::c_char,
+ v: *mut Variable,
+ ) -> Result<(), RTresult> {
+ material::declare_variable(material, name, v)
+ }
+
+ /// Forwards to `material::destroy`.
+ pub(crate) unsafe fn rtMaterialDestroy(material: Material) -> Result<(), RTresult> {
+ material::destroy(material)
+ }
+
+ /// Forwards to `material::get_context` with both arguments unchanged.
+ pub(crate) unsafe fn rtMaterialGetContext(
+ material: Material,
+ context: *mut Context,
+ ) -> Result<(), RTresult> {
+ material::get_context(material, context)
+ }
+
+ /// Forwards to `material::query_variable` with all arguments unchanged.
+ pub(crate) unsafe fn rtMaterialQueryVariable(
+ material: Material,
+ name: *const ::std::os::raw::c_char,
+ v: *mut Variable,
+ ) -> Result<(), RTresult> {
+ material::query_variable(material, name, v)
+ }
+
+ /// Forwards to `material::set_any_hit_program` with all arguments unchanged.
+ pub(crate) unsafe fn rtMaterialSetAnyHitProgram(
+ material: Material,
+ rayTypeIndex: ::std::os::raw::c_uint,
+ program: Program,
+ ) -> Result<(), RTresult> {
+ material::set_any_hit_program(material, rayTypeIndex, program)
+ }
+
+ /// Forwards to `material::set_closest_hit_program` with all arguments unchanged.
+ pub(crate) unsafe fn rtMaterialSetClosestHitProgram(
+ material: Material,
+ rayTypeIndex: ::std::os::raw::c_uint,
+ program: Program,
+ ) -> Result<(), RTresult> {
+ material::set_closest_hit_program(material, rayTypeIndex, program)
+ }
+
+ /// Forwards to `program::create_from_program` with all arguments unchanged.
+ pub(crate) unsafe fn rtProgramCreateFromProgram(
+ context: Context,
+ program_in: Program,
+ program_out: *mut Program,
+ ) -> Result<(), RTresult> {
+ program::create_from_program(context, program_in, program_out)
+ }
+
+ /// Forwards to `program::create_from_ptx_file` with all arguments unchanged.
+ pub(crate) unsafe fn rtProgramCreateFromPTXFile(
+ context: Context,
+ filename: *const ::std::os::raw::c_char,
+ program_name: *const ::std::os::raw::c_char,
+ program: *mut Program,
+ ) -> Result<(), RTresult> {
+ program::create_from_ptx_file(context, filename, program_name, program)
+ }
+
+ /// Forwards to `program::create_from_ptx_string` with all arguments unchanged.
+ pub(crate) unsafe fn rtProgramCreateFromPTXString(
+ context: Context,
+ ptx: *const ::std::os::raw::c_char,
+ program_name: *const ::std::os::raw::c_char,
+ program: *mut Program,
+ ) -> Result<(), RTresult> {
+ program::create_from_ptx_string(context, ptx, program_name, program)
+ }
+
+ /// Forwards to `program::declare_variable` with all arguments unchanged.
+ #[allow(non_snake_case)]
+ pub(crate) unsafe fn rtProgramDeclareVariable(
+ program: Program,
+ name: *const ::std::os::raw::c_char,
+ v: *mut Variable,
+ ) -> Result<(), RTresult> {
+ program::declare_variable(program, name, v)
+ }
+
+ /// Forwards to `program::destroy`.
+ pub(crate) unsafe fn rtProgramDestroy(program: Program) -> Result<(), RTresult> {
+ program::destroy(program)
+ }
+
+ /// Forwards to `program::get_context` with both arguments unchanged.
+ pub(crate) unsafe fn rtProgramGetContext(program: Program, context: *mut Context) -> Result<(), RTresult> {
+ program::get_context(program, context)
+ }
+
+ /// Forwards to `program::get_id` with both arguments unchanged.
+ pub(crate) unsafe fn rtProgramGetId(
+ program: Program,
+ program_id: *mut ::std::os::raw::c_int,
+ ) -> Result<(), RTresult> {
+ program::get_id(program, program_id)
+ }
+
+ /// Forwards to `program::query_variable` with all arguments unchanged.
+ pub(crate) unsafe fn rtProgramQueryVariable(
+ program: Program,
+ name: *const ::std::os::raw::c_char,
+ v: *mut Variable,
+ ) -> Result<(), RTresult> {
+ program::query_variable(program, name, v)
+ }
+
+ /// Forwards to `program::validate`.
+ pub(crate) unsafe fn rtProgramValidate(program: Program) -> Result<(), RTresult> {
+ program::validate(program)
+ }
+
+ /// Forwards to `texture_sampler::create` with both arguments unchanged.
+ pub(crate) unsafe fn rtTextureSamplerCreate(
+ context: Context,
+ texturesampler: *mut TextureSampler,
+ ) -> Result<(), RTresult> {
+ texture_sampler::create(context, texturesampler)
+ }
+
+ /// Forwards to `texture_sampler::destroy`.
+ pub(crate) unsafe fn rtTextureSamplerDestroy(
+ texturesampler: TextureSampler,
+ ) -> Result<(), RTresult> {
+ texture_sampler::destroy(texturesampler)
+ }
+
+ /// Forwards to `texture_sampler::get_buffer`, including both `deprecated*` arguments.
+ pub(crate) unsafe fn rtTextureSamplerGetBuffer(
+ texturesampler: TextureSampler,
+ deprecated0: ::std::os::raw::c_uint,
+ deprecated1: ::std::os::raw::c_uint,
+ buffer: *mut Buffer,
+ ) -> Result<(), RTresult> {
+ texture_sampler::get_buffer(texturesampler, deprecated0, deprecated1, buffer)
+ }
+
+ /// Forwards to `texture_sampler::get_context` with both arguments unchanged.
+ pub(crate) unsafe fn rtTextureSamplerGetContext(
+ texturesampler: TextureSampler,
+ context: *mut Context,
+ ) -> Result<(), RTresult> {
+ texture_sampler::get_context(texturesampler, context)
+ }
+
+ /// Forwards to `texture_sampler::get_id` with both arguments unchanged.
+ pub(crate) unsafe fn rtTextureSamplerGetId(
+ texturesampler: TextureSampler,
+ textureId: *mut ::std::os::raw::c_int,
+ ) -> Result<(), RTresult> {
+ texture_sampler::get_id(texturesampler, textureId)
+ }
+
+ /// Forwards to `texture_sampler::set_array_size` with both arguments unchanged.
+ pub(crate) unsafe fn rtTextureSamplerSetArraySize(
+ texturesampler: TextureSampler,
+ textureCount: ::std::os::raw::c_uint,
+ ) -> Result<(), RTresult> {
+ texture_sampler::set_array_size(texturesampler, textureCount)
+ }
+
+ /// Forwards to `texture_sampler::set_buffer`. Only the sampler and the buffer are passed on;
+ /// the two `_deprecated*` arguments are ignored.
+ pub(crate) unsafe fn rtTextureSamplerSetBuffer(
+ texturesampler: TextureSampler,
+ _deprecated0: ::std::os::raw::c_uint,
+ _deprecated1: ::std::os::raw::c_uint,
+ buffer: Buffer,
+ ) -> Result<(), RTresult> {
+ texture_sampler::set_buffer(texturesampler, buffer)
+ }
+
+ /// Forwards to `texture_sampler::set_filtering_modes` with all arguments unchanged
+ /// and in the same order.
+ pub(crate) unsafe fn rtTextureSamplerSetFilteringModes(
+ texturesampler: TextureSampler,
+ minification: RTfiltermode,
+ magnification: RTfiltermode,
+ mipmapping: RTfiltermode,
+ ) -> Result<(), RTresult> {
+ texture_sampler::set_filtering_modes(
+ texturesampler,
+ minification,
+ magnification,
+ mipmapping,
+ )
+ }
+
+ /// Forwards to `texture_sampler::set_indexing_mode` with both arguments unchanged.
+ pub(crate) unsafe fn rtTextureSamplerSetIndexingMode(
+ texturesampler: TextureSampler,
+ indexmode: RTtextureindexmode,
+ ) -> Result<(), RTresult> {
+ texture_sampler::set_indexing_mode(texturesampler, indexmode)
+ }
+
+ /// Forwards to `texture_sampler::set_max_anisotropy` with both arguments unchanged.
+ pub(crate) unsafe fn rtTextureSamplerSetMaxAnisotropy(
+ texturesampler: TextureSampler,
+ value: f32,
+ ) -> Result<(), RTresult> {
+ texture_sampler::set_max_anisotropy(texturesampler, value)
+ }
+
+ /// Forwards to `texture_sampler::set_mip_level_count` with both arguments unchanged.
+ pub(crate) unsafe fn rtTextureSamplerSetMipLevelCount(
+ texturesampler: TextureSampler,
+ mipLevelCount: ::std::os::raw::c_uint,
+ ) -> Result<(), RTresult> {
+ texture_sampler::set_mip_level_count(texturesampler, mipLevelCount)
+ }
+
+ /// Forwards to `texture_sampler::set_read_mode` with both arguments unchanged.
+ pub(crate) unsafe fn rtTextureSamplerSetReadMode(
+ texturesampler: TextureSampler,
+ readmode: RTtexturereadmode,
+ ) -> Result<(), RTresult> {
+ texture_sampler::set_read_mode(texturesampler, readmode)
+ }
+
+ /// Forwards to `texture_sampler::set_wrap_mode` with all arguments unchanged.
+ pub(crate) unsafe fn rtTextureSamplerSetWrapMode(
+ texturesampler: TextureSampler,
+ dimension: ::std::os::raw::c_uint,
+ wrapmode: RTwrapmode,
+ ) -> Result<(), RTresult> {
+ texture_sampler::set_wrap_mode(texturesampler, dimension, wrapmode)
+ }
+
+ /// Forwards to `transform::create` with both arguments unchanged.
+ pub(crate) unsafe fn rtTransformCreate(
+ context: Context,
+ transform: *mut Transform,
+ ) -> Result<(), RTresult> {
+ transform::create(context, transform)
+ }
+
+ /// Forwards to `transform::destroy`.
+ pub(crate) unsafe fn rtTransformDestroy(transform: Transform) -> Result<(), RTresult> {
+ transform::destroy(transform)
+ }
+
+ /// Forwards to `transform::get_context` with both arguments unchanged.
+ pub(crate) unsafe fn rtTransformGetContext(transform: Transform, context: *mut Context) -> Result<(), RTresult> {
+ transform::get_context(transform, context)
+ }
+
+ /// Forwards to `transform::get_motion_key_count` with both arguments unchanged.
+ pub(crate) unsafe fn rtTransformGetMotionKeyCount(
+ transform: Transform,
+ n: *mut ::std::os::raw::c_uint,
+ ) -> Result<(), RTresult> {
+ transform::get_motion_key_count(transform, n)
+ }
+
+ /// Forwards to `transform::set_child` with both arguments unchanged.
+ pub(crate) unsafe fn rtTransformSetChild(
+ transform: Transform,
+ child: UntypedObject,
+ ) -> Result<(), RTresult> {
+ transform::set_child(transform, child)
+ }
+
+ /// Forwards to `transform::set_matrix` with all arguments unchanged.
+ pub(crate) unsafe fn rtTransformSetMatrix(
+ transform: Transform,
+ transpose: i32,
+ matrix: *const f32,
+ inverse_matrix: *const f32,
+ ) -> Result<(), RTresult> {
+ transform::set_matrix(transform, transpose, matrix, inverse_matrix)
+ }
+
+ /// Forwards to `variable::set_object` with both arguments unchanged.
+ pub(crate) unsafe fn rtVariableSetObject(
+ v: Variable,
+ object: UntypedObject,
+ ) -> Result<(), RTresult> {
+ variable::set_object(v, object)
+ }
+
+ /// Forwards to `variable::get_object` with both arguments unchanged.
+ pub(crate) unsafe fn rtVariableGetObject(
+ v: Variable,
+ object: *mut UntypedObject,
+ ) -> Result<(), RTresult> {
+ variable::get_object(v, object)
+ }
+
+ /// Forwards to `variable::set_1f` with both arguments unchanged.
+ pub(crate) unsafe fn rtVariableSet1f(v: Variable, f1: f32) -> Result<(), RTresult> {
+ variable::set_1f(v, f1)
+ }
+
+ /// Forwards to `variable::set_1i` with both arguments unchanged.
+ pub(crate) unsafe fn rtVariableSet1i(v: Variable, i1: c_int) -> Result<(), RTresult> {
+ variable::set_1i(v, i1)
+ }
+
+ /// Forwards to `variable::set_1ui` with both arguments unchanged.
+ pub(crate) unsafe fn rtVariableSet1ui(v: Variable, u1: u32) -> Result<(), RTresult> {
+ variable::set_1ui(v, u1)
+ }
+
+ /// Forwards to `variable::set_1ull` with both arguments unchanged.
+ pub(crate) unsafe fn rtVariableSet1ull(
+ v: Variable,
+ ull1: ::std::os::raw::c_ulonglong,
+ ) -> Result<(), RTresult> {
+ variable::set_1ull(v, ull1)
+ }
+
+ /// Forwards to `variable::set_3f` with all arguments unchanged.
+ #[allow(non_snake_case)]
+ pub(crate) unsafe fn rtVariableSet3f(
+ v: Variable,
+ f1: f32,
+ f2: f32,
+ f3: f32,
+ ) -> Result<(), RTresult> {
+ variable::set_3f(v, f1, f2, f3)
+ }
+
+ /// Forwards to `variable::set_3fv` with both arguments unchanged.
+ pub(crate) unsafe fn rtVariableSet3fv(v: Variable, f: *const f32) -> Result<(), RTresult> {
+ variable::set_3fv(v, f)
+ }
+
+ /// Forwards to `variable::set_4f` with all arguments unchanged.
+ pub(crate) unsafe fn rtVariableSet4f(
+ v: Variable,
+ f1: f32,
+ f2: f32,
+ f3: f32,
+ f4: f32,
+ ) -> Result<(), RTresult> {
+ variable::set_4f(v, f1, f2, f3, f4)
+ }
+
+ /// Forwards to `variable::set_4fv` with both arguments unchanged.
+ pub(crate) unsafe fn rtVariableSet4fv(v: Variable, f: *const f32) -> Result<(), RTresult> {
+ variable::set_4fv(v, f)
+ }
+
+ /// Forwards to `variable::set_user_data` with all arguments unchanged.
+ pub(crate) unsafe fn rtVariableSetUserData(
+ v: Variable,
+ size: RTsize,
+ ptr: *const ::std::os::raw::c_void,
+ ) -> Result<(), RTresult> {
+ variable::set_user_data(v, size, ptr)
+ }
+}
+
+/// Stores the version number this library reports (`60600`) into `*version`.
+///
+/// Returns `RT_ERROR_INVALID_VALUE` without touching memory when `version` is null;
+/// otherwise writes the value and returns `RT_SUCCESS`.
+///
+/// # Safety
+///
+/// A non-null `version` must be valid for writing a `u32`.
+pub(crate) unsafe fn get_version(version: *mut u32) -> RTresult {
+    // Writing through a null pointer is undefined behavior; report it as a bad argument.
+    if version.is_null() {
+        return RTresult::RT_ERROR_INVALID_VALUE;
+    }
+    *version = 60600;
+    RTresult::RT_SUCCESS
+}
+
+/// Stores the number of devices exposed by this library (always `1`) into `*device_count`.
+///
+/// Returns `RT_ERROR_INVALID_VALUE` without touching memory when `device_count` is null;
+/// otherwise writes the value and returns `RT_SUCCESS`.
+///
+/// # Safety
+///
+/// A non-null `device_count` must be valid for writing a `u32`.
+pub(crate) unsafe fn device_get_count(device_count: *mut u32) -> RTresult {
+    // Writing through a null pointer is undefined behavior; report it as a bad argument.
+    if device_count.is_null() {
+        return RTresult::RT_ERROR_INVALID_VALUE;
+    }
+    *device_count = 1;
+    RTresult::RT_SUCCESS
+}
+
+/// Writes the value of device attribute `attrib` for device `ordinal` into the buffer `p`.
+///
+/// `size` is the capacity of `p` in bytes. Supported attributes:
+/// * clock rate and total memory, queried from HIP;
+/// * name, a fixed NUL-terminated string truncated to fit `size`;
+/// * compute capability, reported as the fixed pair `[8, 6]`;
+/// * CUDA device ordinal, echoing `ordinal`.
+///
+/// # Errors
+///
+/// * `RT_ERROR_INVALID_VALUE` when `p` is null, when `size` is too small for a fixed-size
+///   attribute, or when `size` is zero for the name (no room for the terminator).
+/// * Whatever `definitions::unimplemented()` returns for any other attribute.
+///
+/// # Safety
+///
+/// A non-null `p` must be valid for writes of `size` bytes and suitably aligned for the
+/// attribute's value type.
+pub(crate) unsafe fn device_get_attribute(
+    ordinal: i32,
+    attrib: RTdeviceattribute,
+    size: u64,
+    p: *mut c_void,
+) -> Result<(), RTresult> {
+    if p.is_null() {
+        return Err(RTresult::RT_ERROR_INVALID_VALUE);
+    }
+    // Rejects destination buffers that cannot hold a fixed-size attribute value.
+    let require_size = |needed: usize| {
+        if size < needed as u64 {
+            Err(RTresult::RT_ERROR_INVALID_VALUE)
+        } else {
+            Ok(())
+        }
+    };
+    Ok(match attrib {
+        RTdeviceattribute::RT_DEVICE_ATTRIBUTE_CLOCK_RATE => {
+            require_size(std::mem::size_of::<i32>())?;
+            hip! { hipDeviceGetAttribute(
+                p as _,
+                hipDeviceAttribute_t::hipDeviceAttributeClockRate,
+                ordinal,
+            ), RT_ERROR_UNKNOWN }
+        }
+        RTdeviceattribute::RT_DEVICE_ATTRIBUTE_NAME => {
+            let dev_name = "Graphics Device";
+            // One byte is reserved for the NUL terminator. `size - 1` used to underflow for
+            // `size == 0`, which then copied the whole name into a zero-sized buffer.
+            let capacity = size
+                .checked_sub(1)
+                .ok_or(RTresult::RT_ERROR_INVALID_VALUE)?;
+            // Bounded by `dev_name.len()`, so the cast back to `usize` cannot truncate.
+            let strlen = (dev_name.len() as u64).min(capacity) as usize;
+            ptr::copy_nonoverlapping(dev_name.as_ptr(), p as _, strlen);
+            *(p as *mut u8).add(strlen) = 0;
+        }
+        RTdeviceattribute::RT_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY => {
+            require_size(std::mem::size_of::<[u32; 2]>())?;
+            *(p as *mut [u32; 2]) = [8u32, 6u32];
+        }
+        RTdeviceattribute::RT_DEVICE_ATTRIBUTE_TOTAL_MEMORY => {
+            // `hipDeviceTotalMem` writes a `size_t`.
+            require_size(std::mem::size_of::<usize>())?;
+            hip! { hipDeviceTotalMem(
+                p as _,
+                ordinal,
+            ), RT_ERROR_UNKNOWN }
+        }
+        RTdeviceattribute::RT_DEVICE_ATTRIBUTE_CUDA_DEVICE_ORDINAL => {
+            require_size(std::mem::size_of::<i32>())?;
+            *(p as *mut i32) = ordinal
+        }
+        _ => return Err(definitions::unimplemented()),
+    })
+}
+
+/// Handles a request to set a global attribute; the payload (`_size`, `_p`) is never read.
+///
+/// * `RT_GLOBAL_ATTRIBUTE_ENABLE_RTX` is accepted and reported as `RT_SUCCESS`.
+/// * Raw attribute values `268435457` and `4096` are answered with `RT_ERROR_NOT_SUPPORTED`.
+/// * Everything else yields the result of `definitions::unimplemented()`.
+pub(crate) unsafe fn global_set_attribute(
+    attrib: RTglobalattribute,
+    _size: u64,
+    _p: *const c_void,
+) -> RTresult {
+    match attrib {
+        RTglobalattribute::RT_GLOBAL_ATTRIBUTE_ENABLE_RTX => RTresult::RT_SUCCESS,
+        // TODO: reverse
+        // 0x1000_0001 == 268435457, 0x1000 == 4096
+        RTglobalattribute(0x1000_0001) | RTglobalattribute(0x1000) => {
+            RTresult::RT_ERROR_NOT_SUPPORTED
+        }
+        _ => definitions::unimplemented(),
+    }
+}
+
+/// Reading global attributes is not supported: every argument is ignored and
+/// `RT_ERROR_NOT_SUPPORTED` is returned unconditionally.
+fn global_get_attribute(_attrib: RTglobalattribute, _size: u64, _p: *mut c_void) -> RTresult {
+ RTresult::RT_ERROR_NOT_SUPPORTED
+}
diff --git a/zluda_rt/src/material.rs b/zluda_rt/src/material.rs new file mode 100644 index 0000000..4186165 --- /dev/null +++ b/zluda_rt/src/material.rs @@ -0,0 +1,142 @@ +use crate::{
+ context::{self, Context, ContextData},
+ null_check, null_unwrap,
+ program::{Program, ProgramData},
+ variable::{Variable, VariableData},
+ MaybeWeakRefMut, OptixCell, OptixObjectData, TypeTag,
+};
+use optix_types::*;
+use rustc_hash::FxHashMap;
+use std::{
+ ffi::{CStr, CString},
+ ptr,
+ rc::{Rc, Weak},
+};
+
+/// Raw-pointer handle type used to pass a material (`OptixCell<MaterialData>`) around.
+pub(crate) type Material = *const OptixCell<MaterialData>;
+
+/// State stored behind a `Material` handle.
+pub(crate) struct MaterialData {
+ /// Non-owning reference to the context this material belongs to.
+ pub(crate) context: Weak<OptixCell<ContextData>>,
+ /// Variables declared on this material, keyed by variable name.
+ pub(crate) variables: FxHashMap<CString, Rc<OptixCell<VariableData>>>,
+ /// Any-hit programs, keyed by ray type index.
+ pub(crate) any_hit_programs: FxHashMap<u32, Rc<OptixCell<ProgramData>>>,
+ /// Closest-hit programs, keyed by ray type index.
+ pub(crate) closest_hit_programs: FxHashMap<u32, Rc<OptixCell<ProgramData>>>,
+}
+
+impl MaterialData {
+    /// Builds an empty material tied to `context`; the `ContextData` argument is unused.
+    fn new(context: Weak<OptixCell<ContextData>>, _: &mut ContextData) -> Self {
+        let variables = FxHashMap::default();
+        let any_hit_programs = FxHashMap::default();
+        let closest_hit_programs = FxHashMap::default();
+        Self {
+            context,
+            variables,
+            any_hit_programs,
+            closest_hit_programs,
+        }
+    }
+
+    /// Adds the freshly created material to the context's `materials` collection.
+    fn register(material: Rc<OptixCell<Self>>, context: &mut ContextData) {
+        context.materials.insert(material);
+    }
+
+    /// Creates a material through `context::create_subobject`, using `new` to construct it
+    /// and `register` to record it, and returns the resulting handle.
+    unsafe fn create(context: Context) -> Result<Material, RTresult> {
+        let handle = context::create_subobject(context, Self::new, Self::register)?;
+        Ok(handle)
+    }
+}
+
+impl OptixObjectData for MaterialData {
+    const TYPE: TypeTag = TypeTag::Material;
+
+    /// Removes `this` from the owning context's `materials` collection.
+    ///
+    /// Succeeds without doing anything when the context no longer exists; fails only when
+    /// the context cannot be mutably borrowed.
+    fn deregister(&mut self, this: &Rc<OptixCell<Self>>) -> Result<(), RTresult> {
+        let context = match self.context.upgrade() {
+            Some(context) => context,
+            // The context is already gone, so there is nothing to deregister from.
+            None => return Ok(()),
+        };
+        (*context).borrow_mut()?.materials.remove(this);
+        Ok(())
+    }
+
+    /// Exposes the material's context through its weak reference.
+    fn context<'a>(&'a mut self) -> MaybeWeakRefMut<'a, ContextData> {
+        let weak_context = &self.context;
+        MaybeWeakRefMut::Weak(weak_context)
+    }
+}
+
+/// Creates a new material in `context` and stores its handle in `*material`.
+///
+/// Both `context` and `material` go through `null_check` first; any error from it or from
+/// `MaterialData::create` is returned and `*material` is left unwritten.
+pub(crate) unsafe fn create(context: Context, material: *mut Material) -> Result<(), RTresult> {
+ null_check(context)?;
+ null_check(material)?;
+ *material = MaterialData::create(context)?;
+ Ok(())
+}
+
+/// Declares a variable called `name` on the material and returns its handle through `*v`.
+///
+/// The new variable is stored in the material's `variables` map under `name`; an entry
+/// already stored under that name is replaced.
+///
+/// # Errors
+///
+/// Fails when `name`, `v` or `material_ptr` is null, when the material cannot be mutably
+/// borrowed, or when `VariableData::new` fails.
+///
+/// # Safety
+///
+/// A non-null `name` must point to a NUL-terminated string and a non-null `v` must be valid
+/// for writes.
+pub(crate) unsafe fn declare_variable(
+    material_ptr: Material,
+    name: *const i8,
+    v: *mut Variable,
+) -> Result<(), RTresult> {
+    // `CStr::from_ptr` on a null pointer is undefined behavior, so reject it up front,
+    // the same way `query_variable` does.
+    null_check(name)?;
+    null_check(v)?;
+    let material = null_unwrap(material_ptr)?;
+    let mut material = material.borrow_mut()?;
+    let variable = VariableData::new(&mut *material)?;
+    let name = CStr::from_ptr(name).to_owned();
+    // Take the raw handle before the `Rc` is moved into the map.
+    let result = Rc::as_ptr(&variable);
+    material.variables.insert(name, variable);
+    *v = result;
+    Ok(())
+}
+
+/// Looks up the variable called `name` on the material and writes its handle to `*v`.
+///
+/// A null handle is written when the material has no variable with that name. Fails when
+/// `name`, `v` or `material_ptr` is null or when the material cannot be borrowed.
+pub(crate) unsafe fn query_variable(
+    material_ptr: Material,
+    name: *const i8,
+    v: *mut Variable,
+) -> Result<(), RTresult> {
+    null_check(name)?;
+    null_check(v)?;
+    let material_cell = null_unwrap(material_ptr)?;
+    let material_data = material_cell.borrow()?;
+    let wanted = CStr::from_ptr(name);
+    *v = match material_data.variables.get(wanted) {
+        Some(found) => Rc::as_ptr(found),
+        None => ptr::null(),
+    };
+    Ok(())
+}
+
+/// Installs `program` as the material's any-hit program for ray type `ray_type_index`.
+///
+/// The material keeps its own reference to the program; an entry already stored for the
+/// same ray type is replaced. Fails when `program` or `material` is null or when the
+/// material cannot be mutably borrowed.
+pub(crate) unsafe fn set_any_hit_program(
+    material: Material,
+    ray_type_index: u32,
+    program: Program,
+) -> Result<(), RTresult> {
+    null_check(program)?;
+    let material_cell = null_unwrap(material)?;
+    let mut material_data = material_cell.borrow_mut()?;
+    let program_ref = OptixCell::clone_rc(program);
+    material_data
+        .any_hit_programs
+        .insert(ray_type_index, program_ref);
+    Ok(())
+}
+
+/// Installs `program` as the material's closest-hit program for ray type `ray_type_index`.
+///
+/// The material keeps its own reference to the program; an entry already stored for the
+/// same ray type is replaced. Fails when `program` or `material` is null or when the
+/// material cannot be mutably borrowed.
+pub(crate) unsafe fn set_closest_hit_program(
+    material: Material,
+    ray_type_index: u32,
+    program: Program,
+) -> Result<(), RTresult> {
+    null_check(program)?;
+    let material_cell = null_unwrap(material)?;
+    let mut material_data = material_cell.borrow_mut()?;
+    let program_ref = OptixCell::clone_rc(program);
+    material_data
+        .closest_hit_programs
+        .insert(ray_type_index, program_ref);
+    Ok(())
+}
+
+/// Writes the handle of the context owning `material` to `*context`.
+///
+/// The handle is the raw pointer held by the material's weak context reference. Fails when
+/// `context` or `material` is null or when the material cannot be borrowed.
+pub(crate) unsafe fn get_context(
+    material: Material,
+    context: *mut Context,
+) -> Result<(), RTresult> {
+    null_check(context)?;
+    let material_cell = null_unwrap(material)?;
+    let material_data = material_cell.borrow()?;
+    let owner = material_data.context.as_ptr();
+    *context = owner;
+    Ok(())
+}
+
+/// Placeholder for material destruction: the handle is ignored and success is always
+/// reported (see the TODO below).
+pub(crate) fn destroy(_material: Material) -> Result<(), RTresult> {
+ // TODO: implement
+ Ok(())
+}
diff --git a/zluda_rt/src/program.rs b/zluda_rt/src/program.rs new file mode 100644 index 0000000..8865018 --- /dev/null +++ b/zluda_rt/src/program.rs @@ -0,0 +1,720 @@ +use crate::context::ContextData;
+use crate::geometry::GeometryData;
+use crate::geometry_instance::GeometryInstanceData;
+use crate::geometry_triangles::GeometryTrianglesData;
+use crate::material::MaterialData;
+use crate::repr_gpu::TrivialHIPAllocator;
+use crate::{
+ context::Context,
+ null_check, null_unwrap, null_unwrap_mut,
+ variable::{Variable, VariableData},
+ OptixCell, OptixObjectData, TypeTag,
+};
+use crate::{div_positive_round_up, eptx, hip, hiprt, repr_gpu, AlignedBuffer, MaybeWeakRefMut};
+use comgr::Comgr;
+use hip_common::raytracing::VariablesBlock;
+use hip_runtime_sys::*;
+use hiprt_sys::*;
+use optix_types::*;
+use ptx::{llvm, raytracing, ModuleParserExt};
+use rustc_hash::FxHashMap;
+use std::alloc::Layout;
+use std::ffi::c_void;
+use std::mem::{self, ManuallyDrop};
+use std::rc::Weak;
+use std::{
+ ffi::{CStr, CString},
+ rc::Rc,
+};
+use std::{iter, ptr};
+
+/// Raw-pointer handle type used to pass a program (`OptixCell<ProgramData>`) around.
+pub(crate) type Program = *const OptixCell<ProgramData>;
+
+/// State stored behind a `Program` handle.
+pub(crate) struct ProgramData {
+ /// Non-owning reference to the context this program belongs to.
+ pub(crate) context: Weak<OptixCell<ContextData>>,
+ /// Variables declared directly on this program, keyed by variable name.
+ pub(crate) variables: FxHashMap<CString, Rc<OptixCell<VariableData>>>,
+ /// Layout and per-variable details of the variable block this program reads.
+ pub(crate) variables_block: VariablesBlock,
+ /// `Some(index)` for callable programs, `None` otherwise.
+ pub(crate) callable_index: Option<u32>,
+ // this field is shared between instances created by cloning existing program object
+ pub(crate) shared: Rc<ProgramShared>,
+}
+
+/// Compiled program data shared (through `Rc`) by all `ProgramData` clones of one program.
+pub(crate) struct ProgramShared {
+ /// The program binary the module was loaded from.
+ pub(crate) binary: Vec<u8>,
+ /// HIP module loaded from `binary`.
+ pub(crate) module: hip::Module,
+}
+
+impl OptixObjectData for ProgramData {
+    const TYPE: TypeTag = TypeTag::Program;
+
+    /// Removes `this` from the owning context's `programs` collection.
+    ///
+    /// Succeeds without doing anything when the context no longer exists; fails only when
+    /// the context cannot be mutably borrowed.
+    fn deregister(&mut self, this: &Rc<OptixCell<Self>>) -> Result<(), RTresult> {
+        let context = match self.context.upgrade() {
+            Some(context) => context,
+            // The context is already gone, so there is nothing to deregister from.
+            None => return Ok(()),
+        };
+        (*context).borrow_mut()?.programs.remove(this);
+        Ok(())
+    }
+
+    /// Exposes the program's context through its weak reference.
+    fn context<'a>(&'a mut self) -> MaybeWeakRefMut<'a, ContextData> {
+        let weak_context = &self.context;
+        MaybeWeakRefMut::Weak(weak_context)
+    }
+}
+
+impl ProgramData {
+ // Symbol names looked up in compiled modules; all four are re-exported unchanged from
+ // `raytracing::Module` so that users of `ProgramData` need not reach into the `ptx` crate.
+ pub(crate) const KERNEL_BOUNDING_BOX_NAME: &'static CStr =
+ raytracing::Module::KERNEL_BOUNDING_BOX_NAME;
+ pub(crate) const KERNEL_NAME: &'static CStr = raytracing::Module::KERNEL_NAME;
+ pub(crate) const ATTRIBUTE_FUNCTION_POINTER_NAME: &'static CStr =
+ raytracing::Module::ATTRIBUTE_FUNCTION_POINTER_NAME;
+ pub(crate) const FUNCTION_POINTER_NAME: &'static CStr =
+ raytracing::Module::FUNCTION_POINTER_NAME;
+
+ /// Returns the memory layout of this program's variable block.
+ /// Currently always succeeds; the `Result` wrapper never carries an error here.
+ pub(crate) fn get_program_block_layout(&self) -> Result<Layout, RTresult> {
+ Ok(self.variables_block.layout)
+ }
+
+    /// Reconstructs a program from an already compiled `binary`.
+    ///
+    /// Reads the `zluda_rt6` metadata section from the binary and loads the binary as a HIP
+    /// module. Returns the program together with the metadata's attribute variables block,
+    /// or `None` when the section is missing, cannot be parsed, or the module fails to load.
+    /// Callable programs get a placeholder `callable_index` of `0`.
+    pub(crate) fn try_from_binary(
+        context: Weak<OptixCell<ContextData>>,
+        binary: Vec<u8>,
+    ) -> Option<(Self, VariablesBlock)> {
+        use hip_common::kernel_metadata;
+
+        let section =
+            kernel_metadata::get_section(kernel_metadata::zluda_rt6::SECTION_STR, &*binary)?;
+        let metadata = kernel_metadata::zluda_rt6::read(&section).ok()?;
+        let module = hip::Module::load_data(&binary).ok()?;
+        let callable_index = match metadata.is_callable {
+            true => Some(0),
+            false => None,
+        };
+        let program = ProgramData {
+            context,
+            variables: FxHashMap::default(),
+            variables_block: metadata.variables,
+            callable_index,
+            shared: Rc::new(ProgramShared { binary, module }),
+        };
+        Some((program, metadata.attribute_variables))
+    }
+
+ /// Writes this program's block into `dst_buffer` using the regular (non-attribute)
+ /// function pointer; thin wrapper over `copy_program_block_impl(false, ..)`.
+ pub(crate) fn copy_program_block<'a>(
+ &'a self,
+ dst_buffer: &mut [u8],
+ get_variable: impl Fn(&CStr) -> Option<&'a Rc<OptixCell<VariableData>>>,
+ ) -> Result<(), RTresult> {
+ self.copy_program_block_impl(false, dst_buffer, get_variable)
+ }
+
+ /// Writes this program's block into `dst_buffer` using the attribute function pointer;
+ /// thin wrapper over `copy_program_block_impl(true, ..)`.
+ pub(crate) fn copy_attribute_program_block<'a>(
+ &'a self,
+ dst_buffer: &mut [u8],
+ get_variable: impl Fn(&CStr) -> Option<&'a Rc<OptixCell<VariableData>>>,
+ ) -> Result<(), RTresult> {
+ self.copy_program_block_impl(true, dst_buffer, get_variable)
+ }
+
+    /// Serializes a program block into `dst_buffer`.
+    ///
+    /// The first `size_of::<hipDeviceptr_t>()` bytes receive the device address of the
+    /// program's function pointer global (the attribute one when `is_attribute` is set),
+    /// in native byte order; the variables are then written by `copy_variable_block`.
+    fn copy_program_block_impl<'a>(
+        &'a self,
+        is_attribute: bool,
+        dst_buffer: &mut [u8],
+        get_variable: impl Fn(&CStr) -> Option<&'a Rc<OptixCell<VariableData>>>,
+    ) -> Result<(), RTresult> {
+        let entry_point = if is_attribute {
+            self.get_attribute_function()?
+        } else {
+            self.get_function()?
+        };
+        let pointer_len = mem::size_of::<hipDeviceptr_t>();
+        let pointer_bytes = (entry_point.0 as usize).to_ne_bytes();
+        dst_buffer[..pointer_len].copy_from_slice(&pointer_bytes);
+        Self::copy_variable_block(&self.variables_block, dst_buffer, get_variable)
+    }
+
+    /// Fills `dst_buffer` with the value of every variable listed in `variables_block`.
+    ///
+    /// For each entry `get_variable` is asked for a bound variable. When one exists, its
+    /// contents are copied to the entry's byte range (`offset .. offset + size`); otherwise
+    /// the entry's default value is copied there. Entries with neither a bound variable nor
+    /// a default value are left untouched.
+    ///
+    /// # Errors
+    ///
+    /// * `RT_ERROR_UNKNOWN` when a default value's length differs from the declared size, or
+    ///   when an entry's byte range does not fit in `dst_buffer` (this used to panic while
+    ///   slicing).
+    /// * Errors from borrowing a variable or from `copy_into_buffer` are passed through.
+    pub(crate) fn copy_variable_block<'a>(
+        variables_block: &VariablesBlock,
+        dst_buffer: &mut [u8],
+        get_variable: impl Fn(&CStr) -> Option<&'a Rc<OptixCell<VariableData>>>,
+    ) -> Result<(), RTresult> {
+        for (name, var_details) in variables_block.variables.iter() {
+            let bound_variable = get_variable(name);
+            if bound_variable.is_none() && var_details.default_value.len() == 0 {
+                // Nothing to write for this entry.
+                continue;
+            }
+            let start = var_details.offset as usize;
+            let size = var_details.size as usize;
+            let end = start
+                .checked_add(size)
+                .ok_or(RTresult::RT_ERROR_UNKNOWN)?;
+            match bound_variable {
+                Some(variable) => {
+                    let variable = variable.borrow()?;
+                    let destination = dst_buffer
+                        .get_mut(start..end)
+                        .ok_or(RTresult::RT_ERROR_UNKNOWN)?;
+                    variable.copy_into_buffer(destination)?;
+                }
+                None => {
+                    if var_details.default_value.len() != size {
+                        return Err(RTresult::RT_ERROR_UNKNOWN);
+                    }
+                    let destination = dst_buffer
+                        .get_mut(start..end)
+                        .ok_or(RTresult::RT_ERROR_UNKNOWN)?;
+                    destination.copy_from_slice(&*var_details.default_value);
+                }
+            }
+        }
+        Ok(())
+    }
+
+    /// Launches this program's `KERNEL_NAME` kernel over a `width` x `height` grid and then
+    /// synchronizes the null stream.
+    ///
+    /// `globals` is cloned and the copy's `width`/`height` are overwritten with the launch
+    /// size. The kernel receives five parameters, in order: the globals copy, `stack`,
+    /// `variable_block`, `exception` and `exception_var_block`. The x dimension is split
+    /// into blocks by `get_launch_dimensions_x`; y uses one block row per line.
+    ///
+    /// Returns `RT_ERROR_INVALID_CONTEXT` when the kernel cannot be found in the module.
+    /// The launch and the synchronization go through `hip!` with `RT_ERROR_UNKNOWN`.
+    pub(crate) fn launch_2d(
+        &self,
+        width: u32,
+        height: u32,
+        globals: &repr_gpu::GlobalState,
+        mut stack: hipDeviceptr_t,
+        mut variable_block: hipDeviceptr_t,
+        (mut exception, mut exception_var_block): (hipDeviceptr_t, hipDeviceptr_t),
+    ) -> Result<(), RTresult> {
+        let kernel = match self.shared.module.get_function(Self::KERNEL_NAME) {
+            Ok(kernel) => kernel,
+            Err(_) => return Err(RTresult::RT_ERROR_INVALID_CONTEXT),
+        };
+        let mut launch_globals = globals.clone();
+        launch_globals.width = width;
+        launch_globals.height = height;
+        let (grid_dim_x, block_dim_x) = get_launch_dimensions_x(width)?;
+        // Each entry points at the local holding the corresponding kernel argument.
+        let mut kernel_args = [
+            &mut launch_globals as *mut repr_gpu::GlobalState as *mut c_void,
+            &mut stack as *mut _ as *mut c_void,
+            &mut variable_block as *mut _ as *mut c_void,
+            &mut exception as *mut _ as *mut c_void,
+            &mut exception_var_block as *mut _ as *mut c_void,
+        ];
+        hip! {
+            hipModuleLaunchKernel(
+                kernel,
+                grid_dim_x,
+                height,
+                1,
+                block_dim_x,
+                1,
+                1,
+                0,
+                ptr::null_mut(),
+                kernel_args.as_mut_ptr() as _,
+                ptr::null_mut()
+            ),
+            RT_ERROR_UNKNOWN
+        };
+        hip! { hipStreamSynchronize(ptr::null_mut()), RT_ERROR_UNKNOWN };
+        Ok(())
+    }
+
+ /// Resolves `name` for a kernel launch: the program's own variables are searched first,
+ /// then the context's variables.
+ pub(crate) fn get_variable_for_kernel<'a>(
+ &'a self,
+ context: &'a ContextData,
+ name: &CStr,
+ ) -> Option<&Rc<OptixCell<VariableData>>> {
+ self.variables
+ .get(name)
+ .or_else(|| context.variables.get(name))
+ }
+
+ /// Resolves `name` by searching, in order: the program's own variables, the geometry
+ /// instance, the material, and finally the context. The first match wins.
+ pub(crate) fn get_variable_for_function<'a>(
+ &'a self,
+ geometry_instance: &'a GeometryInstanceData,
+ material: &'a MaterialData,
+ context: &'a ContextData,
+ name: &CStr,
+ ) -> Option<&Rc<OptixCell<VariableData>>> {
+ self.variables
+ .get(name)
+ .or_else(|| geometry_instance.variables.get(name))
+ .or_else(|| material.variables.get(name))
+ .or_else(|| context.variables.get(name))
+ }
+
+ /// Resolves `name` by searching, in order: the program's own variables, the geometry
+ /// triangles, the geometry instance, and finally the context. The first match wins.
+ pub(crate) fn get_variable_for_attribute<'a>(
+ &'a self,
+ geometry_triangles: &'a GeometryTrianglesData,
+ geometry_instance: &'a GeometryInstanceData,
+ context: &'a ContextData,
+ name: &CStr,
+ ) -> Option<&Rc<OptixCell<VariableData>>> {
+ self.variables
+ .get(name)
+ .or_else(|| geometry_triangles.variables.get(name))
+ .or_else(|| geometry_instance.variables.get(name))
+ .or_else(|| context.variables.get(name))
+ }
+
+ /// Resolves `name` by searching, in order: the program's own variables, the geometry
+ /// instance, the geometry, and finally the context. The first match wins.
+ pub(crate) fn get_variable_for_function_non_hit<'a>(
+ &'a self,
+ geometry_instance: &'a GeometryInstanceData,
+ geometry: &'a GeometryData,
+ context: &'a ContextData,
+ name: &CStr,
+ ) -> Option<&Rc<OptixCell<VariableData>>> {
+ self.variables
+ .get(name)
+ .or_else(|| geometry_instance.variables.get(name))
+ .or_else(|| geometry.variables.get(name))
+ .or_else(|| context.variables.get(name))
+ }
+
+ /// Builds this program's variable block for a kernel launch and uploads it.
+ ///
+ /// The block is assembled in a host staging buffer created from `variables_block.layout`,
+ /// with each variable resolved through `get_variable_for_kernel`, and then handed to
+ /// `allocator.copy_to_device`, whose result is returned.
+ pub(crate) fn prepare_variable_block_for_kernel(
+ &self,
+ allocator: &mut TrivialHIPAllocator,
+ context: &ContextData,
+ ) -> Result<hipDeviceptr_t, RTresult> {
+ let mut staging_buffer = AlignedBuffer::new(self.variables_block.layout);
+ Self::copy_variable_block(
+ &self.variables_block,
+ staging_buffer.as_bytes_mut(),
+ |name| self.get_variable_for_kernel(&*context, name),
+ )?;
+ allocator.copy_to_device(staging_buffer.as_bytes())
+ }
+
+ /// Builds this program's variable block for a non-hit function and uploads it.
+ ///
+ /// Same procedure as `prepare_variable_block_for_kernel`, except that variables are
+ /// resolved through `get_variable_for_function_non_hit` (program, geometry instance,
+ /// geometry, context).
+ pub(crate) fn prepare_variable_block_for_function_non_hit<'a>(
+ &self,
+ allocator: &mut TrivialHIPAllocator,
+ geometry_instance: &'a GeometryInstanceData,
+ geometry: &GeometryData,
+ context: &'a ContextData,
+ ) -> Result<hipDeviceptr_t, RTresult> {
+ let mut staging_buffer = AlignedBuffer::new(self.variables_block.layout);
+ ProgramData::copy_variable_block(
+ &self.variables_block,
+ staging_buffer.as_bytes_mut(),
+ |name| {
+ self.get_variable_for_function_non_hit(geometry_instance, geometry, &context, name)
+ },
+ )?;
+ allocator.copy_to_device(staging_buffer.as_bytes())
+ }
+
+ /// Looks up the module global named `ATTRIBUTE_FUNCTION_POINTER_NAME` and returns its
+ /// device address; any lookup failure is reported as `RT_ERROR_UNKNOWN`.
+ pub fn get_attribute_function(&self) -> Result<hipDeviceptr_t, RTresult> {
+ unsafe {
+ self.shared
+ .module
+ .get_global(Self::ATTRIBUTE_FUNCTION_POINTER_NAME)
+ .map_err(|_| RTresult::RT_ERROR_UNKNOWN)
+ }
+ }
+
+ /// Looks up the module global named `FUNCTION_POINTER_NAME` and returns its device
+ /// address; any lookup failure is reported as `RT_ERROR_UNKNOWN`.
+ pub fn get_function(&self) -> Result<hipDeviceptr_t, RTresult> {
+ unsafe {
+ self.shared
+ .module
+ .get_global(Self::FUNCTION_POINTER_NAME)
+ .map_err(|_| RTresult::RT_ERROR_UNKNOWN)
+ }
+ }
+
+    /// Creates a new program in `context` that shares this program's compiled module.
+    ///
+    /// The copy starts with no variables of its own, a clone of the variables block, and the
+    /// same `shared` data. If this program is callable, the context's callable program
+    /// counter is incremented and the new value becomes the copy's callable index. The copy
+    /// is inserted into `context.programs` and its handle is returned.
+    unsafe fn create_from(
+        &self,
+        context_wrapper: &OptixCell<ContextData>,
+        context: &mut ContextData,
+    ) -> Result<Program, RTresult> {
+        let callable_index = match self.callable_index {
+            Some(_) => {
+                context.callable_program_counter += 1;
+                Some(context.callable_program_counter)
+            }
+            None => None,
+        };
+        let copy = ProgramData {
+            context: OptixCell::clone_weak(context_wrapper),
+            variables: FxHashMap::default(),
+            variables_block: self.variables_block.clone(),
+            callable_index,
+            shared: self.shared.clone(),
+        };
+        let copy = Rc::new(OptixCell::new(copy));
+        // Take the raw handle before the `Rc` is moved into the context.
+        let handle = Rc::as_ptr(&copy);
+        context.programs.insert(copy);
+        Ok(handle)
+    }
+}
+
+/// Splits a launch of `width` threads along x into `(grid size, block size)`.
+///
+/// The block size is fixed at 32; the grid size is `width` divided by the block size,
+/// computed by `div_positive_round_up`.
+pub(crate) fn get_launch_dimensions_x(width: u32) -> Result<(u32, u32), RTresult> {
+    const BLOCK_SIZE: u32 = 32;
+    let grid_size = div_positive_round_up(width as u64, BLOCK_SIZE as u64) as u32;
+    Ok((grid_size, BLOCK_SIZE))
+}
+
+/// Creates a program from the PTX file at `filename`.
+///
+/// Returns `RT_ERROR_INVALID_VALUE` when the path is not valid UTF-8 and
+/// `RT_ERROR_FILE_NOT_FOUND` when the file cannot be read for any reason; a null
+/// `filename` is rejected by `null_check`. The file contents are then passed to
+/// `create_from_ptx`.
+pub(crate) unsafe fn create_from_ptx_file(
+    context: Context,
+    filename: *const i8,
+    program_name: *const i8,
+    program: *mut Program,
+) -> Result<(), RTresult> {
+    null_check(filename)?;
+    let path = match CStr::from_ptr(filename).to_str() {
+        Ok(path) => path,
+        Err(_) => return Err(RTresult::RT_ERROR_INVALID_VALUE),
+    };
+    let contents = match std::fs::read(path) {
+        Ok(contents) => contents,
+        Err(_) => return Err(RTresult::RT_ERROR_FILE_NOT_FOUND),
+    };
+    create_from_ptx(context, PtxInput::Vec(contents), program_name, program)
+}
+
+/// Creates a program from the NUL-terminated PTX source `ptx`.
+///
+/// A null `ptx` is rejected by `null_check` before it is turned into a `CStr`; the string
+/// is then passed, borrowed, to `create_from_ptx`.
+///
+/// # Safety
+///
+/// A non-null `ptx` must point to a NUL-terminated string that stays valid for the call.
+pub(crate) unsafe fn create_from_ptx_string(
+    context: Context,
+    ptx: *const i8,
+    program_name: *const i8,
+    program: *mut Program,
+) -> Result<(), RTresult> {
+    // `CStr::from_ptr` on a null pointer is undefined behavior, so reject it up front,
+    // the same way `create_from_ptx_file` checks `filename`.
+    null_check(ptx)?;
+    create_from_ptx(
+        context,
+        PtxInput::CStr(CStr::from_ptr(ptx)),
+        program_name,
+        program,
+    )
+}
+
+/// Shared implementation behind program creation from a PTX file or string.
+///
+/// Decodes the input, obtains a program through `build_or_load`, assigns a fresh callable
+/// index where applicable, optionally saves the program to the cache, registers it in the
+/// context and writes its handle to `*program`.
+unsafe fn create_from_ptx(
+ raw_context: Context,
+ ptx: PtxInput,
+ program_name: *const i8,
+ program: *mut Program,
+) -> Result<(), RTresult> {
+ let context = null_unwrap(raw_context)?;
+ null_check(program_name)?;
+ null_check(program)?;
+ let program_name = CStr::from_ptr(program_name);
+ let mut context = context.borrow_mut()?;
+ let ptx = ptx.decode(
+ &context.optix_salt[..],
+ &context.vendor_salt[..],
+ &context.public_vendor_key[..],
+ )?;
+ let mut_context = &mut *context;
+ // Build a new `Weak` from the raw handle: the temporary from `Weak::from_raw` is wrapped
+ // in `ManuallyDrop` so it is never dropped, and only its clone is kept.
+ // NOTE(review): relies on `raw_context` being a pointer `Weak::from_raw` accepts - confirm.
+ let weak_context = Weak::clone(&ManuallyDrop::new(Weak::from_raw(raw_context)));
+ let (mut program_object, attribute_block, should_save) =
+ build_or_load(mut_context, weak_context, program_name, &ptx)?;
+ // Callable programs get the next value of the context-wide counter.
+ if let Some(ref mut callable_index) = program_object.callable_index {
+ mut_context.callable_program_counter += 1;
+ *callable_index = mut_context.callable_program_counter;
+ }
+ if should_save {
+ if let Some(ref mut cache) = mut_context.cache {
+ // Saved with the cumulative attributes as they were BEFORE this program's
+ // attribute block replaces them below.
+ cache.save_program(
+ &mut_context.compiler_version,
+ &mut_context.hiprt_version,
+ &mut_context.isa,
+ program_name,
+ &ptx,
+ &program_object,
+ &mut_context.cumulative_attributes,
+ );
+ }
+ }
+ mut_context.cumulative_attributes = attribute_block;
+ let program_object = Rc::new(OptixCell::new(program_object));
+ // Take the raw handle before the `Rc` is moved into the context.
+ let result = Rc::as_ptr(&program_object);
+ mut_context.programs.insert(program_object);
+ *program = result;
+ Ok(())
+}
+
+ /// Returns the program for `ptx`, preferring the context's cache over a build.
+ ///
+ /// The last tuple element is `false` when the program came from the cache;
+ /// otherwise it is whatever `build` reports.
+ fn build_or_load(
+     mut_context: &mut ContextData,
+     weak_context: Weak<OptixCell<ContextData>>,
+     program_name: &CStr,
+     ptx: &String,
+ ) -> Result<(ProgramData, VariablesBlock, bool), RTresult> {
+     let cached = match mut_context.cache {
+         Some(ref mut cache) => cache.try_load_program(
+             weak_context.clone(),
+             &mut_context.compiler_version,
+             &mut_context.hiprt_version,
+             &mut_context.isa,
+             program_name,
+             ptx,
+             &mut_context.cumulative_attributes,
+         ),
+         None => None,
+     };
+     if let Some((program, attribute_block)) = cached {
+         return Ok((program, attribute_block, false));
+     }
+     // Cache miss (or no cache at all): compile from source.
+     unsafe {
+         build(
+             weak_context,
+             &mut_context.hiprt,
+             &mut_context.comgr,
+             &mut_context.isa,
+             &mut_context.cumulative_attributes,
+             ptx,
+             program_name,
+             mut_context.context,
+         )
+     }
+ }
+
+ /// PTX source as received from the API: either bytes read from a file or a
+ /// borrowed C string.
+ enum PtxInput<'a> {
+     Vec(Vec<u8>),
+     CStr(&'a CStr),
+ }
+
+ impl<'a> PtxInput<'a> {
+     /// Prefix that marks the input as encoded ("eptx") rather than plain PTX.
+     const EPTX_MAGIC: &'static [u8] = b"eptx0001";
+
+     /// Converts the input into PTX text, decoding it first when it carries the
+     /// eptx prefix.
+     ///
+     /// # Errors
+     /// Returns `RT_ERROR_UNKNOWN` when the resulting bytes are not valid UTF-8.
+     unsafe fn decode(
+         self,
+         optix_salt: &[u8],
+         vendor_salt: &[u8],
+         vendor_key: &[u8],
+     ) -> Result<String, RTresult> {
+         // `starts_with` is false for inputs shorter than the prefix; the previous
+         // `[..8]` slice panicked on such inputs (e.g. an empty PTX string).
+         let is_eptx = self.to_bytes().starts_with(Self::EPTX_MAGIC);
+         let mut this = self.to_owned();
+         if is_eptx {
+             // The decoded text is truncated to the length `decode_ptx` returns.
+             let new_len = eptx::decode_ptx(&mut this, optix_salt, vendor_salt, vendor_key).len();
+             this.truncate(new_len);
+         }
+         String::from_utf8(this).map_err(|_| RTresult::RT_ERROR_UNKNOWN)
+     }
+
+     /// Borrows the raw bytes (without the trailing NUL for the C string case).
+     unsafe fn to_bytes(&self) -> &[u8] {
+         match self {
+             PtxInput::Vec(vec) => &vec[..],
+             PtxInput::CStr(cstr) => cstr.to_bytes(),
+         }
+     }
+
+     /// Turns the input into an owned byte vector, copying only for the C string case.
+     unsafe fn to_owned(self) -> Vec<u8> {
+         match self {
+             PtxInput::Vec(vec) => vec,
+             PtxInput::CStr(cstr) => cstr.to_bytes().to_vec(),
+         }
+     }
+ }
+
+ // TODO: drop rtc program
+ /// Compiles PTX `text` into a loadable raytracing program.
+ ///
+ /// Pipeline: parse the PTX, translate it for raytracing, build the trace
+ /// program through hiprt, fetch its bitcode from hiprtc, compile and link it
+ /// with comgr, then load the binary as a HIP module.
+ ///
+ /// Returns the new `ProgramData` (callable programs get a placeholder index of
+ /// 0), the attribute variables reported by the translation, and `true`, which
+ /// the caller treats as "should be saved to the cache".
+ ///
+ /// # Errors
+ /// * `RT_ERROR_INVALID_SOURCE` when parsing or translating the PTX fails
+ ///   (the same code is passed to `hiprt!` for the trace-program build),
+ /// * `RT_ERROR_INVALID_VALUE` when `program_name` is not valid UTF-8,
+ /// * `RT_ERROR_UNKNOWN` when comgr compilation or module loading fails,
+ /// * any error from `get_bitcode`.
+ unsafe fn build(
+     weak_context: Weak<OptixCell<ContextData>>,
+     hiprt: &HipRt,
+     comgr: &Rc<Comgr>,
+     isa: &CStr,
+     cumulative_attributes: &VariablesBlock,
+     text: &str,
+     program_name: &CStr,
+     context: hiprtContext,
+ ) -> Result<(ProgramData, VariablesBlock, bool), RTresult> {
+     let ast =
+         ptx::ModuleParser::parse_checked(text).map_err(|_| RTresult::RT_ERROR_INVALID_SOURCE)?;
+     // The name comes from the API caller; validate it instead of assuming UTF-8
+     // (the previous `from_utf8_unchecked` was unsound for arbitrary C strings).
+     let program_name_str = program_name
+         .to_str()
+         .map_err(|_| RTresult::RT_ERROR_INVALID_VALUE)?;
+     let raytracing_module =
+         ptx::to_llvm_module_for_raytracing(ast, program_name_str, cumulative_attributes)
+             .map_err(|_| RTresult::RT_ERROR_INVALID_SOURCE)?;
+     let debug_level = if cfg!(debug_assertions) {
+         b"-g\0".as_ptr()
+     } else {
+         b"-g0\0".as_ptr()
+     };
+     let options = [
+         debug_level,
+         // We just want to emit LLVM, we'd use O0, but somehow IR emitted by O0 prevents inlining.
+         // Weirdly, -disable-llvm-optzns produces much bigger code
+         b"-O1\0".as_ptr(),
+         // Stop compilation at LLVM
+         b"-fgpu-rdc\0".as_ptr(),
+         // hiprtc injects -mcumode which we don't want
+         b"-mno-cumode\0".as_ptr(),
+         // Internalization makes so that _rt_trace_time_mask_flags_64 is module-private
+         // and does not get linked with the code generated by ptx compiler
+         b"-mllvm\0".as_ptr(),
+         b"-amdgpu-internalize-symbols=0\0".as_ptr(),
+     ];
+     let mut rt_program = ptr::null_mut::<c_void>();
+     // Raw pointer tables for the FFI call; `raytracing_module` keeps the
+     // pointed-to data alive for the duration of the call.
+     let headers = raytracing_module
+         .headers
+         .iter()
+         .map(|s| s.as_ptr())
+         .collect::<Vec<_>>();
+     let header_names = raytracing_module
+         .header_names
+         .iter()
+         .map(|s| s.as_ptr())
+         .collect::<Vec<_>>();
+     hiprt! {
+         hiprt.hiprtBuildTraceProgram(
+             context,
+             raytracing::Module::KERNEL_NAME.as_ptr(),
+             raytracing_module.kernel_source.as_ptr() as _,
+             "zluda_rt_kernel\0".as_ptr() as _,
+             headers.len() as i32,
+             headers.as_ptr() as _,
+             header_names.as_ptr() as _,
+             options.as_ptr() as _,
+             options.len() as i32,
+             (&mut rt_program) as *mut _ as _
+         ),
+         RT_ERROR_INVALID_SOURCE
+     };
+     let main_bitcode = get_bitcode(rt_program)?;
+     let binary = llvm::MemoryBuffer::create_no_copy(&main_bitcode, false);
+     let isa_main = CStr::from_bytes_with_nul_unchecked(b"raytracing_main\0");
+     // Link the freshly built kernel bitcode with the bitcode produced by the
+     // PTX translation.
+     let binary = comgr
+         .compile(
+             hip_common::CompilationMode::Wave32,
+             isa,
+             iter::once((binary, isa_main))
+                 .chain(raytracing_module.compilation_module.get_bitcode_all()),
+             &raytracing_module.linker_module,
+         )
+         .map_err(|_| RTresult::RT_ERROR_UNKNOWN)?;
+     let module = hip::Module::load_data(&binary).map_err(|_| RTresult::RT_ERROR_UNKNOWN)?;
+     Ok((
+         ProgramData {
+             context: weak_context,
+             variables: FxHashMap::default(),
+             variables_block: raytracing_module.variables,
+             // Placeholder; the caller replaces it with a real index.
+             callable_index: if raytracing_module.is_callable {
+                 Some(0)
+             } else {
+                 None
+             },
+             shared: Rc::new(ProgramShared { binary, module }),
+         },
+         raytracing_module.attribute_variables,
+         true,
+     ))
+ }
+
+ /// NUL-terminated name of the hiprtc shared library on Windows.
+ #[cfg(windows)]
+ const HIPRTC: &str = "hiprtc\0";
+
+ /// NUL-terminated name of the hiprtc shared library on every other platform.
+ #[cfg(not(windows))]
+ const HIPRTC: &str = "libhiprtc.so\0";
+
+ /// Fetches the bitcode of a compiled hiprtc program.
+ ///
+ /// Loads the hiprtc library, asks `hiprtcGetBitcodeSize` for the size, then
+ /// lets `hiprtcGetBitcode` fill a buffer of that size.
+ ///
+ /// # Errors
+ /// Returns `RT_ERROR_UNKNOWN` when the library or a symbol cannot be loaded,
+ /// or when either hiprtc call reports a non-zero status.
+ unsafe fn get_bitcode(rt_program: *mut c_void) -> Result<Vec<u8>, RTresult> {
+     use libloading::{Library, Symbol};
+     // hiprtc exports C functions, so they must be called through the C ABI;
+     // the Rust ABI of a plain `unsafe fn` pointer is unspecified.
+     type GetBitcodeSizeFn =
+         unsafe extern "C" fn(prog: *mut c_void, bitcode_size: *mut usize) -> u32;
+     type GetBitcodeFn = unsafe extern "C" fn(prog: *mut c_void, bitcode: *mut u8) -> u32;
+     let hiprtc = Library::new(HIPRTC).map_err(|_| RTresult::RT_ERROR_UNKNOWN)?;
+     let hiprtc_get_bitcode_size: Symbol<GetBitcodeSizeFn> = hiprtc
+         .get(b"hiprtcGetBitcodeSize\0")
+         .map_err(|_| RTresult::RT_ERROR_UNKNOWN)?;
+     let hiprtc_get_bitcode: Symbol<GetBitcodeFn> = hiprtc
+         .get(b"hiprtcGetBitcode\0")
+         .map_err(|_| RTresult::RT_ERROR_UNKNOWN)?;
+     let mut program_size = 0usize;
+     let error = hiprtc_get_bitcode_size(rt_program, &mut program_size);
+     if error != 0 {
+         return Err(RTresult::RT_ERROR_UNKNOWN);
+     }
+     let mut main_bitcode = vec![0u8; program_size];
+     let error = hiprtc_get_bitcode(rt_program, main_bitcode.as_mut_ptr());
+     if error != 0 {
+         return Err(RTresult::RT_ERROR_UNKNOWN);
+     }
+     Ok(main_bitcode)
+ }
+
+ /// Declares a new variable called `name` on the program and writes its handle to `v`.
+ ///
+ /// The variable is stored in the program's `variables` map under `name`; the
+ /// returned handle points at the stored variable.
+ pub(crate) unsafe fn declare_variable(
+     program_ptr: Program,
+     name: *const i8,
+     v: *mut Variable,
+ ) -> Result<(), RTresult> {
+     null_check(name)?;
+     let out_variable = null_unwrap_mut(v)?;
+     let program_cell = null_unwrap(program_ptr)?;
+     let mut program_data = program_cell.borrow_mut()?;
+     let new_variable = VariableData::new(&mut *program_data)?;
+     // Take the handle before the map takes ownership of the `Rc`.
+     let handle = Rc::as_ptr(&new_variable);
+     let key = CStr::from_ptr(name as _).to_owned();
+     program_data.variables.insert(key, new_variable);
+     *out_variable = handle;
+     Ok(())
+ }
+ /// Destroys `program` by forwarding the handle to `OptixCell::destroy`.
+ pub(crate) unsafe fn destroy(program: Program) -> Result<(), RTresult> {
+ OptixCell::destroy(program)
+ }
+
+ /// Writes the callable index of `program` to `program_id`.
+ ///
+ /// # Errors
+ /// * the `null_check` error when `program_id` is null (previously the pointer
+ ///   was written through unchecked),
+ /// * `RT_ERROR_INVALID_VALUE` when the program has no callable index.
+ pub(crate) unsafe fn get_id(program: Program, program_id: *mut i32) -> Result<(), RTresult> {
+     null_check(program_id)?;
+     let program = null_unwrap(program)?;
+     let program = program.borrow()?;
+     let callable_id = program
+         .callable_index
+         .ok_or(RTresult::RT_ERROR_INVALID_VALUE)?;
+     *program_id = callable_id as i32;
+     Ok(())
+ }
+
+ /// Looks up the variable called `name` on the program.
+ ///
+ /// Writes the variable's handle to `v`, or a null pointer when the program has
+ /// no variable with that name (this is not an error).
+ pub(crate) unsafe fn query_variable(
+     program: Program,
+     name: *const i8,
+     v: *mut Variable,
+ ) -> Result<(), RTresult> {
+     null_check(name)?;
+     null_check(v)?;
+     let program_cell = null_unwrap(program)?;
+     let program_data = program_cell.borrow()?;
+     let key = CStr::from_ptr(name);
+     *v = match program_data.variables.get(key) {
+         Some(variable) => Rc::as_ptr(variable),
+         None => ptr::null_mut(),
+     };
+     Ok(())
+ }
+ /// Validates a program handle; the only check performed here is `null_check` on the handle itself.
+ pub(crate) fn validate(program: Program) -> Result<(), RTresult> {
+ null_check(program)?;
+ Ok(())
+ }
+
+ /// Creates a new program in `context` from the existing program `program_in`
+ /// and writes the new handle to `program_out`.
+ ///
+ /// The actual copying is delegated to `create_from` on the source program.
+ pub(crate) unsafe fn create_from_program(
+     context: Context,
+     program_in: Program,
+     program_out: *mut Program,
+ ) -> Result<(), RTresult> {
+     null_check(program_out)?;
+     let context_cell = null_unwrap(context)?;
+     let mut context_data = context_cell.borrow_mut()?;
+     let source_cell = null_unwrap(program_in)?;
+     let source_program = source_cell.borrow()?;
+     let new_program = source_program.create_from(context_cell, &mut context_data)?;
+     *program_out = new_program;
+     Ok(())
+ }
+
+ #[cfg(test)]
+ mod tests {
+     use crate::test_common::OptixFns;
+     use crate::{optix_test, tests};
+     use std::ptr;
+
+     optix_test!(cloning_program_does_not_clone_variables);
+
+     /// A program created with `rtProgramCreateFromProgram` must not inherit the
+     /// variables declared on the program it was created from.
+     unsafe fn cloning_program_does_not_clone_variables<Optix: OptixFns>(o: Optix) {
+         let mut context = ptr::null_mut();
+         o.rtContextCreate(&mut context);
+         // The API expects a NUL-terminated PTX string.
+         let mut ptx_source = tests::ANY_HIT_INTERSECT_PTX.to_string();
+         ptx_source.push('\0');
+         let mut original = ptr::null_mut();
+         o.rtProgramCreateFromPTXString(
+             context,
+             ptx_source.as_ptr() as _,
+             "set_buffer\0".as_ptr() as _,
+             &mut original,
+         );
+         // Declare a variable on the original program only.
+         let mut declared = ptr::null_mut();
+         o.rtProgramDeclareVariable(original, b"output_buffer\0".as_ptr() as _, &mut declared);
+         let mut copy = ptr::null_mut();
+         o.rtProgramCreateFromProgram(context, original, &mut copy);
+         // The same name must not resolve on the copy.
+         let mut queried = ptr::null_mut();
+         o.rtProgramQueryVariable(copy, b"output_buffer\0".as_ptr() as _, &mut queried);
+         assert_ne!(declared, ptr::null_mut());
+         assert_eq!(queried, ptr::null_mut());
+     }
+ }
+
+ /// Writes the handle of the context that owns `program` to `context`.
+ ///
+ /// The handle is the raw pointer of the program's weak context reference.
+ ///
+ /// # Errors
+ /// Returns the `null_check` error when `context` is null (previously the
+ /// pointer was written through unchecked).
+ pub(crate) unsafe fn get_context(
+     program: *const OptixCell<ProgramData>,
+     context: *mut *const OptixCell<ContextData>,
+ ) -> Result<(), RTresult> {
+     null_check(context)?;
+     let program = null_unwrap(program)?;
+     let program = program.borrow()?;
+     *context = program.context.as_ptr();
+     Ok(())
+ }
diff --git a/zluda_rt/src/repr_gpu.rs b/zluda_rt/src/repr_gpu.rs new file mode 100644 index 0000000..b61e2fc --- /dev/null +++ b/zluda_rt/src/repr_gpu.rs @@ -0,0 +1,1320 @@ +use crate::acceleration::AccelerationOwner;
+use crate::context::GlobalStack;
+use crate::geometry_instance::{GeometryInstanceChild, GeometryInstanceData};
+use crate::geometry_triangles::GeometryTrianglesData;
+use crate::material::MaterialData;
+use crate::{
+ context::ContextData, hip, program::ProgramData, slice_cast_mut, AlignedBuffer, OptixCell,
+};
+use crate::{hiprt, unwrap_or_continue};
+use hip_runtime_sys::{hipDeviceptr_t, hipMemcpyHtoD};
+use hiprt_sys::*;
+use optix_types::RTresult;
+use static_assertions::const_assert_eq;
+use std::rc::Weak;
+use std::{alloc::Layout, rc::Rc};
+use std::{iter, mem, ptr};
+ /// Plain `#[repr(C)]` bundle of device pointers and launch parameters that `Scene` stores and passes to program launches.
+ #[repr(C)]
+ #[derive(Clone)]
+ pub(crate) struct GlobalState {
+ pub(crate) scenes: hipDeviceptr_t, // set in `Scene::new` to the device copy of the per-index `bvhs` array
+ pub(crate) miss_programs: hipDeviceptr_t, // result of `allocate_miss_programs` on the context
+ pub(crate) buffers: hipDeviceptr_t, // result of `allocate_buffers` on the context
+ pub(crate) callable_programs: hipDeviceptr_t, // result of `allocate_callable_programs` on the context
+ pub(crate) textures: hipDeviceptr_t, // result of `Scene::allocate_texture_samplers`
+ pub(crate) ray_type_count: u32, // copied from the context's `ray_type_count`
+ pub(crate) uv_attribute_offset: u32, // result of `get_uv_offset` on the context
+ pub(crate) width: u32, // initialized to 0 by `Scene::new`
+ pub(crate) height: u32, // initialized to 0 by `Scene::new`
+ pub(crate) attribute_block_size: u16, // first element of the context's `attributes_layout()`
+ pub(crate) attribute_block_align: u16, // second element of the context's `attributes_layout()`
+ pub(crate) thread_global_stack_size: u16, // set to `GlobalStack::THREAD_STACK_DEPTH` by `Scene::new`
+ pub(crate) _padding: u16, // explicit padding, always written as 0
+ }
+ const_assert_eq!(mem::size_of::<GlobalState>(), 64); // pins the struct size; NOTE(review): presumably has to match a GPU-side definition - confirm
+ /// `#[repr(C)]` pair of `u32` start values, one for any-hit and one for closest-hit programs. NOTE(review): units and consumer are not visible in this part of the file - confirm.
+ #[repr(C)]
+ #[allow(dead_code)]
+ pub(crate) struct HitProgramChain {
+ pub(crate) any_hit_start: u32,
+ pub(crate) closest_hit_start: u32,
+ }
+ /// `#[repr(C)]` record holding a device pointer to a transform block and a `u32` materials start value. NOTE(review): consumer is not visible in this part of the file - confirm.
+ #[repr(C)]
+ #[allow(dead_code)]
+ pub(crate) struct IntersectionInput {
+ pub transform_block: hipDeviceptr_t,
+ pub materials_start: u32,
+ }
+ /// `#[repr(C)]` pair of 16-element `f32` arrays: a transform and its inverse. NOTE(review): presumably 4x4 matrices; element order is not shown here - confirm.
+ #[repr(C)]
+ #[allow(dead_code)]
+ pub(crate) struct OptixTransform {
+ pub transform: [f32; 16],
+ pub inverse_transform: [f32; 16],
+ }
+ /// Device-side state prepared for launches: per-entry-point variable blocks plus the `GlobalState` handed to programs.
+ pub(crate) struct Scene {
+ exception_variable_blocks: Vec<Option<hipDeviceptr_t>>, // one slot per exception program; `None` where no program is set
+ entry_point_variable_blocks: Vec<Option<hipDeviceptr_t>>, // one slot per entry point; `None` where no program is set
+ global_state: GlobalState,
+ // _allocator is used implicitly, it holds all the allocations used by the Scene object
+ _allocator: Option<TrivialHIPAllocator>,
+ }
+ // Construction and launching of a `Scene`.
+ impl Scene {
+ pub fn empty() -> Self { // placeholder scene: no variable blocks, zeroed global state, no allocator
+ Self {
+ exception_variable_blocks: Vec::new(),
+ entry_point_variable_blocks: Vec::new(),
+ global_state: unsafe { mem::zeroed() }, // all-zero bit pattern: null device pointers and zero counters
+ _allocator: None,
+ }
+ }
+ /// Builds the device-side state for `context`: program tables, buffers, textures, variable blocks and the per-index BVH table.
+ pub fn new(context: &ContextData) -> Result<Self, RTresult> {
+ let mut allocator = TrivialHIPAllocator::new(context.context, context.hiprt.clone());
+ let textures = Self::allocate_texture_samplers(context, &mut allocator)?;
+ let mut bvhs = (0..context.geometry_group_count)
+ .into_iter()
+ .map(|_| unsafe { mem::zeroed() }) // one zeroed slot per `geometry_group_count`, overwritten by index below
+ .collect::<Vec<_>>();
+ let miss_programs = context.allocate_miss_programs(&mut allocator)?;
+ let ray_type_count = context.ray_type_count;
+ let callable_programs = context.allocate_callable_programs(&mut allocator)?;
+ let uv_attribute_offset = context.get_uv_offset()?;
+ let buffers = context.allocate_buffers(&mut allocator)?;
+ let (attribute_block_size, attribute_block_align) = context.attributes_layout();
+ let entry_point_variable_blocks =
+ Self::allocate_entry_variable_blocks(&mut allocator, context)?;
+ let exception_variable_blocks =
+ Self::allocate_exception_variable_blocks(&mut allocator, context)?;
+ let mut global_state = GlobalState {
+ scenes: hipDeviceptr_t(ptr::null_mut()), // filled in at the end, once every BVH slot is known
+ miss_programs,
+ buffers,
+ callable_programs,
+ textures,
+ ray_type_count,
+ uv_attribute_offset,
+ attribute_block_size,
+ attribute_block_align,
+ width: 0,
+ height: 0,
+ thread_global_stack_size: GlobalStack::THREAD_STACK_DEPTH,
+ _padding: 0,
+ };
+ for accel in context.accelerations.iter() {
+ let accel = OptixCell::borrow(accel)?;
+ if let Some(group) = accel.owner.as_ref() { // accelerations without an owner are skipped
+ let (index, bvh) = match group {
+ AccelerationOwner::GeometryGroup(geo_group) => {
+ if let Some(geo_group) = geo_group.upgrade() {
+ let geo_group = OptixCell::borrow(&*geo_group)?;
+ let index = geo_group.index;
+ let bvh = geo_group.prepare_globals(
+ &mut allocator,
+ context,
+ &mut global_state,
+ )?;
+ (index, bvh)
+ } else {
+ continue; // owner is gone (weak reference is dead); its slot stays zeroed
+ }
+ }
+ AccelerationOwner::Group(group) => {
+ if let Some(group) = group.upgrade() {
+ let group = OptixCell::borrow(&*group)?;
+ let index = group.index;
+ let bvh = group.prepare_globals(
+ &mut allocator,
+ context,
+ &mut global_state,
+ )?;
+ (index, bvh)
+ } else {
+ continue; // owner is gone (weak reference is dead); its slot stays zeroed
+ }
+ }
+ };
+ bvhs[index as usize] = bvh;
+ }
+ }
+ global_state.scenes = allocator.copy_to_device(&bvhs)?; // upload the table only after every slot has been filled
+ Ok(Self {
+ entry_point_variable_blocks,
+ exception_variable_blocks,
+ global_state,
+ _allocator: Some(allocator), // stored so the allocator lives as long as the scene
+ })
+ }
+ /// Creates the underlying object of every texture sampler and uploads the array of their `hip_object` handles.
+ fn allocate_texture_samplers(
+ context: &ContextData,
+ allocator: &mut TrivialHIPAllocator,
+ ) -> Result<hipDeviceptr_t, RTresult> {
+ let mut sampler_ptrs = vec![ptr::null_mut(); context.texture_counter as usize];
+ for sampler in context.texture_samplers.iter() {
+ let mut sampler = sampler.borrow_mut()?;
+ unsafe { sampler.create_underlying()? };
+ sampler_ptrs[sampler.index as usize - 1] = sampler.hip_object; // sampler with index N is stored in slot N - 1
+ }
+ allocator.copy_to_device(&sampler_ptrs[..])
+ }
+ /// Launches `main_program` over `width` x `height` for the given entry point, passing the exception program (if any) along.
+ pub fn launch_2d(
+ &self,
+ entry_point_index: u32,
+ main_program: &ProgramData,
+ exception: Option<std::cell::Ref<ProgramData>>,
+ width: u32,
+ height: u32,
+ stack: hipDeviceptr_t,
+ ) -> Result<(), RTresult> {
+ let program_variable_block = self
+ .entry_point_variable_blocks
+ .get(entry_point_index as usize)
+ .ok_or(RTresult::RT_ERROR_INVALID_VALUE)? // entry point index out of range
+ .ok_or(RTresult::RT_ERROR_INVALID_VALUE)?; // entry point has no variable block
+ let exception = exception
+ .map(|program| {
+ let exception = program.get_function()?;
+ let var_block = self
+ .exception_variable_blocks
+ .get(entry_point_index as usize)
+ .ok_or(RTresult::RT_ERROR_INVALID_VALUE)?
+ .ok_or(RTresult::RT_ERROR_INVALID_VALUE)?;
+ Ok::<_, RTresult>((exception, var_block))
+ })
+ .transpose()?
+ .unwrap_or(( // no exception program: pass null for both the function and its variable block
+ hipDeviceptr_t(ptr::null_mut()),
+ hipDeviceptr_t(ptr::null_mut()),
+ ));
+ main_program.launch_2d(
+ width,
+ height,
+ &self.global_state,
+ stack,
+ program_variable_block,
+ exception,
+ )
+ }
+ /// Prepares a kernel variable block for every entry point program; unset entry points yield `None`.
+ fn allocate_entry_variable_blocks(
+ allocator: &mut TrivialHIPAllocator,
+ context: &ContextData,
+ ) -> Result<Vec<Option<hipDeviceptr_t>>, RTresult> {
+ context
+ .entry_points
+ .iter()
+ .map(|program| {
+ program
+ .as_ref()
+ .map(|program| {
+ let program = program.borrow()?;
+ program.prepare_variable_block_for_kernel(allocator, context)
+ })
+ .transpose()
+ })
+ .collect::<Result<Vec<_>, _>>() // stops at the first error
+ }
+ /// Prepares a kernel variable block for every exception program; unset slots yield `None`.
+ fn allocate_exception_variable_blocks(
+ allocator: &mut TrivialHIPAllocator,
+ context: &ContextData,
+ ) -> Result<Vec<Option<hipDeviceptr_t>>, RTresult> {
+ context
+ .exception_programs
+ .iter()
+ .map(|program| {
+ program
+ .as_ref()
+ .map(|program| {
+ let program = program.borrow()?;
+ program.prepare_variable_block_for_kernel(allocator, context)
+ })
+ .transpose()
+ })
+ .collect::<Result<Vec<_>, _>>() // stops at the first error
+ }
+ }
+ /// Computes the memory layout of a call chain: a table of `u32` outer offsets followed by the program blocks reported by `layout_source`.
+ pub(crate) fn get_layout<S: VisitCallChain>(
+ ray_type_count: u32,
+ layout_source: &S,
+ ) -> Result<CallChainLayout, RTresult> {
+ let containers_len = layout_source.len(ray_type_count);
+ if containers_len == 0 { // nothing to lay out: return an empty layout with empty offset lists
+ return Ok(CallChainLayout {
+ layout: Layout::new::<()>(),
+ outer_offsets: vec![u32::MAX; containers_len],
+ inner_offsets: vec![vec![]; containers_len],
+ });
+ }
+ let mut layouts = vec![vec![]; containers_len]; // per container: one `Option<Layout>` per visited slot
+ layout_source.visit_alloc(ray_type_count, |outer_index, _, programs| {
+ let maybe_layout = programs
+ .map(|(_, prog)| S::get_program_block_layout(prog))
+ .transpose()?;
+ layouts[outer_index].push(maybe_layout); // `None` records a slot without a program
+ Ok(())
+ })?;
+ let mut layout = Layout::new::<()>();
+ layout = layout_extend_by_offset_array(layout, containers_len)?; // the buffer starts with one `u32` offset per container
+ let mut outer_offsets = Vec::with_capacity(containers_len);
+ let mut inner_offsets = Vec::with_capacity(containers_len);
+ for sublayouts in layouts {
+ if sublayouts.len() == 0 { // container was never visited: mark it as absent
+ outer_offsets.push(u32::MAX);
+ inner_offsets.push(Vec::new());
+ continue;
+ }
+ let inner_layout_and_offsets = if sublayouts.len() == 1 { // single slot: the block is placed directly, without an inner offset table
+ let program = sublayouts[0];
+ match program {
+ Some(inner_layout) => Some((inner_layout, vec![0])),
+ None => None,
+ }
+ } else { // several slots: the container starts with its own `u32` offset table
+ let mut offsets = Vec::with_capacity(sublayouts.len());
+ let mut inner_layout =
+ layout_extend_by_offset_array(Layout::new::<()>(), sublayouts.len())?;
+ let mut callable = 0usize; // number of slots that actually hold a program
+ for sublayout in sublayouts {
+ let prog_block_layout = match sublayout {
+ None => {
+ offsets.push(u32::MAX); // absent program inside the container
+ continue;
+ }
+ Some(l) => l,
+ };
+ callable += 1;
+ let (new_layout, offset) = inner_layout
+ .extend(prog_block_layout)
+ .map_err(|_| RTresult::RT_ERROR_UNKNOWN)?;
+ inner_layout = new_layout;
+ offsets.push(offset as u32);
+ }
+ if callable == 0 { // no slot had a program: treat the whole container as absent
+ None
+ } else {
+ Some((inner_layout, offsets))
+ }
+ };
+ match inner_layout_and_offsets {
+ Some((inner_layout, offsets)) => {
+ let (outer_layout, mut outer_offset) = layout
+ .extend(inner_layout)
+ .map_err(|_| RTresult::RT_ERROR_UNKNOWN)?;
+ if offsets.len() > 1 { // lowest bit set marks an outer offset that points at an inner offset table
+ //debug_assert!(outer_offset & 1 == 0);
+ outer_offset |= 1;
+ }
+ outer_offsets.push(outer_offset as u32);
+ inner_offsets.push(offsets);
+ layout = outer_layout;
+ }
+ None => {
+ outer_offsets.push(u32::MAX);
+ inner_offsets.push(Vec::new());
+ }
+ }
+ }
+ Ok(CallChainLayout {
+ layout,
+ outer_offsets,
+ inner_offsets,
+ })
+ }
+
+ /// Serializes the call chain on the host with `copy_to_cpu` and uploads the
+ /// resulting bytes to `destination` with `hipMemcpyHtoD`.
+ pub(crate) fn copy_to_gpu<S: VisitCallChain>(
+     ray_type_count: u32,
+     layout_source: &S,
+     chain_layout: &CallChainLayout,
+     destination: hipDeviceptr_t,
+ ) -> Result<(), RTresult> {
+     let host_copy = copy_to_cpu(ray_type_count, layout_source, chain_layout)?;
+     let (host_ptr, byte_count) = (host_copy.as_ptr(), host_copy.len());
+     hip! { hipMemcpyHtoD(destination, host_ptr, byte_count), RT_ERROR_UNKNOWN };
+     Ok(())
+ }
+
+ /// Serializes the call chain into a host buffer shaped by `chain_layout`.
+ ///
+ /// The offset tables are written first, then every program reported by
+ /// `layout_source` is copied to the position recorded for its slot.
+ pub(crate) fn copy_to_cpu<S: VisitCallChain>(
+     ray_type_count: u32,
+     layout_source: &S,
+     chain_layout: &CallChainLayout,
+ ) -> Result<AlignedBuffer, RTresult> {
+     let mut staging_buffer = AlignedBuffer::new(chain_layout.layout);
+     if cfg!(test) {
+         // Only test builds zero the buffer up front.
+         staging_buffer.as_bytes_mut().fill(0u8);
+     }
+     let dst_buffer = staging_buffer.as_bytes_mut();
+     copy_prologue_offsets(chain_layout, dst_buffer);
+     layout_source.visit_alloc(ray_type_count, |outer_index, inner_index, program| {
+         let (copy_ctx, program) = match program {
+             Some(pair) => pair,
+             // Slots without a program occupy no space.
+             None => return Ok(()),
+         };
+         // The lowest bit of an outer offset is a tag, not part of the position.
+         let container_start = chain_layout.outer_offsets[outer_index] & !1u32;
+         let block_start = container_start + chain_layout.inner_offsets[outer_index][inner_index];
+         layout_source.copy_program_block(
+             copy_ctx,
+             program,
+             &mut dst_buffer[block_start as usize..],
+         )
+     })?;
+     Ok(staging_buffer)
+ }
+ /// Source of programs for a call chain: reports the number of containers, visits every slot and copies program blocks.
+ #[gat]
+ pub(crate) trait VisitCallChain {
+ type ProgramData; // This type parametrization is to enable testing
+ type CopyContext<'temp>; // per-slot value handed back to `copy_program_block`
+ fn len(&self, ray_type_count: u32) -> usize; // number of outer containers; sizes the outer offset table in `get_layout`
+ fn visit_alloc( // calls `visitor(outer_index, inner_index, program)` for each slot; `None` means no program in that slot
+ &self,
+ ray_type_count: u32,
+ visitor: impl FnMut(
+ usize,
+ usize,
+ Option<(Self::CopyContext<'_>, &Self::ProgramData)>,
+ ) -> Result<(), RTresult>,
+ ) -> Result<(), RTresult>;
+ fn get_program_block_layout(p: &Self::ProgramData) -> Result<Layout, RTresult>; // layout of the block emitted for `p`
+ fn copy_program_block( // writes the block for `p` starting at the beginning of `dst`
+ &self,
+ copy_ctx: Self::CopyContext<'_>,
+ p: &Self::ProgramData,
+ dst: &mut [u8],
+ ) -> Result<(), RTresult>;
+ }
+ /// Layout-only visitor: `visit_layout` hands the visitor one `Vec<Option<Layout>>` at a time, with `None` for a missing program.
+ trait VisitLayout {
+ fn len(&self, ray_type_count: u32) -> usize;
+ fn visit_layout(
+ &self,
+ ray_type_count: u32,
+ visitor: impl FnMut(Vec<Option<Layout>>) -> Result<(), RTresult>,
+ ) -> Result<(), RTresult>;
+ }
+
+ /// Reports the layout of the any-hit program of every (material, ray type)
+ /// pair of the geometry instance.
+ impl VisitLayout for GeometryInstanceData {
+     /// One entry per material and ray type.
+     fn len(&self, ray_type_count: u32) -> usize {
+         self.materials.len() * ray_type_count as usize
+     }
+
+     /// Calls `visitor` once per (material, ray type) pair with a
+     /// single-element vector: the any-hit program's block layout, or `None`
+     /// when the material has no any-hit program for that ray type.
+     ///
+     /// # Errors
+     /// Returns `RT_ERROR_UNKNOWN` when a material slot is unset.
+     fn visit_layout(
+         &self,
+         ray_type_count: u32,
+         mut visitor: impl FnMut(Vec<Option<Layout>>) -> Result<(), RTresult>,
+     ) -> Result<(), RTresult> {
+         for material_slot in self.materials.iter() {
+             for ray in 0..ray_type_count {
+                 let material_cell = material_slot.as_ref().ok_or(RTresult::RT_ERROR_UNKNOWN)?;
+                 let material = material_cell.borrow()?;
+                 let layout = match material.any_hit_programs.get(&ray) {
+                     Some(any_hit_program) => {
+                         let any_hit_program = any_hit_program.borrow()?;
+                         Some(any_hit_program.get_program_block_layout()?)
+                     }
+                     None => None,
+                 };
+                 visitor(vec![layout])?;
+             }
+         }
+         Ok(())
+     }
+ }
+ /// `VisitCallChain` source over the any-hit programs of one geometry instance's materials (one container per material and ray type).
+ pub(crate) struct IntersectVisitCallChain<'a> {
+ pub(crate) context: &'a ContextData,
+ pub(crate) geometry_instance: &'a GeometryInstanceData,
+ }
+ /// `VisitCallChain` source over the context's callable programs, stored at position `callable_index - 1`.
+ pub(crate) struct CallableProgramsVisitor<'a> {
+ context: &'a ContextData,
+ programs: Vec<Option<std::cell::Ref<'a, ProgramData>>>, // `None` for indices that no live program uses
+ }
+
+ impl<'a> CallableProgramsVisitor<'a> {
+     /// Borrows every callable program of `context` and files it under its
+     /// callable index.
+     ///
+     /// The table has one slot per value of `callable_program_counter`; a
+     /// program with callable index `N` lands in slot `N - 1`, and slots that no
+     /// program claims stay `None`.
+     pub(crate) fn new(context: &'a ContextData) -> Result<Self, RTresult> {
+         let mut programs = Vec::new();
+         for _ in 0..context.callable_program_counter {
+             programs.push(None);
+         }
+         for program in context.programs.iter() {
+             let program = program.borrow()?;
+             let maybe_index = program.callable_index;
+             if let Some(callable_index) = maybe_index {
+                 let slot = (callable_index - 1) as usize;
+                 programs[slot] = Some(program);
+             }
+         }
+         Ok(Self { programs, context })
+     }
+ }
+ // `VisitCallChain` over callable programs: every slot in `programs` is one container holding at most one program.
+ #[gat]
+ impl<'a> VisitCallChain for CallableProgramsVisitor<'a> {
+ type ProgramData = ProgramData;
+ type CopyContext<'unused> = (); // no per-program context is needed when copying
+ /// Number of containers: one per callable program slot.
+ fn len(&self, _ray_type_count: u32) -> usize {
+ self.programs.len()
+ }
+ /// Visits every slot in index order, always with inner index 0; empty slots are reported as `None`.
+ fn visit_alloc(
+ &self,
+ _ray_type_count: u32,
+ mut visitor: impl FnMut(usize, usize, Option<((), &Self::ProgramData)>) -> Result<(), RTresult>,
+ ) -> Result<(), RTresult> {
+ for (index, prog) in self.programs.iter().enumerate() {
+ visitor(index, 0, prog.as_deref().map(|prog| ((), prog)))?;
+ }
+ Ok(())
+ }
+ /// Layout of a program block, as reported by the program itself.
+ fn get_program_block_layout(prog: &Self::ProgramData) -> Result<Layout, RTresult> {
+ prog.get_program_block_layout()
+ }
+ /// Copies the program block into `dst`, resolving variables with `get_variable_for_kernel`.
+ fn copy_program_block(
+ &self,
+ _: (),
+ prog: &Self::ProgramData,
+ dst: &mut [u8],
+ ) -> Result<(), RTresult> {
+ prog.copy_program_block(dst, |name| prog.get_variable_for_kernel(self.context, name))
+ }
+ }
+ // `VisitCallChain` over the any-hit program of every (material, ray type) pair of the geometry instance.
+ #[gat]
+ impl<'a> VisitCallChain for IntersectVisitCallChain<'a> {
+ type ProgramData = ProgramData;
+ type CopyContext<'temp> = &'temp MaterialData; // the material the program belongs to, used for variable lookup
+ /// Number of containers: one per material and ray type.
+ fn len(&self, ray_type_count: u32) -> usize {
+ self.geometry_instance.materials.len() * ray_type_count as usize
+ }
+ /// Visits each (material, ray type) pair with inner index 0; pairs without an any-hit program are reported as `None`.
+ fn visit_alloc(
+ &self,
+ ray_type_count: u32,
+ mut visitor: impl FnMut(
+ usize,
+ usize,
+ Option<(&MaterialData, &Self::ProgramData)>,
+ ) -> Result<(), RTresult>,
+ ) -> Result<(), RTresult> {
+ for (material_index, material) in self.geometry_instance.materials.iter().enumerate() {
+ for ray in 0..ray_type_count {
+ let material = material.as_ref().ok_or(RTresult::RT_ERROR_UNKNOWN)?; // unset material slot is an error
+ let material = material.borrow()?;
+ let maybe_any_hit_program = material.any_hit_programs.get(&ray);
+ let outer_index = (ray_type_count as usize * material_index) + ray as usize; // material-major order: all ray types of material 0, then material 1, ...
+ visitor(
+ outer_index,
+ 0,
+ maybe_any_hit_program
+ .map(std::ops::Deref::deref)
+ .map(OptixCell::borrow)
+ .transpose()?
+ .as_deref()
+ .map(|prog| (&*material, prog)),
+ )?;
+ }
+ }
+ Ok(())
+ }
+ /// Layout of a program block, as reported by the program itself.
+ fn get_program_block_layout(p: &Self::ProgramData) -> Result<Layout, RTresult> {
+ p.get_program_block_layout()
+ }
+ /// Copies the program block into `dst`, resolving variables with `get_variable_for_function`.
+ fn copy_program_block(
+ &self,
+ material: &MaterialData,
+ p: &Self::ProgramData,
+ dst: &mut [u8],
+ ) -> Result<(), RTresult> {
+ p.copy_program_block(dst, |name| {
+ p.get_variable_for_function(self.geometry_instance, material, self.context, name)
+ })
+ }
+ }
+ /// `VisitCallChain` source over the attribute programs of the `GeometryTriangles` children of the given instances.
+ pub(crate) struct AttributesVisitCallChain<'a> {
+ pub(crate) context: &'a ContextData,
+ pub(crate) children: &'a [Option<Rc<OptixCell<GeometryInstanceData>>>],
+ }
+ // `VisitCallChain` over attribute programs: one container per child instance, each with at most one program.
+ #[gat]
+ impl<'a> VisitCallChain for AttributesVisitCallChain<'a> {
+ type ProgramData = ProgramData;
+ type CopyContext<'temp> = (&'temp GeometryTrianglesData, &'temp GeometryInstanceData); // triangles and instance used for variable lookup
+ /// Number of containers: one per child instance.
+ fn len(&self, _: u32) -> usize {
+ self.children.len()
+ }
+ /// Visits every child once with inner index 0; reports `None` unless the child is `GeometryTriangles` with a live attribute program.
+ fn visit_alloc(
+ &self,
+ _: u32,
+ mut visitor: impl for<'x> FnMut(
+ usize,
+ usize,
+ Option<(
+ (&'x GeometryTrianglesData, &'x GeometryInstanceData),
+ &Self::ProgramData,
+ )>,
+ ) -> Result<(), RTresult>,
+ ) -> Result<(), RTresult> {
+ for (program_index, maybe_instance) in self.children.iter().enumerate() {
+ if let Some(instance) = maybe_instance {
+ let instance = instance.borrow()?;
+ match instance.child {
+ GeometryInstanceChild::None => return Err(RTresult::RT_ERROR_INVALID_CONTEXT), // instance without a child is an error
+ GeometryInstanceChild::Geometry(_) => {} // falls through to the `None` visit below
+ GeometryInstanceChild::GeometryTriangles(ref geo) => {
+ let geo = geo.borrow()?;
+ if let Some(ref program) = geo.attribute_program {
+ if let Some(program) = Weak::upgrade(&program) {
+ let program = program.borrow()?;
+ visitor(program_index, 0, Some(((&geo, &instance), &program)))?;
+ continue; // program visited; skip the `None` visit below
+ }
+ }
+ }
+ }
+ visitor(program_index, 0, None)?; // no live attribute program for this instance
+ } else {
+ return Err(RTresult::RT_ERROR_INVALID_CONTEXT); // unset child slot is an error
+ }
+ }
+ Ok(())
+ }
+ /// Layout of a program block, as reported by the program itself.
+ fn get_program_block_layout(p: &Self::ProgramData) -> Result<Layout, RTresult> {
+ p.get_program_block_layout()
+ }
+ /// Copies the block with `copy_attribute_program_block`, resolving variables with `get_variable_for_attribute`.
+ fn copy_program_block(
+ &self,
+ (triangles, instance): (&GeometryTrianglesData, &GeometryInstanceData),
+ p: &Self::ProgramData,
+ dst: &mut [u8],
+ ) -> Result<(), RTresult> {
+ p.copy_attribute_program_block(dst, |name| {
+ p.get_variable_for_attribute(triangles, instance, self.context, name)
+ })
+ }
+ }
+ /// `VisitCallChain` source over a slice of optional miss programs.
+ pub(crate) struct MissProgramsVisitCallChain<'a> {
+ pub(crate) context: &'a ContextData,
+ pub(crate) miss_programs: &'a [Option<Rc<OptixCell<ProgramData>>>],
+ }
+ // `VisitCallChain` over miss programs: one container per slot of `miss_programs`, each with at most one program.
+ #[gat]
+ impl<'a> VisitCallChain for MissProgramsVisitCallChain<'a> {
+ type ProgramData = ProgramData;
+ type CopyContext<'unused> = (); // no per-program context is needed when copying
+ /// Number of containers: one per miss program slot.
+ fn len(&self, _: u32) -> usize {
+ self.miss_programs.len()
+ }
+ /// Visits every slot in order with inner index 0; unset slots are reported as `None`.
+ fn visit_alloc(
+ &self,
+ _: u32,
+ mut visitor: impl FnMut(usize, usize, Option<((), &Self::ProgramData)>) -> Result<(), RTresult>,
+ ) -> Result<(), RTresult> {
+ for (program_index, maybe_program) in self.miss_programs.iter().enumerate() {
+ visitor(
+ program_index,
+ 0,
+ maybe_program
+ .as_deref()
+ .map(OptixCell::borrow)
+ .transpose()? // a failed borrow aborts the visit
+ .as_deref()
+ .map(|prog| ((), prog)),
+ )?;
+ }
+ Ok(())
+ }
+ /// Layout of a program block, as reported by the program itself.
+ fn get_program_block_layout(p: &Self::ProgramData) -> Result<Layout, RTresult> {
+ p.get_program_block_layout()
+ }
+ /// Copies the program block into `dst`, resolving variables with `get_variable_for_kernel`.
+ fn copy_program_block(
+ &self,
+ _: (),
+ p: &Self::ProgramData,
+ dst: &mut [u8],
+ ) -> Result<(), RTresult> {
+ p.copy_program_block(dst, |name| p.get_variable_for_kernel(self.context, name))
+ }
+ }
+ /// `VisitCallChain` source over the hit programs that the materials of the given instances define for one ray type.
+ pub(crate) struct HitProgramsVisitCallChain<'a> {
+ pub(crate) context: &'a ContextData,
+ pub(crate) children: &'a [Option<Rc<OptixCell<GeometryInstanceData>>>],
+ pub(crate) closest_hit: bool, // true: visit closest-hit programs; false: visit any-hit programs
+ pub(crate) ray: u32, // ray type used as the key into the materials' program maps
+ }
+ // `VisitCallChain` over hit programs: one container per child instance, one inner slot per material of that instance.
+ #[gat]
+ impl<'a> VisitCallChain for HitProgramsVisitCallChain<'a> {
+ type ProgramData = ProgramData;
+ type CopyContext<'temp> = (&'temp GeometryInstanceData, &'temp MaterialData); // instance and material used for variable lookup
+ /// Number of containers: one per child instance.
+ fn len(&self, _: u32) -> usize {
+ self.children.len()
+ }
+ /// Visits `(instance index, material index)` slots; materials without a program for `self.ray` are reported as `None`.
+ fn visit_alloc(
+ &self,
+ _: u32,
+ mut visitor: impl for<'x, 'y> FnMut(
+ usize,
+ usize,
+ Option<(
+ (&'x GeometryInstanceData, &'x MaterialData),
+ &'y Self::ProgramData,
+ )>,
+ ) -> Result<(), RTresult>,
+ ) -> Result<(), RTresult> {
+ for (instance_idx, maybe_instance) in self.children.iter().enumerate() {
+ let instance = maybe_instance
+ .as_ref()
+ .ok_or(RTresult::RT_ERROR_INVALID_CONTEXT)?; // unset child slot is an error
+ let instance = instance.borrow()?;
+ let ignore_any_hit = matches!(instance.child, GeometryInstanceChild::Geometry(..));
+ if ignore_any_hit && !self.closest_hit { // any-hit programs of `Geometry` children are not visited here
+ continue;
+ }
+ for (material_index, material) in instance.materials.iter().enumerate() {
+ let material = material
+ .as_ref()
+ .ok_or(RTresult::RT_ERROR_INVALID_CONTEXT)?; // unset material slot is an error
+ let material = material.borrow()?;
+ let hit_programs = if self.closest_hit {
+ &material.closest_hit_programs
+ } else {
+ &material.any_hit_programs
+ };
+ visitor(
+ instance_idx,
+ material_index,
+ hit_programs
+ .get(&self.ray)
+ .map(std::ops::Deref::deref)
+ .map(OptixCell::borrow)
+ .transpose()?
+ .as_deref()
+ .map(|prog| ((&*instance, &*material), prog)),
+ )?;
+ }
+ }
+ Ok(())
+ }
+ /// Layout of a program block, as reported by the program itself.
+ fn get_program_block_layout(p: &Self::ProgramData) -> Result<Layout, RTresult> {
+ p.get_program_block_layout()
+ }
+ /// Copies the program block into `dst`, resolving variables with `get_variable_for_function`.
+ fn copy_program_block(
+ &self,
+ (geometry_instance, material): (&GeometryInstanceData, &MaterialData),
+ p: &Self::ProgramData,
+ dst: &mut [u8],
+ ) -> Result<(), RTresult> {
+ p.copy_program_block(dst, |name| {
+ p.get_variable_for_function(geometry_instance, material, self.context, name)
+ })
+ }
+ }
+ /// `VisitLayout` source over the hit programs that the materials of the given instances define for one ray type.
+ pub(crate) struct VisitHitPrograms<'a> {
+ pub(crate) closest_hit: bool, // true: closest-hit programs; false: any-hit programs
+ pub(crate) ray: u32, // ray type used as the key into the materials' program maps
+ pub(crate) children: &'a [Option<Rc<OptixCell<GeometryInstanceData>>>],
+ }
+ // `VisitLayout` over hit programs: the visitor receives one vector per child instance, with one entry per material.
+ impl<'a> VisitLayout for VisitHitPrograms<'a> {
+ fn len(&self, _: u32) -> usize { // one entry per child instance
+ self.children.len()
+ }
+ /// Calls `visitor` once per child instance with the block layouts of its materials' hit programs for `self.ray`.
+ fn visit_layout(
+ &self,
+ _: u32,
+ mut visitor: impl FnMut(Vec<Option<Layout>>) -> Result<(), RTresult>,
+ ) -> Result<(), RTresult> {
+ for maybe_instance in self.children.iter() {
+ let instance = maybe_instance
+ .as_ref()
+ .ok_or(RTresult::RT_ERROR_INVALID_CONTEXT)?; // unset child slot is an error
+ let instance = instance.borrow()?;
+ let ignore_any_hit = matches!(instance.child, GeometryInstanceChild::Geometry(..));
+ if ignore_any_hit && !self.closest_hit {
+ visitor(vec![])?; // still report the instance, but with no program slots
+ continue;
+ }
+ visitor(
+ instance
+ .materials
+ .iter()
+ .map(|material| {
+ let material = material
+ .as_ref()
+ .ok_or(RTresult::RT_ERROR_INVALID_CONTEXT)?; // unset material slot is an error
+ let material = material.borrow()?;
+ let hit_programs = if self.closest_hit {
+ &material.closest_hit_programs
+ } else {
+ &material.any_hit_programs
+ };
+ hit_programs
+ .get(&self.ray)
+ .map(|prog| {
+ let prog = prog.borrow()?;
+ prog.get_program_block_layout()
+ })
+ .transpose() // `None` when the material has no program for this ray type
+ })
+ .collect::<Result<Vec<_>, _>>()?,
+ )?;
+ }
+ Ok(())
+ }
+ }
+
+ /// Memory layout of a serialized call chain.
+ ///
+ /// `outer_offsets` has one entry per container: `u32::MAX` for an absent
+ /// container, otherwise the container's position, with the lowest bit set when
+ /// the container starts with its own offset table. `inner_offsets` holds, per
+ /// container, the offsets of its slots relative to the container start.
+ pub(crate) struct CallChainLayout {
+     pub(crate) layout: Layout,
+     pub(crate) outer_offsets: Vec<u32>,
+     pub(crate) inner_offsets: Vec<Vec<u32>>,
+ }
+
+ impl CallChainLayout {
+     /// Lists every offset table together with the position it is written at.
+     ///
+     /// The first item is the outer table at position 0. It is followed by the
+     /// inner table of each container whose outer offset is tagged (lowest bit
+     /// set), positioned at that offset with the tag bit cleared.
+     fn offsets(&self) -> impl Iterator<Item = (u32, &[u32])> {
+         let outer_table = iter::once((0u32, &self.outer_offsets[..]));
+         let inner_tables = self
+             .outer_offsets
+             .iter()
+             .enumerate()
+             // `u32::MAX` is odd, so absent containers must be excluded explicitly.
+             .filter(|(_, offset)| **offset != u32::MAX && (**offset & 1) == 1)
+             .map(move |(index, offset)| (*offset & !1, &self.inner_offsets[index][..]));
+         outer_table.chain(inner_tables)
+     }
+ }
+ /// NOTE(review): no implementation or use of this trait is visible in this part of the file - possibly unused; confirm before relying on it.
+ trait CallChainCopier {
+ fn copy_offsets(offset: u32, offsets: &[u32]);
+ fn copy_variable(offset: u32, offsets: &[u32]);
+ }
+
+ /// Appends an array of `programs_len` `u32` offsets to `layout` and returns
+ /// the extended layout (the position of the array is discarded).
+ ///
+ /// # Errors
+ /// Returns `RT_ERROR_UNKNOWN` when the array layout or the extension overflows.
+ fn layout_extend_by_offset_array(layout: Layout, programs_len: usize) -> Result<Layout, RTresult> {
+     let offsets_array =
+         Layout::array::<u32>(programs_len).map_err(|_| RTresult::RT_ERROR_UNKNOWN)?;
+     let (extended, _array_position) = layout
+         .extend(offsets_array)
+         .map_err(|_| RTresult::RT_ERROR_UNKNOWN)?;
+     Ok(extended)
+ }
+
+ /// Writes every offset table of `chain_layout` into `dst_buffer` at the
+ /// position reported by `CallChainLayout::offsets`.
+ fn copy_prologue_offsets(chain_layout: &CallChainLayout, dst_buffer: &mut [u8]) {
+     for (offset, chain_prologue) in chain_layout.offsets() {
+         let table_start = offset as usize;
+         let entry_count = chain_prologue.len();
+         // NOTE(review): relies on `slice_cast_mut` to view the bytes at
+         // `table_start` as `entry_count` table entries - its exact safety
+         // contract is defined elsewhere; confirm alignment/length requirements.
+         let dst = unsafe { slice_cast_mut(&mut dst_buffer[table_start..], entry_count) };
+         dst.copy_from_slice(chain_prologue);
+     }
+ }
+
+/// Tracks every hipRT object and HIP device allocation created through it so
+/// that all of them can be released together by `dealloc_impl`.
+#[must_use]
+pub(crate) struct TrivialHIPAllocator {
+    context: hiprtContext,
+    hiprt: Rc<HipRt>,
+    geometries: Vec<hiprtGeometry>,
+    func_tables: Vec<hiprtCustomFuncTable>,
+    scenes: Vec<hiprtScene>,
+    allocations: Vec<hipDeviceptr_t>,
+    /// Set by `dealloc_impl` so that the resources are released at most once.
+    drop_flag: bool,
+}
+
+impl TrivialHIPAllocator {
+    /// Creates an allocator that does not own any resource yet.
+    pub(crate) fn new(context: hiprtContext, hiprt: Rc<HipRt>) -> Self {
+        Self {
+            context,
+            hiprt,
+            geometries: Vec::new(),
+            func_tables: Vec::new(),
+            scenes: Vec::new(),
+            allocations: Vec::new(),
+            drop_flag: false,
+        }
+    }
+
+    /// Creates a custom function table and registers it for destruction.
+    pub(crate) fn new_func_table(&mut self) -> Result<hiprtCustomFuncTable, RTresult> {
+        let mut custom_func_table = ptr::null_mut();
+        hiprt! { self.hiprt.hiprtCreateCustomFuncTable(self.context, &mut custom_func_table), RT_ERROR_UNKNOWN };
+        // Track the table like scenes and geometries are tracked; without this
+        // `func_tables` stays empty and `dealloc_impl` never destroys the table.
+        // NOTE(review): confirm that no caller destroys the returned table itself.
+        self.func_tables.push(custom_func_table);
+        Ok(custom_func_table)
+    }
+
+    /// Creates and builds a scene.
+    ///
+    /// The temporary build buffer is obtained through `allocate`, so it stays
+    /// alive until this allocator is deallocated. The scene is registered right
+    /// after creation and before the build step.
+    pub(crate) fn new_scene(
+        &mut self,
+        scene_input: hiprtSceneBuildInput,
+        build_options: hiprtBuildOptions,
+    ) -> Result<hiprtScene, RTresult> {
+        let mut temp_mem_size = 0;
+        hiprt! { self.hiprt.hiprtGetSceneBuildTemporaryBufferSize(self.context, &scene_input, &build_options, &mut temp_mem_size), RT_ERROR_UNKNOWN };
+        let temp_mem = self.allocate(temp_mem_size)?;
+        let mut scene = ptr::null_mut();
+        hiprt! { self.hiprt.hiprtCreateScene(self.context, &scene_input, &build_options, &mut scene), RT_ERROR_UNKNOWN };
+        self.scenes.push(scene);
+        hiprt! { self.hiprt.hiprtBuildScene(self.context, hiprtBuildOperation::hiprtBuildOperationBuild, &scene_input, &build_options, temp_mem.0, ptr::null_mut(), scene), RT_ERROR_UNKNOWN };
+        Ok(scene)
+    }
+
+    /// Creates and builds a geometry; mirrors `new_scene`.
+    pub(crate) fn new_geometry(
+        &mut self,
+        geometry_input: hiprtGeometryBuildInput,
+        build_options: hiprtBuildOptions,
+    ) -> Result<hiprtGeometry, RTresult> {
+        let mut temp_mem_size = 0;
+        hiprt! { self.hiprt.hiprtGetGeometryBuildTemporaryBufferSize(self.context, &geometry_input, &build_options, &mut temp_mem_size), RT_ERROR_UNKNOWN };
+        let temp_mem = self.allocate(temp_mem_size)?;
+        let mut geometry = ptr::null_mut();
+        hiprt! { self.hiprt.hiprtCreateGeometry(self.context, &geometry_input, &build_options, &mut geometry), RT_ERROR_UNKNOWN };
+        self.geometries.push(geometry);
+        hiprt! { self.hiprt.hiprtBuildGeometry(self.context, hiprtBuildOperation::hiprtBuildOperationBuild, &geometry_input, &build_options, temp_mem.0, ptr::null_mut(), geometry), RT_ERROR_UNKNOWN };
+        Ok(geometry)
+    }
+
+    /// Allocates `size` bytes of device memory and registers the allocation.
+    ///
+    /// Returns `RT_ERROR_MEMORY_ALLOCATION_FAILED` when the allocation fails.
+    pub(crate) fn allocate(&mut self, size: usize) -> Result<hipDeviceptr_t, RTresult> {
+        let dev_ptr = hip::malloc(size).map_err(|_| RTresult::RT_ERROR_MEMORY_ALLOCATION_FAILED)?;
+        self.allocations.push(dev_ptr);
+        Ok(dev_ptr)
+    }
+
+    /// Copies `slice` into a new device allocation and registers the allocation.
+    pub(crate) fn copy_to_device<T>(&mut self, slice: &[T]) -> Result<hipDeviceptr_t, RTresult> {
+        let dev_ptr = hip::copy_to_device(slice)?;
+        self.allocations.push(dev_ptr);
+        Ok(dev_ptr)
+    }
+
+    /// Destroys every registered geometry, function table and scene and frees
+    /// every registered allocation, in that order.
+    ///
+    /// All resources are visited even after a failure; the result is
+    /// `RT_ERROR_UNKNOWN` if any single release failed. Calls after the first
+    /// one are no-ops that return `Ok(())`.
+    fn dealloc_impl(&mut self) -> Result<(), RTresult> {
+        fn check(status: hiprtError) -> Result<(), RTresult> {
+            match status {
+                hiprtError::hiprtSuccess => Ok(()),
+                _ => Err(RTresult::RT_ERROR_UNKNOWN),
+            }
+        }
+
+        if self.drop_flag {
+            return Ok(());
+        }
+        self.drop_flag = true;
+        let mut result: Result<(), RTresult> = Ok(());
+        for &geometry in &self.geometries {
+            let status = unsafe { self.hiprt.hiprtDestroyGeometry(self.context, geometry) };
+            result = result.and(check(status));
+        }
+        for &func_table in &self.func_tables {
+            let status = unsafe {
+                self.hiprt
+                    .hiprtDestroyCustomFuncTable(self.context, func_table)
+            };
+            result = result.and(check(status));
+        }
+        for &scene in &self.scenes {
+            let status = unsafe { self.hiprt.hiprtDestroyScene(self.context, scene) };
+            result = result.and(check(status));
+        }
+        for &dev_ptr in &self.allocations {
+            result = result.and(hip::free(dev_ptr).map_err(|_| RTresult::RT_ERROR_UNKNOWN));
+        }
+        result
+    }
+}
+
+// Safety net for every exit path, including early returns: whatever is still
+// registered gets released here. `drop` cannot report failures, so the result
+// of the cleanup is intentionally discarded.
+impl Drop for TrivialHIPAllocator {
+    fn drop(&mut self) {
+        let _ = self.dealloc_impl();
+    }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::context::ContextData;
+ use crate::variable::VariableData;
+ use crate::AlignedBuffer;
+ use crate::MaybeWeakRefMut;
+ use crate::OptixCell;
+ use crate::OptixObjectData;
+ use crate::TypeTag;
+ use hip_common::raytracing::VariablesBlock;
+ use optix_types::RTresult;
+ use rustc_hash::FxHashMap;
+ use std::ffi::CString;
+ use std::rc::Rc;
+ use std::{alloc::Layout, mem};
+
+    /// A group without containers must produce a zero-sized layout and empty
+    /// offset tables.
+    #[test]
+    fn get_layout_empty() {
+        let empty_group = TestGroup {
+            containers: Vec::new(),
+        };
+        let chain_layout = get_layout(1, &empty_group).unwrap();
+        assert_eq!(Layout::new::<()>(), chain_layout.layout);
+        assert_eq!(0, chain_layout.inner_offsets.len());
+        assert_eq!(0, chain_layout.outer_offsets.len());
+    }
+
+ #[test]
+ fn get_layout_complex() { // Checks the offsets and total size that get_layout computes for a mix of empty, single-program and multi-program containers
+ let context = Rc::new(OptixCell::new(ContextData::new().unwrap()));
+ let mut byte = 0x11; // fill pattern; TestProgram::new advances it by 0x11 after every use
+ let containers = vec![
+ TestContainer::new(vec![Some(TestProgram::new(vec![], &context, &mut byte))]), // just pointer
+ TestContainer::new(vec![Some(TestProgram::new(
+ vec![Layout::new::<u8>()],
+ &context,
+ &mut byte,
+ ))]), // single byte
+ TestContainer::new(vec![]), // empty
+ TestContainer::new(vec![None]), // nothing
+ TestContainer::new(vec![None, None]), // uncallable
+ TestContainer::new(vec![Some(TestProgram::new(
+ vec![
+ // four fields
+ Layout::new::<u32>(),
+ Layout::new::<u16>(),
+ Layout::new::<u64>(),
+ Layout::new::<u8>(),
+ ],
+ &context,
+ &mut byte,
+ ))]),
+ TestContainer::new(vec![
+ // four slots, two of them populated
+ Some(TestProgram::new(
+ vec![
+ Layout::new::<u8>(),
+ Layout::new::<u32>(),
+ Layout::new::<u32>(),
+ ],
+ &context,
+ &mut byte,
+ )),
+ None,
+ Some(TestProgram::new(vec![], &context, &mut byte)),
+ None,
+ ]),
+ TestContainer::new(vec![Some(TestProgram::new(vec![], &context, &mut byte))]),
+ TestContainer::new(vec![None]),
+ ];
+ let chain_layout = get_layout(1, &TestGroup { containers }).unwrap();
+ assert_eq!(
+ vec![40u32, 48, !0, !0, !0, 64, 97, 144, !0], // one entry per container; !0 marks a container without a program, 97 is offset 96 with bit 0 set (container with a nested offset table)
+ chain_layout.outer_offsets
+ );
+ assert_eq!(152, chain_layout.layout.size()); // the last program block starts at 144 and is 8 bytes long
+ assert_eq!(8, chain_layout.layout.align());
+ assert_eq!(
+ vec![
+ vec![0u32],
+ vec![0u32],
+ vec![],
+ vec![],
+ vec![],
+ vec![0],
+ vec![16, !0, 40, !0], // offsets relative to the nested table at 96; !0 marks the empty slots
+ vec![0],
+ vec![]
+ ],
+ chain_layout.inner_offsets
+ );
+ }
+
+ #[test]
+ fn copy_to_cpu_complex() { // Serializes the same containers as get_layout_complex with a hand-written layout and compares the buffer byte by byte
+ let context = Rc::new(OptixCell::new(ContextData::new().unwrap()));
+ let chain_layout = CallChainLayout { // same values that get_layout_complex asserts
+ layout: Layout::from_size_align(152, 8).unwrap(),
+ inner_offsets: vec![
+ vec![0u32],
+ vec![0u32],
+ vec![],
+ vec![],
+ vec![],
+ vec![0],
+ vec![16, !0, 40, !0],
+ vec![0],
+ vec![],
+ ],
+ outer_offsets: vec![40u32, 48, !0, !0, !0, 64, 97, 144, !0],
+ };
+ let mut byte = 0x11; // fill pattern; TestProgram::new advances it by 0x11 after every use
+ let containers = vec![
+ TestContainer::new(vec![Some(TestProgram::new(vec![], &context, &mut byte))]), // just pointer
+ TestContainer::new(vec![Some(TestProgram::new(
+ vec![Layout::new::<u8>()],
+ &context,
+ &mut byte,
+ ))]), // single byte
+ TestContainer::new(vec![]), // empty
+ TestContainer::new(vec![None]), // nothing
+ TestContainer::new(vec![None, None]), // uncallable
+ TestContainer::new(vec![Some(TestProgram::new(
+ vec![
+ // four fields
+ Layout::new::<u32>(),
+ Layout::new::<u16>(),
+ Layout::new::<u64>(),
+ Layout::new::<u8>(),
+ ],
+ &context,
+ &mut byte,
+ ))]),
+ TestContainer::new(vec![
+ // four slots, two of them populated
+ Some(TestProgram::new(
+ vec![
+ Layout::new::<u8>(),
+ Layout::new::<u32>(),
+ Layout::new::<u32>(),
+ ],
+ &context,
+ &mut byte,
+ )),
+ None,
+ Some(TestProgram::new(vec![], &context, &mut byte)),
+ None,
+ ]),
+ TestContainer::new(vec![Some(TestProgram::new(vec![], &context, &mut byte))]),
+ TestContainer::new(vec![None]),
+ ];
+ let buffer = super::copy_to_cpu(1, &TestGroup { containers }, &chain_layout).unwrap();
+ assert_eq!(
+ buffer.as_bytes(),
+ vec![
+ 40u8, 0, 0, 0, // prog 0 offset
+ 48, 0, 0, 0, // prog 1 offset
+ 255, 255, 255, 255, // prog 2 offset
+ 255, 255, 255, 255, // prog 3 offset
+ 255, 255, 255, 255, // prog 4 offset
+ 64, 0, 0, 0, // prog 5 offset
+ 97, 0, 0, 0, // multi-prog 6 offset
+ 144, 0, 0, 0, // prog 7 offset
+ 255, 255, 255, 255, // prog 8 offset
+ 0, 0, 0, 0, // offset table padding
+ 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, // prog 0, pointer
+ 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, // prog 1, pointer
+ 0x33, // prog 1, field 0
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // prog 1 padding
+ 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, // prog 5, pointer
+ 0x55, 0x55, 0x55, 0x55, // prog 5, field 0
+ 0x66, 0x66, // prog 5, field 1
+ 0x00, 0x00, // prog 5, field 1 padding
+ 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, // prog 5, field 2
+ 0x88, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // prog 5, field 3 followed by padding
+ 16, 0, 0, 0, // prog 6, subprog 0 offset
+ 255, 255, 255, 255, // prog 6, subprog 1 offset
+ 40, 0, 0, 0, // prog 6, subprog 2 offset
+ 255, 255, 255, 255, // prog 6, subprog 3 offset
+ 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, 0x99, // prog 6, subprog 0, pointer
+ 0xaa, // prog 6, subprog 0, field 0
+ 0x00, 0x00, 0x00, // prog 6, subprog 0, field 0 padding
+ 0xbb, 0xbb, 0xbb, 0xbb, // prog 6, subprog 0, field 1
+ 0xcc, 0xcc, 0xcc, 0xcc, // prog 6, subprog 0, field 2
+ 0x00, 0x00, 0x00, 0x00, // prog 6, subprog 0 padding
+ 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, // prog 6, subprog 2, pointer
+ 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, // prog 7, pointer
+ ]
+ );
+ }
+
+    /// Test double for a call-chain owner: a flat list of containers that is
+    /// visited in order.
+    struct TestGroup {
+        containers: Vec<Rc<OptixCell<TestContainer>>>,
+    }
+
+    #[gat]
+    impl<'a> VisitCallChain for TestGroup {
+        type ProgramData = TestProgram;
+        type CopyContext<'temp> = ();
+
+        // The ray argument is ignored; every container counts.
+        fn len(&self, _: u32) -> usize {
+            self.containers.len()
+        }
+
+        // Calls `visitor` once per program slot with the container index, the
+        // slot index and, for populated slots, a borrow of the program.
+        fn visit_alloc(
+            &self,
+            _: u32,
+            mut visitor: impl FnMut(
+                usize,
+                usize,
+                Option<((), &Self::ProgramData)>,
+            ) -> Result<(), RTresult>,
+        ) -> Result<(), RTresult> {
+            for (outer_idx, container) in self.containers.iter().enumerate() {
+                let container = container.borrow()?;
+                for (inner_idx, slot) in container.programs.iter().enumerate() {
+                    let borrowed = slot.as_deref().map(OptixCell::borrow).transpose()?;
+                    let program = borrowed.as_deref().map(|prog| ((), prog));
+                    visitor(outer_idx, inner_idx, program)?;
+                }
+            }
+            Ok(())
+        }
+
+        fn get_program_block_layout(p: &Self::ProgramData) -> Result<Layout, RTresult> {
+            Ok(p.variables_block.layout)
+        }
+
+        // Writes the fake pointer at the start of `dst`, then every variable
+        // value at the offset recorded in the variables block.
+        fn copy_program_block(
+            &self,
+            _: (),
+            p: &Self::ProgramData,
+            dst: &mut [u8],
+        ) -> Result<(), RTresult> {
+            let ptr_bytes = p.ptr.to_ne_bytes();
+            dst[..mem::size_of::<usize>()].copy_from_slice(&ptr_bytes);
+            for (name, slot) in p.variables_block.variables.iter() {
+                let cell = &p.variables[name];
+                let value = cell.borrow()?;
+                let start = slot.offset as usize;
+                let end = start + slot.size as usize;
+                value.copy_into_buffer(&mut dst[start..end])?;
+            }
+            Ok(())
+        }
+    }
+
+    /// Test double for an object holding program slots; `None` is an empty slot.
+    struct TestContainer {
+        programs: Vec<Option<Rc<OptixCell<TestProgram>>>>,
+    }
+
+    impl TestContainer {
+        /// Wraps `programs` in a shared, borrow-checked container.
+        fn new(programs: Vec<Option<Rc<OptixCell<TestProgram>>>>) -> Rc<OptixCell<Self>> {
+            Rc::new(OptixCell::new(Self { programs }))
+        }
+    }
+
+    impl OptixObjectData for TestContainer {
+        const TYPE: TypeTag = TypeTag::GeometryInstance;
+
+        /// Nothing to unregister for a test double.
+        fn deregister(&mut self, _this: &Rc<OptixCell<Self>>) -> Result<(), RTresult> {
+            Ok(())
+        }
+
+        /// Test doubles are not attached to a context, so this must never be called.
+        fn context<'a>(&'a mut self) -> MaybeWeakRefMut<'a, ContextData> {
+            unimplemented!("TestContainer has no owning context")
+        }
+    }
+
+    /// Test double for a program: a fake pointer value, the layout of its
+    /// variables block and the values stored in that block.
+    struct TestProgram {
+        ptr: usize,
+        variables_block: VariablesBlock,
+        variables: FxHashMap<CString, Rc<OptixCell<VariableData>>>,
+    }
+
+    impl TestProgram {
+        /// Builds a program whose block starts with a pointer-sized field
+        /// followed by one variable per entry of `vars`, named "0", "1", ...
+        ///
+        /// The pointer and every variable are filled with the current value of
+        /// `*byte`, which is advanced by 0x11 after each use so that every field
+        /// is recognizable in a byte dump.
+        fn new(
+            vars: Vec<Layout>,
+            _context: &Rc<OptixCell<ContextData>>,
+            byte: &mut u8,
+        ) -> Rc<OptixCell<Self>> {
+            let ptr = usize::from_ne_bytes([*byte; mem::size_of::<usize>()]);
+            *byte += 0x11;
+            // First pass: lay the variables out behind the pointer field.
+            let mut layout = Layout::new::<usize>();
+            let declared = vars
+                .iter()
+                .enumerate()
+                .map(|(idx, l)| {
+                    let (new_layout, offset) = layout.extend(*l).unwrap();
+                    layout = new_layout;
+                    let name = idx.to_string();
+                    (
+                        CString::new(name).unwrap(),
+                        hip_common::raytracing::Variable {
+                            offset: offset as u32,
+                            size: l.size() as u32,
+                            default_value: Vec::new(),
+                        },
+                    )
+                })
+                .collect::<FxHashMap<_, _>>();
+            let variables_block = VariablesBlock {
+                layout,
+                variables: declared,
+            };
+            // Second pass: create the value of every variable.
+            let variables = vars
+                .iter()
+                .enumerate()
+                .map(|(idx, l)| {
+                    let fill = vec![*byte; l.size()];
+                    let mut aligned_vec = AlignedBuffer::new(*l);
+                    aligned_vec.as_bytes_mut().copy_from_slice(&fill);
+                    let variable = Rc::new(OptixCell::new(VariableData {
+                        value: crate::variable::VariableValue::Boxed(aligned_vec),
+                        // NOTE(review): a zeroed placeholder is only sound if the
+                        // field's type accepts the all-zero bit pattern and is
+                        // never used by these tests - confirm.
+                        context: unsafe { mem::zeroed() },
+                    }));
+                    *byte += 0x11;
+                    let name = idx.to_string();
+                    (CString::new(name).unwrap(), variable)
+                })
+                .collect::<FxHashMap<_, _>>();
+            Rc::new(OptixCell::new(Self {
+                ptr,
+                variables_block,
+                variables,
+            }))
+        }
+    }
+
+    impl OptixObjectData for TestProgram {
+        const TYPE: TypeTag = TypeTag::Program;
+
+        /// Nothing to unregister for a test double.
+        fn deregister(&mut self, _this: &Rc<OptixCell<Self>>) -> Result<(), RTresult> {
+            Ok(())
+        }
+
+        /// Test doubles are not attached to a context, so this must never be called.
+        fn context<'a>(&'a mut self) -> MaybeWeakRefMut<'a, ContextData> {
+            unimplemented!("TestProgram has no owning context")
+        }
+    }
+}
diff --git a/zluda_rt/src/test_common.rs b/zluda_rt/src/test_common.rs new file mode 100644 index 0000000..7d982c9 --- /dev/null +++ b/zluda_rt/src/test_common.rs @@ -0,0 +1,125 @@ +use libloading::{Library, Symbol};
+use optix_types::*;
+use std::ffi::c_void;
+
+/// Path of the reference OptiX 6.5.0 library shipped in this crate's `bin`
+/// directory (Windows flavour).
+#[cfg(windows)]
+const OPTIX_DLL: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/bin/optix.6.5.0.dll");
+
+/// Path of the reference OptiX 6.5.0 library shipped in this crate's `bin`
+/// directory (non-Windows flavour).
+#[cfg(not(windows))]
+const OPTIX_DLL: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/bin/liboptix.so.6.5.0");
+
+/// Loads the reference OptiX library from `OPTIX_DLL`.
+///
+/// # Panics
+///
+/// Panics with the library path and the loader error when loading fails.
+fn load_optix() -> Library {
+    // SAFETY: loading a library runs its initialization routines; the bundled
+    // OptiX binary is trusted by these tests.
+    unsafe { Library::new(OPTIX_DLL) }
+        .unwrap_or_else(|err| panic!("failed to load OptiX library {}: {}", OPTIX_DLL, err))
+}
+
+/// Looks up the symbol `name` in `lib` as an untyped pointer.
+///
+/// The callers in this file pass names that already end with a NUL byte.
+///
+/// # Panics
+///
+/// Panics with the symbol name and the loader error when the lookup fails.
+fn get_proc_address<'a>(lib: &'a Library, name: &str) -> Symbol<'a, *mut c_void> {
+    // SAFETY: the symbol is only reinterpreted by callers that know its real
+    // signature; no type is assumed here beyond "pointer-sized".
+    unsafe { lib.get::<*mut c_void>(name.as_bytes()) }
+        .unwrap_or_else(|err| panic!("failed to resolve OptiX symbol {:?}: {}", name, err))
+}
+
+pub struct Cuda { // Test backend that forwards every call to the reference OptiX library
+ lib: Library // handle returned by load_optix(); symbols are looked up in it on every call
+}
+
+unsafe impl Send for Cuda {} // NOTE(review): libloading's Library is believed to be Send + Sync already - confirm whether these manual impls are needed
+unsafe impl Sync for Cuda {}
+
+#[derive(Copy, Clone)]
+pub struct Zluda; // Test backend that calls this crate's own `crate::<fn>` implementations; carries no state
+
+macro_rules! optix6_fn_table_void{ // Generates trait OptixFnsVoid (one method per listed function, return value passed through unchanged) and implements it for Cuda and Zluda
+ ($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* $(,)? ) -> $ret_type:ty);*) => { // input: `;`-separated list of `"abi" fn name(args) -> ret` declarations
+ pub trait OptixFnsVoid {
+ $(
+ unsafe fn $fn_name (&self, $( $arg_id : $arg_type),* ) -> $ret_type;
+ )*
+ }
+
+ impl OptixFnsVoid for Cuda {
+ $(
+ unsafe fn $fn_name (&self, $( $arg_id : $arg_type),* ) -> $ret_type {
+ let fn_ptr = crate::test_common::get_proc_address(&self.lib, concat!(stringify!($fn_name), "\0")); // symbol name is NUL-terminated at compile time
+ let cu_fn = std::mem::transmute::<_, unsafe extern $abi fn( $( $arg_id : $arg_type),* ) -> $ret_type>(fn_ptr); // reinterprets the looked-up Symbol as a function pointer; relies on Symbol being pointer-sized
+ cu_fn ( $( $arg_id),* )
+ }
+ )*
+ }
+
+ impl OptixFnsVoid for Zluda {
+ $(
+ unsafe fn $fn_name (&self, $( $arg_id : $arg_type),* ) -> $ret_type {
+ crate::$fn_name ( $( $arg_id),* ) // direct call into this crate's implementation
+ }
+ )*
+ }
+ };
+}
+
+macro_rules! optix6_fn_table_rtresult { // Generates trait OptixFns: per listed function a raw `<name>_unchecked` method plus a checked `<name>` wrapper, implemented for Cuda and Zluda
+ ($($abi:literal fn $fn_name:ident( $($arg_id:ident : $arg_type:ty),* $(,)? ) -> $ret_type:ty);*) => { // input: `;`-separated list of `"abi" fn name(args) -> ret` declarations
+ pub trait OptixFns: OptixFnsVoid {
+ fn new() -> Self;
+ $(
+ paste::paste! {
+ unsafe fn $fn_name (&self, $( $arg_id : $arg_type),* ) { // checked wrapper: returns nothing and panics on any result other than RT_SUCCESS
+ let err = self. [< $fn_name _unchecked >] (
+ $( $arg_id),*
+ );
+ if err != optix_types::RTresult::RT_SUCCESS {
+ panic!("{}", err.0); // the panic message is the raw value stored in the result's first field
+ }
+ }
+ #[allow(non_snake_case)]
+ unsafe fn [< $fn_name _unchecked >] (&self, $( $arg_id : $arg_type),* ) -> $ret_type; // raw call, implemented per backend below
+ }
+ )*
+ }
+
+ impl OptixFns for Cuda {
+ fn new() -> Self {
+ let lib = crate::test_common::load_optix(); // panics if the reference library cannot be loaded
+ Self { lib }
+ }
+ $(
+ paste::paste! {
+ unsafe fn [< $fn_name _unchecked >] (&self, $( $arg_id : $arg_type),* ) -> $ret_type {
+ let fn_ptr = crate::test_common::get_proc_address(&self.lib, concat!(stringify!($fn_name), "\0")); // symbol name is NUL-terminated at compile time
+ let cu_fn = std::mem::transmute::<_, unsafe extern $abi fn( $( $arg_id : $arg_type),* ) -> $ret_type>(fn_ptr); // reinterprets the looked-up Symbol as a function pointer; relies on Symbol being pointer-sized
+ cu_fn ( $( $arg_id),* )
+ }
+ }
+ )*
+ }
+
+ impl OptixFns for Zluda {
+ fn new() -> Self { Self }
+ $(
+ paste::paste! {
+ unsafe fn [< $fn_name _unchecked >] (&self, $( $arg_id : $arg_type),* ) -> $ret_type {
+ crate::$fn_name ( $( $arg_id),* ) // direct call into this crate's implementation
+ }
+ }
+ )*
+ }
+ };
+}
+
+optix_base::optix6_function_declarations!(optix6_fn_table_rtresult, optix6_fn_table_void, [rtContextGetErrorString]); // instantiates both tables above; presumably the bracketed names are routed to the `void` table and all others to the RTresult table - TODO confirm in optix_base
+
+#[macro_export]
+macro_rules! optix_test { // Expands one generic test function into two #[test] functions, one per backend
+ ($func:ident) => { // $func must be callable as `$func::<T>(T)` with T = Zluda and T = Cuda
+ paste::paste! {
+ #[test]
+ #[allow(non_snake_case)]
+ fn [<$func _zluda>]() { // runs the test against this crate's implementation
+ unsafe { $func::<crate::test_common::Zluda>(crate::test_common::Zluda::new()) }
+ }
+
+ #[test]
+ #[allow(non_snake_case)]
+ fn [<$func _cuda>]() { // runs the test against the reference library; Cuda::new() panics if it cannot be loaded
+ unsafe { $func::<crate::test_common::Cuda>(crate::test_common::Cuda::new()) }
+ }
+ }
+ };
+}
diff --git a/zluda_rt/src/tests/alloca_bug.cu b/zluda_rt/src/tests/alloca_bug.cu new file mode 100644 index 0000000..520f615 --- /dev/null +++ b/zluda_rt/src/tests/alloca_bug.cu @@ -0,0 +1,22 @@ +// nvcc alloca_bug.cu -I"C:\dev\OptiX SDK 6.5.0\include" -ptx -x cu -dc +#include <optix.h> +#include <optixu/optixu_math_namespace.h> + +rtDeclareVariable(rtCallableProgramId<float3(float3 &mat, float3 &prd)>, sysBRDFEval, , ); +rtBuffer<float3> sysMaterialParameters; + +RT_PROGRAM void closest_hit() +{ + float3 mat = sysMaterialParameters[0]; + + if (mat.x != 0) + { + const float3 texColor = make_float3(0, 0,0); + mat = make_float3(powf(texColor.x, 2.2f), 0,0); + } + float3 prd2; + float3 f = sysBRDFEval(mat, prd2); + + if (prd2.x > 0.0f) + prd2 *= f; +} diff --git a/zluda_rt/src/tests/alloca_bug.ptx b/zluda_rt/src/tests/alloca_bug.ptx new file mode 100644 index 0000000..f2ae4d1 --- /dev/null +++ b/zluda_rt/src/tests/alloca_bug.ptx @@ -0,0 +1,126 @@ +// +// Generated by NVIDIA NVVM Compiler +// +// Compiler Build ID: CL-31833905 +// Cuda compilation tools, release 11.8, V11.8.89 +// Based on NVVM 7.0.1 +// + +.version 7.8 +.target sm_52 +.address_size 64 + + // .globl _Z11closest_hitv +.visible .global .align 4 .b8 sysBRDFEval[4]; +.visible .global .align 1 .b8 sysMaterialParameters[1]; +.visible .global .align 4 .b8 _ZN21rti_internal_typeinfo11sysBRDFEvalE[8] = {82, 97, 121, 0, 4, 0, 0, 0}; +.visible .global .align 8 .u64 _ZN21rti_internal_register20reg_bitness_detectorE; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail0E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail1E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail2E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail3E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail4E; +.visible .global .align 8 .u64 
_ZN21rti_internal_register24reg_exception_64_detail5E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail6E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail7E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail8E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail9E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail0E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail1E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail2E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail3E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail4E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail5E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail6E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail7E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail8E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail9E; +.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_xE; +.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_yE; +.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_zE; +.visible .global .align 1 .b8 _ZN21rti_internal_typename11sysBRDFEvalE[54] = {114, 116, 67, 97, 108, 108, 97, 98, 108, 101, 80, 114, 111, 103, 114, 97, 109, 73, 100, 60, 102, 108, 111, 97, 116, 51, 40, 102, 108, 111, 97, 116, 51, 32, 38, 109, 97, 116, 44, 32, 102, 108, 111, 97, 116, 51, 32, 38, 112, 114, 100, 41, 62, 0}; +.visible .global .align 4 .u32 _ZN21rti_internal_typeenum11sysBRDFEvalE = 4920; +.visible .global .align 1 .b8 _ZN21rti_internal_semantic11sysBRDFEvalE[1]; +.visible .global .align 1 .b8 
_ZN23rti_internal_annotation11sysBRDFEvalE[1]; + +.visible .entry _Z11closest_hitv() +{ + .local .align 4 .b8 __local_depot0[24]; + .reg .b64 %SP; + .reg .b64 %SPL; + .reg .pred %p<3>; + .reg .f32 %f<13>; + .reg .b32 %r<5>; + .reg .b64 %rd<16>; + + + mov.u64 %SPL, __local_depot0; + cvta.local.u64 %SP, %SPL; + add.u64 %rd9, %SP, 0; + add.u64 %rd1, %SPL, 0; + mov.u64 %rd10, sysMaterialParameters; + cvta.global.u64 %rd4, %rd10; + mov.u32 %r1, 1; + mov.u32 %r2, 12; + mov.u64 %rd8, 0; + // begin inline asm + call (%rd3), _rt_buffer_get_64, (%rd4, %r1, %r2, %rd8, %rd8, %rd8, %rd8); + // end inline asm + ld.f32 %f5, [%rd3]; + ld.f32 %f6, [%rd3+4]; + ld.f32 %f7, [%rd3+8]; + st.local.f32 [%rd1], %f5; + st.local.f32 [%rd1+4], %f6; + st.local.f32 [%rd1+8], %f7; + setp.eq.f32 %p1, %f5, 0f00000000; + @%p1 bra $L__BB0_2; + + mov.u32 %r3, 0; + st.local.u32 [%rd1], %r3; + st.local.u32 [%rd1+4], %r3; + st.local.u32 [%rd1+8], %r3; + +$L__BB0_2: + ld.global.u32 %r4, [sysBRDFEval]; + // begin inline asm + call (%rd11), _rt_callable_program_from_id_v2_64, (%r4, %rd8); + // end inline asm + add.u64 %rd15, %SP, 12; + { // callseq 0, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.b64 [param0+0], %rd9; + .param .b64 param1; + st.param.b64 [param1+0], %rd15; + .param .align 4 .b8 retval0[12]; + prototype_0 : .callprototype (.param .align 4 .b8 _[12]) _ (.param .b64 _, .param .b64 _); + call (retval0), + %rd11, + ( + param0, + param1 + ) + , prototype_0; + ld.param.f32 %f1, [retval0+0]; + ld.param.f32 %f2, [retval0+4]; + ld.param.f32 %f3, [retval0+8]; + } // callseq 0 + add.u64 %rd2, %SPL, 12; + ld.local.f32 %f4, [%rd2]; + setp.leu.f32 %p2, %f4, 0f00000000; + @%p2 bra $L__BB0_4; + + mul.f32 %f8, %f1, %f4; + st.local.f32 [%rd2], %f8; + ld.local.f32 %f9, [%rd2+4]; + mul.f32 %f10, %f2, %f9; + st.local.f32 [%rd2+4], %f10; + ld.local.f32 %f11, [%rd2+8]; + mul.f32 %f12, %f3, %f11; + st.local.f32 [%rd2+8], %f12; + +$L__BB0_4: + ret; + +} + diff --git 
a/zluda_rt/src/tests/any_hit_intersect.cu b/zluda_rt/src/tests/any_hit_intersect.cu new file mode 100644 index 0000000..5b9bece --- /dev/null +++ b/zluda_rt/src/tests/any_hit_intersect.cu @@ -0,0 +1,78 @@ +#include <optix.h> +#include <optixu/optixu_math_namespace.h> +#include <optix_world.h> + +using namespace optix; + +rtDeclareVariable(float4, sphere, , ); +rtDeclareVariable(uint2, launch_index, rtLaunchIndex, ); +rtDeclareVariable(unsigned int, b_index, , ); +rtBuffer<unsigned int, 1> output_buffer; +rtBuffer<unsigned int, 1> output_buffer2; +rtDeclareVariable(rtObject, bvh, , ); +rtDeclareVariable(optix::Ray, ray, rtCurrentRay, ); + +RT_PROGRAM void start(void) +{ + Ray ray = make_Ray(make_float3(0, 0, -1), make_float3(0,0,1), 0, 0.0, RT_DEFAULT_MAX); + char unused = 0; + rtTrace(bvh, ray, unused); +} + +RT_PROGRAM void set_buffer(void) +{ + atomicAdd(&output_buffer[b_index], 1); +} + +RT_PROGRAM void set_buffer2(void) +{ + atomicAdd(&output_buffer2[b_index], 1); +} + +RT_PROGRAM void intersect(int primIdx) +{ + float3 center = make_float3(sphere); + float3 O = ray.origin - center; + float l = 1 / length(ray.direction); + float3 D = ray.direction * l; + float radius = sphere.w; + + float b = dot(O, D); + float c = dot(O, O)-radius*radius; + float disc = b*b-c; + if(disc > 0.0f){ + float sdisc = sqrtf(disc); + float root1 = (-b - sdisc); + + float root11 = 0.0f; + + bool check_second = true; + if( rtPotentialIntersection( (root1 + root11) * l ) ) { + //shading_normal = geometric_normal = (O + (root1 + root11)*D)/radius; + if(rtReportIntersection(launch_index.x)) + check_second = false; + } + if(check_second) { + float root2 = (-b + sdisc); + if( rtPotentialIntersection( root2 * l ) ) { + //shading_normal = geometric_normal = (O + root2*D)/radius; + rtReportIntersection(launch_index.x); + } + } + } +} + +RT_PROGRAM void bounds (int, float result[6]) +{ + const float3 cen = make_float3( sphere ); + const float3 rad = make_float3( sphere.w ); + + optix::Aabb* aabb 
= (optix::Aabb*)result; + + if( rad.x > 0.0f && !isinf(rad.x) ) { + aabb->m_min = cen - rad; + aabb->m_max = cen + rad; + } else { + aabb->invalidate(); + } +} diff --git a/zluda_rt/src/tests/any_hit_intersect.ptx b/zluda_rt/src/tests/any_hit_intersect.ptx new file mode 100644 index 0000000..d237fdd --- /dev/null +++ b/zluda_rt/src/tests/any_hit_intersect.ptx @@ -0,0 +1,271 @@ +// +// Generated by NVIDIA NVVM Compiler +// +// Compiler Build ID: CL-31442593 +// Cuda compilation tools, release 11.7, V11.7.99 +// Based on NVVM 7.0.1 +// + +.version 7.7 +.target sm_52 +.address_size 64 + + // .globl _Z5startv +.global .align 16 .b8 sphere[16]; +.global .align 8 .b8 launch_index[8]; +.global .align 4 .u32 b_index; +.global .align 1 .b8 output_buffer[1]; +.global .align 1 .b8 output_buffer2[1]; +.global .align 4 .b8 bvh[4]; +.global .align 4 .b8 ray[36]; +.global .align 4 .b8 _ZN21rti_internal_typeinfo6sphereE[8] = {82, 97, 121, 0, 16, 0, 0, 0}; +.global .align 4 .b8 _ZN21rti_internal_typeinfo12launch_indexE[8] = {82, 97, 121, 0, 8, 0, 0, 0}; +.global .align 4 .b8 _ZN21rti_internal_typeinfo7b_indexE[8] = {82, 97, 121, 0, 4, 0, 0, 0}; +.global .align 4 .b8 _ZN21rti_internal_typeinfo3bvhE[8] = {82, 97, 121, 0, 4, 0, 0, 0}; +.global .align 4 .b8 _ZN21rti_internal_typeinfo3rayE[8] = {82, 97, 121, 0, 36, 0, 0, 0}; +.global .align 8 .u64 _ZN21rti_internal_register20reg_bitness_detectorE; +.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail0E; +.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail1E; +.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail2E; +.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail3E; +.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail4E; +.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail5E; +.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail6E; +.global .align 8 .u64 
_ZN21rti_internal_register24reg_exception_64_detail7E; +.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail8E; +.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail9E; +.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail0E; +.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail1E; +.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail2E; +.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail3E; +.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail4E; +.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail5E; +.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail6E; +.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail7E; +.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail8E; +.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail9E; +.global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_xE; +.global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_yE; +.global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_zE; +.global .align 1 .b8 _ZN21rti_internal_typename6sphereE[7] = {102, 108, 111, 97, 116, 52, 0}; +.global .align 1 .b8 _ZN21rti_internal_typename12launch_indexE[6] = {117, 105, 110, 116, 50, 0}; +.global .align 1 .b8 _ZN21rti_internal_typename7b_indexE[13] = {117, 110, 115, 105, 103, 110, 101, 100, 32, 105, 110, 116, 0}; +.global .align 1 .b8 _ZN21rti_internal_typename3bvhE[9] = {114, 116, 79, 98, 106, 101, 99, 116, 0}; +.global .align 1 .b8 _ZN21rti_internal_typename3rayE[11] = {111, 112, 116, 105, 120, 58, 58, 82, 97, 121, 0}; +.global .align 4 .u32 _ZN21rti_internal_typeenum6sphereE = 4919; +.global .align 4 .u32 _ZN21rti_internal_typeenum12launch_indexE = 4919; +.global .align 4 .u32 _ZN21rti_internal_typeenum7b_indexE = 4919; +.global .align 4 .u32 _ZN21rti_internal_typeenum3bvhE = 4919; +.global .align 4 .u32 
_ZN21rti_internal_typeenum3rayE = 4919; +.global .align 1 .b8 _ZN21rti_internal_semantic6sphereE[1]; +.global .align 1 .b8 _ZN21rti_internal_semantic12launch_indexE[14] = {114, 116, 76, 97, 117, 110, 99, 104, 73, 110, 100, 101, 120, 0}; +.global .align 1 .b8 _ZN21rti_internal_semantic7b_indexE[1]; +.global .align 1 .b8 _ZN21rti_internal_semantic3bvhE[1]; +.global .align 1 .b8 _ZN21rti_internal_semantic3rayE[13] = {114, 116, 67, 117, 114, 114, 101, 110, 116, 82, 97, 121, 0}; +.global .align 1 .b8 _ZN23rti_internal_annotation6sphereE[1]; +.global .align 1 .b8 _ZN23rti_internal_annotation12launch_indexE[1]; +.global .align 1 .b8 _ZN23rti_internal_annotation7b_indexE[1]; +.global .align 1 .b8 _ZN23rti_internal_annotation3bvhE[1]; +.global .align 1 .b8 _ZN23rti_internal_annotation3rayE[1]; + +.visible .entry _Z5startv() +{ + .local .align 1 .b8 __local_depot0[1]; + .reg .b64 %SP; + .reg .b64 %SPL; + .reg .b16 %rs<2>; + .reg .f32 %f<9>; + .reg .b32 %r<6>; + .reg .b64 %rd<3>; + + + mov.u64 %SPL, __local_depot0; + cvta.local.u64 %SP, %SPL; + add.u64 %rd1, %SP, 0; + add.u64 %rd2, %SPL, 0; + mov.u16 %rs1, 0; + st.local.u8 [%rd2], %rs1; + ld.global.u32 %r1, [bvh]; + mov.f32 %f3, 0fBF800000; + mov.f32 %f6, 0f3F800000; + mov.f32 %f7, 0f00000000; + mov.f32 %f8, 0f6C4ECB8F; + mov.u32 %r3, 255; + mov.u32 %r4, 0; + mov.u32 %r5, 1; + // begin inline asm + call _rt_trace_mask_flags_64, (%r1, %f7, %f7, %f3, %f7, %f7, %f6, %r4, %f7, %f8, %r3, %r4, %rd1, %r5); + // end inline asm + ret; + +} + // .globl _Z10set_bufferv +.visible .entry _Z10set_bufferv() +{ + .reg .b32 %r<4>; + .reg .b64 %rd<8>; + + + ld.global.u32 %rd3, [b_index]; + mov.u64 %rd6, 0; + mov.u64 %rd7, output_buffer; + cvta.global.u64 %rd2, %rd7; + mov.u32 %r1, 1; + mov.u32 %r2, 4; + // begin inline asm + call (%rd1), _rt_buffer_get_64, (%rd2, %r1, %r2, %rd3, %rd6, %rd6, %rd6); + // end inline asm + atom.add.u32 %r3, [%rd1], 1; + ret; + +} + // .globl _Z11set_buffer2v +.visible .entry _Z11set_buffer2v() +{ + .reg .b32 
%r<4>; + .reg .b64 %rd<8>; + + + ld.global.u32 %rd3, [b_index]; + mov.u64 %rd6, 0; + mov.u64 %rd7, output_buffer2; + cvta.global.u64 %rd2, %rd7; + mov.u32 %r1, 1; + mov.u32 %r2, 4; + // begin inline asm + call (%rd1), _rt_buffer_get_64, (%rd2, %r1, %r2, %rd3, %rd6, %rd6, %rd6); + // end inline asm + atom.add.u32 %r3, [%rd1], 1; + ret; + +} + // .globl _Z9intersecti +.visible .entry _Z9intersecti( + .param .u32 _Z9intersecti_param_0 +) +{ + .reg .pred %p<5>; + .reg .f32 %f<43>; + .reg .b32 %r<7>; + + + ld.global.v4.f32 {%f5, %f6, %f7, %f8}, [sphere]; + ld.global.f32 %f13, [ray]; + sub.f32 %f14, %f13, %f5; + ld.global.f32 %f15, [ray+4]; + sub.f32 %f16, %f15, %f6; + ld.global.f32 %f17, [ray+8]; + sub.f32 %f18, %f17, %f7; + ld.global.f32 %f19, [ray+12]; + ld.global.f32 %f20, [ray+16]; + mul.f32 %f21, %f20, %f20; + fma.rn.f32 %f22, %f19, %f19, %f21; + ld.global.f32 %f23, [ray+20]; + fma.rn.f32 %f24, %f23, %f23, %f22; + sqrt.rn.f32 %f25, %f24; + rcp.rn.f32 %f1, %f25; + mul.f32 %f26, %f19, %f1; + mul.f32 %f27, %f1, %f20; + mul.f32 %f28, %f1, %f23; + mul.f32 %f29, %f16, %f27; + fma.rn.f32 %f30, %f14, %f26, %f29; + fma.rn.f32 %f2, %f18, %f28, %f30; + mul.f32 %f31, %f16, %f16; + fma.rn.f32 %f32, %f14, %f14, %f31; + fma.rn.f32 %f33, %f18, %f18, %f32; + mul.f32 %f34, %f8, %f8; + sub.f32 %f35, %f33, %f34; + mul.f32 %f36, %f2, %f2; + sub.f32 %f3, %f36, %f35; + setp.leu.f32 %p1, %f3, 0f00000000; + @%p1 bra $L__BB3_5; + + sqrt.rn.f32 %f4, %f3; + neg.f32 %f38, %f2; + sub.f32 %f39, %f38, %f4; + add.f32 %f40, %f39, 0f00000000; + mul.f32 %f37, %f1, %f40; + // begin inline asm + call (%r1), _rt_potential_intersection, (%f37); + // end inline asm + setp.eq.s32 %p2, %r1, 0; + @%p2 bra $L__BB3_3; + + ld.global.u32 %r3, [launch_index]; + // begin inline asm + call (%r2), _rt_report_intersection, (%r3); + // end inline asm + setp.ne.s32 %p3, %r2, 0; + @%p3 bra $L__BB3_5; + +$L__BB3_3: + sub.f32 %f42, %f4, %f2; + mul.f32 %f41, %f1, %f42; + // begin inline asm + call (%r4), 
_rt_potential_intersection, (%f41); + // end inline asm + setp.eq.s32 %p4, %r4, 0; + @%p4 bra $L__BB3_5; + + ld.global.u32 %r6, [launch_index]; + // begin inline asm + call (%r5), _rt_report_intersection, (%r6); + // end inline asm + +$L__BB3_5: + ret; + +} + // .globl _Z6boundsiPf +.visible .entry _Z6boundsiPf( + .param .u32 _Z6boundsiPf_param_0, + .param .u64 _Z6boundsiPf_param_1 +) +{ + .reg .pred %p<3>; + .reg .f32 %f<17>; + .reg .b32 %r<3>; + .reg .b64 %rd<3>; + + + ld.param.u64 %rd2, [_Z6boundsiPf_param_1]; + cvta.to.global.u64 %rd1, %rd2; + ld.global.v4.f32 {%f6, %f7, %f8, %f9}, [sphere]; + setp.leu.f32 %p1, %f9, 0f00000000; + @%p1 bra $L__BB4_2; + + abs.f32 %f10, %f9; + setp.neu.f32 %p2, %f10, 0f7F800000; + @%p2 bra $L__BB4_3; + bra.uni $L__BB4_2; + +$L__BB4_3: + sub.f32 %f11, %f6, %f9; + st.global.f32 [%rd1], %f11; + sub.f32 %f12, %f7, %f9; + st.global.f32 [%rd1+4], %f12; + sub.f32 %f13, %f8, %f9; + st.global.f32 [%rd1+8], %f13; + add.f32 %f14, %f6, %f9; + st.global.f32 [%rd1+12], %f14; + add.f32 %f15, %f7, %f9; + st.global.f32 [%rd1+16], %f15; + add.f32 %f16, %f8, %f9; + st.global.f32 [%rd1+20], %f16; + bra.uni $L__BB4_4; + +$L__BB4_2: + mov.u32 %r1, 2096152002; + st.global.u32 [%rd1], %r1; + st.global.u32 [%rd1+4], %r1; + st.global.u32 [%rd1+8], %r1; + mov.u32 %r2, -51331646; + st.global.u32 [%rd1+12], %r2; + st.global.u32 [%rd1+16], %r2; + st.global.u32 [%rd1+20], %r2; + +$L__BB4_4: + ret; + +} + diff --git a/zluda_rt/src/tests/barycentrics.cu b/zluda_rt/src/tests/barycentrics.cu new file mode 100644 index 0000000..fdd600d --- /dev/null +++ b/zluda_rt/src/tests/barycentrics.cu @@ -0,0 +1,84 @@ +// nvcc barycentrics.cu -I"C:\dev\OptiX SDK 6.5.0\include" -ptx -x cu -dc
+#include <optix.h>
+#include <optixu/optixu_math_namespace.h>
+#include <optix_world.h>
+
+using namespace optix;
+
+rtDeclareVariable(float2, barycentrics, attribute rtTriangleBarycentrics, ); // attribute variable: written by intersect() and attribute_program(), read by closest_hit()
+rtBuffer<float2, 1> output_buffer1; // receives `barycentrics` in closest_hit()
+rtBuffer<float2, 1> output_buffer2; // in this source file only referenced by the commented-out store in closest_hit()
+rtBuffer<int, 1> output_buffer3; // receives rtGetPrimitiveIndex() in closest_hit()
+rtDeclareVariable(rtObject, bvh, , ); // object handed to rtTrace() in start()
+rtDeclareVariable(float4, sphere, , ); // xyz = center, w = radius (as consumed by intersect() and bounds())
+
+rtDeclareVariable(optix::Ray, ray, rtCurrentRay, ); // bound to the rtCurrentRay semantic; read by intersect()
+rtDeclareVariable(int2, launch_index, rtLaunchIndex, ); // bound to the rtLaunchIndex semantic; only .x is used in this file
+
+RT_PROGRAM void start() { // entry program (presumably bound as ray generation -- confirm on the host side): traces one ray per launch index against `bvh`
+ Ray ray = make_Ray(make_float3(float(launch_index.x), 0, -1), make_float3(0,0,1), 0, 0.0, RT_DEFAULT_MAX); // origin (launch_index.x, 0, -1), direction +z, ray type 0, tmin 0, tmax RT_DEFAULT_MAX; this local shadows the file-scope `ray`
+ char unused = 0; // dummy payload: rtTrace requires one, and no rtPayload variable is declared in this file
+ rtTrace(bvh, ray, unused);
+}
+
+RT_PROGRAM void intersect(int primIdx) // ray/sphere intersection test against `sphere`; primIdx is not used
+{
+ float3 center = make_float3(sphere);
+ float3 O = ray.origin - center; // ray origin relative to the sphere center
+ float l = 1 / length(ray.direction); // reciprocal direction length: converts a distance along normalized D into a parameter along ray.direction
+ float3 D = ray.direction * l; // normalized direction
+ float radius = sphere.w;
+
+ float b = dot(O, D);
+ float c = dot(O, O)-radius*radius;
+ float disc = b*b-c; // discriminant of t^2 + 2*b*t + c = 0, i.e. |O + t*D| = radius with |D| = 1
+ if(disc > 0.0f){ // misses and exactly tangent rays (disc == 0) report nothing
+ float sdisc = sqrtf(disc);
+ float root1 = (-b - sdisc); // nearer root
+
+ float root11 = 0.0f; // always zero in this file, so the first candidate is simply root1 * l
+
+ bool check_second = true;
+ if( rtPotentialIntersection( (root1 + root11) * l ) ) {
+ barycentrics = make_float2(100, 200); // fixed marker values stored in the attribute
+ if(rtReportIntersection(0)) // 0 = material index
+ check_second = false; // nearer hit was accepted: skip the farther root
+ }
+ if(check_second) {
+ float root2 = (-b + sdisc); // farther root
+ if( rtPotentialIntersection( root2 * l ) ) {
+ barycentrics = make_float2(100, 200); // same marker values as for the nearer root
+ rtReportIntersection(0);
+ }
+ }
+ }
+}
+
+RT_PROGRAM void bounds (int, float result[6]) // writes the axis-aligned bounding box of `sphere` into result; the int argument is ignored
+{
+ const float3 cen = make_float3( sphere );
+ const float3 rad = make_float3( sphere.w ); // radius replicated into all three components
+
+ optix::Aabb* aabb = (optix::Aabb*)result; // the six floats are reinterpreted as an optix::Aabb (m_min, m_max)
+
+ if( rad.x > 0.0f && !isinf(rad.x) ) {
+ aabb->m_min = cen - rad;
+ aabb->m_max = cen + rad;
+ } else {
+ aabb->invalidate(); // zero, negative, NaN or infinite radius: mark the box invalid instead
+ }
+}
+
+RT_PROGRAM void attribute_program() { // copies the built-in barycentric x into the `barycentrics` attribute and forces y to 0.1
+ float2 read_barycentrics = rtGetTriangleBarycentrics();
+ barycentrics.x = read_barycentrics.x;
+ barycentrics.y = 0.1; // fixed marker; read_barycentrics.y is discarded (double literal, narrowed to float on assignment)
+}
+
+RT_PROGRAM void closest_hit() { // records the hit's attribute and primitive index at slot launch_index.x
+ output_buffer1[launch_index.x] = barycentrics;
+ // rtGetTriangleBarycentrics() happens to work here but is only valid in attribute programs, so the store
+ // below is disabled. NOTE(review): the checked-in barycentrics.ptx still contains it -- regenerate or confirm.
+ //output_buffer2[launch_index.x] = rtGetTriangleBarycentrics();
+ output_buffer3[launch_index.x] = rtGetPrimitiveIndex();
+}
\ No newline at end of file diff --git a/zluda_rt/src/tests/barycentrics.ptx b/zluda_rt/src/tests/barycentrics.ptx new file mode 100644 index 0000000..2e6c878 --- /dev/null +++ b/zluda_rt/src/tests/barycentrics.ptx @@ -0,0 +1,297 @@ +// +// Generated by NVIDIA NVVM Compiler +// +// Compiler Build ID: CL-31833905 +// Cuda compilation tools, release 11.8, V11.8.89 +// Based on NVVM 7.0.1 +// + +.version 7.8 +.target sm_52 +.address_size 64 + + // .globl _Z5startv +.visible .global .align 8 .b8 barycentrics[8]; +.visible .global .align 1 .b8 output_buffer1[1]; +.visible .global .align 1 .b8 output_buffer2[1]; +.visible .global .align 1 .b8 output_buffer3[1]; +.visible .global .align 4 .b8 bvh[4]; +.visible .global .align 16 .b8 sphere[16]; +.visible .global .align 4 .b8 ray[36]; +.visible .global .align 8 .b8 launch_index[8]; +.visible .global .align 4 .b8 _ZN21rti_internal_typeinfo12barycentricsE[8] = {82, 97, 121, 0, 8, 0, 0, 0}; +.visible .global .align 4 .b8 _ZN21rti_internal_typeinfo3bvhE[8] = {82, 97, 121, 0, 4, 0, 0, 0}; +.visible .global .align 4 .b8 _ZN21rti_internal_typeinfo6sphereE[8] = {82, 97, 121, 0, 16, 0, 0, 0}; +.visible .global .align 4 .b8 _ZN21rti_internal_typeinfo3rayE[8] = {82, 97, 121, 0, 36, 0, 0, 0}; +.visible .global .align 4 .b8 _ZN21rti_internal_typeinfo12launch_indexE[8] = {82, 97, 121, 0, 8, 0, 0, 0}; +.visible .global .align 8 .u64 _ZN21rti_internal_register20reg_bitness_detectorE; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail0E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail1E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail2E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail3E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail4E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail5E; +.visible .global .align 8 .u64 
_ZN21rti_internal_register24reg_exception_64_detail6E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail7E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail8E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail9E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail0E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail1E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail2E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail3E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail4E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail5E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail6E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail7E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail8E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail9E; +.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_xE; +.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_yE; +.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_zE; +.visible .global .align 1 .b8 _ZN21rti_internal_typename12barycentricsE[7] = {102, 108, 111, 97, 116, 50, 0}; +.visible .global .align 1 .b8 _ZN21rti_internal_typename3bvhE[9] = {114, 116, 79, 98, 106, 101, 99, 116, 0}; +.visible .global .align 1 .b8 _ZN21rti_internal_typename6sphereE[7] = {102, 108, 111, 97, 116, 52, 0}; +.visible .global .align 1 .b8 _ZN21rti_internal_typename3rayE[11] = {111, 112, 116, 105, 120, 58, 58, 82, 97, 121, 0}; +.visible .global .align 1 .b8 _ZN21rti_internal_typename12launch_indexE[5] = {105, 110, 116, 50, 0}; +.visible .global .align 4 .u32 
_ZN21rti_internal_typeenum12barycentricsE = 4919; +.visible .global .align 4 .u32 _ZN21rti_internal_typeenum3bvhE = 4919; +.visible .global .align 4 .u32 _ZN21rti_internal_typeenum6sphereE = 4919; +.visible .global .align 4 .u32 _ZN21rti_internal_typeenum3rayE = 4919; +.visible .global .align 4 .u32 _ZN21rti_internal_typeenum12launch_indexE = 4919; +.visible .global .align 1 .b8 _ZN21rti_internal_semantic12barycentricsE[33] = {97, 116, 116, 114, 105, 98, 117, 116, 101, 32, 114, 116, 84, 114, 105, 97, 110, 103, 108, 101, 66, 97, 114, 121, 99, 101, 110, 116, 114, 105, 99, 115, 0}; +.visible .global .align 1 .b8 _ZN21rti_internal_semantic3bvhE[1]; +.visible .global .align 1 .b8 _ZN21rti_internal_semantic6sphereE[1]; +.visible .global .align 1 .b8 _ZN21rti_internal_semantic3rayE[13] = {114, 116, 67, 117, 114, 114, 101, 110, 116, 82, 97, 121, 0}; +.visible .global .align 1 .b8 _ZN21rti_internal_semantic12launch_indexE[14] = {114, 116, 76, 97, 117, 110, 99, 104, 73, 110, 100, 101, 120, 0}; +.visible .global .align 1 .b8 _ZN23rti_internal_annotation12barycentricsE[1]; +.visible .global .align 1 .b8 _ZN23rti_internal_annotation3bvhE[1]; +.visible .global .align 1 .b8 _ZN23rti_internal_annotation6sphereE[1]; +.visible .global .align 1 .b8 _ZN23rti_internal_annotation3rayE[1]; +.visible .global .align 1 .b8 _ZN23rti_internal_annotation12launch_indexE[1]; + +.visible .entry _Z5startv() +{ + .local .align 1 .b8 __local_depot0[1]; + .reg .b64 %SP; + .reg .b64 %SPL; + .reg .b16 %rs<2>; + .reg .f32 %f<9>; + .reg .b32 %r<7>; + .reg .b64 %rd<3>; + + + mov.u64 %SPL, __local_depot0; + cvta.local.u64 %SP, %SPL; + add.u64 %rd1, %SP, 0; + add.u64 %rd2, %SPL, 0; + ld.global.u32 %r6, [launch_index]; + cvt.rn.f32.s32 %f1, %r6; + mov.u16 %rs1, 0; + st.local.u8 [%rd2], %rs1; + ld.global.u32 %r1, [bvh]; + mov.f32 %f3, 0fBF800000; + mov.f32 %f6, 0f3F800000; + mov.f32 %f7, 0f00000000; + mov.f32 %f8, 0f6C4ECB8F; + mov.u32 %r3, 255; + mov.u32 %r4, 0; + mov.u32 %r5, 1; + // begin inline asm + call 
_rt_trace_mask_flags_64, (%r1, %f1, %f7, %f3, %f7, %f7, %f6, %r4, %f7, %f8, %r3, %r4, %rd1, %r5); + // end inline asm + ret; + +} + // .globl _Z9intersecti +.visible .entry _Z9intersecti( + .param .u32 _Z9intersecti_param_0 +) +{ + .reg .pred %p<5>; + .reg .f32 %f<47>; + .reg .b32 %r<7>; + + + ld.global.v4.f32 {%f5, %f6, %f7, %f8}, [sphere]; + ld.global.f32 %f13, [ray]; + sub.f32 %f14, %f13, %f5; + ld.global.f32 %f15, [ray+4]; + sub.f32 %f16, %f15, %f6; + ld.global.f32 %f17, [ray+8]; + sub.f32 %f18, %f17, %f7; + ld.global.f32 %f19, [ray+12]; + ld.global.f32 %f20, [ray+16]; + mul.f32 %f21, %f20, %f20; + fma.rn.f32 %f22, %f19, %f19, %f21; + ld.global.f32 %f23, [ray+20]; + fma.rn.f32 %f24, %f23, %f23, %f22; + sqrt.rn.f32 %f25, %f24; + rcp.rn.f32 %f1, %f25; + mul.f32 %f26, %f19, %f1; + mul.f32 %f27, %f1, %f20; + mul.f32 %f28, %f1, %f23; + mul.f32 %f29, %f16, %f27; + fma.rn.f32 %f30, %f14, %f26, %f29; + fma.rn.f32 %f2, %f18, %f28, %f30; + mul.f32 %f31, %f16, %f16; + fma.rn.f32 %f32, %f14, %f14, %f31; + fma.rn.f32 %f33, %f18, %f18, %f32; + mul.f32 %f34, %f8, %f8; + sub.f32 %f35, %f33, %f34; + mul.f32 %f36, %f2, %f2; + sub.f32 %f3, %f36, %f35; + setp.leu.f32 %p1, %f3, 0f00000000; + @%p1 bra $L__BB1_5; + + sqrt.rn.f32 %f4, %f3; + neg.f32 %f38, %f2; + sub.f32 %f39, %f38, %f4; + add.f32 %f40, %f39, 0f00000000; + mul.f32 %f37, %f1, %f40; + // begin inline asm + call (%r1), _rt_potential_intersection, (%f37); + // end inline asm + setp.eq.s32 %p2, %r1, 0; + @%p2 bra $L__BB1_3; + + mov.f32 %f41, 0f43480000; + mov.f32 %f42, 0f42C80000; + st.global.v2.f32 [barycentrics], {%f42, %f41}; + mov.u32 %r3, 0; + // begin inline asm + call (%r2), _rt_report_intersection, (%r3); + // end inline asm + setp.ne.s32 %p3, %r2, 0; + @%p3 bra $L__BB1_5; + +$L__BB1_3: + sub.f32 %f44, %f4, %f2; + mul.f32 %f43, %f1, %f44; + // begin inline asm + call (%r4), _rt_potential_intersection, (%f43); + // end inline asm + setp.eq.s32 %p4, %r4, 0; + @%p4 bra $L__BB1_5; + + mov.f32 %f45, 0f43480000; + mov.f32 
%f46, 0f42C80000; + st.global.v2.f32 [barycentrics], {%f46, %f45}; + mov.u32 %r6, 0; + // begin inline asm + call (%r5), _rt_report_intersection, (%r6); + // end inline asm + +$L__BB1_5: + ret; + +} + // .globl _Z6boundsiPf +.visible .entry _Z6boundsiPf( + .param .u32 _Z6boundsiPf_param_0, + .param .u64 _Z6boundsiPf_param_1 +) +{ + .reg .pred %p<3>; + .reg .f32 %f<17>; + .reg .b32 %r<3>; + .reg .b64 %rd<3>; + + + ld.param.u64 %rd2, [_Z6boundsiPf_param_1]; + cvta.to.global.u64 %rd1, %rd2; + ld.global.v4.f32 {%f6, %f7, %f8, %f9}, [sphere]; + setp.leu.f32 %p1, %f9, 0f00000000; + @%p1 bra $L__BB2_2; + + abs.f32 %f10, %f9; + setp.neu.f32 %p2, %f10, 0f7F800000; + @%p2 bra $L__BB2_3; + bra.uni $L__BB2_2; + +$L__BB2_3: + sub.f32 %f11, %f6, %f9; + st.global.f32 [%rd1], %f11; + sub.f32 %f12, %f7, %f9; + st.global.f32 [%rd1+4], %f12; + sub.f32 %f13, %f8, %f9; + st.global.f32 [%rd1+8], %f13; + add.f32 %f14, %f6, %f9; + st.global.f32 [%rd1+12], %f14; + add.f32 %f15, %f7, %f9; + st.global.f32 [%rd1+16], %f15; + add.f32 %f16, %f8, %f9; + st.global.f32 [%rd1+20], %f16; + bra.uni $L__BB2_4; + +$L__BB2_2: + mov.u32 %r1, 2096152002; + st.global.u32 [%rd1], %r1; + st.global.u32 [%rd1+4], %r1; + st.global.u32 [%rd1+8], %r1; + mov.u32 %r2, -51331646; + st.global.u32 [%rd1+12], %r2; + st.global.u32 [%rd1+16], %r2; + st.global.u32 [%rd1+20], %r2; + +$L__BB2_4: + ret; + +} + // .globl _Z17attribute_programv +.visible .entry _Z17attribute_programv() +{ + .reg .f32 %f<4>; + + + // begin inline asm + call (%f1, %f2), _rt_get_triangle_barycentrics, (); + // end inline asm + mov.f32 %f3, 0f3DCCCCCD; + st.global.v2.f32 [barycentrics], {%f1, %f3}; + ret; + +} + // .globl _Z11closest_hitv +.visible .entry _Z11closest_hitv() +{ + .reg .f32 %f<7>; + .reg .b32 %r<8>; + .reg .b64 %rd<22>; + + + ld.global.v2.f32 {%f3, %f4}, [barycentrics]; + mov.u64 %rd18, 0; + ld.global.s32 %rd3, [launch_index]; + mov.u64 %rd19, output_buffer1; + cvta.global.u64 %rd2, %rd19; + mov.u32 %r6, 1; + mov.u32 %r4, 8; + // 
begin inline asm + call (%rd1), _rt_buffer_get_64, (%rd2, %r6, %r4, %rd3, %rd18, %rd18, %rd18); + // end inline asm + st.v2.f32 [%rd1], {%f3, %f4}; + // begin inline asm + call (%f1, %f2), _rt_get_triangle_barycentrics, (); + // end inline asm + ld.global.s32 %rd9, [launch_index]; + mov.u64 %rd20, output_buffer2; + cvta.global.u64 %rd8, %rd20; + // begin inline asm + call (%rd7), _rt_buffer_get_64, (%rd8, %r6, %r4, %rd9, %rd18, %rd18, %rd18); + // end inline asm + st.v2.f32 [%rd7], {%f1, %f2}; + // begin inline asm + call (%r5), _rt_get_primitive_index, (); + // end inline asm + ld.global.s32 %rd15, [launch_index]; + mov.u64 %rd21, output_buffer3; + cvta.global.u64 %rd14, %rd21; + mov.u32 %r7, 4; + // begin inline asm + call (%rd13), _rt_buffer_get_64, (%rd14, %r6, %r7, %rd15, %rd18, %rd18, %rd18); + // end inline asm + st.u32 [%rd13], %r5; + ret; + +} + diff --git a/zluda_rt/src/tests/buffer_id.cu b/zluda_rt/src/tests/buffer_id.cu new file mode 100644 index 0000000..968bc85 --- /dev/null +++ b/zluda_rt/src/tests/buffer_id.cu @@ -0,0 +1,13 @@ +// nvcc buffer_id.cu -I"C:\dev\OptiX SDK 6.5.0\include" -ptx -x cu -dc +#include <optix.h> +#include <optixu/optixu_math_namespace.h> +#include <optix_world.h> + +using namespace optix; + +rtBuffer<rtBufferId<unsigned int> > buffers; + +RT_PROGRAM void start() { + buffers[0][2] = 0x0118378c; + buffers[0][1] = buffers[0].size(); +} diff --git a/zluda_rt/src/tests/buffer_id.ptx b/zluda_rt/src/tests/buffer_id.ptx new file mode 100644 index 0000000..4ae8645 --- /dev/null +++ b/zluda_rt/src/tests/buffer_id.ptx @@ -0,0 +1,80 @@ +// +// Generated by NVIDIA NVVM Compiler +// +// Compiler Build ID: CL-31833905 +// Cuda compilation tools, release 11.8, V11.8.89 +// Based on NVVM 7.0.1 +// + +.version 7.8 +.target sm_52 +.address_size 64 + + // .globl _Z5startv +.visible .global .align 1 .b8 buffers[1]; +.visible .global .align 8 .u64 _ZN21rti_internal_register20reg_bitness_detectorE; +.visible .global .align 8 .u64 
_ZN21rti_internal_register24reg_exception_64_detail0E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail1E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail2E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail3E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail4E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail5E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail6E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail7E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail8E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail9E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail0E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail1E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail2E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail3E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail4E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail5E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail6E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail7E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail8E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail9E; +.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_xE; +.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_yE; +.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_zE; + +.visible .entry _Z5startv() +{ + .reg .b32 %r<17>; + .reg .b64 %rd<34>; + + + mov.u64 
%rd33, buffers; + cvta.global.u64 %rd2, %rd33; + mov.u32 %r14, 1; + mov.u32 %r15, 4; + mov.u64 %rd32, 0; + // begin inline asm + call (%rd1), _rt_buffer_get_64, (%rd2, %r14, %r15, %rd32, %rd32, %rd32, %rd32); + // end inline asm + ld.u32 %r3, [%rd1]; + mov.u64 %rd8, 2; + // begin inline asm + call (%rd7), _rt_buffer_get_id_64, (%r3, %r14, %r15, %rd8, %rd32, %rd32, %rd32); + // end inline asm + mov.u32 %r16, 18364300; + st.u32 [%rd7], %r16; + // begin inline asm + call (%rd12), _rt_buffer_get_64, (%rd2, %r14, %r15, %rd32, %rd32, %rd32, %rd32); + // end inline asm + ld.u32 %r8, [%rd12]; + // begin inline asm + call (%rd18, %rd19, %rd20, %rd21), _rt_buffer_get_id_size_64, (%r8, %r14, %r15); + // end inline asm + // begin inline asm + call (%rd22), _rt_buffer_get_64, (%rd2, %r14, %r15, %rd32, %rd32, %rd32, %rd32); + // end inline asm + ld.u32 %r13, [%rd22]; + mov.u64 %rd29, 1; + // begin inline asm + call (%rd28), _rt_buffer_get_id_64, (%r13, %r14, %r15, %rd29, %rd32, %rd32, %rd32); + // end inline asm + st.u32 [%rd28], %rd18; + ret; + +} + diff --git a/zluda_rt/src/tests/buffer_id_call.cu b/zluda_rt/src/tests/buffer_id_call.cu new file mode 100644 index 0000000..a9c7659 --- /dev/null +++ b/zluda_rt/src/tests/buffer_id_call.cu @@ -0,0 +1,23 @@ +// nvcc buffer_id_call.cu -I"C:\dev\OptiX SDK 6.5.0\include" -ptx -x cu -dc +#include <optix.h> +#include <optixu/optixu_math_namespace.h> +#include <optix_world.h> + +using namespace optix; + +rtBuffer<rtBufferId<unsigned int> > buffers; + +__noinline__ +__device__ void start2() { + buffers[0][2] = 0x0118378c; + buffers[0][1] = buffers[0].size(); +} + +__noinline__ +__device__ void start1() { + start2(); +} + +RT_PROGRAM void start() { + start1(); +} diff --git a/zluda_rt/src/tests/buffer_id_call.ptx b/zluda_rt/src/tests/buffer_id_call.ptx new file mode 100644 index 0000000..6c2432f --- /dev/null +++ b/zluda_rt/src/tests/buffer_id_call.ptx @@ -0,0 +1,112 @@ +// +// Generated by NVIDIA NVVM Compiler +// +// Compiler Build ID: 
CL-31833905 +// Cuda compilation tools, release 11.8, V11.8.89 +// Based on NVVM 7.0.1 +// + +.version 7.8 +.target sm_52 +.address_size 64 + + // .globl _Z6start2v +.visible .global .align 1 .b8 buffers[1]; +.visible .global .align 8 .u64 _ZN21rti_internal_register20reg_bitness_detectorE; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail0E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail1E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail2E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail3E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail4E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail5E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail6E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail7E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail8E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail9E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail0E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail1E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail2E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail3E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail4E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail5E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail6E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail7E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail8E; +.visible .global .align 4 .u32 
_ZN21rti_internal_register21reg_exception_detail9E; +.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_xE; +.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_yE; +.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_zE; + +.visible .func _Z6start2v() +{ + .reg .b32 %r<17>; + .reg .b64 %rd<34>; + + + mov.u64 %rd33, buffers; + cvta.global.u64 %rd2, %rd33; + mov.u32 %r14, 1; + mov.u32 %r15, 4; + mov.u64 %rd32, 0; + // begin inline asm + call (%rd1), _rt_buffer_get_64, (%rd2, %r14, %r15, %rd32, %rd32, %rd32, %rd32); + // end inline asm + ld.u32 %r3, [%rd1]; + mov.u64 %rd8, 2; + // begin inline asm + call (%rd7), _rt_buffer_get_id_64, (%r3, %r14, %r15, %rd8, %rd32, %rd32, %rd32); + // end inline asm + mov.u32 %r16, 18364300; + st.u32 [%rd7], %r16; + // begin inline asm + call (%rd12), _rt_buffer_get_64, (%rd2, %r14, %r15, %rd32, %rd32, %rd32, %rd32); + // end inline asm + ld.u32 %r8, [%rd12]; + // begin inline asm + call (%rd18, %rd19, %rd20, %rd21), _rt_buffer_get_id_size_64, (%r8, %r14, %r15); + // end inline asm + // begin inline asm + call (%rd22), _rt_buffer_get_64, (%rd2, %r14, %r15, %rd32, %rd32, %rd32, %rd32); + // end inline asm + ld.u32 %r13, [%rd22]; + mov.u64 %rd29, 1; + // begin inline asm + call (%rd28), _rt_buffer_get_id_64, (%r13, %r14, %r15, %rd29, %rd32, %rd32, %rd32); + // end inline asm + st.u32 [%rd28], %rd18; + ret; + +} + // .globl _Z6start1v +.visible .func _Z6start1v() +{ + + + + { // callseq 0, 0 + .reg .b32 temp_param_reg; + call.uni + _Z6start2v, + ( + ); + } // callseq 0 + ret; + +} + // .globl _Z5startv +.visible .entry _Z5startv() +{ + + + + { // callseq 1, 0 + .reg .b32 temp_param_reg; + call.uni + _Z6start1v, + ( + ); + } // callseq 1 + ret; + +} + diff --git a/zluda_rt/src/tests/buffer_id_callable.cu b/zluda_rt/src/tests/buffer_id_callable.cu new file mode 100644 index 0000000..e590d7e --- /dev/null +++ b/zluda_rt/src/tests/buffer_id_callable.cu @@ -0,0 +1,18 @@ +// nvcc 
buffer_id_callable.cu -I"C:\dev\OptiX SDK 6.5.0\include" -ptx -x cu -dc +#include <optix.h> +#include <optixu/optixu_math_namespace.h> +#include <optix_world.h> + +using namespace optix; + +rtBuffer<rtBufferId<unsigned int> > buffers; +rtDeclareVariable(rtCallableProgramId<void(void)>, program,,); + +RT_CALLABLE_PROGRAM void callable() { + buffers[0][2] = 0x0118378c; + buffers[0][1] = buffers[0].size(); +} + +RT_PROGRAM void start() { + program(); +} diff --git a/zluda_rt/src/tests/buffer_id_callable.ptx b/zluda_rt/src/tests/buffer_id_callable.ptx new file mode 100644 index 0000000..4826996 --- /dev/null +++ b/zluda_rt/src/tests/buffer_id_callable.ptx @@ -0,0 +1,110 @@ +// +// Generated by NVIDIA NVVM Compiler +// +// Compiler Build ID: CL-31833905 +// Cuda compilation tools, release 11.8, V11.8.89 +// Based on NVVM 7.0.1 +// + +.version 7.8 +.target sm_52 +.address_size 64 + + // .globl _Z8callablev +.visible .global .align 1 .b8 buffers[1]; +.visible .global .align 4 .b8 program[4]; +.visible .global .align 4 .b8 _ZN21rti_internal_typeinfo7programE[8] = {82, 97, 121, 0, 4, 0, 0, 0}; +.visible .global .align 8 .u64 _ZN21rti_internal_register20reg_bitness_detectorE; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail0E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail1E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail2E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail3E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail4E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail5E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail6E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail7E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail8E; +.visible .global .align 8 .u64 
_ZN21rti_internal_register24reg_exception_64_detail9E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail0E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail1E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail2E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail3E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail4E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail5E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail6E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail7E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail8E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail9E; +.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_xE; +.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_yE; +.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_zE; +.visible .global .align 1 .b8 _ZN21rti_internal_typename7programE[32] = {114, 116, 67, 97, 108, 108, 97, 98, 108, 101, 80, 114, 111, 103, 114, 97, 109, 73, 100, 60, 118, 111, 105, 100, 40, 118, 111, 105, 100, 41, 62, 0}; +.visible .global .align 4 .u32 _ZN21rti_internal_typeenum7programE = 4920; +.visible .global .align 1 .b8 _ZN21rti_internal_semantic7programE[1]; +.visible .global .align 1 .b8 _ZN23rti_internal_annotation7programE[1]; + +.visible .func _Z8callablev() +{ + .reg .b32 %r<17>; + .reg .b64 %rd<34>; + + + mov.u64 %rd33, buffers; + cvta.global.u64 %rd2, %rd33; + mov.u32 %r14, 1; + mov.u32 %r15, 4; + mov.u64 %rd32, 0; + // begin inline asm + call (%rd1), _rt_buffer_get_64, (%rd2, %r14, %r15, %rd32, %rd32, %rd32, %rd32); + // end inline asm + ld.u32 %r3, [%rd1]; + mov.u64 %rd8, 2; + // begin inline asm + call (%rd7), _rt_buffer_get_id_64, (%r3, %r14, 
%r15, %rd8, %rd32, %rd32, %rd32); + // end inline asm + mov.u32 %r16, 18364300; + st.u32 [%rd7], %r16; + // begin inline asm + call (%rd12), _rt_buffer_get_64, (%rd2, %r14, %r15, %rd32, %rd32, %rd32, %rd32); + // end inline asm + ld.u32 %r8, [%rd12]; + // begin inline asm + call (%rd18, %rd19, %rd20, %rd21), _rt_buffer_get_id_size_64, (%r8, %r14, %r15); + // end inline asm + // begin inline asm + call (%rd22), _rt_buffer_get_64, (%rd2, %r14, %r15, %rd32, %rd32, %rd32, %rd32); + // end inline asm + ld.u32 %r13, [%rd22]; + mov.u64 %rd29, 1; + // begin inline asm + call (%rd28), _rt_buffer_get_id_64, (%r13, %r14, %r15, %rd29, %rd32, %rd32, %rd32); + // end inline asm + st.u32 [%rd28], %rd18; + ret; + +} + // .globl _Z5startv +.visible .entry _Z5startv() +{ + .reg .b32 %r<2>; + .reg .b64 %rd<4>; + + + ld.global.u32 %r1, [program]; + mov.u64 %rd2, 0; + // begin inline asm + call (%rd1), _rt_callable_program_from_id_v2_64, (%r1, %rd2); + // end inline asm + { // callseq 0, 0 + .reg .b32 temp_param_reg; + prototype_0 : .callprototype ()_ (); + call + %rd1, + ( + ) + , prototype_0; + } // callseq 0 + ret; + +} + diff --git a/zluda_rt/src/tests/buffer_mipmap.cu b/zluda_rt/src/tests/buffer_mipmap.cu new file mode 100644 index 0000000..e08cc8a --- /dev/null +++ b/zluda_rt/src/tests/buffer_mipmap.cu @@ -0,0 +1,17 @@ +// nvcc buffer_mipmap.cu -I"C:\dev\OptiX SDK 6.5.0\include" -ptx -x cu -dc +#include <optix.h> +#include <optixu/optixu_math_namespace.h> +#include <optix_world.h> + +using namespace optix; + +rtDeclareVariable( uint, texture_id, , ); +rtBuffer<uint2, 1> output_buffer; + +RT_PROGRAM void start() { + bool isResident; + uint4 val0 = rtTex2DLodLoadOrRequest<uint4>( texture_id, 0, 0, 0, isResident ); + output_buffer[0] = make_uint2(val0.x, val0.y); + uint4 val1 = rtTex2DLodLoadOrRequest<uint4>( texture_id, 0, 0, 1000, isResident ); + output_buffer[1] = make_uint2(val1.x, val1.y); +} diff --git a/zluda_rt/src/tests/buffer_mipmap.ptx b/zluda_rt/src/tests/buffer_mipmap.ptx 
new file mode 100644 index 0000000..6889584 --- /dev/null +++ b/zluda_rt/src/tests/buffer_mipmap.ptx @@ -0,0 +1,89 @@ +// +// Generated by NVIDIA NVVM Compiler +// +// Compiler Build ID: CL-27506705 +// Cuda compilation tools, release 10.2, V10.2.89 +// Based on LLVM 3.4svn +// + +.version 6.5 +.target sm_30 +.address_size 64 + + // .globl _Z5startv +.visible .global .align 4 .u32 texture_id; +.visible .global .align 1 .b8 output_buffer[1]; +.visible .global .align 4 .b8 _ZN21rti_internal_typeinfo10texture_idE[8] = {82, 97, 121, 0, 4, 0, 0, 0}; +.visible .global .align 8 .u64 _ZN21rti_internal_register20reg_bitness_detectorE; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail0E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail1E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail2E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail3E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail4E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail5E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail6E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail7E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail8E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail9E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail0E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail1E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail2E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail3E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail4E; +.visible .global .align 4 .u32 
_ZN21rti_internal_register21reg_exception_detail5E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail6E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail7E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail8E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail9E; +.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_xE; +.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_yE; +.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_zE; +.visible .global .align 1 .b8 _ZN21rti_internal_typename10texture_idE[5] = {117, 105, 110, 116, 0}; +.visible .global .align 4 .u32 _ZN21rti_internal_typeenum10texture_idE = 4919; +.visible .global .align 1 .b8 _ZN21rti_internal_semantic10texture_idE[1]; +.visible .global .align 1 .b8 _ZN23rti_internal_annotation10texture_idE[1]; + +.visible .entry _Z5startv( + +) +{ + .local .align 1 .b8 __local_depot0[1]; + .reg .b64 %SP; + .reg .b64 %SPL; + .reg .f32 %f<11>; + .reg .b32 %r<17>; + .reg .b64 %rd<16>; + + + mov.u64 %SPL, __local_depot0; + cvta.local.u64 %SP, %SPL; + ld.global.u32 %r5, [texture_id]; + mov.u32 %r14, 2; + mov.f32 %f9, 0f00000000; + add.u64 %rd8, %SP, 0; + // inline asm + call (%r1, %r2, %r3, %r4), _rt_texture_lod_load_or_request_u_id, (%r5, %r14, %f9, %f9, %f9, %f9, %f9, %rd8); + // inline asm + mov.u64 %rd15, output_buffer; + cvta.global.u64 %rd3, %rd15; + mov.u32 %r15, 1; + mov.u32 %r16, 8; + mov.u64 %rd14, 0; + // inline asm + call (%rd2), _rt_buffer_get_64, (%rd3, %r15, %r16, %rd14, %rd14, %rd14, %rd14); + // inline asm + st.v2.u32 [%rd2], {%r1, %r2}; + ld.global.u32 %r13, [texture_id]; + mov.f32 %f10, 0f447A0000; + // inline asm + call (%r9, %r10, %r11, %r12), _rt_texture_lod_load_or_request_u_id, (%r13, %r14, %f9, %f9, %f9, %f9, %f10, %rd8); + // inline asm + mov.u64 %rd11, 1; + // inline asm + call (%rd9), _rt_buffer_get_64, (%rd3, 
%r15, %r16, %rd11, %rd14, %rd14, %rd14); + // inline asm + st.v2.u32 [%rd9], {%r9, %r10}; + ret; +} + + diff --git a/zluda_rt/src/tests/callable_programs.cu b/zluda_rt/src/tests/callable_programs.cu new file mode 100644 index 0000000..c63aae5 --- /dev/null +++ b/zluda_rt/src/tests/callable_programs.cu @@ -0,0 +1,28 @@ +// nvcc callable_programs.cu -I"C:\dev\OptiX SDK 6.5.0\include" -ptx -x cu -dc
+#include <optix.h>
+#include <optixu/optixu_math_namespace.h>
+#include <optix_world.h>
+
+using namespace optix;
+
+// Scalar input read by add_value, multiply_value and start.
+rtDeclareVariable(unsigned int, value, , );
+// One-dimensional output buffer; start() writes its result to element 0.
+rtBuffer<unsigned int, 1> output_buffer;
+
+// Callable program id type for programs taking and returning an unsigned int.
+typedef rtCallableProgramId<unsigned int(unsigned int)> int_operator;
+// Callable program ids invoked by start(), in this order: add_fn, then mult_fn.
+// NOTE(review): which program is bound to each id is decided by the host-side
+// test, which is not visible here - presumably add_value / multiply_value.
+rtDeclareVariable(int_operator, add_fn,,);
+rtDeclareVariable(int_operator, mult_fn,,);
+
+// Callable program: returns `input` plus the `value` variable.
+RT_CALLABLE_PROGRAM unsigned int add_value(unsigned int input) {
+ return input + value;
+}
+
+// Callable program: returns `input` multiplied by the `value` variable.
+RT_CALLABLE_PROGRAM unsigned int multiply_value(unsigned int input) {
+ return input * value;
+}
+
+// Program under test: starts from `value`, passes it through the callable
+// bound to add_fn, passes that result through the callable bound to mult_fn,
+// and stores the final number in output_buffer[0].
+// NOTE(review): if the host binds add_fn -> add_value and mult_fn ->
+// multiply_value, the stored result is (value + value) * value - confirm
+// against the host-side test.
+RT_PROGRAM void start() {
+ unsigned int x = value;
+ x = add_fn(x);
+ x = mult_fn(x);
+ output_buffer[0] = x;
+}
diff --git a/zluda_rt/src/tests/callable_programs.ptx b/zluda_rt/src/tests/callable_programs.ptx new file mode 100644 index 0000000..b626980 --- /dev/null +++ b/zluda_rt/src/tests/callable_programs.ptx @@ -0,0 +1,152 @@ +//
+// Generated by NVIDIA NVVM Compiler
+//
+// Compiler Build ID: CL-31833905
+// Cuda compilation tools, release 11.8, V11.8.89
+// Based on NVVM 7.0.1
+//
+
+.version 7.8
+.target sm_52
+.address_size 64
+
+ // .globl _Z9add_valuej
+// Module-scope symbols. The first four are the user-declared variables used by
+// the functions below: `value` (u32), `output_buffer`, and the 4-byte callable
+// program ids `add_fn` and `mult_fn`.
+.visible .global .align 4 .u32 value;
+.visible .global .align 1 .b8 output_buffer[1];
+.visible .global .align 4 .b8 add_fn[4];
+.visible .global .align 4 .b8 mult_fn[4];
+// Per-variable typeinfo records. The leading bytes 82, 97, 121, 0 are ASCII
+// "Ray" plus a NUL; the trailing 4, 0, 0, 0 is presumably the variable size in
+// bytes (little-endian) - TODO confirm against the OptiX headers.
+.visible .global .align 4 .b8 _ZN21rti_internal_typeinfo5valueE[8] = {82, 97, 121, 0, 4, 0, 0, 0};
+.visible .global .align 4 .b8 _ZN21rti_internal_typeinfo6add_fnE[8] = {82, 97, 121, 0, 4, 0, 0, 0};
+.visible .global .align 4 .b8 _ZN21rti_internal_typeinfo7mult_fnE[8] = {82, 97, 121, 0, 4, 0, 0, 0};
+// rti_internal_register symbols. None of them is referenced by the functions
+// defined in this module.
+.visible .global .align 8 .u64 _ZN21rti_internal_register20reg_bitness_detectorE;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail0E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail1E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail2E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail3E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail4E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail5E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail6E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail7E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail8E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail9E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail0E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail1E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail2E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail3E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail4E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail5E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail6E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail7E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail8E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail9E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_xE;
+.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_yE;
+.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_zE;
+// NUL-terminated ASCII type names: "unsigned int" for `value`, "int_operator"
+// for `add_fn` and `mult_fn`.
+.visible .global .align 1 .b8 _ZN21rti_internal_typename5valueE[13] = {117, 110, 115, 105, 103, 110, 101, 100, 32, 105, 110, 116, 0};
+.visible .global .align 1 .b8 _ZN21rti_internal_typename6add_fnE[13] = {105, 110, 116, 95, 111, 112, 101, 114, 97, 116, 111, 114, 0};
+.visible .global .align 1 .b8 _ZN21rti_internal_typename7mult_fnE[13] = {105, 110, 116, 95, 111, 112, 101, 114, 97, 116, 111, 114, 0};
+// Type enum tags: 4919 for the plain variable, 4920 for the two callable ids.
+// NOTE(review): the meaning of these constants is defined by OptiX, not here.
+.visible .global .align 4 .u32 _ZN21rti_internal_typeenum5valueE = 4919;
+.visible .global .align 4 .u32 _ZN21rti_internal_typeenum6add_fnE = 4920;
+.visible .global .align 4 .u32 _ZN21rti_internal_typeenum7mult_fnE = 4920;
+// Semantic and annotation records: one-byte arrays with no initializer for
+// all three variables.
+.visible .global .align 1 .b8 _ZN21rti_internal_semantic5valueE[1];
+.visible .global .align 1 .b8 _ZN21rti_internal_semantic6add_fnE[1];
+.visible .global .align 1 .b8 _ZN21rti_internal_semantic7mult_fnE[1];
+.visible .global .align 1 .b8 _ZN23rti_internal_annotation5valueE[1];
+.visible .global .align 1 .b8 _ZN23rti_internal_annotation6add_fnE[1];
+.visible .global .align 1 .b8 _ZN23rti_internal_annotation7mult_fnE[1];
+
+// add_value(unsigned int): returns the 32-bit parameter plus the global `value`.
+.visible .func (.param .b32 func_retval0) _Z9add_valuej(
+ .param .b32 _Z9add_valuej_param_0
+)
+{
+ .reg .b32 %r<4>;
+
+
+ // %r1 = input argument, %r2 = current contents of `value`.
+ ld.param.u32 %r1, [_Z9add_valuej_param_0];
+ ld.global.u32 %r2, [value];
+ add.s32 %r3, %r2, %r1;
+ // Hand the sum back through the return parameter.
+ st.param.b32 [func_retval0+0], %r3;
+ ret;
+
+}
+ // .globl _Z14multiply_valuej
+// multiply_value(unsigned int): returns the low 32 bits of the parameter
+// multiplied by the global `value`.
+.visible .func (.param .b32 func_retval0) _Z14multiply_valuej(
+ .param .b32 _Z14multiply_valuej_param_0
+)
+{
+ .reg .b32 %r<4>;
+
+
+ // %r1 = input argument, %r2 = current contents of `value`.
+ ld.param.u32 %r1, [_Z14multiply_valuej_param_0];
+ ld.global.u32 %r2, [value];
+ mul.lo.s32 %r3, %r2, %r1;
+ // Hand the product back through the return parameter.
+ st.param.b32 [func_retval0+0], %r3;
+ ret;
+
+}
+ // .globl _Z5startv
+// start(): loads `value`, passes it through the callable identified by
+// `add_fn`, passes that result through the callable identified by `mult_fn`,
+// and stores the final 32-bit result at the address returned by
+// _rt_buffer_get_64 for `output_buffer`.
+.visible .entry _Z5startv()
+{
+ .reg .b32 %r<8>;
+ .reg .b64 %rd<14>;
+
+
+ // %r5 = value; %rd10 = 0 is reused below as a zero argument.
+ ld.global.u32 %r5, [value];
+ mov.u64 %rd10, 0;
+ // Turn the id stored in add_fn into %rd1, the target of the first
+ // indirect call.
+ ld.global.u32 %r1, [add_fn];
+ // begin inline asm
+ call (%rd1), _rt_callable_program_from_id_v2_64, (%r1, %rd10);
+ // end inline asm
+ // %r3 = 1 is not consumed until the _rt_buffer_get_64 call at the end.
+ mov.u32 %r3, 1;
+ // First indirect call: %r6 = (*%rd1)(%r5).
+ { // callseq 0, 0
+ .reg .b32 temp_param_reg;
+ .param .b32 param0;
+ st.param.b32 [param0+0], %r5;
+ .param .b32 retval0;
+ prototype_0 : .callprototype (.param .b32 _) _ (.param .b32 _);
+ call (retval0),
+ %rd1,
+ (
+ param0
+ )
+ , prototype_0;
+ ld.param.b32 %r6, [retval0+0];
+ } // callseq 0
+ // Turn the id stored in mult_fn into %rd3, the target of the second
+ // indirect call.
+ ld.global.u32 %r2, [mult_fn];
+ // begin inline asm
+ call (%rd3), _rt_callable_program_from_id_v2_64, (%r2, %rd10);
+ // end inline asm
+ // Second indirect call: %r7 = (*%rd3)(%r6).
+ { // callseq 1, 0
+ .reg .b32 temp_param_reg;
+ .param .b32 param0;
+ st.param.b32 [param0+0], %r6;
+ .param .b32 retval0;
+ prototype_1 : .callprototype (.param .b32 _) _ (.param .b32 _);
+ call (retval0),
+ %rd3,
+ (
+ param0
+ )
+ , prototype_1;
+ ld.param.b32 %r7, [retval0+0];
+ } // callseq 1
+ // Ask the runtime for the element address inside output_buffer; all four
+ // 64-bit index arguments are zero (%rd10).
+ // NOTE(review): %r3 = 1 and %r4 = 4 are presumably the buffer
+ // dimensionality and element size - confirm against the OptiX intrinsic.
+ mov.u64 %rd13, output_buffer;
+ cvta.global.u64 %rd6, %rd13;
+ mov.u32 %r4, 4;
+ // begin inline asm
+ call (%rd5), _rt_buffer_get_64, (%rd6, %r3, %r4, %rd10, %rd10, %rd10, %rd10);
+ // end inline asm
+ // Write the final result through the returned address.
+ st.u32 [%rd5], %r7;
+ ret;
+
+}
+ // .globl _Z10do_nothingv
+// do_nothing(): empty entry point that returns immediately.
+// NOTE(review): callable_programs.cu as added in this same patch defines only
+// add_value, multiply_value and start, so this PTX was presumably generated
+// from a different revision of the source - confirm and regenerate if needed.
+.visible .entry _Z10do_nothingv()
+{
+
+
+
+ ret;
+
+}
+
diff --git a/zluda_rt/src/tests/default_variable.cu b/zluda_rt/src/tests/default_variable.cu new file mode 100644 index 0000000..13e7a99 --- /dev/null +++ b/zluda_rt/src/tests/default_variable.cu @@ -0,0 +1,13 @@ +// nvcc default_variable.cu -I"C:\dev\OptiX SDK 6.5.0\include" -ptx -x cu -dc +#include <optix.h> +#include <optixu/optixu_math_namespace.h> +#include <optix_world.h> + +using namespace optix; + +rtBuffer<unsigned int, 1> var_buffer; +rtDeclareVariable(unsigned int, x, , ) = 55; + +RT_PROGRAM void start() { + var_buffer[0] = x; +} diff --git a/zluda_rt/src/tests/default_variable.ptx b/zluda_rt/src/tests/default_variable.ptx new file mode 100644 index 0000000..22d1b96 --- /dev/null +++ b/zluda_rt/src/tests/default_variable.ptx @@ -0,0 +1,65 @@ +// +// Generated by NVIDIA NVVM Compiler +// +// Compiler Build ID: CL-31833905 +// Cuda compilation tools, release 11.8, V11.8.89 +// Based on NVVM 7.0.1 +// + +.version 7.8 +.target sm_52 +.address_size 64 + + // .globl _Z5startv +.visible .global .align 1 .b8 var_buffer[1]; +.visible .global .align 4 .u32 x = 55; +.visible .global .align 4 .b8 _ZN21rti_internal_typeinfo1xE[8] = {82, 97, 121, 0, 4, 0, 0, 0}; +.visible .global .align 8 .u64 _ZN21rti_internal_register20reg_bitness_detectorE; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail0E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail1E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail2E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail3E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail4E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail5E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail6E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail7E; +.visible .global .align 8 .u64 
_ZN21rti_internal_register24reg_exception_64_detail8E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail9E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail0E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail1E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail2E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail3E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail4E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail5E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail6E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail7E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail8E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail9E; +.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_xE; +.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_yE; +.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_zE; +.visible .global .align 1 .b8 _ZN21rti_internal_typename1xE[13] = {117, 110, 115, 105, 103, 110, 101, 100, 32, 105, 110, 116, 0}; +.visible .global .align 4 .u32 _ZN21rti_internal_typeenum1xE = 4919; +.visible .global .align 1 .b8 _ZN21rti_internal_semantic1xE[1]; +.visible .global .align 1 .b8 _ZN23rti_internal_annotation1xE[1]; + +.visible .entry _Z5startv() +{ + .reg .b32 %r<4>; + .reg .b64 %rd<8>; + + + ld.global.u32 %r3, [x]; + mov.u64 %rd6, 0; + mov.u64 %rd7, var_buffer; + cvta.global.u64 %rd2, %rd7; + mov.u32 %r1, 1; + mov.u32 %r2, 4; + // begin inline asm + call (%rd1), _rt_buffer_get_64, (%rd2, %r1, %r2, %rd6, %rd6, %rd6, %rd6); + // end inline asm + st.u32 [%rd1], %r3; + ret; + +} + diff --git a/zluda_rt/src/tests/exception.cu b/zluda_rt/src/tests/exception.cu new 
file mode 100644 index 0000000..c114768 --- /dev/null +++ b/zluda_rt/src/tests/exception.cu @@ -0,0 +1,50 @@ +// nvcc exception.cu -I"C:\dev\OptiX SDK 6.5.0\include" -ptx -x cu -dc +#include <optix.h> +#include <optixu/optixu_math_namespace.h> +#include <optix_world.h> + +using namespace optix; + +rtBuffer<unsigned int, 1> var_buffer; +rtDeclareVariable(rtObject, bvh, , ); +rtDeclareVariable(uint2, launch_index, rtLaunchIndex, ); +typedef rtCallableProgramId<unsigned int(unsigned int)> int_operator; +rtDeclareVariable(int_operator, callable1,,); +rtDeclareVariable(int_operator, callable2,,); +rtDeclareVariable(int_operator, callable3,,); + +RT_PROGRAM void trace() { + Ray ray = make_Ray(make_float3(float(launch_index.x), 0, -1), make_float3(0,0,1), 0, 0.0, RT_DEFAULT_MAX); + char unused = 0; + rtTrace(bvh, ray, unused); +} + +RT_PROGRAM void throw_() { + rtThrow(RT_EXCEPTION_USER); +} + +RT_PROGRAM void exception() { + var_buffer[0] = rtGetExceptionCode(); +} + +RT_PROGRAM void call_callable1() { + callable1(1); +} + +RT_CALLABLE_PROGRAM unsigned int call_callable2(unsigned int x) { + return callable2(x); +} + +RT_CALLABLE_PROGRAM unsigned int throw_callable(unsigned int x) { + rtThrow(RT_EXCEPTION_USER + x); + return x; +} + +__noinline__ __device__ void throw_callable_sub() { + callable3(1); +} + +RT_CALLABLE_PROGRAM unsigned int throw_callable_main(unsigned int x) { + throw_callable_sub(); + return x; +} diff --git a/zluda_rt/src/tests/exception.ptx b/zluda_rt/src/tests/exception.ptx new file mode 100644 index 0000000..b0304e0 --- /dev/null +++ b/zluda_rt/src/tests/exception.ptx @@ -0,0 +1,294 @@ +// +// Generated by NVIDIA NVVM Compiler +// +// Compiler Build ID: CL-27506705 +// Cuda compilation tools, release 10.2, V10.2.89 +// Based on LLVM 3.4svn +// + +.version 6.5 +.target sm_30 +.address_size 64 + + // .globl _Z14call_callable2j +.visible .global .align 1 .b8 var_buffer[1]; +.visible .global .align 4 .b8 bvh[4]; +.visible .global .align 8 .b8 
launch_index[8]; +.visible .global .align 4 .b8 callable1[4]; +.visible .global .align 4 .b8 callable2[4]; +.visible .global .align 4 .b8 callable3[4]; +.visible .global .align 4 .b8 _ZN21rti_internal_typeinfo3bvhE[8] = {82, 97, 121, 0, 4, 0, 0, 0}; +.visible .global .align 4 .b8 _ZN21rti_internal_typeinfo12launch_indexE[8] = {82, 97, 121, 0, 8, 0, 0, 0}; +.visible .global .align 4 .b8 _ZN21rti_internal_typeinfo9callable1E[8] = {82, 97, 121, 0, 4, 0, 0, 0}; +.visible .global .align 4 .b8 _ZN21rti_internal_typeinfo9callable2E[8] = {82, 97, 121, 0, 4, 0, 0, 0}; +.visible .global .align 4 .b8 _ZN21rti_internal_typeinfo9callable3E[8] = {82, 97, 121, 0, 4, 0, 0, 0}; +.visible .global .align 8 .u64 _ZN21rti_internal_register20reg_bitness_detectorE; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail0E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail1E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail2E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail3E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail4E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail5E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail6E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail7E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail8E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail9E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail0E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail1E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail2E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail3E; +.visible .global .align 4 .u32 
_ZN21rti_internal_register21reg_exception_detail4E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail5E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail6E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail7E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail8E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail9E; +.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_xE; +.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_yE; +.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_zE; +.visible .global .align 1 .b8 _ZN21rti_internal_typename3bvhE[9] = {114, 116, 79, 98, 106, 101, 99, 116, 0}; +.visible .global .align 1 .b8 _ZN21rti_internal_typename12launch_indexE[6] = {117, 105, 110, 116, 50, 0}; +.visible .global .align 1 .b8 _ZN21rti_internal_typename9callable1E[13] = {105, 110, 116, 95, 111, 112, 101, 114, 97, 116, 111, 114, 0}; +.visible .global .align 1 .b8 _ZN21rti_internal_typename9callable2E[13] = {105, 110, 116, 95, 111, 112, 101, 114, 97, 116, 111, 114, 0}; +.visible .global .align 1 .b8 _ZN21rti_internal_typename9callable3E[13] = {105, 110, 116, 95, 111, 112, 101, 114, 97, 116, 111, 114, 0}; +.visible .global .align 4 .u32 _ZN21rti_internal_typeenum3bvhE = 4919; +.visible .global .align 4 .u32 _ZN21rti_internal_typeenum12launch_indexE = 4919; +.visible .global .align 4 .u32 _ZN21rti_internal_typeenum9callable1E = 4920; +.visible .global .align 4 .u32 _ZN21rti_internal_typeenum9callable2E = 4920; +.visible .global .align 4 .u32 _ZN21rti_internal_typeenum9callable3E = 4920; +.visible .global .align 1 .b8 _ZN21rti_internal_semantic3bvhE[1]; +.visible .global .align 1 .b8 _ZN21rti_internal_semantic12launch_indexE[14] = {114, 116, 76, 97, 117, 110, 99, 104, 73, 110, 100, 101, 120, 0}; +.visible .global .align 1 .b8 
_ZN21rti_internal_semantic9callable1E[1]; +.visible .global .align 1 .b8 _ZN21rti_internal_semantic9callable2E[1]; +.visible .global .align 1 .b8 _ZN21rti_internal_semantic9callable3E[1]; +.visible .global .align 1 .b8 _ZN23rti_internal_annotation3bvhE[1]; +.visible .global .align 1 .b8 _ZN23rti_internal_annotation12launch_indexE[1]; +.visible .global .align 1 .b8 _ZN23rti_internal_annotation9callable1E[1]; +.visible .global .align 1 .b8 _ZN23rti_internal_annotation9callable2E[1]; +.visible .global .align 1 .b8 _ZN23rti_internal_annotation9callable3E[1]; + +.visible .func (.param .b32 func_retval0) _Z14call_callable2j( + .param .b32 _Z14call_callable2j_param_0 +) +{ + .reg .b32 %r<4>; + .reg .b64 %rd<4>; + + + ld.param.u32 %r2, [_Z14call_callable2j_param_0]; + ld.global.u32 %r1, [callable2]; + mov.u64 %rd2, 0; + // inline asm + call (%rd1), _rt_callable_program_from_id_v2_64, (%r1, %rd2); + // inline asm + // Callseq Start 0 + { + .reg .b32 temp_param_reg; + // <end>} + .param .b32 param0; + st.param.b32 [param0+0], %r2; + .param .b32 retval0; + prototype_0 : .callprototype (.param .b32 _) _ (.param .b32 _) ; + call (retval0), + %rd1, + ( + param0 + ) + , prototype_0; + ld.param.b32 %r3, [retval0+0]; + + //{ + }// Callseq End 0 + st.param.b32 [func_retval0+0], %r3; + ret; +} + + // .globl _Z14throw_callablej +.visible .func (.param .b32 func_retval0) _Z14throw_callablej( + .param .b32 _Z14throw_callablej_param_0 +) +{ + .reg .b32 %r<3>; + + + ld.param.u32 %r2, [_Z14throw_callablej_param_0]; + add.s32 %r1, %r2, 1024; + // inline asm + call _rt_throw, (%r1); + // inline asm + st.param.b32 [func_retval0+0], %r2; + ret; +} + + // .globl _Z18throw_callable_subv +.visible .func _Z18throw_callable_subv( + +) +{ + .reg .b32 %r<4>; + .reg .b64 %rd<4>; + + + ld.global.u32 %r1, [callable3]; + mov.u64 %rd2, 0; + // inline asm + call (%rd1), _rt_callable_program_from_id_v2_64, (%r1, %rd2); + // inline asm + mov.u32 %r2, 1; + // Callseq Start 1 + { + .reg .b32 temp_param_reg; + 
// <end>} + .param .b32 param0; + st.param.b32 [param0+0], %r2; + .param .b32 retval0; + prototype_1 : .callprototype (.param .b32 _) _ (.param .b32 _) ; + call (retval0), + %rd1, + ( + param0 + ) + , prototype_1; + ld.param.b32 %r3, [retval0+0]; + + //{ + }// Callseq End 1 + ret; +} + + // .globl _Z19throw_callable_mainj +.visible .func (.param .b32 func_retval0) _Z19throw_callable_mainj( + .param .b32 _Z19throw_callable_mainj_param_0 +) +{ + .reg .b32 %r<2>; + + + ld.param.u32 %r1, [_Z19throw_callable_mainj_param_0]; + // Callseq Start 2 + { + .reg .b32 temp_param_reg; + // <end>} + call.uni + _Z18throw_callable_subv, + ( + ); + + //{ + }// Callseq End 2 + st.param.b32 [func_retval0+0], %r1; + ret; +} + + // .globl _Z5tracev +.visible .entry _Z5tracev( + +) +{ + .local .align 1 .b8 __local_depot4[1]; + .reg .b64 %SP; + .reg .b64 %SPL; + .reg .b16 %rs<2>; + .reg .f32 %f<9>; + .reg .b32 %r<7>; + .reg .b64 %rd<3>; + + + mov.u64 %SPL, __local_depot4; + cvta.local.u64 %SP, %SPL; + add.u64 %rd1, %SP, 0; + add.u64 %rd2, %SPL, 0; + ld.global.u32 %r6, [launch_index]; + cvt.rn.f32.u32 %f1, %r6; + mov.u16 %rs1, 0; + st.local.u8 [%rd2], %rs1; + ld.global.u32 %r1, [bvh]; + mov.u32 %r3, 255; + mov.u32 %r4, 0; + mov.u32 %r5, 1; + mov.f32 %f3, 0fBF800000; + mov.f32 %f6, 0f3F800000; + mov.f32 %f7, 0f00000000; + mov.f32 %f8, 0f6C4ECB8F; + // inline asm + call _rt_trace_mask_flags_64, (%r1, %f1, %f7, %f3, %f7, %f7, %f6, %r4, %f7, %f8, %r3, %r4, %rd1, %r5); + // inline asm + ret; +} + + // .globl _Z6throw_v +.visible .entry _Z6throw_v( + +) +{ + .reg .b32 %r<2>; + + + mov.u32 %r1, 1024; + // inline asm + call _rt_throw, (%r1); + // inline asm + ret; +} + + // .globl _Z9exceptionv +.visible .entry _Z9exceptionv( + +) +{ + .reg .b32 %r<4>; + .reg .b64 %rd<8>; + + + // inline asm + call (%r1), _rt_get_exception_code, (); + // inline asm + mov.u64 %rd7, var_buffer; + cvta.global.u64 %rd2, %rd7; + mov.u32 %r2, 1; + mov.u32 %r3, 4; + mov.u64 %rd6, 0; + // inline asm + call (%rd1), 
_rt_buffer_get_64, (%rd2, %r2, %r3, %rd6, %rd6, %rd6, %rd6); + // inline asm + st.u32 [%rd1], %r1; + ret; +} + + // .globl _Z14call_callable1v +.visible .entry _Z14call_callable1v( + +) +{ + .reg .b32 %r<4>; + .reg .b64 %rd<4>; + + + ld.global.u32 %r1, [callable1]; + mov.u64 %rd2, 0; + // inline asm + call (%rd1), _rt_callable_program_from_id_v2_64, (%r1, %rd2); + // inline asm + mov.u32 %r2, 1; + // Callseq Start 3 + { + .reg .b32 temp_param_reg; + // <end>} + .param .b32 param0; + st.param.b32 [param0+0], %r2; + .param .b32 retval0; + prototype_3 : .callprototype (.param .b32 _) _ (.param .b32 _) ; + call (retval0), + %rd1, + ( + param0 + ) + , prototype_3; + ld.param.b32 %r3, [retval0+0]; + + //{ + }// Callseq End 3 + ret; +} + + diff --git a/zluda_rt/src/tests/exception_subfunc.cu b/zluda_rt/src/tests/exception_subfunc.cu new file mode 100644 index 0000000..4d57a77 --- /dev/null +++ b/zluda_rt/src/tests/exception_subfunc.cu @@ -0,0 +1,28 @@ +// nvcc exception.cu -I"C:\dev\OptiX SDK 6.5.0\include" -ptx -x cu -dc +#include <optix.h> +#include <optixu/optixu_math_namespace.h> +#include <optix_world.h> + +using namespace optix; + +rtBuffer<unsigned int, 1> var_buffer; +rtDeclareVariable(rtObject, bvh, , ); +rtDeclareVariable(uint2, launch_index, rtLaunchIndex, ); + +__device__ __noinline__ void trace() { + Ray ray = make_Ray(make_float3(float(launch_index.x), 0, -1), make_float3(0,0,1), 0, 0.0, RT_DEFAULT_MAX); + char unused = 0; + rtTrace(bvh, ray, unused); +} + +RT_PROGRAM void start() { + trace(); +} + +RT_PROGRAM void throw_() { + rtThrow(RT_EXCEPTION_USER); +} + +RT_PROGRAM void exception() { + var_buffer[0] = rtGetExceptionCode(); +} diff --git a/zluda_rt/src/tests/exception_subfunc.ptx b/zluda_rt/src/tests/exception_subfunc.ptx new file mode 100644 index 0000000..d05b30a --- /dev/null +++ b/zluda_rt/src/tests/exception_subfunc.ptx @@ -0,0 +1,148 @@ +// +// Generated by NVIDIA NVVM Compiler +// +// Compiler Build ID: CL-27506705 +// Cuda compilation tools, 
release 10.2, V10.2.89 +// Based on LLVM 3.4svn +// + +.version 6.5 +.target sm_30 +.address_size 64 + + // .globl _Z5tracev +.visible .global .align 1 .b8 var_buffer[1]; +.visible .global .align 4 .b8 bvh[4]; +.visible .global .align 8 .b8 launch_index[8]; +.visible .global .align 4 .b8 _ZN21rti_internal_typeinfo3bvhE[8] = {82, 97, 121, 0, 4, 0, 0, 0}; +.visible .global .align 4 .b8 _ZN21rti_internal_typeinfo12launch_indexE[8] = {82, 97, 121, 0, 8, 0, 0, 0}; +.visible .global .align 8 .u64 _ZN21rti_internal_register20reg_bitness_detectorE; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail0E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail1E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail2E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail3E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail4E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail5E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail6E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail7E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail8E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail9E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail0E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail1E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail2E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail3E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail4E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail5E; +.visible .global .align 4 .u32 
_ZN21rti_internal_register21reg_exception_detail6E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail7E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail8E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail9E; +.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_xE; +.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_yE; +.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_zE; +.visible .global .align 1 .b8 _ZN21rti_internal_typename3bvhE[9] = {114, 116, 79, 98, 106, 101, 99, 116, 0}; +.visible .global .align 1 .b8 _ZN21rti_internal_typename12launch_indexE[6] = {117, 105, 110, 116, 50, 0}; +.visible .global .align 4 .u32 _ZN21rti_internal_typeenum3bvhE = 4919; +.visible .global .align 4 .u32 _ZN21rti_internal_typeenum12launch_indexE = 4919; +.visible .global .align 1 .b8 _ZN21rti_internal_semantic3bvhE[1]; +.visible .global .align 1 .b8 _ZN21rti_internal_semantic12launch_indexE[14] = {114, 116, 76, 97, 117, 110, 99, 104, 73, 110, 100, 101, 120, 0}; +.visible .global .align 1 .b8 _ZN23rti_internal_annotation3bvhE[1]; +.visible .global .align 1 .b8 _ZN23rti_internal_annotation12launch_indexE[1]; + +.visible .func _Z5tracev( + +) +{ + .local .align 1 .b8 __local_depot0[1]; + .reg .b64 %SP; + .reg .b64 %SPL; + .reg .b16 %rs<2>; + .reg .f32 %f<9>; + .reg .b32 %r<7>; + .reg .b64 %rd<3>; + + + mov.u64 %SPL, __local_depot0; + cvta.local.u64 %SP, %SPL; + add.u64 %rd1, %SP, 0; + add.u64 %rd2, %SPL, 0; + ld.global.u32 %r6, [launch_index]; + cvt.rn.f32.u32 %f1, %r6; + mov.u16 %rs1, 0; + st.local.u8 [%rd2], %rs1; + ld.global.u32 %r1, [bvh]; + mov.u32 %r3, 255; + mov.u32 %r4, 0; + mov.u32 %r5, 1; + mov.f32 %f3, 0fBF800000; + mov.f32 %f6, 0f3F800000; + mov.f32 %f7, 0f00000000; + mov.f32 %f8, 0f6C4ECB8F; + // inline asm + call _rt_trace_mask_flags_64, (%r1, %f1, %f7, %f3, %f7, %f7, %f6, %r4, %f7, %f8, %r3, %r4, %rd1, %r5); + // 
inline asm + ret; +} + + // .globl _Z5startv +.visible .entry _Z5startv( + +) +{ + + + + // Callseq Start 0 + { + .reg .b32 temp_param_reg; + // <end>} + call.uni + _Z5tracev, + ( + ); + + //{ + }// Callseq End 0 + ret; +} + + // .globl _Z6throw_v +.visible .entry _Z6throw_v( + +) +{ + .reg .b32 %r<2>; + + + mov.u32 %r1, 1024; + // inline asm + call _rt_throw, (%r1); + // inline asm + ret; +} + + // .globl _Z9exceptionv +.visible .entry _Z9exceptionv( + +) +{ + .reg .b32 %r<4>; + .reg .b64 %rd<8>; + + + // inline asm + call (%r1), _rt_get_exception_code, (); + // inline asm + mov.u64 %rd7, var_buffer; + cvta.global.u64 %rd2, %rd7; + mov.u32 %r2, 1; + mov.u32 %r3, 4; + mov.u64 %rd6, 0; + // inline asm + call (%rd1), _rt_buffer_get_64, (%rd2, %r2, %r3, %rd6, %rd6, %rd6, %rd6); + // inline asm + st.u32 [%rd1], %r1; + ret; +} + + diff --git a/zluda_rt/src/tests/get_transform.cu b/zluda_rt/src/tests/get_transform.cu new file mode 100644 index 0000000..59dbb17 --- /dev/null +++ b/zluda_rt/src/tests/get_transform.cu @@ -0,0 +1,80 @@ +// nvcc get_transform.cu -I"C:\dev\OptiX SDK 6.5.0\include" -ptx -x cu -dc +#include <optix.h> +#include <optixu/optixu_math_namespace.h> +#include <optix_world.h> + +using namespace optix; + +rtBuffer<float, 1> object_transforms; +rtDeclareVariable(optix::Ray, ray, rtCurrentRay, ); +rtDeclareVariable(rtObject, bvh, , ); +rtDeclareVariable(float4, sphere, , ); +rtDeclareVariable(uint2, launch_index, rtLaunchIndex, ); + +RT_PROGRAM void start() { + Ray ray = make_Ray(make_float3(float(launch_index.x), 0, -1), make_float3(0,0,1), 0, 0.0, RT_DEFAULT_MAX); + char unused = 0; + rtTrace(bvh, ray, unused); +} + +RT_PROGRAM void intersect(int primIdx) +{ + float3 center = make_float3(sphere); + float3 O = ray.origin - center; + float l = 1 / length(ray.direction); + float3 D = ray.direction * l; + float radius = sphere.w; + + float b = dot(O, D); + float c = dot(O, O)-radius*radius; + float disc = b*b-c; + if(disc > 0.0f){ + float sdisc = 
sqrtf(disc); + float root1 = (-b - sdisc); + + float root11 = 0.0f; + + bool check_second = true; + if( rtPotentialIntersection( (root1 + root11) * l ) ) { + if(rtReportIntersection(0)) + { + rtGetTransform(RT_OBJECT_TO_WORLD, &object_transforms[16*0]); + check_second = false; + } + } + if(check_second) { + float root2 = (-b + sdisc); + if( rtPotentialIntersection( root2 * l ) ) { + if(rtReportIntersection(0)) + { + rtGetTransform(RT_OBJECT_TO_WORLD, &object_transforms[16*0]); + } + } + } + } +} + +RT_PROGRAM void bounds (int, float result[6]) +{ + // fails compilation + //rtGetTransform(RT_OBJECT_TO_WORLD, &object_transforms[16*0]); + const float3 cen = make_float3( sphere ); + const float3 rad = make_float3( sphere.w ); + + optix::Aabb* aabb = (optix::Aabb*)result; + + if( rad.x > 0.0f && !isinf(rad.x) ) { + aabb->m_min = cen - rad; + aabb->m_max = cen + rad; + } else { + aabb->invalidate(); + } +} + +RT_PROGRAM void any_hit() { + rtGetTransform(RT_OBJECT_TO_WORLD, &object_transforms[16*1]); +} + +RT_PROGRAM void closest_hit() { + rtGetTransform(RT_WORLD_TO_OBJECT, &object_transforms[16*2]); +} diff --git a/zluda_rt/src/tests/get_transform.ptx b/zluda_rt/src/tests/get_transform.ptx new file mode 100644 index 0000000..5b19a1c --- /dev/null +++ b/zluda_rt/src/tests/get_transform.ptx @@ -0,0 +1,368 @@ +//
+// Generated by NVIDIA NVVM Compiler
+//
+// Compiler Build ID: CL-31833905
+// Cuda compilation tools, release 11.8, V11.8.89
+// Based on NVVM 7.0.1
+//
+
+.version 7.8
+.target sm_52
+.address_size 64
+
+ // .globl _Z5startv
+// Variables declared in get_transform.cu through rtBuffer / rtDeclareVariable.
+.visible .global .align 1 .b8 object_transforms[1];
+.visible .global .align 4 .b8 ray[36];
+.visible .global .align 4 .b8 bvh[4];
+.visible .global .align 16 .b8 sphere[16];
+.visible .global .align 8 .b8 launch_index[8];
+// Per-variable type info: the bytes "Ray\0" followed by four bytes whose first byte equals
+// the declared size of the matching variable above (36, 4, 16, 8).
+.visible .global .align 4 .b8 _ZN21rti_internal_typeinfo3rayE[8] = {82, 97, 121, 0, 36, 0, 0, 0};
+.visible .global .align 4 .b8 _ZN21rti_internal_typeinfo3bvhE[8] = {82, 97, 121, 0, 4, 0, 0, 0};
+.visible .global .align 4 .b8 _ZN21rti_internal_typeinfo6sphereE[8] = {82, 97, 121, 0, 16, 0, 0, 0};
+.visible .global .align 4 .b8 _ZN21rti_internal_typeinfo12launch_indexE[8] = {82, 97, 121, 0, 8, 0, 0, 0};
+// rti_internal_register globals; they are not declared in get_transform.cu (presumably they
+// come from the OptiX headers) and none of them is referenced by the code in this file.
+.visible .global .align 8 .u64 _ZN21rti_internal_register20reg_bitness_detectorE;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail0E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail1E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail2E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail3E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail4E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail5E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail6E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail7E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail8E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail9E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail0E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail1E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail2E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail3E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail4E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail5E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail6E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail7E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail8E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail9E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_xE;
+.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_yE;
+.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_zE;
+// NUL-terminated ASCII type names: "optix::Ray", "rtObject", "float4", "uint2".
+.visible .global .align 1 .b8 _ZN21rti_internal_typename3rayE[11] = {111, 112, 116, 105, 120, 58, 58, 82, 97, 121, 0};
+.visible .global .align 1 .b8 _ZN21rti_internal_typename3bvhE[9] = {114, 116, 79, 98, 106, 101, 99, 116, 0};
+.visible .global .align 1 .b8 _ZN21rti_internal_typename6sphereE[7] = {102, 108, 111, 97, 116, 52, 0};
+.visible .global .align 1 .b8 _ZN21rti_internal_typename12launch_indexE[6] = {117, 105, 110, 116, 50, 0};
+// Type enum: the same value, 4919 (0x1337), for every variable.
+.visible .global .align 4 .u32 _ZN21rti_internal_typeenum3rayE = 4919;
+.visible .global .align 4 .u32 _ZN21rti_internal_typeenum3bvhE = 4919;
+.visible .global .align 4 .u32 _ZN21rti_internal_typeenum6sphereE = 4919;
+.visible .global .align 4 .u32 _ZN21rti_internal_typeenum12launch_indexE = 4919;
+// NUL-terminated semantic names: "rtCurrentRay" for ray and "rtLaunchIndex" for launch_index;
+// bvh and sphere (declared with an empty semantic in the .cu) get a one-byte array without
+// an initializer.
+.visible .global .align 1 .b8 _ZN21rti_internal_semantic3rayE[13] = {114, 116, 67, 117, 114, 114, 101, 110, 116, 82, 97, 121, 0};
+.visible .global .align 1 .b8 _ZN21rti_internal_semantic3bvhE[1];
+.visible .global .align 1 .b8 _ZN21rti_internal_semantic6sphereE[1];
+.visible .global .align 1 .b8 _ZN21rti_internal_semantic12launch_indexE[14] = {114, 116, 76, 97, 117, 110, 99, 104, 73, 110, 100, 101, 120, 0};
+// Annotations: a one-byte array without an initializer for each of the four variables.
+.visible .global .align 1 .b8 _ZN23rti_internal_annotation3rayE[1];
+.visible .global .align 1 .b8 _ZN23rti_internal_annotation3bvhE[1];
+.visible .global .align 1 .b8 _ZN23rti_internal_annotation6sphereE[1];
+.visible .global .align 1 .b8 _ZN23rti_internal_annotation12launch_indexE[1];
+
+// Entry point compiled from `start()` in get_transform.cu: builds one ray per launch index
+// and traces it against `bvh` with a one-byte dummy payload.
+.visible .entry _Z5startv()
+{
+ // One byte of local storage for the payload (`char unused` in the .cu source).
+ .local .align 1 .b8 __local_depot0[1];
+ .reg .b64 %SP;
+ .reg .b64 %SPL;
+ .reg .b16 %rs<2>;
+ .reg .f32 %f<9>;
+ .reg .b32 %r<7>;
+ .reg .b64 %rd<3>;
+
+
+ // %rd1 = generic address of the payload byte, %rd2 = its local-space address.
+ mov.u64 %SPL, __local_depot0;
+ cvta.local.u64 %SP, %SPL;
+ add.u64 %rd1, %SP, 0;
+ add.u64 %rd2, %SPL, 0;
+ // %f1 = float(launch_index.x), used as the x coordinate of the ray origin.
+ ld.global.u32 %r6, [launch_index];
+ cvt.rn.f32.u32 %f1, %r6;
+ // Zero the payload byte.
+ mov.u16 %rs1, 0;
+ st.local.u8 [%rd2], %rs1;
+ ld.global.u32 %r1, [bvh];
+ // %f3 = -1.0, %f6 = 1.0, %f7 = 0.0, %f8 = tmax (RT_DEFAULT_MAX in the .cu source).
+ mov.f32 %f3, 0fBF800000;
+ mov.f32 %f6, 0f3F800000;
+ mov.f32 %f7, 0f00000000;
+ mov.f32 %f8, 0f6C4ECB8F;
+ mov.u32 %r3, 255;
+ mov.u32 %r4, 0;
+ mov.u32 %r5, 1;
+ // Arguments follow make_Ray in the .cu: object, origin (x, 0, -1), direction (0, 0, 1),
+ // ray type 0, tmin 0, tmax; then presumably mask 255 and flags 0 (going by the callee
+ // name), payload pointer and payload size 1.
+ // begin inline asm
+ call _rt_trace_mask_flags_64, (%r1, %f1, %f7, %f3, %f7, %f7, %f6, %r4, %f7, %f8, %r3, %r4, %rd1, %r5);
+ // end inline asm
+ ret;
+
+}
+ // .globl _Z9intersecti
+// Intersection program compiled from `intersect(int primIdx)` in get_transform.cu: a
+// ray/sphere test that, on a reported hit, stores the 16 floats returned by
+// _rt_get_transform at the start of the object_transforms buffer.
+// The primIdx parameter is declared but never loaded.
+.visible .entry _Z9intersecti(
+ .param .u32 _Z9intersecti_param_0
+)
+{
+ .reg .pred %p<6>;
+ .reg .f32 %f<75>;
+ .reg .b32 %r<13>;
+ .reg .b64 %rd<15>;
+
+
+ // %f5..%f7 = sphere centre, %f8 = radius; %f14/%f16/%f18 = O = ray.origin - centre.
+ ld.global.v4.f32 {%f5, %f6, %f7, %f8}, [sphere];
+ ld.global.f32 %f13, [ray];
+ sub.f32 %f14, %f13, %f5;
+ ld.global.f32 %f15, [ray+4];
+ sub.f32 %f16, %f15, %f6;
+ ld.global.f32 %f17, [ray+8];
+ sub.f32 %f18, %f17, %f7;
+ // %f1 = l = 1 / length(ray.direction); the direction is read from ray+12..ray+20.
+ ld.global.f32 %f19, [ray+12];
+ ld.global.f32 %f20, [ray+16];
+ mul.f32 %f21, %f20, %f20;
+ fma.rn.f32 %f22, %f19, %f19, %f21;
+ ld.global.f32 %f23, [ray+20];
+ fma.rn.f32 %f24, %f23, %f23, %f22;
+ sqrt.rn.f32 %f25, %f24;
+ rcp.rn.f32 %f1, %f25;
+ // %f26..%f28 = D = direction * l; %f2 = b = dot(O, D).
+ mul.f32 %f26, %f19, %f1;
+ mul.f32 %f27, %f1, %f20;
+ mul.f32 %f28, %f1, %f23;
+ mul.f32 %f29, %f16, %f27;
+ fma.rn.f32 %f30, %f14, %f26, %f29;
+ fma.rn.f32 %f2, %f18, %f28, %f30;
+ // %f35 = c = dot(O, O) - radius^2; %f3 = disc = b*b - c.
+ mul.f32 %f31, %f16, %f16;
+ fma.rn.f32 %f32, %f14, %f14, %f31;
+ fma.rn.f32 %f33, %f18, %f18, %f32;
+ mul.f32 %f34, %f8, %f8;
+ sub.f32 %f35, %f33, %f34;
+ mul.f32 %f36, %f2, %f2;
+ sub.f32 %f3, %f36, %f35;
+ // Return unless disc > 0 (`leu` is also true when disc is NaN).
+ setp.leu.f32 %p1, %f3, 0f00000000;
+ @%p1 bra $L__BB1_7;
+
+ // First candidate: (-b - sqrt(disc) + 0.0) * l.
+ sqrt.rn.f32 %f4, %f3;
+ neg.f32 %f38, %f2;
+ sub.f32 %f39, %f38, %f4;
+ add.f32 %f40, %f39, 0f00000000;
+ mul.f32 %f37, %f1, %f40;
+ // begin inline asm
+ call (%r1), _rt_potential_intersection, (%f37);
+ // end inline asm
+ setp.eq.s32 %p2, %r1, 0;
+ @%p2 bra $L__BB1_4;
+
+ mov.u32 %r3, 0;
+ // begin inline asm
+ call (%r2), _rt_report_intersection, (%r3);
+ // end inline asm
+ setp.eq.s32 %p3, %r2, 0;
+ @%p3 bra $L__BB1_4;
+
+ // Hit reported: get the address of object_transforms[0] (arguments 1 and 4 are presumably
+ // the dimensionality and element size of rtBuffer<float, 1>; all indices are 0).
+ mov.u64 %rd7, object_transforms;
+ cvta.global.u64 %rd2, %rd7;
+ mov.u32 %r4, 1;
+ mov.u32 %r5, 4;
+ mov.u64 %rd6, 0;
+ // begin inline asm
+ call (%rd1), _rt_buffer_get_64, (%rd2, %r4, %r5, %rd6, %rd6, %rd6, %rd6);
+ // end inline asm
+ // 3841 is the value RT_OBJECT_TO_WORLD from the .cu source compiled to.
+ mov.u32 %r6, 3841;
+ // begin inline asm
+ call (%f41, %f42, %f43, %f44, %f45, %f46, %f47, %f48, %f49, %f50, %f51, %f52, %f53, %f54, %f55, %f56), _rt_get_transform, (%r6);
+ // end inline asm
+ // Store the 16 returned floats contiguously.
+ st.f32 [%rd1], %f41;
+ st.f32 [%rd1+4], %f42;
+ st.f32 [%rd1+8], %f43;
+ st.f32 [%rd1+12], %f44;
+ st.f32 [%rd1+16], %f45;
+ st.f32 [%rd1+20], %f46;
+ st.f32 [%rd1+24], %f47;
+ st.f32 [%rd1+28], %f48;
+ st.f32 [%rd1+32], %f49;
+ st.f32 [%rd1+36], %f50;
+ st.f32 [%rd1+40], %f51;
+ st.f32 [%rd1+44], %f52;
+ st.f32 [%rd1+48], %f53;
+ st.f32 [%rd1+52], %f54;
+ st.f32 [%rd1+56], %f55;
+ st.f32 [%rd1+60], %f56;
+ bra.uni $L__BB1_7;
+
+// Second candidate, (sqrt(disc) - b) * l, reached when the first one was rejected by either
+// _rt_potential_intersection or _rt_report_intersection.
+$L__BB1_4:
+ sub.f32 %f58, %f4, %f2;
+ mul.f32 %f57, %f1, %f58;
+ // begin inline asm
+ call (%r7), _rt_potential_intersection, (%f57);
+ // end inline asm
+ setp.eq.s32 %p4, %r7, 0;
+ @%p4 bra $L__BB1_7;
+
+ mov.u32 %r9, 0;
+ // begin inline asm
+ call (%r8), _rt_report_intersection, (%r9);
+ // end inline asm
+ setp.eq.s32 %p5, %r8, 0;
+ @%p5 bra $L__BB1_7;
+
+ // Same store sequence as for the first candidate: object_transforms[0..16].
+ mov.u64 %rd14, object_transforms;
+ cvta.global.u64 %rd9, %rd14;
+ mov.u32 %r10, 1;
+ mov.u32 %r11, 4;
+ mov.u64 %rd13, 0;
+ // begin inline asm
+ call (%rd8), _rt_buffer_get_64, (%rd9, %r10, %r11, %rd13, %rd13, %rd13, %rd13);
+ // end inline asm
+ mov.u32 %r12, 3841;
+ // begin inline asm
+ call (%f59, %f60, %f61, %f62, %f63, %f64, %f65, %f66, %f67, %f68, %f69, %f70, %f71, %f72, %f73, %f74), _rt_get_transform, (%r12);
+ // end inline asm
+ st.f32 [%rd8], %f59;
+ st.f32 [%rd8+4], %f60;
+ st.f32 [%rd8+8], %f61;
+ st.f32 [%rd8+12], %f62;
+ st.f32 [%rd8+16], %f63;
+ st.f32 [%rd8+20], %f64;
+ st.f32 [%rd8+24], %f65;
+ st.f32 [%rd8+28], %f66;
+ st.f32 [%rd8+32], %f67;
+ st.f32 [%rd8+36], %f68;
+ st.f32 [%rd8+40], %f69;
+ st.f32 [%rd8+44], %f70;
+ st.f32 [%rd8+48], %f71;
+ st.f32 [%rd8+52], %f72;
+ st.f32 [%rd8+56], %f73;
+ st.f32 [%rd8+60], %f74;
+
+$L__BB1_7:
+ ret;
+
+}
+ // .globl _Z6boundsiPf
+// Bounding-box program compiled from `bounds(int, float result[6])` in get_transform.cu.
+// Writes six floats (min xyz, then max xyz) through the pointer parameter; the integer
+// parameter is never loaded.
+.visible .entry _Z6boundsiPf(
+ .param .u32 _Z6boundsiPf_param_0,
+ .param .u64 _Z6boundsiPf_param_1
+)
+{
+ .reg .pred %p<3>;
+ .reg .f32 %f<17>;
+ .reg .b32 %r<3>;
+ .reg .b64 %rd<3>;
+
+
+ ld.param.u64 %rd2, [_Z6boundsiPf_param_1];
+ cvta.to.global.u64 %rd1, %rd2;
+ // %f6..%f8 = sphere centre, %f9 = radius.
+ ld.global.v4.f32 {%f6, %f7, %f8, %f9}, [sphere];
+ // radius <= 0 (or NaN, since `leu` is true for unordered operands) -> invalid box.
+ setp.leu.f32 %p1, %f9, 0f00000000;
+ @%p1 bra $L__BB2_2;
+
+ // |radius| != +inf -> valid box, otherwise fall through to the invalid box.
+ abs.f32 %f10, %f9;
+ setp.neu.f32 %p2, %f10, 0f7F800000;
+ @%p2 bra $L__BB2_3;
+ bra.uni $L__BB2_2;
+
+// Valid box: min = centre - radius, max = centre + radius.
+$L__BB2_3:
+ sub.f32 %f11, %f6, %f9;
+ st.global.f32 [%rd1], %f11;
+ sub.f32 %f12, %f7, %f9;
+ st.global.f32 [%rd1+4], %f12;
+ sub.f32 %f13, %f8, %f9;
+ st.global.f32 [%rd1+8], %f13;
+ add.f32 %f14, %f6, %f9;
+ st.global.f32 [%rd1+12], %f14;
+ add.f32 %f15, %f7, %f9;
+ st.global.f32 [%rd1+16], %f15;
+ add.f32 %f16, %f8, %f9;
+ st.global.f32 [%rd1+20], %f16;
+ bra.uni $L__BB2_4;
+
+// Invalid box (the `aabb->invalidate()` branch of the .cu source): min is filled with the
+// bit pattern 0x7CF0BDC2 and max with 0xFCF0BDC2, i.e. roughly +1e37 and -1e37 as f32.
+$L__BB2_2:
+ mov.u32 %r1, 2096152002;
+ st.global.u32 [%rd1], %r1;
+ st.global.u32 [%rd1+4], %r1;
+ st.global.u32 [%rd1+8], %r1;
+ mov.u32 %r2, -51331646;
+ st.global.u32 [%rd1+12], %r2;
+ st.global.u32 [%rd1+16], %r2;
+ st.global.u32 [%rd1+20], %r2;
+
+$L__BB2_4:
+ ret;
+
+}
+ // .globl _Z7any_hitv
+// Any-hit program compiled from `any_hit()` in get_transform.cu: stores the 16 floats
+// returned by _rt_get_transform(3841) into object_transforms starting at element 16.
+// 3841 is the value RT_OBJECT_TO_WORLD from the .cu source compiled to.
+.visible .entry _Z7any_hitv()
+{
+ .reg .f32 %f<17>;
+ .reg .b32 %r<4>;
+ .reg .b64 %rd<8>;
+
+
+ mov.u64 %rd7, object_transforms;
+ cvta.global.u64 %rd2, %rd7;
+ mov.u32 %r1, 1;
+ mov.u32 %r2, 4;
+ // First index = 16 (`&object_transforms[16*1]` in the .cu source); the others are 0.
+ mov.u64 %rd3, 16;
+ mov.u64 %rd6, 0;
+ // begin inline asm
+ call (%rd1), _rt_buffer_get_64, (%rd2, %r1, %r2, %rd3, %rd6, %rd6, %rd6);
+ // end inline asm
+ mov.u32 %r3, 3841;
+ // begin inline asm
+ call (%f1, %f2, %f3, %f4, %f5, %f6, %f7, %f8, %f9, %f10, %f11, %f12, %f13, %f14, %f15, %f16), _rt_get_transform, (%r3);
+ // end inline asm
+ // Store the 16 returned floats contiguously from the element address in %rd1.
+ st.f32 [%rd1], %f1;
+ st.f32 [%rd1+4], %f2;
+ st.f32 [%rd1+8], %f3;
+ st.f32 [%rd1+12], %f4;
+ st.f32 [%rd1+16], %f5;
+ st.f32 [%rd1+20], %f6;
+ st.f32 [%rd1+24], %f7;
+ st.f32 [%rd1+28], %f8;
+ st.f32 [%rd1+32], %f9;
+ st.f32 [%rd1+36], %f10;
+ st.f32 [%rd1+40], %f11;
+ st.f32 [%rd1+44], %f12;
+ st.f32 [%rd1+48], %f13;
+ st.f32 [%rd1+52], %f14;
+ st.f32 [%rd1+56], %f15;
+ st.f32 [%rd1+60], %f16;
+ ret;
+
+}
+ // .globl _Z11closest_hitv
+// Closest-hit program compiled from `closest_hit()` in get_transform.cu: stores the 16
+// floats returned by _rt_get_transform(3840) into object_transforms starting at element 32.
+// 3840 is the value RT_WORLD_TO_OBJECT from the .cu source compiled to.
+.visible .entry _Z11closest_hitv()
+{
+ .reg .f32 %f<17>;
+ .reg .b32 %r<4>;
+ .reg .b64 %rd<8>;
+
+
+ mov.u64 %rd7, object_transforms;
+ cvta.global.u64 %rd2, %rd7;
+ mov.u32 %r1, 1;
+ mov.u32 %r2, 4;
+ // First index = 32 (`&object_transforms[16*2]` in the .cu source); the others are 0.
+ mov.u64 %rd3, 32;
+ mov.u64 %rd6, 0;
+ // begin inline asm
+ call (%rd1), _rt_buffer_get_64, (%rd2, %r1, %r2, %rd3, %rd6, %rd6, %rd6);
+ // end inline asm
+ mov.u32 %r3, 3840;
+ // begin inline asm
+ call (%f1, %f2, %f3, %f4, %f5, %f6, %f7, %f8, %f9, %f10, %f11, %f12, %f13, %f14, %f15, %f16), _rt_get_transform, (%r3);
+ // end inline asm
+ // Store the 16 returned floats contiguously from the element address in %rd1.
+ st.f32 [%rd1], %f1;
+ st.f32 [%rd1+4], %f2;
+ st.f32 [%rd1+8], %f3;
+ st.f32 [%rd1+12], %f4;
+ st.f32 [%rd1+16], %f5;
+ st.f32 [%rd1+20], %f6;
+ st.f32 [%rd1+24], %f7;
+ st.f32 [%rd1+28], %f8;
+ st.f32 [%rd1+32], %f9;
+ st.f32 [%rd1+36], %f10;
+ st.f32 [%rd1+40], %f11;
+ st.f32 [%rd1+44], %f12;
+ st.f32 [%rd1+48], %f13;
+ st.f32 [%rd1+52], %f14;
+ st.f32 [%rd1+56], %f15;
+ st.f32 [%rd1+60], %f16;
+ ret;
+
+}
+
diff --git a/zluda_rt/src/tests/mod.rs b/zluda_rt/src/tests/mod.rs new file mode 100644 index 0000000..7db155d --- /dev/null +++ b/zluda_rt/src/tests/mod.rs @@ -0,0 +1,1648 @@ +use crate::optix_test;
+use crate::test_common::OptixFns;
+use float_cmp::assert_approx_eq;
+use glam::{Mat4, Quat, Vec3};
+use optix_types::*;
+use std::{ffi::CStr, mem, ptr};
+
+/// CUDA source with the same two programs (`start`, `set_variable`) and the same variables
+/// as `SET_VARIABLE_PTX`; presumably the input that PTX was compiled from.
+///
+/// Nothing in the visible part of this module reads it (hence the leading underscore); it is
+/// kept as a reference for the PTX text.
+const _SET_VARIABLE_CU: &'static [u8] = b"
+#include <optix.h>
+#include <optixu/optixu_math_namespace.h>
+
+using namespace optix;
+
+struct Payload {
+ float data;
+};
+
+rtDeclareVariable(uint2, launch_index, rtLaunchIndex, );
+rtDeclareVariable(float, value, , );
+rtBuffer<float, 1> output_buffer;
+rtDeclareVariable(rtObject, bvh, , );
+rtDeclareVariable(Payload, payload, rtPayload, );
+
+RT_PROGRAM void start(void)
+{
+ Payload p { float(launch_index.x) };
+ Ray ray = make_Ray(make_float3(float(launch_index.x), 0, -1), make_float3(0,0,1), 0, 0.0, RT_DEFAULT_MAX);
+ rtTrace(bvh, ray, p);
+}
+
+RT_PROGRAM void set_variable(void)
+{
+ output_buffer[launch_index.x] = value;
+}
+";
+
+/// NUL-terminated PTX module with two entry points: `_Z5startv` (traces one ray per launch
+/// index against `bvh`) and `_Z12set_variablev` (stores `value` at
+/// `output_buffer[launch_index.x]`).
+///
+/// The trailing `\0` matters: the text is handed to `rtProgramCreateFromPTXString` as a bare
+/// pointer (see `variable_scoping`), so it has to be a valid C string.
+const SET_VARIABLE_PTX: &'static [u8] = b"
+.version 7.0
+.target sm_52
+.address_size 64
+
+ // .globl _Z5startv
+.global .align 8 .b8 launch_index[8];
+.global .align 4 .f32 value;
+.global .align 1 .b8 output_buffer[1];
+.global .align 4 .b8 bvh[4];
+.global .align 4 .b8 payload[4];
+.global .align 4 .b8 _ZN21rti_internal_typeinfo12launch_indexE[8] = {82, 97, 121, 0, 8, 0, 0, 0};
+.global .align 4 .b8 _ZN21rti_internal_typeinfo5valueE[8] = {82, 97, 121, 0, 4, 0, 0, 0};
+.global .align 4 .b8 _ZN21rti_internal_typeinfo3bvhE[8] = {82, 97, 121, 0, 4, 0, 0, 0};
+.global .align 4 .b8 _ZN21rti_internal_typeinfo7payloadE[8] = {82, 97, 121, 0, 4, 0, 0, 0};
+.global .align 8 .u64 _ZN21rti_internal_register20reg_bitness_detectorE;
+.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail0E;
+.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail1E;
+.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail2E;
+.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail3E;
+.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail4E;
+.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail5E;
+.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail6E;
+.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail7E;
+.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail8E;
+.global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail9E;
+.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail0E;
+.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail1E;
+.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail2E;
+.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail3E;
+.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail4E;
+.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail5E;
+.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail6E;
+.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail7E;
+.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail8E;
+.global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail9E;
+.global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_xE;
+.global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_yE;
+.global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_zE;
+.global .align 1 .b8 _ZN21rti_internal_typename12launch_indexE[6] = {117, 105, 110, 116, 50, 0};
+.global .align 1 .b8 _ZN21rti_internal_typename5valueE[6] = {102, 108, 111, 97, 116, 0};
+.global .align 1 .b8 _ZN21rti_internal_typename3bvhE[9] = {114, 116, 79, 98, 106, 101, 99, 116, 0};
+.global .align 1 .b8 _ZN21rti_internal_typename7payloadE[8] = {80, 97, 121, 108, 111, 97, 100, 0};
+.global .align 4 .u32 _ZN21rti_internal_typeenum12launch_indexE = 4919;
+.global .align 4 .u32 _ZN21rti_internal_typeenum5valueE = 4919;
+.global .align 4 .u32 _ZN21rti_internal_typeenum3bvhE = 4919;
+.global .align 4 .u32 _ZN21rti_internal_typeenum7payloadE = 4919;
+.global .align 1 .b8 _ZN21rti_internal_semantic12launch_indexE[14] = {114, 116, 76, 97, 117, 110, 99, 104, 73, 110, 100, 101, 120, 0};
+.global .align 1 .b8 _ZN21rti_internal_semantic5valueE[1];
+.global .align 1 .b8 _ZN21rti_internal_semantic3bvhE[1];
+.global .align 1 .b8 _ZN21rti_internal_semantic7payloadE[10] = {114, 116, 80, 97, 121, 108, 111, 97, 100, 0};
+.global .align 1 .b8 _ZN23rti_internal_annotation12launch_indexE[1];
+.global .align 1 .b8 _ZN23rti_internal_annotation5valueE[1];
+.global .align 1 .b8 _ZN23rti_internal_annotation3bvhE[1];
+.global .align 1 .b8 _ZN23rti_internal_annotation7payloadE[1];
+
+.visible .entry _Z5startv(
+
+)
+{
+ .local .align 4 .b8 __local_depot0[4];
+ .reg .b64 %SP;
+ .reg .b64 %SPL;
+ .reg .f32 %f<9>;
+ .reg .b32 %r<7>;
+ .reg .b64 %rd<3>;
+
+
+ mov.u64 %SPL, __local_depot0;
+ cvta.local.u64 %SP, %SPL;
+ add.u64 %rd1, %SP, 0;
+ add.u64 %rd2, %SPL, 0;
+ ld.global.u32 %r6, [launch_index];
+ cvt.rn.f32.u32 %f1, %r6;
+ st.local.f32 [%rd2], %f1;
+ ld.global.u32 %r1, [bvh];
+ mov.u32 %r3, 255;
+ mov.u32 %r4, 0;
+ mov.u32 %r5, 4;
+ mov.f32 %f3, 0fBF800000;
+ mov.f32 %f6, 0f3F800000;
+ mov.f32 %f7, 0f00000000;
+ mov.f32 %f8, 0f6C4ECB8F;
+ // inline asm
+ call _rt_trace_mask_flags_64, (%r1, %f1, %f7, %f3, %f7, %f7, %f6, %r4, %f7, %f8, %r3, %r4, %rd1, %r5);
+ // inline asm
+ ret;
+}
+
+ // .globl _Z12set_variablev
+.visible .entry _Z12set_variablev(
+
+)
+{
+ .reg .f32 %f<2>;
+ .reg .b32 %r<3>;
+ .reg .b64 %rd<8>;
+
+
+ ld.global.f32 %f1, [value];
+ ld.global.u32 %rd3, [launch_index];
+ mov.u64 %rd7, output_buffer;
+ cvta.global.u64 %rd2, %rd7;
+ mov.u32 %r1, 1;
+ mov.u32 %r2, 4;
+ mov.u64 %rd6, 0;
+ // inline asm
+ call (%rd1), _rt_buffer_get_64, (%rd2, %r1, %r2, %rd3, %rd6, %rd6, %rd6);
+ // inline asm
+ st.f32 [%rd1], %f1;
+ ret;
+}\0";
+
+optix_test!(variable_scoping);
+
+// Checks which scope the variable `value` is resolved from when it is set at several levels.
+// One triangle is created per launch index; the expected output buffer is [4, 1, 2, 3].
+//
+// List of points, `+` means variable is set, `-` means variable is unset
+// 0: Program-, GI-, Material-, Context+ => produces 4
+// 1: Program+, GI-, Material-, Context+ => produces 1
+// 2: Program-, GI+, Material-, Context+ => produces 2
+// 3: Program-, GI-, Material+, Context+ => produces 3
+unsafe fn variable_scoping<Optix: OptixFns>(mut o: Optix) {
+ let variable_key = b"value\0";
+ let triangles = 4;
+ let ctx = create_context(&o);
+ o.rtContextSetEntryPointCount(ctx, 1);
+ o.rtContextSetRayTypeCount(ctx, 1);
+ // Context-level fallback: `value` = 4.
+ let mut ctx_variable = ptr::null_mut();
+ o.rtContextDeclareVariable(ctx, variable_key.as_ptr() as _, &mut ctx_variable);
+ o.rtVariableSet1f(ctx_variable, 4f32);
+ // One f32 output slot per triangle, exposed to the programs as `output_buffer`.
+ let mut output_buffer = ptr::null_mut();
+ o.rtBufferCreate(ctx, RTbuffertype::RT_BUFFER_OUTPUT.0, &mut output_buffer);
+ o.rtBufferSetSize1D(output_buffer, triangles as u64);
+ o.rtBufferSetFormat(output_buffer, RTformat::RT_FORMAT_FLOAT);
+ let mut output_buffer_var = ptr::null_mut();
+ o.rtContextDeclareVariable(
+ ctx,
+ b"output_buffer\0".as_ptr() as _,
+ &mut output_buffer_var,
+ );
+ o.rtVariableSetObject(output_buffer_var, RTobject(output_buffer as _));
+ // Ray generation program: the `start` entry of SET_VARIABLE_PTX, bound to entry point 0.
+ let mut raygen = mem::zeroed();
+ o.rtProgramCreateFromPTXString(
+ ctx,
+ SET_VARIABLE_PTX.as_ptr() as _,
+ b"start\0".as_ptr() as _,
+ &mut raygen,
+ );
+ o.rtContextSetRayGenerationProgram(ctx, 0, raygen);
+ // Point 0: nothing set below the context. This material/program pair is also used for
+ // point 2 further down.
+ // NOTE(review): create_anyhit_program / create_material / create_triangles are defined
+ // outside this view; presumably the any-hit program is `set_variable` from
+ // SET_VARIABLE_PTX - confirm.
+ // we are reusing material&program pair to see how ZLUDA handles this
+ let anyhit_unset = create_anyhit_program(&mut o, ctx);
+ let material_unset = create_material(&mut o, ctx);
+ let triangle_instances = (0..triangles)
+ .map(|triangle| create_triangles(&mut o, ctx, triangle, 1, 0.0).0)
+ .collect::<Vec<_>>();
+ setup_geometry_instance(
+ &mut o,
+ triangle_instances[0],
+ material_unset,
+ anyhit_unset,
+ ptr::null_mut(),
+ );
+ // Point 1: `value` = 1 set on the program.
+ let anyhit_1 = create_anyhit_program_with_var(&mut o, ctx, variable_key, 1f32);
+ let material_1 = create_material(&mut o, ctx);
+ setup_geometry_instance(
+ &mut o,
+ triangle_instances[1],
+ material_1,
+ anyhit_1,
+ ptr::null_mut(),
+ );
+ // Point 2: `value` = 2 set on the geometry instance; material and program are the
+ // "unset" pair shared with point 0.
+ let mut variable_2 = ptr::null_mut();
+ o.rtGeometryInstanceDeclareVariable(
+ triangle_instances[2],
+ variable_key.as_ptr() as _,
+ &mut variable_2,
+ );
+ o.rtVariableSet1f(variable_2, 2f32);
+ setup_geometry_instance(
+ &mut o,
+ triangle_instances[2],
+ material_unset,
+ anyhit_unset,
+ ptr::null_mut(),
+ );
+ // Point 3: `value` = 3 set on the material.
+ let anyhit_3 = create_anyhit_program(&mut o, ctx);
+ let material_3 = create_material_with_var(&mut o, ctx, variable_key, 3f32);
+ setup_geometry_instance(
+ &mut o,
+ triangle_instances[3],
+ material_3,
+ anyhit_3,
+ ptr::null_mut(),
+ );
+ // Group all instances, publish the group as `bvh`, then validate, launch and compare.
+ let geo_group = create_geometry_group(&o, ctx, &triangle_instances);
+ create_set_accelerator(&mut o, ctx, geo_group, b"bvh\0");
+ o.rtContextValidate(ctx);
+ launch_2d(&mut o, ctx, 0, triangles as u64, 1);
+ assert_buffer_eq(&mut o, output_buffer, &[4f32, 1f32, 2f32, 3f32][..]);
+ o.rtContextDestroy(ctx);
+}
+
+/// Creates a fresh context and sets its `RT_CONTEXT_ATTRIBUTE_DISK_CACHE_ENABLED`
+/// attribute to `0` before handing the context back to the caller.
+unsafe fn create_context<Optix: OptixFns>(o: &Optix) -> RTcontext {
+    let mut context = ptr::null_mut();
+    o.rtContextCreate(&mut context);
+    // The attribute value is passed as a raw (size, pointer) pair.
+    let cache_enabled: u32 = 0;
+    let cache_enabled_ptr = &cache_enabled as *const u32;
+    o.rtContextSetAttribute(
+        context,
+        RTcontextattribute::RT_CONTEXT_ATTRIBUTE_DISK_CACHE_ENABLED,
+        mem::size_of::<u32>() as u64,
+        cache_enabled_ptr as _,
+    );
+    context
+}
+
+/// Builds a geometry group whose children are `triangle_instances`, in slice order.
+unsafe fn create_geometry_group<Optix: OptixFns>(
+    o: &Optix,
+    ctx: *mut RTcontext_api,
+    triangle_instances: &[*mut RTgeometryinstance_api],
+) -> *mut RTgeometrygroup_api {
+    let child_count = triangle_instances.len() as u32;
+    let mut group = ptr::null_mut();
+    o.rtGeometryGroupCreate(ctx, &mut group);
+    o.rtGeometryGroupSetChildCount(group, child_count);
+    // Child slots are numbered from zero in the same order as the input slice.
+    for (slot, instance) in (0u32..).zip(triangle_instances.iter().copied()) {
+        o.rtGeometryGroupSetChild(group, slot, instance);
+    }
+    group
+}
+
+/// Creates an acceleration structure with the `"Bvh"` builder, attaches it to `geo_group`
+/// and binds the group to a context variable called `name`.
+///
+/// `name` is handed to the C API as-is, so it must already end with a NUL byte.
+unsafe fn create_set_accelerator<Optix: OptixFns>(
+    o: &mut Optix,
+    ctx: *mut RTcontext_api,
+    geo_group: *mut RTgeometrygroup_api,
+    name: &[u8],
+) {
+    let builder = b"Bvh\0";
+    let mut acceleration = ptr::null_mut();
+    o.rtAccelerationCreate(ctx, &mut acceleration);
+    o.rtGeometryGroupSetAcceleration(geo_group, acceleration);
+    o.rtAccelerationSetBuilder(acceleration, builder.as_ptr() as _);
+    let mut group_variable = ptr::null_mut();
+    o.rtContextDeclareVariable(ctx, name.as_ptr() as _, &mut group_variable);
+    let group_object = RTobject(geo_group as _);
+    o.rtVariableSetObject(group_variable, group_object);
+}
+
+/// Same as `create_set_accelerator`, but for an `RTgroup`: creates a `"Bvh"` acceleration
+/// structure, attaches it to `group` and binds the group to the context variable `name`.
+///
+/// `name` is handed to the C API as-is, so it must already end with a NUL byte.
+unsafe fn create_set_accelerator_group<Optix: OptixFns>(
+    o: &mut Optix,
+    ctx: RTcontext,
+    group: RTgroup,
+    name: &[u8],
+) {
+    let builder = b"Bvh\0";
+    let mut acceleration = ptr::null_mut();
+    o.rtAccelerationCreate(ctx, &mut acceleration);
+    o.rtGroupSetAcceleration(group, acceleration);
+    o.rtAccelerationSetBuilder(acceleration, builder.as_ptr() as _);
+    let mut group_variable = ptr::null_mut();
+    o.rtContextDeclareVariable(ctx, name.as_ptr() as _, &mut group_variable);
+    let group_object = RTobject(group as _);
+    o.rtVariableSetObject(group_variable, group_object);
+}
+
+/// Makes `material` the only material (slot 0) of the geometry instance `triangle` and
+/// installs the given programs on that material for ray type 0.
+///
+/// A null `any_hit` or `closest_hit` means "leave that program unset".
+unsafe fn setup_geometry_instance<Optix: OptixFns>(
+    o: &mut Optix,
+    triangle: RTgeometryinstance,
+    material: RTmaterial,
+    any_hit: RTprogram,
+    closest_hit: RTprogram,
+) {
+    o.rtGeometryInstanceSetMaterialCount(triangle, 1);
+    o.rtGeometryInstanceSetMaterial(triangle, 0, material);
+    let has_any_hit = !any_hit.is_null();
+    if has_any_hit {
+        o.rtMaterialSetAnyHitProgram(material, 0, any_hit);
+    }
+    let has_closest_hit = !closest_hit.is_null();
+    if has_closest_hit {
+        o.rtMaterialSetClosestHitProgram(material, 0, closest_hit);
+    }
+}
+
+/// Creates the `set_variable` any-hit program and declares a program-scope float variable
+/// `variable_key` on it, initialised to `value`.
+///
+/// `variable_key` must already be NUL-terminated.
+unsafe fn create_anyhit_program_with_var<Optix: OptixFns>(
+    o: &mut Optix,
+    ctx: RTcontext,
+    variable_key: &[u8],
+    value: f32,
+) -> RTprogram {
+    let any_hit = create_anyhit_program(o, ctx);
+    let key_ptr = variable_key.as_ptr();
+    let mut variable = ptr::null_mut();
+    o.rtProgramDeclareVariable(any_hit, key_ptr as _, &mut variable);
+    o.rtVariableSet1f(variable, value);
+    any_hit
+}
+
+/// Compiles the `set_variable` entry point of `SET_VARIABLE_PTX` into a program object.
+unsafe fn create_anyhit_program<Optix: OptixFns>(o: &mut Optix, ctx: RTcontext) -> RTprogram {
+    let ptx_source = SET_VARIABLE_PTX.as_ptr();
+    let entry_point = b"set_variable\0".as_ptr();
+    let mut program = ptr::null_mut();
+    o.rtProgramCreateFromPTXString(ctx, ptx_source as _, entry_point as _, &mut program);
+    program
+}
+
+/// Creates a material and declares a material-scope float variable `variable_key` on it,
+/// initialised to `value`.
+///
+/// `variable_key` must already be NUL-terminated.
+unsafe fn create_material_with_var<Optix: OptixFns>(
+    o: &mut Optix,
+    ctx: RTcontext,
+    variable_key: &[u8],
+    value: f32,
+) -> RTmaterial {
+    let new_material = create_material(o, ctx);
+    let key_ptr = variable_key.as_ptr();
+    let mut variable = ptr::null_mut();
+    o.rtMaterialDeclareVariable(new_material, key_ptr as _, &mut variable);
+    o.rtVariableSet1f(variable, value);
+    new_material
+}
+
+/// Creates a material in `ctx` with no programs or variables attached.
+unsafe fn create_material<Optix: OptixFns>(o: &mut Optix, ctx: RTcontext) -> RTmaterial {
+    // Start from an all-zero handle; `rtMaterialCreate` fills it in.
+    let mut handle: RTmaterial = mem::zeroed();
+    o.rtMaterialCreate(ctx, &mut handle);
+    handle
+}
+
+/// Convenience wrapper around `create_triangles_scaled` that uses a scale of `0.1`.
+///
+/// Returns the geometry instance together with its `GeometryTriangles` object.
+unsafe fn create_triangles<Optix: OptixFns>(
+    o: &mut Optix,
+    ctx: RTcontext,
+    starting_triangle: usize,
+    triangle_count: usize,
+    depth: f32,
+) -> (RTgeometryinstance, RTgeometrytriangles) {
+    let default_scale = 0.1;
+    create_triangles_scaled(
+        o,
+        ctx,
+        starting_triangle,
+        triangle_count,
+        depth,
+        default_scale,
+    )
+}
+
+/// Creates a `GeometryTriangles` object holding `triangle_count` triangles and wraps it in
+/// a geometry instance.
+///
+/// Triangle `i` is centred on x = `starting_triangle + i`: its base runs from
+/// `(x - scale, -scale)` to `(x + scale, -scale)`, its apex is `(x, scale)`, and all three
+/// vertices lie at z = `depth`. Vertices are stored as packed `FLOAT3`s (stride 12) and the
+/// index buffer is the identity sequence `0, 1, 2, ...`.
+///
+/// Returns `(geometry_instance, geometry_triangles)`.
+unsafe fn create_triangles_scaled<Optix: OptixFns>(
+    o: &mut Optix,
+    ctx: RTcontext,
+    starting_triangle: usize,
+    triangle_count: usize,
+    depth: f32,
+    scale: f32,
+) -> (RTgeometryinstance, RTgeometrytriangles) {
+    let mut input_buffer = ptr::null_mut();
+    o.rtBufferCreate(ctx, RTbuffertype::RT_BUFFER_INPUT.0, &mut input_buffer);
+    o.rtBufferSetFormat(input_buffer, RTformat::RT_FORMAT_FLOAT3);
+    o.rtBufferSetSize1D(input_buffer, (triangle_count * 3) as u64);
+    {
+        let mut host_ptr = ptr::null_mut();
+        o.rtBufferMap(input_buffer, &mut host_ptr);
+        // Arrays, unlike tuples, have a guaranteed element order and no padding, which is
+        // what the packed FLOAT3 buffer requires.
+        let triangles = host_ptr as *mut [[f32; 3]; 3];
+        for i in 0..triangle_count {
+            let point = (starting_triangle + i) as f32;
+            triangles.add(i).write([
+                [point - scale, -scale, depth],
+                [point + scale, -scale, depth],
+                [point, scale, depth],
+            ]);
+        }
+        o.rtBufferUnmap(input_buffer);
+    }
+    let mut index_buffer = ptr::null_mut();
+    o.rtBufferCreate(ctx, RTbuffertype::RT_BUFFER_INPUT.0, &mut index_buffer);
+    o.rtBufferSetFormat(index_buffer, RTformat::RT_FORMAT_UNSIGNED_INT3);
+    o.rtBufferSetSize1D(index_buffer, triangle_count as u64);
+    {
+        let mut host_ptr = ptr::null_mut();
+        o.rtBufferMap(index_buffer, &mut host_ptr);
+        let indices = host_ptr as *mut u32;
+        // Identity indexing: vertex `n` of the vertex buffer is index `n`.
+        for index in 0..triangle_count * 3 {
+            indices.add(index).write(index as u32);
+        }
+        o.rtBufferUnmap(index_buffer);
+    }
+    let mut geometry_triangles = ptr::null_mut();
+    o.rtGeometryTrianglesCreate(ctx, &mut geometry_triangles);
+    o.rtGeometryTrianglesSetPrimitiveCount(geometry_triangles, triangle_count as u32);
+    o.rtGeometryTrianglesSetVertices(
+        geometry_triangles,
+        3 * triangle_count as u32,
+        input_buffer,
+        0,
+        12,
+        RTformat::RT_FORMAT_FLOAT3,
+    );
+    o.rtGeometryTrianglesSetTriangleIndices(
+        geometry_triangles,
+        index_buffer,
+        0,
+        12,
+        RTformat::RT_FORMAT_UNSIGNED_INT3,
+    );
+    o.rtGeometryTrianglesValidate(geometry_triangles);
+    let mut geometry_instance = ptr::null_mut();
+    o.rtGeometryInstanceCreate(ctx, &mut geometry_instance);
+    o.rtGeometryInstanceSetGeometryTriangles(geometry_instance, geometry_triangles);
+    (geometry_instance, geometry_triangles)
+}
+
+/// Contents of `any_hit_intersect.ptx`, embedded at compile time with a trailing NUL so the
+/// text can be passed to the C API as a C string.
+pub const ANY_HIT_INTERSECT_PTX: &str = concat!(include_str!("any_hit_intersect.ptx"), "\0");
+
+optix_test!(fail_on_multi_material_triangles);
+/// Attaches two materials to a triangles instance without partitioning them and expects
+/// instance validation to report `RT_ERROR_INVALID_CONTEXT`.
+unsafe fn fail_on_multi_material_triangles<Optix: OptixFns>(mut o: Optix) {
+    const VARIABLE_KEY: &[u8] = b"b_index\0";
+    let ctx = create_context(&o);
+    o.rtContextSetEntryPointCount(ctx, 1);
+    o.rtContextSetRayTypeCount(ctx, 1);
+    let first_material = create_material_with_var_u32(&mut o, ctx, VARIABLE_KEY, 0);
+    let second_material = create_material_with_var_u32(&mut o, ctx, VARIABLE_KEY, 1);
+    let (instance, _) = create_triangles(&mut o, ctx, 0, 1, 0.0);
+    o.rtGeometryInstanceSetMaterialCount(instance, 2);
+    o.rtGeometryInstanceSetMaterial(instance, 0, first_material);
+    o.rtGeometryInstanceSetMaterial(instance, 1, second_material);
+    // A triangles instance with several materials needs those materials assigned to
+    // individual triangles through rtGeometryTrianglesSetMaterialIndices; that step is
+    // deliberately skipped here.
+    let validation = o.rtGeometryInstanceValidate_unchecked(instance);
+    assert_eq!(RTresult::RT_ERROR_INVALID_CONTEXT, validation);
+    o.rtContextDestroy(ctx);
+}
+
+/// Launches entry point `entry_point` over a `width` x `height` grid.
+///
+/// # Panics
+///
+/// Panics with the result code and the context's error string when the launch does not
+/// return `RT_SUCCESS`.
+unsafe fn launch_2d<Optix: OptixFns>(
+    o: &Optix,
+    ctx: *mut RTcontext_api,
+    entry_point: u32,
+    width: u64,
+    height: u64,
+) {
+    let error = o.rtContextLaunch2D_unchecked(ctx, entry_point, width, height);
+    if error == RTresult::RT_SUCCESS {
+        return;
+    }
+    let mut err_string = ptr::null();
+    o.rtContextGetErrorString(ctx, error, &mut err_string);
+    // `CStr::from_ptr` on a null pointer is undefined behaviour, so guard against an
+    // implementation that leaves the out-parameter untouched. The lossy conversion keeps a
+    // non-UTF-8 message from masking the launch error with an unrelated panic.
+    let message = if err_string.is_null() {
+        std::borrow::Cow::Borrowed("<no error string>")
+    } else {
+        CStr::from_ptr(err_string).to_string_lossy()
+    };
+    panic!("{:?} {}", error, message);
+}
+
+/// Maps `output_buffer`, copies its first `buff.len()` elements (read as `T`) to the host
+/// and asserts that they equal `buff`.
+unsafe fn assert_buffer_eq<T: Copy + Default + PartialEq + std::fmt::Debug, Optix: OptixFns>(
+    o: &Optix,
+    output_buffer: *mut RTbuffer_api,
+    buff: &[T],
+) {
+    let element_count = buff.len();
+    let mut host_ptr = ptr::null_mut();
+    let mut contents = vec![T::default(); element_count];
+    o.rtBufferMap(output_buffer, &mut host_ptr);
+    // Copy out while the buffer is mapped; the comparison happens after unmapping.
+    let mapped = host_ptr as *const T;
+    mapped.copy_to_nonoverlapping(contents.as_mut_ptr(), element_count);
+    o.rtBufferUnmap(output_buffer);
+    assert_eq!(contents.as_slice(), buff);
+}
+
+/// Maps `output_buffer`, copies its first `buff.len()` floats to the host and asserts that
+/// they match `buff` within `epsilon`.
+unsafe fn assert_buffer_eq_float<Optix: OptixFns>(
+    o: &Optix,
+    epsilon: f32,
+    output_buffer: *mut RTbuffer_api,
+    buff: &[f32],
+) {
+    let element_count = buff.len();
+    let mut host_ptr = ptr::null_mut();
+    let mut contents = vec![0f32; element_count];
+    o.rtBufferMap(output_buffer, &mut host_ptr);
+    let mapped = host_ptr as *const f32;
+    mapped.copy_to_nonoverlapping(contents.as_mut_ptr(), element_count);
+    o.rtBufferUnmap(output_buffer);
+    assert_approx_eq!(&[f32], buff, &*contents, epsilon = epsilon);
+}
+
+/// Creates a material and declares a material-scope unsigned variable `variable_key` on
+/// it, initialised to `value`.
+///
+/// `variable_key` must already be NUL-terminated.
+unsafe fn create_material_with_var_u32<Optix: OptixFns>(
+    o: &mut Optix,
+    ctx: RTcontext,
+    variable_key: &[u8],
+    value: u32,
+) -> RTmaterial {
+    let new_material = create_material(o, ctx);
+    let key_ptr = variable_key.as_ptr();
+    let mut variable = ptr::null_mut();
+    o.rtMaterialDeclareVariable(new_material, key_ptr as _, &mut variable);
+    o.rtVariableSet1ui(variable, value);
+    new_material
+}
+
+optix_test!(any_hit_multiple_materials);
+/// Gives one custom-geometry instance two materials (with `b_index` 0 and 1) that share
+/// the same any-hit and closest-hit programs, launches two threads and expects both output
+/// buffers to read `[1, 1]`.
+unsafe fn any_hit_multiple_materials<Optix: OptixFns>(mut o: Optix) {
+    let ctx = create_context(&o);
+    o.rtContextSetEntryPointCount(ctx, 1);
+    o.rtContextSetRayTypeCount(ctx, 2);
+
+    // Context-scope `sphere` variable.
+    let mut sphere_variable = ptr::null_mut();
+    o.rtContextDeclareVariable(ctx, b"sphere\0".as_ptr() as _, &mut sphere_variable);
+    o.rtVariableSet4f(sphere_variable, 0.0, 0.0, 0.0, 100.0);
+
+    let any_hit_buffer = create_buffer_u32(&mut o, ctx, "output_buffer", 2);
+    let closest_hit_buffer = create_buffer_u32(&mut o, ctx, "output_buffer2", 2);
+
+    let ray_generation = create_program(&mut o, ctx, ANY_HIT_INTERSECT_PTX, "start\0");
+    o.rtContextSetRayGenerationProgram(ctx, 0, ray_generation);
+    let any_hit = create_program(&mut o, ctx, ANY_HIT_INTERSECT_PTX, "set_buffer\0");
+    let closest_hit = create_program(&mut o, ctx, ANY_HIT_INTERSECT_PTX, "set_buffer2\0");
+    let first_material = create_material_with_var_u32(&mut o, ctx, b"b_index\0", 0);
+    let second_material = create_material_with_var_u32(&mut o, ctx, b"b_index\0", 1);
+    let bounds = create_program(&mut o, ctx, ANY_HIT_INTERSECT_PTX, "bounds\0");
+    let intersect = create_program(&mut o, ctx, ANY_HIT_INTERSECT_PTX, "intersect\0");
+
+    let sphere_instance = create_custom_geometry(&mut o, ctx, 1, bounds, intersect);
+    o.rtGeometryInstanceSetMaterialCount(sphere_instance, 2);
+    o.rtGeometryInstanceSetMaterial(sphere_instance, 0, first_material);
+    o.rtGeometryInstanceSetMaterial(sphere_instance, 1, second_material);
+    o.rtMaterialSetAnyHitProgram(first_material, 0, any_hit);
+    o.rtMaterialSetAnyHitProgram(second_material, 0, any_hit);
+    o.rtMaterialSetClosestHitProgram(first_material, 0, closest_hit);
+    o.rtMaterialSetClosestHitProgram(second_material, 0, closest_hit);
+
+    let geometry_group = create_geometry_group(&o, ctx, &[sphere_instance]);
+    create_set_accelerator(&mut o, ctx, geometry_group, b"bvh\0");
+    launch_2d(&o, ctx, 0, 2, 1);
+
+    let expected = [1u32, 1];
+    assert_buffer_eq(&o, any_hit_buffer, &expected[..]);
+    assert_buffer_eq(&o, closest_hit_buffer, &expected[..]);
+    o.rtContextDestroy(ctx);
+}
+
+/// Creates a custom geometry with `primitives` primitives, driven by the given bounding-box
+/// and intersection programs, and returns a new geometry instance referring to it.
+unsafe fn create_custom_geometry<Optix: OptixFns>(
+    o: &mut Optix,
+    ctx: RTcontext,
+    primitives: u32,
+    bb_program: RTprogram,
+    intersect_program: RTprogram,
+) -> RTgeometryinstance {
+    // The geometry itself.
+    let mut custom_geometry = ptr::null_mut();
+    o.rtGeometryCreate(ctx, &mut custom_geometry);
+    o.rtGeometrySetPrimitiveCount(custom_geometry, primitives);
+    o.rtGeometrySetBoundingBoxProgram(custom_geometry, bb_program);
+    o.rtGeometrySetIntersectionProgram(custom_geometry, intersect_program);
+
+    // The instance wrapping it.
+    let mut instance = ptr::null_mut();
+    o.rtGeometryInstanceCreate(ctx, &mut instance);
+    o.rtGeometryInstanceSetGeometry(instance, custom_geometry);
+    instance
+}
+
+/// Compiles entry point `name` of the PTX module `text` into a program object.
+///
+/// Both strings are handed to the C API as raw pointers, so each must carry its own
+/// trailing NUL byte (e.g. `"start\0"`).
+///
+/// # Panics
+///
+/// Panics if `text` or `name` does not end with a NUL byte. Without the terminator the C
+/// side would read past the end of the Rust string.
+unsafe fn create_program<Optix: OptixFns>(
+    o: &mut Optix,
+    ctx: RTcontext,
+    text: &str,
+    name: &str,
+) -> RTprogram {
+    assert!(text.ends_with('\0'), "PTX source must be NUL-terminated");
+    assert!(name.ends_with('\0'), "program name must be NUL-terminated");
+    let mut program = mem::zeroed();
+    o.rtProgramCreateFromPTXString(ctx, text.as_ptr() as _, name.as_ptr() as _, &mut program);
+    program
+}
+
+/// Creates a zero-filled 1D `UNSIGNED_INT` output buffer of `len` elements and binds it to
+/// the context variable `name`.
+///
+/// The NUL terminator is appended to `name` here, so callers pass a plain Rust string.
+unsafe fn create_buffer_u32<Optix: OptixFns>(
+    o: &mut Optix,
+    ctx: *mut RTcontext_api,
+    name: &str,
+    len: usize,
+) -> RTbuffer {
+    let mut buffer = ptr::null_mut();
+    o.rtBufferCreate(ctx, RTbuffertype::RT_BUFFER_OUTPUT.0, &mut buffer);
+    o.rtBufferSetSize1D(buffer, len as u64);
+    o.rtBufferSetFormat(buffer, RTformat::RT_FORMAT_UNSIGNED_INT);
+
+    // Clear the contents through a host mapping.
+    let mut host_ptr = ptr::null_mut();
+    o.rtBufferMap(buffer, &mut host_ptr);
+    (host_ptr as *mut u32).write_bytes(0, len);
+    o.rtBufferUnmap(buffer);
+
+    let c_name = [name, "\0"].concat();
+    let mut variable = ptr::null_mut();
+    o.rtContextDeclareVariable(ctx, c_name.as_ptr() as _, &mut variable);
+    o.rtVariableSetObject(variable, RTobject(buffer as _));
+    buffer
+}
+
+/// Creates a 1D input/output buffer in `RT_FORMAT_USER` format with 4-byte elements,
+/// fills it with `values` and binds it to the context variable `name`.
+///
+/// The NUL terminator is appended to `name` here, so callers pass a plain Rust string.
+unsafe fn create_buffer_u32_with_values<Optix: OptixFns>(
+    o: &mut Optix,
+    ctx: *mut RTcontext_api,
+    name: &str,
+    values: &[u32],
+) -> RTbuffer {
+    let element_count = values.len();
+    let mut buffer = ptr::null_mut();
+    o.rtBufferCreate(ctx, RTbuffertype::RT_BUFFER_INPUT_OUTPUT.0, &mut buffer);
+    o.rtBufferSetSize1D(buffer, element_count as u64);
+    o.rtBufferSetFormat(buffer, RTformat::RT_FORMAT_USER);
+    o.rtBufferSetElementSize(buffer, 4);
+
+    // Upload the initial contents through a host mapping.
+    let mut host_ptr: *mut std::ffi::c_void = ptr::null_mut();
+    o.rtBufferMap(buffer, &mut host_ptr);
+    values
+        .as_ptr()
+        .copy_to_nonoverlapping(host_ptr as *mut u32, element_count);
+    o.rtBufferUnmap(buffer);
+
+    let c_name = [name, "\0"].concat();
+    let mut variable = ptr::null_mut();
+    o.rtContextDeclareVariable(ctx, c_name.as_ptr() as _, &mut variable);
+    o.rtVariableSetObject(variable, RTobject(buffer as _));
+    buffer
+}
+
+/// Contents of `callable_programs.ptx`, embedded at compile time with a trailing NUL so the
+/// text can be passed to the C API as a C string.
+const CALLABLE_PROGRAMS_PTX: &str = concat!(include_str!("callable_programs.ptx"), "\0");
+
+optix_test!(callable_programs);
+/// Publishes the ids of the `add_value` and `multiply_value` programs through the context
+/// variables `add_fn` and `mult_fn`, launches a single thread and expects
+/// `output_buffer[0] == 6`.
+///
+/// `value` is 1 at context scope and 2 on each of the two callable programs.
+unsafe fn callable_programs<Optix: OptixFns>(mut o: Optix) {
+    const VALUE_KEY: &[u8] = b"value\0";
+    let ctx = create_context(&o);
+    o.rtContextSetEntryPointCount(ctx, 1);
+    o.rtContextSetRayTypeCount(ctx, 1);
+
+    let ray_generation = create_program(&mut o, ctx, CALLABLE_PROGRAMS_PTX, "start\0");
+    let adder = create_program(&mut o, ctx, CALLABLE_PROGRAMS_PTX, "add_value\0");
+    let multiplier = create_program(&mut o, ctx, CALLABLE_PROGRAMS_PTX, "multiply_value\0");
+
+    context_set_u32(&mut o, ctx, VALUE_KEY, 1);
+    program_set_u32(&mut o, adder, VALUE_KEY, 2);
+    program_set_u32(&mut o, multiplier, VALUE_KEY, 2);
+
+    let adder_id = program_get_id(&mut o, adder);
+    let multiplier_id = program_get_id(&mut o, multiplier);
+    o.rtContextSetRayGenerationProgram(ctx, 0, ray_generation);
+    context_set_u32(&mut o, ctx, b"add_fn\0", adder_id as u32);
+    context_set_u32(&mut o, ctx, b"mult_fn\0", multiplier_id as u32);
+
+    let result_buffer = create_buffer_u32(&mut o, ctx, "output_buffer", 1);
+    launch_2d(&o, ctx, 0, 1, 1);
+    assert_buffer_eq(&o, result_buffer, &[6u32]);
+    o.rtContextDestroy(ctx);
+}
+
+/// Returns the id reported by `rtProgramGetId` for `program`.
+unsafe fn program_get_id<Optix: OptixFns>(o: &mut Optix, program: *mut RTprogram_api) -> i32 {
+    let mut id: i32 = 0;
+    o.rtProgramGetId(program, &mut id);
+    id
+}
+
+/// Declares the context-scope variable `variable_name`, sets it to the unsigned integer
+/// `value` and returns the variable handle.
+///
+/// `variable_name` must already be NUL-terminated.
+unsafe fn context_set_u32<Optix: OptixFns>(
+    o: &mut Optix,
+    ctx: RTcontext,
+    variable_name: &[u8],
+    value: u32,
+) -> RTvariable {
+    let name_ptr = variable_name.as_ptr();
+    let mut handle = ptr::null_mut();
+    o.rtContextDeclareVariable(ctx, name_ptr as _, &mut handle);
+    o.rtVariableSet1ui(handle, value);
+    handle
+}
+
+/// Declares the program-scope variable `variable_name` on `program`, sets it to the
+/// unsigned integer `value` and returns the variable handle.
+///
+/// `variable_name` must already be NUL-terminated.
+unsafe fn program_set_u32<Optix: OptixFns>(
+    o: &mut Optix,
+    program: RTprogram,
+    variable_name: &[u8],
+    value: u32,
+) -> RTvariable {
+    let name_ptr = variable_name.as_ptr();
+    let mut handle = ptr::null_mut();
+    o.rtProgramDeclareVariable(program, name_ptr as _, &mut handle);
+    o.rtVariableSet1ui(handle, value);
+    handle
+}
+
+/// Contents of `texture_sampler.ptx`, embedded at compile time with a trailing NUL so the
+/// text can be passed to the C API as a C string.
+const TEXTURE_SAMPLER_PTX: &str = concat!(include_str!("texture_sampler.ptx"), "\0");
+
+optix_test!(texture_sampler);
+/// Exposes one 2x2 texture as the context object `image1` and a second one by sampler id
+/// through the context variable `image2`, launches a single thread and expects the 2x4
+/// `FLOAT4` output buffer to hold the texels of the first image followed by those of the
+/// second.
+unsafe fn texture_sampler<Optix: OptixFns>(mut o: Optix) {
+    let first_texels: [(f32, f32, f32, f32); 4] = [
+        (0.0, 0.25, 0.5, 0.75),
+        (1.0, 1.25, 1.5, 1.75),
+        (2.0, 2.25, 2.5, 2.75),
+        (3.0, 3.25, 3.5, 3.75),
+    ];
+    let second_texels: [(f32, f32, f32, f32); 4] = [
+        (4.0, 4.25, 4.5, 4.75),
+        (5.0, 5.25, 5.5, 5.75),
+        (6.0, 6.25, 6.5, 6.75),
+        (7.0, 7.25, 7.5, 7.75),
+    ];
+
+    let ctx = create_context(&o);
+    o.rtContextSetEntryPointCount(ctx, 1);
+    o.rtContextSetRayTypeCount(ctx, 1);
+    let ray_generation = create_program(&mut o, ctx, TEXTURE_SAMPLER_PTX, "start\0");
+    o.rtContextSetRayGenerationProgram(ctx, 0, ray_generation);
+
+    let first_image = create_image(&mut o, ctx, &first_texels, 2);
+    let second_image = create_image(&mut o, ctx, &second_texels, 2);
+    let result_buffer = create_buffer_2d(
+        &mut o,
+        ctx,
+        RTformat::RT_FORMAT_FLOAT4,
+        "output_buffer",
+        (2, 4),
+    );
+
+    // The first image is bound as an object, the second one only by its sampler id.
+    context_set_object(&mut o, ctx, b"image1\0", first_image);
+    let mut second_image_id = 0;
+    o.rtTextureSamplerGetId(second_image, &mut second_image_id);
+    context_set_u32(&mut o, ctx, b"image2\0", second_image_id as u32);
+
+    launch_2d(&o, ctx, 0, 1, 1);
+    let expected = [first_texels, second_texels].concat();
+    assert_buffer_eq::<(f32, f32, f32, f32), _>(&o, result_buffer, &expected);
+    o.rtContextDestroy(ctx);
+}
+
+/// Declares the context-scope variable `variable_name`, binds the API object `value` to it
+/// and returns the variable handle.
+///
+/// `variable_name` must already be NUL-terminated.
+unsafe fn context_set_object<T, Optix: OptixFns>(
+    o: &mut Optix,
+    ctx: *mut RTcontext_api,
+    variable_name: &[u8],
+    value: *mut T,
+) -> RTvariable {
+    let name_ptr = variable_name.as_ptr();
+    let mut handle = ptr::null_mut();
+    o.rtContextDeclareVariable(ctx, name_ptr as _, &mut handle);
+    let object = RTobject(value as _);
+    o.rtVariableSetObject(handle, object);
+    handle
+}
+
+/// Creates a texture sampler backed by a 2D `FLOAT4` input buffer filled with `content`.
+///
+/// `content` holds the texels, `width` texels per row; the buffer height is
+/// `content.len() / width`. The sampler is configured with `RT_WRAP_REPEAT` on dimensions
+/// 0 and 1, nearest min/mag filtering with no mipmap filtering,
+/// `RT_TEXTURE_INDEX_ARRAY_INDEX` indexing, `RT_TEXTURE_READ_NORMALIZED_FLOAT` read mode
+/// and a maximum anisotropy of 1.
+///
+/// # Panics
+///
+/// Panics when `width` is zero or `content.len()` is not a multiple of `width`. In the
+/// latter case the buffer would hold fewer texels than `content` and the upload below
+/// would write past its end.
+unsafe fn create_image<Optix: OptixFns>(
+    o: &mut Optix,
+    ctx: *mut RTcontext_api,
+    content: &[(f32, f32, f32, f32)],
+    width: u32,
+) -> RTtexturesampler {
+    assert!(width != 0, "image width must be non-zero");
+    assert!(
+        content.len() % width as usize == 0,
+        "image content must consist of complete rows"
+    );
+    let mut image = ptr::null_mut();
+    o.rtTextureSamplerCreate(ctx, &mut image);
+    let mut buffer = ptr::null_mut();
+    o.rtBufferCreate(ctx, RTbuffertype::RT_BUFFER_INPUT.0, &mut buffer);
+    o.rtBufferSetSize2D(buffer, width as u64, (content.len() as u32 / width) as u64);
+    o.rtBufferSetFormat(buffer, RTformat::RT_FORMAT_FLOAT4);
+    let mut buffer_pointer = ptr::null_mut();
+    o.rtBufferMap(buffer, &mut buffer_pointer);
+    // NOTE(review): this relies on `(f32, f32, f32, f32)` being laid out as four packed
+    // floats; Rust does not guarantee tuple layout. The tuple type is kept because it is
+    // part of the signature.
+    ptr::copy_nonoverlapping(
+        content.as_ptr(),
+        buffer_pointer as *mut (f32, f32, f32, f32),
+        content.len(),
+    );
+    o.rtBufferUnmap(buffer);
+    o.rtTextureSamplerSetWrapMode(image, 0, RTwrapmode::RT_WRAP_REPEAT);
+    o.rtTextureSamplerSetWrapMode(image, 1, RTwrapmode::RT_WRAP_REPEAT);
+    o.rtTextureSamplerSetFilteringModes(
+        image,
+        RTfiltermode::RT_FILTER_NEAREST,
+        RTfiltermode::RT_FILTER_NEAREST,
+        RTfiltermode::RT_FILTER_NONE,
+    );
+    o.rtTextureSamplerSetIndexingMode(image, RTtextureindexmode::RT_TEXTURE_INDEX_ARRAY_INDEX);
+    o.rtTextureSamplerSetReadMode(image, RTtexturereadmode::RT_TEXTURE_READ_NORMALIZED_FLOAT);
+    o.rtTextureSamplerSetMaxAnisotropy(image, 1.0f32);
+    o.rtTextureSamplerSetBuffer(image, 0, 0, buffer);
+    image
+}
+
+/// Creates a 1D output buffer of `width` elements in the given `format` and binds it to
+/// the context variable `name`.
+///
+/// The NUL terminator is appended to `name` here, so callers pass a plain Rust string.
+unsafe fn create_buffer_1d<Optix: OptixFns>(
+    o: &mut Optix,
+    ctx: *mut RTcontext_api,
+    format: RTformat,
+    name: &str,
+    width: u64,
+) -> RTbuffer {
+    let mut buffer = ptr::null_mut();
+    o.rtBufferCreate(ctx, RTbuffertype::RT_BUFFER_OUTPUT.0, &mut buffer);
+    o.rtBufferSetSize1D(buffer, width);
+    o.rtBufferSetFormat(buffer, format);
+
+    let c_name = [name, "\0"].concat();
+    let mut variable = ptr::null_mut();
+    o.rtContextDeclareVariable(ctx, c_name.as_ptr() as _, &mut variable);
+    o.rtVariableSetObject(variable, RTobject(buffer as _));
+    buffer
+}
+
+/// Creates a 2D output buffer of `width` x `height` elements in the given `format` and
+/// binds it to the context variable `name`.
+///
+/// The NUL terminator is appended to `name` here, so callers pass a plain Rust string.
+unsafe fn create_buffer_2d<Optix: OptixFns>(
+    o: &mut Optix,
+    ctx: *mut RTcontext_api,
+    format: RTformat,
+    name: &str,
+    (width, height): (u64, u64),
+) -> RTbuffer {
+    let mut buffer = ptr::null_mut();
+    o.rtBufferCreate(ctx, RTbuffertype::RT_BUFFER_OUTPUT.0, &mut buffer);
+    o.rtBufferSetSize2D(buffer, width, height);
+    o.rtBufferSetFormat(buffer, format);
+
+    let c_name = [name, "\0"].concat();
+    let mut variable = ptr::null_mut();
+    o.rtContextDeclareVariable(ctx, c_name.as_ptr() as _, &mut variable);
+    o.rtVariableSetObject(variable, RTobject(buffer as _));
+    buffer
+}
+
+/// Contents of `barycentrics.ptx`, embedded at compile time with a trailing NUL so the text
+/// can be passed to the C API as a C string.
+const BARYCENTRICS_PTX: &str = concat!(include_str!("barycentrics.ptx"), "\0");
+
+optix_test!(barycentrics);
+/// Traces against a group made of two triangle instances and one custom-geometry sphere,
+/// all sharing one material with a closest-hit program, and checks the values written to
+/// `output_buffer1` (float pairs) and `output_buffer3` (unsigned integers) by four threads.
+///
+/// The attribute program is attached only to the first `GeometryTriangles` object.
+unsafe fn barycentrics<Optix: OptixFns>(mut o: Optix) {
+    let ctx = create_context(&o);
+    o.rtContextSetEntryPointCount(ctx, 1);
+    o.rtContextSetRayTypeCount(ctx, 1);
+    let start = create_program(&mut o, ctx, BARYCENTRICS_PTX, "start\0");
+    o.rtContextSetRayGenerationProgram(ctx, 0, start);
+    let attribute_program = create_program(&mut o, ctx, BARYCENTRICS_PTX, "attribute_program\0");
+    let closest_hit = create_program(&mut o, ctx, BARYCENTRICS_PTX, "closest_hit\0");
+    let bounds = create_program(&mut o, ctx, BARYCENTRICS_PTX, "bounds\0");
+    let intersect = create_program(&mut o, ctx, BARYCENTRICS_PTX, "intersect\0");
+    let mut sphere = ptr::null_mut();
+    o.rtContextDeclareVariable(ctx, b"sphere\0".as_ptr() as _, &mut sphere);
+    o.rtVariableSet4f(sphere, 0.0, 0.0, 0.0, 100.0);
+    let output_buffer1 =
+        create_buffer(&mut o, ctx, RTformat::RT_FORMAT_FLOAT2, "output_buffer1", 4);
+    // Still declared because the context variable exists for the PTX, even though the
+    // check on its contents is disabled below.
+    create_buffer(&mut o, ctx, RTformat::RT_FORMAT_FLOAT2, "output_buffer2", 4);
+    let output_buffer3 = create_buffer(
+        &mut o,
+        ctx,
+        RTformat::RT_FORMAT_UNSIGNED_INT,
+        "output_buffer3",
+        4,
+    );
+    let sphere = create_custom_geometry(&mut o, ctx, 1, bounds, intersect);
+    let (triangles_instance, triangles) = create_triangles(&mut o, ctx, 0, 2, 0.0);
+    let (triangles_instance2, _) = create_triangles(&mut o, ctx, 2, 1, 0.0);
+    o.rtGeometryTrianglesSetAttributeProgram(triangles, attribute_program);
+    let material = create_material(&mut o, ctx);
+    setup_geometry_instance(
+        &mut o,
+        triangles_instance,
+        material,
+        ptr::null_mut(),
+        closest_hit,
+    );
+    setup_geometry_instance(
+        &mut o,
+        triangles_instance2,
+        material,
+        ptr::null_mut(),
+        closest_hit,
+    );
+    setup_geometry_instance(&mut o, sphere, material, ptr::null_mut(), closest_hit);
+    let mut group = ptr::null_mut();
+    let triangles_group =
+        create_geometry_group(&o, ctx, &[triangles_instance, triangles_instance2]);
+    create_set_accelerator(&mut o, ctx, triangles_group, b"unused1\0");
+    let sphere_group = create_geometry_group(&o, ctx, &[sphere]);
+    create_set_accelerator(&mut o, ctx, sphere_group, b"unused2\0");
+    o.rtGroupCreate(ctx, &mut group);
+    o.rtGroupSetChildCount(group, 2);
+    o.rtGroupSetChild(group, 0, RTobject(triangles_group as _));
+    o.rtGroupSetChild(group, 1, RTobject(sphere_group as _));
+    create_set_accelerator_group(&mut o, ctx, group, b"bvh\0");
+    launch_2d(&mut o, ctx, 0, 4, 1);
+    assert_buffer_eq_float(
+        &o,
+        0.000001,
+        output_buffer1,
+        &[
+            0.25f32, 0.1f32, 0.25f32, 0.1f32, 0.25f32, 0.5f32, 100f32, 200f32,
+        ],
+    );
+    // NOTE(review): the check on `output_buffer2` was already disabled in the original;
+    // the reason is not visible here.
+    /*
+    assert_buffer_eq_float(
+        &o,
+        0.000001,
+        output_buffer2,
+        &[
+            0.25f32, 0.5f32, 0.25f32, 0.5f32, 0.25f32, 0.5f32, 100f32, 200f32,
+        ],
+    );
+    */
+    assert_buffer_eq(&o, output_buffer3, &[0u32, 1u32, 0u32, 0u32]);
+    // Release the context like the sibling tests do instead of leaking it.
+    o.rtContextDestroy(ctx);
+}
+
+/// Creates a 1D output buffer of `len` elements in the given `format` and binds it to the
+/// context variable `name`.
+///
+/// This is `create_buffer_1d` with a `usize` length; it delegates to that helper instead
+/// of repeating the same sequence of API calls.
+unsafe fn create_buffer<Optix: OptixFns>(
+    o: &mut Optix,
+    ctx: *mut RTcontext_api,
+    format: RTformat,
+    name: &str,
+    len: usize,
+) -> RTbuffer {
+    create_buffer_1d(o, ctx, format, name, len as u64)
+}
+
+/// Contents of `trace_control.ptx`, embedded at compile time with a trailing NUL so the
+/// text can be passed to the C API as a C string.
+const TRACE_CONTROL_PTX: &str = concat!(include_str!("trace_control.ptx"), "\0");
+
+optix_test!(ignore_intersection);
+/// Runs `trace_control` with the `any_hit_ignore` program and expects an output of 1.
+unsafe fn ignore_intersection<Optix: OptixFns>(o: Optix) {
+    let expected_output = 1;
+    trace_control(o, "any_hit_ignore\0", expected_output)
+}
+
+optix_test!(terminate_ray);
+/// Runs `trace_control` with the `any_hit_terminate` program and expects an output of 2.
+unsafe fn terminate_ray<Optix: OptixFns>(o: Optix) {
+    let expected_output = 2;
+    trace_control(o, "any_hit_terminate\0", expected_output)
+}
+
+/// Shared body of the `ignore_intersection` and `terminate_ray` tests.
+///
+/// Builds four custom-geometry instances that share one material carrying the any-hit
+/// program named `any_hit` plus the `closest_hit` program, launches a single thread and
+/// asserts that `output_buffer[0] == result`.
+///
+/// `any_hit` must be NUL-terminated, since it is passed on to `create_program`.
+unsafe fn trace_control<Optix: OptixFns>(mut o: Optix, any_hit: &str, result: u32) {
+    let ctx = create_context(&o);
+    o.rtContextSetEntryPointCount(ctx, 1);
+    o.rtContextSetRayTypeCount(ctx, 1);
+    let mut sphere = ptr::null_mut();
+    o.rtContextDeclareVariable(ctx, b"sphere\0".as_ptr() as _, &mut sphere);
+    o.rtVariableSet4f(sphere, 0.0, 0.0, 0.0, 100.0);
+    let start = create_program(&mut o, ctx, TRACE_CONTROL_PTX, "start\0");
+    o.rtContextSetRayGenerationProgram(ctx, 0, start);
+    let bounds = create_program(&mut o, ctx, TRACE_CONTROL_PTX, "bounds\0");
+    let intersect = create_program(&mut o, ctx, TRACE_CONTROL_PTX, "intersect\0");
+    let any_hit = create_program(&mut o, ctx, TRACE_CONTROL_PTX, any_hit);
+    let closest_hit = create_program(&mut o, ctx, TRACE_CONTROL_PTX, "closest_hit\0");
+    let output_buffer = create_buffer_u32(&mut o, ctx, "output_buffer", 1);
+    // Declared for the PTX; its contents are not checked by this test.
+    create_buffer_u32(&mut o, ctx, "temp_buffer", 1);
+    let sphere1 = create_custom_geometry(&mut o, ctx, 1, bounds, intersect);
+    let sphere2 = create_custom_geometry(&mut o, ctx, 1, bounds, intersect);
+    let sphere3 = create_custom_geometry(&mut o, ctx, 1, bounds, intersect);
+    let sphere4 = create_custom_geometry(&mut o, ctx, 1, bounds, intersect);
+    let material = create_material(&mut o, ctx);
+    setup_geometry_instance(&mut o, sphere1, material, any_hit, closest_hit);
+    setup_geometry_instance(&mut o, sphere2, material, any_hit, closest_hit);
+    setup_geometry_instance(&mut o, sphere3, material, any_hit, closest_hit);
+    setup_geometry_instance(&mut o, sphere4, material, any_hit, closest_hit);
+    let geo_group = create_geometry_group(&o, ctx, &[sphere1, sphere2, sphere3, sphere4]);
+    create_set_accelerator(&mut o, ctx, geo_group, b"bvh\0");
+    launch_2d(&mut o, ctx, 0, 1, 1);
+    assert_buffer_eq(&o, output_buffer, &[result]);
+    // Release the context like the sibling tests do instead of leaking it.
+    o.rtContextDestroy(ctx);
+}
+
+optix_test!(attribute_program_runs_before_closest_hit);
+/// Traces against one triangle that has the `attribute1` attribute program, the
+/// `any_hit_plus_one` any-hit program and the `closest_hit` program, then expects
+/// `output_buffer[0] == 0xc4bb2187 + 1` and `temp_buffer[0] == 1`.
+unsafe fn attribute_program_runs_before_closest_hit<Optix: OptixFns>(mut o: Optix) {
+    let ctx = create_context(&o);
+    o.rtContextSetEntryPointCount(ctx, 1);
+    o.rtContextSetRayTypeCount(ctx, 1);
+    let start = create_program(&mut o, ctx, TRACE_CONTROL_PTX, "start\0");
+    o.rtContextSetRayGenerationProgram(ctx, 0, start);
+    let attribute_program = create_program(&mut o, ctx, TRACE_CONTROL_PTX, "attribute1\0");
+    let any_hit = create_program(&mut o, ctx, TRACE_CONTROL_PTX, "any_hit_plus_one\0");
+    let closest_hit = create_program(&mut o, ctx, TRACE_CONTROL_PTX, "closest_hit\0");
+    let output_buffer = create_buffer_u32(&mut o, ctx, "output_buffer", 1);
+    let temp_buffer = create_buffer_u32(&mut o, ctx, "temp_buffer", 1);
+    let material = create_material(&mut o, ctx);
+    let (triangle1_instance, triangle1) = create_triangles(&mut o, ctx, 0, 1, -0.1f32);
+    o.rtGeometryTrianglesSetAttributeProgram(triangle1, attribute_program);
+    setup_geometry_instance(&mut o, triangle1_instance, material, any_hit, closest_hit);
+    let geo_group = create_geometry_group(&o, ctx, &[triangle1_instance]);
+    create_set_accelerator(&mut o, ctx, geo_group, b"bvh\0");
+    launch_2d(&mut o, ctx, 0, 1, 1);
+    assert_buffer_eq(&o, output_buffer, &[0xc4bb2187u32 + 1]);
+    assert_buffer_eq(&o, temp_buffer, &[1]);
+    // Release the context like the sibling tests do instead of leaking it.
+    o.rtContextDestroy(ctx);
+}
+
+optix_test!(rollback_attributes_on_ignore);
+/// Traces against one triangle that has the `attribute2` attribute program and the
+/// `any_hit_always_ignore` any-hit program, then expects `output_buffer[0]` to differ from
+/// `0xc4bb2187` and `temp_buffer[0] == 1`.
+unsafe fn rollback_attributes_on_ignore<Optix: OptixFns>(mut o: Optix) {
+    let ctx = create_context(&o);
+    o.rtContextSetEntryPointCount(ctx, 1);
+    o.rtContextSetRayTypeCount(ctx, 1);
+    let start = create_program(&mut o, ctx, TRACE_CONTROL_PTX, "start\0");
+    o.rtContextSetRayGenerationProgram(ctx, 0, start);
+    let attribute_program = create_program(&mut o, ctx, TRACE_CONTROL_PTX, "attribute2\0");
+    let any_hit = create_program(&mut o, ctx, TRACE_CONTROL_PTX, "any_hit_always_ignore\0");
+    let closest_hit = create_program(&mut o, ctx, TRACE_CONTROL_PTX, "closest_hit\0");
+    let output_buffer = create_buffer_u32(&mut o, ctx, "output_buffer", 1);
+    let temp_buffer = create_buffer_u32(&mut o, ctx, "temp_buffer", 1);
+    let material = create_material(&mut o, ctx);
+    let (triangle1_instance, triangle1) = create_triangles(&mut o, ctx, 0, 1, -0.1f32);
+    o.rtGeometryTrianglesSetAttributeProgram(triangle1, attribute_program);
+    setup_geometry_instance(&mut o, triangle1_instance, material, any_hit, closest_hit);
+    let geo_group = create_geometry_group(&o, ctx, &[triangle1_instance]);
+    create_set_accelerator(&mut o, ctx, geo_group, b"bvh\0");
+    launch_2d(&mut o, ctx, 0, 1, 1);
+    assert_buffer_neq(&o, output_buffer, 0xc4bb2187u32);
+    assert_buffer_eq(&o, temp_buffer, &[1]);
+    // Release the context like the sibling tests do instead of leaking it.
+    o.rtContextDestroy(ctx);
+}
+
+/// Maps `output_buffer`, reads its first element as a `T` and asserts that it differs
+/// from `value`.
+unsafe fn assert_buffer_neq<T: Copy + Default + PartialEq + std::fmt::Debug, Optix: OptixFns>(
+    o: &Optix,
+    output_buffer: *mut RTbuffer_api,
+    value: T,
+) {
+    let mut host_ptr = ptr::null_mut();
+    o.rtBufferMap(output_buffer, &mut host_ptr);
+    // Only the first element is inspected; copy it out before unmapping.
+    let first_element = ptr::read(host_ptr as *const T);
+    o.rtBufferUnmap(output_buffer);
+    assert_ne!(value, first_element);
+}
+
+/// Contents of `buffer_id.ptx`, embedded at compile time with a trailing NUL so the text
+/// can be passed to the C API as a C string.
+const BUFFER_ID_PTX: &str = concat!(include_str!("buffer_id.ptx"), "\0");
+/// Contents of `buffer_id_call.ptx`, embedded the same way as `BUFFER_ID_PTX`.
+const BUFFER_ID_CALL_PTX: &str = concat!(include_str!("buffer_id_call.ptx"), "\0");
+
+optix_test!(buffer_id);
+/// Runs the shared buffer-id scenario with the PTX from `BUFFER_ID_PTX`.
+unsafe fn buffer_id<Optix: OptixFns>(o: Optix) {
+    let ptx = BUFFER_ID_PTX;
+    buffer_id_impl(o, ptx)
+}
+
+optix_test!(buffer_id_call);
+/// Runs the shared buffer-id scenario with the PTX from `BUFFER_ID_CALL_PTX`.
+unsafe fn buffer_id_call<Optix: OptixFns>(o: Optix) {
+    let ptx = BUFFER_ID_CALL_PTX;
+    buffer_id_impl(o, ptx)
+}
+
+/// Shared body of the `buffer_id` and `buffer_id_call` tests.
+///
+/// Creates a three-element `output_buffer`, publishes its id through a buffer-of-buffers
+/// bound to the context variable `buffers`, launches the `start` program from `text` on a
+/// single thread and expects the output buffer to read `[0, 3, 0x0118378c]`.
+///
+/// `text` must be NUL-terminated PTX, since it is passed on to `create_program`.
+unsafe fn buffer_id_impl<Optix: OptixFns>(mut o: Optix, text: &str) {
+    let ctx = create_context(&o);
+    o.rtContextSetEntryPointCount(ctx, 1);
+    o.rtContextSetRayTypeCount(ctx, 1);
+    let start = create_program(&mut o, ctx, text, "start\0");
+    o.rtContextSetRayGenerationProgram(ctx, 0, start);
+    let buffer = create_buffer_u32(&mut o, ctx, "output_buffer", 3);
+    create_buffer_of_buffers(&mut o, ctx, "buffers", &[buffer][..]);
+    launch_2d(&mut o, ctx, 0, 1, 1);
+    assert_buffer_eq(&o, buffer, &[0, 3, 0x0118378c]);
+    // Release the context like the sibling tests do instead of leaking it.
+    o.rtContextDestroy(ctx);
+}
+
+/// Creates a 1D `RT_FORMAT_BUFFER_ID` input buffer holding the ids of every buffer in
+/// `buffer`, in slice order, and binds it to the context variable `name`.
+///
+/// The NUL terminator is appended to `name` here, so callers pass a plain Rust string.
+unsafe fn create_buffer_of_buffers<Optix: OptixFns>(
+    o: &mut Optix,
+    ctx: RTcontext,
+    name: &str,
+    buffer: &[RTbuffer],
+) {
+    // Resolve the ids first; they become the contents of the new buffer.
+    let mut ids = Vec::with_capacity(buffer.len());
+    for &sub_buffer in buffer {
+        let mut id = 0;
+        o.rtBufferGetId(sub_buffer, &mut id);
+        ids.push(id);
+    }
+
+    let mut id_buffer = ptr::null_mut();
+    o.rtBufferCreate(ctx, RTbuffertype::RT_BUFFER_INPUT.0, &mut id_buffer);
+    o.rtBufferSetSize1D(id_buffer, ids.len() as u64);
+    o.rtBufferSetFormat(id_buffer, RTformat::RT_FORMAT_BUFFER_ID);
+
+    let mut host_ptr = ptr::null_mut();
+    o.rtBufferMap(id_buffer, &mut host_ptr);
+    ptr::copy_nonoverlapping(ids.as_ptr(), host_ptr as _, ids.len());
+    o.rtBufferUnmap(id_buffer);
+
+    let c_name = [name, "\0"].concat();
+    let mut variable = ptr::null_mut();
+    o.rtContextDeclareVariable(ctx, c_name.as_ptr() as _, &mut variable);
+    o.rtVariableSetObject(variable, RTobject(id_buffer as _));
+}
+
+/// Contents of `buffer_id_callable.ptx`, embedded at compile time with a trailing NUL so
+/// the text can be passed to the C API as a C string.
+const BUFFER_ID_CALLABLE_PTX: &str = concat!(include_str!("buffer_id_callable.ptx"), "\0");
+
+optix_test!(buffer_id_callable);
+
+/// Same scenario as `buffer_id_impl`, with the id of the `callable` program additionally
+/// published through the context variable `program`.
+///
+/// Expects the three-element output buffer to read `[0, 3, 0x0118378c]`.
+unsafe fn buffer_id_callable<Optix: OptixFns>(mut o: Optix) {
+    let ctx = create_context(&o);
+    o.rtContextSetEntryPointCount(ctx, 1);
+    o.rtContextSetRayTypeCount(ctx, 1);
+    let start = create_program(&mut o, ctx, BUFFER_ID_CALLABLE_PTX, "start\0");
+    let callable = create_program(&mut o, ctx, BUFFER_ID_CALLABLE_PTX, "callable\0");
+    let callable_id = program_get_id(&mut o, callable);
+    context_set_u32(&mut o, ctx, b"program\0", callable_id as u32);
+    o.rtContextSetRayGenerationProgram(ctx, 0, start);
+    let buffer = create_buffer_u32(&mut o, ctx, "output_buffer", 3);
+    create_buffer_of_buffers(&mut o, ctx, "buffers", &[buffer][..]);
+    launch_2d(&mut o, ctx, 0, 1, 1);
+    assert_buffer_eq(&o, buffer, &[0, 3, 0x0118378c]);
+    // Release the context like the sibling tests do instead of leaking it.
+    o.rtContextDestroy(ctx);
+}
+
+/// Contents of `triangle_front.ptx`, embedded at compile time with a trailing NUL so the
+/// text can be passed to the C API as a C string.
+const TRIANGLE_FRONT_PTX: &str = concat!(include_str!("triangle_front.ptx"), "\0");
+
+optix_test!(triangle_front);
+unsafe fn triangle_front<Optix: OptixFns>(mut o: Optix) {
+ let ctx = create_context(&o);
+ o.rtContextSetEntryPointCount(ctx, 1);
+ o.rtContextSetRayTypeCount(ctx, 1);
+ let start = create_program(&mut o, ctx, TRIANGLE_FRONT_PTX, "start\0");
+ let bounds = create_program(&mut o, ctx, TRIANGLE_FRONT_PTX, "bounds\0");
+ let intersect = create_program(&mut o, ctx, TRIANGLE_FRONT_PTX, "intersect\0");
+ let closest_hit = create_program(&mut o, ctx, TRIANGLE_FRONT_PTX, "closest_hit\0");
+ let mut sphere = ptr::null_mut();
+ o.rtContextDeclareVariable(ctx, b"sphere\0".as_ptr() as _, &mut sphere);
+ o.rtVariableSet4f(sphere, 0.0, 0.0, 0.0, 100.0);
+ o.rtContextSetRayGenerationProgram(ctx, 0, start);
+ let output_buffer1 = create_buffer_u32(&mut o, ctx, "output_buffer1", 3);
+ let output_buffer2 = create_buffer_u32(&mut o, ctx, "output_buffer2", 3);
+ let output_buffer3 = create_buffer_u32(&mut o, ctx, "output_buffer3", 3);
+ let sphere = create_custom_geometry(&mut o, ctx, 1, bounds, intersect);
+ let (triangles_instance1, _) = create_triangles(&mut o, ctx, 0, 1, 0.0);
+ let (triangles_instance2, _) = create_triangles(&mut o, ctx, 1, 1, 0.0);
+ let material = create_material(&mut o, ctx);
+ setup_geometry_instance(&mut o, sphere, material, ptr::null_mut(), closest_hit);
+ setup_geometry_instance(
+ &mut o,
+ triangles_instance1,
+ material,
+ ptr::null_mut(),
+ closest_hit,
+ );
+ setup_geometry_instance(
+ &mut o,
+ triangles_instance2,
+ material,
+ ptr::null_mut(),
+ closest_hit,
+ );
+ let mut group = ptr::null_mut();
+ let triangles_group =
+ create_geometry_group(&o, ctx, &[triangles_instance1, triangles_instance2]);
+ create_set_accelerator(&mut o, ctx, triangles_group, b"unused1\0");
+ let sphere_group = create_geometry_group(&o, ctx, &[sphere]);
+ create_set_accelerator(&mut o, ctx, sphere_group, b"unused2\0");
+ o.rtGroupCreate(ctx, &mut group);
+ o.rtGroupSetChildCount(group, 2);
+ o.rtGroupSetChild(group, 0, RTobject(triangles_group as _));
+ o.rtGroupSetChild(group, 1, RTobject(sphere_group as _));
+ create_set_accelerator_group(&mut o, ctx, group, b"bvh\0");
+ launch_2d(&mut o, ctx, 0, 3, 1);
+ assert_buffer_eq(&o, output_buffer1, &[2, 2, 1]);
+ assert_buffer_eq(&o, output_buffer2, &[2, 1, 1]);
+ assert_buffer_eq(&o, output_buffer3, &[1, 2, 1]);
+}
+
+const TRANSFORM_PTX: &'static str = concat!(include_str!("transform.ptx"), "\0"); // PTX text embedded at compile time with an explicit trailing NUL appended
+
+optix_test!(ray_transform); // registers the function below with the test macro
+unsafe fn ray_transform<Optix: OptixFns>(mut o: Optix) { // Test: a sphere group and a triangle group, each placed under its own transform node; checks recorded ray origins/directions
+ let ctx = create_context(&o);
+ o.rtContextSetEntryPointCount(ctx, 1);
+ o.rtContextSetRayTypeCount(ctx, 1);
+ let start = create_program(&mut o, ctx, TRANSFORM_PTX, "start\0");
+ o.rtContextSetRayGenerationProgram(ctx, 0, start);
+ let bounds = create_program(&mut o, ctx, TRANSFORM_PTX, "bounds\0");
+ let intersect = create_program(&mut o, ctx, TRANSFORM_PTX, "intersect\0");
+ let attribute = create_program(&mut o, ctx, TRANSFORM_PTX, "attribute\0");
+ let any_hit = create_program(&mut o, ctx, TRANSFORM_PTX, "any_hit\0");
+ let closest_hit = create_program(&mut o, ctx, TRANSFORM_PTX, "closest_hit\0");
+ let is_triangle = create_buffer_u32(&mut o, ctx, "is_triangle", 2);
+ let origin0 = create_buffer_1d(&mut o, ctx, RTformat::RT_FORMAT_FLOAT3, "origin0", 3); // 3 float3 elements = 9 floats, matching the expectation arrays below
+ let origin1 = create_buffer_1d(&mut o, ctx, RTformat::RT_FORMAT_FLOAT3, "origin1", 3);
+ create_buffer_of_buffers(&mut o, ctx, "origin", &[origin0, origin1]); // device code reaches both buffers through the `origin` buffer-of-ids variable
+ let direction0 = create_buffer_1d(&mut o, ctx, RTformat::RT_FORMAT_FLOAT3, "direction0", 3);
+ let direction1 = create_buffer_1d(&mut o, ctx, RTformat::RT_FORMAT_FLOAT3, "direction1", 3);
+ create_buffer_of_buffers(&mut o, ctx, "direction", &[direction0, direction1]);
+ let mut sphere = ptr::null_mut(); // receives the handle of the context variable `sphere`
+ o.rtContextDeclareVariable(ctx, b"sphere\0".as_ptr() as _, &mut sphere);
+ o.rtVariableSet4f(sphere, 0.0, 0.0, 0.0, 100.0); // presumably center xyz followed by radius - confirm in transform.cu
+ let sphere = create_custom_geometry(&mut o, ctx, 1, bounds, intersect); // shadows the variable handle above; from here on `sphere` is the custom geometry
+ let (triangles, triangles_primitive) = create_triangles_scaled(&mut o, ctx, 1, 1, 0.0, 100.0);
+ o.rtGeometryTrianglesSetAttributeProgram(triangles_primitive, attribute); // the attribute program is attached to the triangles primitive, not to the instance
+ let material = create_material(&mut o, ctx);
+ setup_geometry_instance(&mut o, sphere, material, any_hit, closest_hit);
+ setup_geometry_instance(&mut o, triangles, material, any_hit, closest_hit);
+ let sphere_group = create_geometry_group(&o, ctx, &[sphere]);
+ let triangles_group = create_geometry_group(&o, ctx, &[triangles]);
+ create_set_accelerator(&mut o, ctx, sphere_group, b"unused1\0");
+ create_set_accelerator(&mut o, ctx, triangles_group, b"unused2\0");
+ let transform1 = create_test_transform(&mut o, ctx); // both transforms come from the same helper call, so they carry the same matrix
+ let transform2 = create_test_transform(&mut o, ctx);
+ o.rtTransformSetChild(transform1, RTobject(sphere_group as _));
+ o.rtTransformSetChild(transform2, RTobject(triangles_group as _));
+ let mut group = ptr::null_mut();
+ o.rtGroupCreate(ctx, &mut group);
+ o.rtGroupSetChildCount(group, 2);
+ o.rtGroupSetChild(group, 0, RTobject(transform1 as _));
+ o.rtGroupSetChild(group, 1, RTobject(transform2 as _));
+ create_set_accelerator_group(&mut o, ctx, group, b"bvh\0");
+ launch_2d(&mut o, ctx, 0, 2, 1); // presumably (entry point 0, width 2, height 1) - confirm against launch_2d
+ assert_buffer_eq(&o, is_triangle, &[1, 0]);
+ assert_buffer_eq_float( // each float comparison below passes 0.000001 as the second argument, presumably the allowed tolerance
+ &mut o,
+ 0.000001f32,
+ origin0,
+ &[
+ 0.0, 0.0, -1.0, -0.3333333, -0.7440169, -0.3035612, 0.0, 0.0, -1.0,
+ ],
+ );
+ assert_buffer_eq_float(
+ &mut o,
+ 0.000001f32,
+ origin1,
+ &[0.0, -0.86602545, 0.0, 0.0, -0.86602545, 0.0, 1.0, 0.0, -1.0],
+ );
+ assert_buffer_eq_float(
+ &mut o,
+ 0.000001f32,
+ direction0,
+ &[
+ 0.0,
+ 0.0,
+ 1.0,
+ -0.24401696,
+ 0.4553418,
+ 0.11111113,
+ 0.0,
+ 0.0,
+ 1.0,
+ ],
+ );
+ assert_buffer_eq_float(
+ &mut o,
+ 0.000001f32,
+ direction1,
+ &[
+ -0.24401696,
+ 0.4553418,
+ 0.11111113,
+ -0.24401696,
+ 0.4553418,
+ 0.11111113,
+ 0.0,
+ 0.0,
+ 1.0,
+ ],
+ );
+}
+
+unsafe fn create_test_transform<Optix: OptixFns>(o: &mut Optix, ctx: RTcontext) -> RTtransform { // Creates a transform node in `ctx` with a fixed scale/rotation/translation matrix and returns its handle (no child is attached here)
+ let mut transform = ptr::null_mut();
+ o.rtTransformCreate(ctx, &mut transform);
+ let matrix = Mat4::from_scale_rotation_translation(
+ Vec3::from_array([1f32, 2f32, 3f32]), // scale: non-uniform 1, 2, 3
+ Quat::from_axis_angle( // rotation: pi/2 radians about the normalized (1, 1, 1) axis
+ Vec3::from_array([1., 1., 1.]).normalize(),
+ (std::f64::consts::PI / 2f64) as f32,
+ ),
+ Vec3::from_array([1., 1., 1.]).normalize(), // translation: the unit vector along (1, 1, 1)
+ );
+ o.rtTransformSetMatrix(
+ transform,
+ 1, // presumably the transpose flag, set because to_cols_array() yields column-order data - confirm against the OptiX API
+ matrix.to_cols_array().as_ptr(), // the temporary array lives until the end of this call statement
+ ptr::null_mut(), // no inverse matrix is supplied
+ );
+ transform
+}
+
+const GET_TRANSFORM_PTX: &'static str = concat!(include_str!("get_transform.ptx"), "\0"); // PTX text embedded at compile time with an explicit trailing NUL appended
+
+optix_test!(get_transform); // registers the function below with the test macro
+unsafe fn get_transform<Optix: OptixFns>(mut o: Optix) { // Test: a sphere under one transform node; device code writes three 4x4 matrices into `object_transforms`
+ let ctx = create_context(&o);
+ o.rtContextSetEntryPointCount(ctx, 1);
+ o.rtContextSetRayTypeCount(ctx, 1);
+ let start = create_program(&mut o, ctx, GET_TRANSFORM_PTX, "start\0");
+ o.rtContextSetRayGenerationProgram(ctx, 0, start);
+ let bounds = create_program(&mut o, ctx, GET_TRANSFORM_PTX, "bounds\0");
+ let intersect = create_program(&mut o, ctx, GET_TRANSFORM_PTX, "intersect\0");
+ let any_hit = create_program(&mut o, ctx, GET_TRANSFORM_PTX, "any_hit\0");
+ let closest_hit = create_program(&mut o, ctx, GET_TRANSFORM_PTX, "closest_hit\0");
+ let object_transforms = create_buffer_1d(
+ &mut o,
+ ctx,
+ RTformat::RT_FORMAT_FLOAT,
+ "object_transforms",
+ 16 * 3, // room for three 16-float matrices
+ );
+ let mut sphere = ptr::null_mut(); // receives the handle of the context variable `sphere`
+ o.rtContextDeclareVariable(ctx, b"sphere\0".as_ptr() as _, &mut sphere);
+ o.rtVariableSet4f(sphere, 0.0, 0.0, 0.0, 100.0); // presumably center xyz followed by radius - confirm in get_transform.cu
+ let sphere = create_custom_geometry(&mut o, ctx, 1, bounds, intersect); // shadows the variable handle above; from here on `sphere` is the custom geometry
+ let material = create_material(&mut o, ctx);
+ setup_geometry_instance(&mut o, sphere, material, any_hit, closest_hit);
+ let sphere_group = create_geometry_group(&o, ctx, &[sphere]);
+ create_set_accelerator(&mut o, ctx, sphere_group, b"unused1\0");
+ let transform1 = create_test_transform(&mut o, ctx);
+ o.rtTransformSetChild(transform1, RTobject(sphere_group as _));
+ let mut group = ptr::null_mut();
+ o.rtGroupCreate(ctx, &mut group);
+ o.rtGroupSetChildCount(group, 1);
+ o.rtGroupSetChild(group, 0, RTobject(transform1 as _));
+ create_set_accelerator_group(&mut o, ctx, group, b"bvh\0");
+ launch_2d(&mut o, ctx, 0, 1, 1); // presumably (entry point 0, width 1, height 1) - confirm against launch_2d
+ assert_buffer_eq_float(
+ &mut o,
+ 0.000001f32, // presumably the allowed tolerance - confirm against assert_buffer_eq_float
+ object_transforms,
+ &[
+ 0.33333337, // floats 0..16: first matrix; the 0.57735026 entries equal the helper's translation (1,1,1)/sqrt(3)
+ -0.4880339,
+ 2.7320507,
+ 0.57735026,
+ 0.9106836,
+ 0.66666675,
+ -0.73205084,
+ 0.57735026,
+ -0.24401695,
+ 1.8213671,
+ 1.0000001,
+ 0.57735026,
+ 0.0,
+ 0.0,
+ 0.0,
+ 1.0,
+ 0.33333337, // floats 16..32: second matrix, identical to the first
+ -0.4880339,
+ 2.7320507,
+ 0.57735026,
+ 0.9106836,
+ 0.66666675,
+ -0.73205084,
+ 0.57735026,
+ -0.24401695,
+ 1.8213671,
+ 1.0000001,
+ 0.57735026,
+ 0.0,
+ 0.0,
+ 0.0,
+ 1.0,
+ 0.33333337, // floats 32..48: third matrix, differs from the first two - presumably the inverse transform; confirm in get_transform.cu
+ 0.9106836,
+ -0.24401696,
+ -0.57735026,
+ -0.12200849,
+ 0.16666669,
+ 0.4553418,
+ -0.28867513,
+ 0.3035612,
+ -0.08133899,
+ 0.11111113,
+ -0.19245009,
+ 0.0,
+ 0.0,
+ 0.0,
+ 1.0,
+ ],
+ );
+}
+
+const DEFAULT_VARIABLE_PTX: &'static str = concat!(include_str!("default_variable.ptx"), "\0"); // PTX text embedded at compile time with an explicit trailing NUL appended
+
+optix_test!(default_variable); // registers the function below with the test macro
+
+unsafe fn default_variable<Optix: OptixFns>(mut o: Optix) { // Test: the host sets no variable value at all, yet `var_buffer` must contain 55 after the launch
+ let ctx = create_context(&o);
+ o.rtContextSetEntryPointCount(ctx, 1);
+ o.rtContextSetRayTypeCount(ctx, 1);
+ let start = create_program(&mut o, ctx, DEFAULT_VARIABLE_PTX, "start\0");
+ o.rtContextSetRayGenerationProgram(ctx, 0, start);
+ let buffer = create_buffer_u32(&mut o, ctx, "var_buffer", 1);
+ launch_2d(&mut o, ctx, 0, 1, 1); // presumably (entry point 0, width 1, height 1) - confirm against launch_2d
+ assert_buffer_eq(&o, buffer, &[55]); // presumably 55 is a variable's default initializer inside the PTX, as the test name suggests - confirm in default_variable.cu
+}
+
+const EXCEPTION_PTX: &'static str = concat!(include_str!("exception.ptx"), "\0"); // PTX text with a trailing NUL; shared by the exception_* tests that follow
+
+optix_test!(exception_raygen); // registers the function below with the test macro
+
+unsafe fn exception_raygen<Optix: OptixFns>(mut o: Optix) { // Test: `throw_` is installed directly as the ray-generation program; the exception program's output is checked
+ let ctx = create_context(&o);
+ o.rtContextSetExceptionEnabled(ctx, optix_types::RTexception::RT_EXCEPTION_ALL, 1); // turn on every exception kind
+ o.rtContextSetEntryPointCount(ctx, 1);
+ o.rtContextSetRayTypeCount(ctx, 1);
+ let start = create_program(&mut o, ctx, EXCEPTION_PTX, "throw_\0");
+ let exception = create_program(&mut o, ctx, EXCEPTION_PTX, "exception\0");
+ o.rtContextSetRayGenerationProgram(ctx, 0, start);
+ o.rtContextSetExceptionProgram(ctx, 0, exception); // exception handler for entry point 0
+ let buffer = create_buffer_u32(&mut o, ctx, "var_buffer", 1);
+ launch_2d(&mut o, ctx, 0, 1, 1); // presumably (entry point 0, width 1, height 1) - confirm against launch_2d
+ assert_buffer_eq(&o, buffer, &[1024]); // 1024 == 0x400; presumably the user exception code recorded by `exception` - confirm in exception.cu
+}
+
+optix_test!(exception_miss); // registers the function below with the test macro
+
+unsafe fn exception_miss<Optix: OptixFns>(mut o: Optix) { // Test: `throw_` is installed as the miss program for ray type 0; the exception program's output is checked
+ let ctx = create_context(&o);
+ o.rtContextSetExceptionEnabled(ctx, optix_types::RTexception::RT_EXCEPTION_ALL, 1); // turn on every exception kind
+ o.rtContextSetEntryPointCount(ctx, 1);
+ o.rtContextSetRayTypeCount(ctx, 1);
+ let trace = create_program(&mut o, ctx, EXCEPTION_PTX, "trace\0");
+ let exception = create_program(&mut o, ctx, EXCEPTION_PTX, "exception\0");
+ let throw_ = create_program(&mut o, ctx, EXCEPTION_PTX, "throw_\0");
+ o.rtContextSetRayGenerationProgram(ctx, 0, trace);
+ o.rtContextSetExceptionProgram(ctx, 0, exception);
+ o.rtContextSetMissProgram(ctx, 0, throw_);
+ let material = create_material(&mut o, ctx);
+ let (triangle, _) = create_triangles(&mut o, ctx, 999, 1, 0.0); // the hit-based exception tests pass 0 here; presumably 999 moves the triangle out of the ray's path so the miss program runs - confirm against create_triangles
+ setup_geometry_instance(&mut o, triangle, material, ptr::null_mut(), ptr::null_mut()); // no any-hit or closest-hit program
+ let geo_group = create_geometry_group(&o, ctx, &[triangle]);
+ create_set_accelerator(&mut o, ctx, geo_group, b"bvh\0");
+ let buffer = create_buffer_u32(&mut o, ctx, "var_buffer", 1);
+ launch_2d(&mut o, ctx, 0, 1, 1); // presumably (entry point 0, width 1, height 1) - confirm against launch_2d
+ assert_buffer_eq(&o, buffer, &[1024]); // 1024 == 0x400; presumably the user exception code recorded by `exception` - confirm in exception.cu
+}
+
+optix_test!(exception_closest_hit); // registers the function below with the test macro
+
+unsafe fn exception_closest_hit<Optix: OptixFns>(mut o: Optix) { // Test: `throw_` is installed as the closest-hit program of the triangle's material setup
+ let ctx = create_context(&o);
+ o.rtContextSetExceptionEnabled(ctx, optix_types::RTexception::RT_EXCEPTION_ALL, 1); // turn on every exception kind
+ o.rtContextSetEntryPointCount(ctx, 1);
+ o.rtContextSetRayTypeCount(ctx, 1);
+ let trace = create_program(&mut o, ctx, EXCEPTION_PTX, "trace\0");
+ let exception = create_program(&mut o, ctx, EXCEPTION_PTX, "exception\0");
+ let throw_ = create_program(&mut o, ctx, EXCEPTION_PTX, "throw_\0");
+ o.rtContextSetRayGenerationProgram(ctx, 0, trace);
+ o.rtContextSetExceptionProgram(ctx, 0, exception);
+ let material = create_material(&mut o, ctx);
+ let (triangle, _) = create_triangles(&mut o, ctx, 0, 1, 0.0);
+ setup_geometry_instance(&mut o, triangle, material, ptr::null_mut(), throw_); // `throw_` goes in the last slot, which other tests fill with `closest_hit`
+ let geo_group = create_geometry_group(&o, ctx, &[triangle]);
+ create_set_accelerator(&mut o, ctx, geo_group, b"bvh\0");
+ let buffer = create_buffer_u32(&mut o, ctx, "var_buffer", 1);
+ launch_2d(&mut o, ctx, 0, 1, 1); // presumably (entry point 0, width 1, height 1) - confirm against launch_2d
+ assert_buffer_eq(&o, buffer, &[1024]); // 1024 == 0x400; presumably the user exception code recorded by `exception` - confirm in exception.cu
+}
+
+optix_test!(exception_any_hit); // registers the function below with the test macro
+
+unsafe fn exception_any_hit<Optix: OptixFns>(mut o: Optix) { // Test: `throw_` is installed as the any-hit program of the triangle's material setup
+ let ctx = create_context(&o);
+ o.rtContextSetExceptionEnabled(ctx, optix_types::RTexception::RT_EXCEPTION_ALL, 1); // turn on every exception kind
+ o.rtContextSetEntryPointCount(ctx, 1);
+ o.rtContextSetRayTypeCount(ctx, 1);
+ let trace = create_program(&mut o, ctx, EXCEPTION_PTX, "trace\0");
+ let exception = create_program(&mut o, ctx, EXCEPTION_PTX, "exception\0");
+ let throw_ = create_program(&mut o, ctx, EXCEPTION_PTX, "throw_\0");
+ o.rtContextSetRayGenerationProgram(ctx, 0, trace);
+ o.rtContextSetExceptionProgram(ctx, 0, exception);
+ let material = create_material(&mut o, ctx);
+ let (triangle, _) = create_triangles(&mut o, ctx, 0, 1, 0.0);
+ setup_geometry_instance(&mut o, triangle, material, throw_, ptr::null_mut()); // `throw_` goes in the slot other tests fill with `any_hit`; no closest-hit program
+ let geo_group = create_geometry_group(&o, ctx, &[triangle]);
+ create_set_accelerator(&mut o, ctx, geo_group, b"bvh\0");
+ let buffer = create_buffer_u32(&mut o, ctx, "var_buffer", 1);
+ launch_2d(&mut o, ctx, 0, 1, 1); // presumably (entry point 0, width 1, height 1) - confirm against launch_2d
+ assert_buffer_eq(&o, buffer, &[1024]); // 1024 == 0x400; presumably the user exception code recorded by `exception` - confirm in exception.cu
+}
+
+optix_test!(exception_attribute); // registers the function below with the test macro
+
+unsafe fn exception_attribute<Optix: OptixFns>(mut o: Optix) { // Test: `throw_` is installed as the attribute program of the triangles primitive
+ let ctx = create_context(&o);
+ o.rtContextSetExceptionEnabled(ctx, optix_types::RTexception::RT_EXCEPTION_ALL, 1); // turn on every exception kind
+ o.rtContextSetEntryPointCount(ctx, 1);
+ o.rtContextSetRayTypeCount(ctx, 1);
+ let trace = create_program(&mut o, ctx, EXCEPTION_PTX, "trace\0");
+ let exception = create_program(&mut o, ctx, EXCEPTION_PTX, "exception\0");
+ let throw_ = create_program(&mut o, ctx, EXCEPTION_PTX, "throw_\0");
+ o.rtContextSetRayGenerationProgram(ctx, 0, trace);
+ o.rtContextSetExceptionProgram(ctx, 0, exception);
+ let material = create_material(&mut o, ctx);
+ let (triangle_instance, triangle) = create_triangles(&mut o, ctx, 0, 1, 0.0); // second tuple element is the triangles primitive that takes the attribute program
+ o.rtGeometryTrianglesSetAttributeProgram(triangle, throw_);
+ setup_geometry_instance( // neither any-hit nor closest-hit is set; only the attribute program can throw
+ &mut o,
+ triangle_instance,
+ material,
+ ptr::null_mut(),
+ ptr::null_mut(),
+ );
+ let geo_group = create_geometry_group(&o, ctx, &[triangle_instance]);
+ create_set_accelerator(&mut o, ctx, geo_group, b"bvh\0");
+ let buffer = create_buffer_u32(&mut o, ctx, "var_buffer", 1);
+ launch_2d(&mut o, ctx, 0, 1, 1); // presumably (entry point 0, width 1, height 1) - confirm against launch_2d
+ assert_buffer_eq(&o, buffer, &[1024]); // 1024 == 0x400; presumably the user exception code recorded by `exception` - confirm in exception.cu
+}
+
+optix_test!(exception_callable); // registers the function below with the test macro
+
+unsafe fn exception_callable<Optix: OptixFns>(mut o: Optix) { // Test: raygen `call_callable1` with two callable ids wired through context variables, the second being `throw_callable`
+ let ctx = create_context(&o);
+ o.rtContextSetExceptionEnabled(ctx, optix_types::RTexception::RT_EXCEPTION_ALL, 1); // turn on every exception kind
+ o.rtContextSetEntryPointCount(ctx, 1);
+ o.rtContextSetRayTypeCount(ctx, 1);
+ let call_callable1 = create_program(&mut o, ctx, EXCEPTION_PTX, "call_callable1\0");
+ let call_callable2 = create_program(&mut o, ctx, EXCEPTION_PTX, "call_callable2\0");
+ let throw_callable = create_program(&mut o, ctx, EXCEPTION_PTX, "throw_callable\0");
+ let exception = create_program(&mut o, ctx, EXCEPTION_PTX, "exception\0");
+ o.rtContextSetRayGenerationProgram(ctx, 0, call_callable1);
+ o.rtContextSetExceptionProgram(ctx, 0, exception);
+ let call_callable2_id = program_get_id(&mut o, call_callable2);
+ let throw_callable_id = program_get_id(&mut o, throw_callable);
+ context_set_u32(&mut o, ctx, b"callable1\0", call_callable2_id as u32); // variable `callable1` holds the id of `call_callable2`
+ context_set_u32(&mut o, ctx, b"callable2\0", throw_callable_id as u32); // variable `callable2` holds the id of `throw_callable`
+ let buffer = create_buffer_u32(&mut o, ctx, "var_buffer", 1);
+ launch_2d(&mut o, ctx, 0, 1, 1); // presumably (entry point 0, width 1, height 1) - confirm against launch_2d
+ assert_buffer_eq(&o, buffer, &[1025]); // one more than the 1024 expected by the non-callable exception tests; presumably a distinct code thrown by `throw_callable` - confirm in exception.cu
+}
+
+optix_test!(exception_callable_subfunc); // registers the function below with the test macro
+
+unsafe fn exception_callable_subfunc<Optix: OptixFns>(mut o: Optix) { // Test: like exception_callable, but with `throw_callable_main` inserted as the second callable and `throw_callable` as a third
+ let ctx = create_context(&o);
+ o.rtContextSetExceptionEnabled(ctx, optix_types::RTexception::RT_EXCEPTION_ALL, 1); // turn on every exception kind
+ o.rtContextSetEntryPointCount(ctx, 1);
+ o.rtContextSetRayTypeCount(ctx, 1);
+ let call_callable1 = create_program(&mut o, ctx, EXCEPTION_PTX, "call_callable1\0");
+ let call_callable2 = create_program(&mut o, ctx, EXCEPTION_PTX, "call_callable2\0");
+ let throw_callable = create_program(&mut o, ctx, EXCEPTION_PTX, "throw_callable\0");
+ let throw_callable_main = create_program(&mut o, ctx, EXCEPTION_PTX, "throw_callable_main\0");
+ let exception = create_program(&mut o, ctx, EXCEPTION_PTX, "exception\0");
+ o.rtContextSetRayGenerationProgram(ctx, 0, call_callable1);
+ o.rtContextSetExceptionProgram(ctx, 0, exception);
+ let call_callable2_id = program_get_id(&mut o, call_callable2);
+ let throw_callable_main_id = program_get_id(&mut o, throw_callable_main);
+ let throw_callable_id = program_get_id(&mut o, throw_callable);
+ context_set_u32(&mut o, ctx, b"callable1\0", call_callable2_id as u32); // variable `callable1` holds the id of `call_callable2`
+ context_set_u32(&mut o, ctx, b"callable2\0", throw_callable_main_id as u32); // variable `callable2` holds the id of `throw_callable_main`
+ context_set_u32(&mut o, ctx, b"callable3\0", throw_callable_id as u32); // variable `callable3` holds the id of `throw_callable`
+ let buffer = create_buffer_u32(&mut o, ctx, "var_buffer", 1);
+ launch_2d(&mut o, ctx, 0, 1, 1); // presumably (entry point 0, width 1, height 1) - confirm against launch_2d
+ assert_buffer_eq(&o, buffer, &[1025]); // same expected value as exception_callable
+}
+
+const EXCEPTION_SUBFUNC_PTX: &'static str = concat!(include_str!("exception_subfunc.ptx"), "\0"); // PTX text embedded at compile time with an explicit trailing NUL appended
+
+optix_test!(exception_subfunc); // registers the function below with the test macro
+
+unsafe fn exception_subfunc<Optix: OptixFns>(mut o: Optix) { // Test: same host setup as exception_miss, but all programs come from exception_subfunc.ptx and the raygen entry is named `start`
+ let ctx = create_context(&o);
+ o.rtContextSetExceptionEnabled(ctx, optix_types::RTexception::RT_EXCEPTION_ALL, 1); // turn on every exception kind
+ o.rtContextSetEntryPointCount(ctx, 1);
+ o.rtContextSetRayTypeCount(ctx, 1);
+ let trace = create_program(&mut o, ctx, EXCEPTION_SUBFUNC_PTX, "start\0"); // local is called `trace`, but the program loaded is `start`
+ let exception = create_program(&mut o, ctx, EXCEPTION_SUBFUNC_PTX, "exception\0");
+ let throw_ = create_program(&mut o, ctx, EXCEPTION_SUBFUNC_PTX, "throw_\0");
+ o.rtContextSetRayGenerationProgram(ctx, 0, trace);
+ o.rtContextSetExceptionProgram(ctx, 0, exception);
+ o.rtContextSetMissProgram(ctx, 0, throw_);
+ let material = create_material(&mut o, ctx);
+ let (triangle, _) = create_triangles(&mut o, ctx, 999, 1, 0.0); // same 999 argument as exception_miss; presumably keeps the triangle out of the ray's path - confirm against create_triangles
+ setup_geometry_instance(&mut o, triangle, material, ptr::null_mut(), ptr::null_mut()); // no any-hit or closest-hit program
+ let geo_group = create_geometry_group(&o, ctx, &[triangle]);
+ create_set_accelerator(&mut o, ctx, geo_group, b"bvh\0");
+ let buffer = create_buffer_u32(&mut o, ctx, "var_buffer", 1);
+ launch_2d(&mut o, ctx, 0, 1, 1); // presumably (entry point 0, width 1, height 1) - confirm against launch_2d
+ assert_buffer_eq(&o, buffer, &[1024]); // 1024 == 0x400; presumably the user exception code recorded by `exception` - confirm in exception_subfunc.cu
+}
+
+const BUFFER_MIPMAP_PTX: &'static str = concat!(include_str!("buffer_mipmap.ptx"), "\0"); // PTX text embedded at compile time with an explicit trailing NUL appended
+
+// Broken for now because HIP does not support image arrays
+optix_test!(buffer_mipmap); // NOTE(review): the comment above says the test is broken, yet it is still registered here - confirm how optix_test! treats it
+
+unsafe fn buffer_mipmap<Optix: OptixFns>(mut o: Optix) { // Test: a 3x3 uint2 buffer with mip levels, filled per level from the host and read on the device through a texture sampler id
+ let ctx = create_context(&o);
+ o.rtContextSetEntryPointCount(ctx, 1);
+ o.rtContextSetRayTypeCount(ctx, 1);
+ let mut buffer = ptr::null_mut();
+ o.rtBufferCreate(ctx, RTbuffertype::RT_BUFFER_INPUT.0, &mut buffer);
+ o.rtBufferSetSize2D(buffer, 3, 3);
+ o.rtBufferSetFormat(buffer, RTformat::RT_FORMAT_UNSIGNED_INT2);
+ o.rtBufferSetMipLevelCount(buffer, 2);
+ let mut width = 0;
+ let mut height = 0;
+ o.rtBufferGetMipLevelSize2D(buffer, 0, &mut width, &mut height);
+ assert_eq!((width, height), (3, 3)); // level 0 keeps the full 3x3 size
+ o.rtBufferGetMipLevelSize2D(buffer, 1, &mut width, &mut height);
+ assert_eq!((width, height), (1, 1)); // level 1 is expected to be 1x1
+ o.rtBufferGetMipLevelSize2D(buffer, 2, &mut width, &mut height); // NOTE(review): level index 2 is queried although the level count was set to 2; the return code is not checked
+ assert_eq!((width, height), (1, 1));
+ {
+ let mut host_buffer = ptr::null_mut();
+ o.rtBufferMapEx(
+ buffer,
+ RTbuffermapflag::RT_BUFFER_MAP_READ_WRITE.0,
+ 0, // mip level 0
+ ptr::null_mut(),
+ &mut host_buffer,
+ );
+ for i in 0..3 * 3 * 2 { // 3x3 elements with two u32 components each; writes the values 1..=18
+ *(host_buffer.cast::<u32>().add(i)) = (i as u32) + 1;
+ }
+ o.rtBufferUnmapEx(buffer, 0);
+ }
+ {
+ let mut host_buffer = ptr::null_mut();
+ o.rtBufferMapEx(
+ buffer,
+ RTbuffermapflag::RT_BUFFER_MAP_READ_WRITE.0,
+ 1, // mip level 1
+ ptr::null_mut(),
+ &mut host_buffer,
+ );
+ *host_buffer.cast::<(u32, u32)>() = (100, 101); // the single element of the 1x1 level
+ o.rtBufferUnmapEx(buffer, 1);
+ }
+ let mut image = ptr::null_mut();
+ o.rtTextureSamplerCreate(ctx, &mut image);
+ o.rtTextureSamplerSetBuffer(image, 0, 0, buffer);
+ o.rtTextureSamplerSetFilteringModes(
+ image,
+ RTfiltermode::RT_FILTER_NEAREST,
+ RTfiltermode::RT_FILTER_NEAREST,
+ RTfiltermode::RT_FILTER_NEAREST,
+ );
+ o.rtTextureSamplerSetIndexingMode(image, RTtextureindexmode::RT_TEXTURE_INDEX_ARRAY_INDEX);
+ o.rtTextureSamplerSetReadMode(image, RTtexturereadmode::RT_TEXTURE_READ_ELEMENT_TYPE);
+ let start = create_program(&mut o, ctx, BUFFER_MIPMAP_PTX, "start\0");
+ o.rtContextSetRayGenerationProgram(ctx, 0, start);
+ let mut texture_id = mem::zeroed();
+ o.rtTextureSamplerGetId(image, &mut texture_id);
+ context_set_u32(&mut o, ctx, b"texture_id\0", texture_id as u32); // the sampler is handed to device code by id through the `texture_id` variable
+ let output_buffer = create_buffer_1d(
+ &mut o,
+ ctx,
+ RTformat::RT_FORMAT_UNSIGNED_INT2,
+ "output_buffer",
+ 3, // NOTE(review): three uint2 elements are allocated, while the assertion below lists four u32 values - confirm how assert_buffer_eq handles the length
+ );
+ launch_2d(&mut o, ctx, 0, 1, 1); // presumably (entry point 0, width 1, height 1) - confirm against launch_2d
+ assert_buffer_eq(&o, output_buffer, &[1, 2, 100, 101]); // (1, 2) is the first pair written to level 0 and (100, 101) the pair written to level 1
+}
+
+const OOB_PTX: &'static str = concat!(include_str!("oob.ptx"), "\0"); // PTX text embedded at compile time with an explicit trailing NUL appended
+
+optix_test!(oob); // registers the function below with the test macro
+
+unsafe fn oob<Optix: OptixFns>(mut o: Optix) { // Test: device reads with an out-of-range buffer index and through a buffer id, with exceptions switched off
+ let ctx = create_context(&o);
+ o.rtContextSetExceptionEnabled(ctx, RTexception::RT_EXCEPTION_ALL, 0); // unlike the exception_* tests, every exception kind is disabled here
+ o.rtContextSetEntryPointCount(ctx, 1);
+ o.rtContextSetRayTypeCount(ctx, 1);
+ let start = create_program(&mut o, ctx, OOB_PTX, "start\0");
+ o.rtContextSetRayGenerationProgram(ctx, 0, start);
+ create_buffer_u32_with_values(&mut o, ctx, "index_", &[4, 2]); // 4 is one past the end of the 4-element `input`; 2 is in range
+ create_buffer_u32_with_values(&mut o, ctx, "input", &[10,11,12,13]);
+ let output = create_buffer_u32_with_values(&mut o, ctx, "output", &[u32::MAX, u32::MAX, u32::MAX]); // pre-filled with u32::MAX so every slot must be overwritten to match the expectation
+ launch_2d(&mut o, ctx, 0, 1, 1); // presumably (entry point 0, width 1, height 1) - confirm against launch_2d
+ assert_buffer_eq(&o, output, &[0, 12, 0]); // slot 1 is input[2] == 12; slots 0 and 2 are expected to be 0 (the out-of-range read and the read through a buffer id)
+}
diff --git a/zluda_rt/src/tests/oob.cu b/zluda_rt/src/tests/oob.cu new file mode 100644 index 0000000..cee8c65 --- /dev/null +++ b/zluda_rt/src/tests/oob.cu @@ -0,0 +1,16 @@ +// nvcc oob.cu -I"C:\dev\OptiX SDK 6.5.0\include" -ptx -x cu -dc +#include <optix.h> +#include <optixu/optixu_math_namespace.h> +#include <optix_world.h> + +using namespace optix; + +rtBuffer<unsigned int> index_; +rtBuffer<unsigned int> input; +rtBuffer<unsigned int> output; + +RT_PROGRAM void start() { + output[0] = input[index_[0]]; + output[1] = input[index_[1]]; + output[2] = *((unsigned int *)rt_buffer_get_id(0, 1, 4, 10,10,0,0)); +} diff --git a/zluda_rt/src/tests/oob.ptx b/zluda_rt/src/tests/oob.ptx new file mode 100644 index 0000000..be307ce --- /dev/null +++ b/zluda_rt/src/tests/oob.ptx @@ -0,0 +1,98 @@ +// +// Generated by NVIDIA NVVM Compiler +// +// Compiler Build ID: CL-27506705 +// Cuda compilation tools, release 10.2, V10.2.89 +// Based on LLVM 3.4svn +// + +.version 6.5 +.target sm_30 +.address_size 64 + + // .globl _Z5startv +.visible .global .align 1 .b8 index_[1]; +.visible .global .align 1 .b8 input[1]; +.visible .global .align 1 .b8 output[1]; +.visible .global .align 8 .u64 _ZN21rti_internal_register20reg_bitness_detectorE; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail0E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail1E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail2E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail3E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail4E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail5E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail6E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail7E; +.visible .global .align 8 .u64 
_ZN21rti_internal_register24reg_exception_64_detail8E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail9E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail0E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail1E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail2E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail3E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail4E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail5E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail6E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail7E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail8E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail9E; +.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_xE; +.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_yE; +.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_zE; + +.visible .entry _Z5startv( + +) +{ + .reg .b32 %r<21>; + .reg .b64 %rd<51>; + + + mov.u64 %rd48, index_; + cvta.global.u64 %rd2, %rd48; + mov.u32 %r16, 1; + mov.u32 %r17, 4; + mov.u64 %rd47, 0; + // inline asm + call (%rd1), _rt_buffer_get_64, (%rd2, %r16, %r17, %rd47, %rd47, %rd47, %rd47); + // inline asm + ld.u32 %rd9, [%rd1]; + mov.u64 %rd49, input; + cvta.global.u64 %rd8, %rd49; + // inline asm + call (%rd7), _rt_buffer_get_64, (%rd8, %r16, %r17, %rd9, %rd47, %rd47, %rd47); + // inline asm + ld.u32 %r18, [%rd7]; + mov.u64 %rd50, output; + cvta.global.u64 %rd14, %rd50; + // inline asm + call (%rd13), _rt_buffer_get_64, (%rd14, %r16, %r17, %rd47, %rd47, %rd47, %rd47); + // inline asm + st.u32 [%rd13], %r18; + mov.u64 %rd33, 1; + // inline asm + call (%rd19), 
_rt_buffer_get_64, (%rd2, %r16, %r17, %rd33, %rd47, %rd47, %rd47); + // inline asm + ld.u32 %rd27, [%rd19]; + // inline asm + call (%rd25), _rt_buffer_get_64, (%rd8, %r16, %r17, %rd27, %rd47, %rd47, %rd47); + // inline asm + ld.u32 %r19, [%rd25]; + // inline asm + call (%rd31), _rt_buffer_get_64, (%rd14, %r16, %r17, %rd33, %rd47, %rd47, %rd47); + // inline asm + st.u32 [%rd31], %r19; + mov.u32 %r13, 0; + mov.u64 %rd39, 10; + // inline asm + call (%rd37), _rt_buffer_get_id_64, (%r13, %r16, %r17, %rd39, %rd39, %rd47, %rd47); + // inline asm + ld.u32 %r20, [%rd37]; + mov.u64 %rd44, 2; + // inline asm + call (%rd42), _rt_buffer_get_64, (%rd14, %r16, %r17, %rd44, %rd47, %rd47, %rd47); + // inline asm + st.u32 [%rd42], %r20; + ret; +} + + diff --git a/zluda_rt/src/tests/texture_sampler.cu b/zluda_rt/src/tests/texture_sampler.cu new file mode 100644 index 0000000..a9ceae1 --- /dev/null +++ b/zluda_rt/src/tests/texture_sampler.cu @@ -0,0 +1,21 @@ +// nvcc texture_sampler.cu -I"C:\dev\OptiX SDK 6.5.0\include" -ptx -x cu -dc
+#include <optix.h>
+#include <optixu/optixu_math_namespace.h>
+#include <optix_world.h>
+
+using namespace optix;
+
+rtBuffer<float4, 2> output_buffer; // 2D float4 buffer that receives every sample taken below
+rtTextureSampler<float4, 2> image1; // texture sampler declared as a named variable
+rtDeclareVariable(unsigned int, image2, , ); // plain unsigned int, passed to rtTex2D below as a texture id
+
+RT_PROGRAM void start() { // Samples the four texels (0..1, 0..1) twice: via `image1` into rows 0-1 and via the `image2` id into rows 2-3
+ output_buffer[make_uint2(0,0)] = tex2D(image1, 0, 0);
+ output_buffer[make_uint2(1,0)] = tex2D(image1, 1, 0);
+ output_buffer[make_uint2(0,1)] = tex2D(image1, 0, 1);
+ output_buffer[make_uint2(1,1)] = tex2D(image1, 1, 1);
+ output_buffer[make_uint2(0,2)] = rtTex2D<float4>(image2, 0, 0); // same coordinates as rows 0-1, stored two rows further down
+ output_buffer[make_uint2(1,2)] = rtTex2D<float4>(image2, 1, 0);
+ output_buffer[make_uint2(0,3)] = rtTex2D<float4>(image2, 0, 1);
+ output_buffer[make_uint2(1,3)] = rtTex2D<float4>(image2, 1, 1);
+}
diff --git a/zluda_rt/src/tests/texture_sampler.ptx b/zluda_rt/src/tests/texture_sampler.ptx new file mode 100644 index 0000000..6d2d022 --- /dev/null +++ b/zluda_rt/src/tests/texture_sampler.ptx @@ -0,0 +1,119 @@ +//
+// Generated by NVIDIA NVVM Compiler
+//
+// Compiler Build ID: CL-31833905
+// Cuda compilation tools, release 11.8, V11.8.89
+// Based on NVVM 7.0.1
+//
+
+.version 7.8
+.target sm_52
+.address_size 64
+
+ // .globl _Z5startv
+.visible .global .align 1 .b8 output_buffer[1];
+.visible .global .texref image1;
+.visible .global .align 4 .u32 image2;
+.visible .global .align 4 .b8 _ZN21rti_internal_typeinfo6image2E[8] = {82, 97, 121, 0, 4, 0, 0, 0};
+.visible .global .align 8 .u64 _ZN21rti_internal_register20reg_bitness_detectorE;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail0E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail1E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail2E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail3E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail4E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail5E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail6E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail7E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail8E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail9E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail0E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail1E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail2E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail3E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail4E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail5E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail6E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail7E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail8E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail9E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_xE;
+.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_yE;
+.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_zE;
+.visible .global .align 1 .b8 _ZN21rti_internal_typename6image2E[13] = {117, 110, 115, 105, 103, 110, 101, 100, 32, 105, 110, 116, 0};
+.visible .global .align 4 .u32 _ZN21rti_internal_typeenum6image2E = 4919;
+.visible .global .align 1 .b8 _ZN21rti_internal_semantic6image2E[1];
+.visible .global .align 1 .b8 _ZN23rti_internal_annotation6image2E[1];
+
+.visible .entry _Z5startv() // compiled form of `start()` from texture_sampler.cu: eight buffer-element lookups, each followed by a texture fetch and a v4 store
+{
+ .reg .f32 %f<49>;
+ .reg .b32 %r<25>;
+ .reg .b64 %rd<51>;
+
+
+ mov.u64 %rd49, output_buffer;
+ cvta.global.u64 %rd2, %rd49; // generic address of output_buffer, reused by every _rt_buffer_get_64 call
+ mov.u32 %r24, 2; // passed as 2nd argument of every call below; presumably the dimensionality
+ mov.u32 %r22, 16; // passed as 3rd argument of every buffer call; presumably the element size in bytes (float4)
+ mov.u64 %rd48, 0; // index constant 0
+ // begin inline asm
+ call (%rd1), _rt_buffer_get_64, (%rd2, %r24, %r22, %rd48, %rd48, %rd48, %rd48); // element (0, 0)
+ // end inline asm
+ mov.f32 %f32, 0f00000000; // 0.0f texture coordinate
+ mov.u64 %rd45, 1; // index constant 1
+ mov.u64 %rd34, 2; // index constant 2
+ tex.2d.v4.f32.f32 {%f33, %f34, %f35, %f36}, [image1, {%f32, %f32}]; // image1 at (0, 0)
+ mov.u64 %rd46, 3; // index constant 3
+ st.v4.f32 [%rd1], {%f33, %f34, %f35, %f36};
+ // begin inline asm
+ call (%rd7), _rt_buffer_get_64, (%rd2, %r24, %r22, %rd45, %rd48, %rd48, %rd48); // element (1, 0)
+ // end inline asm
+ mov.f32 %f30, 0f3F800000; // 1.0f texture coordinate
+ tex.2d.v4.f32.f32 {%f37, %f38, %f39, %f40}, [image1, {%f30, %f32}]; // image1 at (1, 0)
+ st.v4.f32 [%rd7], {%f37, %f38, %f39, %f40};
+ // begin inline asm
+ call (%rd13), _rt_buffer_get_64, (%rd2, %r24, %r22, %rd48, %rd45, %rd48, %rd48); // element (0, 1)
+ // end inline asm
+ tex.2d.v4.f32.f32 {%f41, %f42, %f43, %f44}, [image1, {%f32, %f30}]; // image1 at (0, 1)
+ st.v4.f32 [%rd13], {%f41, %f42, %f43, %f44};
+ // begin inline asm
+ call (%rd19), _rt_buffer_get_64, (%rd2, %r24, %r22, %rd45, %rd45, %rd48, %rd48); // element (1, 1)
+ // end inline asm
+ tex.2d.v4.f32.f32 {%f45, %f46, %f47, %f48}, [image1, {%f30, %f30}]; // image1 at (1, 1)
+ st.v4.f32 [%rd19], {%f45, %f46, %f47, %f48};
+ // begin inline asm
+ call (%rd25), _rt_buffer_get_64, (%rd2, %r24, %r22, %rd48, %rd34, %rd48, %rd48); // element (0, 2)
+ // end inline asm
+ ld.global.u32 %r11, [image2]; // the image2 value is reloaded before each id-based fetch
+ // begin inline asm
+ call (%f1, %f2, %f3, %f4), _rt_texture_get_f_id, (%r11, %r24, %f32, %f32, %f32, %f32); // fetch by id at (0, 0)
+ // end inline asm
+ st.v4.f32 [%rd25], {%f1, %f2, %f3, %f4};
+ // begin inline asm
+ call (%rd31), _rt_buffer_get_64, (%rd2, %r24, %r22, %rd45, %rd34, %rd48, %rd48); // element (1, 2)
+ // end inline asm
+ ld.global.u32 %r15, [image2];
+ // begin inline asm
+ call (%f9, %f10, %f11, %f12), _rt_texture_get_f_id, (%r15, %r24, %f30, %f32, %f32, %f32); // fetch by id at (1, 0)
+ // end inline asm
+ st.v4.f32 [%rd31], {%f9, %f10, %f11, %f12};
+ // begin inline asm
+ call (%rd37), _rt_buffer_get_64, (%rd2, %r24, %r22, %rd48, %rd46, %rd48, %rd48); // element (0, 3)
+ // end inline asm
+ ld.global.u32 %r19, [image2];
+ // begin inline asm
+ call (%f17, %f18, %f19, %f20), _rt_texture_get_f_id, (%r19, %r24, %f32, %f30, %f32, %f32); // fetch by id at (0, 1)
+ // end inline asm
+ st.v4.f32 [%rd37], {%f17, %f18, %f19, %f20};
+ // begin inline asm
+ call (%rd43), _rt_buffer_get_64, (%rd2, %r24, %r22, %rd45, %rd46, %rd48, %rd48); // element (1, 3)
+ // end inline asm
+ ld.global.u32 %r23, [image2];
+ // begin inline asm
+ call (%f25, %f26, %f27, %f28), _rt_texture_get_f_id, (%r23, %r24, %f30, %f30, %f32, %f32); // fetch by id at (1, 1)
+ // end inline asm
+ st.v4.f32 [%rd43], {%f25, %f26, %f27, %f28};
+ ret;
+
+}
+
diff --git a/zluda_rt/src/tests/trace_control.cu b/zluda_rt/src/tests/trace_control.cu new file mode 100644 index 0000000..32e1583 --- /dev/null +++ b/zluda_rt/src/tests/trace_control.cu @@ -0,0 +1,121 @@ +// nvcc trace_control.cu -I"C:\dev\OptiX SDK 6.5.0\include" -ptx -x cu -dc +#include <optix.h> +#include <optixu/optixu_math_namespace.h> +#include <optix_world.h> + +using namespace optix; + +rtDeclareVariable(float4, sphere, , ); +rtBuffer<uint, 1> temp_buffer; +rtBuffer<uint, 1> output_buffer; +rtDeclareVariable(uint2, launch_index, rtLaunchIndex, ); +rtDeclareVariable(float, closest_distance, rtIntersectionDistance, ); +rtDeclareVariable(uint, increment, attribute increment, ); +rtDeclareVariable(rtObject, bvh, , ); +rtDeclareVariable(optix::Ray, ray, rtCurrentRay, ); + +RT_PROGRAM void start(void) +{ + Ray ray = make_Ray(make_float3(0, 0, -1), make_float3(0, 0, 1), 0, 0.0, RT_DEFAULT_MAX); + char unused = 0; + rtTrace(bvh, ray, unused); +} + +RT_PROGRAM void intersect(int primIdx) +{ + float3 center = make_float3(sphere); + float3 O = ray.origin - center; + float l = 1 / length(ray.direction); + float3 D = ray.direction * l; + float radius = sphere.w; + + float b = dot(O, D); + float c = dot(O, O) - radius * radius; + float disc = b * b - c; + if (disc > 0.0f) + { + float sdisc = sqrtf(disc); + float root1 = (-b - sdisc); + + float root11 = 0.0f; + + bool check_second = true; + if (rtPotentialIntersection(((root1 + root11) * l) - temp_buffer[launch_index.x])) + { + increment = temp_buffer[launch_index.x]; + if (rtReportIntersection(0)) + check_second = false; + } + if (check_second) + { + float root2 = (-b + sdisc); + if (rtPotentialIntersection((root2 * l) - temp_buffer[launch_index.x])) + { + increment = temp_buffer[launch_index.x]; + rtReportIntersection(0); + } + } + } +} + +RT_PROGRAM void bounds(int, float result[6]) +{ + const float3 cen = make_float3(sphere); + const float3 rad = make_float3(sphere.w); + + optix::Aabb *aabb = (optix::Aabb *)result; + 
+ if (rad.x > 0.0f && !isinf(rad.x)) + { + aabb->m_min = cen - rad; + aabb->m_max = cen + rad; + } + else + { + aabb->invalidate(); + } +} + +RT_PROGRAM void any_hit_ignore(void) +{ + temp_buffer[launch_index.x] += 1; + if (temp_buffer[launch_index.x] >= 3) + { + rtIgnoreIntersection(); + } +} + +RT_PROGRAM void any_hit_terminate(void) +{ + temp_buffer[launch_index.x] += 1; + if (temp_buffer[launch_index.x] >= 3) + { + rtTerminateRay(); + } +} + +RT_PROGRAM void attribute1(void) +{ + increment = 0xc4bb2187 + temp_buffer[0]; +} + +RT_PROGRAM void any_hit_plus_one(void) +{ + temp_buffer[0] += 1; +} + +RT_PROGRAM void attribute2(void) +{ + increment = 0xc4bb2187; +} + +RT_PROGRAM void any_hit_always_ignore(void) +{ + temp_buffer[0] += 1; + rtIgnoreIntersection(); +} + +RT_PROGRAM void closest_hit(void) +{ + output_buffer[launch_index.x] = increment; +}
\ No newline at end of file diff --git a/zluda_rt/src/tests/trace_control.ptx b/zluda_rt/src/tests/trace_control.ptx new file mode 100644 index 0000000..7536802 --- /dev/null +++ b/zluda_rt/src/tests/trace_control.ptx @@ -0,0 +1,442 @@ +//
+// Generated by NVIDIA NVVM Compiler
+//
+// Compiler Build ID: CL-31833905
+// Cuda compilation tools, release 11.8, V11.8.89
+// Based on NVVM 7.0.1
+//
+
+.version 7.8
+.target sm_52
+.address_size 64
+
+ // .globl _Z5startv
+// Storage for the variables and buffers declared in trace_control.cu via
+// rtDeclareVariable / rtBuffer.
+.visible .global .align 16 .b8 sphere[16];
+.visible .global .align 1 .b8 temp_buffer[1];
+.visible .global .align 1 .b8 output_buffer[1];
+.visible .global .align 8 .b8 launch_index[8];
+.visible .global .align 4 .f32 closest_distance;
+.visible .global .align 4 .u32 increment;
+.visible .global .align 4 .b8 bvh[4];
+.visible .global .align 4 .b8 ray[36];
+// rti_internal_typeinfo records, one per variable: bytes 82, 97, 121, 0 are ASCII "Ray\0";
+// the following four bytes match the byte size of the corresponding variable declared above
+// (16, 8, 4, 4, 4, 36).
+.visible .global .align 4 .b8 _ZN21rti_internal_typeinfo6sphereE[8] = {82, 97, 121, 0, 16, 0, 0, 0};
+.visible .global .align 4 .b8 _ZN21rti_internal_typeinfo12launch_indexE[8] = {82, 97, 121, 0, 8, 0, 0, 0};
+.visible .global .align 4 .b8 _ZN21rti_internal_typeinfo16closest_distanceE[8] = {82, 97, 121, 0, 4, 0, 0, 0};
+.visible .global .align 4 .b8 _ZN21rti_internal_typeinfo9incrementE[8] = {82, 97, 121, 0, 4, 0, 0, 0};
+.visible .global .align 4 .b8 _ZN21rti_internal_typeinfo3bvhE[8] = {82, 97, 121, 0, 4, 0, 0, 0};
+.visible .global .align 4 .b8 _ZN21rti_internal_typeinfo3rayE[8] = {82, 97, 121, 0, 36, 0, 0, 0};
+// rti_internal_register globals: declared without initializers and not referenced by any
+// entry in this file. NOTE(review): presumably emitted by the OptiX headers -- confirm.
+.visible .global .align 8 .u64 _ZN21rti_internal_register20reg_bitness_detectorE;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail0E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail1E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail2E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail3E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail4E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail5E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail6E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail7E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail8E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail9E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail0E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail1E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail2E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail3E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail4E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail5E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail6E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail7E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail8E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail9E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_xE;
+.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_yE;
+.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_zE;
+// rti_internal_typename: NUL-terminated ASCII type names, in order
+// "float4", "uint2", "float", "uint", "rtObject", "optix::Ray".
+.visible .global .align 1 .b8 _ZN21rti_internal_typename6sphereE[7] = {102, 108, 111, 97, 116, 52, 0};
+.visible .global .align 1 .b8 _ZN21rti_internal_typename12launch_indexE[6] = {117, 105, 110, 116, 50, 0};
+.visible .global .align 1 .b8 _ZN21rti_internal_typename16closest_distanceE[6] = {102, 108, 111, 97, 116, 0};
+.visible .global .align 1 .b8 _ZN21rti_internal_typename9incrementE[5] = {117, 105, 110, 116, 0};
+.visible .global .align 1 .b8 _ZN21rti_internal_typename3bvhE[9] = {114, 116, 79, 98, 106, 101, 99, 116, 0};
+.visible .global .align 1 .b8 _ZN21rti_internal_typename3rayE[11] = {111, 112, 116, 105, 120, 58, 58, 82, 97, 121, 0};
+// rti_internal_typeenum: every variable carries the same value, 4919 (0x1337).
+.visible .global .align 4 .u32 _ZN21rti_internal_typeenum6sphereE = 4919;
+.visible .global .align 4 .u32 _ZN21rti_internal_typeenum12launch_indexE = 4919;
+.visible .global .align 4 .u32 _ZN21rti_internal_typeenum16closest_distanceE = 4919;
+.visible .global .align 4 .u32 _ZN21rti_internal_typeenum9incrementE = 4919;
+.visible .global .align 4 .u32 _ZN21rti_internal_typeenum3bvhE = 4919;
+.visible .global .align 4 .u32 _ZN21rti_internal_typeenum3rayE = 4919;
+// rti_internal_semantic: NUL-terminated ASCII semantic strings matching the third argument of
+// rtDeclareVariable in the .cu source: "rtLaunchIndex", "rtIntersectionDistance",
+// "attribute increment", "rtCurrentRay". sphere and bvh have no initializer.
+.visible .global .align 1 .b8 _ZN21rti_internal_semantic6sphereE[1];
+.visible .global .align 1 .b8 _ZN21rti_internal_semantic12launch_indexE[14] = {114, 116, 76, 97, 117, 110, 99, 104, 73, 110, 100, 101, 120, 0};
+.visible .global .align 1 .b8 _ZN21rti_internal_semantic16closest_distanceE[23] = {114, 116, 73, 110, 116, 101, 114, 115, 101, 99, 116, 105, 111, 110, 68, 105, 115, 116, 97, 110, 99, 101, 0};
+.visible .global .align 1 .b8 _ZN21rti_internal_semantic9incrementE[20] = {97, 116, 116, 114, 105, 98, 117, 116, 101, 32, 105, 110, 99, 114, 101, 109, 101, 110, 116, 0};
+.visible .global .align 1 .b8 _ZN21rti_internal_semantic3bvhE[1];
+.visible .global .align 1 .b8 _ZN21rti_internal_semantic3rayE[13] = {114, 116, 67, 117, 114, 114, 101, 110, 116, 82, 97, 121, 0};
+// rti_internal_annotation: one uninitialized byte per variable (no annotation was given in
+// the .cu source).
+.visible .global .align 1 .b8 _ZN23rti_internal_annotation6sphereE[1];
+.visible .global .align 1 .b8 _ZN23rti_internal_annotation12launch_indexE[1];
+.visible .global .align 1 .b8 _ZN23rti_internal_annotation16closest_distanceE[1];
+.visible .global .align 1 .b8 _ZN23rti_internal_annotation9incrementE[1];
+.visible .global .align 1 .b8 _ZN23rti_internal_annotation3bvhE[1];
+.visible .global .align 1 .b8 _ZN23rti_internal_annotation3rayE[1];
+
+// Entry `start` (mangled _Z5startv) of trace_control.cu; takes no parameters.
+// Zeroes a one-byte local (the `unused` payload in the .cu source), loads `bvh`, and issues a
+// single _rt_trace_mask_flags_64 call for a ray with origin (0, 0, -1) and direction (0, 0, 1).
+.visible .entry _Z5startv()
+{
+ .local .align 1 .b8 __local_depot0[1];
+ .reg .b64 %SP;
+ .reg .b64 %SPL;
+ .reg .b16 %rs<2>;
+ .reg .f32 %f<9>;
+ .reg .b32 %r<6>;
+ .reg .b64 %rd<3>;
+
+
+ // %rd1 is the generic address of the local byte, %rd2 its .local address.
+ mov.u64 %SPL, __local_depot0;
+ cvta.local.u64 %SP, %SPL;
+ add.u64 %rd1, %SP, 0;
+ add.u64 %rd2, %SPL, 0;
+ mov.u16 %rs1, 0;
+ st.local.u8 [%rd2], %rs1;
+ ld.global.u32 %r1, [bvh];
+ // f32 bit patterns: 0fBF800000 = -1.0, 0f3F800000 = 1.0, 0f00000000 = 0.0,
+ // 0f6C4ECB8F is approximately 1e27 (the .cu source passes RT_DEFAULT_MAX as tmax).
+ mov.f32 %f3, 0fBF800000;
+ mov.f32 %f6, 0f3F800000;
+ mov.f32 %f7, 0f00000000;
+ mov.f32 %f8, 0f6C4ECB8F;
+ mov.u32 %r3, 255;
+ mov.u32 %r4, 0;
+ mov.u32 %r5, 1;
+ // Arguments in order: bvh, origin xyz, direction xyz, 0, 0.0, ~1e27, 255, 0, %rd1, 1.
+ // NOTE(review): the trailing 255 / 0 / %rd1 / 1 look like visibility mask, ray flags,
+ // payload pointer and payload size -- confirm against the runtime's definition.
+ // begin inline asm
+ call _rt_trace_mask_flags_64, (%r1, %f7, %f7, %f3, %f7, %f7, %f6, %r4, %f7, %f8, %r3, %r4, %rd1, %r5);
+ // end inline asm
+ ret;
+
+}
+ // .globl _Z9intersecti
+// Entry `intersect(int primIdx)` (mangled _Z9intersecti) of trace_control.cu.
+// Reads the `sphere` and `ray` globals, computes the discriminant of the ray/sphere quadratic
+// and, if it is positive, offers up to two candidate distances to _rt_potential_intersection.
+// Each candidate is (root * l) - (float)temp_buffer[launch_index.x]. When a candidate is
+// accepted (non-zero return), temp_buffer[launch_index.x] is copied into `increment` and
+// _rt_report_intersection(0) is called. The .param is declared but never loaded here.
+.visible .entry _Z9intersecti(
+ .param .u32 _Z9intersecti_param_0
+)
+{
+ .reg .pred %p<5>;
+ .reg .f32 %f<47>;
+ .reg .b32 %r<19>;
+ .reg .b64 %rd<29>;
+
+
+ // O = ray[0..8] - sphere.xyz -> %f14, %f16, %f18; %f8 holds sphere.w.
+ ld.global.v4.f32 {%f5, %f6, %f7, %f8}, [sphere];
+ ld.global.f32 %f13, [ray];
+ sub.f32 %f14, %f13, %f5;
+ ld.global.f32 %f15, [ray+4];
+ sub.f32 %f16, %f15, %f6;
+ ld.global.f32 %f17, [ray+8];
+ sub.f32 %f18, %f17, %f7;
+ // %f1 = l = 1 / length(ray[12..20]); D = ray[12..20] * l -> %f26, %f27, %f28.
+ ld.global.f32 %f19, [ray+12];
+ ld.global.f32 %f20, [ray+16];
+ mul.f32 %f21, %f20, %f20;
+ fma.rn.f32 %f22, %f19, %f19, %f21;
+ ld.global.f32 %f23, [ray+20];
+ fma.rn.f32 %f24, %f23, %f23, %f22;
+ sqrt.rn.f32 %f25, %f24;
+ rcp.rn.f32 %f1, %f25;
+ mul.f32 %f26, %f19, %f1;
+ mul.f32 %f27, %f1, %f20;
+ mul.f32 %f28, %f1, %f23;
+ // %f2 = b = dot(O, D).
+ mul.f32 %f29, %f16, %f27;
+ fma.rn.f32 %f30, %f14, %f26, %f29;
+ fma.rn.f32 %f2, %f18, %f28, %f30;
+ // %f35 = c = dot(O, O) - w * w; %f3 = disc = b * b - c.
+ mul.f32 %f31, %f16, %f16;
+ fma.rn.f32 %f32, %f14, %f14, %f31;
+ fma.rn.f32 %f33, %f18, %f18, %f32;
+ mul.f32 %f34, %f8, %f8;
+ sub.f32 %f35, %f33, %f34;
+ mul.f32 %f36, %f2, %f2;
+ sub.f32 %f3, %f36, %f35;
+ // leu: exit when disc <= 0 or disc is NaN.
+ setp.leu.f32 %p1, %f3, 0f00000000;
+ @%p1 bra $L__BB1_5;
+
+ // First root: %f41 = ((-b - sqrt(disc)) + 0.0) * l.
+ sqrt.rn.f32 %f4, %f3;
+ neg.f32 %f38, %f2;
+ sub.f32 %f39, %f38, %f4;
+ add.f32 %f40, %f39, 0f00000000;
+ mul.f32 %f41, %f1, %f40;
+ // Only the first 32 bits of launch_index (the x component) are read as the buffer index.
+ ld.global.u32 %rd3, [launch_index];
+ mov.u64 %rd6, 0;
+ mov.u64 %rd7, temp_buffer;
+ cvta.global.u64 %rd2, %rd7;
+ mov.u32 %r1, 1;
+ mov.u32 %r2, 4;
+ // begin inline asm
+ call (%rd1), _rt_buffer_get_64, (%rd2, %r1, %r2, %rd3, %rd6, %rd6, %rd6);
+ // end inline asm
+ // Candidate distance = first root minus the buffer value converted to float.
+ ld.u32 %r4, [%rd1];
+ cvt.rn.f32.u32 %f42, %r4;
+ sub.f32 %f37, %f41, %f42;
+ // begin inline asm
+ call (%r3), _rt_potential_intersection, (%f37);
+ // end inline asm
+ // A zero return skips the report and falls through to the second root.
+ setp.eq.s32 %p2, %r3, 0;
+ @%p2 bra $L__BB1_3;
+
+ ld.global.u32 %rd10, [launch_index];
+ // begin inline asm
+ call (%rd8), _rt_buffer_get_64, (%rd2, %r1, %r2, %rd10, %rd6, %rd6, %rd6);
+ // end inline asm
+ ld.u32 %r9, [%rd8];
+ st.global.u32 [increment], %r9;
+ mov.u32 %r8, 0;
+ // begin inline asm
+ call (%r7), _rt_report_intersection, (%r8);
+ // end inline asm
+ // A non-zero report result ends the program without testing the second root.
+ setp.ne.s32 %p3, %r7, 0;
+ @%p3 bra $L__BB1_5;
+
+$L__BB1_3:
+ // Second root: %f45 = (sqrt(disc) - b) * l, again offset by the buffer value.
+ sub.f32 %f44, %f4, %f2;
+ mul.f32 %f45, %f1, %f44;
+ ld.global.u32 %rd17, [launch_index];
+ // begin inline asm
+ call (%rd15), _rt_buffer_get_64, (%rd2, %r1, %r2, %rd17, %rd6, %rd6, %rd6);
+ // end inline asm
+ ld.u32 %r13, [%rd15];
+ cvt.rn.f32.u32 %f46, %r13;
+ sub.f32 %f43, %f45, %f46;
+ // begin inline asm
+ call (%r12), _rt_potential_intersection, (%f43);
+ // end inline asm
+ setp.eq.s32 %p4, %r12, 0;
+ @%p4 bra $L__BB1_5;
+
+ ld.global.u32 %rd24, [launch_index];
+ mov.u64 %rd27, 0;
+ mov.u32 %r14, 1;
+ mov.u32 %r15, 4;
+ // begin inline asm
+ call (%rd22), _rt_buffer_get_64, (%rd2, %r14, %r15, %rd24, %rd27, %rd27, %rd27);
+ // end inline asm
+ ld.u32 %r18, [%rd22];
+ st.global.u32 [increment], %r18;
+ mov.u32 %r17, 0;
+ // The result of this second report (%r16) is not inspected.
+ // begin inline asm
+ call (%r16), _rt_report_intersection, (%r17);
+ // end inline asm
+
+$L__BB1_5:
+ ret;
+
+}
+ // .globl _Z6boundsiPf
+// Entry `bounds(int, float result[6])` (mangled _Z6boundsiPf) of trace_control.cu.
+// Writes six 32-bit values through param_1: sphere.xyz - w at byte offsets 0..8 and
+// sphere.xyz + w at offsets 12..20. When w is <= 0, NaN, or infinite it stores fixed bit
+// patterns instead (the aabb->invalidate() branch of the .cu source).
+// param_0 is declared but never loaded here.
+.visible .entry _Z6boundsiPf(
+ .param .u32 _Z6boundsiPf_param_0,
+ .param .u64 _Z6boundsiPf_param_1
+)
+{
+ .reg .pred %p<3>;
+ .reg .f32 %f<17>;
+ .reg .b32 %r<3>;
+ .reg .b64 %rd<3>;
+
+
+ ld.param.u64 %rd2, [_Z6boundsiPf_param_1];
+ cvta.to.global.u64 %rd1, %rd2;
+ ld.global.v4.f32 {%f6, %f7, %f8, %f9}, [sphere];
+ // leu is true when w <= 0 and also when w is NaN (unordered compare).
+ setp.leu.f32 %p1, %f9, 0f00000000;
+ @%p1 bra $L__BB2_2;
+
+ // 0f7F800000 is +infinity: take the valid path only when |w| is not infinite.
+ abs.f32 %f10, %f9;
+ setp.neu.f32 %p2, %f10, 0f7F800000;
+ @%p2 bra $L__BB2_3;
+ bra.uni $L__BB2_2;
+
+$L__BB2_3:
+ // Valid radius: minimum corner (offsets 0..8), then maximum corner (offsets 12..20).
+ sub.f32 %f11, %f6, %f9;
+ st.global.f32 [%rd1], %f11;
+ sub.f32 %f12, %f7, %f9;
+ st.global.f32 [%rd1+4], %f12;
+ sub.f32 %f13, %f8, %f9;
+ st.global.f32 [%rd1+8], %f13;
+ add.f32 %f14, %f6, %f9;
+ st.global.f32 [%rd1+12], %f14;
+ add.f32 %f15, %f7, %f9;
+ st.global.f32 [%rd1+16], %f15;
+ add.f32 %f16, %f8, %f9;
+ st.global.f32 [%rd1+20], %f16;
+ bra.uni $L__BB2_4;
+
+$L__BB2_2:
+ // 2096152002 is 0x7CF0BDC2 and -51331646 is 0xFCF0BDC2 as 32-bit patterns; read as f32
+ // they are roughly +1e37 and -1e37, so the stored minimum exceeds the stored maximum.
+ mov.u32 %r1, 2096152002;
+ st.global.u32 [%rd1], %r1;
+ st.global.u32 [%rd1+4], %r1;
+ st.global.u32 [%rd1+8], %r1;
+ mov.u32 %r2, -51331646;
+ st.global.u32 [%rd1+12], %r2;
+ st.global.u32 [%rd1+16], %r2;
+ st.global.u32 [%rd1+20], %r2;
+
+$L__BB2_4:
+ ret;
+
+}
+ // .globl _Z14any_hit_ignorev
+// Entry `any_hit_ignore` (mangled _Z14any_hit_ignorev); takes no parameters.
+// Increments temp_buffer[launch_index.x], re-reads it, and calls _rt_ignore_intersection
+// once the value is 3 or more.
+.visible .entry _Z14any_hit_ignorev()
+{
+ .reg .pred %p<2>;
+ .reg .b32 %r<8>;
+ .reg .b64 %rd<14>;
+
+
+ ld.global.u32 %rd3, [launch_index];
+ mov.u64 %rd12, 0;
+ mov.u64 %rd13, temp_buffer;
+ cvta.global.u64 %rd2, %rd13;
+ mov.u32 %r3, 1;
+ mov.u32 %r4, 4;
+ // begin inline asm
+ call (%rd1), _rt_buffer_get_64, (%rd2, %r3, %r4, %rd3, %rd12, %rd12, %rd12);
+ // end inline asm
+ // Read-modify-write of the element: value + 1.
+ ld.u32 %r5, [%rd1];
+ add.s32 %r6, %r5, 1;
+ st.u32 [%rd1], %r6;
+ // The element address is looked up a second time before the comparison.
+ ld.global.u32 %rd9, [launch_index];
+ // begin inline asm
+ call (%rd7), _rt_buffer_get_64, (%rd2, %r3, %r4, %rd9, %rd12, %rd12, %rd12);
+ // end inline asm
+ ld.u32 %r7, [%rd7];
+ // Unsigned compare: skip the ignore call while the value is below 3.
+ setp.lt.u32 %p1, %r7, 3;
+ @%p1 bra $L__BB3_2;
+
+ // begin inline asm
+ call _rt_ignore_intersection, ();
+ // end inline asm
+
+$L__BB3_2:
+ ret;
+
+}
+ // .globl _Z17any_hit_terminatev
+// Entry `any_hit_terminate` (mangled _Z17any_hit_terminatev); takes no parameters.
+// Increments temp_buffer[launch_index.x], re-reads it, and calls _rt_terminate_ray once the
+// value is 3 or more.
+.visible .entry _Z17any_hit_terminatev()
+{
+ .reg .pred %p<2>;
+ .reg .b32 %r<8>;
+ .reg .b64 %rd<14>;
+
+
+ ld.global.u32 %rd3, [launch_index];
+ mov.u64 %rd12, 0;
+ mov.u64 %rd13, temp_buffer;
+ cvta.global.u64 %rd2, %rd13;
+ mov.u32 %r3, 1;
+ mov.u32 %r4, 4;
+ // begin inline asm
+ call (%rd1), _rt_buffer_get_64, (%rd2, %r3, %r4, %rd3, %rd12, %rd12, %rd12);
+ // end inline asm
+ // Read-modify-write of the element: value + 1.
+ ld.u32 %r5, [%rd1];
+ add.s32 %r6, %r5, 1;
+ st.u32 [%rd1], %r6;
+ // The element address is looked up a second time before the comparison.
+ ld.global.u32 %rd9, [launch_index];
+ // begin inline asm
+ call (%rd7), _rt_buffer_get_64, (%rd2, %r3, %r4, %rd9, %rd12, %rd12, %rd12);
+ // end inline asm
+ ld.u32 %r7, [%rd7];
+ // Unsigned compare: skip the terminate call while the value is below 3.
+ setp.lt.u32 %p1, %r7, 3;
+ @%p1 bra $L__BB4_2;
+
+ // begin inline asm
+ call _rt_terminate_ray, ();
+ // end inline asm
+
+$L__BB4_2:
+ ret;
+
+}
+ // .globl _Z10attribute1v
+// Entry `attribute1` (mangled _Z10attribute1v); takes no parameters.
+// Stores temp_buffer[0] + 0xc4bb2187 (32-bit wrapping add) into `increment`.
+.visible .entry _Z10attribute1v()
+{
+ .reg .b32 %r<5>;
+ .reg .b64 %rd<8>;
+
+
+ mov.u64 %rd7, temp_buffer;
+ cvta.global.u64 %rd2, %rd7;
+ mov.u32 %r1, 1;
+ mov.u32 %r2, 4;
+ mov.u64 %rd6, 0;
+ // begin inline asm
+ call (%rd1), _rt_buffer_get_64, (%rd2, %r1, %r2, %rd6, %rd6, %rd6, %rd6);
+ // end inline asm
+ ld.u32 %r3, [%rd1];
+ // -994369145 has the same 32-bit pattern as the 0xc4bb2187 literal in the .cu source.
+ add.s32 %r4, %r3, -994369145;
+ st.global.u32 [increment], %r4;
+ ret;
+
+}
+ // .globl _Z16any_hit_plus_onev
+// Entry `any_hit_plus_one` (mangled _Z16any_hit_plus_onev); takes no parameters.
+// Increments temp_buffer[0] by one and returns; it makes no other runtime call.
+.visible .entry _Z16any_hit_plus_onev()
+{
+ .reg .b32 %r<5>;
+ .reg .b64 %rd<8>;
+
+
+ mov.u64 %rd7, temp_buffer;
+ cvta.global.u64 %rd2, %rd7;
+ mov.u32 %r1, 1;
+ mov.u32 %r2, 4;
+ mov.u64 %rd6, 0;
+ // All four index arguments are zero, so this addresses element 0.
+ // begin inline asm
+ call (%rd1), _rt_buffer_get_64, (%rd2, %r1, %r2, %rd6, %rd6, %rd6, %rd6);
+ // end inline asm
+ ld.u32 %r3, [%rd1];
+ add.s32 %r4, %r3, 1;
+ st.u32 [%rd1], %r4;
+ ret;
+
+}
+ // .globl _Z10attribute2v
+// Entry `attribute2` (mangled _Z10attribute2v); takes no parameters.
+// Stores the constant 0xc4bb2187 into `increment` without reading any buffer.
+.visible .entry _Z10attribute2v()
+{
+ .reg .b32 %r<2>;
+
+
+ // -994369145 has the same 32-bit pattern as the 0xc4bb2187 literal in the .cu source.
+ mov.u32 %r1, -994369145;
+ st.global.u32 [increment], %r1;
+ ret;
+
+}
+ // .globl _Z21any_hit_always_ignorev
+// Entry `any_hit_always_ignore` (mangled _Z21any_hit_always_ignorev); takes no parameters.
+// Increments temp_buffer[0] by one, then unconditionally calls _rt_ignore_intersection.
+.visible .entry _Z21any_hit_always_ignorev()
+{
+ .reg .b32 %r<5>;
+ .reg .b64 %rd<8>;
+
+
+ mov.u64 %rd7, temp_buffer;
+ cvta.global.u64 %rd2, %rd7;
+ mov.u32 %r1, 1;
+ mov.u32 %r2, 4;
+ mov.u64 %rd6, 0;
+ // All four index arguments are zero, so this addresses element 0.
+ // begin inline asm
+ call (%rd1), _rt_buffer_get_64, (%rd2, %r1, %r2, %rd6, %rd6, %rd6, %rd6);
+ // end inline asm
+ ld.u32 %r3, [%rd1];
+ add.s32 %r4, %r3, 1;
+ st.u32 [%rd1], %r4;
+ // begin inline asm
+ call _rt_ignore_intersection, ();
+ // end inline asm
+ ret;
+
+}
+ // .globl _Z11closest_hitv
+// Entry `closest_hit` (mangled _Z11closest_hitv); takes no parameters.
+// Copies the current value of `increment` into output_buffer[launch_index.x].
+.visible .entry _Z11closest_hitv()
+{
+ .reg .b32 %r<4>;
+ .reg .b64 %rd<8>;
+
+
+ // `increment` is read before the buffer address is looked up.
+ ld.global.u32 %r3, [increment];
+ mov.u64 %rd6, 0;
+ ld.global.u32 %rd3, [launch_index];
+ mov.u64 %rd7, output_buffer;
+ cvta.global.u64 %rd2, %rd7;
+ mov.u32 %r1, 1;
+ mov.u32 %r2, 4;
+ // begin inline asm
+ call (%rd1), _rt_buffer_get_64, (%rd2, %r1, %r2, %rd3, %rd6, %rd6, %rd6);
+ // end inline asm
+ st.u32 [%rd1], %r3;
+ ret;
+
+}
+
diff --git a/zluda_rt/src/tests/transform.cu b/zluda_rt/src/tests/transform.cu new file mode 100644 index 0000000..e5a1ce2 --- /dev/null +++ b/zluda_rt/src/tests/transform.cu @@ -0,0 +1,92 @@ +// nvcc transform.cu -I"C:\dev\OptiX SDK 6.5.0\include" -ptx -x cu -dc +#include <optix.h> +#include <optixu/optixu_math_namespace.h> +#include <optix_world.h> + +using namespace optix; + +rtBuffer<unsigned int, 1> is_triangle; +rtBuffer<rtBufferId<float3, 1>> origin; +rtBuffer<rtBufferId<float3, 1>> direction; +rtDeclareVariable(optix::Ray, ray, rtCurrentRay, ); +rtDeclareVariable(rtObject, bvh, , ); +rtDeclareVariable(uint2, launch_index, rtLaunchIndex, ); +rtDeclareVariable(float4, sphere, , ); + +RT_PROGRAM void start() { + Ray ray = make_Ray(make_float3(float(launch_index.x), 0, -1), make_float3(0,0,1), 0, 0.0, RT_DEFAULT_MAX); + char unused = 0; + rtTrace(bvh, ray, unused); +} + +RT_PROGRAM void intersect(int primIdx) +{ + float3 center = make_float3(sphere); + float3 O = ray.origin - center; + float l = 1 / length(ray.direction); + float3 D = ray.direction * l; + float radius = sphere.w; + + float b = dot(O, D); + float c = dot(O, O)-radius*radius; + float disc = b*b-c; + if(disc > 0.0f){ + float sdisc = sqrtf(disc); + float root1 = (-b - sdisc); + + float root11 = 0.0f; + + bool check_second = true; + if( rtPotentialIntersection( (root1 + root11) * l ) ) { + if(rtReportIntersection(0)) + { + origin[launch_index.x][0] = ray.origin; + direction[launch_index.x][0] = ray.direction; + check_second = false; + } + } + if(check_second) { + float root2 = (-b + sdisc); + if( rtPotentialIntersection( root2 * l ) ) { + if(rtReportIntersection(0)) + { + origin[launch_index.x][0] = ray.origin; + direction[launch_index.x][0] = ray.direction; + } + } + } + } +} + +RT_PROGRAM void bounds (int, float result[6]) +{ + const float3 cen = make_float3( sphere ); + const float3 rad = make_float3( sphere.w ); + + optix::Aabb* aabb = (optix::Aabb*)result; + + if( rad.x > 0.0f && !isinf(rad.x) 
) { + aabb->m_min = cen - rad; + aabb->m_max = cen + rad; + } else { + aabb->invalidate(); + } +} + +// attribute uses the same ray as related *hit function, +// so transformed for anyhit and untransformed for closesthit +RT_PROGRAM void attribute() { + origin[launch_index.x][0] = ray.origin; + direction[launch_index.x][0] = ray.direction; +} + +RT_PROGRAM void any_hit() { + origin[launch_index.x][1] = ray.origin; + direction[launch_index.x][1] = ray.direction; +} + +RT_PROGRAM void closest_hit() { + origin[launch_index.x][2] = ray.origin; + direction[launch_index.x][2] = ray.direction; + is_triangle[launch_index.x] = rtIsTriangleHit(); +} diff --git a/zluda_rt/src/tests/transform.ptx b/zluda_rt/src/tests/transform.ptx new file mode 100644 index 0000000..f04f24d --- /dev/null +++ b/zluda_rt/src/tests/transform.ptx @@ -0,0 +1,462 @@ +//
+// Generated by NVIDIA NVVM Compiler
+//
+// Compiler Build ID: CL-31833905
+// Cuda compilation tools, release 11.8, V11.8.89
+// Based on NVVM 7.0.1
+//
+
+.version 7.8
+.target sm_52
+.address_size 64
+
+ // .globl _Z5startv
+// Storage for the variables and buffers declared in transform.cu via
+// rtDeclareVariable / rtBuffer.
+.visible .global .align 1 .b8 is_triangle[1];
+.visible .global .align 1 .b8 origin[1];
+.visible .global .align 1 .b8 direction[1];
+.visible .global .align 4 .b8 ray[36];
+.visible .global .align 4 .b8 bvh[4];
+.visible .global .align 8 .b8 launch_index[8];
+.visible .global .align 16 .b8 sphere[16];
+// rti_internal_typeinfo records, one per rtDeclareVariable: bytes 82, 97, 121, 0 are ASCII
+// "Ray\0"; the following four bytes match the byte size of the corresponding variable
+// declared above (36, 4, 8, 16).
+.visible .global .align 4 .b8 _ZN21rti_internal_typeinfo3rayE[8] = {82, 97, 121, 0, 36, 0, 0, 0};
+.visible .global .align 4 .b8 _ZN21rti_internal_typeinfo3bvhE[8] = {82, 97, 121, 0, 4, 0, 0, 0};
+.visible .global .align 4 .b8 _ZN21rti_internal_typeinfo12launch_indexE[8] = {82, 97, 121, 0, 8, 0, 0, 0};
+.visible .global .align 4 .b8 _ZN21rti_internal_typeinfo6sphereE[8] = {82, 97, 121, 0, 16, 0, 0, 0};
+// rti_internal_register globals: declared without initializers and not referenced by the
+// entries visible in this file. NOTE(review): presumably emitted by the OptiX headers -- confirm.
+.visible .global .align 8 .u64 _ZN21rti_internal_register20reg_bitness_detectorE;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail0E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail1E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail2E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail3E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail4E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail5E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail6E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail7E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail8E;
+.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail9E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail0E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail1E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail2E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail3E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail4E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail5E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail6E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail7E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail8E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail9E;
+.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_xE;
+.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_yE;
+.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_zE;
+// rti_internal_typename: NUL-terminated ASCII type names, in order
+// "optix::Ray", "rtObject", "uint2", "float4".
+.visible .global .align 1 .b8 _ZN21rti_internal_typename3rayE[11] = {111, 112, 116, 105, 120, 58, 58, 82, 97, 121, 0};
+.visible .global .align 1 .b8 _ZN21rti_internal_typename3bvhE[9] = {114, 116, 79, 98, 106, 101, 99, 116, 0};
+.visible .global .align 1 .b8 _ZN21rti_internal_typename12launch_indexE[6] = {117, 105, 110, 116, 50, 0};
+.visible .global .align 1 .b8 _ZN21rti_internal_typename6sphereE[7] = {102, 108, 111, 97, 116, 52, 0};
+// rti_internal_typeenum: every variable carries the same value, 4919 (0x1337).
+.visible .global .align 4 .u32 _ZN21rti_internal_typeenum3rayE = 4919;
+.visible .global .align 4 .u32 _ZN21rti_internal_typeenum3bvhE = 4919;
+.visible .global .align 4 .u32 _ZN21rti_internal_typeenum12launch_indexE = 4919;
+.visible .global .align 4 .u32 _ZN21rti_internal_typeenum6sphereE = 4919;
+// rti_internal_semantic: NUL-terminated ASCII semantic strings matching the third argument of
+// rtDeclareVariable in the .cu source: "rtCurrentRay" and "rtLaunchIndex".
+// bvh and sphere have no initializer.
+.visible .global .align 1 .b8 _ZN21rti_internal_semantic3rayE[13] = {114, 116, 67, 117, 114, 114, 101, 110, 116, 82, 97, 121, 0};
+.visible .global .align 1 .b8 _ZN21rti_internal_semantic3bvhE[1];
+.visible .global .align 1 .b8 _ZN21rti_internal_semantic12launch_indexE[14] = {114, 116, 76, 97, 117, 110, 99, 104, 73, 110, 100, 101, 120, 0};
+.visible .global .align 1 .b8 _ZN21rti_internal_semantic6sphereE[1];
+// rti_internal_annotation: one uninitialized byte per variable (no annotation was given in
+// the .cu source).
+.visible .global .align 1 .b8 _ZN23rti_internal_annotation3rayE[1];
+.visible .global .align 1 .b8 _ZN23rti_internal_annotation3bvhE[1];
+.visible .global .align 1 .b8 _ZN23rti_internal_annotation12launch_indexE[1];
+.visible .global .align 1 .b8 _ZN23rti_internal_annotation6sphereE[1];
+
+// Entry `start` (mangled _Z5startv) of transform.cu; takes no parameters.
+// Zeroes a one-byte local (the `unused` payload in the .cu source), loads `bvh`, and issues a
+// single _rt_trace_mask_flags_64 call for a ray with origin ((float)launch_index.x, 0, -1)
+// and direction (0, 0, 1).
+.visible .entry _Z5startv()
+{
+ .local .align 1 .b8 __local_depot0[1];
+ .reg .b64 %SP;
+ .reg .b64 %SPL;
+ .reg .b16 %rs<2>;
+ .reg .f32 %f<9>;
+ .reg .b32 %r<7>;
+ .reg .b64 %rd<3>;
+
+
+ // %rd1 is the generic address of the local byte, %rd2 its .local address.
+ mov.u64 %SPL, __local_depot0;
+ cvta.local.u64 %SP, %SPL;
+ add.u64 %rd1, %SP, 0;
+ add.u64 %rd2, %SPL, 0;
+ // %f1 = first 32 bits of launch_index (the x component) converted from u32 to f32.
+ ld.global.u32 %r6, [launch_index];
+ cvt.rn.f32.u32 %f1, %r6;
+ mov.u16 %rs1, 0;
+ st.local.u8 [%rd2], %rs1;
+ ld.global.u32 %r1, [bvh];
+ // f32 bit patterns: 0fBF800000 = -1.0, 0f3F800000 = 1.0, 0f00000000 = 0.0,
+ // 0f6C4ECB8F is approximately 1e27 (the .cu source passes RT_DEFAULT_MAX as tmax).
+ mov.f32 %f3, 0fBF800000;
+ mov.f32 %f6, 0f3F800000;
+ mov.f32 %f7, 0f00000000;
+ mov.f32 %f8, 0f6C4ECB8F;
+ mov.u32 %r3, 255;
+ mov.u32 %r4, 0;
+ mov.u32 %r5, 1;
+ // Arguments in order: bvh, origin xyz, direction xyz, 0, 0.0, ~1e27, 255, 0, %rd1, 1.
+ // NOTE(review): the trailing 255 / 0 / %rd1 / 1 look like visibility mask, ray flags,
+ // payload pointer and payload size -- confirm against the runtime's definition.
+ // begin inline asm
+ call _rt_trace_mask_flags_64, (%r1, %f1, %f7, %f3, %f7, %f7, %f6, %r4, %f7, %f8, %r3, %r4, %rd1, %r5);
+ // end inline asm
+ ret;
+
+}
+ // .globl _Z9intersecti
+// Entry `intersect(int primIdx)` (mangled _Z9intersecti) of transform.cu.
+// Reads the `sphere` and `ray` globals, computes the discriminant of the ray/sphere quadratic
+// and, if it is positive, offers up to two candidate distances (root * l) to
+// _rt_potential_intersection followed by _rt_report_intersection(0). When a report returns
+// non-zero, ray[0..8] is stored to element 0 of the buffer whose id is read from
+// origin[launch_index.x], and ray[12..20] to element 0 of the buffer whose id is read from
+// direction[launch_index.x]. The .param is declared but never loaded here.
+.visible .entry _Z9intersecti(
+ .param .u32 _Z9intersecti_param_0
+)
+{
+ .reg .pred %p<6>;
+ .reg .f32 %f<55>;
+ .reg .b32 %r<27>;
+ .reg .b64 %rd<49>;
+
+
+ // O = ray[0..8] - sphere.xyz -> %f14, %f16, %f18; %f8 holds sphere.w.
+ ld.global.v4.f32 {%f5, %f6, %f7, %f8}, [sphere];
+ ld.global.f32 %f13, [ray];
+ sub.f32 %f14, %f13, %f5;
+ ld.global.f32 %f15, [ray+4];
+ sub.f32 %f16, %f15, %f6;
+ ld.global.f32 %f17, [ray+8];
+ sub.f32 %f18, %f17, %f7;
+ // %f1 = l = 1 / length(ray[12..20]); D = ray[12..20] * l -> %f26, %f27, %f28.
+ ld.global.f32 %f19, [ray+12];
+ ld.global.f32 %f20, [ray+16];
+ mul.f32 %f21, %f20, %f20;
+ fma.rn.f32 %f22, %f19, %f19, %f21;
+ ld.global.f32 %f23, [ray+20];
+ fma.rn.f32 %f24, %f23, %f23, %f22;
+ sqrt.rn.f32 %f25, %f24;
+ rcp.rn.f32 %f1, %f25;
+ mul.f32 %f26, %f19, %f1;
+ mul.f32 %f27, %f1, %f20;
+ mul.f32 %f28, %f1, %f23;
+ // %f2 = b = dot(O, D).
+ mul.f32 %f29, %f16, %f27;
+ fma.rn.f32 %f30, %f14, %f26, %f29;
+ fma.rn.f32 %f2, %f18, %f28, %f30;
+ // %f35 = c = dot(O, O) - w * w; %f3 = disc = b * b - c.
+ mul.f32 %f31, %f16, %f16;
+ fma.rn.f32 %f32, %f14, %f14, %f31;
+ fma.rn.f32 %f33, %f18, %f18, %f32;
+ mul.f32 %f34, %f8, %f8;
+ sub.f32 %f35, %f33, %f34;
+ mul.f32 %f36, %f2, %f2;
+ sub.f32 %f3, %f36, %f35;
+ // leu: exit when disc <= 0 or disc is NaN.
+ setp.leu.f32 %p1, %f3, 0f00000000;
+ @%p1 bra $L__BB1_7;
+
+ // First root: %f37 = ((-b - sqrt(disc)) + 0.0) * l.
+ sqrt.rn.f32 %f4, %f3;
+ neg.f32 %f38, %f2;
+ sub.f32 %f39, %f38, %f4;
+ add.f32 %f40, %f39, 0f00000000;
+ mul.f32 %f37, %f1, %f40;
+ // begin inline asm
+ call (%r1), _rt_potential_intersection, (%f37);
+ // end inline asm
+ // A zero return from either call below falls through to the second root.
+ setp.eq.s32 %p2, %r1, 0;
+ @%p2 bra $L__BB1_4;
+
+ mov.u32 %r3, 0;
+ // begin inline asm
+ call (%r2), _rt_report_intersection, (%r3);
+ // end inline asm
+ setp.eq.s32 %p3, %r2, 0;
+ @%p3 bra $L__BB1_4;
+
+ // First root reported: fetch the 4-byte id stored at origin[launch_index.x] ...
+ ld.global.u32 %rd3, [launch_index];
+ mov.u64 %rd22, 0;
+ mov.u64 %rd23, origin;
+ cvta.global.u64 %rd2, %rd23;
+ mov.u32 %r12, 1;
+ mov.u32 %r10, 4;
+ // begin inline asm
+ call (%rd1), _rt_buffer_get_64, (%rd2, %r12, %r10, %rd3, %rd22, %rd22, %rd22);
+ // end inline asm
+ ld.u32 %r6, [%rd1];
+ mov.u32 %r13, 12;
+ // ... then address index 0 of that buffer (12 is passed where the outer call passed 4).
+ // begin inline asm
+ call (%rd7), _rt_buffer_get_id_64, (%r6, %r12, %r13, %rd22, %rd22, %rd22, %rd22);
+ // end inline asm
+ ld.global.f32 %f41, [ray];
+ ld.global.f32 %f42, [ray+4];
+ ld.global.f32 %f43, [ray+8];
+ st.f32 [%rd7], %f41;
+ st.f32 [%rd7+4], %f42;
+ st.f32 [%rd7+8], %f43;
+ // Same two-step lookup through `direction`, storing ray[12..20].
+ ld.global.u32 %rd14, [launch_index];
+ mov.u64 %rd24, direction;
+ cvta.global.u64 %rd13, %rd24;
+ // begin inline asm
+ call (%rd12), _rt_buffer_get_64, (%rd13, %r12, %r10, %rd14, %rd22, %rd22, %rd22);
+ // end inline asm
+ ld.u32 %r11, [%rd12];
+ // begin inline asm
+ call (%rd18), _rt_buffer_get_id_64, (%r11, %r12, %r13, %rd22, %rd22, %rd22, %rd22);
+ // end inline asm
+ ld.global.f32 %f44, [ray+12];
+ ld.global.f32 %f45, [ray+16];
+ ld.global.f32 %f46, [ray+20];
+ st.f32 [%rd18], %f44;
+ st.f32 [%rd18+4], %f45;
+ st.f32 [%rd18+8], %f46;
+ // The second root is not tested once the first one was reported.
+ bra.uni $L__BB1_7;
+
+$L__BB1_4:
+ // Second root: %f47 = (sqrt(disc) - b) * l.
+ sub.f32 %f48, %f4, %f2;
+ mul.f32 %f47, %f1, %f48;
+ // begin inline asm
+ call (%r14), _rt_potential_intersection, (%f47);
+ // end inline asm
+ setp.eq.s32 %p4, %r14, 0;
+ @%p4 bra $L__BB1_7;
+
+ mov.u32 %r16, 0;
+ // begin inline asm
+ call (%r15), _rt_report_intersection, (%r16);
+ // end inline asm
+ setp.eq.s32 %p5, %r15, 0;
+ @%p5 bra $L__BB1_7;
+
+ // Second root reported: same stores as for the first root.
+ ld.global.u32 %rd27, [launch_index];
+ mov.u64 %rd46, 0;
+ mov.u64 %rd47, origin;
+ cvta.global.u64 %rd26, %rd47;
+ mov.u32 %r25, 1;
+ mov.u32 %r23, 4;
+ // begin inline asm
+ call (%rd25), _rt_buffer_get_64, (%rd26, %r25, %r23, %rd27, %rd46, %rd46, %rd46);
+ // end inline asm
+ ld.u32 %r19, [%rd25];
+ mov.u32 %r26, 12;
+ // begin inline asm
+ call (%rd31), _rt_buffer_get_id_64, (%r19, %r25, %r26, %rd46, %rd46, %rd46, %rd46);
+ // end inline asm
+ ld.global.f32 %f49, [ray];
+ ld.global.f32 %f50, [ray+4];
+ ld.global.f32 %f51, [ray+8];
+ st.f32 [%rd31], %f49;
+ st.f32 [%rd31+4], %f50;
+ st.f32 [%rd31+8], %f51;
+ ld.global.u32 %rd38, [launch_index];
+ mov.u64 %rd48, direction;
+ cvta.global.u64 %rd37, %rd48;
+ // begin inline asm
+ call (%rd36), _rt_buffer_get_64, (%rd37, %r25, %r23, %rd38, %rd46, %rd46, %rd46);
+ // end inline asm
+ ld.u32 %r24, [%rd36];
+ // begin inline asm
+ call (%rd42), _rt_buffer_get_id_64, (%r24, %r25, %r26, %rd46, %rd46, %rd46, %rd46);
+ // end inline asm
+ ld.global.f32 %f52, [ray+12];
+ ld.global.f32 %f53, [ray+16];
+ ld.global.f32 %f54, [ray+20];
+ st.f32 [%rd42], %f52;
+ st.f32 [%rd42+4], %f53;
+ st.f32 [%rd42+8], %f54;
+
+$L__BB1_7:
+ ret;
+
+}
+ // .globl _Z6boundsiPf
+// Entry `bounds(int, float result[6])` (mangled _Z6boundsiPf) of transform.cu.
+// Writes six 32-bit values through param_1: sphere.xyz - w at byte offsets 0..8 and
+// sphere.xyz + w at offsets 12..20. When w is <= 0, NaN, or infinite it stores fixed bit
+// patterns instead (the aabb->invalidate() branch of the .cu source).
+// param_0 is declared but never loaded here.
+.visible .entry _Z6boundsiPf(
+ .param .u32 _Z6boundsiPf_param_0,
+ .param .u64 _Z6boundsiPf_param_1
+)
+{
+ .reg .pred %p<3>;
+ .reg .f32 %f<17>;
+ .reg .b32 %r<3>;
+ .reg .b64 %rd<3>;
+
+
+ ld.param.u64 %rd2, [_Z6boundsiPf_param_1];
+ cvta.to.global.u64 %rd1, %rd2;
+ ld.global.v4.f32 {%f6, %f7, %f8, %f9}, [sphere];
+ // leu is true when w <= 0 and also when w is NaN (unordered compare).
+ setp.leu.f32 %p1, %f9, 0f00000000;
+ @%p1 bra $L__BB2_2;
+
+ // 0f7F800000 is +infinity: take the valid path only when |w| is not infinite.
+ abs.f32 %f10, %f9;
+ setp.neu.f32 %p2, %f10, 0f7F800000;
+ @%p2 bra $L__BB2_3;
+ bra.uni $L__BB2_2;
+
+$L__BB2_3:
+ // Valid radius: minimum corner (offsets 0..8), then maximum corner (offsets 12..20).
+ sub.f32 %f11, %f6, %f9;
+ st.global.f32 [%rd1], %f11;
+ sub.f32 %f12, %f7, %f9;
+ st.global.f32 [%rd1+4], %f12;
+ sub.f32 %f13, %f8, %f9;
+ st.global.f32 [%rd1+8], %f13;
+ add.f32 %f14, %f6, %f9;
+ st.global.f32 [%rd1+12], %f14;
+ add.f32 %f15, %f7, %f9;
+ st.global.f32 [%rd1+16], %f15;
+ add.f32 %f16, %f8, %f9;
+ st.global.f32 [%rd1+20], %f16;
+ bra.uni $L__BB2_4;
+
+$L__BB2_2:
+ // 2096152002 is 0x7CF0BDC2 and -51331646 is 0xFCF0BDC2 as 32-bit patterns; read as f32
+ // they are roughly +1e37 and -1e37, so the stored minimum exceeds the stored maximum.
+ mov.u32 %r1, 2096152002;
+ st.global.u32 [%rd1], %r1;
+ st.global.u32 [%rd1+4], %r1;
+ st.global.u32 [%rd1+8], %r1;
+ mov.u32 %r2, -51331646;
+ st.global.u32 [%rd1+12], %r2;
+ st.global.u32 [%rd1+16], %r2;
+ st.global.u32 [%rd1+20], %r2;
+
+$L__BB2_4:
+ ret;
+
+}
+ // .globl _Z9attributev
+// Entry `attribute` (mangled _Z9attributev) of transform.cu; takes no parameters.
+// Stores ray[0..8] to element 0 of the buffer whose id is read from origin[launch_index.x],
+// and ray[12..20] to element 0 of the buffer whose id is read from direction[launch_index.x].
+.visible .entry _Z9attributev()
+{
+ .reg .f32 %f<7>;
+ .reg .b32 %r<11>;
+ .reg .b64 %rd<25>;
+
+
+ // Step 1: address of the 4-byte id stored at origin[launch_index.x].
+ ld.global.u32 %rd3, [launch_index];
+ mov.u64 %rd22, 0;
+ mov.u64 %rd23, origin;
+ cvta.global.u64 %rd2, %rd23;
+ mov.u32 %r9, 1;
+ mov.u32 %r7, 4;
+ // begin inline asm
+ call (%rd1), _rt_buffer_get_64, (%rd2, %r9, %r7, %rd3, %rd22, %rd22, %rd22);
+ // end inline asm
+ ld.u32 %r3, [%rd1];
+ mov.u32 %r10, 12;
+ // Step 2: address of index 0 in the buffer with that id (12 is passed where step 1 passed 4).
+ // begin inline asm
+ call (%rd7), _rt_buffer_get_id_64, (%r3, %r9, %r10, %rd22, %rd22, %rd22, %rd22);
+ // end inline asm
+ ld.global.f32 %f1, [ray];
+ ld.global.f32 %f2, [ray+4];
+ ld.global.f32 %f3, [ray+8];
+ st.f32 [%rd7], %f1;
+ st.f32 [%rd7+4], %f2;
+ st.f32 [%rd7+8], %f3;
+ // Same two-step lookup through `direction`, storing ray[12..20].
+ ld.global.u32 %rd14, [launch_index];
+ mov.u64 %rd24, direction;
+ cvta.global.u64 %rd13, %rd24;
+ // begin inline asm
+ call (%rd12), _rt_buffer_get_64, (%rd13, %r9, %r7, %rd14, %rd22, %rd22, %rd22);
+ // end inline asm
+ ld.u32 %r8, [%rd12];
+ // begin inline asm
+ call (%rd18), _rt_buffer_get_id_64, (%r8, %r9, %r10, %rd22, %rd22, %rd22, %rd22);
+ // end inline asm
+ ld.global.f32 %f4, [ray+12];
+ ld.global.f32 %f5, [ray+16];
+ ld.global.f32 %f6, [ray+20];
+ st.f32 [%rd18], %f4;
+ st.f32 [%rd18+4], %f5;
+ st.f32 [%rd18+8], %f6;
+ ret;
+
+}
+ // .globl _Z7any_hitv
+// Any-hit program. Performs the same copies as the attribute program above it in this file
+// (`ray`+0..8 via the `origin` buffer, `ray`+12..20 via the `direction` buffer), but passes
+// 1 (%rd19) as the first index argument of _rt_buffer_get_id_64 instead of 0.
+.visible .entry _Z7any_hitv()
+{
+ .reg .f32 %f<7>;
+ .reg .b32 %r<11>;
+ .reg .b64 %rd<25>;
+
+
+ // %rd3 = first u32 of `launch_index`, used as the index into `origin`.
+ ld.global.u32 %rd3, [launch_index];
+ mov.u64 %rd22, 0;
+ mov.u64 %rd23, origin;
+ cvta.global.u64 %rd2, %rd23;
+ mov.u32 %r9, 1;
+ mov.u32 %r7, 4;
+ // begin inline asm
+ call (%rd1), _rt_buffer_get_64, (%rd2, %r9, %r7, %rd3, %rd22, %rd22, %rd22);
+ // end inline asm
+ ld.u32 %r3, [%rd1];
+ mov.u32 %r10, 12;
+ // %rd19 = 1: index used for both _rt_buffer_get_id_64 lookups in this program.
+ mov.u64 %rd19, 1;
+ // begin inline asm
+ call (%rd7), _rt_buffer_get_id_64, (%r3, %r9, %r10, %rd19, %rd22, %rd22, %rd22);
+ // end inline asm
+ // Store ray+0, ray+4, ray+8 to the address returned in %rd7.
+ ld.global.f32 %f1, [ray];
+ ld.global.f32 %f2, [ray+4];
+ ld.global.f32 %f3, [ray+8];
+ st.f32 [%rd7], %f1;
+ st.f32 [%rd7+4], %f2;
+ st.f32 [%rd7+8], %f3;
+ // Same sequence for the `direction` buffer.
+ ld.global.u32 %rd14, [launch_index];
+ mov.u64 %rd24, direction;
+ cvta.global.u64 %rd13, %rd24;
+ // begin inline asm
+ call (%rd12), _rt_buffer_get_64, (%rd13, %r9, %r7, %rd14, %rd22, %rd22, %rd22);
+ // end inline asm
+ ld.u32 %r8, [%rd12];
+ // begin inline asm
+ call (%rd18), _rt_buffer_get_id_64, (%r8, %r9, %r10, %rd19, %rd22, %rd22, %rd22);
+ // end inline asm
+ // Store ray+12, ray+16, ray+20 to the address returned in %rd18.
+ ld.global.f32 %f4, [ray+12];
+ ld.global.f32 %f5, [ray+16];
+ ld.global.f32 %f6, [ray+20];
+ st.f32 [%rd18], %f4;
+ st.f32 [%rd18+4], %f5;
+ st.f32 [%rd18+8], %f6;
+ ret;
+
+}
+ // .globl _Z11closest_hitv
+// Closest-hit program. Copies `ray`+0..8 via the `origin` buffer and `ray`+12..20 via the
+// `direction` buffer, passing 2 (%rd19) as the first index argument of
+// _rt_buffer_get_id_64, then writes 1 or 0 into the `is_triangle` buffer at the launch
+// index depending on whether _rt_is_triangle_hit returned a non-zero value.
+.visible .entry _Z11closest_hitv()
+{
+ .reg .pred %p<2>;
+ .reg .f32 %f<7>;
+ .reg .b32 %r<15>;
+ .reg .b64 %rd<32>;
+
+
+ // %rd3 = first u32 of `launch_index`, used as the index into `origin`.
+ ld.global.u32 %rd3, [launch_index];
+ mov.u64 %rd28, 0;
+ mov.u64 %rd29, origin;
+ cvta.global.u64 %rd2, %rd29;
+ mov.u32 %r12, 1;
+ mov.u32 %r13, 4;
+ // begin inline asm
+ call (%rd1), _rt_buffer_get_64, (%rd2, %r12, %r13, %rd3, %rd28, %rd28, %rd28);
+ // end inline asm
+ ld.u32 %r3, [%rd1];
+ mov.u32 %r10, 12;
+ // %rd19 = 2: index used for both _rt_buffer_get_id_64 lookups in this program.
+ mov.u64 %rd19, 2;
+ // begin inline asm
+ call (%rd7), _rt_buffer_get_id_64, (%r3, %r12, %r10, %rd19, %rd28, %rd28, %rd28);
+ // end inline asm
+ // Store ray+0, ray+4, ray+8 to the address returned in %rd7.
+ ld.global.f32 %f1, [ray];
+ ld.global.f32 %f2, [ray+4];
+ ld.global.f32 %f3, [ray+8];
+ st.f32 [%rd7], %f1;
+ st.f32 [%rd7+4], %f2;
+ st.f32 [%rd7+8], %f3;
+ // Same sequence for the `direction` buffer.
+ ld.global.u32 %rd14, [launch_index];
+ mov.u64 %rd30, direction;
+ cvta.global.u64 %rd13, %rd30;
+ // begin inline asm
+ call (%rd12), _rt_buffer_get_64, (%rd13, %r12, %r13, %rd14, %rd28, %rd28, %rd28);
+ // end inline asm
+ ld.u32 %r8, [%rd12];
+ // begin inline asm
+ call (%rd18), _rt_buffer_get_id_64, (%r8, %r12, %r10, %rd19, %rd28, %rd28, %rd28);
+ // end inline asm
+ // Store ray+12, ray+16, ray+20 to the address returned in %rd18.
+ ld.global.f32 %f4, [ray+12];
+ ld.global.f32 %f5, [ray+16];
+ ld.global.f32 %f6, [ray+20];
+ st.f32 [%rd18], %f4;
+ st.f32 [%rd18+4], %f5;
+ st.f32 [%rd18+8], %f6;
+ // begin inline asm
+ call (%r11), _rt_is_triangle_hit, ();
+ // end inline asm
+ // %r14 = (%r11 != 0) ? 1 : 0
+ setp.ne.s32 %p1, %r11, 0;
+ selp.u32 %r14, 1, 0, %p1;
+ ld.global.u32 %rd25, [launch_index];
+ mov.u64 %rd31, is_triangle;
+ cvta.global.u64 %rd24, %rd31;
+ // begin inline asm
+ call (%rd23), _rt_buffer_get_64, (%rd24, %r12, %r13, %rd25, %rd28, %rd28, %rd28);
+ // end inline asm
+ // Write the 0/1 flag into the `is_triangle` element for this launch index.
+ st.u32 [%rd23], %r14;
+ ret;
+
+}
+
diff --git a/zluda_rt/src/tests/triangle_front.cu b/zluda_rt/src/tests/triangle_front.cu new file mode 100644 index 0000000..c6204a2 --- /dev/null +++ b/zluda_rt/src/tests/triangle_front.cu @@ -0,0 +1,76 @@ +// nvcc triangle_front.cu -I"C:\dev\OptiX SDK 6.5.0\include" -ptx -x cu -dc +#include <optix.h> +#include <optixu/optixu_math_namespace.h> +#include <optix_world.h> + +using namespace optix; + +rtBuffer<uint, 1> output_buffer1; +rtBuffer<uint, 1> output_buffer2; +rtBuffer<uint, 1> output_buffer3; +rtDeclareVariable(rtObject, bvh, , ); +rtDeclareVariable(optix::Ray, ray, rtCurrentRay, ); +rtDeclareVariable(float4, sphere, , ); +rtDeclareVariable(int2, launch_index, rtLaunchIndex, ); + +RT_PROGRAM void start() { + Ray ray; + if (launch_index.x == 1) + ray = make_Ray(make_float3(launch_index.x, 0, 1), make_float3(0, 0, -1), 0, 0.0, RT_DEFAULT_MAX); + else + ray = make_Ray(make_float3(launch_index.x, 0, -1), make_float3(0, 0, 1), 0, 0.0, RT_DEFAULT_MAX); + char unused = 0; + rtTrace(bvh, ray, unused); +} + +RT_PROGRAM void intersect(int primIdx) +{ + float3 center = make_float3(sphere); + float3 O = ray.origin - center; + float l = 1 / length(ray.direction); + float3 D = ray.direction * l; + float radius = sphere.w; + + float b = dot(O, D); + float c = dot(O, O)-radius*radius; + float disc = b*b-c; + if(disc > 0.0f){ + float sdisc = sqrtf(disc); + float root1 = (-b - sdisc); + + float root11 = 0.0f; + + bool check_second = true; + if( rtPotentialIntersection( (root1 + root11) * l ) ) { + if(rtReportIntersection(0)) + check_second = false; + } + if(check_second) { + float root2 = (-b + sdisc); + if( rtPotentialIntersection( root2 * l ) ) { + rtReportIntersection(0); + } + } + } +} + +RT_PROGRAM void bounds (int, float result[6]) +{ + const float3 cen = make_float3( sphere ); + const float3 rad = make_float3( sphere.w ); + + optix::Aabb* aabb = (optix::Aabb*)result; + + if( rad.x > 0.0f && !isinf(rad.x) ) { + aabb->m_min = cen - rad; + aabb->m_max = cen + rad; + } 
else { + aabb->invalidate(); + } +} + +RT_PROGRAM void closest_hit() { + output_buffer1[launch_index.x] = rtIsTriangleHit() + 1; + output_buffer2[launch_index.x] = rtIsTriangleHitBackFace() + 1; + output_buffer3[launch_index.x] = rtIsTriangleHitFrontFace() + 1; +} diff --git a/zluda_rt/src/tests/triangle_front.ptx b/zluda_rt/src/tests/triangle_front.ptx new file mode 100644 index 0000000..24089a7 --- /dev/null +++ b/zluda_rt/src/tests/triangle_front.ptx @@ -0,0 +1,280 @@ +// +// Generated by NVIDIA NVVM Compiler +// +// Compiler Build ID: CL-31833905 +// Cuda compilation tools, release 11.8, V11.8.89 +// Based on NVVM 7.0.1 +// + +.version 7.8 +.target sm_52 +.address_size 64 + + // .globl _Z5startv +.visible .global .align 1 .b8 output_buffer1[1]; +.visible .global .align 1 .b8 output_buffer2[1]; +.visible .global .align 1 .b8 output_buffer3[1]; +.visible .global .align 4 .b8 bvh[4]; +.visible .global .align 4 .b8 ray[36]; +.visible .global .align 16 .b8 sphere[16]; +.visible .global .align 8 .b8 launch_index[8]; +.visible .global .align 4 .b8 _ZN21rti_internal_typeinfo3bvhE[8] = {82, 97, 121, 0, 4, 0, 0, 0}; +.visible .global .align 4 .b8 _ZN21rti_internal_typeinfo3rayE[8] = {82, 97, 121, 0, 36, 0, 0, 0}; +.visible .global .align 4 .b8 _ZN21rti_internal_typeinfo6sphereE[8] = {82, 97, 121, 0, 16, 0, 0, 0}; +.visible .global .align 4 .b8 _ZN21rti_internal_typeinfo12launch_indexE[8] = {82, 97, 121, 0, 8, 0, 0, 0}; +.visible .global .align 8 .u64 _ZN21rti_internal_register20reg_bitness_detectorE; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail0E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail1E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail2E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail3E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail4E; +.visible .global .align 8 .u64 
_ZN21rti_internal_register24reg_exception_64_detail5E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail6E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail7E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail8E; +.visible .global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail9E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail0E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail1E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail2E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail3E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail4E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail5E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail6E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail7E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail8E; +.visible .global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail9E; +.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_xE; +.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_yE; +.visible .global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_zE; +.visible .global .align 1 .b8 _ZN21rti_internal_typename3bvhE[9] = {114, 116, 79, 98, 106, 101, 99, 116, 0}; +.visible .global .align 1 .b8 _ZN21rti_internal_typename3rayE[11] = {111, 112, 116, 105, 120, 58, 58, 82, 97, 121, 0}; +.visible .global .align 1 .b8 _ZN21rti_internal_typename6sphereE[7] = {102, 108, 111, 97, 116, 52, 0}; +.visible .global .align 1 .b8 _ZN21rti_internal_typename12launch_indexE[5] = {105, 110, 116, 50, 0}; +.visible .global .align 4 .u32 _ZN21rti_internal_typeenum3bvhE = 4919; +.visible .global 
.align 4 .u32 _ZN21rti_internal_typeenum3rayE = 4919; +.visible .global .align 4 .u32 _ZN21rti_internal_typeenum6sphereE = 4919; +.visible .global .align 4 .u32 _ZN21rti_internal_typeenum12launch_indexE = 4919; +.visible .global .align 1 .b8 _ZN21rti_internal_semantic3bvhE[1]; +.visible .global .align 1 .b8 _ZN21rti_internal_semantic3rayE[13] = {114, 116, 67, 117, 114, 114, 101, 110, 116, 82, 97, 121, 0}; +.visible .global .align 1 .b8 _ZN21rti_internal_semantic6sphereE[1]; +.visible .global .align 1 .b8 _ZN21rti_internal_semantic12launch_indexE[14] = {114, 116, 76, 97, 117, 110, 99, 104, 73, 110, 100, 101, 120, 0}; +.visible .global .align 1 .b8 _ZN23rti_internal_annotation3bvhE[1]; +.visible .global .align 1 .b8 _ZN23rti_internal_annotation3rayE[1]; +.visible .global .align 1 .b8 _ZN23rti_internal_annotation6sphereE[1]; +.visible .global .align 1 .b8 _ZN23rti_internal_annotation12launch_indexE[1]; + +.visible .entry _Z5startv() +{ + .local .align 1 .b8 __local_depot0[1]; + .reg .b64 %SP; + .reg .b64 %SPL; + .reg .pred %p<2>; + .reg .b16 %rs<2>; + .reg .f32 %f<9>; + .reg .b32 %r<7>; + .reg .b64 %rd<3>; + + + mov.u64 %SPL, __local_depot0; + cvta.local.u64 %SP, %SPL; + add.u64 %rd1, %SP, 0; + add.u64 %rd2, %SPL, 0; + ld.global.u32 %r6, [launch_index]; + setp.eq.s32 %p1, %r6, 1; + mov.u32 %r5, 1; + cvt.rn.f32.s32 %f1, %r6; + selp.f32 %f3, 0f3F800000, 0fBF800000, %p1; + selp.f32 %f6, 0fBF800000, 0f3F800000, %p1; + mov.u16 %rs1, 0; + st.local.u8 [%rd2], %rs1; + ld.global.u32 %r1, [bvh]; + mov.f32 %f7, 0f00000000; + mov.f32 %f8, 0f6C4ECB8F; + mov.u32 %r3, 255; + mov.u32 %r4, 0; + // begin inline asm + call _rt_trace_mask_flags_64, (%r1, %f1, %f7, %f3, %f7, %f7, %f6, %r4, %f7, %f8, %r3, %r4, %rd1, %r5); + // end inline asm + ret; + +} + // .globl _Z9intersecti +.visible .entry _Z9intersecti( + .param .u32 _Z9intersecti_param_0 +) +{ + .reg .pred %p<5>; + .reg .f32 %f<43>; + .reg .b32 %r<7>; + + + ld.global.v4.f32 {%f5, %f6, %f7, %f8}, [sphere]; + ld.global.f32 %f13, 
[ray]; + sub.f32 %f14, %f13, %f5; + ld.global.f32 %f15, [ray+4]; + sub.f32 %f16, %f15, %f6; + ld.global.f32 %f17, [ray+8]; + sub.f32 %f18, %f17, %f7; + ld.global.f32 %f19, [ray+12]; + ld.global.f32 %f20, [ray+16]; + mul.f32 %f21, %f20, %f20; + fma.rn.f32 %f22, %f19, %f19, %f21; + ld.global.f32 %f23, [ray+20]; + fma.rn.f32 %f24, %f23, %f23, %f22; + sqrt.rn.f32 %f25, %f24; + rcp.rn.f32 %f1, %f25; + mul.f32 %f26, %f19, %f1; + mul.f32 %f27, %f1, %f20; + mul.f32 %f28, %f1, %f23; + mul.f32 %f29, %f16, %f27; + fma.rn.f32 %f30, %f14, %f26, %f29; + fma.rn.f32 %f2, %f18, %f28, %f30; + mul.f32 %f31, %f16, %f16; + fma.rn.f32 %f32, %f14, %f14, %f31; + fma.rn.f32 %f33, %f18, %f18, %f32; + mul.f32 %f34, %f8, %f8; + sub.f32 %f35, %f33, %f34; + mul.f32 %f36, %f2, %f2; + sub.f32 %f3, %f36, %f35; + setp.leu.f32 %p1, %f3, 0f00000000; + @%p1 bra $L__BB1_5; + + sqrt.rn.f32 %f4, %f3; + neg.f32 %f38, %f2; + sub.f32 %f39, %f38, %f4; + add.f32 %f40, %f39, 0f00000000; + mul.f32 %f37, %f1, %f40; + // begin inline asm + call (%r1), _rt_potential_intersection, (%f37); + // end inline asm + setp.eq.s32 %p2, %r1, 0; + @%p2 bra $L__BB1_3; + + mov.u32 %r3, 0; + // begin inline asm + call (%r2), _rt_report_intersection, (%r3); + // end inline asm + setp.ne.s32 %p3, %r2, 0; + @%p3 bra $L__BB1_5; + +$L__BB1_3: + sub.f32 %f42, %f4, %f2; + mul.f32 %f41, %f1, %f42; + // begin inline asm + call (%r4), _rt_potential_intersection, (%f41); + // end inline asm + setp.eq.s32 %p4, %r4, 0; + @%p4 bra $L__BB1_5; + + mov.u32 %r6, 0; + // begin inline asm + call (%r5), _rt_report_intersection, (%r6); + // end inline asm + +$L__BB1_5: + ret; + +} + // .globl _Z6boundsiPf +.visible .entry _Z6boundsiPf( + .param .u32 _Z6boundsiPf_param_0, + .param .u64 _Z6boundsiPf_param_1 +) +{ + .reg .pred %p<3>; + .reg .f32 %f<17>; + .reg .b32 %r<3>; + .reg .b64 %rd<3>; + + + ld.param.u64 %rd2, [_Z6boundsiPf_param_1]; + cvta.to.global.u64 %rd1, %rd2; + ld.global.v4.f32 {%f6, %f7, %f8, %f9}, [sphere]; + setp.leu.f32 %p1, %f9, 
0f00000000; + @%p1 bra $L__BB2_2; + + abs.f32 %f10, %f9; + setp.neu.f32 %p2, %f10, 0f7F800000; + @%p2 bra $L__BB2_3; + bra.uni $L__BB2_2; + +$L__BB2_3: + sub.f32 %f11, %f6, %f9; + st.global.f32 [%rd1], %f11; + sub.f32 %f12, %f7, %f9; + st.global.f32 [%rd1+4], %f12; + sub.f32 %f13, %f8, %f9; + st.global.f32 [%rd1+8], %f13; + add.f32 %f14, %f6, %f9; + st.global.f32 [%rd1+12], %f14; + add.f32 %f15, %f7, %f9; + st.global.f32 [%rd1+16], %f15; + add.f32 %f16, %f8, %f9; + st.global.f32 [%rd1+20], %f16; + bra.uni $L__BB2_4; + +$L__BB2_2: + mov.u32 %r1, 2096152002; + st.global.u32 [%rd1], %r1; + st.global.u32 [%rd1+4], %r1; + st.global.u32 [%rd1+8], %r1; + mov.u32 %r2, -51331646; + st.global.u32 [%rd1+12], %r2; + st.global.u32 [%rd1+16], %r2; + st.global.u32 [%rd1+20], %r2; + +$L__BB2_4: + ret; + +} + // .globl _Z11closest_hitv +.visible .entry _Z11closest_hitv() +{ + .reg .pred %p<4>; + .reg .b32 %r<13>; + .reg .b64 %rd<22>; + + + // begin inline asm + call (%r1), _rt_is_triangle_hit, (); + // end inline asm + setp.eq.s32 %p1, %r1, 0; + selp.b32 %r10, 1, 2, %p1; + mov.u32 %r8, 1; + ld.global.s32 %rd3, [launch_index]; + mov.u64 %rd18, 0; + mov.u64 %rd19, output_buffer1; + cvta.global.u64 %rd2, %rd19; + mov.u32 %r9, 4; + // begin inline asm + call (%rd1), _rt_buffer_get_64, (%rd2, %r8, %r9, %rd3, %rd18, %rd18, %rd18); + // end inline asm + st.u32 [%rd1], %r10; + // begin inline asm + call (%r4), _rt_is_triangle_hit_back_face, (); + // end inline asm + setp.eq.s32 %p2, %r4, 0; + selp.b32 %r11, 1, 2, %p2; + ld.global.s32 %rd9, [launch_index]; + mov.u64 %rd20, output_buffer2; + cvta.global.u64 %rd8, %rd20; + // begin inline asm + call (%rd7), _rt_buffer_get_64, (%rd8, %r8, %r9, %rd9, %rd18, %rd18, %rd18); + // end inline asm + st.u32 [%rd7], %r11; + // begin inline asm + call (%r7), _rt_is_triangle_hit_front_face, (); + // end inline asm + setp.eq.s32 %p3, %r7, 0; + selp.b32 %r12, 1, 2, %p3; + ld.global.s32 %rd15, [launch_index]; + mov.u64 %rd21, output_buffer3; + 
cvta.global.u64 %rd14, %rd21; + // begin inline asm + call (%rd13), _rt_buffer_get_64, (%rd14, %r8, %r9, %rd15, %rd18, %rd18, %rd18); + // end inline asm + st.u32 [%rd13], %r12; + ret; + +} + diff --git a/zluda_rt/src/texture_sampler.rs b/zluda_rt/src/texture_sampler.rs new file mode 100644 index 0000000..cf8dfb0 --- /dev/null +++ b/zluda_rt/src/texture_sampler.rs @@ -0,0 +1,411 @@ +use crate::{
+ buffer::{Buffer, BufferData},
+ context::{self, Context, ContextData},
+ hip, null_check, null_unwrap, MaybeWeakRefMut, OptixCell, OptixObjectData, TypeTag,
+};
+use hip_runtime_sys::*;
+use optix_types::*;
+use std::{
+ mem, ptr,
+ rc::{Rc, Weak},
+};
+
+/// Raw handle type used for texture samplers: a pointer to the sampler's `OptixCell`.
+pub(crate) type TextureSampler = *const OptixCell<TextureSamplerData>;
+
+/// State recorded for a single texture sampler object.
+///
+/// The setters in this module only store values here; the HIP texture object is built from
+/// this state by `create_underlying`.
+pub(crate) struct TextureSamplerData {
+ /// Weak back-reference to the context this sampler was created in.
+ pub(crate) context: Weak<OptixCell<ContextData>>,
+ /// Value of the context's `texture_counter` after it was incremented for this sampler;
+ /// reported to callers by `get_id`.
+ pub(crate) index: u32,
+ /// HIP texture object; null until `create_underlying` succeeds.
+ pub(crate) hip_object: hipTextureObject_t,
+ /// Address mode per dimension (indices 0..=2), written by `set_wrap_mode`.
+ wrap_mode: [hipTextureAddressMode; 3],
+ /// Forwarded to `hipTextureDesc::normalizedCoords`.
+ normalized_coordinates: bool,
+ /// Forwarded to `hipTextureDesc::readMode`.
+ read_mode: hipTextureReadMode,
+ /// Forwarded to `hipTextureDesc::sRGB`.
+ perform_srgb_conversion: bool,
+ /// Stored by `set_max_anisotropy`; `create_underlying` always passes 0 to HIP instead.
+ max_anisotropy: f32,
+ /// Stored by `set_mip_level_count`; not read by `create_underlying`.
+ mip_level_count: u32,
+ /// Stored by `set_array_size`; not read by `create_underlying`.
+ array_size: u32,
+ /// Minification filter; combined with `magnification` by `filter_mode`.
+ minification: RTfiltermode,
+ /// Magnification filter; combined with `minification` by `filter_mode`.
+ magnification: RTfiltermode,
+ /// Mipmap filter; mapped to `hipTextureDesc::mipmapFilterMode`.
+ mipmapping: RTfiltermode,
+ /// Buffer attached through `set_buffer`, held weakly; `None` when nothing is attached.
+ buffer: Option<Weak<OptixCell<BufferData>>>,
+}
+
+impl TextureSamplerData {
+ fn new(weak_context: Weak<OptixCell<ContextData>>, context: &mut ContextData) -> Self {
+ context.texture_counter += 1;
+ let index = context.texture_counter;
+ Self {
+ context: weak_context,
+ index,
+ hip_object: ptr::null_mut(),
+ wrap_mode: [
+ hipTextureAddressMode::hipAddressModeWrap,
+ hipTextureAddressMode::hipAddressModeWrap,
+ hipTextureAddressMode::hipAddressModeWrap,
+ ],
+ normalized_coordinates: true,
+ read_mode: hipTextureReadMode::hipReadModeNormalizedFloat,
+ perform_srgb_conversion: false,
+ max_anisotropy: 1.0,
+ mip_level_count: 0,
+ array_size: 0,
+ buffer: None,
+ minification: RTfiltermode::RT_FILTER_LINEAR,
+ magnification: RTfiltermode::RT_FILTER_LINEAR,
+ mipmapping: RTfiltermode::RT_FILTER_LINEAR,
+ }
+ }
+
+ fn register(this: Rc<OptixCell<Self>>, context: &mut ContextData) {
+ context.texture_samplers.insert(this);
+ }
+
+ unsafe fn create(context: Context) -> Result<TextureSampler, RTresult> {
+ context::create_subobject(context, Self::new, Self::register)
+ }
+
+ pub(crate) unsafe fn create_underlying(&mut self) -> Result<(), RTresult> {
+ let buffer = match self.buffer {
+ Some(ref buffer) => buffer.upgrade().ok_or(RTresult::RT_ERROR_INVALID_CONTEXT)?,
+ None => return Err(RTresult::RT_ERROR_INVALID_CONTEXT),
+ };
+ let buffer = buffer.borrow()?;
+ // TODO: create mipmapped arrays when they work in HIP
+ // Currently hipMipmappedArrayCreate(...) fails, because it's "unuspported"
+ // (tested on Linux ROCm 5.4.3 on RX 6800 XT)
+ let mut hip_texture = ptr::null_mut();
+ let mut array = ptr::null_mut();
+ let array_desc = HIP_ARRAY3D_DESCRIPTOR {
+ Width: buffer.metadata.width as usize,
+ Height: buffer.metadata.height as usize,
+ Depth: buffer.metadata.depth(),
+ Format: buffer.metadata.array_format()?,
+ NumChannels: buffer.metadata.channels()?,
+ Flags: 0,
+ };
+ hip! { hipArray3DCreate(&mut array, &array_desc), RT_ERROR_UNKNOWN };
+ let copy_height = buffer.metadata.height.max(1);
+ let copy_depth = buffer.metadata.depth().max(1);
+ let params = hipMemcpy3DParms {
+ srcArray: ptr::null_mut(),
+ srcPos: hipPos { x: 0, y: 0, z: 0 },
+ srcPtr: hipPitchedPtr {
+ ptr: buffer.pointer_mip0().0,
+ pitch: (buffer.metadata.width * buffer.metadata.element_size) as usize,
+ xsize: buffer.metadata.width as usize,
+ ysize: copy_height as usize,
+ },
+ dstArray: array,
+ dstPos: hipPos { x: 0, y: 0, z: 0 },
+ dstPtr: mem::zeroed::<hipPitchedPtr>(),
+ extent: hipExtent {
+ width: buffer.metadata.width as usize,
+ height: copy_height as usize,
+ depth: copy_depth,
+ },
+ kind: hipMemcpyKind::hipMemcpyDeviceToDevice,
+ };
+ hip! { hipMemcpy3D(¶ms), RT_ERROR_UNKNOWN };
+ let resource_desc = hipResourceDesc {
+ resType: hipResourceType::hipResourceTypeArray,
+ res: hipResourceDesc__bindgen_ty_1 {
+ array: hipResourceDesc__bindgen_ty_1__bindgen_ty_1 { array },
+ },
+ };
+ let tex_desc = hipTextureDesc {
+ addressMode: self.wrap_mode,
+ filterMode: self.filter_mode(),
+ readMode: self.read_mode,
+ sRGB: self.perform_srgb_conversion as i32,
+ borderColor: [0.0, 0.0, 0.0, 0.0],
+ normalizedCoords: self.normalized_coordinates as i32,
+ maxAnisotropy: 0, // other values not supported by HIP
+ mipmapFilterMode: if self.mipmapping == RTfiltermode::RT_FILTER_LINEAR {
+ hipTextureFilterMode::hipFilterModeLinear
+ } else {
+ hipTextureFilterMode::hipFilterModePoint
+ },
+ mipmapLevelBias: 0.0, // other values not supported by HIP
+ minMipmapLevelClamp: 0.0, // other values not supported by HIP
+ maxMipmapLevelClamp: 0.0, // other values not supported by HIP
+ };
+ hip! { hipCreateTextureObject(&mut hip_texture, &resource_desc, &tex_desc, ptr::null()), RT_ERROR_UNKNOWN };
+ self.hip_object = hip_texture;
+ Ok(())
+ }
+
+ // TODO: this is as good as it gets under CUDA/HIP
+ fn filter_mode(&self) -> hipTextureFilterMode {
+ if self.magnification == RTfiltermode::RT_FILTER_LINEAR
+ || self.minification == RTfiltermode::RT_FILTER_LINEAR
+ {
+ hipTextureFilterMode::hipFilterModeLinear
+ } else {
+ hipTextureFilterMode::hipFilterModePoint
+ }
+ }
+}
+
+impl OptixObjectData for TextureSamplerData {
+    const TYPE: TypeTag = TypeTag::TextureSampler;
+
+    /// Removes `this` from the owning context's `texture_samplers` set.
+    ///
+    /// Does nothing when the context is already gone; fails only if the context cannot be
+    /// mutably borrowed.
+    fn deregister(&mut self, this: &Rc<OptixCell<Self>>) -> Result<(), RTresult> {
+        let owner = match self.context.upgrade() {
+            Some(owner) => owner,
+            None => return Ok(()),
+        };
+        let mut owner_data = (*owner).borrow_mut()?;
+        owner_data.texture_samplers.remove(this);
+        Ok(())
+    }
+
+    /// Exposes the weak reference to the owning context.
+    fn context<'a>(&'a mut self) -> MaybeWeakRefMut<'a, ContextData> {
+        let weak_context = &self.context;
+        MaybeWeakRefMut::Weak(weak_context)
+    }
+}
+
+/// Creates a texture sampler in `context` and writes its handle to `*texturesampler`.
+///
+/// Both pointers are null-checked before anything is created.
+pub(crate) unsafe fn create(
+    context: *const OptixCell<ContextData>,
+    texturesampler: *mut TextureSampler,
+) -> Result<(), RTresult> {
+    null_check(context)?;
+    null_check(texturesampler)?;
+    let handle = TextureSamplerData::create(context)?;
+    *texturesampler = handle;
+    Ok(())
+}
+
+/// Sets the wrap (address) mode for one texture dimension.
+///
+/// `dimension` must be 0, 1 or 2; anything else, or an unknown `wrap_mode`, yields
+/// `RT_ERROR_INVALID_VALUE` before the sampler handle is examined.
+pub(crate) unsafe fn set_wrap_mode(
+    texturesampler: TextureSampler,
+    dimension: u32,
+    wrap_mode: RTwrapmode,
+) -> Result<(), RTresult> {
+    let slot = match dimension {
+        0..=2 => dimension as usize,
+        _ => return Err(RTresult::RT_ERROR_INVALID_VALUE),
+    };
+    let hip_mode = to_hip_address_mode(wrap_mode)?;
+    let sampler_cell = null_unwrap(texturesampler)?;
+    let mut sampler = sampler_cell.borrow_mut()?;
+    sampler.wrap_mode[slot] = hip_mode;
+    Ok(())
+}
+
+/// Translates an OptiX wrap mode into the matching HIP address mode.
+///
+/// Unknown values are rejected with `RT_ERROR_INVALID_VALUE`.
+fn to_hip_address_mode(wrap_mode: RTwrapmode) -> Result<hipTextureAddressMode, RTresult> {
+    match wrap_mode {
+        RTwrapmode::RT_WRAP_REPEAT => Ok(hipTextureAddressMode::hipAddressModeWrap),
+        RTwrapmode::RT_WRAP_CLAMP_TO_EDGE => Ok(hipTextureAddressMode::hipAddressModeClamp),
+        RTwrapmode::RT_WRAP_MIRROR => Ok(hipTextureAddressMode::hipAddressModeMirror),
+        RTwrapmode::RT_WRAP_CLAMP_TO_BORDER => Ok(hipTextureAddressMode::hipAddressModeBorder),
+        _ => Err(RTresult::RT_ERROR_INVALID_VALUE),
+    }
+}
+
+/// Selects between normalized texture coordinates and array-index coordinates.
+///
+/// An unknown `index_mode` yields `RT_ERROR_INVALID_VALUE` before the sampler handle is
+/// examined.
+pub(crate) unsafe fn set_indexing_mode(
+    texturesampler: TextureSampler,
+    index_mode: RTtextureindexmode,
+) -> Result<(), RTresult> {
+    let use_normalized = match index_mode {
+        RTtextureindexmode::RT_TEXTURE_INDEX_ARRAY_INDEX => false,
+        RTtextureindexmode::RT_TEXTURE_INDEX_NORMALIZED_COORDINATES => true,
+        _ => return Err(RTresult::RT_ERROR_INVALID_VALUE),
+    };
+    let sampler_cell = null_unwrap(texturesampler)?;
+    let mut sampler = sampler_cell.borrow_mut()?;
+    sampler.normalized_coordinates = use_normalized;
+    Ok(())
+}
+
+/// Sets the texture read mode, including whether sRGB conversion is requested.
+///
+/// An unknown `read_mode` yields `RT_ERROR_INVALID_VALUE` before the sampler handle is
+/// examined.
+pub(crate) unsafe fn set_read_mode(
+    texturesampler: TextureSampler,
+    read_mode: RTtexturereadmode,
+) -> Result<(), RTresult> {
+    let (hip_read_mode, srgb) = to_hip_read_mode(read_mode)?;
+    let sampler_cell = null_unwrap(texturesampler)?;
+    let mut sampler = sampler_cell.borrow_mut()?;
+    sampler.read_mode = hip_read_mode;
+    sampler.perform_srgb_conversion = srgb;
+    Ok(())
+}
+
+/// Splits an OptiX read mode into the HIP read mode and an "sRGB conversion" flag.
+///
+/// Unknown values are rejected with `RT_ERROR_INVALID_VALUE`.
+fn to_hip_read_mode(read_mode: RTtexturereadmode) -> Result<(hipTextureReadMode, bool), RTresult> {
+    let element_type = hipTextureReadMode::hipReadModeElementType;
+    let normalized_float = hipTextureReadMode::hipReadModeNormalizedFloat;
+    match read_mode {
+        RTtexturereadmode::RT_TEXTURE_READ_ELEMENT_TYPE => Ok((element_type, false)),
+        RTtexturereadmode::RT_TEXTURE_READ_NORMALIZED_FLOAT => Ok((normalized_float, false)),
+        RTtexturereadmode::RT_TEXTURE_READ_ELEMENT_TYPE_SRGB => Ok((element_type, true)),
+        RTtexturereadmode::RT_TEXTURE_READ_NORMALIZED_FLOAT_SRGB => Ok((normalized_float, true)),
+        _ => Err(RTresult::RT_ERROR_INVALID_VALUE),
+    }
+}
+
+/// Records the requested maximum anisotropy on the sampler.
+///
+/// The value is stored without validation.
+pub(crate) unsafe fn set_max_anisotropy(
+    texturesampler: TextureSampler,
+    max_anisotropy: f32,
+) -> Result<(), RTresult> {
+    let sampler_cell = null_unwrap(texturesampler)?;
+    let mut sampler = sampler_cell.borrow_mut()?;
+    sampler.max_anisotropy = max_anisotropy;
+    Ok(())
+}
+
+/// Records the requested mip level count on the sampler.
+///
+/// The value is stored without validation.
+pub(crate) unsafe fn set_mip_level_count(
+    texturesampler: TextureSampler,
+    mip_level_count: u32,
+) -> Result<(), RTresult> {
+    let sampler_cell = null_unwrap(texturesampler)?;
+    let mut sampler = sampler_cell.borrow_mut()?;
+    sampler.mip_level_count = mip_level_count;
+    Ok(())
+}
+
+/// Records the requested texture array size on the sampler.
+///
+/// The value is stored without validation.
+pub(crate) unsafe fn set_array_size(
+    texturesampler: TextureSampler,
+    array_size: u32,
+) -> Result<(), RTresult> {
+    let sampler_cell = null_unwrap(texturesampler)?;
+    let mut sampler = sampler_cell.borrow_mut()?;
+    sampler.array_size = array_size;
+    Ok(())
+}
+
+/// Attaches `buffer` to the sampler, or detaches the current one when `buffer` is null.
+///
+/// Only a weak reference to the buffer is kept.
+pub(crate) unsafe fn set_buffer(
+    texturesampler: TextureSampler,
+    buffer: Buffer,
+) -> Result<(), RTresult> {
+    // The weak reference is taken before the sampler handle is validated.
+    let attached = match buffer == ptr::null() {
+        true => None,
+        false => Some(OptixCell::clone_weak(buffer)),
+    };
+    let sampler_cell = null_unwrap(texturesampler)?;
+    let mut sampler = sampler_cell.borrow_mut()?;
+    sampler.buffer = attached;
+    Ok(())
+}
+
+/// Records the minification, magnification and mipmapping filter modes on the sampler.
+///
+/// The values are stored without validation.
+pub(crate) unsafe fn set_filtering_modes(
+    texturesampler: TextureSampler,
+    minification: RTfiltermode,
+    magnification: RTfiltermode,
+    mipmapping: RTfiltermode,
+) -> Result<(), RTresult> {
+    let sampler_cell = null_unwrap(texturesampler)?;
+    let mut sampler = sampler_cell.borrow_mut()?;
+    sampler.minification = minification;
+    sampler.magnification = magnification;
+    sampler.mipmapping = mipmapping;
+    Ok(())
+}
+
+/// Writes the sampler's texture id (its `index`) to `*texture_id`.
+///
+/// # Errors
+///
+/// Fails when `texture_id` or `texturesampler` is null, or when the sampler cannot be
+/// borrowed.
+pub(crate) unsafe fn get_id(
+    texturesampler: TextureSampler,
+    texture_id: *mut i32,
+) -> Result<(), RTresult> {
+    // Validate the out-pointer before it is written, like the other getters/creators do.
+    null_check(texture_id)?;
+    let texturesampler = null_unwrap(texturesampler)?;
+    let texturesampler = texturesampler.borrow()?;
+    *texture_id = texturesampler.index as i32;
+    Ok(())
+}
+
+/// Writes the handle of the buffer attached to the sampler to `*buffer`.
+///
+/// When no buffer is attached, `*buffer` is set to null and `RT_ERROR_INVALID_VALUE` is
+/// returned. The two `_deprecated*` arguments are ignored.
+pub(crate) unsafe fn get_buffer(
+    texturesampler: *const OptixCell<TextureSamplerData>,
+    _deprecated0: u32,
+    _deprecated1: u32,
+    buffer: *mut *const OptixCell<BufferData>,
+) -> Result<(), RTresult> {
+    null_check(buffer)?;
+    let sampler_cell = null_unwrap(texturesampler)?;
+    let sampler = sampler_cell.borrow()?;
+    if let Some(ref attached) = sampler.buffer {
+        *buffer = Weak::as_ptr(attached);
+        return Ok(());
+    }
+    *buffer = ptr::null_mut();
+    Err(RTresult::RT_ERROR_INVALID_VALUE)
+}
+
+/// Writes the handle of the context owning the sampler to `*context`.
+///
+/// The pointer comes from the sampler's weak context reference and is not upgraded here.
+///
+/// # Errors
+///
+/// Fails when `context` or `texturesampler` is null, or when the sampler cannot be
+/// borrowed.
+pub(crate) unsafe fn get_context(
+    texturesampler: TextureSampler,
+    context: *mut *const OptixCell<ContextData>,
+) -> Result<(), RTresult> {
+    // Validate the out-pointer before it is written, like the other getters/creators do.
+    null_check(context)?;
+    let texturesampler = null_unwrap(texturesampler)?;
+    let texturesampler = texturesampler.borrow()?;
+    *context = texturesampler.context.as_ptr();
+    Ok(())
+}
+
+/// Destroys a texture sampler.
+///
+/// Currently a stub: the handle is ignored and success is always reported.
+pub(crate) fn destroy(_texturesampler: TextureSampler) -> Result<(), RTresult> {
+    // TODO: implement
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::optix_test;
+    use crate::test_common::OptixFns;
+    use optix_types::*;
+    use std::{mem, ptr};
+
+    optix_test!(default_texture_sampler);
+
+    /// Checks the state a freshly created texture sampler reports through its getters.
+    unsafe fn default_texture_sampler<Optix: OptixFns>(o: Optix) {
+        let mut ctx = ptr::null_mut();
+        o.rtContextCreate(&mut ctx);
+        let mut sampler = ptr::null_mut();
+        o.rtTextureSamplerCreate(ctx, &mut sampler);
+        let mut array_size = mem::zeroed();
+        o.rtTextureSamplerGetArraySize(sampler, &mut array_size);
+        let mut buffer = mem::zeroed();
+        o.rtTextureSamplerGetBuffer(sampler, 0, 0, &mut buffer);
+        let mut minification = mem::zeroed();
+        let mut magnification = mem::zeroed();
+        let mut mipmapping = mem::zeroed();
+        o.rtTextureSamplerGetFilteringModes(
+            sampler,
+            &mut minification,
+            &mut magnification,
+            &mut mipmapping,
+        );
+        let mut index_mode = mem::zeroed();
+        o.rtTextureSamplerGetIndexingMode(sampler, &mut index_mode);
+        let mut max_aniso = mem::zeroed();
+        o.rtTextureSamplerGetMaxAnisotropy(sampler, &mut max_aniso);
+        let mut mip_level = mem::zeroed();
+        o.rtTextureSamplerGetMipLevelCount(sampler, &mut mip_level);
+        let mut read_mode = mem::zeroed();
+        o.rtTextureSamplerGetReadMode(sampler, &mut read_mode);
+        // Query each of the three dimensions, not dimension 0 three times.
+        let mut wrapmode0 = mem::zeroed();
+        let mut wrapmode1 = mem::zeroed();
+        let mut wrapmode2 = mem::zeroed();
+        o.rtTextureSamplerGetWrapMode(sampler, 0, &mut wrapmode0);
+        o.rtTextureSamplerGetWrapMode(sampler, 1, &mut wrapmode1);
+        o.rtTextureSamplerGetWrapMode(sampler, 2, &mut wrapmode2);
+        assert_eq!(array_size, 0);
+        assert_eq!(buffer, ptr::null_mut());
+        assert_eq!(minification, RTfiltermode::RT_FILTER_LINEAR);
+        assert_eq!(magnification, RTfiltermode::RT_FILTER_LINEAR);
+        assert_eq!(mipmapping, RTfiltermode::RT_FILTER_LINEAR);
+        assert_eq!(
+            index_mode,
+            RTtextureindexmode::RT_TEXTURE_INDEX_NORMALIZED_COORDINATES
+        );
+        assert_eq!(max_aniso, 1.0);
+        assert_eq!(mip_level, 0);
+        assert_eq!(
+            read_mode,
+            RTtexturereadmode::RT_TEXTURE_READ_NORMALIZED_FLOAT
+        );
+        assert_eq!(wrapmode0, RTwrapmode::RT_WRAP_REPEAT);
+        assert_eq!(wrapmode1, RTwrapmode::RT_WRAP_REPEAT);
+        assert_eq!(wrapmode2, RTwrapmode::RT_WRAP_REPEAT);
+    }
+}
diff --git a/zluda_rt/src/transform.rs b/zluda_rt/src/transform.rs new file mode 100644 index 0000000..1d5cc79 --- /dev/null +++ b/zluda_rt/src/transform.rs @@ -0,0 +1,196 @@ +use crate::{
+ context::{self, Context, ContextData},
+ geometry_group::GeometryGroupData,
+ null_check, null_unwrap,
+ repr_gpu::{self, TrivialHIPAllocator},
+ MaybeWeakRefMut, OptixCell, OptixObjectData, TypeTag, TypedObjectWeak, UntypedObject,
+};
+
+use glam::{Quat, Vec3};
+use hip_runtime_sys::hipDeviceptr_t;
+use hiprt_sys::{hiprtFloat3, hiprtFloat4, hiprtFrame};
+use optix_types::*;
+use std::{
+ ptr,
+ rc::{Rc, Weak},
+};
+
+/// Raw handle type used for transforms: a pointer to the transform's `OptixCell`.
+pub(crate) type Transform = *const OptixCell<TransformData>;
+
+/// State recorded for a single transform node.
+pub(crate) struct TransformData {
+ /// Weak back-reference to the context this transform was created in.
+ pub(crate) context: Weak<OptixCell<ContextData>>,
+ /// The 4x4 matrix as 16 floats, produced by `Mat4::transpose().to_cols_array()`.
+ pub(crate) transform: [f32; 16],
+ /// The inverse matrix as 16 floats; copied to the GPU next to `transform` by `allocate`.
+ pub(crate) inverse_transform: [f32; 16],
+ /// Scale component; passed to hiprt by `to_hiprt`.
+ pub(crate) scale: Vec3,
+ /// Rotation component; converted to axis/angle form by `to_hiprt`.
+ pub(crate) rotation: Quat,
+ /// Translation component; passed to hiprt by `to_hiprt`.
+ pub(crate) translation: Vec3,
+ /// Child geometry group held weakly; `None` in a freshly created transform.
+ pub(crate) child: Option<Weak<OptixCell<GeometryGroupData>>>,
+}
+
+impl TransformData {
+    /// Creates a transform with unit scale, identity rotation, zero translation and no child.
+    fn new(weak_context: Weak<OptixCell<ContextData>>, _: &mut ContextData) -> Self {
+        let (scale, rotation, translation) = (Vec3::ONE, Quat::IDENTITY, Vec3::ZERO);
+        let transform = glam::Mat4::from_scale_rotation_translation(scale, rotation, translation)
+            .transpose()
+            .to_cols_array();
+        Self {
+            context: weak_context,
+            transform,
+            // Starts out equal to the forward matrix.
+            inverse_transform: transform,
+            scale,
+            rotation,
+            translation,
+            child: None,
+        }
+    }
+
+    /// Adds the freshly created transform to the context's `transforms` set.
+    fn register(this: Rc<OptixCell<Self>>, context: &mut ContextData) {
+        context.transforms.insert(this);
+    }
+
+    /// Creates a transform as a sub-object of `context` and returns its raw handle.
+    unsafe fn create(context: Context) -> Result<Transform, RTresult> {
+        context::create_subobject(context, Self::new, Self::register)
+    }
+
+    /// Copies the forward and inverse matrices to the device through `allocator`.
+    ///
+    /// Any allocator failure is reported as `RT_ERROR_UNKNOWN`.
+    pub(crate) fn allocate(
+        &self,
+        allocator: &mut TrivialHIPAllocator,
+    ) -> Result<hipDeviceptr_t, RTresult> {
+        let gpu_transform = repr_gpu::OptixTransform {
+            transform: self.transform,
+            inverse_transform: self.inverse_transform,
+        };
+        let host_side = [gpu_transform];
+        match allocator.copy_to_device(&host_side) {
+            Ok(device_ptr) => Ok(device_ptr),
+            Err(_) => Err(RTresult::RT_ERROR_UNKNOWN),
+        }
+    }
+
+    /// Builds the hiprt frame for this transform, with `time` and `pad` set to zero.
+    pub(crate) fn to_hiprt(&self) -> hiprtFrame {
+        hiprtFrame {
+            rotation: Self::quat_to_hiprt(self.rotation),
+            scale: Self::vec3_to_hiprt(self.scale),
+            translation: Self::vec3_to_hiprt(self.translation),
+            time: 0.0,
+            pad: 0,
+        }
+    }
+
+    /// Copies the three components of `v` into a `hiprtFloat3`.
+    fn vec3_to_hiprt(v: Vec3) -> hiprtFloat3 {
+        let (x, y, z) = (v.x, v.y, v.z);
+        hiprtFloat3 { x, y, z }
+    }
+
+    /// Converts `q` to axis/angle form: the axis goes into `x`, `y`, `z`, the angle into `w`.
+    fn quat_to_hiprt(q: Quat) -> hiprtFloat4 {
+        let (axis, w) = q.to_axis_angle();
+        let (x, y, z) = (axis.x, axis.y, axis.z);
+        hiprtFloat4 { x, y, z, w }
+    }
+}
+
+impl OptixObjectData for TransformData {
+    const TYPE: TypeTag = TypeTag::Transform;
+
+    /// Removes this transform from its context's `transforms` set.
+    ///
+    /// If the context is already gone there is nothing to remove and the call
+    /// succeeds. A failed mutable borrow of the context is propagated.
+    fn deregister(&mut self, this: &Rc<OptixCell<Self>>) -> Result<(), RTresult> {
+        match self.context.upgrade() {
+            Some(context_rc) => {
+                let mut context_data = (*context_rc).borrow_mut()?;
+                context_data.transforms.remove(this);
+                Ok(())
+            }
+            None => Ok(()),
+        }
+    }
+
+    /// Exposes the owning context through the stored weak reference.
+    fn context<'a>(&'a mut self) -> crate::MaybeWeakRefMut<'a, ContextData> {
+        let owning_context = &self.context;
+        MaybeWeakRefMut::Weak(owning_context)
+    }
+}
+
+/// Creates a new identity transform in `context` and stores its handle in
+/// `*transform`.
+///
+/// Both pointers are null-checked before anything is created.
+pub(crate) unsafe fn create(context: Context, transform: *mut Transform) -> Result<(), RTresult> {
+    null_check(context)?;
+    null_check(transform)?;
+    let new_handle = TransformData::create(context)?;
+    transform.write(new_handle);
+    Ok(())
+}
+
+/// Replaces the transform's matrix, its inverse and the derived
+/// scale/rotation/translation decomposition.
+///
+/// * `transpose == 0`: the caller's 16 floats are laid out row by row.
+/// * `transpose != 0`: the caller's 16 floats are laid out column by column.
+/// * `inverse_matrix` may be null, in which case the inverse is computed from
+///   `matrix`. When it is supplied it uses the same layout as `matrix`.
+///
+/// Both stored arrays always end up in the same layout: the column array of
+/// the transposed glam matrix, which is also what `TransformData::new` produces.
+///
+/// # Errors
+/// Fails if `matrix` or `transform` is null, or if the transform cell cannot
+/// be borrowed mutably.
+///
+/// # Safety
+/// `matrix` and, when non-null, `inverse_matrix` must each point to 16
+/// readable `f32` values.
+pub(crate) unsafe fn set_matrix(
+    transform: Transform,
+    transpose: i32,
+    matrix: *const f32,
+    inverse_matrix: *const f32,
+) -> Result<(), RTresult> {
+    null_check(matrix)?;
+    let transform = null_unwrap(transform)?;
+    let mut transform = transform.borrow_mut()?;
+    let input_is_row_major = transpose == 0;
+    // glam reads the array column by column, so row-major input has to be
+    // transposed to obtain the real matrix.
+    let mut matrix4 = glam::Mat4::from_cols_array(&*(matrix as *const [f32; 16]));
+    if input_is_row_major {
+        matrix4 = matrix4.transpose();
+    }
+    let inverse_matrix = if inverse_matrix.is_null() {
+        matrix4.inverse().transpose().to_cols_array()
+    } else {
+        // Read the caller's inverse, not the forward matrix.
+        let supplied_inverse = *(inverse_matrix as *const [f32; 16]);
+        if input_is_row_major {
+            // Already in the stored layout.
+            supplied_inverse
+        } else {
+            // Column-major input: transpose so it matches the stored layout.
+            glam::Mat4::from_cols_array(&supplied_inverse)
+                .transpose()
+                .to_cols_array()
+        }
+    };
+    let (scale, rotation, translation) = matrix4.to_scale_rotation_translation();
+    transform.transform = matrix4.transpose().to_cols_array();
+    transform.inverse_transform = inverse_matrix;
+    transform.scale = scale;
+    transform.rotation = rotation;
+    transform.translation = translation;
+    Ok(())
+}
+
+/// Attaches `object` as the child of `transform`.
+///
+/// Only geometry groups are accepted; any other object type yields
+/// `RT_ERROR_NOT_SUPPORTED`. The child is kept as a weak reference.
+pub(crate) unsafe fn set_child(
+    transform: Transform,
+    object: UntypedObject,
+) -> Result<(), RTresult> {
+    let candidate = TypedObjectWeak::clone_from(object)?;
+    let cell = null_unwrap(transform)?;
+    let mut data = cell.borrow_mut()?;
+    if let TypedObjectWeak::GeometryGroup(geometry_group) = candidate {
+        data.child = Some(geometry_group);
+        Ok(())
+    } else {
+        Err(RTresult::RT_ERROR_NOT_SUPPORTED)
+    }
+}
+
+/// Reports the number of motion keys of `transform`, which is always 1 here.
+///
+/// Both pointers are null-checked; the transform itself is not inspected.
+pub(crate) unsafe fn get_motion_key_count(
+    transform: Transform,
+    n: *mut u32,
+) -> Result<(), RTresult> {
+    null_check(transform)?;
+    null_check(n)?;
+    n.write(1);
+    Ok(())
+}
+
+pub(crate) fn destroy(_transform: Transform) -> Result<(), RTresult> { // stub: the handle is ignored and success is always reported
+ // TODO: implement; until then nothing is deregistered or released here
+ Ok(())
+}
+
+/// Writes the handle of the context that owns `transform` into `*context`.
+///
+/// The pointer is taken straight from the stored weak reference without
+/// upgrading it, so this function does not check whether the context is
+/// still alive.
+///
+/// # Errors
+/// Fails if `context` or `transform` is null, or if the transform cell cannot
+/// be borrowed.
+pub(crate) unsafe fn get_context(
+    transform: *const OptixCell<TransformData>,
+    context: *mut *const OptixCell<ContextData>,
+) -> Result<(), RTresult> {
+    // Reject a null out-pointer before writing through it, as the other
+    // getters in this file do.
+    null_check(context)?;
+    let transform = null_unwrap(transform)?;
+    let transform = transform.borrow()?;
+    *context = transform.context.as_ptr();
+    Ok(())
+}
diff --git a/zluda_rt/src/variable.rs b/zluda_rt/src/variable.rs new file mode 100644 index 0000000..764b8fb --- /dev/null +++ b/zluda_rt/src/variable.rs @@ -0,0 +1,237 @@ +use crate::{
+ context::ContextData, null_check, null_unwrap, AlignedBuffer, MaybeWeakRefMut, OptixCell,
+ OptixObjectData, TypeTag, TypedObjectWeak, UntypedObject,
+};
+use optix_types::RTresult;
+use std::{
+ alloc::Layout,
+ mem, ptr,
+ rc::{Rc, Weak},
+ slice,
+};
+
+pub(crate) type Variable = *const OptixCell<VariableData>; // raw-pointer handle to a variable's cell; this is what the setter/getter functions in this file take
+
+#[derive(Clone)]
+pub(crate) struct VariableData { // state of one variable: the context it belongs to plus its current value
+ pub(crate) context: Weak<OptixCell<ContextData>>, // weak link to the owning context
+ pub(crate) value: VariableValue, // current payload; starts as `VariableValue::None` in both constructors
+}
+
+#[derive(Clone)]
+pub(crate) enum VariableValue { // payload a variable can hold; `with_bytes` turns it into a byte slice
+ None, // nothing assigned yet; `with_bytes` reports `RT_ERROR_UNKNOWN` for it
+ Object(TypedObjectWeak), // weak reference to another API object, stored by `set_object`
+ Inline { // small value kept in place
+ data: [u8; 4 * mem::size_of::<f32>()], // backing storage; only the first `size` bytes are meaningful
+ size: u8, // number of valid bytes in `data`
+ },
+ Boxed(AlignedBuffer), // value kept in a separately allocated buffer, e.g. by `set_user_data`
+}
+
+impl VariableValue {
+    /// Runs `f` on the byte representation of this value and returns its result.
+    ///
+    /// * `Inline` / `Boxed`: the stored bytes.
+    /// * `Object(Buffer)`: the raw bytes of whatever `get_device_mip0()` returns.
+    /// * `Object(GeometryGroup)` / `Object(Group)`: native-endian bytes of `index`.
+    /// * `Object(TextureSampler)`: native-endian bytes of `hip_object` cast to `usize`.
+    ///
+    /// # Errors
+    /// `RT_ERROR_UNKNOWN` for `None` or when the referenced object is gone,
+    /// `RT_ERROR_NOT_SUPPORTED` for context, variable, program and
+    /// acceleration objects, and any borrow error of the referenced object.
+    ///
+    /// # Panics
+    /// Transform, material, geometry, geometry-triangles and geometry-instance
+    /// objects are not implemented yet and hit `todo!()`.
+    fn with_bytes<T>(&self, f: impl FnOnce(&[u8]) -> T) -> Result<T, RTresult> {
+        match self {
+            VariableValue::None => Err(RTresult::RT_ERROR_UNKNOWN),
+            VariableValue::Inline { data, size } => Ok(f(&data[..*size as usize])),
+            VariableValue::Boxed(data) => Ok(f(data.as_bytes())),
+            VariableValue::Object(TypedObjectWeak::Buffer(weak_buffer)) => {
+                let strong_buffer = weak_buffer.upgrade().ok_or(RTresult::RT_ERROR_UNKNOWN)?;
+                let buffer_data = strong_buffer.borrow()?;
+                let device_buffer = buffer_data.get_device_mip0();
+                // View the returned value itself as plain bytes.
+                let raw_bytes = unsafe {
+                    slice::from_raw_parts(
+                        &device_buffer as *const _ as *const u8,
+                        mem::size_of_val(&device_buffer),
+                    )
+                };
+                Ok(f(raw_bytes))
+            }
+            VariableValue::Object(TypedObjectWeak::GeometryGroup(weak_gg)) => {
+                let strong_gg = weak_gg.upgrade().ok_or(RTresult::RT_ERROR_UNKNOWN)?;
+                let gg_data = strong_gg.borrow()?;
+                Ok(f(&gg_data.index.to_ne_bytes()))
+            }
+            VariableValue::Object(TypedObjectWeak::Group(weak_group)) => {
+                let strong_group = weak_group.upgrade().ok_or(RTresult::RT_ERROR_UNKNOWN)?;
+                let group_data = strong_group.borrow()?;
+                Ok(f(&group_data.index.to_ne_bytes()))
+            }
+            VariableValue::Object(TypedObjectWeak::TextureSampler(weak_texture)) => {
+                let strong_texture = weak_texture.upgrade().ok_or(RTresult::RT_ERROR_UNKNOWN)?;
+                let texture_data = strong_texture.borrow()?;
+                Ok(f(&(texture_data.hip_object as usize).to_ne_bytes()))
+            }
+            VariableValue::Object(TypedObjectWeak::Transform(_))
+            | VariableValue::Object(TypedObjectWeak::Material(_))
+            | VariableValue::Object(TypedObjectWeak::Geometry(_))
+            | VariableValue::Object(TypedObjectWeak::GeometryTriangles(_))
+            | VariableValue::Object(TypedObjectWeak::GeometryInstance(_)) => todo!(),
+            VariableValue::Object(TypedObjectWeak::Context(_))
+            | VariableValue::Object(TypedObjectWeak::Variable(_))
+            | VariableValue::Object(TypedObjectWeak::Program(_))
+            | VariableValue::Object(TypedObjectWeak::Acceleration(_)) => {
+                Err(RTresult::RT_ERROR_NOT_SUPPORTED)
+            }
+        }
+    }
+}
+
+impl VariableData {
+    /// Creates an empty variable that shares the context of `owner`.
+    ///
+    /// Only owners that expose their context as a weak reference are accepted;
+    /// an owner returning `MaybeWeakRefMut::Ref` gets `RT_ERROR_UNKNOWN`.
+    pub(crate) fn new<T: OptixObjectData>(
+        owner: &mut T,
+    ) -> Result<Rc<OptixCell<VariableData>>, RTresult> {
+        let context = if let MaybeWeakRefMut::Weak(weak_ctx) = owner.context() {
+            weak_ctx.clone()
+        } else {
+            return Err(RTresult::RT_ERROR_UNKNOWN);
+        };
+        let variable = VariableData {
+            context,
+            value: VariableValue::None,
+        };
+        Ok(Rc::new(OptixCell::new(variable)))
+    }
+
+    /// Creates an empty variable owned directly by `context`.
+    pub(crate) fn new_with_context(
+        context: &OptixCell<ContextData>,
+    ) -> Result<Rc<OptixCell<VariableData>>, RTresult> {
+        let context = unsafe { OptixCell::clone_weak(context as _) };
+        let variable = VariableData {
+            context,
+            value: VariableValue::None,
+        };
+        Ok(Rc::new(OptixCell::new(variable)))
+    }
+
+    /// Copies the variable's bytes into `buffer`.
+    ///
+    /// The destination must be exactly as long as the value; otherwise
+    /// `RT_ERROR_UNKNOWN` is returned and `buffer` is left untouched. Errors
+    /// from `VariableValue::with_bytes` are passed through.
+    pub(crate) fn copy_into_buffer(&self, buffer: &mut [u8]) -> Result<(), RTresult> {
+        let copy_result: Result<(), RTresult> = self.value.with_bytes(|source_bytes| {
+            if source_bytes.len() == buffer.len() {
+                buffer.copy_from_slice(source_bytes);
+                Ok(())
+            } else {
+                Err(RTresult::RT_ERROR_UNKNOWN)
+            }
+        })?;
+        copy_result
+    }
+}
+
+impl OptixObjectData for VariableData {
+    const TYPE: TypeTag = TypeTag::Variable;
+
+    /// Always fails with `RT_ERROR_UNKNOWN`: explicit deregistration of a
+    /// variable is not a valid operation.
+    fn deregister(&mut self, _this: &Rc<OptixCell<Self>>) -> Result<(), RTresult> {
+        // Variables are only ever destroyed implicitly
+        Err(RTresult::RT_ERROR_UNKNOWN)
+    }
+
+    /// Exposes the owning context through the stored weak reference.
+    fn context<'a>(&'a mut self) -> MaybeWeakRefMut<'a, ContextData> {
+        let owning_context = &self.context;
+        MaybeWeakRefMut::Weak(owning_context)
+    }
+}
+
+/// Makes the variable refer to `object`, stored as a weak typed reference.
+///
+/// Fails if `v` is null, if `object` cannot be converted by
+/// `TypedObjectWeak::clone_from`, or if the variable cell cannot be borrowed
+/// mutably.
+pub(crate) unsafe fn set_object(v: Variable, object: UntypedObject) -> Result<(), RTresult> {
+    let cell = null_unwrap(v)?;
+    let weak_object = TypedObjectWeak::clone_from(object)?;
+    let mut data = cell.borrow_mut()?;
+    data.value = VariableValue::Object(weak_object);
+    Ok(())
+}
+
+/// Writes the object currently held by the variable into `*result`.
+///
+/// Returns `RT_ERROR_INVALID_VALUE` when the variable does not hold an
+/// object. Null `result` or `v`, or a failed borrow, are reported as errors
+/// as well.
+pub(crate) unsafe fn get_object(v: Variable, result: *mut UntypedObject) -> Result<(), RTresult> {
+    null_check(result)?;
+    let cell = null_unwrap(v)?;
+    let data = cell.borrow()?;
+    if let VariableValue::Object(ref held_object) = data.value {
+        *result = held_object.as_untyped();
+        Ok(())
+    } else {
+        Err(RTresult::RT_ERROR_INVALID_VALUE)
+    }
+}
+
+/// Stores a single `f32` in the variable.
+pub(crate) unsafe fn set_1f(v: Variable, f1: f32) -> Result<(), RTresult> {
+    let cell = null_unwrap(v)?;
+    let mut data = cell.borrow_mut()?;
+    let packed = pack_into_value(f1);
+    data.value = packed;
+    Ok(())
+}
+
+/// Stores a single `i32` in the variable.
+pub(crate) unsafe fn set_1i(v: *const OptixCell<VariableData>, i1: i32) -> Result<(), RTresult> {
+    let cell = null_unwrap(v)?;
+    let mut data = cell.borrow_mut()?;
+    let packed = pack_into_value(i1);
+    data.value = packed;
+    Ok(())
+}
+
+/// Stores a single `u32` in the variable.
+pub(crate) unsafe fn set_1ui(v: Variable, u1: u32) -> Result<(), RTresult> {
+    let cell = null_unwrap(v)?;
+    let mut data = cell.borrow_mut()?;
+    let packed = pack_into_value(u1);
+    data.value = packed;
+    Ok(())
+}
+
+/// Stores three `f32` components in the variable as one contiguous value.
+pub(crate) unsafe fn set_3f(v: Variable, f1: f32, f2: f32, f3: f32) -> Result<(), RTresult> {
+    let cell = null_unwrap(v)?;
+    let mut data = cell.borrow_mut()?;
+    let components = [f1, f2, f3];
+    data.value = pack_into_value(components);
+    Ok(())
+}
+
+/// Pointer variant of `set_3f`: reads three floats from `f` and stores them.
+///
+/// `f` is null-checked and must point to at least three readable `f32` values.
+pub(crate) unsafe fn set_3fv(v: Variable, f: *const f32) -> Result<(), RTresult> {
+    null_check(f)?;
+    let [x, y, z] = *(f as *const [f32; 3]);
+    set_3f(v, x, y, z)
+}
+
+/// Stores four `f32` components in the variable as one contiguous value.
+pub(crate) unsafe fn set_4f(
+    v: Variable,
+    f1: f32,
+    f2: f32,
+    f3: f32,
+    f4: f32,
+) -> Result<(), RTresult> {
+    let cell = null_unwrap(v)?;
+    let mut data = cell.borrow_mut()?;
+    let components = [f1, f2, f3, f4];
+    data.value = pack_into_value(components);
+    Ok(())
+}
+
+/// Pointer variant of `set_4f`: reads four floats from `f` and stores them.
+///
+/// `f` is null-checked and must point to at least four readable `f32` values.
+pub(crate) unsafe fn set_4fv(v: Variable, f: *const f32) -> Result<(), RTresult> {
+    null_check(f)?;
+    let [x, y, z, w] = *(f as *const [f32; 4]);
+    set_4f(v, x, y, z, w)
+}
+
+/// Packs a plain value into a `VariableValue` by copying its bytes.
+///
+/// Values of at most 16 bytes (four `f32`s) are stored inline together with
+/// their size; larger values are copied into a freshly allocated
+/// `AlignedBuffer` laid out for `T`.
+///
+/// `T: Copy` is required because the value is duplicated bitwise.
+unsafe fn pack_into_value<T: Copy>(t: T) -> VariableValue {
+    const INLINE_CAPACITY: usize = 4 * mem::size_of::<f32>();
+    let size = mem::size_of::<T>();
+    // Copy as bytes so the destination never has to be aligned for `T`.
+    let source = &t as *const T as *const u8;
+    if size > INLINE_CAPACITY {
+        let buffer = AlignedBuffer::new(Layout::new::<T>());
+        // Fill the buffer with the value; without this copy the data is lost.
+        ptr::copy_nonoverlapping(source, buffer.as_ptr().cast::<u8>(), size);
+        VariableValue::Boxed(buffer)
+    } else {
+        let mut data = [0u8; INLINE_CAPACITY];
+        ptr::copy_nonoverlapping(source, data.as_mut_ptr(), size);
+        VariableValue::Inline {
+            data,
+            // `size` is at most 16 in this branch, so it fits in a `u8`.
+            size: size as u8,
+        }
+    }
+}
+
+/// Stores `size` bytes of caller-provided data in the variable.
+///
+/// The bytes are copied into a newly allocated buffer with alignment 1, so
+/// the caller's memory is not referenced after the call returns.
+///
+/// # Errors
+/// Fails if `ptr` or `v` is null, or if the variable cell cannot be borrowed
+/// mutably. Returns `RT_ERROR_INVALID_VALUE` if `size` does not fit in
+/// `usize` or is too large to form a valid allocation layout.
+///
+/// # Safety
+/// `ptr` must point to at least `size` readable bytes.
+pub(crate) unsafe fn set_user_data(
+    v: Variable,
+    size: u64,
+    ptr: *const std::ffi::c_void,
+) -> Result<(), RTresult> {
+    null_check(ptr)?;
+    let var = null_unwrap(v)?;
+    let mut var = var.borrow_mut()?;
+    // `size` comes from the caller: convert and validate it instead of
+    // truncating it or building an unchecked layout.
+    let byte_len = <usize as std::convert::TryFrom<u64>>::try_from(size)
+        .map_err(|_| RTresult::RT_ERROR_INVALID_VALUE)?;
+    let layout =
+        Layout::from_size_align(byte_len, 1).map_err(|_| RTresult::RT_ERROR_INVALID_VALUE)?;
+    let buffer = AlignedBuffer::new(layout);
+    ptr::copy_nonoverlapping(
+        ptr.cast::<u8>(),
+        buffer.as_ptr().cast::<u8>(),
+        byte_len,
+    );
+    var.value = VariableValue::Boxed(buffer);
+    Ok(())
+}
+
+/// Stores a single `u64` in the variable.
+pub(crate) unsafe fn set_1ull(v: Variable, ull1: u64) -> Result<(), RTresult> {
+    let cell = null_unwrap(v)?;
+    let mut data = cell.borrow_mut()?;
+    let packed = pack_into_value(ull1);
+    data.value = packed;
+    Ok(())
+}
|