INTRODUCTION TO OPTIX
Martin Stich, Engineering Manager
AGENDA: OptiX Basics, Advanced Topics, Case Studies, Feature Outlook
2 OPTIX BASICS
3 IN A NUTSHELL The OptiX Ray Tracing SDK
State-of-the-art performance: 500M+ rays/sec Algorithm and hardware agnostic Shaders with single-ray programming model, recursion Available free for private and commercial use
4 RELEASE TIMELINE
OptiX 1.0 OptiX 3.9 OptiX 4.0 OptiX 4.1 Pascal Support LLVM Pipeline Performance Hello World! ... NVLINK Scaling CUDA 8, VS2015
2009 Jan 2016 Summer 2016 TODAY!
5 MODERN RAY TRACING
Rasterization:
6 MODERN RAY TRACING
Rasterization:
Ray Tracing:
7 THE RAY TRACING PIPELINE
INTERSECTION ANY-HIT
Diagram labels: RAY GENERATION, INTERSECTION, ANY-HIT, CLOSEST-HIT
Programmable:
Ray Generation ← Launch entry
Closest Hit, Any Hit, Miss ← Shading
Intersection ← Geometry
8 MAIN OPERATIONS
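| Operation               | RAY GEN | INTERSECT | ANY HIT | CLOSEST HIT | MISS |
|-------------------------|---------|-----------|---------|-------------|------|
| rtTrace                 | ✔       |           |         | ✔           | ✔    |
| rtPotentialIntersection |         | ✔         |         |             |      |
| rtReportIntersection    |         | ✔         |         |             |      |
| rtIgnoreIntersection    |         |           | ✔       |             |      |
| rtTerminateRay          |         |           | ✔       |             |      |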
9 SHADER COMMUNICATION
Variables: similar to uniforms in other systems; constants, system values, buffers, textures, … Example: Textures, Current ray
10 SHADER COMMUNICATION
Variables: similar to uniforms in other systems; constants, system values, buffers, textures, … Example: Textures, Current ray
Ray Payload: arbitrary data associated with ray; in/out from rtTrace to Any-hit, Closest-hit, Miss. Example: Color data
11 SHADER COMMUNICATION
Variables: similar to uniforms in other systems; constants, system values, buffers, textures, … Example: Textures, Current ray
Ray Payload: arbitrary data associated with ray; in/out from rtTrace to Any-hit, Closest-hit, Miss. Example: Color data
Attributes: arbitrary data associated with hit; generated in Intersection program, consumed by Any-hit, Closest-hit. Example: Barycentrics
12 EXAMPLE Miniature Path Tracer
See optixPathTracer SDK sample for a slightly less minimalistic version
13 GLOBAL ILLUMINATION
14 GLOBAL ILLUMINATION
15 GLOBAL ILLUMINATION
16 GLOBAL ILLUMINATION
17 HOWEVER…
18 RAY PAYLOAD Path State Definition
// Per-ray path state carried through rtTrace. The ray generation program
// initializes it, and the closest-hit and miss programs update it to describe
// the next path segment or to terminate the path.
struct RayPayload {
    float3   radiance;        // stores contribution after we hit the light source
    float3   attenuation;     // accumulated attenuation of radiance due to materials
    float3   next_origin;     // reflection ray to be traced for the next..
    float3   next_direction;  // ..segment of the path
    unsigned seed;            // random number generator state
    bool     done;            // whether we're done tracing this path
};
19 INTEGRATION
// Ray generation program: traces one path for the current launch index and
// blends the result into output_buffer.
//
// The path is followed iteratively: each rtTrace call lets a closest-hit or
// miss program update the payload, and the loop then continues from
// payload.next_origin / payload.next_direction until the payload is marked
// done or 10 segments have been traced.
RT_PROGRAM void ray_generation()
{
    // (1) Seed the RNG from the linear pixel index and the frame number, then
    //     set up the primary ray from the eye with a jittered direction.
    unsigned int seed = tea<16>( launch_index.x + launch_index.y*output_buffer.size().x, frame_number );
    float3 ray_origin    = eye;
    float3 ray_direction = compute_jittered_ray_dir( seed );

    // (2) Initialize the path state: no radiance gathered yet, no attenuation.
    RayPayload payload;
    payload.radiance    = make_float3(0,0,0);
    payload.attenuation = make_float3(1,1,1);
    payload.seed        = seed;
    payload.done        = false;

    // (3) Trace the path segment by segment. The hit/miss programs write the
    //     next segment (or set done) into the payload.
    int depth = 0;
    while( !payload.done && depth++ < 10 ) {
        Ray ray = make_Ray( ray_origin, ray_direction, 0, 0.001f, RT_DEFAULT_MAX );
        rtTrace( scene, ray, payload );
        ray_origin    = payload.next_origin;
        ray_direction = payload.next_direction;
    }

    const float3 result = payload.radiance * payload.attenuation;

    // (4) Blend with the previously stored color. For frame_number > 1 the
    //     new sample gets weight 1/frame_number; otherwise it replaces the
    //     stored value outright.
    const float  lerp_t   = frame_number > 1 ? 1.0f / frame_number : 1.0f;
    const float3 prev_col = make_float3( output_buffer[launch_index] );
    output_buffer[launch_index] = make_float4( lerp( prev_col, result, lerp_t ), 1.0f );
}
20 DIFFUSE MATERIAL
rtDeclareVariable( float3,     diffuse_color,   , );
rtDeclareVariable( float3,     normal,          attribute normal, );
rtDeclareVariable( optix::Ray, ray,             rtCurrentRay, );
rtDeclareVariable( float,      t_hit,           rtIntersectionDistance, );
rtDeclareVariable( RayPayload, current_payload, rtPayload, );

// Closest-hit program for a diffuse surface: picks the next path segment and
// folds the surface color into the path's attenuation. It does not set
// current_payload.done, so the caller's loop keeps tracing.
RT_PROGRAM void closest_hit_diffuse()
{
    // Two random numbers drawn from the payload's RNG state, in this order.
    const float u1 = rnd( current_payload.seed );
    const float u2 = rnd( current_payload.seed );

    // Sample a direction, then map it through a basis built from the normal.
    // NOTE(review): presumably a cosine-weighted sample in a local frame that
    // Onb::inverse_transform rotates around `normal` — confirm against the helpers.
    float3 bounce_dir;
    cosine_sample_hemisphere( u1, u2, bounce_dir );
    optix::Onb basis( normal );
    basis.inverse_transform( bounce_dir );

    // The next segment starts at the hit point along the current ray.
    current_payload.next_origin    = ray.origin + t_hit * ray.direction;
    current_payload.next_direction = bounce_dir;

    current_payload.attenuation *= diffuse_color;
}
21 LIGHT MATERIAL AND MISS
rtDeclareVariable( RayPayload, current_payload, rtPayload, );
rtDeclareVariable( float3,     emission_color,  , );

// Closest-hit program for an emitter: the path ends here and picks up the
// light's emission as its radiance.
RT_PROGRAM void closest_hit_light()
{
    current_payload.done     = true;
    current_payload.radiance = emission_color;
}

// Miss program: the ray hit nothing, so the path ends with zero radiance.
RT_PROGRAM void miss()
{
    current_payload.done     = true;
    current_payload.radiance = make_float3(0,0,0);
}
22 SCENE GEOMETRY
rtDeclareVariable( float3,     anchor, , );
rtDeclareVariable( float3,     v1,     , );
rtDeclareVariable( float3,     v2,     , );
rtDeclareVariable( float4,     plane,  , );
rtDeclareVariable( float3,     normal, attribute normal, );
rtDeclareVariable( optix::Ray, ray,    rtCurrentRay, );

// Intersection program for a planar primitive spanned by v1 and v2 at `anchor`.
//
// `plane` stores the plane as (n.x, n.y, n.z, d) with dot(n, p) == d for
// points p on the plane. The ray is intersected with that plane and the hit is
// accepted when its coordinates along v1 and v2 both lie in [0, 1].
// NOTE(review): this assumes v1/v2 are pre-scaled by the host so the dot
// products yield normalized coordinates — confirm against the host setup code.
//
// primIdx is not used by this program.
RT_PROGRAM void intersect( int primIdx )
{
    // Ray/plane intersection distance.
    const float3 n  = make_float3( plane );
    const float  dt = dot( ray.direction, n );
    const float  t  = (plane.w - dot(n, ray.origin)) / dt;

    // Hit point expressed relative to the anchor, projected on both edges.
    const float3 p  = ray.origin + ray.direction * t;
    const float3 vi = p - anchor;
    const float  a1 = dot( v1, vi );
    const float  a2 = dot( v2, vi );

    if( a1 >= 0 && a1 <= 1 && a2 >= 0 && a2 <= 1 ) {
        // Attributes are written between rtPotentialIntersection and
        // rtReportIntersection; 0 is the material index reported for the hit.
        if( rtPotentialIntersection( t ) ) {
            normal = n;
            rtReportIntersection( 0 );
        }
    }
}
23 RESULT Accumulation Over Time
24 NEXT STEP IDEAS Mini Path Tracer Reader Exercises
Next event estimation
Russian roulette
Sphere primitives
Mirror material
Glass material
Triangle meshes
Environment maps
...
25 NODE GRAPH
Context
Group
Geometry Group
Geometry Variables Instance
Intersection Geometry Program
Any Hit + Material Closest Hit
26 ADVANCED TOPICS
27 INTEROP Share CUDA and OpenGL Resources
OpenGL Share textures and vertex buffers
rtBufferCreateFromGLBO rtTextureSamplerCreateFromGLImage
CUDA Share CUDA allocations
rtBufferSetDevicePointer rtBufferGetDevicePointer
28 BINDLESS OBJECTS
“Bindless”: powerful concept to dynamically select textures, buffers, and programs at runtime Allows efficient implementation of large shading networks Reduces code size, compile times, and number of compiles
Pixar Animation Studio’s “Flow” material editing tool. Visit the NVIDIA website to watch a SIGGRAPH 2015 talk describing the system in detail.
29 PTX GENERATION Options
Offline CUDA-C++ nvcc.exe
CUDA-C++ nvrtc.dll PTX OptiX
JIT NVVM/ LLVM-IR NVPTX
30 MULTI-GPU AND NVLINK Automatic Scaling
110M Triangles 23M Grass Blades 15GB Textures 2 x GP100
Monsters University data set courtesy of Pixar Studios
31 REMOTE RENDERING On Quadro VCA or DGX-1
Incremental updates
Progressive OptiX App video stream ETHERNET INTERNET OptiX Server(s)
32 OPTIX PRIME Simple intersection-only API
Rays OptiX Prime Intersections (primIdx, t, u, v) GPU or CPU Triangles
Supports instancing, async operations, ray masks
Performance similar to OptiX
Same semantics via OptiX: See optixRaycasting sample
33 CASE STUDIES
34 PERFORMANCE Raw Traversal on Titan X Pascal
+3% in OptiX 4.1 +16% in OptiX 4.1
35 NVIDIA: IRAY & MENTAL RAY
36 NVIDIA: GVDB Sparse Volume Rendering
OpenVDB format support
Image Property of DreamWorks Animation
Live interaction with multiple-bounce GI scattering, 10x-30x faster than CPU
Introduction and Techniques with NVIDIA GVDB Voxels, Monday, 9:00 AM – Room 231
37 AAA-STUDIO: FURRYBALL PRODUCTION RENDERING
38 VISUAL MOLECULAR DYNAMICS (VMD)
Molecular Visualization package with hundreds of thousands of users Developed by John Stone of U Illinois
Cutting Edge OptiX Ray Tracing Techniques for Visualization of Biomolecular and Cellular Simulations in VMD Tuesday, 3:30 PM – Room 230C
39 OLCF: SCIENTIFIC VISUALIZATION
Visualizing laser interaction with metals, hundreds of millions of primitives on DGX-1 Developed by Benjamin Hernandez, OLCF-ORNL
Exploratory Visualization of Petascale Particle Data in NVIDIA DGX-1 Tuesday, 3:30 PM – Room 212B
Simulation: OLCF INCITE 2017 "Petascale Simulations of Short Pulse Laser Interaction with Metals", PI Leonid Zhigilei, University of Virginia
40 NVIDIA: VRWORKS AUDIO
Path traced audio
Sound waves reflect off of the environment
NVIDIA VRWORKS AUDIO - Improving VR Immersion with acoustic fidelity, Thursday, 11:00 AM – Room 230B
41 BUNGIE: LIGHT BAKING FOR GAMES
Light and occlusion baking is a major bottleneck in game design Workflow-changing speedups by switching from CPU render farms to OptiX
42 LIGHTMASS Light baking in Unreal Engine 4
Total Time on Titan X
CPU 1638s GPU 1X 426s GPU 2X 298s
43 ADVANCED SAMPLES Maintained on Github
A collection of larger, more sophisticated sample applications than the ones that come with the SDK
Available at: https://github.com/nvpro-samples/optix_advanced_samples
44 FEATURE OUTLOOK
45 UNDER DEVELOPMENT
SEPARATE COMPILATION
Shorter per-shader compile times
Fast incremental addition and removal of programs
Overall speedup through parallel compilation

MOTION BLUR
Transform and deformation blur

PERFORMANCE
Performance is always a focus
Tackling some big ticket ideas over the next ~year
46 THANK YOU!
Related Talks:
S7185 - LEVERAGING NVRTC RUNTIME COMPILATION FOR DYNAMICALLY BUILDING OPTIX SHADERS FROM MDL MATERIALS
S7454 - NVIDIA ADVANCED RENDERING
S7452 - CUTTING EDGE OPTIX RAY TRACING TECHNIQUES FOR VISUALIZATION OF BIOMOLECULAR AND CELLULAR SIMULATIONS IN VMD
S7175 - EXPLORATORY VISUALIZATION OF PETASCALE PARTICLE DATA IN NVIDIA DGX-1
S7400 - GPU-CLOUD PHOTOREALISTIC RENDERING FOR THE NEXT GENERATION OF CLOUD CAD TOOLS
H7106 - PHYSICALLY BASED RAY TRACING WITH OPTIX
S7391 - TURBOCHARGING VMD MOLECULAR VISUALIZATIONS WITH STATE-OF-THE-ART RENDERING AND VR TECHNOLOGIES
S7424 - INTRODUCTION AND TECHNIQUES WITH NVIDIA GVDB VOXELS
S7135 - NVIDIA VRWORKS AUDIO - IMPROVING VR IMMERSION WITH ACOUSTIC FIDELITY
47