Cycles: OpenCL split kernel cleanup, move casts from .h files to .cl files

Ideally we shouldn't use char* at all, but for now we have to, so at least
let's assume common .h files are free from pointer magic.
This commit is contained in:
Sergey Sharybin
2015-10-29 21:44:36 +05:00
parent fc5f717888
commit 4ca688a963
18 changed files with 68 additions and 103 deletions

View File

@@ -17,9 +17,9 @@
#include "split/kernel_background_buffer_update.h"
__kernel void kernel_ocl_path_trace_background_buffer_update(
ccl_global char *globals,
ccl_global char *kg,
ccl_constant KernelData *data,
ccl_global char *shader_data,
ccl_global char *sd,
ccl_global float *per_sample_output_buffers,
ccl_global uint *rng_state,
ccl_global uint *rng_coop, /* Required for buffer Update */
@@ -83,9 +83,9 @@ __kernel void kernel_ocl_path_trace_background_buffer_update(
if(ray_index != QUEUE_EMPTY_SLOT) {
#endif
enqueue_flag =
kernel_background_buffer_update(globals,
kernel_background_buffer_update((KernelGlobals *)kg,
data,
shader_data,
(ShaderData *)sd,
per_sample_output_buffers,
rng_state,
rng_coop,

View File

@@ -17,9 +17,9 @@
#include "split/kernel_data_init.h"
__kernel void kernel_ocl_path_trace_data_init(
ccl_global char *globals,
ccl_global char *shader_data_sd, /* Arguments related to ShaderData */
ccl_global char *shader_data_sd_DL_shadow, /* Arguments related to ShaderData */
ccl_global char *kg,
ccl_global char *sd,
ccl_global char *sd_DL_shadow,
ccl_global float3 *P_sd,
ccl_global float3 *P_sd_DL_shadow,
@@ -141,9 +141,9 @@ __kernel void kernel_ocl_path_trace_data_init(
#endif
int parallel_samples) /* Number of samples to be processed in parallel */
{
kernel_data_init(globals,
shader_data_sd,
shader_data_sd_DL_shadow,
kernel_data_init((KernelGlobals *)kg,
(ShaderData *)sd,
(ShaderData *)sd_DL_shadow,
P_sd,
P_sd_DL_shadow,
N_sd,

View File

@@ -17,10 +17,10 @@
#include "split/kernel_direct_lighting.h"
__kernel void kernel_ocl_path_trace_direct_lighting(
ccl_global char *globals,
ccl_global char *kg,
ccl_constant KernelData *data,
ccl_global char *shader_data, /* Required for direct lighting */
ccl_global char *shader_DL, /* Required for direct lighting */
ccl_global char *sd, /* Required for direct lighting */
ccl_global char *sd_DL, /* Required for direct lighting */
ccl_global uint *rng_coop, /* Required for direct lighting */
ccl_global PathState *PathState_coop, /* Required for direct lighting */
ccl_global int *ISLamp_coop, /* Required for direct lighting */
@@ -61,10 +61,10 @@ __kernel void kernel_ocl_path_trace_direct_lighting(
#ifndef __COMPUTE_DEVICE_GPU__
if(ray_index != QUEUE_EMPTY_SLOT) {
#endif
enqueue_flag = kernel_direct_lighting(globals,
enqueue_flag = kernel_direct_lighting((KernelGLobals *)kg,
data,
shader_data,
shader_DL,
(ShaderData *)sd,
(ShaderData *)sd_DL,
rng_coop,
PathState_coop,
ISLamp_coop,

View File

@@ -17,9 +17,9 @@
#include "split/kernel_holdout_emission_blurring_pathtermination_ao.h"
__kernel void kernel_ocl_path_trace_holdout_emission_blurring_pathtermination_ao(
ccl_global char *globals,
ccl_global char *kg,
ccl_constant KernelData *data,
ccl_global char *shader_data, /* Required throughout the kernel except probabilistic path termination and AO */
ccl_global char *sd, /* Required throughout the kernel except probabilistic path termination and AO */
ccl_global float *per_sample_output_buffers,
ccl_global uint *rng_coop, /* Required for "kernel_write_data_passes" and AO */
ccl_global float3 *throughput_coop, /* Required for handling holdout material and AO */
@@ -75,9 +75,9 @@ __kernel void kernel_ocl_path_trace_holdout_emission_blurring_pathtermination_ao
if(ray_index != QUEUE_EMPTY_SLOT) {
#endif
kernel_holdout_emission_blurring_pathtermination_ao(
globals,
(KernelGlobals *)kg,
data,
shader_data,
(ShaderData *)sd,
per_sample_output_buffers,
rng_coop,
throughput_coop,

View File

@@ -17,9 +17,9 @@
#include "split/kernel_lamp_emission.h"
__kernel void kernel_ocl_path_trace_lamp_emission(
ccl_global char *globals,
ccl_global char *kg,
ccl_constant KernelData *data,
ccl_global char *shader_data, /* Required for lamp emission */
ccl_global char *sd, /* Required for lamp emission */
ccl_global float3 *throughput_coop, /* Required for lamp emission */
PathRadiance *PathRadiance_coop, /* Required for lamp emission */
ccl_global Ray *Ray_coop, /* Required for lamp emission */
@@ -68,9 +68,9 @@ __kernel void kernel_ocl_path_trace_lamp_emission(
}
}
kernel_lamp_emission(globals,
kernel_lamp_emission((KenrelGLobals *)kg,
data,
shader_data,
(ShaderData *)sd,
throughput_coop,
PathRadiance_coop,
Ray_coop,

View File

@@ -17,9 +17,9 @@
#include "split/kernel_next_iteration_setup.h"
__kernel void kernel_ocl_path_trace_next_iteration_setup(
ccl_global char *globals,
ccl_global char *kg,
ccl_constant KernelData *data,
ccl_global char *shader_data, /* Required for setting up ray for next iteration */
ccl_global char *sd, /* Required for setting up ray for next iteration */
ccl_global uint *rng_coop, /* Required for setting up ray for next iteration */
ccl_global float3 *throughput_coop, /* Required for setting up ray for next iteration */
PathRadiance *PathRadiance_coop, /* Required for setting up ray for next iteration */
@@ -83,9 +83,9 @@ __kernel void kernel_ocl_path_trace_next_iteration_setup(
#ifndef __COMPUTE_DEVICE_GPU__
if(ray_index != QUEUE_EMPTY_SLOT) {
#endif
enqueue_flag = kernel_next_iteration_setup(globals,
enqueue_flag = kernel_next_iteration_setup((KernelGlobals *)kg,
data,
shader_data,
(ShaderData *)sd,
rng_coop,
throughput_coop,
PathRadiance_coop,

View File

@@ -17,7 +17,7 @@
#include "split/kernel_scene_intersect.h"
__kernel void kernel_ocl_path_trace_scene_intersect(
ccl_global char *globals,
ccl_global char *kg,
ccl_constant KernelData *data,
ccl_global uint *rng_coop,
ccl_global Ray *Ray_coop, /* Required for scene_intersect */
@@ -65,7 +65,7 @@ __kernel void kernel_ocl_path_trace_scene_intersect(
}
}
kernel_scene_intersect(globals,
kernel_scene_intersect((KernelGlobals *)kg,
data,
rng_coop,
Ray_coop,

View File

@@ -17,9 +17,9 @@
#include "split/kernel_shader_eval.h"
__kernel void kernel_ocl_path_trace_shader_eval(
ccl_global char *globals,
ccl_global char *kg,
ccl_constant KernelData *data,
ccl_global char *shader_data, /* Output ShaderData structure to be filled */
ccl_global char *sd, /* Output ShaderData structure to be filled */
ccl_global uint *rng_coop, /* Required for rbsdf calculation */
ccl_global Ray *Ray_coop, /* Required for setting up shader from ray */
ccl_global PathState *PathState_coop, /* Required for all functions in this kernel */
@@ -57,9 +57,9 @@ __kernel void kernel_ocl_path_trace_shader_eval(
Queue_index);
/* Continue on with shader evaluation. */
kernel_shader_eval(globals,
kernel_shader_eval((KernelGlobals *)kg,
data,
shader_data,
(ShaderData *)sd,
rng_coop,
Ray_coop,
PathState_coop,

View File

@@ -17,9 +17,9 @@
#include "split/kernel_shadow_blocked.h"
__kernel void kernel_ocl_path_trace_shadow_blocked(
ccl_global char *globals,
ccl_global char *kg,
ccl_constant KernelData *data,
ccl_global char *shader_shadow, /* Required for shadow blocked */
ccl_global char *sd_shadow, /* Required for shadow blocked */
ccl_global PathState *PathState_coop, /* Required for shadow blocked */
ccl_global Ray *LightRay_dl_coop, /* Required for direct lighting's shadow blocked */
ccl_global Ray *LightRay_ao_coop, /* Required for AO's shadow blocked */
@@ -68,9 +68,9 @@ __kernel void kernel_ocl_path_trace_shadow_blocked(
if(ray_index == QUEUE_EMPTY_SLOT)
return;
kernel_shadow_blocked(globals,
kernel_shadow_blocked((KernelGlobals *)kg,
data,
shader_shadow,
(ShaderData *)sd_shadow,
PathState_coop,
LightRay_dl_coop,
LightRay_ao_coop,

View File

@@ -57,7 +57,7 @@
* work_pool_wgs ----------------------------------------| |
* num_samples ------------------------------------------| |
*
* note on shader_data : shader_data argument is neither an input nor an output for this kernel. It is just filled and consumed here itself.
* note on sd : sd argument is neither an input nor an output for this kernel. It is just filled and consumed here itself.
* Note on Queues :
* This kernel fetches rays from QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue.
*
@@ -70,9 +70,9 @@
* QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be empty
*/
ccl_device char kernel_background_buffer_update(
ccl_global char *globals,
KernelGlobals *kg,
ccl_constant KernelData *data,
ccl_global char *shader_data,
ShaderData *sd,
ccl_global float *per_sample_output_buffers,
ccl_global uint *rng_state,
ccl_global uint *rng_coop, /* Required for buffer Update */
@@ -100,11 +100,6 @@ ccl_device char kernel_background_buffer_update(
int ray_index)
{
char enqueue_flag = 0;
/* Load kernel globals structure and ShaderData strucuture */
KernelGlobals *kg = (KernelGlobals *)globals;
ShaderData *sd = (ShaderData *)shader_data;
#ifdef __KERNEL_DEBUG__
DebugData *debug_data = &debugdata_coop[ray_index];
#endif

View File

@@ -51,9 +51,9 @@
* The number of elements in the queues is initialized to 0;
*/
ccl_device void kernel_data_init(
ccl_global char *globals,
ccl_global char *shader_data_sd, /* Arguments related to ShaderData */
ccl_global char *shader_data_sd_DL_shadow, /* Arguments related to ShaderData */
KernelGlobals *kg,
ShaderData *sd,
ShaderData *sd_DL_shadow,
ccl_global float3 *P_sd,
ccl_global float3 *P_sd_DL_shadow,
@@ -175,19 +175,11 @@ ccl_device void kernel_data_init(
#endif
int parallel_samples) /* Number of samples to be processed in parallel */
{
/* Load kernel globals structure */
KernelGlobals *kg = (KernelGlobals *)globals;
kg->data = data;
#define KERNEL_TEX(type, ttype, name) \
kg->name = name;
#include "../kernel_textures.h"
/* Load ShaderData structure */
ShaderData *sd = (ShaderData *)shader_data_sd;
ShaderData *sd_DL_shadow = (ShaderData *)shader_data_sd_DL_shadow;
sd->P = P_sd;
sd_DL_shadow->P = P_sd_DL_shadow;

View File

@@ -30,13 +30,13 @@
*
* rng_coop -----------------------------------------|--- kernel_direct_lighting --|--- BSDFEval_coop
* PathState_coop -----------------------------------| |--- ISLamp_coop
* shader_data --------------------------------------| |--- LightRay_coop
* sd -----------------------------------------------| |--- LightRay_coop
* ray_state ----------------------------------------| |--- ray_state
* Queue_data (QUEUE_ACTIVE_AND_REGENERATED_RAYS) ---| |
* kg (globals + data) ------------------------------| |
* queuesize ----------------------------------------| |
*
* note on shader_DL : shader_DL is neither input nor output to this kernel; shader_DL is filled and consumed in this kernel itself.
* note on sd_DL : sd_DL is neither input nor output to this kernel; sd_DL is filled and consumed in this kernel itself.
* Note on Queues :
* This kernel only reads from the QUEUE_ACTIVE_AND_REGENERATED_RAYS queue and processes
* only the rays of state RAY_ACTIVE; If a ray needs to execute the corresponding shadow_blocked
@@ -49,10 +49,10 @@
* kernel call. Before this kernel call the QUEUE_SHADOW_RAY_CAST_DL_RAYS will be empty.
*/
ccl_device char kernel_direct_lighting(
ccl_global char *globals,
KernelGlobals *kg,
ccl_constant KernelData *data,
ccl_global char *shader_data, /* Required for direct lighting */
ccl_global char *shader_DL, /* Required for direct lighting */
ShaderData *sd, /* Required for direct lighting */
ShaderData *sd_DL, /* Required for direct lighting */
ccl_global uint *rng_coop, /* Required for direct lighting */
ccl_global PathState *PathState_coop, /* Required for direct lighting */
ccl_global int *ISLamp_coop, /* Required for direct lighting */
@@ -63,11 +63,6 @@ ccl_device char kernel_direct_lighting(
{
char enqueue_flag = 0;
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
/* Load kernel globals structure and ShaderData structure. */
KernelGlobals *kg = (KernelGlobals *)globals;
ShaderData *sd = (ShaderData *)shader_data;
ShaderData *sd_DL = (ShaderData *)shader_DL;
ccl_global PathState *state = &PathState_coop[ray_index];
/* direct lighting */

View File

@@ -36,7 +36,7 @@
* Intersection_coop ------------------------------------| |--- L_transparent_coop
* PathState_coop ---------------------------------------| |--- per_sample_output_buffers
* L_transparent_coop -----------------------------------| |--- PathRadiance_coop
* shader_data ------------------------------------------| |--- ShaderData
* sd ---------------------------------------------------| |--- ShaderData
* ray_state --------------------------------------------| |--- ray_state
* Queue_data (QUEUE_ACTIVE_AND_REGENERATED_RAYS) -------| |--- Queue_data (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS)
* Queue_index (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) ---| |--- AOAlpha_coop
@@ -71,9 +71,9 @@
* QUEUE_SHADOW_RAY_CAST_AO_RAYS will be filled with rays marked with flag RAY_SHADOW_RAY_CAST_AO
*/
ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
ccl_global char *globals,
KernelGlobals *kg,
ccl_constant KernelData *data,
ccl_global char *shader_data, /* Required throughout the kernel except probabilistic path termination and AO */
ShaderData *sd, /* Required throughout the kernel except probabilistic path termination and AO */
ccl_global float *per_sample_output_buffers,
ccl_global uint *rng_coop, /* Required for "kernel_write_data_passes" and AO */
ccl_global float3 *throughput_coop, /* Required for handling holdout material and AO */
@@ -95,10 +95,6 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
char *enqueue_flag,
char *enqueue_flag_AO_SHADOW_RAY_CAST)
{
/* Load kernel globals structure and ShaderData structure */
KernelGlobals *kg = (KernelGlobals *)globals;
ShaderData *sd = (ShaderData *)shader_data;
#ifdef __WORK_STEALING__
unsigned int my_work;
unsigned int pixel_x;

View File

@@ -37,12 +37,12 @@
* sh -------------------------------------------------| |
* parallel_samples -----------------------------------| |
*
* note : shader_data is neither input nor output. Its just filled and consumed in the same, kernel_lamp_emission, kernel.
* note : sd is neither input nor output. Its just filled and consumed in the same, kernel_lamp_emission, kernel.
*/
ccl_device void kernel_lamp_emission(
ccl_global char *globals,
KernelGlobals *kg,
ccl_constant KernelData *data,
ccl_global char *shader_data, /* Required for lamp emission */
ShaderData *sd, /* Required for lamp emission */
ccl_global float3 *throughput_coop, /* Required for lamp emission */
PathRadiance *PathRadiance_coop, /* Required for lamp emission */
ccl_global Ray *Ray_coop, /* Required for lamp emission */
@@ -59,8 +59,6 @@ ccl_device void kernel_lamp_emission(
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE) ||
IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND))
{
KernelGlobals *kg = (KernelGlobals *)globals;
ShaderData *sd = (ShaderData *)shader_data;
PathRadiance *L = &PathRadiance_coop[ray_index];
float3 throughput = throughput_coop[ray_index];

View File

@@ -30,7 +30,7 @@
* throughput_coop --------------------------------------| |--- Queue_data (QUEUE_HITBF_BUFF_UPDATE_TOREGEN_RAYS)
* PathRadiance_coop ------------------------------------| |--- throughput_coop
* PathState_coop ---------------------------------------| |--- PathRadiance_coop
* shader_data ------------------------------------------| |--- PathState_coop
* sd ---------------------------------------------------| |--- PathState_coop
* ray_state --------------------------------------------| |--- ray_state
* Queue_data (QUEUE_ACTIVE_AND_REGENERATD_RAYS) --------| |--- Ray_coop
* Queue_index (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS) ---| |--- use_queues_flag
@@ -60,9 +60,9 @@
* QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_TO_REGENERATE and more RAY_UPDATE_BUFFER rays
*/
ccl_device char kernel_next_iteration_setup(
ccl_global char *globals,
KernelGlobals *kg,
ccl_constant KernelData *data,
ccl_global char *shader_data, /* Required for setting up ray for next iteration */
ShaderData *sd, /* Required for setting up ray for next iteration */
ccl_global uint *rng_coop, /* Required for setting up ray for next iteration */
ccl_global float3 *throughput_coop, /* Required for setting up ray for next iteration */
PathRadiance *PathRadiance_coop, /* Required for setting up ray for next iteration */
@@ -81,11 +81,9 @@ ccl_device char kernel_next_iteration_setup(
{
char enqueue_flag = 0;
/* Load kernel globals structure and ShaderData structure. */
KernelGlobals *kg = (KernelGlobals *)globals;
ShaderData *sd = (ShaderData *)shader_data;
PathRadiance *L = 0x0;
ccl_global PathState *state = 0x0;
/* Load ShaderData structure. */
PathRadiance *L = NULL;
ccl_global PathState *state = NULL;
/* Path radiance update for AO/Direct_lighting's shadow blocked. */
if(IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL) ||

View File

@@ -63,7 +63,7 @@
*/
ccl_device void kernel_scene_intersect(
ccl_global char *globals,
KernelGlobals *kg,
ccl_constant KernelData *data,
ccl_global uint *rng_coop,
ccl_global Ray *Ray_coop, /* Required for scene_intersect */
@@ -86,9 +86,6 @@ ccl_device void kernel_scene_intersect(
if(!IS_STATE(ray_state, ray_index, RAY_ACTIVE))
return;
/* Load kernel globals structure */
KernelGlobals *kg = (KernelGlobals *)globals;
#ifdef __KERNEL_DEBUG__
DebugData *debug_data = &debugdata_coop[ray_index];
#endif

View File

@@ -23,7 +23,7 @@
* the rays of state RAY_TO_REGENERATE and enqueues them in QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS queue.
*
* The input and output of the kernel is as follows,
* rng_coop -------------------------------------------|--- kernel_shader_eval --|--- shader_data
* rng_coop -------------------------------------------|--- kernel_shader_eval --|--- sd
* Ray_coop -------------------------------------------| |--- Queue_data (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS)
* PathState_coop -------------------------------------| |--- Queue_index (QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS)
* Intersection_coop ----------------------------------| |
@@ -45,9 +45,9 @@
* QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be filled with RAY_TO_REGENERATE rays
*/
ccl_device void kernel_shader_eval(
ccl_global char *globals,
KernelGlobals *kg,
ccl_constant KernelData *data,
ccl_global char *shader_data, /* Output ShaderData structure to be filled */
ShaderData *sd, /* Output ShaderData structure to be filled */
ccl_global uint *rng_coop, /* Required for rbsdf calculation */
ccl_global Ray *Ray_coop, /* Required for setting up shader from ray */
ccl_global PathState *PathState_coop, /* Required for all functions in this kernel */
@@ -56,8 +56,6 @@ ccl_device void kernel_shader_eval(
int ray_index)
{
if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
KernelGlobals *kg = (KernelGlobals *)globals;
ShaderData *sd = (ShaderData *)shader_data;
Intersection *isect = &Intersection_coop[ray_index];
ccl_global uint *rng = &rng_coop[ray_index];
ccl_global PathState *state = &PathState_coop[ray_index];

View File

@@ -34,7 +34,7 @@
* kg (globals + data) -----------------------------| |
* queuesize ---------------------------------------| |
*
* Note on shader_shadow : shader_shadow is neither input nor output to this kernel. shader_shadow is filled and consumed in this kernel itself.
* Note on sd_shadow : sd_shadow is neither input nor output to this kernel. sd_shadow is filled and consumed in this kernel itself.
* Note on queues :
* The kernel fetches from QUEUE_SHADOW_RAY_CAST_AO_RAYS and QUEUE_SHADOW_RAY_CAST_DL_RAYS queues. We will empty
* these queues this kernel.
@@ -46,9 +46,9 @@
* QUEUE_SHADOW_RAY_CAST_AO_RAYS and QUEUE_SHADOW_RAY_CAST_DL_RAYS will be empty at kernel exit.
*/
ccl_device void kernel_shadow_blocked(
ccl_global char *globals,
KernelGlobals *kg,
ccl_constant KernelData *data,
ccl_global char *shader_shadow, /* Required for shadow blocked */
ShaderData *sd_shadow, /* Required for shadow blocked */
ccl_global PathState *PathState_coop, /* Required for shadow blocked */
ccl_global Ray *LightRay_dl_coop, /* Required for direct lighting's shadow blocked */
ccl_global Ray *LightRay_ao_coop, /* Required for AO's shadow blocked */
@@ -65,10 +65,6 @@ ccl_device void kernel_shadow_blocked(
if(IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_DL) ||
IS_FLAG(ray_state, ray_index, RAY_SHADOW_RAY_CAST_AO))
{
/* Load kernel global structure. */
KernelGlobals *kg = (KernelGlobals *)globals;
ShaderData *sd_shadow = (ShaderData *)shader_shadow;
ccl_global PathState *state = &PathState_coop[ray_index];
ccl_global Ray *light_ray_dl_global = &LightRay_dl_coop[ray_index];
ccl_global Ray *light_ray_ao_global = &LightRay_ao_coop[ray_index];