Code refactor: store device/interp/extension/type in each device_memory.
This commit is contained in:
@@ -26,6 +26,7 @@ set(SRC
|
|||||||
device_cpu.cpp
|
device_cpu.cpp
|
||||||
device_cuda.cpp
|
device_cuda.cpp
|
||||||
device_denoising.cpp
|
device_denoising.cpp
|
||||||
|
device_memory.cpp
|
||||||
device_multi.cpp
|
device_multi.cpp
|
||||||
device_opencl.cpp
|
device_opencl.cpp
|
||||||
device_split_kernel.cpp
|
device_split_kernel.cpp
|
||||||
|
@@ -87,7 +87,7 @@ Device::~Device()
|
|||||||
|
|
||||||
void Device::pixels_alloc(device_memory& mem)
|
void Device::pixels_alloc(device_memory& mem)
|
||||||
{
|
{
|
||||||
mem_alloc("pixels", mem, MEM_READ_WRITE);
|
mem_alloc(mem);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Device::pixels_copy_from(device_memory& mem, int y, int w, int h)
|
void Device::pixels_copy_from(device_memory& mem, int y, int w, int h)
|
||||||
@@ -429,16 +429,4 @@ void Device::free_memory()
|
|||||||
devices.free_memory();
|
devices.free_memory();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
device_sub_ptr::device_sub_ptr(Device *device, device_memory& mem, int offset, int size, MemoryType type)
|
|
||||||
: device(device)
|
|
||||||
{
|
|
||||||
ptr = device->mem_alloc_sub_ptr(mem, offset, size, type);
|
|
||||||
}
|
|
||||||
|
|
||||||
device_sub_ptr::~device_sub_ptr()
|
|
||||||
{
|
|
||||||
device->mem_free_sub_ptr(ptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
CCL_NAMESPACE_END
|
CCL_NAMESPACE_END
|
||||||
|
@@ -253,7 +253,7 @@ protected:
|
|||||||
/* used for real time display */
|
/* used for real time display */
|
||||||
unsigned int vertex_buffer;
|
unsigned int vertex_buffer;
|
||||||
|
|
||||||
virtual device_ptr mem_alloc_sub_ptr(device_memory& /*mem*/, int /*offset*/, int /*size*/, MemoryType /*type*/)
|
virtual device_ptr mem_alloc_sub_ptr(device_memory& /*mem*/, int /*offset*/, int /*size*/)
|
||||||
{
|
{
|
||||||
/* Only required for devices that implement denoising. */
|
/* Only required for devices that implement denoising. */
|
||||||
assert(false);
|
assert(false);
|
||||||
@@ -282,7 +282,7 @@ public:
|
|||||||
Stats &stats;
|
Stats &stats;
|
||||||
|
|
||||||
/* regular memory */
|
/* regular memory */
|
||||||
virtual void mem_alloc(const char *name, device_memory& mem, MemoryType type) = 0;
|
virtual void mem_alloc(device_memory& mem) = 0;
|
||||||
virtual void mem_copy_to(device_memory& mem) = 0;
|
virtual void mem_copy_to(device_memory& mem) = 0;
|
||||||
virtual void mem_copy_from(device_memory& mem,
|
virtual void mem_copy_from(device_memory& mem,
|
||||||
int y, int w, int h, int elem) = 0;
|
int y, int w, int h, int elem) = 0;
|
||||||
@@ -295,15 +295,7 @@ public:
|
|||||||
virtual void const_copy_to(const char *name, void *host, size_t size) = 0;
|
virtual void const_copy_to(const char *name, void *host, size_t size) = 0;
|
||||||
|
|
||||||
/* texture memory */
|
/* texture memory */
|
||||||
virtual void tex_alloc(const char * /*name*/,
|
virtual void tex_alloc(device_memory& /*mem*/) {};
|
||||||
device_memory& /*mem*/,
|
|
||||||
InterpolationType interpolation = INTERPOLATION_NONE,
|
|
||||||
ExtensionType extension = EXTENSION_REPEAT)
|
|
||||||
{
|
|
||||||
(void)interpolation; /* Ignored. */
|
|
||||||
(void)extension; /* Ignored. */
|
|
||||||
};
|
|
||||||
|
|
||||||
virtual void tex_free(device_memory& /*mem*/) {};
|
virtual void tex_free(device_memory& /*mem*/) {};
|
||||||
|
|
||||||
/* pixel memory */
|
/* pixel memory */
|
||||||
|
@@ -209,6 +209,7 @@ public:
|
|||||||
|
|
||||||
CPUDevice(DeviceInfo& info_, Stats &stats_, bool background_)
|
CPUDevice(DeviceInfo& info_, Stats &stats_, bool background_)
|
||||||
: Device(info_, stats_, background_),
|
: Device(info_, stats_, background_),
|
||||||
|
texture_info(this, "__texture_info"),
|
||||||
#define REGISTER_KERNEL(name) name ## _kernel(KERNEL_FUNCTIONS(name))
|
#define REGISTER_KERNEL(name) name ## _kernel(KERNEL_FUNCTIONS(name))
|
||||||
REGISTER_KERNEL(path_trace),
|
REGISTER_KERNEL(path_trace),
|
||||||
REGISTER_KERNEL(convert_to_half_float),
|
REGISTER_KERNEL(convert_to_half_float),
|
||||||
@@ -280,15 +281,15 @@ public:
|
|||||||
{
|
{
|
||||||
if(need_texture_info) {
|
if(need_texture_info) {
|
||||||
tex_free(texture_info);
|
tex_free(texture_info);
|
||||||
tex_alloc("__texture_info", texture_info, INTERPOLATION_NONE, EXTENSION_REPEAT);
|
tex_alloc(texture_info);
|
||||||
need_texture_info = false;
|
need_texture_info = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void mem_alloc(const char *name, device_memory& mem, MemoryType /*type*/)
|
void mem_alloc(device_memory& mem)
|
||||||
{
|
{
|
||||||
if(name) {
|
if(mem.name) {
|
||||||
VLOG(1) << "Buffer allocate: " << name << ", "
|
VLOG(1) << "Buffer allocate: " << mem.name << ", "
|
||||||
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
|
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
|
||||||
<< string_human_readable_size(mem.memory_size()) << ")";
|
<< string_human_readable_size(mem.memory_size()) << ")";
|
||||||
}
|
}
|
||||||
@@ -332,7 +333,7 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual device_ptr mem_alloc_sub_ptr(device_memory& mem, int offset, int /*size*/, MemoryType /*type*/)
|
virtual device_ptr mem_alloc_sub_ptr(device_memory& mem, int offset, int /*size*/)
|
||||||
{
|
{
|
||||||
return (device_ptr) (((char*) mem.device_pointer) + mem.memory_elements_size(offset));
|
return (device_ptr) (((char*) mem.device_pointer) + mem.memory_elements_size(offset));
|
||||||
}
|
}
|
||||||
@@ -342,32 +343,25 @@ public:
|
|||||||
kernel_const_copy(&kernel_globals, name, host, size);
|
kernel_const_copy(&kernel_globals, name, host, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void tex_alloc(const char *name,
|
void tex_alloc(device_memory& mem)
|
||||||
device_memory& mem,
|
|
||||||
InterpolationType interpolation,
|
|
||||||
ExtensionType extension)
|
|
||||||
{
|
{
|
||||||
VLOG(1) << "Texture allocate: " << name << ", "
|
VLOG(1) << "Texture allocate: " << mem.name << ", "
|
||||||
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
|
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
|
||||||
<< string_human_readable_size(mem.memory_size()) << ")";
|
<< string_human_readable_size(mem.memory_size()) << ")";
|
||||||
|
|
||||||
if(interpolation == INTERPOLATION_NONE) {
|
if(mem.interpolation == INTERPOLATION_NONE) {
|
||||||
/* Data texture. */
|
/* Data texture. */
|
||||||
kernel_tex_copy(&kernel_globals,
|
kernel_tex_copy(&kernel_globals,
|
||||||
name,
|
mem.name,
|
||||||
mem.data_pointer,
|
mem.data_pointer,
|
||||||
mem.data_width,
|
mem.data_width);
|
||||||
mem.data_height,
|
|
||||||
mem.data_depth,
|
|
||||||
interpolation,
|
|
||||||
extension);
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
/* Image Texture. */
|
/* Image Texture. */
|
||||||
int flat_slot = 0;
|
int flat_slot = 0;
|
||||||
if(string_startswith(name, "__tex_image")) {
|
if(string_startswith(mem.name, "__tex_image")) {
|
||||||
int pos = string(name).rfind("_");
|
int pos = string(mem.name).rfind("_");
|
||||||
flat_slot = atoi(name + pos + 1);
|
flat_slot = atoi(mem.name + pos + 1);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
assert(0);
|
assert(0);
|
||||||
@@ -382,8 +376,8 @@ public:
|
|||||||
TextureInfo& info = texture_info[flat_slot];
|
TextureInfo& info = texture_info[flat_slot];
|
||||||
info.data = (uint64_t)mem.data_pointer;
|
info.data = (uint64_t)mem.data_pointer;
|
||||||
info.cl_buffer = 0;
|
info.cl_buffer = 0;
|
||||||
info.interpolation = interpolation;
|
info.interpolation = mem.interpolation;
|
||||||
info.extension = extension;
|
info.extension = mem.extension;
|
||||||
info.width = mem.data_width;
|
info.width = mem.data_width;
|
||||||
info.height = mem.data_height;
|
info.height = mem.data_height;
|
||||||
info.depth = mem.data_depth;
|
info.depth = mem.data_depth;
|
||||||
@@ -437,7 +431,7 @@ public:
|
|||||||
|
|
||||||
bool denoising_set_tiles(device_ptr *buffers, DenoisingTask *task)
|
bool denoising_set_tiles(device_ptr *buffers, DenoisingTask *task)
|
||||||
{
|
{
|
||||||
mem_alloc("Denoising Tile Info", task->tiles_mem, MEM_READ_ONLY);
|
mem_alloc(task->tiles_mem);
|
||||||
|
|
||||||
TilesInfo *tiles = (TilesInfo*) task->tiles_mem.data_pointer;
|
TilesInfo *tiles = (TilesInfo*) task->tiles_mem.data_pointer;
|
||||||
for(int i = 0; i < 9; i++) {
|
for(int i = 0; i < 9; i++) {
|
||||||
@@ -728,9 +722,9 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* allocate buffer for kernel globals */
|
/* allocate buffer for kernel globals */
|
||||||
device_only_memory<KernelGlobals> kgbuffer;
|
device_only_memory<KernelGlobals> kgbuffer(this, "kernel_globals");
|
||||||
kgbuffer.resize(1);
|
kgbuffer.resize(1);
|
||||||
mem_alloc("kernel_globals", kgbuffer, MEM_READ_WRITE);
|
mem_alloc(kgbuffer);
|
||||||
|
|
||||||
KernelGlobals *kg = new ((void*) kgbuffer.device_pointer) KernelGlobals(thread_kernel_globals_init());
|
KernelGlobals *kg = new ((void*) kgbuffer.device_pointer) KernelGlobals(thread_kernel_globals_init());
|
||||||
|
|
||||||
@@ -751,8 +745,8 @@ public:
|
|||||||
while(task.acquire_tile(this, tile)) {
|
while(task.acquire_tile(this, tile)) {
|
||||||
if(tile.task == RenderTile::PATH_TRACE) {
|
if(tile.task == RenderTile::PATH_TRACE) {
|
||||||
if(use_split_kernel) {
|
if(use_split_kernel) {
|
||||||
device_memory data;
|
device_memory void_buffer(this, "void_buffer", MEM_READ_ONLY);
|
||||||
split_kernel->path_trace(&task, tile, kgbuffer, data);
|
split_kernel->path_trace(&task, tile, kgbuffer, void_buffer);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
path_trace(task, tile, kg);
|
path_trace(task, tile, kg);
|
||||||
|
@@ -217,7 +217,8 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
CUDADevice(DeviceInfo& info, Stats &stats, bool background_)
|
CUDADevice(DeviceInfo& info, Stats &stats, bool background_)
|
||||||
: Device(info, stats, background_)
|
: Device(info, stats, background_),
|
||||||
|
texture_info(this, "__texture_info")
|
||||||
{
|
{
|
||||||
first_error = true;
|
first_error = true;
|
||||||
background = background_;
|
background = background_;
|
||||||
@@ -548,17 +549,17 @@ public:
|
|||||||
{
|
{
|
||||||
if(info.has_bindless_textures && need_texture_info) {
|
if(info.has_bindless_textures && need_texture_info) {
|
||||||
tex_free(texture_info);
|
tex_free(texture_info);
|
||||||
tex_alloc("__texture_info", texture_info, INTERPOLATION_NONE, EXTENSION_REPEAT);
|
tex_alloc(texture_info);
|
||||||
need_texture_info = false;
|
need_texture_info = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void mem_alloc(const char *name, device_memory& mem, MemoryType /*type*/)
|
void mem_alloc(device_memory& mem)
|
||||||
{
|
{
|
||||||
CUDAContextScope scope(this);
|
CUDAContextScope scope(this);
|
||||||
|
|
||||||
if(name) {
|
if(mem.name) {
|
||||||
VLOG(1) << "Buffer allocate: " << name << ", "
|
VLOG(1) << "Buffer allocate: " << mem.name << ", "
|
||||||
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
|
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
|
||||||
<< string_human_readable_size(mem.memory_size()) << ")";
|
<< string_human_readable_size(mem.memory_size()) << ")";
|
||||||
}
|
}
|
||||||
@@ -619,7 +620,7 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual device_ptr mem_alloc_sub_ptr(device_memory& mem, int offset, int /*size*/, MemoryType /*type*/)
|
virtual device_ptr mem_alloc_sub_ptr(device_memory& mem, int offset, int /*size*/)
|
||||||
{
|
{
|
||||||
return (device_ptr) (((char*) mem.device_pointer) + mem.memory_elements_size(offset));
|
return (device_ptr) (((char*) mem.device_pointer) + mem.memory_elements_size(offset));
|
||||||
}
|
}
|
||||||
@@ -635,14 +636,11 @@ public:
|
|||||||
cuda_assert(cuMemcpyHtoD(mem, host, size));
|
cuda_assert(cuMemcpyHtoD(mem, host, size));
|
||||||
}
|
}
|
||||||
|
|
||||||
void tex_alloc(const char *name,
|
void tex_alloc(device_memory& mem)
|
||||||
device_memory& mem,
|
|
||||||
InterpolationType interpolation,
|
|
||||||
ExtensionType extension)
|
|
||||||
{
|
{
|
||||||
CUDAContextScope scope(this);
|
CUDAContextScope scope(this);
|
||||||
|
|
||||||
VLOG(1) << "Texture allocate: " << name << ", "
|
VLOG(1) << "Texture allocate: " << mem.name << ", "
|
||||||
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
|
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
|
||||||
<< string_human_readable_size(mem.memory_size()) << ")";
|
<< string_human_readable_size(mem.memory_size()) << ")";
|
||||||
|
|
||||||
@@ -650,12 +648,12 @@ public:
|
|||||||
bool has_bindless_textures = info.has_bindless_textures;
|
bool has_bindless_textures = info.has_bindless_textures;
|
||||||
|
|
||||||
/* General variables for both architectures */
|
/* General variables for both architectures */
|
||||||
string bind_name = name;
|
string bind_name = mem.name;
|
||||||
size_t dsize = datatype_size(mem.data_type);
|
size_t dsize = datatype_size(mem.data_type);
|
||||||
size_t size = mem.memory_size();
|
size_t size = mem.memory_size();
|
||||||
|
|
||||||
CUaddress_mode address_mode = CU_TR_ADDRESS_MODE_WRAP;
|
CUaddress_mode address_mode = CU_TR_ADDRESS_MODE_WRAP;
|
||||||
switch(extension) {
|
switch(mem.extension) {
|
||||||
case EXTENSION_REPEAT:
|
case EXTENSION_REPEAT:
|
||||||
address_mode = CU_TR_ADDRESS_MODE_WRAP;
|
address_mode = CU_TR_ADDRESS_MODE_WRAP;
|
||||||
break;
|
break;
|
||||||
@@ -671,7 +669,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
CUfilter_mode filter_mode;
|
CUfilter_mode filter_mode;
|
||||||
if(interpolation == INTERPOLATION_CLOSEST) {
|
if(mem.interpolation == INTERPOLATION_CLOSEST) {
|
||||||
filter_mode = CU_TR_FILTER_MODE_POINT;
|
filter_mode = CU_TR_FILTER_MODE_POINT;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
@@ -681,13 +679,13 @@ public:
|
|||||||
/* General variables for Fermi */
|
/* General variables for Fermi */
|
||||||
CUtexref texref = NULL;
|
CUtexref texref = NULL;
|
||||||
|
|
||||||
if(!has_bindless_textures && interpolation != INTERPOLATION_NONE) {
|
if(!has_bindless_textures && mem.interpolation != INTERPOLATION_NONE) {
|
||||||
if(mem.data_depth > 1) {
|
if(mem.data_depth > 1) {
|
||||||
/* Kernel uses different bind names for 2d and 3d float textures,
|
/* Kernel uses different bind names for 2d and 3d float textures,
|
||||||
* so we have to adjust couple of things here.
|
* so we have to adjust couple of things here.
|
||||||
*/
|
*/
|
||||||
vector<string> tokens;
|
vector<string> tokens;
|
||||||
string_split(tokens, name, "_");
|
string_split(tokens, mem.name, "_");
|
||||||
bind_name = string_printf("__tex_image_%s_3d_%s",
|
bind_name = string_printf("__tex_image_%s_3d_%s",
|
||||||
tokens[2].c_str(),
|
tokens[2].c_str(),
|
||||||
tokens[3].c_str());
|
tokens[3].c_str());
|
||||||
@@ -700,9 +698,9 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if(interpolation == INTERPOLATION_NONE) {
|
if(mem.interpolation == INTERPOLATION_NONE) {
|
||||||
/* Data Storage */
|
/* Data Storage */
|
||||||
mem_alloc(NULL, mem, MEM_READ_ONLY);
|
mem_alloc(mem);
|
||||||
mem_copy_to(mem);
|
mem_copy_to(mem);
|
||||||
|
|
||||||
CUdeviceptr cumem;
|
CUdeviceptr cumem;
|
||||||
@@ -802,9 +800,9 @@ public:
|
|||||||
if(has_bindless_textures) {
|
if(has_bindless_textures) {
|
||||||
/* Bindless Textures - Kepler */
|
/* Bindless Textures - Kepler */
|
||||||
int flat_slot = 0;
|
int flat_slot = 0;
|
||||||
if(string_startswith(name, "__tex_image")) {
|
if(string_startswith(mem.name, "__tex_image")) {
|
||||||
int pos = string(name).rfind("_");
|
int pos = string(mem.name).rfind("_");
|
||||||
flat_slot = atoi(name + pos + 1);
|
flat_slot = atoi(mem.name + pos + 1);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
assert(0);
|
assert(0);
|
||||||
@@ -843,8 +841,8 @@ public:
|
|||||||
TextureInfo& info = texture_info[flat_slot];
|
TextureInfo& info = texture_info[flat_slot];
|
||||||
info.data = (uint64_t)tex;
|
info.data = (uint64_t)tex;
|
||||||
info.cl_buffer = 0;
|
info.cl_buffer = 0;
|
||||||
info.interpolation = interpolation;
|
info.interpolation = mem.interpolation;
|
||||||
info.extension = extension;
|
info.extension = mem.extension;
|
||||||
info.width = mem.data_width;
|
info.width = mem.data_width;
|
||||||
info.height = mem.data_height;
|
info.height = mem.data_height;
|
||||||
info.depth = mem.data_depth;
|
info.depth = mem.data_depth;
|
||||||
@@ -869,7 +867,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Fermi and Kepler */
|
/* Fermi and Kepler */
|
||||||
tex_interp_map[mem.device_pointer] = (interpolation != INTERPOLATION_NONE);
|
tex_interp_map[mem.device_pointer] = (mem.interpolation != INTERPOLATION_NONE);
|
||||||
}
|
}
|
||||||
|
|
||||||
void tex_free(device_memory& mem)
|
void tex_free(device_memory& mem)
|
||||||
@@ -900,7 +898,7 @@ public:
|
|||||||
|
|
||||||
bool denoising_set_tiles(device_ptr *buffers, DenoisingTask *task)
|
bool denoising_set_tiles(device_ptr *buffers, DenoisingTask *task)
|
||||||
{
|
{
|
||||||
mem_alloc("Denoising Tile Info", task->tiles_mem, MEM_READ_ONLY);
|
mem_alloc(task->tiles_mem);
|
||||||
|
|
||||||
TilesInfo *tiles = (TilesInfo*) task->tiles_mem.data_pointer;
|
TilesInfo *tiles = (TilesInfo*) task->tiles_mem.data_pointer;
|
||||||
for(int i = 0; i < 9; i++) {
|
for(int i = 0; i < 9; i++) {
|
||||||
@@ -1297,7 +1295,7 @@ public:
|
|||||||
cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1));
|
cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1));
|
||||||
|
|
||||||
/* Allocate work tile. */
|
/* Allocate work tile. */
|
||||||
device_vector<WorkTile> work_tiles;
|
device_vector<WorkTile> work_tiles(this, "work_tiles", MEM_READ_ONLY);
|
||||||
work_tiles.resize(1);
|
work_tiles.resize(1);
|
||||||
|
|
||||||
WorkTile *wtile = work_tiles.get_data();
|
WorkTile *wtile = work_tiles.get_data();
|
||||||
@@ -1308,7 +1306,7 @@ public:
|
|||||||
wtile->offset = rtile.offset;
|
wtile->offset = rtile.offset;
|
||||||
wtile->stride = rtile.stride;
|
wtile->stride = rtile.stride;
|
||||||
wtile->buffer = (float*)cuda_device_ptr(rtile.buffer);
|
wtile->buffer = (float*)cuda_device_ptr(rtile.buffer);
|
||||||
mem_alloc("work_tiles", work_tiles, MEM_READ_ONLY);
|
mem_alloc(work_tiles);
|
||||||
|
|
||||||
CUdeviceptr d_work_tiles = cuda_device_ptr(work_tiles.device_pointer);
|
CUdeviceptr d_work_tiles = cuda_device_ptr(work_tiles.device_pointer);
|
||||||
|
|
||||||
@@ -1730,7 +1728,7 @@ public:
|
|||||||
while(task->acquire_tile(this, tile)) {
|
while(task->acquire_tile(this, tile)) {
|
||||||
if(tile.task == RenderTile::PATH_TRACE) {
|
if(tile.task == RenderTile::PATH_TRACE) {
|
||||||
if(use_split_kernel()) {
|
if(use_split_kernel()) {
|
||||||
device_memory void_buffer;
|
device_memory void_buffer(this, "void_buffer", MEM_READ_ONLY);
|
||||||
split_kernel->path_trace(task, tile, void_buffer, void_buffer);
|
split_kernel->path_trace(task, tile, void_buffer, void_buffer);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
@@ -1885,9 +1883,9 @@ uint64_t CUDASplitKernel::state_buffer_size(device_memory& /*kg*/, device_memory
|
|||||||
{
|
{
|
||||||
CUDAContextScope scope(device);
|
CUDAContextScope scope(device);
|
||||||
|
|
||||||
device_vector<uint64_t> size_buffer;
|
device_vector<uint64_t> size_buffer(device, "size_buffer", MEM_READ_WRITE);
|
||||||
size_buffer.resize(1);
|
size_buffer.resize(1);
|
||||||
device->mem_alloc(NULL, size_buffer, MEM_READ_WRITE);
|
device->mem_alloc(size_buffer);
|
||||||
|
|
||||||
uint threads = num_threads;
|
uint threads = num_threads;
|
||||||
CUdeviceptr d_size = device->cuda_device_ptr(size_buffer.device_pointer);
|
CUdeviceptr d_size = device->cuda_device_ptr(size_buffer.device_pointer);
|
||||||
|
@@ -76,21 +76,21 @@ bool DenoisingTask::run_denoising()
|
|||||||
buffer.h = rect.w - rect.y;
|
buffer.h = rect.w - rect.y;
|
||||||
buffer.pass_stride = align_up(buffer.w * buffer.h, divide_up(device->mem_address_alignment(), sizeof(float)));
|
buffer.pass_stride = align_up(buffer.w * buffer.h, divide_up(device->mem_address_alignment(), sizeof(float)));
|
||||||
buffer.mem.resize(buffer.pass_stride * buffer.passes);
|
buffer.mem.resize(buffer.pass_stride * buffer.passes);
|
||||||
device->mem_alloc("Denoising Pixel Buffer", buffer.mem, MEM_READ_WRITE);
|
device->mem_alloc(buffer.mem);
|
||||||
|
|
||||||
device_ptr null_ptr = (device_ptr) 0;
|
device_ptr null_ptr = (device_ptr) 0;
|
||||||
|
|
||||||
/* Prefilter shadow feature. */
|
/* Prefilter shadow feature. */
|
||||||
{
|
{
|
||||||
device_sub_ptr unfiltered_a (device, buffer.mem, 0, buffer.pass_stride, MEM_READ_WRITE);
|
device_sub_ptr unfiltered_a (buffer.mem, 0, buffer.pass_stride);
|
||||||
device_sub_ptr unfiltered_b (device, buffer.mem, 1*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
|
device_sub_ptr unfiltered_b (buffer.mem, 1*buffer.pass_stride, buffer.pass_stride);
|
||||||
device_sub_ptr sample_var (device, buffer.mem, 2*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
|
device_sub_ptr sample_var (buffer.mem, 2*buffer.pass_stride, buffer.pass_stride);
|
||||||
device_sub_ptr sample_var_var (device, buffer.mem, 3*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
|
device_sub_ptr sample_var_var (buffer.mem, 3*buffer.pass_stride, buffer.pass_stride);
|
||||||
device_sub_ptr buffer_var (device, buffer.mem, 5*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
|
device_sub_ptr buffer_var (buffer.mem, 5*buffer.pass_stride, buffer.pass_stride);
|
||||||
device_sub_ptr filtered_var (device, buffer.mem, 6*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
|
device_sub_ptr filtered_var (buffer.mem, 6*buffer.pass_stride, buffer.pass_stride);
|
||||||
device_sub_ptr nlm_temporary_1(device, buffer.mem, 7*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
|
device_sub_ptr nlm_temporary_1(buffer.mem, 7*buffer.pass_stride, buffer.pass_stride);
|
||||||
device_sub_ptr nlm_temporary_2(device, buffer.mem, 8*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
|
device_sub_ptr nlm_temporary_2(buffer.mem, 8*buffer.pass_stride, buffer.pass_stride);
|
||||||
device_sub_ptr nlm_temporary_3(device, buffer.mem, 9*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
|
device_sub_ptr nlm_temporary_3(buffer.mem, 9*buffer.pass_stride, buffer.pass_stride);
|
||||||
|
|
||||||
nlm_state.temporary_1_ptr = *nlm_temporary_1;
|
nlm_state.temporary_1_ptr = *nlm_temporary_1;
|
||||||
nlm_state.temporary_2_ptr = *nlm_temporary_2;
|
nlm_state.temporary_2_ptr = *nlm_temporary_2;
|
||||||
@@ -123,17 +123,17 @@ bool DenoisingTask::run_denoising()
|
|||||||
functions.non_local_means(filtered_b, filtered_a, residual_var, final_b);
|
functions.non_local_means(filtered_b, filtered_a, residual_var, final_b);
|
||||||
|
|
||||||
/* Combine the two double-filtered halves to a final shadow feature. */
|
/* Combine the two double-filtered halves to a final shadow feature. */
|
||||||
device_sub_ptr shadow_pass(device, buffer.mem, 4*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
|
device_sub_ptr shadow_pass(buffer.mem, 4*buffer.pass_stride, buffer.pass_stride);
|
||||||
functions.combine_halves(final_a, final_b, *shadow_pass, null_ptr, 0, rect);
|
functions.combine_halves(final_a, final_b, *shadow_pass, null_ptr, 0, rect);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Prefilter general features. */
|
/* Prefilter general features. */
|
||||||
{
|
{
|
||||||
device_sub_ptr unfiltered (device, buffer.mem, 8*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
|
device_sub_ptr unfiltered (buffer.mem, 8*buffer.pass_stride, buffer.pass_stride);
|
||||||
device_sub_ptr variance (device, buffer.mem, 9*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
|
device_sub_ptr variance (buffer.mem, 9*buffer.pass_stride, buffer.pass_stride);
|
||||||
device_sub_ptr nlm_temporary_1(device, buffer.mem, 10*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
|
device_sub_ptr nlm_temporary_1(buffer.mem, 10*buffer.pass_stride, buffer.pass_stride);
|
||||||
device_sub_ptr nlm_temporary_2(device, buffer.mem, 11*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
|
device_sub_ptr nlm_temporary_2(buffer.mem, 11*buffer.pass_stride, buffer.pass_stride);
|
||||||
device_sub_ptr nlm_temporary_3(device, buffer.mem, 12*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
|
device_sub_ptr nlm_temporary_3(buffer.mem, 12*buffer.pass_stride, buffer.pass_stride);
|
||||||
|
|
||||||
nlm_state.temporary_1_ptr = *nlm_temporary_1;
|
nlm_state.temporary_1_ptr = *nlm_temporary_1;
|
||||||
nlm_state.temporary_2_ptr = *nlm_temporary_2;
|
nlm_state.temporary_2_ptr = *nlm_temporary_2;
|
||||||
@@ -143,7 +143,7 @@ bool DenoisingTask::run_denoising()
|
|||||||
int variance_from[] = { 3, 4, 5, 13, 9, 10, 11};
|
int variance_from[] = { 3, 4, 5, 13, 9, 10, 11};
|
||||||
int pass_to[] = { 1, 2, 3, 0, 5, 6, 7};
|
int pass_to[] = { 1, 2, 3, 0, 5, 6, 7};
|
||||||
for(int pass = 0; pass < 7; pass++) {
|
for(int pass = 0; pass < 7; pass++) {
|
||||||
device_sub_ptr feature_pass(device, buffer.mem, pass_to[pass]*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
|
device_sub_ptr feature_pass(buffer.mem, pass_to[pass]*buffer.pass_stride, buffer.pass_stride);
|
||||||
/* Get the unfiltered pass and its variance from the RenderBuffers. */
|
/* Get the unfiltered pass and its variance from the RenderBuffers. */
|
||||||
functions.get_feature(mean_from[pass], variance_from[pass], *unfiltered, *variance);
|
functions.get_feature(mean_from[pass], variance_from[pass], *unfiltered, *variance);
|
||||||
/* Smooth the pass and store the result in the denoising buffers. */
|
/* Smooth the pass and store the result in the denoising buffers. */
|
||||||
@@ -160,20 +160,20 @@ bool DenoisingTask::run_denoising()
|
|||||||
int variance_to[] = {11, 12, 13};
|
int variance_to[] = {11, 12, 13};
|
||||||
int num_color_passes = 3;
|
int num_color_passes = 3;
|
||||||
|
|
||||||
device_only_memory<float> temp_color;
|
device_only_memory<float> temp_color(device, "Denoising temporary color");
|
||||||
temp_color.resize(3*buffer.pass_stride);
|
temp_color.resize(3*buffer.pass_stride);
|
||||||
device->mem_alloc("Denoising temporary color", temp_color, MEM_READ_WRITE);
|
device->mem_alloc(temp_color);
|
||||||
|
|
||||||
for(int pass = 0; pass < num_color_passes; pass++) {
|
for(int pass = 0; pass < num_color_passes; pass++) {
|
||||||
device_sub_ptr color_pass(device, temp_color, pass*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
|
device_sub_ptr color_pass(temp_color, pass*buffer.pass_stride, buffer.pass_stride);
|
||||||
device_sub_ptr color_var_pass(device, buffer.mem, variance_to[pass]*buffer.pass_stride, buffer.pass_stride, MEM_READ_WRITE);
|
device_sub_ptr color_var_pass(buffer.mem, variance_to[pass]*buffer.pass_stride, buffer.pass_stride);
|
||||||
functions.get_feature(mean_from[pass], variance_from[pass], *color_pass, *color_var_pass);
|
functions.get_feature(mean_from[pass], variance_from[pass], *color_pass, *color_var_pass);
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
device_sub_ptr depth_pass (device, buffer.mem, 0, buffer.pass_stride, MEM_READ_WRITE);
|
device_sub_ptr depth_pass (buffer.mem, 0, buffer.pass_stride);
|
||||||
device_sub_ptr color_var_pass(device, buffer.mem, variance_to[0]*buffer.pass_stride, 3*buffer.pass_stride, MEM_READ_WRITE);
|
device_sub_ptr color_var_pass(buffer.mem, variance_to[0]*buffer.pass_stride, 3*buffer.pass_stride);
|
||||||
device_sub_ptr output_pass (device, buffer.mem, mean_to[0]*buffer.pass_stride, 3*buffer.pass_stride, MEM_READ_WRITE);
|
device_sub_ptr output_pass (buffer.mem, mean_to[0]*buffer.pass_stride, 3*buffer.pass_stride);
|
||||||
functions.detect_outliers(temp_color.device_pointer, *color_var_pass, *depth_pass, *output_pass);
|
functions.detect_outliers(temp_color.device_pointer, *color_var_pass, *depth_pass, *output_pass);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -184,24 +184,24 @@ bool DenoisingTask::run_denoising()
|
|||||||
storage.h = filter_area.w;
|
storage.h = filter_area.w;
|
||||||
storage.transform.resize(storage.w*storage.h*TRANSFORM_SIZE);
|
storage.transform.resize(storage.w*storage.h*TRANSFORM_SIZE);
|
||||||
storage.rank.resize(storage.w*storage.h);
|
storage.rank.resize(storage.w*storage.h);
|
||||||
device->mem_alloc("Denoising Transform", storage.transform, MEM_READ_WRITE);
|
device->mem_alloc(storage.transform);
|
||||||
device->mem_alloc("Denoising Rank", storage.rank, MEM_READ_WRITE);
|
device->mem_alloc(storage.rank);
|
||||||
|
|
||||||
functions.construct_transform();
|
functions.construct_transform();
|
||||||
|
|
||||||
device_only_memory<float> temporary_1;
|
device_only_memory<float> temporary_1(device, "Denoising NLM temporary 1");
|
||||||
device_only_memory<float> temporary_2;
|
device_only_memory<float> temporary_2(device, "Denoising NLM temporary 2");
|
||||||
temporary_1.resize(buffer.w*buffer.h);
|
temporary_1.resize(buffer.w*buffer.h);
|
||||||
temporary_2.resize(buffer.w*buffer.h);
|
temporary_2.resize(buffer.w*buffer.h);
|
||||||
device->mem_alloc("Denoising NLM temporary 1", temporary_1, MEM_READ_WRITE);
|
device->mem_alloc(temporary_1);
|
||||||
device->mem_alloc("Denoising NLM temporary 2", temporary_2, MEM_READ_WRITE);
|
device->mem_alloc(temporary_2);
|
||||||
reconstruction_state.temporary_1_ptr = temporary_1.device_pointer;
|
reconstruction_state.temporary_1_ptr = temporary_1.device_pointer;
|
||||||
reconstruction_state.temporary_2_ptr = temporary_2.device_pointer;
|
reconstruction_state.temporary_2_ptr = temporary_2.device_pointer;
|
||||||
|
|
||||||
storage.XtWX.resize(storage.w*storage.h*XTWX_SIZE);
|
storage.XtWX.resize(storage.w*storage.h*XTWX_SIZE);
|
||||||
storage.XtWY.resize(storage.w*storage.h*XTWY_SIZE);
|
storage.XtWY.resize(storage.w*storage.h*XTWY_SIZE);
|
||||||
device->mem_alloc("Denoising XtWX", storage.XtWX, MEM_READ_WRITE);
|
device->mem_alloc(storage.XtWX);
|
||||||
device->mem_alloc("Denoising XtWY", storage.XtWY, MEM_READ_WRITE);
|
device->mem_alloc(storage.XtWY);
|
||||||
|
|
||||||
reconstruction_state.filter_rect = make_int4(filter_area.x-rect.x, filter_area.y-rect.y, storage.w, storage.h);
|
reconstruction_state.filter_rect = make_int4(filter_area.x-rect.x, filter_area.y-rect.y, storage.w, storage.h);
|
||||||
int tile_coordinate_offset = filter_area.y*render_buffer.stride + filter_area.x;
|
int tile_coordinate_offset = filter_area.y*render_buffer.stride + filter_area.x;
|
||||||
@@ -213,8 +213,8 @@ bool DenoisingTask::run_denoising()
|
|||||||
reconstruction_state.source_h = rect.w-rect.y;
|
reconstruction_state.source_h = rect.w-rect.y;
|
||||||
|
|
||||||
{
|
{
|
||||||
device_sub_ptr color_ptr (device, buffer.mem, 8*buffer.pass_stride, 3*buffer.pass_stride, MEM_READ_WRITE);
|
device_sub_ptr color_ptr (buffer.mem, 8*buffer.pass_stride, 3*buffer.pass_stride);
|
||||||
device_sub_ptr color_var_ptr(device, buffer.mem, 11*buffer.pass_stride, 3*buffer.pass_stride, MEM_READ_WRITE);
|
device_sub_ptr color_var_ptr(buffer.mem, 11*buffer.pass_stride, 3*buffer.pass_stride);
|
||||||
functions.reconstruct(*color_ptr, *color_var_ptr, render_buffer.ptr);
|
functions.reconstruct(*color_ptr, *color_var_ptr, render_buffer.ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -123,9 +123,21 @@ public:
|
|||||||
device_only_memory<float3> XtWY;
|
device_only_memory<float3> XtWY;
|
||||||
int w;
|
int w;
|
||||||
int h;
|
int h;
|
||||||
|
|
||||||
|
Storage(Device *device)
|
||||||
|
: transform(device, "denoising transform"),
|
||||||
|
rank(device, "denoising rank"),
|
||||||
|
XtWX(device, "denoising XtWX"),
|
||||||
|
XtWY(device, "denoising XtWY")
|
||||||
|
{}
|
||||||
} storage;
|
} storage;
|
||||||
|
|
||||||
DenoisingTask(Device *device) : device(device) {}
|
DenoisingTask(Device *device)
|
||||||
|
: tiles_mem(device, "denoising tiles_mem", MEM_READ_WRITE),
|
||||||
|
storage(device),
|
||||||
|
buffer(device),
|
||||||
|
device(device)
|
||||||
|
{}
|
||||||
|
|
||||||
void init_from_devicetask(const DeviceTask &task);
|
void init_from_devicetask(const DeviceTask &task);
|
||||||
|
|
||||||
@@ -137,6 +149,10 @@ public:
|
|||||||
int w;
|
int w;
|
||||||
int h;
|
int h;
|
||||||
device_only_memory<float> mem;
|
device_only_memory<float> mem;
|
||||||
|
|
||||||
|
DenoiseBuffers(Device *device)
|
||||||
|
: mem(device, "denoising pixel buffer")
|
||||||
|
{}
|
||||||
} buffer;
|
} buffer;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
60
intern/cycles/device/device_memory.cpp
Normal file
60
intern/cycles/device/device_memory.cpp
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2011-2017 Blender Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "device/device.h"
|
||||||
|
#include "device/device_memory.h"
|
||||||
|
|
||||||
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
/* Device Memory */
|
||||||
|
|
||||||
|
device_memory::device_memory(Device *device, const char *name, MemoryType type)
|
||||||
|
: data_type(device_type_traits<uchar>::data_type),
|
||||||
|
data_elements(device_type_traits<uchar>::num_elements),
|
||||||
|
data_pointer(0),
|
||||||
|
data_size(0),
|
||||||
|
device_size(0),
|
||||||
|
data_width(0),
|
||||||
|
data_height(0),
|
||||||
|
data_depth(0),
|
||||||
|
type(type),
|
||||||
|
name(name),
|
||||||
|
interpolation(INTERPOLATION_NONE),
|
||||||
|
extension(EXTENSION_REPEAT),
|
||||||
|
device(device),
|
||||||
|
device_pointer(0)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
device_memory::~device_memory()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Device Sub Ptr */
|
||||||
|
|
||||||
|
device_sub_ptr::device_sub_ptr(device_memory& mem, int offset, int size)
|
||||||
|
: device(mem.device)
|
||||||
|
{
|
||||||
|
ptr = device->mem_alloc_sub_ptr(mem, offset, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
device_sub_ptr::~device_sub_ptr()
|
||||||
|
{
|
||||||
|
device->mem_free_sub_ptr(ptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
CCL_NAMESPACE_END
|
||||||
|
|
@@ -30,6 +30,7 @@
|
|||||||
|
|
||||||
#include "util/util_debug.h"
|
#include "util/util_debug.h"
|
||||||
#include "util/util_half.h"
|
#include "util/util_half.h"
|
||||||
|
#include "util/util_texture.h"
|
||||||
#include "util/util_types.h"
|
#include "util/util_types.h"
|
||||||
#include "util/util_vector.h"
|
#include "util/util_vector.h"
|
||||||
|
|
||||||
@@ -190,23 +191,17 @@ public:
|
|||||||
size_t data_width;
|
size_t data_width;
|
||||||
size_t data_height;
|
size_t data_height;
|
||||||
size_t data_depth;
|
size_t data_depth;
|
||||||
|
MemoryType type;
|
||||||
|
const char *name;
|
||||||
|
InterpolationType interpolation;
|
||||||
|
ExtensionType extension;
|
||||||
|
|
||||||
/* device pointer */
|
/* device pointer */
|
||||||
|
Device *device;
|
||||||
device_ptr device_pointer;
|
device_ptr device_pointer;
|
||||||
|
|
||||||
device_memory()
|
device_memory(Device *device, const char *name, MemoryType type);
|
||||||
{
|
virtual ~device_memory();
|
||||||
data_type = device_type_traits<uchar>::data_type;
|
|
||||||
data_elements = device_type_traits<uchar>::num_elements;
|
|
||||||
data_pointer = 0;
|
|
||||||
data_size = 0;
|
|
||||||
device_size = 0;
|
|
||||||
data_width = 0;
|
|
||||||
data_height = 0;
|
|
||||||
data_depth = 0;
|
|
||||||
device_pointer = 0;
|
|
||||||
}
|
|
||||||
virtual ~device_memory() { assert(!device_pointer); }
|
|
||||||
|
|
||||||
void resize(size_t size)
|
void resize(size_t size)
|
||||||
{
|
{
|
||||||
@@ -224,7 +219,8 @@ template<typename T>
|
|||||||
class device_only_memory : public device_memory
|
class device_only_memory : public device_memory
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
device_only_memory()
|
device_only_memory(Device *device, const char *name)
|
||||||
|
: device_memory(device, name, MEM_READ_WRITE)
|
||||||
{
|
{
|
||||||
data_type = device_type_traits<T>::data_type;
|
data_type = device_type_traits<T>::data_type;
|
||||||
data_elements = max(device_type_traits<T>::num_elements, 1);
|
data_elements = max(device_type_traits<T>::num_elements, 1);
|
||||||
@@ -241,7 +237,8 @@ public:
|
|||||||
template<typename T> class device_vector : public device_memory
|
template<typename T> class device_vector : public device_memory
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
device_vector()
|
device_vector(Device *device, const char *name, MemoryType type = MEM_READ_ONLY)
|
||||||
|
: device_memory(device, name, type)
|
||||||
{
|
{
|
||||||
data_type = device_type_traits<T>::data_type;
|
data_type = device_type_traits<T>::data_type;
|
||||||
data_elements = device_type_traits<T>::num_elements;
|
data_elements = device_type_traits<T>::num_elements;
|
||||||
@@ -317,7 +314,7 @@ private:
|
|||||||
class device_sub_ptr
|
class device_sub_ptr
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
device_sub_ptr(Device *device, device_memory& mem, int offset, int size, MemoryType type);
|
device_sub_ptr(device_memory& mem, int offset, int size);
|
||||||
~device_sub_ptr();
|
~device_sub_ptr();
|
||||||
/* No copying. */
|
/* No copying. */
|
||||||
device_sub_ptr& operator = (const device_sub_ptr&);
|
device_sub_ptr& operator = (const device_sub_ptr&);
|
||||||
|
@@ -106,11 +106,11 @@ public:
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void mem_alloc(const char *name, device_memory& mem, MemoryType type)
|
void mem_alloc(device_memory& mem)
|
||||||
{
|
{
|
||||||
foreach(SubDevice& sub, devices) {
|
foreach(SubDevice& sub, devices) {
|
||||||
mem.device_pointer = 0;
|
mem.device_pointer = 0;
|
||||||
sub.device->mem_alloc(name, mem, type);
|
sub.device->mem_alloc(mem);
|
||||||
sub.ptr_map[unique_ptr] = mem.device_pointer;
|
sub.ptr_map[unique_ptr] = mem.device_pointer;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -179,19 +179,15 @@ public:
|
|||||||
sub.device->const_copy_to(name, host, size);
|
sub.device->const_copy_to(name, host, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void tex_alloc(const char *name,
|
void tex_alloc(device_memory& mem)
|
||||||
device_memory& mem,
|
|
||||||
InterpolationType
|
|
||||||
interpolation,
|
|
||||||
ExtensionType extension)
|
|
||||||
{
|
{
|
||||||
VLOG(1) << "Texture allocate: " << name << ", "
|
VLOG(1) << "Texture allocate: " << mem.name << ", "
|
||||||
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
|
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
|
||||||
<< string_human_readable_size(mem.memory_size()) << ")";
|
<< string_human_readable_size(mem.memory_size()) << ")";
|
||||||
|
|
||||||
foreach(SubDevice& sub, devices) {
|
foreach(SubDevice& sub, devices) {
|
||||||
mem.device_pointer = 0;
|
mem.device_pointer = 0;
|
||||||
sub.device->tex_alloc(name, mem, interpolation, extension);
|
sub.device->tex_alloc(mem);
|
||||||
sub.ptr_map[unique_ptr] = mem.device_pointer;
|
sub.ptr_map[unique_ptr] = mem.device_pointer;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -314,7 +310,7 @@ public:
|
|||||||
tiles[i].buffers->copy_from_device();
|
tiles[i].buffers->copy_from_device();
|
||||||
device_ptr original_ptr = mem.device_pointer;
|
device_ptr original_ptr = mem.device_pointer;
|
||||||
mem.device_pointer = 0;
|
mem.device_pointer = 0;
|
||||||
sub_device->mem_alloc("Temporary memory for neighboring tile", mem, MEM_READ_WRITE);
|
sub_device->mem_alloc(mem);
|
||||||
sub_device->mem_copy_to(mem);
|
sub_device->mem_copy_to(mem);
|
||||||
tiles[i].buffer = mem.device_pointer;
|
tiles[i].buffer = mem.device_pointer;
|
||||||
mem.device_pointer = original_ptr;
|
mem.device_pointer = original_ptr;
|
||||||
|
@@ -87,10 +87,10 @@ public:
|
|||||||
snd.write();
|
snd.write();
|
||||||
}
|
}
|
||||||
|
|
||||||
void mem_alloc(const char *name, device_memory& mem, MemoryType type)
|
void mem_alloc(device_memory& mem)
|
||||||
{
|
{
|
||||||
if(name) {
|
if(mem.name) {
|
||||||
VLOG(1) << "Buffer allocate: " << name << ", "
|
VLOG(1) << "Buffer allocate: " << mem.name << ", "
|
||||||
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
|
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
|
||||||
<< string_human_readable_size(mem.memory_size()) << ")";
|
<< string_human_readable_size(mem.memory_size()) << ")";
|
||||||
}
|
}
|
||||||
@@ -100,9 +100,7 @@ public:
|
|||||||
mem.device_pointer = ++mem_counter;
|
mem.device_pointer = ++mem_counter;
|
||||||
|
|
||||||
RPCSend snd(socket, &error_func, "mem_alloc");
|
RPCSend snd(socket, &error_func, "mem_alloc");
|
||||||
|
|
||||||
snd.add(mem);
|
snd.add(mem);
|
||||||
snd.add(type);
|
|
||||||
snd.write();
|
snd.write();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -174,12 +172,9 @@ public:
|
|||||||
snd.write_buffer(host, size);
|
snd.write_buffer(host, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void tex_alloc(const char *name,
|
void tex_alloc(device_memory& mem)
|
||||||
device_memory& mem,
|
|
||||||
InterpolationType interpolation,
|
|
||||||
ExtensionType extension)
|
|
||||||
{
|
{
|
||||||
VLOG(1) << "Texture allocate: " << name << ", "
|
VLOG(1) << "Texture allocate: " << mem.name << ", "
|
||||||
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
|
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
|
||||||
<< string_human_readable_size(mem.memory_size()) << ")";
|
<< string_human_readable_size(mem.memory_size()) << ")";
|
||||||
|
|
||||||
@@ -188,13 +183,7 @@ public:
|
|||||||
mem.device_pointer = ++mem_counter;
|
mem.device_pointer = ++mem_counter;
|
||||||
|
|
||||||
RPCSend snd(socket, &error_func, "tex_alloc");
|
RPCSend snd(socket, &error_func, "tex_alloc");
|
||||||
|
|
||||||
string name_string(name);
|
|
||||||
|
|
||||||
snd.add(name_string);
|
|
||||||
snd.add(mem);
|
snd.add(mem);
|
||||||
snd.add(interpolation);
|
|
||||||
snd.add(extension);
|
|
||||||
snd.write();
|
snd.write();
|
||||||
snd.write_buffer((void*)mem.data_pointer, mem.memory_size());
|
snd.write_buffer((void*)mem.data_pointer, mem.memory_size());
|
||||||
}
|
}
|
||||||
@@ -470,16 +459,12 @@ protected:
|
|||||||
void process(RPCReceive& rcv, thread_scoped_lock &lock)
|
void process(RPCReceive& rcv, thread_scoped_lock &lock)
|
||||||
{
|
{
|
||||||
if(rcv.name == "mem_alloc") {
|
if(rcv.name == "mem_alloc") {
|
||||||
MemoryType type;
|
string name;
|
||||||
network_device_memory mem;
|
network_device_memory mem(device);
|
||||||
device_ptr client_pointer;
|
rcv.read(mem, name);
|
||||||
|
|
||||||
rcv.read(mem);
|
|
||||||
rcv.read(type);
|
|
||||||
|
|
||||||
lock.unlock();
|
lock.unlock();
|
||||||
|
|
||||||
client_pointer = mem.device_pointer;
|
device_ptr client_pointer = mem.device_pointer;
|
||||||
|
|
||||||
/* create a memory buffer for the device buffer */
|
/* create a memory buffer for the device buffer */
|
||||||
size_t data_size = mem.memory_size();
|
size_t data_size = mem.memory_size();
|
||||||
@@ -491,15 +476,15 @@ protected:
|
|||||||
mem.data_pointer = 0;
|
mem.data_pointer = 0;
|
||||||
|
|
||||||
/* perform the allocation on the actual device */
|
/* perform the allocation on the actual device */
|
||||||
device->mem_alloc(NULL, mem, type);
|
device->mem_alloc(mem);
|
||||||
|
|
||||||
/* store a mapping to/from client_pointer and real device pointer */
|
/* store a mapping to/from client_pointer and real device pointer */
|
||||||
pointer_mapping_insert(client_pointer, mem.device_pointer);
|
pointer_mapping_insert(client_pointer, mem.device_pointer);
|
||||||
}
|
}
|
||||||
else if(rcv.name == "mem_copy_to") {
|
else if(rcv.name == "mem_copy_to") {
|
||||||
network_device_memory mem;
|
string name;
|
||||||
|
network_device_memory mem(device);
|
||||||
rcv.read(mem);
|
rcv.read(mem, name);
|
||||||
lock.unlock();
|
lock.unlock();
|
||||||
|
|
||||||
device_ptr client_pointer = mem.device_pointer;
|
device_ptr client_pointer = mem.device_pointer;
|
||||||
@@ -521,10 +506,11 @@ protected:
|
|||||||
device->mem_copy_to(mem);
|
device->mem_copy_to(mem);
|
||||||
}
|
}
|
||||||
else if(rcv.name == "mem_copy_from") {
|
else if(rcv.name == "mem_copy_from") {
|
||||||
network_device_memory mem;
|
string name;
|
||||||
|
network_device_memory mem(device);
|
||||||
int y, w, h, elem;
|
int y, w, h, elem;
|
||||||
|
|
||||||
rcv.read(mem);
|
rcv.read(mem, name);
|
||||||
rcv.read(y);
|
rcv.read(y);
|
||||||
rcv.read(w);
|
rcv.read(w);
|
||||||
rcv.read(h);
|
rcv.read(h);
|
||||||
@@ -547,9 +533,9 @@ protected:
|
|||||||
lock.unlock();
|
lock.unlock();
|
||||||
}
|
}
|
||||||
else if(rcv.name == "mem_zero") {
|
else if(rcv.name == "mem_zero") {
|
||||||
network_device_memory mem;
|
string name;
|
||||||
|
network_device_memory mem(device);
|
||||||
rcv.read(mem);
|
rcv.read(mem, name);
|
||||||
lock.unlock();
|
lock.unlock();
|
||||||
|
|
||||||
device_ptr client_pointer = mem.device_pointer;
|
device_ptr client_pointer = mem.device_pointer;
|
||||||
@@ -562,13 +548,13 @@ protected:
|
|||||||
device->mem_zero(mem);
|
device->mem_zero(mem);
|
||||||
}
|
}
|
||||||
else if(rcv.name == "mem_free") {
|
else if(rcv.name == "mem_free") {
|
||||||
network_device_memory mem;
|
string name;
|
||||||
device_ptr client_pointer;
|
network_device_memory mem(device);
|
||||||
|
|
||||||
rcv.read(mem);
|
rcv.read(mem, name);
|
||||||
lock.unlock();
|
lock.unlock();
|
||||||
|
|
||||||
client_pointer = mem.device_pointer;
|
device_ptr client_pointer = mem.device_pointer;
|
||||||
|
|
||||||
mem.device_pointer = device_ptr_from_client_pointer_erase(client_pointer);
|
mem.device_pointer = device_ptr_from_client_pointer_erase(client_pointer);
|
||||||
|
|
||||||
@@ -588,16 +574,11 @@ protected:
|
|||||||
device->const_copy_to(name_string.c_str(), &host_vector[0], size);
|
device->const_copy_to(name_string.c_str(), &host_vector[0], size);
|
||||||
}
|
}
|
||||||
else if(rcv.name == "tex_alloc") {
|
else if(rcv.name == "tex_alloc") {
|
||||||
network_device_memory mem;
|
|
||||||
string name;
|
string name;
|
||||||
InterpolationType interpolation;
|
network_device_memory mem(device);
|
||||||
ExtensionType extension_type;
|
|
||||||
device_ptr client_pointer;
|
device_ptr client_pointer;
|
||||||
|
|
||||||
rcv.read(name);
|
rcv.read(mem, name);
|
||||||
rcv.read(mem);
|
|
||||||
rcv.read(interpolation);
|
|
||||||
rcv.read(extension_type);
|
|
||||||
lock.unlock();
|
lock.unlock();
|
||||||
|
|
||||||
client_pointer = mem.device_pointer;
|
client_pointer = mem.device_pointer;
|
||||||
@@ -613,15 +594,16 @@ protected:
|
|||||||
|
|
||||||
rcv.read_buffer((uint8_t*)mem.data_pointer, data_size);
|
rcv.read_buffer((uint8_t*)mem.data_pointer, data_size);
|
||||||
|
|
||||||
device->tex_alloc(name.c_str(), mem, interpolation, extension_type);
|
device->tex_alloc(mem);
|
||||||
|
|
||||||
pointer_mapping_insert(client_pointer, mem.device_pointer);
|
pointer_mapping_insert(client_pointer, mem.device_pointer);
|
||||||
}
|
}
|
||||||
else if(rcv.name == "tex_free") {
|
else if(rcv.name == "tex_free") {
|
||||||
network_device_memory mem;
|
string name;
|
||||||
|
network_device_memory mem(device);
|
||||||
device_ptr client_pointer;
|
device_ptr client_pointer;
|
||||||
|
|
||||||
rcv.read(mem);
|
rcv.read(mem, name);
|
||||||
lock.unlock();
|
lock.unlock();
|
||||||
|
|
||||||
client_pointer = mem.device_pointer;
|
client_pointer = mem.device_pointer;
|
||||||
|
@@ -38,6 +38,7 @@
|
|||||||
#include "util/util_foreach.h"
|
#include "util/util_foreach.h"
|
||||||
#include "util/util_list.h"
|
#include "util/util_list.h"
|
||||||
#include "util/util_map.h"
|
#include "util/util_map.h"
|
||||||
|
#include "util/util_param.h"
|
||||||
#include "util/util_string.h"
|
#include "util/util_string.h"
|
||||||
|
|
||||||
CCL_NAMESPACE_BEGIN
|
CCL_NAMESPACE_BEGIN
|
||||||
@@ -68,8 +69,15 @@ typedef boost::archive::binary_iarchive i_archive;
|
|||||||
class network_device_memory : public device_memory
|
class network_device_memory : public device_memory
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
network_device_memory() {}
|
network_device_memory(Device *device)
|
||||||
~network_device_memory() { device_pointer = 0; };
|
: device_memory(device, "", MEM_READ_ONLY)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
~network_device_memory()
|
||||||
|
{
|
||||||
|
device_pointer = 0;
|
||||||
|
};
|
||||||
|
|
||||||
vector<char> local_data;
|
vector<char> local_data;
|
||||||
};
|
};
|
||||||
@@ -119,6 +127,9 @@ public:
|
|||||||
{
|
{
|
||||||
archive & mem.data_type & mem.data_elements & mem.data_size;
|
archive & mem.data_type & mem.data_elements & mem.data_size;
|
||||||
archive & mem.data_width & mem.data_height & mem.data_depth & mem.device_pointer;
|
archive & mem.data_width & mem.data_height & mem.data_depth & mem.device_pointer;
|
||||||
|
archive & mem.type & string(mem.name);
|
||||||
|
archive & mem.interpolation & mem.extension;
|
||||||
|
archive & mem.device_pointer;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T> void add(const T& data)
|
template<typename T> void add(const T& data)
|
||||||
@@ -258,11 +269,15 @@ public:
|
|||||||
delete archive_stream;
|
delete archive_stream;
|
||||||
}
|
}
|
||||||
|
|
||||||
void read(network_device_memory& mem)
|
void read(network_device_memory& mem, string& name)
|
||||||
{
|
{
|
||||||
*archive & mem.data_type & mem.data_elements & mem.data_size;
|
*archive & mem.data_type & mem.data_elements & mem.data_size;
|
||||||
*archive & mem.data_width & mem.data_height & mem.data_depth & mem.device_pointer;
|
*archive & mem.data_width & mem.data_height & mem.data_depth & mem.device_pointer;
|
||||||
|
*archive & mem.type & name;
|
||||||
|
*archive & mem.interpolation & mem.extension;
|
||||||
|
*archive & mem.device_pointer;
|
||||||
|
|
||||||
|
mem.name = name.c_str();
|
||||||
mem.data_pointer = 0;
|
mem.data_pointer = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -26,7 +26,13 @@ CCL_NAMESPACE_BEGIN
|
|||||||
|
|
||||||
static const double alpha = 0.1; /* alpha for rolling average */
|
static const double alpha = 0.1; /* alpha for rolling average */
|
||||||
|
|
||||||
DeviceSplitKernel::DeviceSplitKernel(Device *device) : device(device)
|
DeviceSplitKernel::DeviceSplitKernel(Device *device)
|
||||||
|
: device(device),
|
||||||
|
split_data(device, "split_data", MEM_READ_WRITE),
|
||||||
|
ray_state(device, "ray_state", MEM_READ_WRITE),
|
||||||
|
queue_index(device, "queue_index"),
|
||||||
|
use_queues_flag(device, "use_queues_flag"),
|
||||||
|
work_pool_wgs(device, "work_pool_wgs")
|
||||||
{
|
{
|
||||||
current_max_closure = -1;
|
current_max_closure = -1;
|
||||||
first_tile = true;
|
first_tile = true;
|
||||||
@@ -170,19 +176,19 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
|
|||||||
|
|
||||||
/* Allocate work_pool_wgs memory. */
|
/* Allocate work_pool_wgs memory. */
|
||||||
work_pool_wgs.resize(max_work_groups);
|
work_pool_wgs.resize(max_work_groups);
|
||||||
device->mem_alloc("work_pool_wgs", work_pool_wgs, MEM_READ_WRITE);
|
device->mem_alloc(work_pool_wgs);
|
||||||
|
|
||||||
queue_index.resize(NUM_QUEUES);
|
queue_index.resize(NUM_QUEUES);
|
||||||
device->mem_alloc("queue_index", queue_index, MEM_READ_WRITE);
|
device->mem_alloc(queue_index);
|
||||||
|
|
||||||
use_queues_flag.resize(1);
|
use_queues_flag.resize(1);
|
||||||
device->mem_alloc("use_queues_flag", use_queues_flag, MEM_READ_WRITE);
|
device->mem_alloc(use_queues_flag);
|
||||||
|
|
||||||
ray_state.resize(num_global_elements);
|
ray_state.resize(num_global_elements);
|
||||||
device->mem_alloc("ray_state", ray_state, MEM_READ_WRITE);
|
device->mem_alloc(ray_state);
|
||||||
|
|
||||||
split_data.resize(state_buffer_size(kgbuffer, kernel_data, num_global_elements));
|
split_data.resize(state_buffer_size(kgbuffer, kernel_data, num_global_elements));
|
||||||
device->mem_alloc("split_data", split_data, MEM_READ_WRITE);
|
device->mem_alloc(split_data);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define ENQUEUE_SPLIT_KERNEL(name, global_size, local_size) \
|
#define ENQUEUE_SPLIT_KERNEL(name, global_size, local_size) \
|
||||||
|
@@ -73,10 +73,12 @@ void MemoryManager::DeviceBuffer::update_device_memory(OpenCLDeviceBase *device)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
device_memory *new_buffer = new device_memory;
|
device_memory *new_buffer = new device_memory(device,
|
||||||
|
"memory manager buffer",
|
||||||
|
MEM_READ_ONLY);
|
||||||
|
|
||||||
new_buffer->resize(total_size);
|
new_buffer->resize(total_size);
|
||||||
device->mem_alloc(string_printf("buffer_%p", this).data(), *new_buffer, MEM_READ_ONLY);
|
device->mem_alloc(*new_buffer);
|
||||||
|
|
||||||
size_t offset = 0;
|
size_t offset = 0;
|
||||||
|
|
||||||
@@ -161,8 +163,14 @@ MemoryManager::DeviceBuffer* MemoryManager::smallest_device_buffer()
|
|||||||
return smallest;
|
return smallest;
|
||||||
}
|
}
|
||||||
|
|
||||||
MemoryManager::MemoryManager(OpenCLDeviceBase *device) : device(device), need_update(false)
|
MemoryManager::MemoryManager(OpenCLDeviceBase *device)
|
||||||
|
: device(device), need_update(false)
|
||||||
{
|
{
|
||||||
|
foreach(DeviceBuffer& device_buffer, device_buffers) {
|
||||||
|
device_buffer.buffer = new device_memory(device,
|
||||||
|
"memory manager buffer",
|
||||||
|
MEM_READ_ONLY);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void MemoryManager::free()
|
void MemoryManager::free()
|
||||||
|
@@ -60,11 +60,13 @@ private:
|
|||||||
vector<Allocation*> allocations;
|
vector<Allocation*> allocations;
|
||||||
size_t size; /* Size of all allocations. */
|
size_t size; /* Size of all allocations. */
|
||||||
|
|
||||||
DeviceBuffer() : buffer(new device_memory), size(0)
|
DeviceBuffer()
|
||||||
|
: buffer(NULL), size(0)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
~DeviceBuffer() {
|
~DeviceBuffer()
|
||||||
|
{
|
||||||
delete buffer;
|
delete buffer;
|
||||||
buffer = NULL;
|
buffer = NULL;
|
||||||
}
|
}
|
||||||
|
@@ -340,7 +340,7 @@ public:
|
|||||||
virtual bool load_kernels(const DeviceRequestedFeatures& requested_features,
|
virtual bool load_kernels(const DeviceRequestedFeatures& requested_features,
|
||||||
vector<OpenCLProgram*> &programs) = 0;
|
vector<OpenCLProgram*> &programs) = 0;
|
||||||
|
|
||||||
void mem_alloc(const char *name, device_memory& mem, MemoryType type);
|
void mem_alloc(device_memory& mem);
|
||||||
void mem_copy_to(device_memory& mem);
|
void mem_copy_to(device_memory& mem);
|
||||||
void mem_copy_from(device_memory& mem, int y, int w, int h, int elem);
|
void mem_copy_from(device_memory& mem, int y, int w, int h, int elem);
|
||||||
void mem_zero(device_memory& mem);
|
void mem_zero(device_memory& mem);
|
||||||
@@ -349,10 +349,7 @@ public:
|
|||||||
int mem_address_alignment();
|
int mem_address_alignment();
|
||||||
|
|
||||||
void const_copy_to(const char *name, void *host, size_t size);
|
void const_copy_to(const char *name, void *host, size_t size);
|
||||||
void tex_alloc(const char *name,
|
void tex_alloc(device_memory& mem);
|
||||||
device_memory& mem,
|
|
||||||
InterpolationType /*interpolation*/,
|
|
||||||
ExtensionType /*extension*/);
|
|
||||||
void tex_free(device_memory& mem);
|
void tex_free(device_memory& mem);
|
||||||
|
|
||||||
size_t global_size_round_up(int group_size, int global_size);
|
size_t global_size_round_up(int group_size, int global_size);
|
||||||
@@ -440,7 +437,7 @@ protected:
|
|||||||
bool denoising_set_tiles(device_ptr *buffers,
|
bool denoising_set_tiles(device_ptr *buffers,
|
||||||
DenoisingTask *task);
|
DenoisingTask *task);
|
||||||
|
|
||||||
device_ptr mem_alloc_sub_ptr(device_memory& mem, int offset, int size, MemoryType type);
|
device_ptr mem_alloc_sub_ptr(device_memory& mem, int offset, int size);
|
||||||
void mem_free_sub_ptr(device_ptr ptr);
|
void mem_free_sub_ptr(device_ptr ptr);
|
||||||
|
|
||||||
class ArgumentWrapper {
|
class ArgumentWrapper {
|
||||||
@@ -550,21 +547,7 @@ private:
|
|||||||
vector<TextureInfo> texture_info;
|
vector<TextureInfo> texture_info;
|
||||||
device_memory texture_info_buffer;
|
device_memory texture_info_buffer;
|
||||||
|
|
||||||
struct Texture {
|
typedef map<string, device_memory*> TexturesMap;
|
||||||
Texture() {}
|
|
||||||
Texture(device_memory* mem,
|
|
||||||
InterpolationType interpolation,
|
|
||||||
ExtensionType extension)
|
|
||||||
: mem(mem),
|
|
||||||
interpolation(interpolation),
|
|
||||||
extension(extension) {
|
|
||||||
}
|
|
||||||
device_memory* mem;
|
|
||||||
InterpolationType interpolation;
|
|
||||||
ExtensionType extension;
|
|
||||||
};
|
|
||||||
|
|
||||||
typedef map<string, Texture> TexturesMap;
|
|
||||||
TexturesMap textures;
|
TexturesMap textures;
|
||||||
|
|
||||||
bool textures_need_update;
|
bool textures_need_update;
|
||||||
|
@@ -72,7 +72,9 @@ void OpenCLDeviceBase::opencl_assert_err(cl_int err, const char* where)
|
|||||||
}
|
}
|
||||||
|
|
||||||
OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, bool background_)
|
OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, bool background_)
|
||||||
: Device(info, stats, background_), memory_manager(this)
|
: Device(info, stats, background_),
|
||||||
|
memory_manager(this),
|
||||||
|
texture_info_buffer(this, "__texture_info", MEM_READ_ONLY)
|
||||||
{
|
{
|
||||||
cpPlatform = NULL;
|
cpPlatform = NULL;
|
||||||
cdDevice = NULL;
|
cdDevice = NULL;
|
||||||
@@ -286,10 +288,10 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void OpenCLDeviceBase::mem_alloc(const char *name, device_memory& mem, MemoryType type)
|
void OpenCLDeviceBase::mem_alloc(device_memory& mem)
|
||||||
{
|
{
|
||||||
if(name) {
|
if(mem.name) {
|
||||||
VLOG(1) << "Buffer allocate: " << name << ", "
|
VLOG(1) << "Buffer allocate: " << mem.name << ", "
|
||||||
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
|
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
|
||||||
<< string_human_readable_size(mem.memory_size()) << ")";
|
<< string_human_readable_size(mem.memory_size()) << ")";
|
||||||
}
|
}
|
||||||
@@ -307,8 +309,8 @@ void OpenCLDeviceBase::mem_alloc(const char *name, device_memory& mem, MemoryTyp
|
|||||||
|
|
||||||
if(size > max_alloc_size) {
|
if(size > max_alloc_size) {
|
||||||
string error = "Scene too complex to fit in available memory.";
|
string error = "Scene too complex to fit in available memory.";
|
||||||
if(name != NULL) {
|
if(mem.name != NULL) {
|
||||||
error += string_printf(" (allocating buffer %s failed.)", name);
|
error += string_printf(" (allocating buffer %s failed.)", mem.name);
|
||||||
}
|
}
|
||||||
set_error(error);
|
set_error(error);
|
||||||
|
|
||||||
@@ -318,9 +320,9 @@ void OpenCLDeviceBase::mem_alloc(const char *name, device_memory& mem, MemoryTyp
|
|||||||
cl_mem_flags mem_flag;
|
cl_mem_flags mem_flag;
|
||||||
void *mem_ptr = NULL;
|
void *mem_ptr = NULL;
|
||||||
|
|
||||||
if(type == MEM_READ_ONLY)
|
if(mem.type == MEM_READ_ONLY)
|
||||||
mem_flag = CL_MEM_READ_ONLY;
|
mem_flag = CL_MEM_READ_ONLY;
|
||||||
else if(type == MEM_WRITE_ONLY)
|
else if(mem.type == MEM_WRITE_ONLY)
|
||||||
mem_flag = CL_MEM_WRITE_ONLY;
|
mem_flag = CL_MEM_WRITE_ONLY;
|
||||||
else
|
else
|
||||||
mem_flag = CL_MEM_READ_WRITE;
|
mem_flag = CL_MEM_READ_WRITE;
|
||||||
@@ -461,12 +463,12 @@ int OpenCLDeviceBase::mem_address_alignment()
|
|||||||
return OpenCLInfo::mem_address_alignment(cdDevice);
|
return OpenCLInfo::mem_address_alignment(cdDevice);
|
||||||
}
|
}
|
||||||
|
|
||||||
device_ptr OpenCLDeviceBase::mem_alloc_sub_ptr(device_memory& mem, int offset, int size, MemoryType type)
|
device_ptr OpenCLDeviceBase::mem_alloc_sub_ptr(device_memory& mem, int offset, int size)
|
||||||
{
|
{
|
||||||
cl_mem_flags mem_flag;
|
cl_mem_flags mem_flag;
|
||||||
if(type == MEM_READ_ONLY)
|
if(mem.type == MEM_READ_ONLY)
|
||||||
mem_flag = CL_MEM_READ_ONLY;
|
mem_flag = CL_MEM_READ_ONLY;
|
||||||
else if(type == MEM_WRITE_ONLY)
|
else if(mem.type == MEM_WRITE_ONLY)
|
||||||
mem_flag = CL_MEM_WRITE_ONLY;
|
mem_flag = CL_MEM_WRITE_ONLY;
|
||||||
else
|
else
|
||||||
mem_flag = CL_MEM_READ_WRITE;
|
mem_flag = CL_MEM_READ_WRITE;
|
||||||
@@ -497,10 +499,10 @@ void OpenCLDeviceBase::const_copy_to(const char *name, void *host, size_t size)
|
|||||||
device_vector<uchar> *data;
|
device_vector<uchar> *data;
|
||||||
|
|
||||||
if(i == const_mem_map.end()) {
|
if(i == const_mem_map.end()) {
|
||||||
data = new device_vector<uchar>();
|
data = new device_vector<uchar>(this, name, MEM_READ_ONLY);
|
||||||
data->resize(size);
|
data->resize(size);
|
||||||
|
|
||||||
mem_alloc(name, *data, MEM_READ_ONLY);
|
mem_alloc(*data);
|
||||||
const_mem_map.insert(ConstMemMap::value_type(name, data));
|
const_mem_map.insert(ConstMemMap::value_type(name, data));
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
@@ -511,19 +513,16 @@ void OpenCLDeviceBase::const_copy_to(const char *name, void *host, size_t size)
|
|||||||
mem_copy_to(*data);
|
mem_copy_to(*data);
|
||||||
}
|
}
|
||||||
|
|
||||||
void OpenCLDeviceBase::tex_alloc(const char *name,
|
void OpenCLDeviceBase::tex_alloc(device_memory& mem)
|
||||||
device_memory& mem,
|
|
||||||
InterpolationType interpolation,
|
|
||||||
ExtensionType extension)
|
|
||||||
{
|
{
|
||||||
VLOG(1) << "Texture allocate: " << name << ", "
|
VLOG(1) << "Texture allocate: " << mem.name << ", "
|
||||||
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
|
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
|
||||||
<< string_human_readable_size(mem.memory_size()) << ")";
|
<< string_human_readable_size(mem.memory_size()) << ")";
|
||||||
|
|
||||||
memory_manager.alloc(name, mem);
|
memory_manager.alloc(mem.name, mem);
|
||||||
/* Set the pointer to non-null to keep code that inspects its value from thinking its unallocated. */
|
/* Set the pointer to non-null to keep code that inspects its value from thinking its unallocated. */
|
||||||
mem.device_pointer = 1;
|
mem.device_pointer = 1;
|
||||||
textures[name] = Texture(&mem, interpolation, extension);
|
textures[mem.name] = &mem;
|
||||||
textures_need_update = true;
|
textures_need_update = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -537,7 +536,7 @@ void OpenCLDeviceBase::tex_free(device_memory& mem)
|
|||||||
}
|
}
|
||||||
|
|
||||||
foreach(TexturesMap::value_type& value, textures) {
|
foreach(TexturesMap::value_type& value, textures) {
|
||||||
if(value.second.mem == &mem) {
|
if(value.second == &mem) {
|
||||||
textures.erase(value.first);
|
textures.erase(value.first);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -658,22 +657,21 @@ void OpenCLDeviceBase::flush_texture_buffers()
|
|||||||
|
|
||||||
/* Fill in descriptors */
|
/* Fill in descriptors */
|
||||||
foreach(texture_slot_t& slot, texture_slots) {
|
foreach(texture_slot_t& slot, texture_slots) {
|
||||||
Texture& tex = textures[slot.name];
|
|
||||||
|
|
||||||
TextureInfo& info = texture_info[slot.slot];
|
TextureInfo& info = texture_info[slot.slot];
|
||||||
|
|
||||||
MemoryManager::BufferDescriptor desc = memory_manager.get_descriptor(slot.name);
|
MemoryManager::BufferDescriptor desc = memory_manager.get_descriptor(slot.name);
|
||||||
|
|
||||||
info.data = desc.offset;
|
info.data = desc.offset;
|
||||||
info.cl_buffer = desc.device_buffer;
|
info.cl_buffer = desc.device_buffer;
|
||||||
|
|
||||||
if(string_startswith(slot.name, "__tex_image")) {
|
if(string_startswith(slot.name, "__tex_image")) {
|
||||||
info.width = tex.mem->data_width;
|
device_memory *mem = textures[slot.name];
|
||||||
info.height = tex.mem->data_height;
|
|
||||||
info.depth = tex.mem->data_depth;
|
|
||||||
|
|
||||||
info.interpolation = tex.interpolation;
|
info.width = mem->data_width;
|
||||||
info.extension = tex.extension;
|
info.height = mem->data_height;
|
||||||
|
info.depth = mem->data_depth;
|
||||||
|
|
||||||
|
info.interpolation = mem->interpolation;
|
||||||
|
info.extension = mem->extension;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1045,7 +1043,7 @@ bool OpenCLDeviceBase::denoising_detect_outliers(device_ptr image_ptr,
|
|||||||
bool OpenCLDeviceBase::denoising_set_tiles(device_ptr *buffers,
|
bool OpenCLDeviceBase::denoising_set_tiles(device_ptr *buffers,
|
||||||
DenoisingTask *task)
|
DenoisingTask *task)
|
||||||
{
|
{
|
||||||
mem_alloc("Denoising Tile Info", task->tiles_mem, MEM_READ_WRITE);
|
mem_alloc(task->tiles_mem);
|
||||||
mem_copy_to(task->tiles_mem);
|
mem_copy_to(task->tiles_mem);
|
||||||
|
|
||||||
cl_mem tiles_mem = CL_MEM_PTR(task->tiles_mem.device_pointer);
|
cl_mem tiles_mem = CL_MEM_PTR(task->tiles_mem.device_pointer);
|
||||||
|
@@ -127,9 +127,9 @@ public:
|
|||||||
} KernelGlobals;
|
} KernelGlobals;
|
||||||
|
|
||||||
/* Allocate buffer for kernel globals */
|
/* Allocate buffer for kernel globals */
|
||||||
device_memory kgbuffer;
|
device_memory kgbuffer(this, "kernel_globals", MEM_READ_WRITE);
|
||||||
kgbuffer.resize(sizeof(KernelGlobals));
|
kgbuffer.resize(sizeof(KernelGlobals));
|
||||||
mem_alloc("kernel_globals", kgbuffer, MEM_READ_WRITE);
|
mem_alloc(kgbuffer);
|
||||||
|
|
||||||
/* Keep rendering tiles until done. */
|
/* Keep rendering tiles until done. */
|
||||||
while(task->acquire_tile(this, tile)) {
|
while(task->acquire_tile(this, tile)) {
|
||||||
@@ -288,9 +288,9 @@ public:
|
|||||||
|
|
||||||
virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads)
|
virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads)
|
||||||
{
|
{
|
||||||
device_vector<uint64_t> size_buffer;
|
device_vector<uint64_t> size_buffer(device, "size_buffer", MEM_READ_WRITE);
|
||||||
size_buffer.resize(1);
|
size_buffer.resize(1);
|
||||||
device->mem_alloc(NULL, size_buffer, MEM_READ_WRITE);
|
device->mem_alloc(size_buffer);
|
||||||
|
|
||||||
uint threads = num_threads;
|
uint threads = num_threads;
|
||||||
device->kernel_set_args(device->program_state_buffer_size(), 0, kg, data, threads, size_buffer);
|
device->kernel_set_args(device->program_state_buffer_size(), 0, kg, data, threads, size_buffer);
|
||||||
|
@@ -41,11 +41,7 @@ void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t s
|
|||||||
void kernel_tex_copy(KernelGlobals *kg,
|
void kernel_tex_copy(KernelGlobals *kg,
|
||||||
const char *name,
|
const char *name,
|
||||||
device_ptr mem,
|
device_ptr mem,
|
||||||
size_t width,
|
size_t size);
|
||||||
size_t height,
|
|
||||||
size_t depth,
|
|
||||||
InterpolationType interpolation=INTERPOLATION_LINEAR,
|
|
||||||
ExtensionType extension = EXTENSION_REPEAT);
|
|
||||||
|
|
||||||
#define KERNEL_ARCH cpu
|
#define KERNEL_ARCH cpu
|
||||||
#include "kernel/kernels/cpu/kernel_cpu.h"
|
#include "kernel/kernels/cpu/kernel_cpu.h"
|
||||||
|
@@ -75,11 +75,7 @@ void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t s
|
|||||||
void kernel_tex_copy(KernelGlobals *kg,
|
void kernel_tex_copy(KernelGlobals *kg,
|
||||||
const char *name,
|
const char *name,
|
||||||
device_ptr mem,
|
device_ptr mem,
|
||||||
size_t width,
|
size_t size)
|
||||||
size_t height,
|
|
||||||
size_t depth,
|
|
||||||
InterpolationType interpolation,
|
|
||||||
ExtensionType extension)
|
|
||||||
{
|
{
|
||||||
if(0) {
|
if(0) {
|
||||||
}
|
}
|
||||||
@@ -87,7 +83,7 @@ void kernel_tex_copy(KernelGlobals *kg,
|
|||||||
#define KERNEL_TEX(type, tname) \
|
#define KERNEL_TEX(type, tname) \
|
||||||
else if(strcmp(name, #tname) == 0) { \
|
else if(strcmp(name, #tname) == 0) { \
|
||||||
kg->tname.data = (type*)mem; \
|
kg->tname.data = (type*)mem; \
|
||||||
kg->tname.width = width; \
|
kg->tname.width = size; \
|
||||||
}
|
}
|
||||||
#define KERNEL_IMAGE_TEX(type, tname)
|
#define KERNEL_IMAGE_TEX(type, tname)
|
||||||
#include "kernel/kernel_textures.h"
|
#include "kernel/kernel_textures.h"
|
||||||
|
@@ -150,7 +150,7 @@ bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progre
|
|||||||
size_t shader_size = (size_t)fminf(num_pixels - shader_offset, m_shader_limit);
|
size_t shader_size = (size_t)fminf(num_pixels - shader_offset, m_shader_limit);
|
||||||
|
|
||||||
/* setup input for device task */
|
/* setup input for device task */
|
||||||
device_vector<uint4> d_input;
|
device_vector<uint4> d_input(device, "bake_input", MEM_READ_ONLY);
|
||||||
uint4 *d_input_data = d_input.resize(shader_size * 2);
|
uint4 *d_input_data = d_input.resize(shader_size * 2);
|
||||||
size_t d_input_size = 0;
|
size_t d_input_size = 0;
|
||||||
|
|
||||||
@@ -165,15 +165,15 @@ bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progre
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* run device task */
|
/* run device task */
|
||||||
device_vector<float4> d_output;
|
device_vector<float4> d_output(device, "bake_output", MEM_READ_WRITE);
|
||||||
d_output.resize(shader_size);
|
d_output.resize(shader_size);
|
||||||
|
|
||||||
/* needs to be up to data for attribute access */
|
/* needs to be up to data for attribute access */
|
||||||
device->const_copy_to("__data", &dscene->data, sizeof(dscene->data));
|
device->const_copy_to("__data", &dscene->data, sizeof(dscene->data));
|
||||||
|
|
||||||
device->mem_alloc("bake_input", d_input, MEM_READ_ONLY);
|
device->mem_alloc(d_input);
|
||||||
device->mem_copy_to(d_input);
|
device->mem_copy_to(d_input);
|
||||||
device->mem_alloc("bake_output", d_output, MEM_READ_WRITE);
|
device->mem_alloc(d_output);
|
||||||
device->mem_zero(d_output);
|
device->mem_zero(d_output);
|
||||||
|
|
||||||
DeviceTask task(DeviceTask::SHADER);
|
DeviceTask task(DeviceTask::SHADER);
|
||||||
|
@@ -114,9 +114,10 @@ RenderTile::RenderTile()
|
|||||||
|
|
||||||
/* Render Buffers */
|
/* Render Buffers */
|
||||||
|
|
||||||
RenderBuffers::RenderBuffers(Device *device_)
|
RenderBuffers::RenderBuffers(Device *device)
|
||||||
|
: buffer(device, "RenderBuffers", MEM_READ_WRITE),
|
||||||
|
device(device)
|
||||||
{
|
{
|
||||||
device = device_;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
RenderBuffers::~RenderBuffers()
|
RenderBuffers::~RenderBuffers()
|
||||||
@@ -138,10 +139,10 @@ void RenderBuffers::reset(Device *device, BufferParams& params_)
|
|||||||
|
|
||||||
/* free existing buffers */
|
/* free existing buffers */
|
||||||
device_free();
|
device_free();
|
||||||
|
|
||||||
/* allocate buffer */
|
/* allocate buffer */
|
||||||
buffer.resize(params.width*params.height*params.get_passes_size());
|
buffer.resize(params.width*params.height*params.get_passes_size());
|
||||||
device->mem_alloc("render_buffer", buffer, MEM_READ_WRITE);
|
device->mem_alloc(buffer);
|
||||||
device->mem_zero(buffer);
|
device->mem_zero(buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -396,13 +397,15 @@ bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int
|
|||||||
|
|
||||||
/* Display Buffer */
|
/* Display Buffer */
|
||||||
|
|
||||||
DisplayBuffer::DisplayBuffer(Device *device_, bool linear)
|
DisplayBuffer::DisplayBuffer(Device *device, bool linear)
|
||||||
|
: draw_width(0),
|
||||||
|
draw_height(0),
|
||||||
|
transparent(true), /* todo: determine from background */
|
||||||
|
half_float(linear),
|
||||||
|
rgba_byte(device, "display buffer byte", MEM_WRITE_ONLY),
|
||||||
|
rgba_half(device, "display buffer half", MEM_WRITE_ONLY),
|
||||||
|
device(device)
|
||||||
{
|
{
|
||||||
device = device_;
|
|
||||||
draw_width = 0;
|
|
||||||
draw_height = 0;
|
|
||||||
transparent = true; /* todo: determine from background */
|
|
||||||
half_float = linear;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
DisplayBuffer::~DisplayBuffer()
|
DisplayBuffer::~DisplayBuffer()
|
||||||
|
@@ -729,7 +729,7 @@ void ImageManager::device_load_image(Device *device,
|
|||||||
|
|
||||||
/* Create new texture. */
|
/* Create new texture. */
|
||||||
if(type == IMAGE_DATA_TYPE_FLOAT4) {
|
if(type == IMAGE_DATA_TYPE_FLOAT4) {
|
||||||
device_vector<float4> *tex_img = new device_vector<float4>();
|
device_vector<float4> *tex_img = new device_vector<float4>(device, name.c_str());
|
||||||
|
|
||||||
if(!file_load_image<TypeDesc::FLOAT, float>(img,
|
if(!file_load_image<TypeDesc::FLOAT, float>(img,
|
||||||
type,
|
type,
|
||||||
@@ -748,7 +748,7 @@ void ImageManager::device_load_image(Device *device,
|
|||||||
img->mem = tex_img;
|
img->mem = tex_img;
|
||||||
}
|
}
|
||||||
else if(type == IMAGE_DATA_TYPE_FLOAT) {
|
else if(type == IMAGE_DATA_TYPE_FLOAT) {
|
||||||
device_vector<float> *tex_img = new device_vector<float>();
|
device_vector<float> *tex_img = new device_vector<float>(device, name.c_str());
|
||||||
|
|
||||||
if(!file_load_image<TypeDesc::FLOAT, float>(img,
|
if(!file_load_image<TypeDesc::FLOAT, float>(img,
|
||||||
type,
|
type,
|
||||||
@@ -764,7 +764,7 @@ void ImageManager::device_load_image(Device *device,
|
|||||||
img->mem = tex_img;
|
img->mem = tex_img;
|
||||||
}
|
}
|
||||||
else if(type == IMAGE_DATA_TYPE_BYTE4) {
|
else if(type == IMAGE_DATA_TYPE_BYTE4) {
|
||||||
device_vector<uchar4> *tex_img = new device_vector<uchar4>();
|
device_vector<uchar4> *tex_img = new device_vector<uchar4>(device, name.c_str());
|
||||||
|
|
||||||
if(!file_load_image<TypeDesc::UINT8, uchar>(img,
|
if(!file_load_image<TypeDesc::UINT8, uchar>(img,
|
||||||
type,
|
type,
|
||||||
@@ -783,7 +783,7 @@ void ImageManager::device_load_image(Device *device,
|
|||||||
img->mem = tex_img;
|
img->mem = tex_img;
|
||||||
}
|
}
|
||||||
else if(type == IMAGE_DATA_TYPE_BYTE) {
|
else if(type == IMAGE_DATA_TYPE_BYTE) {
|
||||||
device_vector<uchar> *tex_img = new device_vector<uchar>();
|
device_vector<uchar> *tex_img = new device_vector<uchar>(device, name.c_str());
|
||||||
|
|
||||||
if(!file_load_image<TypeDesc::UINT8, uchar>(img,
|
if(!file_load_image<TypeDesc::UINT8, uchar>(img,
|
||||||
type,
|
type,
|
||||||
@@ -798,7 +798,7 @@ void ImageManager::device_load_image(Device *device,
|
|||||||
img->mem = tex_img;
|
img->mem = tex_img;
|
||||||
}
|
}
|
||||||
else if(type == IMAGE_DATA_TYPE_HALF4) {
|
else if(type == IMAGE_DATA_TYPE_HALF4) {
|
||||||
device_vector<half4> *tex_img = new device_vector<half4>();
|
device_vector<half4> *tex_img = new device_vector<half4>(device, name.c_str());
|
||||||
|
|
||||||
if(!file_load_image<TypeDesc::HALF, half>(img,
|
if(!file_load_image<TypeDesc::HALF, half>(img,
|
||||||
type,
|
type,
|
||||||
@@ -816,7 +816,7 @@ void ImageManager::device_load_image(Device *device,
|
|||||||
img->mem = tex_img;
|
img->mem = tex_img;
|
||||||
}
|
}
|
||||||
else if(type == IMAGE_DATA_TYPE_HALF) {
|
else if(type == IMAGE_DATA_TYPE_HALF) {
|
||||||
device_vector<half> *tex_img = new device_vector<half>();
|
device_vector<half> *tex_img = new device_vector<half>(device, name.c_str());
|
||||||
|
|
||||||
if(!file_load_image<TypeDesc::HALF, half>(img,
|
if(!file_load_image<TypeDesc::HALF, half>(img,
|
||||||
type,
|
type,
|
||||||
@@ -833,11 +833,11 @@ void ImageManager::device_load_image(Device *device,
|
|||||||
|
|
||||||
/* Copy to device. */
|
/* Copy to device. */
|
||||||
if(img->mem) {
|
if(img->mem) {
|
||||||
|
img->mem->interpolation = img->interpolation;
|
||||||
|
img->mem->extension = img->extension;
|
||||||
|
|
||||||
thread_scoped_lock device_lock(device_mutex);
|
thread_scoped_lock device_lock(device_mutex);
|
||||||
device->tex_alloc(name.c_str(),
|
device->tex_alloc(*img->mem);
|
||||||
*img->mem,
|
|
||||||
img->interpolation,
|
|
||||||
img->extension);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@@ -195,7 +195,7 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene
|
|||||||
|
|
||||||
sobol_generate_direction_vectors((uint(*)[SOBOL_BITS])directions, dimensions);
|
sobol_generate_direction_vectors((uint(*)[SOBOL_BITS])directions, dimensions);
|
||||||
|
|
||||||
device->tex_alloc("__sobol_directions", dscene->sobol_directions);
|
device->tex_alloc(dscene->sobol_directions);
|
||||||
|
|
||||||
/* Clamping. */
|
/* Clamping. */
|
||||||
bool use_sample_clamp = (sample_clamp_direct != 0.0f ||
|
bool use_sample_clamp = (sample_clamp_direct != 0.0f ||
|
||||||
|
@@ -36,8 +36,8 @@ static void shade_background_pixels(Device *device, DeviceScene *dscene, int res
|
|||||||
int width = res;
|
int width = res;
|
||||||
int height = res;
|
int height = res;
|
||||||
|
|
||||||
device_vector<uint4> d_input;
|
device_vector<uint4> d_input(device, "background_input", MEM_READ_ONLY);
|
||||||
device_vector<float4> d_output;
|
device_vector<float4> d_output(device, "background_output", MEM_WRITE_ONLY);
|
||||||
|
|
||||||
uint4 *d_input_data = d_input.resize(width*height);
|
uint4 *d_input_data = d_input.resize(width*height);
|
||||||
|
|
||||||
@@ -57,9 +57,9 @@ static void shade_background_pixels(Device *device, DeviceScene *dscene, int res
|
|||||||
|
|
||||||
device->const_copy_to("__data", &dscene->data, sizeof(dscene->data));
|
device->const_copy_to("__data", &dscene->data, sizeof(dscene->data));
|
||||||
|
|
||||||
device->mem_alloc("shade_background_pixels_input", d_input, MEM_READ_ONLY);
|
device->mem_alloc(d_input);
|
||||||
device->mem_copy_to(d_input);
|
device->mem_copy_to(d_input);
|
||||||
device->mem_alloc("shade_background_pixels_output", d_output, MEM_WRITE_ONLY);
|
device->mem_alloc(d_output);
|
||||||
device->mem_zero(d_output);
|
device->mem_zero(d_output);
|
||||||
|
|
||||||
DeviceTask main_task(DeviceTask::SHADER);
|
DeviceTask main_task(DeviceTask::SHADER);
|
||||||
@@ -451,7 +451,7 @@ void LightManager::device_update_distribution(Device *device, DeviceScene *dscen
|
|||||||
kfilm->pass_shadow_scale *= (float)(num_lights - num_background_lights)/(float)num_lights;
|
kfilm->pass_shadow_scale *= (float)(num_lights - num_background_lights)/(float)num_lights;
|
||||||
|
|
||||||
/* CDF */
|
/* CDF */
|
||||||
device->tex_alloc("__light_distribution", dscene->light_distribution);
|
device->tex_alloc(dscene->light_distribution);
|
||||||
|
|
||||||
/* Portals */
|
/* Portals */
|
||||||
if(num_portals > 0) {
|
if(num_portals > 0) {
|
||||||
@@ -611,8 +611,8 @@ void LightManager::device_update_background(Device *device,
|
|||||||
VLOG(2) << "Background MIS build time " << time_dt() - time_start << "\n";
|
VLOG(2) << "Background MIS build time " << time_dt() - time_start << "\n";
|
||||||
|
|
||||||
/* update device */
|
/* update device */
|
||||||
device->tex_alloc("__light_background_marginal_cdf", dscene->light_background_marginal_cdf);
|
device->tex_alloc(dscene->light_background_marginal_cdf);
|
||||||
device->tex_alloc("__light_background_conditional_cdf", dscene->light_background_conditional_cdf);
|
device->tex_alloc(dscene->light_background_conditional_cdf);
|
||||||
}
|
}
|
||||||
|
|
||||||
void LightManager::device_update_points(Device *device,
|
void LightManager::device_update_points(Device *device,
|
||||||
@@ -813,7 +813,7 @@ void LightManager::device_update_points(Device *device,
|
|||||||
VLOG(1) << "Number of lights without contribution: "
|
VLOG(1) << "Number of lights without contribution: "
|
||||||
<< num_scene_lights - light_index;
|
<< num_scene_lights - light_index;
|
||||||
|
|
||||||
device->tex_alloc("__light_data", dscene->light_data);
|
device->tex_alloc(dscene->light_data);
|
||||||
}
|
}
|
||||||
|
|
||||||
void LightManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress)
|
void LightManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress)
|
||||||
|
@@ -1359,7 +1359,7 @@ void MeshManager::update_svm_attributes(Device *device, DeviceScene *dscene, Sce
|
|||||||
|
|
||||||
/* copy to device */
|
/* copy to device */
|
||||||
dscene->data.bvh.attributes_map_stride = attr_map_stride;
|
dscene->data.bvh.attributes_map_stride = attr_map_stride;
|
||||||
device->tex_alloc("__attributes_map", dscene->attributes_map);
|
device->tex_alloc(dscene->attributes_map);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void update_attribute_element_size(Mesh *mesh,
|
static void update_attribute_element_size(Mesh *mesh,
|
||||||
@@ -1617,13 +1617,13 @@ void MeshManager::device_update_attributes(Device *device, DeviceScene *dscene,
|
|||||||
progress.set_status("Updating Mesh", "Copying Attributes to device");
|
progress.set_status("Updating Mesh", "Copying Attributes to device");
|
||||||
|
|
||||||
if(dscene->attributes_float.size()) {
|
if(dscene->attributes_float.size()) {
|
||||||
device->tex_alloc("__attributes_float", dscene->attributes_float);
|
device->tex_alloc(dscene->attributes_float);
|
||||||
}
|
}
|
||||||
if(dscene->attributes_float3.size()) {
|
if(dscene->attributes_float3.size()) {
|
||||||
device->tex_alloc("__attributes_float3", dscene->attributes_float3);
|
device->tex_alloc(dscene->attributes_float3);
|
||||||
}
|
}
|
||||||
if(dscene->attributes_uchar4.size()) {
|
if(dscene->attributes_uchar4.size()) {
|
||||||
device->tex_alloc("__attributes_uchar4", dscene->attributes_uchar4);
|
device->tex_alloc(dscene->attributes_uchar4);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1754,11 +1754,11 @@ void MeshManager::device_update_mesh(Device *device,
|
|||||||
/* vertex coordinates */
|
/* vertex coordinates */
|
||||||
progress.set_status("Updating Mesh", "Copying Mesh to device");
|
progress.set_status("Updating Mesh", "Copying Mesh to device");
|
||||||
|
|
||||||
device->tex_alloc("__tri_shader", dscene->tri_shader);
|
device->tex_alloc(dscene->tri_shader);
|
||||||
device->tex_alloc("__tri_vnormal", dscene->tri_vnormal);
|
device->tex_alloc(dscene->tri_vnormal);
|
||||||
device->tex_alloc("__tri_vindex", dscene->tri_vindex);
|
device->tex_alloc(dscene->tri_vindex);
|
||||||
device->tex_alloc("__tri_patch", dscene->tri_patch);
|
device->tex_alloc(dscene->tri_patch);
|
||||||
device->tex_alloc("__tri_patch_uv", dscene->tri_patch_uv);
|
device->tex_alloc(dscene->tri_patch_uv);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(curve_size != 0) {
|
if(curve_size != 0) {
|
||||||
@@ -1772,8 +1772,8 @@ void MeshManager::device_update_mesh(Device *device,
|
|||||||
if(progress.get_cancel()) return;
|
if(progress.get_cancel()) return;
|
||||||
}
|
}
|
||||||
|
|
||||||
device->tex_alloc("__curve_keys", dscene->curve_keys);
|
device->tex_alloc(dscene->curve_keys);
|
||||||
device->tex_alloc("__curves", dscene->curves);
|
device->tex_alloc(dscene->curves);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(patch_size != 0) {
|
if(patch_size != 0) {
|
||||||
@@ -1791,7 +1791,7 @@ void MeshManager::device_update_mesh(Device *device,
|
|||||||
if(progress.get_cancel()) return;
|
if(progress.get_cancel()) return;
|
||||||
}
|
}
|
||||||
|
|
||||||
device->tex_alloc("__patches", dscene->patches);
|
device->tex_alloc(dscene->patches);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(for_displacement) {
|
if(for_displacement) {
|
||||||
@@ -1805,7 +1805,7 @@ void MeshManager::device_update_mesh(Device *device,
|
|||||||
prim_tri_verts[offset + 2] = float3_to_float4(mesh->verts[t.v[2]]);
|
prim_tri_verts[offset + 2] = float3_to_float4(mesh->verts[t.v[2]]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
device->tex_alloc("__prim_tri_verts", dscene->prim_tri_verts);
|
device->tex_alloc(dscene->prim_tri_verts);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1841,43 +1841,43 @@ void MeshManager::device_update_bvh(Device *device, DeviceScene *dscene, Scene *
|
|||||||
|
|
||||||
if(pack.nodes.size()) {
|
if(pack.nodes.size()) {
|
||||||
dscene->bvh_nodes.steal_data(pack.nodes);
|
dscene->bvh_nodes.steal_data(pack.nodes);
|
||||||
device->tex_alloc("__bvh_nodes", dscene->bvh_nodes);
|
device->tex_alloc(dscene->bvh_nodes);
|
||||||
}
|
}
|
||||||
if(pack.leaf_nodes.size()) {
|
if(pack.leaf_nodes.size()) {
|
||||||
dscene->bvh_leaf_nodes.steal_data(pack.leaf_nodes);
|
dscene->bvh_leaf_nodes.steal_data(pack.leaf_nodes);
|
||||||
device->tex_alloc("__bvh_leaf_nodes", dscene->bvh_leaf_nodes);
|
device->tex_alloc(dscene->bvh_leaf_nodes);
|
||||||
}
|
}
|
||||||
if(pack.object_node.size()) {
|
if(pack.object_node.size()) {
|
||||||
dscene->object_node.steal_data(pack.object_node);
|
dscene->object_node.steal_data(pack.object_node);
|
||||||
device->tex_alloc("__object_node", dscene->object_node);
|
device->tex_alloc(dscene->object_node);
|
||||||
}
|
}
|
||||||
if(pack.prim_tri_index.size()) {
|
if(pack.prim_tri_index.size()) {
|
||||||
dscene->prim_tri_index.steal_data(pack.prim_tri_index);
|
dscene->prim_tri_index.steal_data(pack.prim_tri_index);
|
||||||
device->tex_alloc("__prim_tri_index", dscene->prim_tri_index);
|
device->tex_alloc(dscene->prim_tri_index);
|
||||||
}
|
}
|
||||||
if(pack.prim_tri_verts.size()) {
|
if(pack.prim_tri_verts.size()) {
|
||||||
dscene->prim_tri_verts.steal_data(pack.prim_tri_verts);
|
dscene->prim_tri_verts.steal_data(pack.prim_tri_verts);
|
||||||
device->tex_alloc("__prim_tri_verts", dscene->prim_tri_verts);
|
device->tex_alloc(dscene->prim_tri_verts);
|
||||||
}
|
}
|
||||||
if(pack.prim_type.size()) {
|
if(pack.prim_type.size()) {
|
||||||
dscene->prim_type.steal_data(pack.prim_type);
|
dscene->prim_type.steal_data(pack.prim_type);
|
||||||
device->tex_alloc("__prim_type", dscene->prim_type);
|
device->tex_alloc(dscene->prim_type);
|
||||||
}
|
}
|
||||||
if(pack.prim_visibility.size()) {
|
if(pack.prim_visibility.size()) {
|
||||||
dscene->prim_visibility.steal_data(pack.prim_visibility);
|
dscene->prim_visibility.steal_data(pack.prim_visibility);
|
||||||
device->tex_alloc("__prim_visibility", dscene->prim_visibility);
|
device->tex_alloc(dscene->prim_visibility);
|
||||||
}
|
}
|
||||||
if(pack.prim_index.size()) {
|
if(pack.prim_index.size()) {
|
||||||
dscene->prim_index.steal_data(pack.prim_index);
|
dscene->prim_index.steal_data(pack.prim_index);
|
||||||
device->tex_alloc("__prim_index", dscene->prim_index);
|
device->tex_alloc(dscene->prim_index);
|
||||||
}
|
}
|
||||||
if(pack.prim_object.size()) {
|
if(pack.prim_object.size()) {
|
||||||
dscene->prim_object.steal_data(pack.prim_object);
|
dscene->prim_object.steal_data(pack.prim_object);
|
||||||
device->tex_alloc("__prim_object", dscene->prim_object);
|
device->tex_alloc(dscene->prim_object);
|
||||||
}
|
}
|
||||||
if(pack.prim_time.size()) {
|
if(pack.prim_time.size()) {
|
||||||
dscene->prim_time.steal_data(pack.prim_time);
|
dscene->prim_time.steal_data(pack.prim_time);
|
||||||
device->tex_alloc("__prim_time", dscene->prim_time);
|
device->tex_alloc(dscene->prim_time);
|
||||||
}
|
}
|
||||||
|
|
||||||
dscene->data.bvh.root = pack.root_index;
|
dscene->data.bvh.root = pack.root_index;
|
||||||
|
@@ -64,7 +64,7 @@ bool MeshManager::displace(Device *device, DeviceScene *dscene, Scene *scene, Me
|
|||||||
/* setup input for device task */
|
/* setup input for device task */
|
||||||
const size_t num_verts = mesh->verts.size();
|
const size_t num_verts = mesh->verts.size();
|
||||||
vector<bool> done(num_verts, false);
|
vector<bool> done(num_verts, false);
|
||||||
device_vector<uint4> d_input;
|
device_vector<uint4> d_input(device, "displace_input", MEM_READ_ONLY);
|
||||||
uint4 *d_input_data = d_input.resize(num_verts);
|
uint4 *d_input_data = d_input.resize(num_verts);
|
||||||
size_t d_input_size = 0;
|
size_t d_input_size = 0;
|
||||||
|
|
||||||
@@ -115,15 +115,15 @@ bool MeshManager::displace(Device *device, DeviceScene *dscene, Scene *scene, Me
|
|||||||
return false;
|
return false;
|
||||||
|
|
||||||
/* run device task */
|
/* run device task */
|
||||||
device_vector<float4> d_output;
|
device_vector<float4> d_output(device, "displace_output", MEM_WRITE_ONLY);
|
||||||
d_output.resize(d_input_size);
|
d_output.resize(d_input_size);
|
||||||
|
|
||||||
/* needs to be up to data for attribute access */
|
/* needs to be up to data for attribute access */
|
||||||
device->const_copy_to("__data", &dscene->data, sizeof(dscene->data));
|
device->const_copy_to("__data", &dscene->data, sizeof(dscene->data));
|
||||||
|
|
||||||
device->mem_alloc("displace_input", d_input, MEM_READ_ONLY);
|
device->mem_alloc(d_input);
|
||||||
device->mem_copy_to(d_input);
|
device->mem_copy_to(d_input);
|
||||||
device->mem_alloc("displace_output", d_output, MEM_WRITE_ONLY);
|
device->mem_alloc(d_output);
|
||||||
device->mem_zero(d_output);
|
device->mem_zero(d_output);
|
||||||
|
|
||||||
DeviceTask task(DeviceTask::SHADER);
|
DeviceTask task(DeviceTask::SHADER);
|
||||||
|
@@ -534,9 +534,9 @@ void ObjectManager::device_update_transforms(Device *device,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
device->tex_alloc("__objects", dscene->objects);
|
device->tex_alloc(dscene->objects);
|
||||||
if(state.need_motion == Scene::MOTION_PASS) {
|
if(state.need_motion == Scene::MOTION_PASS) {
|
||||||
device->tex_alloc("__objects_vector", dscene->objects_vector);
|
device->tex_alloc(dscene->objects_vector);
|
||||||
}
|
}
|
||||||
|
|
||||||
dscene->data.bvh.have_motion = state.have_motion;
|
dscene->data.bvh.have_motion = state.have_motion;
|
||||||
@@ -638,7 +638,7 @@ void ObjectManager::device_update_flags(Device *device,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* allocate object flag */
|
/* allocate object flag */
|
||||||
device->tex_alloc("__object_flag", dscene->object_flag);
|
device->tex_alloc(dscene->object_flag);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ObjectManager::device_update_patch_map_offsets(Device *device, DeviceScene *dscene, Scene *scene)
|
void ObjectManager::device_update_patch_map_offsets(Device *device, DeviceScene *dscene, Scene *scene)
|
||||||
@@ -672,7 +672,7 @@ void ObjectManager::device_update_patch_map_offsets(Device *device, DeviceScene
|
|||||||
|
|
||||||
if(update) {
|
if(update) {
|
||||||
device->tex_free(dscene->objects);
|
device->tex_free(dscene->objects);
|
||||||
device->tex_alloc("__objects", dscene->objects);
|
device->tex_alloc(dscene->objects);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -91,7 +91,7 @@ void ParticleSystemManager::device_update_particles(Device *device, DeviceScene
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
device->tex_alloc("__particles", dscene->particles);
|
device->tex_alloc(dscene->particles);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ParticleSystemManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress)
|
void ParticleSystemManager::device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress& progress)
|
||||||
|
@@ -40,8 +40,47 @@
|
|||||||
|
|
||||||
CCL_NAMESPACE_BEGIN
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
DeviceScene::DeviceScene(Device *device)
|
||||||
|
: bvh_nodes(device, "__bvh_nodes"),
|
||||||
|
bvh_leaf_nodes(device, "__bvh_leaf_nodes"),
|
||||||
|
object_node(device, "__object_node"),
|
||||||
|
prim_tri_index(device, "__prim_tri_index"),
|
||||||
|
prim_tri_verts(device, "__prim_tri_verts"),
|
||||||
|
prim_type(device, "__prim_type"),
|
||||||
|
prim_visibility(device, "__prim_visibility"),
|
||||||
|
prim_index(device, "__prim_index"),
|
||||||
|
prim_object(device, "__prim_object"),
|
||||||
|
prim_time(device, "__prim_time"),
|
||||||
|
tri_shader(device, "__tri_shader"),
|
||||||
|
tri_vnormal(device, "__tri_vnormal"),
|
||||||
|
tri_vindex(device, "__tri_vindex"),
|
||||||
|
tri_patch(device, "__tri_patch"),
|
||||||
|
tri_patch_uv(device, "__tri_patch_uv"),
|
||||||
|
curves(device, "__curves"),
|
||||||
|
curve_keys(device, "__curve_keys"),
|
||||||
|
patches(device, "__patches"),
|
||||||
|
objects(device, "__objects"),
|
||||||
|
objects_vector(device, "__objects_vector"),
|
||||||
|
attributes_map(device, "__attributes_map"),
|
||||||
|
attributes_float(device, "__attributes_float"),
|
||||||
|
attributes_float3(device, "__attributes_float3"),
|
||||||
|
attributes_uchar4(device, "__attributes_uchar4"),
|
||||||
|
light_distribution(device, "__light_distribution"),
|
||||||
|
light_data(device, "__light_data"),
|
||||||
|
light_background_marginal_cdf(device, "__light_background_marginal_cdf"),
|
||||||
|
light_background_conditional_cdf(device, "__light_background_conditional_cdf"),
|
||||||
|
particles(device, "__particles"),
|
||||||
|
svm_nodes(device, "__svm_nodes"),
|
||||||
|
shader_flag(device, "__shader_flag"),
|
||||||
|
object_flag(device, "__object_flag"),
|
||||||
|
lookup_table(device, "__lookup_table"),
|
||||||
|
sobol_directions(device, "__sobol_directions")
|
||||||
|
{
|
||||||
|
memset(&data, 0, sizeof(data));
|
||||||
|
}
|
||||||
|
|
||||||
Scene::Scene(const SceneParams& params_, Device *device)
|
Scene::Scene(const SceneParams& params_, Device *device)
|
||||||
: device(device), params(params_)
|
: device(device), dscene(device), params(params_)
|
||||||
{
|
{
|
||||||
memset(&dscene.data, 0, sizeof(dscene.data));
|
memset(&dscene.data, 0, sizeof(dscene.data));
|
||||||
|
|
||||||
|
@@ -114,6 +114,8 @@ public:
|
|||||||
device_vector<uint> sobol_directions;
|
device_vector<uint> sobol_directions;
|
||||||
|
|
||||||
KernelData data;
|
KernelData data;
|
||||||
|
|
||||||
|
DeviceScene(Device *device);
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Scene Parameters */
|
/* Scene Parameters */
|
||||||
|
@@ -479,7 +479,7 @@ void ShaderManager::device_update_common(Device *device,
|
|||||||
has_transparent_shadow |= (flag & SD_HAS_TRANSPARENT_SHADOW) != 0;
|
has_transparent_shadow |= (flag & SD_HAS_TRANSPARENT_SHADOW) != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
device->tex_alloc("__shader_flag", dscene->shader_flag);
|
device->tex_alloc(dscene->shader_flag);
|
||||||
|
|
||||||
/* lookup tables */
|
/* lookup tables */
|
||||||
KernelTables *ktables = &dscene->data.tables;
|
KernelTables *ktables = &dscene->data.tables;
|
||||||
|
@@ -130,7 +130,7 @@ void SVMShaderManager::device_update(Device *device, DeviceScene *dscene, Scene
|
|||||||
}
|
}
|
||||||
|
|
||||||
dscene->svm_nodes.steal_data(svm_nodes);
|
dscene->svm_nodes.steal_data(svm_nodes);
|
||||||
device->tex_alloc("__svm_nodes", dscene->svm_nodes);
|
device->tex_alloc(dscene->svm_nodes);
|
||||||
|
|
||||||
for(i = 0; i < scene->shaders.size(); i++) {
|
for(i = 0; i < scene->shaders.size(); i++) {
|
||||||
Shader *shader = scene->shaders[i];
|
Shader *shader = scene->shaders[i];
|
||||||
|
@@ -45,7 +45,7 @@ void LookupTables::device_update(Device *device, DeviceScene *dscene)
|
|||||||
device->tex_free(dscene->lookup_table);
|
device->tex_free(dscene->lookup_table);
|
||||||
|
|
||||||
if(lookup_tables.size() > 0)
|
if(lookup_tables.size() > 0)
|
||||||
device->tex_alloc("__lookup_table", dscene->lookup_table);
|
device->tex_alloc(dscene->lookup_table);
|
||||||
|
|
||||||
need_update = false;
|
need_update = false;
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user