Cycles: Pack kernel textures into buffers for OpenCL

Image textures were being packed into a single buffer for OpenCL, which
limited the amount of memory available for images to the size of one
buffer (usually 4gb on AMD hardware). By packing textures into multiple
buffers that limit is removed, while simultaneously reducing the number
of buffers that need to be passed to each kernel.

Benchmarks were within 2%.

Fixes T51554.

Differential Revision: https://developer.blender.org/D2745
This commit is contained in:
Mai Lavelle
2017-08-08 07:12:04 -04:00
parent b53e35c655
commit ec8ae4d5e9
25 changed files with 685 additions and 328 deletions

View File

@@ -34,11 +34,13 @@ set(SRC
set(SRC_OPENCL set(SRC_OPENCL
opencl/opencl.h opencl/opencl.h
opencl/memory_manager.h
opencl/opencl_base.cpp opencl/opencl_base.cpp
opencl/opencl_mega.cpp opencl/opencl_mega.cpp
opencl/opencl_split.cpp opencl/opencl_split.cpp
opencl/opencl_util.cpp opencl/opencl_util.cpp
opencl/memory_manager.cpp
) )
if(WITH_CYCLES_NETWORK) if(WITH_CYCLES_NETWORK)

View File

@@ -379,11 +379,9 @@ DeviceInfo Device::get_multi_device(vector<DeviceInfo> subdevices)
info.num = 0; info.num = 0;
info.has_bindless_textures = true; info.has_bindless_textures = true;
info.pack_images = false;
foreach(DeviceInfo &device, subdevices) { foreach(DeviceInfo &device, subdevices) {
assert(device.type == info.multi_devices[0].type); assert(device.type == info.multi_devices[0].type);
info.pack_images |= device.pack_images;
info.has_bindless_textures &= device.has_bindless_textures; info.has_bindless_textures &= device.has_bindless_textures;
} }

View File

@@ -53,7 +53,6 @@ public:
int num; int num;
bool display_device; bool display_device;
bool advanced_shading; bool advanced_shading;
bool pack_images;
bool has_bindless_textures; /* flag for GPU and Multi device */ bool has_bindless_textures; /* flag for GPU and Multi device */
bool use_split_kernel; /* Denotes if the device is going to run cycles using split-kernel */ bool use_split_kernel; /* Denotes if the device is going to run cycles using split-kernel */
vector<DeviceInfo> multi_devices; vector<DeviceInfo> multi_devices;
@@ -65,7 +64,6 @@ public:
num = 0; num = 0;
display_device = false; display_device = false;
advanced_shading = true; advanced_shading = true;
pack_images = false;
has_bindless_textures = false; has_bindless_textures = false;
use_split_kernel = false; use_split_kernel = false;
} }

View File

@@ -977,7 +977,6 @@ void device_cpu_info(vector<DeviceInfo>& devices)
info.id = "CPU"; info.id = "CPU";
info.num = 0; info.num = 0;
info.advanced_shading = true; info.advanced_shading = true;
info.pack_images = false;
devices.insert(devices.begin(), info); devices.insert(devices.begin(), info);
} }

View File

@@ -2164,7 +2164,6 @@ void device_cuda_info(vector<DeviceInfo>& devices)
info.advanced_shading = (major >= 2); info.advanced_shading = (major >= 2);
info.has_bindless_textures = (major >= 3); info.has_bindless_textures = (major >= 3);
info.pack_images = false;
int pci_location[3] = {0, 0, 0}; int pci_location[3] = {0, 0, 0};
cuDeviceGetAttribute(&pci_location[0], CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, num); cuDeviceGetAttribute(&pci_location[0], CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, num);

View File

@@ -95,7 +95,6 @@ void device_opencl_info(vector<DeviceInfo>& devices)
/* We don't know if it's used for display, but assume it is. */ /* We don't know if it's used for display, but assume it is. */
info.display_device = true; info.display_device = true;
info.advanced_shading = OpenCLInfo::kernel_use_advanced_shading(platform_name); info.advanced_shading = OpenCLInfo::kernel_use_advanced_shading(platform_name);
info.pack_images = true;
info.use_split_kernel = OpenCLInfo::kernel_use_split(platform_name, info.use_split_kernel = OpenCLInfo::kernel_use_split(platform_name,
device_type); device_type);
info.id = string("OPENCL_") + platform_name + "_" + device_name + "_" + hardware_id; info.id = string("OPENCL_") + platform_name + "_" + device_name + "_" + hardware_id;

View File

@@ -0,0 +1,253 @@
/*
* Copyright 2011-2017 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef WITH_OPENCL
#include "util/util_foreach.h"
#include "device/opencl/opencl.h"
#include "device/opencl/memory_manager.h"
CCL_NAMESPACE_BEGIN
void MemoryManager::DeviceBuffer::add_allocation(Allocation& allocation)
{
allocations.push_back(&allocation);
}
void MemoryManager::DeviceBuffer::update_device_memory(OpenCLDeviceBase *device)
{
bool need_realloc = false;
/* Calculate total size and remove any freed. */
size_t total_size = 0;
for(int i = allocations.size()-1; i >= 0; i--) {
Allocation* allocation = allocations[i];
/* Remove allocations that have been freed. */
if(!allocation->mem || allocation->mem->memory_size() == 0) {
allocation->device_buffer = NULL;
allocation->size = 0;
allocations.erase(allocations.begin()+i);
need_realloc = true;
continue;
}
/* Get actual size for allocation. */
size_t alloc_size = align_up(allocation->mem->memory_size(), 16);
if(allocation->size != alloc_size) {
/* Allocation is either new or resized. */
allocation->size = alloc_size;
allocation->needs_copy_to_device = true;
need_realloc = true;
}
total_size += alloc_size;
}
if(need_realloc) {
cl_ulong max_buffer_size;
clGetDeviceInfo(device->cdDevice, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &max_buffer_size, NULL);
if(total_size > max_buffer_size) {
device->set_error("Scene too complex to fit in available memory.");
return;
}
device_memory *new_buffer = new device_memory;
new_buffer->resize(total_size);
device->mem_alloc(string_printf("buffer_%p", this).data(), *new_buffer, MEM_READ_ONLY);
size_t offset = 0;
foreach(Allocation* allocation, allocations) {
if(allocation->needs_copy_to_device) {
/* Copy from host to device. */
opencl_device_assert(device, clEnqueueWriteBuffer(device->cqCommandQueue,
CL_MEM_PTR(new_buffer->device_pointer),
CL_FALSE,
offset,
allocation->mem->memory_size(),
(void*)allocation->mem->data_pointer,
0, NULL, NULL
));
allocation->needs_copy_to_device = false;
}
else {
/* Fast copy from memory already on device. */
opencl_device_assert(device, clEnqueueCopyBuffer(device->cqCommandQueue,
CL_MEM_PTR(buffer->device_pointer),
CL_MEM_PTR(new_buffer->device_pointer),
allocation->desc.offset,
offset,
allocation->mem->memory_size(),
0, NULL, NULL
));
}
allocation->desc.offset = offset;
offset += allocation->size;
}
device->mem_free(*buffer);
delete buffer;
buffer = new_buffer;
}
else {
assert(total_size == buffer->data_size);
size_t offset = 0;
foreach(Allocation* allocation, allocations) {
if(allocation->needs_copy_to_device) {
/* Copy from host to device. */
opencl_device_assert(device, clEnqueueWriteBuffer(device->cqCommandQueue,
CL_MEM_PTR(buffer->device_pointer),
CL_FALSE,
offset,
allocation->mem->memory_size(),
(void*)allocation->mem->data_pointer,
0, NULL, NULL
));
allocation->needs_copy_to_device = false;
}
offset += allocation->size;
}
}
/* Not really necessary, but seems to improve responsiveness for some reason. */
clFinish(device->cqCommandQueue);
}
void MemoryManager::DeviceBuffer::free(OpenCLDeviceBase *device)
{
device->mem_free(*buffer);
}
MemoryManager::DeviceBuffer* MemoryManager::smallest_device_buffer()
{
DeviceBuffer* smallest = device_buffers;
foreach(DeviceBuffer& device_buffer, device_buffers) {
if(device_buffer.size < smallest->size) {
smallest = &device_buffer;
}
}
return smallest;
}
MemoryManager::MemoryManager(OpenCLDeviceBase *device) : device(device), need_update(false)
{
}
void MemoryManager::free()
{
foreach(DeviceBuffer& device_buffer, device_buffers) {
device_buffer.free(device);
}
}
void MemoryManager::alloc(const char *name, device_memory& mem)
{
Allocation& allocation = allocations[name];
allocation.mem = &mem;
allocation.needs_copy_to_device = true;
if(!allocation.device_buffer) {
DeviceBuffer* device_buffer = smallest_device_buffer();
allocation.device_buffer = device_buffer;
allocation.desc.device_buffer = device_buffer - device_buffers;
device_buffer->add_allocation(allocation);
device_buffer->size += mem.memory_size();
}
need_update = true;
}
bool MemoryManager::free(device_memory& mem)
{
foreach(AllocationsMap::value_type& value, allocations) {
Allocation& allocation = value.second;
if(allocation.mem == &mem) {
allocation.device_buffer->size -= mem.memory_size();
allocation.mem = NULL;
allocation.needs_copy_to_device = false;
need_update = true;
return true;
}
}
return false;
}
MemoryManager::BufferDescriptor MemoryManager::get_descriptor(string name)
{
update_device_memory();
Allocation& allocation = allocations[name];
return allocation.desc;
}
void MemoryManager::update_device_memory()
{
if(!need_update) {
return;
}
need_update = false;
foreach(DeviceBuffer& device_buffer, device_buffers) {
device_buffer.update_device_memory(device);
}
}
void MemoryManager::set_kernel_arg_buffers(cl_kernel kernel, cl_uint *narg)
{
update_device_memory();
foreach(DeviceBuffer& device_buffer, device_buffers) {
if(device_buffer.buffer->device_pointer) {
device->kernel_set_args(kernel, (*narg)++, *device_buffer.buffer);
}
else {
device->kernel_set_args(kernel, (*narg)++, device->null_mem);
}
}
}
CCL_NAMESPACE_END
#endif /* WITH_OPENCL */

View File

@@ -0,0 +1,105 @@
/*
* Copyright 2011-2017 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "device/device.h"
#include "util/util_map.h"
#include "util/util_vector.h"
#include "util/util_string.h"
#include "clew.h"
CCL_NAMESPACE_BEGIN
class OpenCLDeviceBase;
class MemoryManager {
public:
static const int NUM_DEVICE_BUFFERS = 8;
struct BufferDescriptor {
uint device_buffer;
cl_ulong offset;
};
private:
struct DeviceBuffer;
struct Allocation {
device_memory *mem;
DeviceBuffer *device_buffer;
size_t size; /* Size of actual allocation, may be larger than requested. */
BufferDescriptor desc;
bool needs_copy_to_device;
Allocation() : mem(NULL), device_buffer(NULL), size(0), needs_copy_to_device(false)
{
}
};
struct DeviceBuffer {
device_memory *buffer;
vector<Allocation*> allocations;
size_t size; /* Size of all allocations. */
DeviceBuffer() : buffer(new device_memory), size(0)
{
}
~DeviceBuffer() {
delete buffer;
buffer = NULL;
}
void add_allocation(Allocation& allocation);
void update_device_memory(OpenCLDeviceBase *device);
void free(OpenCLDeviceBase *device);
};
OpenCLDeviceBase *device;
DeviceBuffer device_buffers[NUM_DEVICE_BUFFERS];
typedef unordered_map<string, Allocation> AllocationsMap;
AllocationsMap allocations;
bool need_update;
DeviceBuffer* smallest_device_buffer();
public:
MemoryManager(OpenCLDeviceBase *device);
void free(); /* Free all memory. */
void alloc(const char *name, device_memory& mem);
bool free(device_memory& mem);
BufferDescriptor get_descriptor(string name);
void update_device_memory();
void set_kernel_arg_buffers(cl_kernel kernel, cl_uint *narg);
};
CCL_NAMESPACE_END

View File

@@ -25,6 +25,8 @@
#include "clew.h" #include "clew.h"
#include "device/opencl/memory_manager.h"
CCL_NAMESPACE_BEGIN CCL_NAMESPACE_BEGIN
/* Disable workarounds, seems to be working fine on latest drivers. */ /* Disable workarounds, seems to be working fine on latest drivers. */
@@ -224,6 +226,18 @@ public:
static string get_kernel_md5(); static string get_kernel_md5();
}; };
#define opencl_device_assert(device, stmt) \
{ \
cl_int err = stmt; \
\
if(err != CL_SUCCESS) { \
string message = string_printf("OpenCL error: %s in %s (%s:%d)", clewErrorString(err), #stmt, __FILE__, __LINE__); \
if((device)->error_msg == "") \
(device)->error_msg = message; \
fprintf(stderr, "%s\n", message.c_str()); \
} \
} (void)0
#define opencl_assert(stmt) \ #define opencl_assert(stmt) \
{ \ { \
cl_int err = stmt; \ cl_int err = stmt; \
@@ -344,6 +358,7 @@ public:
size_t global_size_round_up(int group_size, int global_size); size_t global_size_round_up(int group_size, int global_size);
void enqueue_kernel(cl_kernel kernel, size_t w, size_t h, size_t max_workgroup_size = -1); void enqueue_kernel(cl_kernel kernel, size_t w, size_t h, size_t max_workgroup_size = -1);
void set_kernel_arg_mem(cl_kernel kernel, cl_uint *narg, const char *name); void set_kernel_arg_mem(cl_kernel kernel, cl_uint *narg, const char *name);
void set_kernel_arg_buffers(cl_kernel kernel, cl_uint *narg);
void film_convert(DeviceTask& task, device_ptr buffer, device_ptr rgba_byte, device_ptr rgba_half); void film_convert(DeviceTask& task, device_ptr buffer, device_ptr rgba_byte, device_ptr rgba_half);
void shader(DeviceTask& task); void shader(DeviceTask& task);
@@ -525,6 +540,34 @@ protected:
virtual string build_options_for_base_program( virtual string build_options_for_base_program(
const DeviceRequestedFeatures& /*requested_features*/); const DeviceRequestedFeatures& /*requested_features*/);
private:
MemoryManager memory_manager;
friend MemoryManager;
struct tex_info_t {
uint buffer, padding;
cl_ulong offset;
uint width, height, depth, options;
};
static_assert_align(tex_info_t, 16);
vector<tex_info_t> texture_descriptors;
device_memory texture_descriptors_buffer;
struct Texture {
device_memory* mem;
InterpolationType interpolation;
ExtensionType extension;
};
typedef map<string, Texture> TexturesMap;
TexturesMap textures;
bool textures_need_update;
protected:
void flush_texture_buffers();
}; };
Device *opencl_create_mega_device(DeviceInfo& info, Stats& stats, bool background); Device *opencl_create_mega_device(DeviceInfo& info, Stats& stats, bool background);

View File

@@ -63,7 +63,7 @@ void OpenCLDeviceBase::opencl_assert_err(cl_int err, const char* where)
} }
OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, bool background_) OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, bool background_)
: Device(info, stats, background_) : Device(info, stats, background_), memory_manager(this)
{ {
cpPlatform = NULL; cpPlatform = NULL;
cdDevice = NULL; cdDevice = NULL;
@@ -71,6 +71,7 @@ OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, bool backgrou
cqCommandQueue = NULL; cqCommandQueue = NULL;
null_mem = 0; null_mem = 0;
device_initialized = false; device_initialized = false;
textures_need_update = true;
vector<OpenCLPlatformDevice> usable_devices; vector<OpenCLPlatformDevice> usable_devices;
OpenCLInfo::get_usable_devices(&usable_devices); OpenCLInfo::get_usable_devices(&usable_devices);
@@ -126,6 +127,12 @@ OpenCLDeviceBase::OpenCLDeviceBase(DeviceInfo& info, Stats &stats, bool backgrou
return; return;
} }
/* Allocate this right away so that texture_descriptors_buffer is placed at offset 0 in the device memory buffers */
texture_descriptors.resize(1);
texture_descriptors_buffer.resize(1);
texture_descriptors_buffer.data_pointer = (device_ptr)&texture_descriptors[0];
memory_manager.alloc("texture_descriptors", texture_descriptors_buffer);
fprintf(stderr, "Device init success\n"); fprintf(stderr, "Device init success\n");
device_initialized = true; device_initialized = true;
} }
@@ -134,6 +141,8 @@ OpenCLDeviceBase::~OpenCLDeviceBase()
{ {
task_pool.stop(); task_pool.stop();
memory_manager.free();
if(null_mem) if(null_mem)
clReleaseMemObject(CL_MEM_PTR(null_mem)); clReleaseMemObject(CL_MEM_PTR(null_mem));
@@ -493,29 +502,31 @@ void OpenCLDeviceBase::const_copy_to(const char *name, void *host, size_t size)
void OpenCLDeviceBase::tex_alloc(const char *name, void OpenCLDeviceBase::tex_alloc(const char *name,
device_memory& mem, device_memory& mem,
InterpolationType /*interpolation*/, InterpolationType interpolation,
ExtensionType /*extension*/) ExtensionType extension)
{ {
VLOG(1) << "Texture allocate: " << name << ", " VLOG(1) << "Texture allocate: " << name << ", "
<< string_human_readable_number(mem.memory_size()) << " bytes. (" << string_human_readable_number(mem.memory_size()) << " bytes. ("
<< string_human_readable_size(mem.memory_size()) << ")"; << string_human_readable_size(mem.memory_size()) << ")";
mem_alloc(NULL, mem, MEM_READ_ONLY);
mem_copy_to(mem); memory_manager.alloc(name, mem);
assert(mem_map.find(name) == mem_map.end());
mem_map.insert(MemMap::value_type(name, mem.device_pointer)); textures[name] = {&mem, interpolation, extension};
textures_need_update = true;
} }
void OpenCLDeviceBase::tex_free(device_memory& mem) void OpenCLDeviceBase::tex_free(device_memory& mem)
{ {
if(mem.device_pointer) { if(memory_manager.free(mem)) {
foreach(const MemMap::value_type& value, mem_map) { textures_need_update = true;
if(value.second == mem.device_pointer) {
mem_map.erase(value.first);
break;
}
} }
mem_free(mem); foreach(TexturesMap::value_type& value, textures) {
if(value.second.mem == &mem) {
textures.erase(value.first);
break;
}
} }
} }
@@ -581,6 +592,104 @@ void OpenCLDeviceBase::set_kernel_arg_mem(cl_kernel kernel, cl_uint *narg, const
opencl_assert(clSetKernelArg(kernel, (*narg)++, sizeof(ptr), (void*)&ptr)); opencl_assert(clSetKernelArg(kernel, (*narg)++, sizeof(ptr), (void*)&ptr));
} }
void OpenCLDeviceBase::set_kernel_arg_buffers(cl_kernel kernel, cl_uint *narg)
{
flush_texture_buffers();
memory_manager.set_kernel_arg_buffers(kernel, narg);
}
void OpenCLDeviceBase::flush_texture_buffers()
{
if(!textures_need_update) {
return;
}
textures_need_update = false;
/* Setup slots for textures. */
int num_slots = 0;
struct texture_slot_t {
string name;
int slot;
};
vector<texture_slot_t> texture_slots;
#define KERNEL_TEX(type, ttype, name) \
if(textures.find(#name) != textures.end()) { \
texture_slots.push_back({#name, num_slots}); \
} \
num_slots++;
#include "kernel/kernel_textures.h"
int num_data_slots = num_slots;
foreach(TexturesMap::value_type& tex, textures) {
string name = tex.first;
if(string_startswith(name, "__tex_image")) {
int pos = name.rfind("_");
int id = atoi(name.data() + pos + 1);
texture_slots.push_back({name, num_data_slots + id});
num_slots = max(num_slots, num_data_slots + id + 1);
}
}
/* Realloc texture descriptors buffer. */
memory_manager.free(texture_descriptors_buffer);
texture_descriptors.resize(num_slots);
texture_descriptors_buffer.resize(num_slots * sizeof(tex_info_t));
texture_descriptors_buffer.data_pointer = (device_ptr)&texture_descriptors[0];
memory_manager.alloc("texture_descriptors", texture_descriptors_buffer);
/* Fill in descriptors */
foreach(texture_slot_t& slot, texture_slots) {
Texture& tex = textures[slot.name];
tex_info_t& info = texture_descriptors[slot.slot];
MemoryManager::BufferDescriptor desc = memory_manager.get_descriptor(slot.name);
info.offset = desc.offset;
info.buffer = desc.device_buffer;
if(string_startswith(slot.name, "__tex_image")) {
info.width = tex.mem->data_width;
info.height = tex.mem->data_height;
info.depth = tex.mem->data_depth;
info.options = 0;
if(tex.interpolation == INTERPOLATION_CLOSEST) {
info.options |= (1 << 0);
}
switch(tex.extension) {
case EXTENSION_REPEAT:
info.options |= (1 << 1);
break;
case EXTENSION_EXTEND:
info.options |= (1 << 2);
break;
case EXTENSION_CLIP:
info.options |= (1 << 3);
break;
default:
break;
}
}
}
/* Force write of descriptors. */
memory_manager.free(texture_descriptors_buffer);
memory_manager.alloc("texture_descriptors", texture_descriptors_buffer);
}
void OpenCLDeviceBase::film_convert(DeviceTask& task, device_ptr buffer, device_ptr rgba_byte, device_ptr rgba_half) void OpenCLDeviceBase::film_convert(DeviceTask& task, device_ptr buffer, device_ptr rgba_byte, device_ptr rgba_half)
{ {
/* cast arguments to cl types */ /* cast arguments to cl types */
@@ -605,10 +714,7 @@ void OpenCLDeviceBase::film_convert(DeviceTask& task, device_ptr buffer, device_
d_rgba, d_rgba,
d_buffer); d_buffer);
#define KERNEL_TEX(type, ttype, name) \ set_kernel_arg_buffers(ckFilmConvertKernel, &start_arg_index);
set_kernel_arg_mem(ckFilmConvertKernel, &start_arg_index, #name);
#include "kernel/kernel_textures.h"
#undef KERNEL_TEX
start_arg_index += kernel_set_args(ckFilmConvertKernel, start_arg_index += kernel_set_args(ckFilmConvertKernel,
start_arg_index, start_arg_index,
@@ -1030,10 +1136,7 @@ void OpenCLDeviceBase::shader(DeviceTask& task)
d_output_luma); d_output_luma);
} }
#define KERNEL_TEX(type, ttype, name) \ set_kernel_arg_buffers(kernel, &start_arg_index);
set_kernel_arg_mem(kernel, &start_arg_index, #name);
#include "kernel/kernel_textures.h"
#undef KERNEL_TEX
start_arg_index += kernel_set_args(kernel, start_arg_index += kernel_set_args(kernel,
start_arg_index, start_arg_index,

View File

@@ -82,10 +82,7 @@ public:
d_buffer, d_buffer,
d_rng_state); d_rng_state);
#define KERNEL_TEX(type, ttype, name) \ set_kernel_arg_buffers(ckPathTraceKernel, &start_arg_index);
set_kernel_arg_mem(ckPathTraceKernel, &start_arg_index, #name);
#include "kernel/kernel_textures.h"
#undef KERNEL_TEX
start_arg_index += kernel_set_args(ckPathTraceKernel, start_arg_index += kernel_set_args(ckPathTraceKernel,
start_arg_index, start_arg_index,

View File

@@ -99,6 +99,8 @@ public:
void thread_run(DeviceTask *task) void thread_run(DeviceTask *task)
{ {
flush_texture_buffers();
if(task->type == DeviceTask::FILM_CONVERT) { if(task->type == DeviceTask::FILM_CONVERT) {
film_convert(*task, task->buffer, task->rgba_byte, task->rgba_half); film_convert(*task, task->buffer, task->rgba_byte, task->rgba_half);
} }
@@ -113,10 +115,19 @@ public:
*/ */
typedef struct KernelGlobals { typedef struct KernelGlobals {
ccl_constant KernelData *data; ccl_constant KernelData *data;
ccl_global char *buffers[8];
typedef struct _tex_info_t {
uint buffer, padding;
ulong offset;
uint width, height, depth, options;
} _tex_info_t;
#define KERNEL_TEX(type, ttype, name) \ #define KERNEL_TEX(type, ttype, name) \
ccl_global type *name; _tex_info_t name;
#include "kernel/kernel_textures.h" #include "kernel/kernel_textures.h"
#undef KERNEL_TEX #undef KERNEL_TEX
SplitData split_data; SplitData split_data;
SplitParams split_param_data; SplitParams split_param_data;
} KernelGlobals; } KernelGlobals;
@@ -217,11 +228,7 @@ public:
*cached_memory.ray_state, *cached_memory.ray_state,
*cached_memory.rng_state); *cached_memory.rng_state);
/* TODO(sergey): Avoid map lookup here. */ device->set_kernel_arg_buffers(program(), &start_arg_index);
#define KERNEL_TEX(type, ttype, name) \
device->set_kernel_arg_mem(program(), &start_arg_index, #name);
#include "kernel/kernel_textures.h"
#undef KERNEL_TEX
start_arg_index += start_arg_index +=
device->kernel_set_args(program(), device->kernel_set_args(program(),
@@ -352,11 +359,7 @@ public:
ray_state, ray_state,
rtile.rng_state); rtile.rng_state);
/* TODO(sergey): Avoid map lookup here. */ device->set_kernel_arg_buffers(device->program_data_init(), &start_arg_index);
#define KERNEL_TEX(type, ttype, name) \
device->set_kernel_arg_mem(device->program_data_init(), &start_arg_index, #name);
#include "kernel/kernel_textures.h"
#undef KERNEL_TEX
start_arg_index += start_arg_index +=
device->kernel_set_args(device->program_data_init(), device->kernel_set_args(device->program_data_init(),

View File

@@ -142,7 +142,7 @@
/* data lookup defines */ /* data lookup defines */
#define kernel_data (*kg->data) #define kernel_data (*kg->data)
#define kernel_tex_fetch(t, index) kg->t[index] #define kernel_tex_fetch(tex, index) ((ccl_global tex##_t*)(kg->buffers[kg->tex.buffer] + kg->tex.offset))[(index)]
/* define NULL */ /* define NULL */
#define NULL 0 #define NULL 0

View File

@@ -23,6 +23,10 @@
# include "util/util_vector.h" # include "util/util_vector.h"
#endif #endif
#ifdef __KERNEL_OPENCL__
# include "util/util_atomic.h"
#endif
CCL_NAMESPACE_BEGIN CCL_NAMESPACE_BEGIN
/* On the CPU, we pass along the struct KernelGlobals to nearly everywhere in /* On the CPU, we pass along the struct KernelGlobals to nearly everywhere in
@@ -109,11 +113,22 @@ typedef struct KernelGlobals {
#ifdef __KERNEL_OPENCL__ #ifdef __KERNEL_OPENCL__
# define KERNEL_TEX(type, ttype, name) \
typedef type name##_t;
# include "kernel/kernel_textures.h"
typedef struct tex_info_t {
uint buffer, padding;
ulong offset;
uint width, height, depth, options;
} tex_info_t;
typedef ccl_addr_space struct KernelGlobals { typedef ccl_addr_space struct KernelGlobals {
ccl_constant KernelData *data; ccl_constant KernelData *data;
ccl_global char *buffers[8];
# define KERNEL_TEX(type, ttype, name) \ # define KERNEL_TEX(type, ttype, name) \
ccl_global type *name; tex_info_t name;
# include "kernel/kernel_textures.h" # include "kernel/kernel_textures.h"
# ifdef __SPLIT_KERNEL__ # ifdef __SPLIT_KERNEL__
@@ -122,6 +137,57 @@ typedef ccl_addr_space struct KernelGlobals {
# endif # endif
} KernelGlobals; } KernelGlobals;
#define KERNEL_BUFFER_PARAMS \
ccl_global char *buffer0, \
ccl_global char *buffer1, \
ccl_global char *buffer2, \
ccl_global char *buffer3, \
ccl_global char *buffer4, \
ccl_global char *buffer5, \
ccl_global char *buffer6, \
ccl_global char *buffer7
#define KERNEL_BUFFER_ARGS buffer0, buffer1, buffer2, buffer3, buffer4, buffer5, buffer6, buffer7
ccl_device_inline void kernel_set_buffer_pointers(KernelGlobals *kg, KERNEL_BUFFER_PARAMS)
{
#ifdef __SPLIT_KERNEL__
if(ccl_local_id(0) + ccl_local_id(1) == 0)
#endif
{
kg->buffers[0] = buffer0;
kg->buffers[1] = buffer1;
kg->buffers[2] = buffer2;
kg->buffers[3] = buffer3;
kg->buffers[4] = buffer4;
kg->buffers[5] = buffer5;
kg->buffers[6] = buffer6;
kg->buffers[7] = buffer7;
}
# ifdef __SPLIT_KERNEL__
ccl_barrier(CCL_LOCAL_MEM_FENCE);
# endif
}
ccl_device_inline void kernel_set_buffer_info(KernelGlobals *kg)
{
# ifdef __SPLIT_KERNEL__
if(ccl_local_id(0) + ccl_local_id(1) == 0)
# endif
{
ccl_global tex_info_t *info = (ccl_global tex_info_t*)kg->buffers[0];
# define KERNEL_TEX(type, ttype, name) \
kg->name = *(info++);
# include "kernel/kernel_textures.h"
}
# ifdef __SPLIT_KERNEL__
ccl_barrier(CCL_LOCAL_MEM_FENCE);
# endif
}
#endif /* __KERNEL_OPENCL__ */ #endif /* __KERNEL_OPENCL__ */
/* Interpolated lookup table access */ /* Interpolated lookup table access */

View File

@@ -15,30 +15,42 @@
*/ */
/* For OpenCL all images are packed in a single array, and we do manual lookup /* For OpenCL we do manual lookup and interpolation. */
* and interpolation. */
ccl_device_inline ccl_global tex_info_t* kernel_tex_info(KernelGlobals *kg, uint id) {
const uint tex_offset = id
#define KERNEL_TEX(type, ttype, name) + 1
#include "kernel/kernel_textures.h"
;
return &((ccl_global tex_info_t*)kg->buffers[0])[tex_offset];
}
#define tex_fetch(type, info, index) ((ccl_global type*)(kg->buffers[info->buffer] + info->offset))[(index)]
ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, int id, int offset) ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, int id, int offset)
{ {
const ccl_global tex_info_t *info = kernel_tex_info(kg, id);
const int texture_type = kernel_tex_type(id); const int texture_type = kernel_tex_type(id);
/* Float4 */ /* Float4 */
if(texture_type == IMAGE_DATA_TYPE_FLOAT4) { if(texture_type == IMAGE_DATA_TYPE_FLOAT4) {
return kernel_tex_fetch(__tex_image_float4_packed, offset); return tex_fetch(float4, info, offset);
} }
/* Byte4 */ /* Byte4 */
else if(texture_type == IMAGE_DATA_TYPE_BYTE4) { else if(texture_type == IMAGE_DATA_TYPE_BYTE4) {
uchar4 r = kernel_tex_fetch(__tex_image_byte4_packed, offset); uchar4 r = tex_fetch(uchar4, info, offset);
float f = 1.0f/255.0f; float f = 1.0f/255.0f;
return make_float4(r.x*f, r.y*f, r.z*f, r.w*f); return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
} }
/* Float */ /* Float */
else if(texture_type == IMAGE_DATA_TYPE_FLOAT) { else if(texture_type == IMAGE_DATA_TYPE_FLOAT) {
float f = kernel_tex_fetch(__tex_image_float_packed, offset); float f = tex_fetch(float, info, offset);
return make_float4(f, f, f, 1.0f); return make_float4(f, f, f, 1.0f);
} }
/* Byte */ /* Byte */
else { else {
uchar r = kernel_tex_fetch(__tex_image_byte_packed, offset); uchar r = tex_fetch(uchar, info, offset);
float f = r * (1.0f/255.0f); float f = r * (1.0f/255.0f);
return make_float4(f, f, f, 1.0f); return make_float4(f, f, f, 1.0f);
} }
@@ -64,17 +76,17 @@ ccl_device_inline float svm_image_texture_frac(float x, int *ix)
return x - (float)i; return x - (float)i;
} }
ccl_device_inline uint kernel_decode_image_interpolation(uint4 info) ccl_device_inline uint kernel_decode_image_interpolation(uint info)
{ {
return (info.w & (1 << 0)) ? INTERPOLATION_CLOSEST : INTERPOLATION_LINEAR; return (info & (1 << 0)) ? INTERPOLATION_CLOSEST : INTERPOLATION_LINEAR;
} }
ccl_device_inline uint kernel_decode_image_extension(uint4 info) ccl_device_inline uint kernel_decode_image_extension(uint info)
{ {
if(info.w & (1 << 1)) { if(info & (1 << 1)) {
return EXTENSION_REPEAT; return EXTENSION_REPEAT;
} }
else if(info.w & (1 << 2)) { else if(info & (1 << 2)) {
return EXTENSION_EXTEND; return EXTENSION_EXTEND;
} }
else { else {
@@ -84,13 +96,16 @@ ccl_device_inline uint kernel_decode_image_extension(uint4 info)
ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y) ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y)
{ {
uint4 info = kernel_tex_fetch(__tex_image_packed_info, id*2); const ccl_global tex_info_t *info = kernel_tex_info(kg, id);
uint width = info.x;
uint height = info.y; uint width = info->width;
uint offset = info.z; uint height = info->height;
uint offset = 0;
/* Decode image options. */ /* Decode image options. */
uint interpolation = kernel_decode_image_interpolation(info); uint interpolation = kernel_decode_image_interpolation(info->options);
uint extension = kernel_decode_image_extension(info); uint extension = kernel_decode_image_extension(info->options);
/* Actual sampling. */ /* Actual sampling. */
float4 r; float4 r;
int ix, iy, nix, niy; int ix, iy, nix, niy;
@@ -150,14 +165,17 @@ ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, fl
ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z) ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z)
{ {
uint4 info = kernel_tex_fetch(__tex_image_packed_info, id*2); const ccl_global tex_info_t *info = kernel_tex_info(kg, id);
uint width = info.x;
uint height = info.y; uint width = info->width;
uint offset = info.z; uint height = info->height;
uint depth = kernel_tex_fetch(__tex_image_packed_info, id*2+1).x; uint offset = 0;
uint depth = info->depth;
/* Decode image options. */ /* Decode image options. */
uint interpolation = kernel_decode_image_interpolation(info); uint interpolation = kernel_decode_image_interpolation(info->options);
uint extension = kernel_decode_image_extension(info); uint extension = kernel_decode_image_extension(info->options);
/* Actual sampling. */ /* Actual sampling. */
float4 r; float4 r;
int ix, iy, iz, nix, niy, niz; int ix, iy, iz, nix, niy, niz;

View File

@@ -184,15 +184,8 @@ KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_665)
# else # else
/* bindless textures */ /* bindless textures */
KERNEL_TEX(uint, texture_uint, __bindless_mapping) KERNEL_TEX(uint, texture_uint, __bindless_mapping)
# endif # endif /* __CUDA_ARCH__ */
#endif #endif /* __KERNEL_CUDA__ */
/* packed image (opencl) */
KERNEL_TEX(uchar4, texture_uchar4, __tex_image_byte4_packed)
KERNEL_TEX(float4, texture_float4, __tex_image_float4_packed)
KERNEL_TEX(uchar, texture_uchar, __tex_image_byte_packed)
KERNEL_TEX(float, texture_float, __tex_image_float_packed)
KERNEL_TEX(uint4, texture_uint4, __tex_image_packed_info)
#undef KERNEL_TEX #undef KERNEL_TEX
#undef KERNEL_IMAGE_TEX #undef KERNEL_IMAGE_TEX

View File

@@ -52,9 +52,7 @@ __kernel void kernel_ocl_path_trace(
ccl_global float *buffer, ccl_global float *buffer,
ccl_global uint *rng_state, ccl_global uint *rng_state,
#define KERNEL_TEX(type, ttype, name) \ KERNEL_BUFFER_PARAMS,
ccl_global type *name,
#include "kernel/kernel_textures.h"
int sample, int sample,
int sx, int sy, int sw, int sh, int offset, int stride) int sx, int sy, int sw, int sh, int offset, int stride)
@@ -63,9 +61,8 @@ __kernel void kernel_ocl_path_trace(
kg->data = data; kg->data = data;
#define KERNEL_TEX(type, ttype, name) \ kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
kg->name = name; kernel_set_buffer_info(kg);
#include "kernel/kernel_textures.h"
int x = sx + ccl_global_id(0); int x = sx + ccl_global_id(0);
int y = sy + ccl_global_id(1); int y = sy + ccl_global_id(1);
@@ -82,9 +79,7 @@ __kernel void kernel_ocl_shader(
ccl_global float4 *output, ccl_global float4 *output,
ccl_global float *output_luma, ccl_global float *output_luma,
#define KERNEL_TEX(type, ttype, name) \ KERNEL_BUFFER_PARAMS,
ccl_global type *name,
#include "kernel/kernel_textures.h"
int type, int sx, int sw, int offset, int sample) int type, int sx, int sw, int offset, int sample)
{ {
@@ -92,9 +87,8 @@ __kernel void kernel_ocl_shader(
kg->data = data; kg->data = data;
#define KERNEL_TEX(type, ttype, name) \ kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
kg->name = name; kernel_set_buffer_info(kg);
#include "kernel/kernel_textures.h"
int x = sx + ccl_global_id(0); int x = sx + ccl_global_id(0);
@@ -114,9 +108,7 @@ __kernel void kernel_ocl_bake(
ccl_global uint4 *input, ccl_global uint4 *input,
ccl_global float4 *output, ccl_global float4 *output,
#define KERNEL_TEX(type, ttype, name) \ KERNEL_BUFFER_PARAMS,
ccl_global type *name,
#include "kernel/kernel_textures.h"
int type, int filter, int sx, int sw, int offset, int sample) int type, int filter, int sx, int sw, int offset, int sample)
{ {
@@ -124,9 +116,8 @@ __kernel void kernel_ocl_bake(
kg->data = data; kg->data = data;
#define KERNEL_TEX(type, ttype, name) \ kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
kg->name = name; kernel_set_buffer_info(kg);
#include "kernel/kernel_textures.h"
int x = sx + ccl_global_id(0); int x = sx + ccl_global_id(0);
@@ -144,9 +135,7 @@ __kernel void kernel_ocl_convert_to_byte(
ccl_global uchar4 *rgba, ccl_global uchar4 *rgba,
ccl_global float *buffer, ccl_global float *buffer,
#define KERNEL_TEX(type, ttype, name) \ KERNEL_BUFFER_PARAMS,
ccl_global type *name,
#include "kernel/kernel_textures.h"
float sample_scale, float sample_scale,
int sx, int sy, int sw, int sh, int offset, int stride) int sx, int sy, int sw, int sh, int offset, int stride)
@@ -155,9 +144,8 @@ __kernel void kernel_ocl_convert_to_byte(
kg->data = data; kg->data = data;
#define KERNEL_TEX(type, ttype, name) \ kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
kg->name = name; kernel_set_buffer_info(kg);
#include "kernel/kernel_textures.h"
int x = sx + ccl_global_id(0); int x = sx + ccl_global_id(0);
int y = sy + ccl_global_id(1); int y = sy + ccl_global_id(1);
@@ -171,9 +159,7 @@ __kernel void kernel_ocl_convert_to_half_float(
ccl_global uchar4 *rgba, ccl_global uchar4 *rgba,
ccl_global float *buffer, ccl_global float *buffer,
#define KERNEL_TEX(type, ttype, name) \ KERNEL_BUFFER_PARAMS,
ccl_global type *name,
#include "kernel/kernel_textures.h"
float sample_scale, float sample_scale,
int sx, int sy, int sw, int sh, int offset, int stride) int sx, int sy, int sw, int sh, int offset, int stride)
@@ -182,9 +168,8 @@ __kernel void kernel_ocl_convert_to_half_float(
kg->data = data; kg->data = data;
#define KERNEL_TEX(type, ttype, name) \ kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
kg->name = name; kernel_set_buffer_info(kg);
#include "kernel/kernel_textures.h"
int x = sx + ccl_global_id(0); int x = sx + ccl_global_id(0);
int y = sy + ccl_global_id(1); int y = sy + ccl_global_id(1);

View File

@@ -25,11 +25,7 @@ __kernel void kernel_ocl_path_trace_data_init(
int num_elements, int num_elements,
ccl_global char *ray_state, ccl_global char *ray_state,
ccl_global uint *rng_state, ccl_global uint *rng_state,
KERNEL_BUFFER_PARAMS,
#define KERNEL_TEX(type, ttype, name) \
ccl_global type *name,
#include "kernel/kernel_textures.h"
int start_sample, int start_sample,
int end_sample, int end_sample,
int sx, int sy, int sw, int sh, int offset, int stride, int sx, int sy, int sw, int sh, int offset, int stride,
@@ -46,10 +42,7 @@ __kernel void kernel_ocl_path_trace_data_init(
num_elements, num_elements,
ray_state, ray_state,
rng_state, rng_state,
KERNEL_BUFFER_ARGS,
#define KERNEL_TEX(type, ttype, name) name,
#include "kernel/kernel_textures.h"
start_sample, start_sample,
end_sample, end_sample,
sx, sy, sw, sh, offset, stride, sx, sy, sw, sh, offset, stride,

View File

@@ -25,9 +25,7 @@ __kernel void KERNEL_NAME_EVAL(kernel_ocl_path_trace, KERNEL_NAME)(
ccl_global char *ray_state, ccl_global char *ray_state,
ccl_global uint *rng_state, ccl_global uint *rng_state,
#define KERNEL_TEX(type, ttype, name) \ KERNEL_BUFFER_PARAMS,
ccl_global type *name,
#include "kernel/kernel_textures.h"
ccl_global int *queue_index, ccl_global int *queue_index,
ccl_global char *use_queues_flag, ccl_global char *use_queues_flag,
@@ -52,12 +50,9 @@ __kernel void KERNEL_NAME_EVAL(kernel_ocl_path_trace, KERNEL_NAME)(
split_data_init(kg, &kernel_split_state, ccl_global_size(0)*ccl_global_size(1), split_data_buffer, ray_state); split_data_init(kg, &kernel_split_state, ccl_global_size(0)*ccl_global_size(1), split_data_buffer, ray_state);
#define KERNEL_TEX(type, ttype, name) \
kg->name = name;
#include "kernel/kernel_textures.h"
} }
ccl_barrier(CCL_LOCAL_MEM_FENCE); kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
KERNEL_NAME_EVAL(kernel, KERNEL_NAME)( KERNEL_NAME_EVAL(kernel, KERNEL_NAME)(
kg kg

View File

@@ -52,9 +52,7 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
ccl_global uint *rng_state, ccl_global uint *rng_state,
#ifdef __KERNEL_OPENCL__ #ifdef __KERNEL_OPENCL__
#define KERNEL_TEX(type, ttype, name) \ KERNEL_BUFFER_PARAMS,
ccl_global type *name,
#include "kernel/kernel_textures.h"
#endif #endif
int start_sample, int start_sample,
@@ -100,9 +98,8 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
split_data_init(kg, &kernel_split_state, num_elements, split_data_buffer, ray_state); split_data_init(kg, &kernel_split_state, num_elements, split_data_buffer, ray_state);
#ifdef __KERNEL_OPENCL__ #ifdef __KERNEL_OPENCL__
#define KERNEL_TEX(type, ttype, name) \ kernel_set_buffer_pointers(kg, KERNEL_BUFFER_ARGS);
kg->name = name; kernel_set_buffer_info(kg);
#include "kernel/kernel_textures.h"
#endif #endif
int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0); int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);

View File

@@ -43,7 +43,6 @@ static bool isfinite(half /*value*/)
ImageManager::ImageManager(const DeviceInfo& info) ImageManager::ImageManager(const DeviceInfo& info)
{ {
need_update = true; need_update = true;
pack_images = false;
osl_texture_system = NULL; osl_texture_system = NULL;
animation_frame = 0; animation_frame = 0;
@@ -87,11 +86,6 @@ ImageManager::~ImageManager()
} }
} }
void ImageManager::set_pack_images(bool pack_images_)
{
pack_images = pack_images_;
}
void ImageManager::set_osl_texture_system(void *texture_system) void ImageManager::set_osl_texture_system(void *texture_system)
{ {
osl_texture_system = texture_system; osl_texture_system = texture_system;
@@ -742,7 +736,7 @@ void ImageManager::device_load_image(Device *device,
pixels[3] = TEX_IMAGE_MISSING_A; pixels[3] = TEX_IMAGE_MISSING_A;
} }
if(!pack_images) { {
thread_scoped_lock device_lock(device_mutex); thread_scoped_lock device_lock(device_mutex);
device->tex_alloc(name.c_str(), device->tex_alloc(name.c_str(),
tex_img, tex_img,
@@ -771,7 +765,7 @@ void ImageManager::device_load_image(Device *device,
pixels[0] = TEX_IMAGE_MISSING_R; pixels[0] = TEX_IMAGE_MISSING_R;
} }
if(!pack_images) { {
thread_scoped_lock device_lock(device_mutex); thread_scoped_lock device_lock(device_mutex);
device->tex_alloc(name.c_str(), device->tex_alloc(name.c_str(),
tex_img, tex_img,
@@ -803,7 +797,7 @@ void ImageManager::device_load_image(Device *device,
pixels[3] = (TEX_IMAGE_MISSING_A * 255); pixels[3] = (TEX_IMAGE_MISSING_A * 255);
} }
if(!pack_images) { {
thread_scoped_lock device_lock(device_mutex); thread_scoped_lock device_lock(device_mutex);
device->tex_alloc(name.c_str(), device->tex_alloc(name.c_str(),
tex_img, tex_img,
@@ -831,7 +825,7 @@ void ImageManager::device_load_image(Device *device,
pixels[0] = (TEX_IMAGE_MISSING_R * 255); pixels[0] = (TEX_IMAGE_MISSING_R * 255);
} }
if(!pack_images) { {
thread_scoped_lock device_lock(device_mutex); thread_scoped_lock device_lock(device_mutex);
device->tex_alloc(name.c_str(), device->tex_alloc(name.c_str(),
tex_img, tex_img,
@@ -862,7 +856,7 @@ void ImageManager::device_load_image(Device *device,
pixels[3] = TEX_IMAGE_MISSING_A; pixels[3] = TEX_IMAGE_MISSING_A;
} }
if(!pack_images) { {
thread_scoped_lock device_lock(device_mutex); thread_scoped_lock device_lock(device_mutex);
device->tex_alloc(name.c_str(), device->tex_alloc(name.c_str(),
tex_img, tex_img,
@@ -890,7 +884,7 @@ void ImageManager::device_load_image(Device *device,
pixels[0] = TEX_IMAGE_MISSING_R; pixels[0] = TEX_IMAGE_MISSING_R;
} }
if(!pack_images) { {
thread_scoped_lock device_lock(device_mutex); thread_scoped_lock device_lock(device_mutex);
device->tex_alloc(name.c_str(), device->tex_alloc(name.c_str(),
tex_img, tex_img,
@@ -1047,9 +1041,6 @@ void ImageManager::device_update(Device *device,
pool.wait_work(); pool.wait_work();
if(pack_images)
device_pack_images(device, dscene, progress);
need_update = false; need_update = false;
} }
@@ -1079,141 +1070,6 @@ void ImageManager::device_update_slot(Device *device,
} }
} }
uint8_t ImageManager::pack_image_options(ImageDataType type, size_t slot)
{
uint8_t options = 0;
/* Image Options are packed into one uint:
* bit 0 -> Interpolation
* bit 1 + 2 + 3 -> Extension
*/
if(images[type][slot]->interpolation == INTERPOLATION_CLOSEST) {
options |= (1 << 0);
}
if(images[type][slot]->extension == EXTENSION_REPEAT) {
options |= (1 << 1);
}
else if(images[type][slot]->extension == EXTENSION_EXTEND) {
options |= (1 << 2);
}
else /* EXTENSION_CLIP */ {
options |= (1 << 3);
}
return options;
}
template<typename T>
void ImageManager::device_pack_images_type(
ImageDataType type,
const vector<device_vector<T>*>& cpu_textures,
device_vector<T> *device_image,
uint4 *info)
{
size_t size = 0, offset = 0;
/* First step is to calculate size of the texture we need. */
for(size_t slot = 0; slot < images[type].size(); slot++) {
if(images[type][slot] == NULL) {
continue;
}
device_vector<T>& tex_img = *cpu_textures[slot];
size += tex_img.size();
}
/* Now we know how much memory we need, so we can allocate and fill. */
T *pixels = device_image->resize(size);
for(size_t slot = 0; slot < images[type].size(); slot++) {
if(images[type][slot] == NULL) {
continue;
}
device_vector<T>& tex_img = *cpu_textures[slot];
uint8_t options = pack_image_options(type, slot);
const int index = type_index_to_flattened_slot(slot, type) * 2;
info[index] = make_uint4(tex_img.data_width,
tex_img.data_height,
offset,
options);
info[index+1] = make_uint4(tex_img.data_depth, 0, 0, 0);
memcpy(pixels + offset,
(void*)tex_img.data_pointer,
tex_img.memory_size());
offset += tex_img.size();
}
}
void ImageManager::device_pack_images(Device *device,
DeviceScene *dscene,
Progress& /*progess*/)
{
/* For OpenCL, we pack all image textures into a single large texture, and
* do our own interpolation in the kernel.
*/
/* TODO(sergey): This will over-allocate a bit, but this is constant memory
* so should be fine for a short term.
*/
const size_t info_size = max4(max_flattened_slot(IMAGE_DATA_TYPE_FLOAT4),
max_flattened_slot(IMAGE_DATA_TYPE_BYTE4),
max_flattened_slot(IMAGE_DATA_TYPE_FLOAT),
max_flattened_slot(IMAGE_DATA_TYPE_BYTE));
uint4 *info = dscene->tex_image_packed_info.resize(info_size*2);
/* Pack byte4 textures. */
device_pack_images_type(IMAGE_DATA_TYPE_BYTE4,
dscene->tex_byte4_image,
&dscene->tex_image_byte4_packed,
info);
/* Pack float4 textures. */
device_pack_images_type(IMAGE_DATA_TYPE_FLOAT4,
dscene->tex_float4_image,
&dscene->tex_image_float4_packed,
info);
/* Pack byte textures. */
device_pack_images_type(IMAGE_DATA_TYPE_BYTE,
dscene->tex_byte_image,
&dscene->tex_image_byte_packed,
info);
/* Pack float textures. */
device_pack_images_type(IMAGE_DATA_TYPE_FLOAT,
dscene->tex_float_image,
&dscene->tex_image_float_packed,
info);
/* Push textures to the device. */
if(dscene->tex_image_byte4_packed.size()) {
if(dscene->tex_image_byte4_packed.device_pointer) {
thread_scoped_lock device_lock(device_mutex);
device->tex_free(dscene->tex_image_byte4_packed);
}
device->tex_alloc("__tex_image_byte4_packed", dscene->tex_image_byte4_packed);
}
if(dscene->tex_image_float4_packed.size()) {
if(dscene->tex_image_float4_packed.device_pointer) {
thread_scoped_lock device_lock(device_mutex);
device->tex_free(dscene->tex_image_float4_packed);
}
device->tex_alloc("__tex_image_float4_packed", dscene->tex_image_float4_packed);
}
if(dscene->tex_image_byte_packed.size()) {
if(dscene->tex_image_byte_packed.device_pointer) {
thread_scoped_lock device_lock(device_mutex);
device->tex_free(dscene->tex_image_byte_packed);
}
device->tex_alloc("__tex_image_byte_packed", dscene->tex_image_byte_packed);
}
if(dscene->tex_image_float_packed.size()) {
if(dscene->tex_image_float_packed.device_pointer) {
thread_scoped_lock device_lock(device_mutex);
device->tex_free(dscene->tex_image_float_packed);
}
device->tex_alloc("__tex_image_float_packed", dscene->tex_image_float_packed);
}
if(dscene->tex_image_packed_info.size()) {
if(dscene->tex_image_packed_info.device_pointer) {
thread_scoped_lock device_lock(device_mutex);
device->tex_free(dscene->tex_image_packed_info);
}
device->tex_alloc("__tex_image_packed_info", dscene->tex_image_packed_info);
}
}
void ImageManager::device_free_builtin(Device *device, DeviceScene *dscene) void ImageManager::device_free_builtin(Device *device, DeviceScene *dscene)
{ {
for(int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) { for(int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
@@ -1239,18 +1095,6 @@ void ImageManager::device_free(Device *device, DeviceScene *dscene)
dscene->tex_float_image.clear(); dscene->tex_float_image.clear();
dscene->tex_byte_image.clear(); dscene->tex_byte_image.clear();
dscene->tex_half_image.clear(); dscene->tex_half_image.clear();
device->tex_free(dscene->tex_image_float4_packed);
device->tex_free(dscene->tex_image_byte4_packed);
device->tex_free(dscene->tex_image_float_packed);
device->tex_free(dscene->tex_image_byte_packed);
device->tex_free(dscene->tex_image_packed_info);
dscene->tex_image_float4_packed.clear();
dscene->tex_image_byte4_packed.clear();
dscene->tex_image_float_packed.clear();
dscene->tex_image_byte_packed.clear();
dscene->tex_image_packed_info.clear();
} }
CCL_NAMESPACE_END CCL_NAMESPACE_END

View File

@@ -76,7 +76,6 @@ public:
void device_free_builtin(Device *device, DeviceScene *dscene); void device_free_builtin(Device *device, DeviceScene *dscene);
void set_osl_texture_system(void *texture_system); void set_osl_texture_system(void *texture_system);
void set_pack_images(bool pack_images_);
bool set_animation_frame_update(int frame); bool set_animation_frame_update(int frame);
bool need_update; bool need_update;
@@ -130,7 +129,6 @@ private:
vector<Image*> images[IMAGE_DATA_NUM_TYPES]; vector<Image*> images[IMAGE_DATA_NUM_TYPES];
void *osl_texture_system; void *osl_texture_system;
bool pack_images;
bool file_load_image_generic(Image *img, bool file_load_image_generic(Image *img,
ImageInput **in, ImageInput **in,
@@ -152,8 +150,6 @@ private:
int flattened_slot_to_type_index(int flat_slot, ImageDataType *type); int flattened_slot_to_type_index(int flat_slot, ImageDataType *type);
string name_from_type(int type); string name_from_type(int type);
uint8_t pack_image_options(ImageDataType type, size_t slot);
void device_load_image(Device *device, void device_load_image(Device *device,
DeviceScene *dscene, DeviceScene *dscene,
Scene *scene, Scene *scene,
@@ -164,17 +160,6 @@ private:
DeviceScene *dscene, DeviceScene *dscene,
ImageDataType type, ImageDataType type,
int slot); int slot);
template<typename T>
void device_pack_images_type(
ImageDataType type,
const vector<device_vector<T>*>& cpu_textures,
device_vector<T> *device_image,
uint4 *info);
void device_pack_images(Device *device,
DeviceScene *dscene,
Progress& progess);
}; };
CCL_NAMESPACE_END CCL_NAMESPACE_END

View File

@@ -1925,16 +1925,7 @@ void MeshManager::device_update_displacement_images(Device *device,
if(node->special_type != SHADER_SPECIAL_TYPE_IMAGE_SLOT) { if(node->special_type != SHADER_SPECIAL_TYPE_IMAGE_SLOT) {
continue; continue;
} }
if(device->info.pack_images) {
/* If device requires packed images we need to update all
* images now, even if they're not used for displacement.
*/
image_manager->device_update(device,
dscene,
scene,
progress);
return;
}
ImageSlotTextureNode *image_node = static_cast<ImageSlotTextureNode*>(node); ImageSlotTextureNode *image_node = static_cast<ImageSlotTextureNode*>(node);
int slot = image_node->slot; int slot = image_node->slot;
if(slot != -1) { if(slot != -1) {

View File

@@ -149,8 +149,6 @@ void Scene::device_update(Device *device_, Progress& progress)
* - Lookup tables are done a second time to handle film tables * - Lookup tables are done a second time to handle film tables
*/ */
image_manager->set_pack_images(device->info.pack_images);
progress.set_status("Updating Shaders"); progress.set_status("Updating Shaders");
shader_manager->device_update(device, &dscene, this, progress); shader_manager->device_update(device, &dscene, this, progress);

View File

@@ -121,13 +121,6 @@ public:
vector<device_vector<uchar>* > tex_byte_image; vector<device_vector<uchar>* > tex_byte_image;
vector<device_vector<half>* > tex_half_image; vector<device_vector<half>* > tex_half_image;
/* opencl images */
device_vector<float4> tex_image_float4_packed;
device_vector<uchar4> tex_image_byte4_packed;
device_vector<float> tex_image_float_packed;
device_vector<uchar> tex_image_byte_packed;
device_vector<uint4> tex_image_packed_info;
KernelData data; KernelData data;
}; };