Merge branch 'master' into 28
This commit is contained in:
@@ -1613,10 +1613,23 @@ int2 CUDASplitKernel::split_kernel_local_size()
|
|||||||
return make_int2(32, 1);
|
return make_int2(32, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
int2 CUDASplitKernel::split_kernel_global_size(device_memory& /*kg*/, device_memory& /*data*/, DeviceTask * /*task*/)
|
int2 CUDASplitKernel::split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask * /*task*/)
|
||||||
{
|
{
|
||||||
/* TODO(mai): implement something here to detect ideal work size */
|
size_t free;
|
||||||
return make_int2(256, 256);
|
size_t total;
|
||||||
|
|
||||||
|
device->cuda_push_context();
|
||||||
|
cuda_assert(cuMemGetInfo(&free, &total));
|
||||||
|
device->cuda_pop_context();
|
||||||
|
|
||||||
|
VLOG(1) << "Maximum device allocation size: "
|
||||||
|
<< string_human_readable_number(free) << " bytes. ("
|
||||||
|
<< string_human_readable_size(free) << ").";
|
||||||
|
|
||||||
|
size_t num_elements = max_elements_for_max_buffer_size(kg, data, free / 2);
|
||||||
|
int2 global_size = make_int2(round_down((int)sqrt(num_elements), 32), (int)sqrt(num_elements));
|
||||||
|
VLOG(1) << "Global size: " << global_size << ".";
|
||||||
|
return global_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool device_cuda_init(void)
|
bool device_cuda_init(void)
|
||||||
|
@@ -128,8 +128,14 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
|
|||||||
local_size[1] = lsize[1];
|
local_size[1] = lsize[1];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Number of elements in the global state buffer */
|
||||||
|
int num_global_elements = global_size[0] * global_size[1];
|
||||||
|
|
||||||
|
/* Allocate all required global memory once. */
|
||||||
|
if(first_tile) {
|
||||||
|
first_tile = false;
|
||||||
|
|
||||||
/* Set global size */
|
/* Set global size */
|
||||||
size_t global_size[2];
|
|
||||||
{
|
{
|
||||||
int2 gsize = split_kernel_global_size(kgbuffer, kernel_data, task);
|
int2 gsize = split_kernel_global_size(kgbuffer, kernel_data, task);
|
||||||
|
|
||||||
@@ -140,14 +146,9 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
|
|||||||
global_size[1] = round_up(gsize[1], local_size[1]);
|
global_size[1] = round_up(gsize[1], local_size[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Number of elements in the global state buffer */
|
num_global_elements = global_size[0] * global_size[1];
|
||||||
int num_global_elements = global_size[0] * global_size[1];
|
|
||||||
assert(num_global_elements % WORK_POOL_SIZE == 0);
|
assert(num_global_elements % WORK_POOL_SIZE == 0);
|
||||||
|
|
||||||
/* Allocate all required global memory once. */
|
|
||||||
if(first_tile) {
|
|
||||||
first_tile = false;
|
|
||||||
|
|
||||||
/* Calculate max groups */
|
/* Calculate max groups */
|
||||||
|
|
||||||
/* Denotes the maximum work groups possible w.r.t. current requested tile size. */
|
/* Denotes the maximum work groups possible w.r.t. current requested tile size. */
|
||||||
|
@@ -95,6 +95,9 @@ private:
|
|||||||
/* Marked True in constructor and marked false at the end of path_trace(). */
|
/* Marked True in constructor and marked false at the end of path_trace(). */
|
||||||
bool first_tile;
|
bool first_tile;
|
||||||
|
|
||||||
|
/* Cached global size */
|
||||||
|
size_t global_size[2];
|
||||||
|
|
||||||
public:
|
public:
|
||||||
explicit DeviceSplitKernel(Device* device);
|
explicit DeviceSplitKernel(Device* device);
|
||||||
virtual ~DeviceSplitKernel();
|
virtual ~DeviceSplitKernel();
|
||||||
|
@@ -1274,6 +1274,7 @@ static void ui_item_rna_size(
|
|||||||
if (!w) {
|
if (!w) {
|
||||||
if (type == PROP_ENUM && icon_only) {
|
if (type == PROP_ENUM && icon_only) {
|
||||||
w = ui_text_icon_width(layout, "", ICON_BLANK1, 0);
|
w = ui_text_icon_width(layout, "", ICON_BLANK1, 0);
|
||||||
|
if (index != RNA_ENUM_VALUE)
|
||||||
w += 0.6f * UI_UNIT_X;
|
w += 0.6f * UI_UNIT_X;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
@@ -164,8 +164,8 @@ typedef struct MLoop {
|
|||||||
* MEdge *ed = &medge[mloop[lt->tri[j]].e];
|
* MEdge *ed = &medge[mloop[lt->tri[j]].e];
|
||||||
* unsigned int tri_edge[2] = {mloop[lt->tri[j]].v, mloop[lt->tri[j_next]].v};
|
* unsigned int tri_edge[2] = {mloop[lt->tri[j]].v, mloop[lt->tri[j_next]].v};
|
||||||
*
|
*
|
||||||
* if (ELEM(ed->v1, tri_edge[0], tri_edge[1]) &&
|
* if (((ed->v1 == tri_edge[0]) && (ed->v2 == tri_edge[1])) ||
|
||||||
* ELEM(ed->v2, tri_edge[0], tri_edge[1]))
|
* ((ed->v1 == tri_edge[1]) && (ed->v2 == tri_edge[0])))
|
||||||
* {
|
* {
|
||||||
* printf("real edge found %u %u\n", tri_edge[0], tri_edge[1]);
|
* printf("real edge found %u %u\n", tri_edge[0], tri_edge[1]);
|
||||||
* }
|
* }
|
||||||
|
@@ -507,7 +507,7 @@ static void rna_float_print(FILE *f, float num)
|
|||||||
{
|
{
|
||||||
if (num == -FLT_MAX) fprintf(f, "-FLT_MAX");
|
if (num == -FLT_MAX) fprintf(f, "-FLT_MAX");
|
||||||
else if (num == FLT_MAX) fprintf(f, "FLT_MAX");
|
else if (num == FLT_MAX) fprintf(f, "FLT_MAX");
|
||||||
else if ((int64_t)num == num) fprintf(f, "%.1ff", num);
|
else if ((ABS(num) < INT64_MAX) && ((int64_t)num == num)) fprintf(f, "%.1ff", num);
|
||||||
else fprintf(f, "%.10ff", num);
|
else fprintf(f, "%.10ff", num);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user