Cycles: memory usage report

This commit adds memory usage information that is displayed while rendering.

It reports memory used by the device, meaning:

- For CPU rendering it reports real memory consumption.
- For GPU rendering it reports GPU memory consumption, which also means the
  same amount of memory is used on the host side (see the sketch below).
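As an illustration of the GPU case (a sketch mirroring the CUDADevice::mem_alloc
change further down in this commit, not additional code), the bytes allocated on
the device are counted in the same host-side Stats object:

    /* sketch: GPU allocation, accounted in the host-side Stats */
    void mem_alloc(device_memory& mem, MemoryType type)
    {
        cuda_push_context();
        CUdeviceptr device_pointer;
        size_t size = mem.memory_size();
        cuda_assert(cuMemAlloc(&device_pointer, size))
        mem.device_pointer = (device_ptr)device_pointer;
        stats.mem_alloc(size); /* same counter the status line reports */
        cuda_pop_context();
    }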

The report shows memory requested by Cycles, not memory actually allocated on
the device. Real memory usage might be higher due to memory fragmentation or
an optimistic memory allocator.

There's nothing we can do about this.

Also, in contrast with Blender Internal's render, Cycles' memory usage does
not include memory used by the scene; only memory needed by Cycles itself is
displayed. So don't be alarmed if the memory usage reported by Cycles is much
lower than Blender Internal's.

This commit also adds a RenderEngine.update_memory_stats callback, which an
external engine uses to report its memory consumption to Blender. This
information is used to generate the information line shown after rendering
finishes.
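For reference, this is roughly how Cycles drives the new callback from C++
(a minimal sketch based on the BlenderSession::update_status_progress()
change below; the byte counters are converted to megabytes first):

    /* sketch, not part of the diff below */
    float mem_used = (float)session->stats.mem_used / 1024.0f / 1024.0f;
    float mem_peak = (float)session->stats.mem_peak / 1024.0f / 1024.0f;

    b_engine.update_memory_stats(mem_used, mem_peak);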
Author: Sergey Sharybin
Date: 2012-11-05 08:04:57 +00:00
Parent: d71004ea69
Commit: 6eec49ed20
19 changed files with 151 additions and 37 deletions

View File

@@ -477,11 +477,15 @@ void BlenderSession::update_status_progress()
float progress;
double total_time;
char time_str[128];
float mem_used = (float)session->stats.mem_used / 1024.0f / 1024.0f;
float mem_peak = (float)session->stats.mem_peak / 1024.0f / 1024.0f;
get_status(status, substatus);
get_progress(progress, total_time);
timestatus = b_scene.name();
timestatus = string_printf("Mem: %.2fM, Peak: %.2fM | ", mem_used, mem_peak);
timestatus += b_scene.name();
if(b_rlay_name != "")
timestatus += ", " + b_rlay_name;
timestatus += " | ";
@@ -494,6 +498,7 @@ void BlenderSession::update_status_progress()
if(status != last_status) {
b_engine.update_stats("", (timestatus + status).c_str());
b_engine.update_memory_stats(mem_used, mem_peak);
last_status = status;
}
if(progress != last_progress) {

View File

@@ -78,36 +78,36 @@ void Device::draw_pixels(device_memory& rgba, int y, int w, int h, int dy, int w
glDisable(GL_BLEND);
}
Device *Device::create(DeviceInfo& info, bool background, int threads)
Device *Device::create(DeviceInfo& info, Stats &stats, bool background, int threads)
{
Device *device;
switch(info.type) {
case DEVICE_CPU:
device = device_cpu_create(info, threads);
device = device_cpu_create(info, stats, threads);
break;
#ifdef WITH_CUDA
case DEVICE_CUDA:
if(cuLibraryInit())
device = device_cuda_create(info, background);
device = device_cuda_create(info, stats, background);
else
device = NULL;
break;
#endif
#ifdef WITH_MULTI
case DEVICE_MULTI:
device = device_multi_create(info, background);
device = device_multi_create(info, stats, background);
break;
#endif
#ifdef WITH_NETWORK
case DEVICE_NETWORK:
device = device_network_create(info, "127.0.0.1");
device = device_network_create(info, stats, "127.0.0.1");
break;
#endif
#ifdef WITH_OPENCL
case DEVICE_OPENCL:
if(clLibraryInit())
device = device_opencl_create(info, background);
device = device_opencl_create(info, stats, background);
else
device = NULL;
break;

View File

@@ -25,6 +25,7 @@
#include "device_task.h"
#include "util_list.h"
#include "util_stats.h"
#include "util_string.h"
#include "util_thread.h"
#include "util_types.h"
@@ -72,7 +73,7 @@ public:
class Device {
protected:
Device() {}
Device(Stats &stats_) : stats(stats_) {}
bool background;
string error_msg;
@@ -84,6 +85,9 @@ public:
DeviceInfo info;
virtual const string& error_message() { return error_msg; }
/* statistics */
Stats &stats;
/* regular memory */
virtual void mem_alloc(device_memory& mem, MemoryType type) = 0;
virtual void mem_copy_to(device_memory& mem) = 0;
@@ -130,7 +134,7 @@ public:
virtual int device_number(Device *sub_device) { return 0; }
/* static */
static Device *create(DeviceInfo& info, bool background = true, int threads = 0);
static Device *create(DeviceInfo& info, Stats &stats, bool background = true, int threads = 0);
static DeviceType type_from_string(const char *name);
static string string_from_type(DeviceType type);

View File

@@ -45,7 +45,7 @@ public:
TaskPool task_pool;
KernelGlobals *kg;
CPUDevice(int threads_num)
CPUDevice(Stats &stats, int threads_num) : Device(stats)
{
kg = kernel_globals_create();
@@ -67,6 +67,8 @@ public:
void mem_alloc(device_memory& mem, MemoryType type)
{
mem.device_pointer = mem.data_pointer;
stats.mem_alloc(mem.memory_size());
}
void mem_copy_to(device_memory& mem)
@@ -87,6 +89,8 @@ public:
void mem_free(device_memory& mem)
{
mem.device_pointer = 0;
stats.mem_free(mem.memory_size());
}
void const_copy_to(const char *name, void *host, size_t size)
@@ -98,11 +102,15 @@ public:
{
kernel_tex_copy(kg, name, mem.data_pointer, mem.data_width, mem.data_height);
mem.device_pointer = mem.data_pointer;
stats.mem_alloc(mem.memory_size());
}
void tex_free(device_memory& mem)
{
mem.device_pointer = 0;
stats.mem_free(mem.memory_size());
}
void *osl_memory()
@@ -283,9 +291,9 @@ public:
}
};
Device *device_cpu_create(DeviceInfo& info, int threads)
Device *device_cpu_create(DeviceInfo& info, Stats &stats, int threads)
{
return new CPUDevice(threads);
return new CPUDevice(stats, threads);
}
void device_cpu_info(vector<DeviceInfo>& devices)

View File

@@ -157,7 +157,7 @@ public:
cuda_assert(cuCtxSetCurrent(NULL));
}
CUDADevice(DeviceInfo& info, bool background_)
CUDADevice(DeviceInfo& info, Stats &stats, bool background_) : Device(stats)
{
background = background_;
@@ -316,8 +316,10 @@ public:
{
cuda_push_context();
CUdeviceptr device_pointer;
cuda_assert(cuMemAlloc(&device_pointer, mem.memory_size()))
size_t size = mem.memory_size();
cuda_assert(cuMemAlloc(&device_pointer, size))
mem.device_pointer = (device_ptr)device_pointer;
stats.mem_alloc(size);
cuda_pop_context();
}
@@ -356,6 +358,8 @@ public:
cuda_pop_context();
mem.device_pointer = 0;
stats.mem_free(mem.memory_size());
}
}
@@ -424,6 +428,8 @@ public:
cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_NORMALIZED_COORDINATES))
mem.device_pointer = (device_ptr)handle;
stats.mem_alloc(size);
}
else {
cuda_pop_context();
@@ -463,6 +469,8 @@ public:
tex_interp_map.erase(tex_interp_map.find(mem.device_pointer));
mem.device_pointer = 0;
stats.mem_free(mem.memory_size());
}
else {
tex_interp_map.erase(tex_interp_map.find(mem.device_pointer));
@@ -707,6 +715,8 @@ public:
mem.device_pointer = pmem.cuTexId;
pixel_mem_map[mem.device_pointer] = pmem;
stats.mem_alloc(mem.memory_size());
return;
}
else {
@@ -762,6 +772,8 @@ public:
pixel_mem_map.erase(pixel_mem_map.find(mem.device_pointer));
mem.device_pointer = 0;
stats.mem_free(mem.memory_size());
return;
}
@@ -896,9 +908,9 @@ public:
}
};
Device *device_cuda_create(DeviceInfo& info, bool background)
Device *device_cuda_create(DeviceInfo& info, Stats &stats, bool background)
{
return new CUDADevice(info, background);
return new CUDADevice(info, stats, background);
}
void device_cuda_info(vector<DeviceInfo>& devices)

View File

@@ -23,11 +23,11 @@ CCL_NAMESPACE_BEGIN
class Device;
Device *device_cpu_create(DeviceInfo& info, int threads);
Device *device_opencl_create(DeviceInfo& info, bool background);
Device *device_cuda_create(DeviceInfo& info, bool background);
Device *device_network_create(DeviceInfo& info, const char *address);
Device *device_multi_create(DeviceInfo& info, bool background);
Device *device_cpu_create(DeviceInfo& info, Stats &stats, int threads);
Device *device_opencl_create(DeviceInfo& info, Stats &stats, bool background);
Device *device_cuda_create(DeviceInfo& info, Stats &stats, bool background);
Device *device_network_create(DeviceInfo& info, Stats &stats, const char *address);
Device *device_multi_create(DeviceInfo& info, Stats &stats, bool background);
void device_cpu_info(vector<DeviceInfo>& devices);
void device_opencl_info(vector<DeviceInfo>& devices);

View File

@@ -46,14 +46,14 @@ public:
list<SubDevice> devices;
device_ptr unique_ptr;
MultiDevice(DeviceInfo& info, bool background_)
: unique_ptr(1)
MultiDevice(DeviceInfo& info, Stats &stats, bool background_)
: Device(stats), unique_ptr(1)
{
Device *device;
background = background_;
foreach(DeviceInfo& subinfo, info.multi_devices) {
device = Device::create(subinfo, background);
device = Device::create(subinfo, stats, background);
devices.push_back(SubDevice(device));
}
@@ -314,9 +314,9 @@ public:
}
};
Device *device_multi_create(DeviceInfo& info, bool background)
Device *device_multi_create(DeviceInfo& info, Stats &stats, bool background)
{
return new MultiDevice(info, background);
return new MultiDevice(info, stats, background);
}
static bool device_multi_add(vector<DeviceInfo>& devices, DeviceType type, bool with_display, bool with_advanced_shading, const char *id_fmt, int num)

View File

@@ -32,8 +32,8 @@ public:
boost::asio::io_service io_service;
tcp::socket socket;
NetworkDevice(const char *address)
: socket(io_service)
NetworkDevice(Stats &stats, const char *address)
: Device(stats), socket(io_service)
{
stringstream portstr;
portstr << SERVER_PORT;
@@ -202,9 +202,9 @@ public:
}
};
Device *device_network_create(DeviceInfo& info, const char *address)
Device *device_network_create(DeviceInfo& info, Stats &stats, const char *address)
{
return new NetworkDevice(address);
return new NetworkDevice(stats, address);
}
void device_network_info(vector<DeviceInfo>& devices)

View File

@@ -144,7 +144,8 @@ public:
}
}
OpenCLDevice(DeviceInfo& info, bool background_)
OpenCLDevice(DeviceInfo& info, Stats &stats, bool background_)
: Device(stats)
{
background = background_;
cpPlatform = NULL;
@@ -473,6 +474,8 @@ public:
mem.device_pointer = (device_ptr)clCreateBuffer(cxContext, CL_MEM_READ_WRITE, size, NULL, &ciErr);
opencl_assert(ciErr);
stats.mem_alloc(size);
}
void mem_copy_to(device_memory& mem)
@@ -506,6 +509,8 @@ public:
ciErr = clReleaseMemObject(CL_MEM_PTR(mem.device_pointer));
mem.device_pointer = 0;
opencl_assert(ciErr);
stats.mem_free(mem.memory_size());
}
}
@@ -728,9 +733,9 @@ public:
}
};
Device *device_opencl_create(DeviceInfo& info, bool background)
Device *device_opencl_create(DeviceInfo& info, Stats &stats, bool background)
{
return new OpenCLDevice(info, background);
return new OpenCLDevice(info, stats, background);
}
void device_opencl_info(vector<DeviceInfo>& devices)

View File

@@ -101,7 +101,7 @@ RenderTile::RenderTile()
RenderBuffers::RenderBuffers(Device *device_)
{
device = device_;
device = device_;
}
RenderBuffers::~RenderBuffers()

View File

@@ -42,13 +42,14 @@ Session::Session(const SessionParams& params_)
: params(params_),
tile_manager(params.progressive, params.samples, params.tile_size, params.start_resolution,
params.background == false || params.progressive_refine, params.background,
max(params.device.multi_devices.size(), 1))
max(params.device.multi_devices.size(), 1)),
stats()
{
device_use_gl = ((params.device.type != DEVICE_CPU) && !params.background);
TaskScheduler::init(params.threads);
device = Device::create(params.device, params.background, params.threads);
device = Device::create(params.device, stats, params.background, params.threads);
if(params.background) {
buffers = NULL;

View File

@@ -24,6 +24,7 @@
#include "tile.h"
#include "util_progress.h"
#include "util_stats.h"
#include "util_thread.h"
#include "util_vector.h"
@@ -112,6 +113,7 @@ public:
Progress progress;
SessionParams params;
TileManager tile_manager;
Stats stats;
boost::function<void(RenderTile&)> write_render_tile_cb;
boost::function<void(RenderTile&)> update_render_tile_cb;

View File

@@ -0,0 +1,53 @@
/*
* Copyright 2012, Blender Foundation.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#ifndef __UTIL_STATS_H__
#define __UTIL_STATS_H__
#include "util_thread.h"
CCL_NAMESPACE_BEGIN
class Stats {
public:
Stats() : lock(), mem_used(0), mem_peak(0) {}
void mem_alloc(size_t size) {
lock.lock();
mem_used += size;
if(mem_used > mem_peak)
mem_peak = mem_used;
lock.unlock();
}
void mem_free(size_t size) {
lock.lock();
mem_used -= size;
lock.unlock();
}
spin_lock lock;
size_t mem_used;
size_t mem_peak;
};
CCL_NAMESPACE_END
#endif /* __UTIL_STATS_H__ */
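Illustrative usage sketch (not part of the commit; some_device_alloc/free are
placeholder names): every device backend is expected to keep these counters in
sync with its allocations, as the CPU/CUDA/OpenCL changes above now do.

    void some_device_alloc(Stats &stats, device_memory &mem)
    {
        /* ... perform the actual device allocation ... */
        stats.mem_alloc(mem.memory_size()); /* raises mem_used, updates mem_peak */
    }

    void some_device_free(Stats &stats, device_memory &mem)
    {
        /* ... release the device allocation ... */
        stats.mem_free(mem.memory_size()); /* lowers mem_used, keeps mem_peak */
    }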

View File

@@ -33,6 +33,9 @@ typedef boost::mutex thread_mutex;
typedef boost::mutex::scoped_lock thread_scoped_lock;
typedef boost::condition_variable thread_condition_variable;
/* use boost for spinlocks as well */
typedef boost::detail::spinlock spin_lock;
/* own pthread based implementation, to avoid boost version conflicts with
* dynamically loaded blender plugins */

View File

@@ -295,7 +295,11 @@ static void make_renderinfo_string(RenderStats *rs, Scene *scene, char *str)
if (rs->tothalo) spos += sprintf(spos, "Ha:%d ", rs->tothalo);
if (rs->totstrand) spos += sprintf(spos, "St:%d ", rs->totstrand);
if (rs->totlamp) spos += sprintf(spos, "La:%d ", rs->totlamp);
spos += sprintf(spos, "Mem:%.2fM (%.2fM, peak %.2fM) ", megs_used_memory, mmap_used_memory, megs_peak_memory);
if (rs->mem_peak == 0.0f)
spos += sprintf(spos, "Mem:%.2fM (%.2fM, peak %.2fM) ", megs_used_memory, mmap_used_memory, megs_peak_memory);
else
spos += sprintf(spos, "Mem:%.2fM, Peak: %.2fM ", rs->mem_used, rs->mem_peak);
if (rs->curfield)
spos += sprintf(spos, "Field %d ", rs->curfield);

View File

@@ -370,6 +370,11 @@ static void rna_def_render_engine(BlenderRNA *brna)
prop = RNA_def_float(func, "progress", 0, 0.0f, 1.0f, "", "Percentage of render that's done", 0.0f, 1.0f);
RNA_def_property_flag(prop, PROP_REQUIRED);
func = RNA_def_function(srna, "update_memory_stats", "RE_engine_update_memory_stats");
RNA_def_float(func, "memory_used", 0, 0.0f, FLT_MAX, "", "Current memory usage in megabytes", 0.0f, FLT_MAX);
RNA_def_float(func, "memory_peak", 0, 0.0f, FLT_MAX, "", "Peak memory usage in megabytes", 0.0f, FLT_MAX);
RNA_def_property_flag(prop, PROP_REQUIRED);
func = RNA_def_function(srna, "report", "RE_engine_report");
prop = RNA_def_enum_flag(func, "type", wm_report_items, 0, "Type", "");
RNA_def_property_flag(prop, PROP_REQUIRED);

View File

@@ -115,6 +115,7 @@ void RE_engine_end_result(RenderEngine *engine, struct RenderResult *result, int
int RE_engine_test_break(RenderEngine *engine);
void RE_engine_update_stats(RenderEngine *engine, const char *stats, const char *info);
void RE_engine_update_progress(RenderEngine *engine, float progress);
void RE_engine_update_memory_stats(RenderEngine *engine, float mem_used, float mem_peak);
void RE_engine_report(RenderEngine *engine, int type, const char *msg);
int RE_engine_render(struct Render *re, int do_all);

View File

@@ -148,6 +148,7 @@ typedef struct RenderStats {
double starttime, lastframetime;
const char *infostr, *statstr;
char scene_name[MAX_ID_NAME - 2];
float mem_used, mem_peak;
} RenderStats;
/* *********************** API ******************** */

View File

@@ -290,6 +290,16 @@ void RE_engine_update_progress(RenderEngine *engine, float progress)
}
}
void RE_engine_update_memory_stats(RenderEngine *engine, float mem_used, float mem_peak)
{
Render *re = engine->re;
if (re) {
re->i.mem_used = mem_used;
re->i.mem_peak = mem_peak;
}
}
void RE_engine_report(RenderEngine *engine, int type, const char *msg)
{
Render *re = engine->re;