From 18da79f471f4aa15df895d253b03c9c3600411cf Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Thu, 17 Apr 2014 11:59:05 +0200 Subject: [PATCH] Cycles CUDA: only do async execution for GPUs not used for display. Otherwise devices used for display will lock up the UI too much. This means you might still get 100% CPU usage for the display device, but for other devices CPU usage should still be low. The check to see if a device is used for display may not be entirely reliable: it checks whether there is a watchdog timeout on the device, but I'm not entirely sure that such a timeout always exists for display devices or is always disabled for non-display devices, though some tools like cuda-gdb seem to make the same assumption. Ref T39559 --- intern/cycles/device/device_cuda.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index a47d0561e95..9739717df0f 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -666,6 +666,11 @@ public: cuda_assert(cuFuncSetBlockShape(cuPathTrace, xthreads, ythreads, 1)) cuda_assert(cuLaunchGridAsync(cuPathTrace, xblocks, yblocks, cuStream)) + if(info.display_device) { + /* don't use async for device used for display, locks up UI too much */ + cuda_assert(cuStreamSynchronize(cuStream)) + } + cuda_pop_context(); } @@ -995,7 +1000,6 @@ public: bool branched = task->integrator_branched; - /* keep rendering tiles until done */ while(task->acquire_tile(this, tile)) { int start_sample = tile.start_sample; @@ -1016,9 +1020,9 @@ public: tile.sample = sample + 1; task->update_progress(tile); - if(sample == sync_sample){ + if(!info.display_device && sample == sync_sample) { cuda_push_context(); - cuda_assert(cuEventRecord(tileDone, cuStream )) + cuda_assert(cuEventRecord(tileDone, cuStream)) cuda_assert(cuEventSynchronize(tileDone)) /* Do some time keeping to find out if we need to sync less */