Multi-thread generated image creation

Gives about a 2x speedup on a laptop when creating a new high-resolution generated image,
regardless of its type (color, color grid, UV grid).
This commit is contained in:
Sergey Sharybin
2016-05-05 23:33:40 +02:00
parent 3064270e13
commit 9b89071c9c
3 changed files with 267 additions and 47 deletions

View File

@@ -38,7 +38,17 @@
#include "BLF_api.h"
void BKE_image_buf_fill_color(unsigned char *rect, float *rect_float, int width, int height, const float color[4])
/* Arguments shared by every scanline task of the threaded solid-color fill. */
typedef struct FillColorThreadData {
/* Start of the byte buffer, 4 values per pixel; may be NULL. */
unsigned char *rect;
/* Start of the float buffer, 4 values per pixel; may be NULL. */
float *rect_float;
/* Number of pixels in one scanline. */
int width;
/* Fill color, copied from the caller's color argument. */
float color[4];
} FillColorThreadData;
static void image_buf_fill_color_slice(unsigned char *rect,
float *rect_float,
int width, int height,
const float color[4])
{
int x, y;
@@ -53,15 +63,12 @@ void BKE_image_buf_fill_color(unsigned char *rect, float *rect_float, int width,
}
}
}
if (rect) {
unsigned char ccol[4];
rgba_float_to_uchar(ccol, color);
for (y = 0; y < height; y++) {
for (x = 0; x < width; x++) {
rect[0] = ccol[0];
rect[1] = ccol[1];
rect[2] = ccol[2];
@@ -72,19 +79,55 @@ void BKE_image_buf_fill_color(unsigned char *rect, float *rect_float, int width,
}
}
void BKE_image_buf_fill_checker(unsigned char *rect, float *rect_float, int width, int height)
/* Scanline-range callback of the threaded color fill.
 *
 * data_v points to a FillColorThreadData. The buffers that are not NULL are
 * advanced by start_scanline * width * 4 values, and num_scanlines rows
 * starting there are filled through image_buf_fill_color_slice(). */
static void image_buf_fill_color_thread_do(void *data_v,
int start_scanline,
int num_scanlines)
{
/* these two passes could be combined into one, but it's more readable and
FillColorThreadData *data = (FillColorThreadData *)data_v;
size_t offset = ((size_t)start_scanline) * data->width * 4;
unsigned char *rect = (data->rect != NULL) ? (data->rect + offset) : NULL;
float *rect_float = (data->rect_float != NULL) ? (data->rect_float + offset) : NULL;
image_buf_fill_color_slice(rect,
rect_float,
data->width,
num_scanlines,
data->color);
}
/**
 * Fill an image buffer with a single color.
 *
 * \param rect: Byte buffer (4 values per pixel), may be NULL.
 * \param rect_float: Float buffer (4 values per pixel), may be NULL.
 * \param width, height: Buffer dimensions in pixels.
 * \param color: RGBA fill color.
 *
 * Buffers smaller than 64x64 pixels are filled directly by the caller's
 * thread; larger ones are handed to IMB_processor_apply_threaded_scanlines()
 * as scanline ranges.
 */
void BKE_image_buf_fill_color(unsigned char *rect,
                              float *rect_float,
                              int width, int height,
                              const float color[4])
{
	const size_t num_pixels = ((size_t)width) * height;
	FillColorThreadData task;

	/* Not enough work to be worth dispatching scanline tasks. */
	if (num_pixels < 64 * 64) {
		image_buf_fill_color_slice(rect, rect_float, width, height, color);
		return;
	}

	/* The task data lives on this stack frame and is only handed to the
	 * scanline callback through the call below. */
	task.rect = rect;
	task.rect_float = rect_float;
	task.width = width;
	copy_v4_v4(task.color, color);

	IMB_processor_apply_threaded_scanlines(height,
	                                       image_buf_fill_color_thread_do,
	                                       &task);
}
static void image_buf_fill_checker_slice(unsigned char *rect,
float *rect_float,
int width, int height,
int offset)
{
/* these two passes could be combined into one, but it's more readable and
* easy to tweak like this, speed isn't really that much of an issue in this situation... */
int checkerwidth = 32, dark = 1;
int x, y;
unsigned char *rect_orig = rect;
float *rect_float_orig = rect_float;
float h = 0.0, hoffs = 0.0;
float hsv[3] = {0.0f, 0.9f, 0.9f};
float rgb[3];
@@ -96,12 +139,12 @@ void BKE_image_buf_fill_checker(unsigned char *rect, float *rect_float, int widt
}
/* checkers */
for (y = 0; y < height; y++) {
for (y = offset; y < height + offset; y++) {
dark = powf(-1.0f, floorf(y / checkerwidth));
for (x = 0; x < width; x++) {
if (x % checkerwidth == 0) dark = -dark;
if (rect_float) {
if (dark > 0) {
rect_float[0] = rect_float[1] = rect_float[2] = dark_linear_color;
@@ -131,12 +174,12 @@ void BKE_image_buf_fill_checker(unsigned char *rect, float *rect_float, int widt
rect_float = rect_float_orig;
/* 2nd pass, colored + */
for (y = 0; y < height; y++) {
for (y = offset; y < height + offset; y++) {
hoffs = 0.125f * floorf(y / checkerwidth);
for (x = 0; x < width; x++) {
h = 0.125f * floorf(x / checkerwidth);
if ((abs((x % checkerwidth) - (checkerwidth / 2)) < 4) &&
(abs((y % checkerwidth) - (checkerwidth / 2)) < 4))
{
@@ -145,14 +188,14 @@ void BKE_image_buf_fill_checker(unsigned char *rect, float *rect_float, int widt
{
hsv[0] = fmodf(fabsf(h - hoffs), 1.0f);
hsv_to_rgb_v(hsv, rgb);
if (rect) {
rect[0] = (char)(rgb[0] * 255.0f);
rect[1] = (char)(rgb[1] * 255.0f);
rect[2] = (char)(rgb[2] * 255.0f);
rect[3] = 255;
}
if (rect_float) {
srgb_to_linearrgb_v3_v3(rect_float, rgb);
rect_float[3] = 1.0f;
@@ -166,13 +209,55 @@ void BKE_image_buf_fill_checker(unsigned char *rect, float *rect_float, int widt
}
}
/* Arguments shared by every scanline task of the threaded checker fill. */
typedef struct FillCheckerThreadData {
/* Start of the byte buffer, 4 values per pixel; may be NULL. */
unsigned char *rect;
/* Start of the float buffer, 4 values per pixel; may be NULL. */
float *rect_float;
/* Number of pixels in one scanline. */
int width;
} FillCheckerThreadData;
/* Scanline-range callback of the threaded checker fill.
 *
 * data_v points to a FillCheckerThreadData. The buffers that are not NULL are
 * advanced to the first value of start_scanline, and the slice function is
 * given start_scanline as its row offset so the pattern is computed from
 * absolute row numbers. */
static void image_buf_fill_checker_thread_do(void *data_v,
                                             int start_scanline,
                                             int num_scanlines)
{
	const FillCheckerThreadData *task = (const FillCheckerThreadData *)data_v;
	/* 4 values per pixel, in both the byte and the float buffer. */
	const size_t first_value = ((size_t)start_scanline) * task->width * 4;
	unsigned char *slice_rect = NULL;
	float *slice_rect_float = NULL;

	if (task->rect != NULL) {
		slice_rect = task->rect + first_value;
	}
	if (task->rect_float != NULL) {
		slice_rect_float = task->rect_float + first_value;
	}

	image_buf_fill_checker_slice(slice_rect, slice_rect_float,
	                             task->width, num_scanlines,
	                             start_scanline);
}
/**
 * Fill an image buffer with the checker test pattern.
 *
 * \param rect: Byte buffer (4 values per pixel), may be NULL.
 * \param rect_float: Float buffer (4 values per pixel), may be NULL.
 * \param width, height: Buffer dimensions in pixels.
 *
 * Buffers smaller than 64x64 pixels are filled in one go with a row offset
 * of 0; larger ones are handed to IMB_processor_apply_threaded_scanlines()
 * as scanline ranges.
 */
void BKE_image_buf_fill_checker(unsigned char *rect,
                                float *rect_float,
                                int width, int height)
{
	const size_t num_pixels = ((size_t)width) * height;
	FillCheckerThreadData task;

	/* Not enough work to be worth dispatching scanline tasks. */
	if (num_pixels < 64 * 64) {
		image_buf_fill_checker_slice(rect, rect_float, width, height, 0);
		return;
	}

	task.rect = rect;
	task.rect_float = rect_float;
	task.width = width;

	IMB_processor_apply_threaded_scanlines(height,
	                                       image_buf_fill_checker_thread_do,
	                                       &task);
}
/* Utility functions for BKE_image_buf_fill_checker_color */

/* Saturating add helpers: add `add` to `real`, clamping at the channel maximum
 * (1.0f for floats, 255 for bytes; `add` is a 0..1 factor scaled to bytes).
 *
 * Arguments and the whole expansion are parenthesized so the macros behave as
 * a single value inside larger expressions and under a leading cast.
 * Both arguments are evaluated more than once: do not pass expressions with
 * side effects. */
#define BLEND_FLOAT(real, add) \
	((((real) + (add)) <= 1.0f) ? ((real) + (add)) : 1.0f)
#define BLEND_CHAR(real, add) \
	((((real) + (char)((add) * 255.0f)) <= 255) ? ((real) + (char)((add) * 255.0f)) : 255)
static void checker_board_color_fill(unsigned char *rect, float *rect_float, int width, int height)
static void checker_board_color_fill(unsigned char *rect,
float *rect_float,
int width,
int height,
int offset,
int total_height)
{
int hue_step, y, x;
float hsv[3], rgb[3];
@@ -182,9 +267,9 @@ static void checker_board_color_fill(unsigned char *rect, float *rect_float, int
hue_step = power_of_2_max_i(width / 8);
if (hue_step < 8) hue_step = 8;
for (y = 0; y < height; y++) {
for (y = offset; y < height + offset; y++) {
hsv[2] = 0.1 + (y * (0.4 / height)); /* use a number lower then 1.0 else its too bright */
hsv[2] = 0.1 + (y * (0.4 / total_height)); /* use a number lower then 1.0 else its too bright */
for (x = 0; x < width; x++) {
hsv[0] = (float)((double)(x / hue_step) * 1.0 / width * hue_step);
hsv_to_rgb_v(hsv, rgb);
@@ -194,7 +279,7 @@ static void checker_board_color_fill(unsigned char *rect, float *rect_float, int
rect[1] = (char)(rgb[1] * 255.0f);
rect[2] = (char)(rgb[2] * 255.0f);
rect[3] = 255;
rect += 4;
}
@@ -203,27 +288,35 @@ static void checker_board_color_fill(unsigned char *rect, float *rect_float, int
rect_float[1] = rgb[1];
rect_float[2] = rgb[2];
rect_float[3] = 1.0f;
rect_float += 4;
}
}
}
}
static void checker_board_color_tint(unsigned char *rect, float *rect_float, int width, int height, int size, float blend)
static void checker_board_color_tint(unsigned char *rect,
float *rect_float,
int width,
int height,
int size,
float blend,
int offset)
{
int x, y;
float blend_half = blend * 0.5f;
for (y = 0; y < height; y++) {
for (y = offset; y < height + offset; y++) {
for (x = 0; x < width; x++) {
if (((y / size) % 2 == 1 && (x / size) % 2 == 1) || ( (y / size) % 2 == 0 && (x / size) % 2 == 0)) {
if (((y / size) % 2 == 1 && (x / size) % 2 == 1) ||
((y / size) % 2 == 0 && (x / size) % 2 == 0))
{
if (rect) {
rect[0] = (char)BLEND_CHAR(rect[0], blend);
rect[1] = (char)BLEND_CHAR(rect[1], blend);
rect[2] = (char)BLEND_CHAR(rect[2], blend);
rect[3] = 255;
rect += 4;
}
if (rect_float) {
@@ -231,7 +324,7 @@ static void checker_board_color_tint(unsigned char *rect, float *rect_float, int
rect_float[1] = BLEND_FLOAT(rect_float[1], blend);
rect_float[2] = BLEND_FLOAT(rect_float[2], blend);
rect_float[3] = 1.0f;
rect_float += 4;
}
}
@@ -241,7 +334,7 @@ static void checker_board_color_tint(unsigned char *rect, float *rect_float, int
rect[1] = (char)BLEND_CHAR(rect[1], blend_half);
rect[2] = (char)BLEND_CHAR(rect[2], blend_half);
rect[3] = 255;
rect += 4;
}
if (rect_float) {
@@ -249,19 +342,24 @@ static void checker_board_color_tint(unsigned char *rect, float *rect_float, int
rect_float[1] = BLEND_FLOAT(rect_float[1], blend_half);
rect_float[2] = BLEND_FLOAT(rect_float[2], blend_half);
rect_float[3] = 1.0f;
rect_float += 4;
}
}
}
}
}
static void checker_board_grid_fill(unsigned char *rect, float *rect_float, int width, int height, float blend)
static void checker_board_grid_fill(unsigned char *rect,
float *rect_float,
int width,
int height,
float blend,
int offset)
{
int x, y;
for (y = 0; y < height; y++) {
for (y = offset; y < height + offset; y++) {
for (x = 0; x < width; x++) {
if (((y % 32) == 0) || ((x % 32) == 0) || x == 0) {
if (rect) {
@@ -277,7 +375,7 @@ static void checker_board_grid_fill(unsigned char *rect, float *rect_float, int
rect_float[1] = BLEND_FLOAT(rect_float[1], blend);
rect_float[2] = BLEND_FLOAT(rect_float[2], blend);
rect_float[3] = 1.0f;
rect_float += 4;
}
}
@@ -291,7 +389,12 @@ static void checker_board_grid_fill(unsigned char *rect, float *rect_float, int
/* defined in image.c */
static void checker_board_text(unsigned char *rect, float *rect_float, int width, int height, int step, int outline)
static void checker_board_text(unsigned char *rect,
float *rect_float,
int width,
int height,
int step,
int outline)
{
int x, y;
int pen_x, pen_y;
@@ -351,14 +454,57 @@ static void checker_board_text(unsigned char *rect, float *rect_float, int width
BLF_buffer(mono, NULL, NULL, 0, 0, 0, NULL);
}
/* Paint every layer of the color grid, except the text, into one slice.
 *
 * rect / rect_float point at the first scanline of the slice, `height` is the
 * number of scanlines in the slice, `offset` is the slice's first row and
 * `total_height` is forwarded to the base color fill.
 *
 * Order matters: the base color fill comes first, then the four tint passes,
 * then the grid lines. */
static void checker_board_color_prepare_slice(unsigned char *rect,
                                              float *rect_float,
                                              int width,
                                              int height,
                                              int offset,
                                              int total_height)
{
	/* Checker size and blend factor of each tint pass, in application order. */
	static const struct {
		int size;
		float blend;
	} tint_passes[] = {
		{1, 0.03f},
		{4, 0.05f},
		{32, 0.07f},
		{128, 0.15f},
	};
	const int num_tint_passes = (int)(sizeof(tint_passes) / sizeof(tint_passes[0]));
	int pass;

	checker_board_color_fill(rect, rect_float, width, height, offset, total_height);

	for (pass = 0; pass < num_tint_passes; pass++) {
		checker_board_color_tint(rect, rect_float, width, height,
		                         tint_passes[pass].size,
		                         tint_passes[pass].blend,
		                         offset);
	}

	checker_board_grid_fill(rect, rect_float, width, height, 1.0f / 4.0f, offset);
}
/* Arguments shared by every scanline task of the threaded color grid fill. */
typedef struct FillCheckerColorThreadData {
/* Start of the byte buffer, 4 values per pixel; may be NULL. */
unsigned char *rect;
/* Start of the float buffer, 4 values per pixel; may be NULL. */
float *rect_float;
/* Dimensions of the whole image in pixels; height is passed to the slice
 * function as the total height. */
int width, height;
} FillCheckerColorThreadData;
/* Scanline-range callback of the threaded color grid fill.
 *
 * data_v points to a FillCheckerColorThreadData. The buffers that are not
 * NULL are advanced to the first value of start_scanline; the slice function
 * also receives start_scanline as its row offset and the full image height. */
static void checker_board_color_prepare_thread_do(void *data_v,
                                                  int start_scanline,
                                                  int num_scanlines)
{
	const FillCheckerColorThreadData *task = (const FillCheckerColorThreadData *)data_v;
	/* 4 values per pixel, in both the byte and the float buffer. */
	const size_t first_value = ((size_t)task->width) * start_scanline * 4;
	unsigned char *slice_rect = NULL;
	float *slice_rect_float = NULL;

	if (task->rect != NULL) {
		slice_rect = task->rect + first_value;
	}
	if (task->rect_float != NULL) {
		slice_rect_float = task->rect_float + first_value;
	}

	checker_board_color_prepare_slice(slice_rect, slice_rect_float,
	                                  task->width, num_scanlines,
	                                  start_scanline, task->height);
}
void BKE_image_buf_fill_checker_color(unsigned char *rect, float *rect_float, int width, int height)
{
checker_board_color_fill(rect, rect_float, width, height);
checker_board_color_tint(rect, rect_float, width, height, 1, 0.03f);
checker_board_color_tint(rect, rect_float, width, height, 4, 0.05f);
checker_board_color_tint(rect, rect_float, width, height, 32, 0.07f);
checker_board_color_tint(rect, rect_float, width, height, 128, 0.15f);
checker_board_grid_fill(rect, rect_float, width, height, 1.0f / 4.0f);
if (((size_t)width) * height < 64 * 64) {
checker_board_color_prepare_slice(rect, rect_float, width, height, 0, height);
}
else {
FillCheckerColorThreadData data;
data.rect = rect;
data.rect_float = rect_float;
data.width = width;
data.height = height;
IMB_processor_apply_threaded_scanlines(
height, checker_board_color_prepare_thread_do, &data);
}
checker_board_text(rect, rect_float, width, height, 128, 2);
@@ -367,11 +513,12 @@ void BKE_image_buf_fill_checker_color(unsigned char *rect, float *rect_float, in
* linearize it afterwards. This could be optimized with some smart
* trickery around blending factors and such.
*/
IMB_buffer_float_from_float(rect_float, rect_float,
4,
IB_PROFILE_LINEAR_RGB, IB_PROFILE_SRGB,
true,
width, height,
width, width);
IMB_buffer_float_from_float_threaded(rect_float, rect_float,
4,
IB_PROFILE_LINEAR_RGB,
IB_PROFILE_SRGB,
true,
width, height,
width, width);
}
}

View File

@@ -439,6 +439,9 @@ void IMB_buffer_float_from_byte(float *rect_to, const unsigned char *rect_from,
void IMB_buffer_float_from_float(float *rect_to, const float *rect_from,
int channels_from, int profile_to, int profile_from, bool predivide,
int width, int height, int stride_to, int stride_from);
/* Variant of IMB_buffer_float_from_float() taking the same arguments.
 * Buffers smaller than 64x64 pixels are converted with a single direct call;
 * larger ones are converted per scanline range through
 * IMB_processor_apply_threaded_scanlines(). */
void IMB_buffer_float_from_float_threaded(float *rect_to, const float *rect_from,
int channels_from, int profile_to, int profile_from, bool predivide,
int width, int height, int stride_to, int stride_from);
void IMB_buffer_float_from_float_mask(float *rect_to, const float *rect_from,
int channels_from, int width, int height, int stride_to, int stride_from, char *mask);
void IMB_buffer_byte_from_byte(unsigned char *rect_to, const unsigned char *rect_from,

View File

@@ -532,6 +532,76 @@ void IMB_buffer_float_from_float(float *rect_to, const float *rect_from,
}
}
/* Arguments of IMB_buffer_float_from_float_threaded() shared by every
 * scanline task. The fields mirror the parameters of
 * IMB_buffer_float_from_float(), except height, which each task receives as
 * its own scanline count. */
typedef struct FloatToFloatThreadData {
/* Start of the destination buffer. */
float *rect_to;
/* Start of the source buffer. */
const float *rect_from;
/* Number of channels per source pixel. */
int channels_from;
/* Profile identifiers, forwarded unchanged to the converter. */
int profile_to;
int profile_from;
/* Forwarded unchanged to the converter. */
bool predivide;
/* Number of pixels converted per scanline. */
int width;
/* Row strides of the destination and source buffers, in pixels
 * (they are multiplied by a channel count to get a value offset). */
int stride_to;
int stride_from;
} FloatToFloatThreadData;
/* Scanline-range callback of IMB_buffer_float_from_float_threaded().
 *
 * data_v points to a FloatToFloatThreadData. Converts num_scanlines rows
 * starting at start_scanline by calling IMB_buffer_float_from_float() on
 * source and destination pointers advanced to that row. */
static void imb_buffer_float_from_float_thread_do(void *data_v,
                                                  int start_scanline,
                                                  int num_scanlines)
{
	FloatToFloatThreadData *data = (FloatToFloatThreadData *)data_v;
	/* Source rows hold channels_from floats per pixel. */
	size_t offset_from = ((size_t)start_scanline) * data->stride_from * data->channels_from;
	/* Destination rows are RGBA, 4 floats per pixel, whatever the source
	 * channel count is. Scaling by channels_from here would make slices land
	 * on the wrong rows for 1- and 3-channel sources.
	 * NOTE(review): relies on IMB_buffer_float_from_float() addressing
	 * destination rows as stride_to * 4 floats -- confirm against its body. */
	size_t offset_to = ((size_t)start_scanline) * data->stride_to * 4;

	IMB_buffer_float_from_float(data->rect_to + offset_to,
	                            data->rect_from + offset_from,
	                            data->channels_from,
	                            data->profile_to,
	                            data->profile_from,
	                            data->predivide,
	                            data->width,
	                            num_scanlines,
	                            data->stride_to,
	                            data->stride_from);
}
/* Variant of IMB_buffer_float_from_float() taking the same arguments.
 *
 * Buffers smaller than 64x64 pixels are converted with one direct call.
 * Larger ones are handed to IMB_processor_apply_threaded_scanlines(), which
 * calls imb_buffer_float_from_float_thread_do() on scanline ranges. */
void IMB_buffer_float_from_float_threaded(float *rect_to,
                                          const float *rect_from,
                                          int channels_from,
                                          int profile_to,
                                          int profile_from,
                                          bool predivide,
                                          int width,
                                          int height,
                                          int stride_to,
                                          int stride_from)
{
	const size_t num_pixels = ((size_t)width) * height;
	FloatToFloatThreadData task;

	/* Not enough work to be worth dispatching scanline tasks. */
	if (num_pixels < 64 * 64) {
		IMB_buffer_float_from_float(rect_to, rect_from,
		                            channels_from,
		                            profile_to, profile_from,
		                            predivide,
		                            width, height,
		                            stride_to, stride_from);
		return;
	}

	/* Everything except height is shared by all tasks; each task gets its
	 * own first scanline and scanline count from the scheduler. */
	task.rect_to = rect_to;
	task.rect_from = rect_from;
	task.channels_from = channels_from;
	task.profile_to = profile_to;
	task.profile_from = profile_from;
	task.predivide = predivide;
	task.width = width;
	task.stride_to = stride_to;
	task.stride_from = stride_from;

	IMB_processor_apply_threaded_scanlines(height,
	                                       imb_buffer_float_from_float_thread_do,
	                                       &task);
}
/* float to float pixels, output 4-channel RGBA */
void IMB_buffer_float_from_float_mask(float *rect_to, const float *rect_from, int channels_from,
int width, int height, int stride_to, int stride_from, char *mask)