From 0ef8e84c082a80354b502b169b6c79fccffa4405 Mon Sep 17 00:00:00 2001 From: Morten Hustveit Date: Tue, 2 Dec 2014 14:22:11 -0500 Subject: Make a headless entry point for benchmarking purposes --- Makefile.am | 9 +++-- main_glut.c | 75 +++++++++++++++++++++++++++++++++++++ main_headless.c | 32 ++++++++++++++++ ray.c | 112 +++++++++++++------------------------------------------- ray.h | 10 +++++ 5 files changed, 149 insertions(+), 89 deletions(-) create mode 100644 main_glut.c create mode 100644 main_headless.c create mode 100644 ray.h diff --git a/Makefile.am b/Makefile.am index e67837f..2e9d900 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,7 +1,10 @@ -AM_CFLAGS = -Wall -Wextra -pedantic -std=c99 +AM_CFLAGS = -Wall -Wextra -pedantic -std=c99 -flto -bin_PROGRAMS = ray +bin_PROGRAMS = ray ray-headless + +ray_SOURCES = main_glut.c ray.c 3dmath.c 3dmath.h ray_LDFLAGS = -lGL -lGLU -lglut -lm -pthread -ray_SOURCES = ray.c 3dmath.c 3dmath.h +ray_headless_SOURCES = main_headless.c ray.c 3dmath.c 3dmath.h +ray_headless_LDFLAGS = -lm -pthread diff --git a/main_glut.c b/main_glut.c new file mode 100644 index 0000000..cf22a73 --- /dev/null +++ b/main_glut.c @@ -0,0 +1,75 @@ +#include +#include +#include + +#include "ray.h" + +static int threaded; + +static int +init(int argc, char **argv, int w, int h) { + glutInit(&argc, argv); + + glutInitWindowPosition(0, 0); + glutInitWindowSize(w, h); + glutInitDisplayMode(GLUT_RGB); + glutCreateWindow(argv[0]); + + glDepthMask(0); + glDisable(GL_DEPTH_TEST); + glDisable(GL_BLEND); + + return 0; +} + +static void +display(void) { + static int count = 0; + ++count; + if(count > 10000) + exit(0); + float time = (float)glutGet(GLUT_ELAPSED_TIME) / 1000; + + unsigned char* buffer = calloc(4, WIDTH * HEIGHT); + trace_scene(time, buffer, threaded); + glDrawPixels(WIDTH, HEIGHT, GL_BGRA, GL_UNSIGNED_BYTE, buffer); + free(buffer); + + glutSwapBuffers(); +} + +static void +reshape(int w, int h) { + glViewport(0, 0, w, h); +} + +static void 
+keyboard(unsigned char key, int x, int y) { + switch(key) { + case 27: + exit(EXIT_SUCCESS); + break; + case 't': + if(threaded) + threaded = 0; + else + threaded = 1; + break; + } +} + + +int +main(int argc, char **argv) { + if (init(argc, argv, WIDTH, HEIGHT)) + return EXIT_FAILURE; + + glutDisplayFunc(display); + glutIdleFunc(display); + glutReshapeFunc(reshape); + glutKeyboardFunc(keyboard); + + glutMainLoop(); + + return EXIT_SUCCESS; +} diff --git a/main_headless.c b/main_headless.c new file mode 100644 index 0000000..c5cd5bd --- /dev/null +++ b/main_headless.c @@ -0,0 +1,32 @@ +#include +#include + +#include + +#include "ray.h" + +static const size_t kFramesToRender = 100; + +int main(int argc, char** argv) { + fprintf(stderr, "Rendering %zu frames\n", kFramesToRender); + + unsigned char* buffer = calloc(4, WIDTH * HEIGHT); + + struct timeval start; + gettimeofday(&start, NULL); + + for (size_t i = 0; i < kFramesToRender; ++i) + trace_scene(i * 0.01f, buffer, 0); + + struct timeval end; + gettimeofday(&end, NULL); + + free(buffer); + + fprintf(stderr, "Average %.2f ms/frame\n", + (1.0e3 * (end.tv_sec - start.tv_sec) + + 1.0e-3 * (end.tv_usec - start.tv_usec)) / + kFramesToRender); + + return EXIT_SUCCESS; +} diff --git a/ray.c b/ray.c index d80125c..018ace1 100644 --- a/ray.c +++ b/ray.c @@ -1,16 +1,13 @@ +#include "ray.h" + #include #include #include #include #include -#include -#include -#include #include "3dmath.h" -#define WIDTH 1000 -#define HEIGHT 1000 #define BUFFER_SIZE (WIDTH * HEIGHT * 4) #define LENGTH(array) (sizeof(array) / sizeof(array[0])) @@ -30,8 +27,11 @@ typedef struct { float diffuse[3]; } Light; -static unsigned char threaded = 0; -static unsigned char buffer[BUFFER_SIZE]; +typedef struct { + unsigned char* buffer; + long line; +} ThreadArg; + static Object objects[] = { {.position={-1.414, -1, -3}, .radius=1, .diffuse={.8, 0, .8}}, {.position={0, 1.414, -3}, .radius=1, .diffuse={0, .8, .8}}, @@ -93,23 +93,35 @@ trace_line(int l, 
unsigned char *buf) { static void * thread(void *arg) { - long line = (long) arg; + ThreadArg* thread_arg = arg; - trace_line(line, buffer + line * 4 * WIDTH); + trace_line(thread_arg->line, thread_arg->buffer + thread_arg->line * 4 * WIDTH); - pthread_exit(NULL); + return NULL; } -static void -trace_scene(unsigned char *buf) { +void +trace_scene(float time, unsigned char *buf, int threaded) { + + objects[0].position[0] = 1.5 * cos(time); + objects[0].position[1] = 1.5 * sin(time); + objects[1].position[0] = 1.5 * cos(time + 1/3. * TAU); + objects[1].position[1] = 1.5 * sin(time + 1/3. * TAU); + objects[3].position[0] = 1.5 * cos(time + 2/3. * TAU); + objects[3].position[1] = 1.5 * sin(time + 2/3. * TAU); + objects[2].position[2] = -3 + 2 * sin(time * 2); if(threaded) { pthread_attr_t attr; pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); + ThreadArg thread_args[HEIGHT]; pthread_t threads[HEIGHT]; for(long i = 0; i < HEIGHT; ++i) { - int ret = pthread_create(&threads[i], &attr, thread, (void *)i); + thread_args[i].line = i; + thread_args[i].buffer = buf; + + int ret = pthread_create(&threads[i], &attr, thread, &thread_args[i]); if(ret) { fprintf(stderr, "pthread_create(): %d\n", ret); @@ -122,78 +134,6 @@ trace_scene(unsigned char *buf) { pthread_join(threads[i], &status); } else { for(int i = 0; i < HEIGHT; ++i) - trace_line(i, buffer + i * 4 * WIDTH); - } -} - -static void -display(void) { - static int count = 0; - ++count; - if(count > 10000) - exit(0); - float time = (float)glutGet(GLUT_ELAPSED_TIME) / 1000; - - objects[0].position[0] = 1.5 * cos(time); - objects[0].position[1] = 1.5 * sin(time); - objects[1].position[0] = 1.5 * cos(time + 1/3. * TAU); - objects[1].position[1] = 1.5 * sin(time + 1/3. * TAU); - objects[3].position[0] = 1.5 * cos(time + 2/3. * TAU); - objects[3].position[1] = 1.5 * sin(time + 2/3. 
* TAU); - objects[2].position[2] = -3 + 2 * sin(time * 2); - - trace_scene(buffer); - glDrawPixels(WIDTH, HEIGHT, GL_BGRA, GL_UNSIGNED_BYTE, buffer); - glutSwapBuffers(); -} - -static void -reshape(int w, int h) { - glViewport(0, 0, w, h); -} - -static void -keyboard(unsigned char key, int x, int y) { - switch(key) { - case 27: - exit(EXIT_SUCCESS); - break; - case 't': - if(threaded) - threaded = 0; - else - threaded = 1; - break; + trace_line(i, buf + i * 4 * WIDTH); } } - -static int -init(int argc, char **argv, int w, int h) { - glutInit(&argc, argv); - - glutInitWindowPosition(0, 0); - glutInitWindowSize(w, h); - glutInitDisplayMode(GLUT_RGB); - glutCreateWindow(argv[0]); - - glDepthMask(0); - glDisable(GL_DEPTH_TEST); - glDisable(GL_BLEND); - - return 0; -} - -int -main(int argc, char **argv) { - if (init(argc, argv, WIDTH, HEIGHT)) - return EXIT_FAILURE; - - glutDisplayFunc(display); - glutIdleFunc(display); - glutReshapeFunc(reshape); - glutKeyboardFunc(keyboard); - - glutMainLoop(); - - return EXIT_SUCCESS; -} diff --git a/ray.h b/ray.h new file mode 100644 index 0000000..83b4cf0 --- /dev/null +++ b/ray.h @@ -0,0 +1,10 @@ +#ifndef RAY_H_ +#define RAY_H_ 1 + +#define WIDTH 1000 +#define HEIGHT 1000 + +void +trace_scene(float time, unsigned char *buf, int threaded); + +#endif // !RAY_H_ -- cgit v1.2.3 From 3d422f2b3d311f1be3658ff1f44d818a99561a66 Mon Sep 17 00:00:00 2001 From: Morten Hustveit Date: Tue, 2 Dec 2014 14:43:17 -0500 Subject: Minor style changes --- 3dmath.c | 2 ++ main_headless.c | 3 ++- ray.c | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/3dmath.c b/3dmath.c index eb9c4a8..ece6f83 100644 --- a/3dmath.c +++ b/3dmath.c @@ -1,3 +1,5 @@ +#include "3dmath.h" + #include #define POW2(x) ((x) * (x)) diff --git a/main_headless.c b/main_headless.c index c5cd5bd..8fd1dcf 100644 --- a/main_headless.c +++ b/main_headless.c @@ -7,7 +7,8 @@ static const size_t kFramesToRender = 100; -int main(int argc, char** argv) { +int +main(int argc, 
char** argv) { fprintf(stderr, "Rendering %zu frames\n", kFramesToRender); unsigned char* buffer = calloc(4, WIDTH * HEIGHT); diff --git a/ray.c b/ray.c index 018ace1..45597df 100644 --- a/ray.c +++ b/ray.c @@ -102,7 +102,6 @@ thread(void *arg) { void trace_scene(float time, unsigned char *buf, int threaded) { - objects[0].position[0] = 1.5 * cos(time); objects[0].position[1] = 1.5 * sin(time); objects[1].position[0] = 1.5 * cos(time + 1/3. * TAU); @@ -110,6 +109,7 @@ trace_scene(float time, unsigned char *buf, int threaded) { objects[3].position[0] = 1.5 * cos(time + 2/3. * TAU); objects[3].position[1] = 1.5 * sin(time + 2/3. * TAU); objects[2].position[2] = -3 + 2 * sin(time * 2); + if(threaded) { pthread_attr_t attr; pthread_attr_init(&attr); -- cgit v1.2.3 From f4afeeef49b12fffb4ce9e5860c0a79b6809d74b Mon Sep 17 00:00:00 2001 From: Morten Hustveit Date: Tue, 2 Dec 2014 14:54:28 -0500 Subject: 3dmath: Use const-correct function declarations --- 3dmath.c | 4 ++-- 3dmath.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/3dmath.c b/3dmath.c index ece6f83..d5806b6 100644 --- a/3dmath.c +++ b/3dmath.c @@ -5,7 +5,7 @@ #define POW2(x) ((x) * (x)) float -dot(float x[3], float y[3]) { +dot(const float x[3], const float y[3]) { return x[0] * y[0] + x[1] * y[1] + x[2] * y[2]; } @@ -21,7 +21,7 @@ normalize(float x[3]) { } float -sphere_intersect(float y[3], float r[3], float s[3], float d[3], float c[3], float R) { +sphere_intersect(float y[3], float r[3], const float s[3], const float d[3], const float c[3], float R) { int i; float D, n[3], t, v[3]; diff --git a/3dmath.h b/3dmath.h index 4135f3b..0b6e035 100644 --- a/3dmath.h +++ b/3dmath.h @@ -1,3 +1,3 @@ -float dot(float x[3], float y[3]); +float dot(const float x[3], const float y[3]); void normalize(float x[3]); -float sphere_intersect(float y[3], float r[3], float s[3], float d[3], float c[3], float R); +float sphere_intersect(float y[3], float r[3], const float s[3], const float d[3], const 
float c[3], float R); -- cgit v1.2.3 From 04ee2de37c1f80d098172fc7bc4d42ea1797f128 Mon Sep 17 00:00:00 2001 From: Morten Hustveit Date: Tue, 2 Dec 2014 14:54:39 -0500 Subject: ray.c: Precalculate per-pixel ray directions This saves 16ms/frame on yelena. --- ray.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/ray.c b/ray.c index 45597df..6304b36 100644 --- a/ray.c +++ b/ray.c @@ -32,6 +32,9 @@ typedef struct { long line; } ThreadArg; +static float trace_vectors[HEIGHT][WIDTH][3]; +static int trace_vectors_initialized; + static Object objects[] = { {.position={-1.414, -1, -3}, .radius=1, .diffuse={.8, 0, .8}}, {.position={0, 1.414, -3}, .radius=1, .diffuse={0, .8, .8}}, @@ -44,7 +47,7 @@ static Light lights[] = { }; static void -trace(float s[3], float d[3], float pixel[3], int n) { +trace(float s[3], const float d[3], float pixel[3], int n) { int i, j, k, m; float l[3], r[3], t, y[3]; @@ -71,23 +74,16 @@ trace_line(int l, unsigned char *buf) { static float s[3] = {0, 0, 0}; float y = l - HEIGHT / 2; - for(int i = 0; i < 4 * WIDTH; i += 4) { - float x = (i / 4) - WIDTH / 2; - + for(int i = 0; i < WIDTH; ++i) { float pixel[3]; memset(pixel, '\0', sizeof(pixel)); - float d[3]; - d[0] = x / (WIDTH / 2); - d[1] = y / (HEIGHT / 2) * ((float)HEIGHT / (float)WIDTH); - d[2] = -1; - - normalize(d); + const float* d = trace_vectors[l][i]; trace(s, d, pixel, 1); for(int j = 0; j < 3; ++j) - buf[i + j] = MIN(255 * pixel[j], 255); + buf[i * 4 + j] = MIN(255 * pixel[j], 255); } } @@ -100,8 +96,25 @@ thread(void *arg) { return NULL; } +static void +initialize_trace_vectors(void) { + for(int y = 0; y < HEIGHT; ++y) { + for(int x = 0; x < WIDTH; ++x) { + float* d = trace_vectors[y][x]; + d[0] = ((float)x / WIDTH - 0.5f) * 2.0f; + d[1] = ((float)y / HEIGHT - 0.5f) * 2.0f * ((float)HEIGHT / WIDTH); + d[2] = -1; + normalize(d); + } + } + trace_vectors_initialized = 1; +} + void trace_scene(float time, unsigned char *buf, int 
threaded) { + if (!trace_vectors_initialized) + initialize_trace_vectors(); + objects[0].position[0] = 1.5 * cos(time); objects[0].position[1] = 1.5 * sin(time); objects[1].position[0] = 1.5 * cos(time + 1/3. * TAU); -- cgit v1.2.3 From 8fdc2a63b118f37fa0bad11be0104296556aa5b0 Mon Sep 17 00:00:00 2001 From: Morten Hustveit Date: Tue, 2 Dec 2014 14:58:22 -0500 Subject: 3dmath.c: In normalize, calculate reciprocal only once, and use sqrtf This saves about 12ms per frame on yelena. --- 3dmath.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/3dmath.c b/3dmath.c index d5806b6..5b876f2 100644 --- a/3dmath.c +++ b/3dmath.c @@ -11,13 +11,11 @@ dot(const float x[3], const float y[3]) { void normalize(float x[3]) { - float len; - int i; - - len = sqrt(dot(x, x)); + float len = 1.0f / sqrtf(dot(x, x)); - for(i = 0; i < 3; ++i) - x[i] /= len; + x[0] *= len; + x[1] *= len; + x[2] *= len; } float -- cgit v1.2.3 From ef3f2085ff238469207d099562d61635e1f9e138 Mon Sep 17 00:00:00 2001 From: Morten Hustveit Date: Tue, 2 Dec 2014 15:02:25 -0500 Subject: 3dmath.c: Throw restrict on all parameters to sphere_intersect This saves 3ms / frame on yelena. 
--- 3dmath.c | 4 +++- 3dmath.h | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/3dmath.c b/3dmath.c index 5b876f2..2693c32 100644 --- a/3dmath.c +++ b/3dmath.c @@ -19,7 +19,9 @@ normalize(float x[3]) { } float -sphere_intersect(float y[3], float r[3], const float s[3], const float d[3], const float c[3], float R) { +sphere_intersect(float* restrict y, float* restrict r, + const float* restrict s, const float* restrict d, + const float* restrict c, float R) { int i; float D, n[3], t, v[3]; diff --git a/3dmath.h b/3dmath.h index 0b6e035..3a984cb 100644 --- a/3dmath.h +++ b/3dmath.h @@ -1,3 +1,6 @@ float dot(const float x[3], const float y[3]); void normalize(float x[3]); -float sphere_intersect(float y[3], float r[3], const float s[3], const float d[3], const float c[3], float R); + +float sphere_intersect(float* restrict y, float* restrict r, + const float* restrict s, const float* restrict d, + const float* restrict c, float R); -- cgit v1.2.3 From 59eb492aac9d1d5a005858f9e32b34ac79cb02a6 Mon Sep 17 00:00:00 2001 From: Morten Hustveit Date: Tue, 2 Dec 2014 15:10:58 -0500 Subject: 3dmath.c: Early out of sphere_intersect on t <= 0 The loop in ray.c discards any data from intersections with t <= 0 anyway, so we might as well not calculate it. This saves 15 ms/frame on yelena. --- 3dmath.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/3dmath.c b/3dmath.c index 2693c32..866af09 100644 --- a/3dmath.c +++ b/3dmath.c @@ -35,6 +35,9 @@ sphere_intersect(float* restrict y, float* restrict r, t = -dot(v, d) - D; + if (t <= 0) + return -1; + for(i = 0; i < 3; ++i) { y[i] = s[i] + t * d[i]; n[i] = y[i] - c[i]; -- cgit v1.2.3 From 1dca69e545ab815850230a377bc346bb5dc33839 Mon Sep 17 00:00:00 2001 From: Morten Hustveit Date: Tue, 2 Dec 2014 15:18:32 -0500 Subject: ray.c: Skip normalizing if light has negative contribution This saves about 3ms/frame on yelena. 
--- ray.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/ray.c b/ray.c index 6304b36..1125d19 100644 --- a/ray.c +++ b/ray.c @@ -59,9 +59,12 @@ trace(float s[3], const float d[3], float pixel[3], int n) { for(i = 0; i < 3; ++i) l[i] = lights[m].position[i] - y[i]; - normalize(l); - for(k = 0; k < 3; ++k) - pixel[k] += lights[m].diffuse[k] * objects[j].diffuse[k] * (MAX(dot(l, r), 0)) / (1 << n); + float lr_dot = dot(l, r); + if (lr_dot > 0) { + float scale = lr_dot / sqrtf(dot(l, l)) / (1 << n); + for(k = 0; k < 3; ++k) + pixel[k] += lights[m].diffuse[k] * objects[j].diffuse[k] * scale; + } trace(y, r, pixel, n + 1); } -- cgit v1.2.3 From ce39f5c767ae25ed884aa24120e340d154442342 Mon Sep 17 00:00:00 2001 From: Morten Hustveit Date: Tue, 2 Dec 2014 15:39:32 -0500 Subject: ray.c: Simplify trace_line This doesn't save any time. --- ray.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/ray.c b/ray.c index 1125d19..edbb37a 100644 --- a/ray.c +++ b/ray.c @@ -74,19 +74,16 @@ trace(float s[3], const float d[3], float pixel[3], int n) { static void trace_line(int l, unsigned char *buf) { - static float s[3] = {0, 0, 0}; - float y = l - HEIGHT / 2; + static const float s[3] = {0, 0, 0}; - for(int i = 0; i < WIDTH; ++i) { - float pixel[3]; - memset(pixel, '\0', sizeof(pixel)); + for(int i = 0; i < WIDTH; ++i, buf += 4) { + float pixel[3] = { 0, 0, 0 }; - const float* d = trace_vectors[l][i]; + trace(s, trace_vectors[l][i], pixel, 1); - trace(s, d, pixel, 1); - - for(int j = 0; j < 3; ++j) - buf[i * 4 + j] = MIN(255 * pixel[j], 255); + buf[0] = MIN(pixel[0], 1.0f) * 255; + buf[1] = MIN(pixel[1], 1.0f) * 255; + buf[2] = MIN(pixel[2], 1.0f) * 255; } } -- cgit v1.2.3 From 64a24eff5995c44d21e42d0bc654739030eca76c Mon Sep 17 00:00:00 2001 From: Morten Hustveit Date: Tue, 2 Dec 2014 15:50:45 -0500 Subject: ray.c: Don't look for self in reflection on self --- ray.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 
deletions(-) diff --git a/ray.c b/ray.c index edbb37a..96a605a 100644 --- a/ray.c +++ b/ray.c @@ -47,11 +47,13 @@ static Light lights[] = { }; static void -trace(float s[3], const float d[3], float pixel[3], int n) { +trace(const float s[3], const float d[3], float pixel[3], int n, unsigned int mask) { int i, j, k, m; float l[3], r[3], t, y[3]; for(j = 0; j < LENGTH(objects); ++j) { + if ((1 << j) & mask) continue; + t = sphere_intersect(y, r, s, d, objects[j].position, objects[j].radius); if(t > 0) { @@ -66,7 +68,7 @@ trace(float s[3], const float d[3], float pixel[3], int n) { pixel[k] += lights[m].diffuse[k] * objects[j].diffuse[k] * scale; } - trace(y, r, pixel, n + 1); + trace(y, r, pixel, n + 1, (1 << j)); } } } @@ -79,7 +81,7 @@ trace_line(int l, unsigned char *buf) { for(int i = 0; i < WIDTH; ++i, buf += 4) { float pixel[3] = { 0, 0, 0 }; - trace(s, trace_vectors[l][i], pixel, 1); + trace(s, trace_vectors[l][i], pixel, 1, 0); buf[0] = MIN(pixel[0], 1.0f) * 255; buf[1] = MIN(pixel[1], 1.0f) * 255; buf[2] = MIN(pixel[2], 1.0f) * 255; -- cgit v1.2.3 From 4ddcfc0bb65d82d4e3fd2d424912086a4b454fdf Mon Sep 17 00:00:00 2001 From: Morten Hustveit Date: Tue, 2 Dec 2014 15:53:29 -0500 Subject: 3dmath.c: Remove sqrt from sphere_intersect Interestingly, this seems to only save 1ms/frame on yelena. But that's still about 6% of a 60fps frame (1 ms out of ~16.7 ms). 
--- 3dmath.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/3dmath.c b/3dmath.c index 866af09..9e7f7a1 100644 --- a/3dmath.c +++ b/3dmath.c @@ -43,10 +43,10 @@ sphere_intersect(float* restrict y, float* restrict r, n[i] = y[i] - c[i]; } - normalize(n); + float two_dot_nd_div_sq_n_mag = 2.0f * dot(n, d) / dot(n, n); - for(i = 0; i < 3; ++i) - r[i] = d[i] - 2 * dot(n, d) * n[i]; + for (i = 0; i < 3; ++i) + r[i] = d[i] - two_dot_nd_div_sq_n_mag * n[i]; return t; } -- cgit v1.2.3 From a9fd902610a81d0bcbfde0fbc7377d839a41ffa2 Mon Sep 17 00:00:00 2001 From: Morten Hustveit Date: Tue, 2 Dec 2014 16:20:36 -0500 Subject: ray.c: Use __builtin_expect to mark sphere collisions as unlikely This saves 10ms/frame on yelena. --- ray.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/ray.c b/ray.c index 96a605a..64f8810 100644 --- a/ray.c +++ b/ray.c @@ -16,6 +16,14 @@ #define TAU 6.28318531 +#if __GNUC__ >= 3 +# define unlikely(cond) __builtin_expect ((cond), 0) +# define likely(cond) __builtin_expect ((cond), 1) +#else +# define unlikely(cond) (cond) +# define likely(cond) (cond) +#endif + typedef struct { float position[3]; float radius; @@ -56,7 +64,7 @@ trace(const float s[3], const float d[3], float pixel[3], int n, unsigned int ma t = sphere_intersect(y, r, s, d, objects[j].position, objects[j].radius); - if(t > 0) { + if(unlikely(t > 0)) { for(m = 0; m < LENGTH(lights); ++m) { for(i = 0; i < 3; ++i) l[i] = lights[m].position[i] - y[i]; -- cgit v1.2.3 From 656717d3126949955b9a451539f4476e84e308e6 Mon Sep 17 00:00:00 2001 From: Morten Hustveit Date: Tue, 2 Dec 2014 16:34:50 -0500 Subject: ray.c: Create only one thread per core --- ray.c | 51 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 21 deletions(-) diff --git a/ray.c b/ray.c index 64f8810..5e8243e 100644 --- a/ray.c +++ b/ray.c @@ -5,6 +5,7 @@ #include #include #include +#include #include "3dmath.h" @@ -36,8 +37,10 @@ typedef 
struct { } Light; typedef struct { + pthread_mutex_t mutex; + unsigned char* buffer; - long line; + long next_line; } ThreadArg; static float trace_vectors[HEIGHT][WIDTH][3]; @@ -101,7 +104,16 @@ static void * thread(void *arg) { ThreadArg* thread_arg = arg; - trace_line(thread_arg->line, thread_arg->buffer + thread_arg->line * 4 * WIDTH); + for (;;) { + pthread_mutex_lock(&thread_arg->mutex); + if (thread_arg->next_line == HEIGHT) break; + long line = thread_arg->next_line++; + pthread_mutex_unlock(&thread_arg->mutex); + + trace_line(line, thread_arg->buffer + line * 4 * WIDTH); + } + + pthread_mutex_unlock(&thread_arg->mutex); return NULL; } @@ -134,27 +146,24 @@ trace_scene(float time, unsigned char *buf, int threaded) { objects[2].position[2] = -3 + 2 * sin(time * 2); if(threaded) { - pthread_attr_t attr; - pthread_attr_init(&attr); - pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); - - ThreadArg thread_args[HEIGHT]; - pthread_t threads[HEIGHT]; - for(long i = 0; i < HEIGHT; ++i) { - thread_args[i].line = i; - thread_args[i].buffer = buf; - - int ret = pthread_create(&threads[i], &attr, thread, &thread_args[i]); - - if(ret) { - fprintf(stderr, "pthread_create(): %d\n", ret); - exit(EXIT_FAILURE); - } + ThreadArg arg; + memset(&arg, 0, sizeof(arg)); + pthread_mutex_init(&arg.mutex, NULL); + arg.buffer = buf; + + int num_threads = sysconf(_SC_NPROCESSORS_CONF) - 1; + pthread_t* threads = NULL; + if (num_threads > 0) { + threads = calloc(sizeof(*threads), num_threads); + + for (int i = 0; i < num_threads; ++i) + pthread_create(&threads[i], NULL, thread, &arg); } - void *status; - for(long i = 0; i < HEIGHT; ++i) - pthread_join(threads[i], &status); + thread(&arg); + + for(int i = 0; i < num_threads; ++i) + pthread_join(threads[i], NULL); } else { for(int i = 0; i < HEIGHT; ++i) trace_line(i, buf + i * 4 * WIDTH); -- cgit v1.2.3 From b95d4c408a9f4161d29cbd9058b62a0b5079dea8 Mon Sep 17 00:00:00 2001 From: Morten Hustveit Date: Tue, 2 Dec 2014 16:37:40 
-0500 Subject: main_headless.c: Remove whitespace at end of lines --- main_headless.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/main_headless.c b/main_headless.c index 8fd1dcf..ab38414 100644 --- a/main_headless.c +++ b/main_headless.c @@ -10,24 +10,24 @@ static const size_t kFramesToRender = 100; int main(int argc, char** argv) { fprintf(stderr, "Rendering %zu frames\n", kFramesToRender); - + unsigned char* buffer = calloc(4, WIDTH * HEIGHT); - + struct timeval start; gettimeofday(&start, NULL); - + for (size_t i = 0; i < kFramesToRender; ++i) trace_scene(i * 0.01f, buffer, 0); - + struct timeval end; gettimeofday(&end, NULL); - + free(buffer); - + fprintf(stderr, "Average %.2f ms/frame\n", (1.0e3 * (end.tv_sec - start.tv_sec) + 1.0e-3 * (end.tv_usec - start.tv_usec)) / kFramesToRender); - + return EXIT_SUCCESS; } -- cgit v1.2.3 From c1c1559007f5557ebfc2a19899224ee9df2f9447 Mon Sep 17 00:00:00 2001 From: Morten Hustveit Date: Tue, 2 Dec 2014 17:21:23 -0500 Subject: ray.c: Calculate reflection only once per hit This saves 10ms on yelena. --- ray.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ray.c b/ray.c index 5e8243e..8a236bd 100644 --- a/ray.c +++ b/ray.c @@ -78,9 +78,9 @@ trace(const float s[3], const float d[3], float pixel[3], int n, unsigned int ma for(k = 0; k < 3; ++k) pixel[k] += lights[m].diffuse[k] * objects[j].diffuse[k] * scale; } - - trace(y, r, pixel, n + 1, (1 << j)); } + + trace(y, r, pixel, n + 1, (1 << j)); } } } -- cgit v1.2.3 From 983bbf379fb22856987a8a9c38226605b7f40d1e Mon Sep 17 00:00:00 2001 From: Morten Hustveit Date: Tue, 2 Dec 2014 17:24:29 -0500 Subject: ray.c: Calculate lighting only for nearest hit This loses 5ms/frame on yelena. 
--- ray.c | 45 ++++++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/ray.c b/ray.c index 8a236bd..218f20e 100644 --- a/ray.c +++ b/ray.c @@ -59,30 +59,45 @@ static Light lights[] = { static void trace(const float s[3], const float d[3], float pixel[3], int n, unsigned int mask) { - int i, j, k, m; - float l[3], r[3], t, y[3]; + float nearest = HUGE_VAL; + int nearest_object = -1; + float nearest_y[3]; + float nearest_r[3]; + + for(int j = 0; j < LENGTH(objects); ++j) { + float r[3], t, y[3]; - for(j = 0; j < LENGTH(objects); ++j) { if ((1 << j) & mask) continue; t = sphere_intersect(y, r, s, d, objects[j].position, objects[j].radius); - if(unlikely(t > 0)) { - for(m = 0; m < LENGTH(lights); ++m) { - for(i = 0; i < 3; ++i) - l[i] = lights[m].position[i] - y[i]; + if(likely(t <= 0)) + continue; + + if (t < nearest) { + nearest = t; + nearest_object = j; + memcpy(nearest_y, y, sizeof(nearest_y)); + memcpy(nearest_r, r, sizeof(nearest_y)); + } + } + + if (nearest_object == -1) return; - float lr_dot = dot(l, r); - if (lr_dot > 0) { - float scale = lr_dot / sqrtf(dot(l, l)) / (1 << n); - for(k = 0; k < 3; ++k) - pixel[k] += lights[m].diffuse[k] * objects[j].diffuse[k] * scale; - } - } + for(int m = 0; m < LENGTH(lights); ++m) { + float l[3]; + for(int i = 0; i < 3; ++i) + l[i] = lights[m].position[i] - nearest_y[i]; - trace(y, r, pixel, n + 1, (1 << j)); + float lr_dot = dot(l, nearest_r); + if (lr_dot > 0) { + float scale = lr_dot / sqrtf(dot(l, l)) / (1 << n); + for(int k = 0; k < 3; ++k) + pixel[k] += lights[m].diffuse[k] * objects[nearest_object].diffuse[k] * scale; } } + + trace(nearest_y, nearest_r, pixel, n + 1, (1 << nearest_object)); } static void -- cgit v1.2.3 From 292d95a35b9d1268492f47c44580ec006f670bdb Mon Sep 17 00:00:00 2001 From: Morten Hustveit Date: Tue, 2 Dec 2014 17:31:37 -0500 Subject: ray.c: Halve the FOV This makes the spheres look more spherical. 
--- ray.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ray.c b/ray.c index 218f20e..7672779 100644 --- a/ray.c +++ b/ray.c @@ -102,7 +102,7 @@ trace(const float s[3], const float d[3], float pixel[3], int n, unsigned int ma static void trace_line(int l, unsigned char *buf) { - static const float s[3] = {0, 0, 0}; + static const float s[3] = {0, 0, 8}; for(int i = 0; i < WIDTH; ++i, buf += 4) { float pixel[3] = { 0, 0, 0 }; @@ -138,8 +138,8 @@ initialize_trace_vectors(void) { for(int y = 0; y < HEIGHT; ++y) { for(int x = 0; x < WIDTH; ++x) { float* d = trace_vectors[y][x]; - d[0] = ((float)x / WIDTH - 0.5f) * 2.0f; - d[1] = ((float)y / HEIGHT - 0.5f) * 2.0f * ((float)HEIGHT / WIDTH); + d[0] = ((float)x / WIDTH - 0.5f) * 0.5f; + d[1] = ((float)y / HEIGHT - 0.5f) * 0.5f * ((float)HEIGHT / WIDTH); d[2] = -1; normalize(d); } -- cgit v1.2.3 From 5eb4b4bb4e44eec2015ed4ec2bf966e1b0b0f85b Mon Sep 17 00:00:00 2001 From: Morten Hustveit Date: Tue, 2 Dec 2014 17:45:04 -0500 Subject: ray.c: Add missing free --- ray.c | 1 + 1 file changed, 1 insertion(+) diff --git a/ray.c b/ray.c index 7672779..2470777 100644 --- a/ray.c +++ b/ray.c @@ -179,6 +179,7 @@ trace_scene(float time, unsigned char *buf, int threaded) { for(int i = 0; i < num_threads; ++i) pthread_join(threads[i], NULL); + free(threads); } else { for(int i = 0; i < HEIGHT; ++i) trace_line(i, buf + i * 4 * WIDTH); -- cgit v1.2.3 From a64a0186cdeee9189bd4db1303f82908cfe279a4 Mon Sep 17 00:00:00 2001 From: Morten Hustveit Date: Tue, 2 Dec 2014 21:30:55 -0500 Subject: ray.c: Prevent lights from subtracting color in the shade Also add some ambient light. 
--- ray.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ray.c b/ray.c index 2470777..483dc0c 100644 --- a/ray.c +++ b/ray.c @@ -56,6 +56,7 @@ static Light lights[] = { {.position={-3, 3, -4}, .diffuse={0, .6, .6}}, {.position={0, 30, -4}, .diffuse={1, 1, 1}} }; +static float ambient[3] = {0.2, 0.1, 0.1}; static void trace(const float s[3], const float d[3], float pixel[3], int n, unsigned int mask) { @@ -92,8 +93,11 @@ trace(const float s[3], const float d[3], float pixel[3], int n, unsigned int ma float lr_dot = dot(l, nearest_r); if (lr_dot > 0) { float scale = lr_dot / sqrtf(dot(l, l)) / (1 << n); - for(int k = 0; k < 3; ++k) - pixel[k] += lights[m].diffuse[k] * objects[nearest_object].diffuse[k] * scale; + for(int k = 0; k < 3; ++k) { + float diffuse = lights[m].diffuse[k] * objects[nearest_object].diffuse[k] * scale; + if (diffuse < 0.0f) diffuse = 0.0f; + pixel[k] += diffuse + ambient[k] * objects[nearest_object].diffuse[k]; + } } } -- cgit v1.2.3 From 6cee50ceb0d153622b89fb813060419f5985857d Mon Sep 17 00:00:00 2001 From: Morten Hustveit Date: Tue, 2 Dec 2014 21:32:20 -0500 Subject: Subtract the central sphere, and update the animation to match --- 3dmath.c | 9 +++++---- 3dmath.h | 4 +++- ray.c | 58 +++++++++++++++++++++++++++++++++++++--------------------- 3 files changed, 45 insertions(+), 26 deletions(-) diff --git a/3dmath.c b/3dmath.c index 9e7f7a1..5bc1861 100644 --- a/3dmath.c +++ b/3dmath.c @@ -2,8 +2,6 @@ #include -#define POW2(x) ((x) * (x)) - float dot(const float x[3], const float y[3]) { return x[0] * y[0] + x[1] * y[1] + x[2] * y[2]; @@ -21,7 +19,7 @@ normalize(float x[3]) { float sphere_intersect(float* restrict y, float* restrict r, const float* restrict s, const float* restrict d, - const float* restrict c, float R) { + const float* restrict c, float R, int invert) { int i; float D, n[3], t, v[3]; @@ -33,7 +31,10 @@ sphere_intersect(float* restrict y, float* restrict r, if(D < 0) return -1; - t = -dot(v, d) - D; + 
if (invert) + t = -dot(v, d) + D; + else + t = -dot(v, d) - D; if (t <= 0) return -1; diff --git a/3dmath.h b/3dmath.h index 3a984cb..1635c40 100644 --- a/3dmath.h +++ b/3dmath.h @@ -1,6 +1,8 @@ +#define POW2(x) ((x) * (x)) + float dot(const float x[3], const float y[3]); void normalize(float x[3]); float sphere_intersect(float* restrict y, float* restrict r, const float* restrict s, const float* restrict d, - const float* restrict c, float R); + const float* restrict c, float R, int invert); diff --git a/ray.c b/ray.c index 483dc0c..12b565e 100644 --- a/ray.c +++ b/ray.c @@ -29,6 +29,7 @@ typedef struct { float position[3]; float radius; float diffuse[3]; + int subtract; } Object; typedef struct { @@ -47,10 +48,10 @@ static float trace_vectors[HEIGHT][WIDTH][3]; static int trace_vectors_initialized; static Object objects[] = { - {.position={-1.414, -1, -3}, .radius=1, .diffuse={.8, 0, .8}}, - {.position={0, 1.414, -3}, .radius=1, .diffuse={0, .8, .8}}, - {.position={0, 0, -3}, .radius=.25, .diffuse={.8, .8, .8}}, - {.position={1.414, -1, -3}, .radius=1, .diffuse={.8, .8, 0}} + {.position={-1.414, -1, -3}, .radius=1, .diffuse={.8, 0, .8}, .subtract=0}, + {.position={0, 1.414, -3}, .radius=1, .diffuse={0, .8, .8}, .subtract=0}, + {.position={0, 0, -3}, .radius=1.5, .diffuse={.8, .8, .8}, .subtract=1}, + {.position={1.414, -1, -3}, .radius=1, .diffuse={.8, .8, 0}, .subtract=0} }; static Light lights[] = { {.position={-3, 3, -4}, .diffuse={0, .6, .6}}, @@ -60,27 +61,42 @@ static float ambient[3] = {0.2, 0.1, 0.1}; static void trace(const float s[3], const float d[3], float pixel[3], int n, unsigned int mask) { + // Reflections in concave objects can go really deep, so we need to limit + // the recursion depth. 
+ if (n > 6) return; + float nearest = HUGE_VAL; int nearest_object = -1; float nearest_y[3]; float nearest_r[3]; - for(int j = 0; j < LENGTH(objects); ++j) { + for(size_t j = 0; j < LENGTH(objects); ++j) { float r[3], t, y[3]; - if ((1 << j) & mask) continue; + if (((1 << j) & mask) || objects[j].subtract) continue; - t = sphere_intersect(y, r, s, d, objects[j].position, objects[j].radius); + t = sphere_intersect(y, r, s, d, objects[j].position, objects[j].radius, 0); - if(likely(t <= 0)) + if(likely(t <= 0) || t > nearest) continue; - if (t < nearest) { - nearest = t; - nearest_object = j; - memcpy(nearest_y, y, sizeof(nearest_y)); - memcpy(nearest_r, r, sizeof(nearest_y)); + size_t k; + for (k = 0; k < LENGTH(objects); ++k) { + if (!objects[k].subtract) continue; + if (POW2(y[0] - objects[k].position[0]) + POW2(y[1] - objects[k].position[1]) + POW2(y[2] - objects[k].position[2]) > POW2(objects[k].radius)) continue; + + t = sphere_intersect(y, r, s, d, objects[k].position, objects[k].radius, 1); + + break; } + + if(likely(t <= 0) || t > nearest) + continue; + + nearest = t; + nearest_object = j; + memcpy(nearest_y, y, sizeof(nearest_y)); + memcpy(nearest_r, r, sizeof(nearest_y)); } if (nearest_object == -1) return; @@ -101,7 +117,7 @@ trace(const float s[3], const float d[3], float pixel[3], int n, unsigned int ma } } - trace(nearest_y, nearest_r, pixel, n + 1, (1 << nearest_object)); + trace(nearest_y, nearest_r, pixel, n + 1, 1 << nearest_object); } static void @@ -156,13 +172,13 @@ trace_scene(float time, unsigned char *buf, int threaded) { if (!trace_vectors_initialized) initialize_trace_vectors(); - objects[0].position[0] = 1.5 * cos(time); - objects[0].position[1] = 1.5 * sin(time); - objects[1].position[0] = 1.5 * cos(time + 1/3. * TAU); - objects[1].position[1] = 1.5 * sin(time + 1/3. * TAU); - objects[3].position[0] = 1.5 * cos(time + 2/3. * TAU); - objects[3].position[1] = 1.5 * sin(time + 2/3. 
* TAU); - objects[2].position[2] = -3 + 2 * sin(time * 2); + objects[0].position[0] = (1.5 + 0.35 * sin(1.1 * time)) * cos(0.5 * time); + objects[0].position[1] = (1.5 + 0.35 * sin(1.1 * time)) * sin(0.5 * time); + objects[1].position[0] = (1.5 + 0.35 * sin(1.1 * time)) * cos(0.5 * time + 1/3. * TAU); + objects[1].position[1] = (1.5 + 0.35 * sin(1.1 * time)) * sin(0.5 * time + 1/3. * TAU); + objects[3].position[0] = (1.5 + 0.35 * sin(1.1 * time)) * cos(0.5 * time + 2/3. * TAU); + objects[3].position[1] = (1.5 + 0.35 * sin(1.1 * time)) * sin(0.5 * time + 2/3. * TAU); + objects[2].position[2] = -3 + 0.5 * sin(time * 2.0); if(threaded) { ThreadArg arg; -- cgit v1.2.3 From 1d257b2051c94988318706a1070794fc2262bd6b Mon Sep 17 00:00:00 2001 From: Morten Hustveit Date: Tue, 2 Dec 2014 21:40:41 -0500 Subject: main_glut.c: Clear window and center animation --- main_glut.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/main_glut.c b/main_glut.c index cf22a73..92e3fdb 100644 --- a/main_glut.c +++ b/main_glut.c @@ -5,6 +5,7 @@ #include "ray.h" static int threaded; +static int viewport_width, viewport_height; static int init(int argc, char **argv, int w, int h) { @@ -32,6 +33,8 @@ display(void) { unsigned char* buffer = calloc(4, WIDTH * HEIGHT); trace_scene(time, buffer, threaded); + glClear(GL_COLOR_BUFFER_BIT); + glRasterPos2d(-(double)WIDTH / viewport_width, -(double)HEIGHT / viewport_height); glDrawPixels(WIDTH, HEIGHT, GL_BGRA, GL_UNSIGNED_BYTE, buffer); free(buffer); @@ -40,6 +43,8 @@ display(void) { static void reshape(int w, int h) { + viewport_width = w; + viewport_height = h; glViewport(0, 0, w, h); } -- cgit v1.2.3 From 128778e557dd665db36f2ac5fe3d21af22d32c50 Mon Sep 17 00:00:00 2001 From: Morten Hustveit Date: Tue, 2 Dec 2014 21:53:51 -0500 Subject: main_glut.c: Enable double buffering This prevents screen flickering during screen updates. 
--- main_glut.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main_glut.c b/main_glut.c index 92e3fdb..ed4eee5 100644 --- a/main_glut.c +++ b/main_glut.c @@ -13,7 +13,7 @@ init(int argc, char **argv, int w, int h) { glutInitWindowPosition(0, 0); glutInitWindowSize(w, h); - glutInitDisplayMode(GLUT_RGB); + glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE); glutCreateWindow(argv[0]); glDepthMask(0); -- cgit v1.2.3 From 10e4a4eb02b50a5751e496d6661a0587a86f11b5 Mon Sep 17 00:00:00 2001 From: Morten Hustveit Date: Tue, 2 Dec 2014 21:54:15 -0500 Subject: main_glut.c: Enable threading by default --- main_glut.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/main_glut.c b/main_glut.c index ed4eee5..1717898 100644 --- a/main_glut.c +++ b/main_glut.c @@ -4,7 +4,7 @@ #include "ray.h" -static int threaded; +static int threaded = 1; static int viewport_width, viewport_height; static int @@ -54,11 +54,9 @@ keyboard(unsigned char key, int x, int y) { case 27: exit(EXIT_SUCCESS); break; + case 't': - if(threaded) - threaded = 0; - else - threaded = 1; + threaded = !threaded; break; } } -- cgit v1.2.3 From e9de20a9047ae351d66a4aedc5be53c5ebf61d66 Mon Sep 17 00:00:00 2001 From: Morten Hustveit Date: Tue, 2 Dec 2014 22:07:18 -0500 Subject: ray.c: Add specular color multiplier, and fix some shading bugs --- ray.c | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/ray.c b/ray.c index 12b565e..b9c5f09 100644 --- a/ray.c +++ b/ray.c @@ -29,6 +29,7 @@ typedef struct { float position[3]; float radius; float diffuse[3]; + float specular[3]; int subtract; } Object; @@ -48,16 +49,16 @@ static float trace_vectors[HEIGHT][WIDTH][3]; static int trace_vectors_initialized; static Object objects[] = { - {.position={-1.414, -1, -3}, .radius=1, .diffuse={.8, 0, .8}, .subtract=0}, - {.position={0, 1.414, -3}, .radius=1, .diffuse={0, .8, .8}, .subtract=0}, - {.position={0, 0, -3}, .radius=1.5, .diffuse={.8, 
.8, .8}, .subtract=1}, - {.position={1.414, -1, -3}, .radius=1, .diffuse={.8, .8, 0}, .subtract=0} + {.position={-1.414, -1, -3}, .radius=1, .diffuse={.8, .0, .8}, .specular={.8, .8, .8}, .subtract=0}, + {.position={0, 1.414, -3}, .radius=1, .diffuse={.0, .8, .8}, .specular={.8, .8, .8}, .subtract=0}, + {.position={0, 0, -3}, .radius=1.5, .diffuse={.8, .8, .8}, .specular={.8, .8, .8}, .subtract=1}, + {.position={1.414, -1, -3}, .radius=1, .diffuse={.8, .8, .0}, .specular={.8, .8, .8}, .subtract=0} }; -static Light lights[] = { +static const Light lights[] = { {.position={-3, 3, -4}, .diffuse={0, .6, .6}}, {.position={0, 30, -4}, .diffuse={1, 1, 1}} }; -static float ambient[3] = {0.2, 0.1, 0.1}; +static const float ambient[3] = {0.2, 0.1, 0.1}; static void trace(const float s[3], const float d[3], float pixel[3], int n, unsigned int mask) { @@ -101,23 +102,27 @@ trace(const float s[3], const float d[3], float pixel[3], int n, unsigned int ma if (nearest_object == -1) return; + trace(nearest_y, nearest_r, pixel, n + 1, 1 << nearest_object); + + for (int k = 0; k < 3; ++k) + pixel[k] = pixel[k] * objects[nearest_object].specular[k] + ambient[k] * objects[nearest_object].diffuse[k]; + for(int m = 0; m < LENGTH(lights); ++m) { float l[3]; for(int i = 0; i < 3; ++i) l[i] = lights[m].position[i] - nearest_y[i]; float lr_dot = dot(l, nearest_r); - if (lr_dot > 0) { - float scale = lr_dot / sqrtf(dot(l, l)) / (1 << n); - for(int k = 0; k < 3; ++k) { - float diffuse = lights[m].diffuse[k] * objects[nearest_object].diffuse[k] * scale; - if (diffuse < 0.0f) diffuse = 0.0f; - pixel[k] += diffuse + ambient[k] * objects[nearest_object].diffuse[k]; - } - } - } + if (lr_dot <= 0) continue; - trace(nearest_y, nearest_r, pixel, n + 1, 1 << nearest_object); + float scale = lr_dot / sqrtf(dot(l, l)) / (1 << n); + // The cutoff at 0.05 is for artistic reasons; 0.0 would be more + // realistic. 
+ if (scale <= 0.05) continue; + + for(int k = 0; k < 3; ++k) + pixel[k] += lights[m].diffuse[k] * objects[nearest_object].diffuse[k] * scale; + } } static void -- cgit v1.2.3 From e11fb53429bc923a9b757bba307c6c8f6f0bf7a9 Mon Sep 17 00:00:00 2001 From: Morten Hustveit Date: Tue, 2 Dec 2014 22:17:50 -0500 Subject: ray.c: Allow self-reflections, now that we have concave objects Also update the animation a bit. --- ray.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/ray.c b/ray.c index b9c5f09..d74492a 100644 --- a/ray.c +++ b/ray.c @@ -49,10 +49,10 @@ static float trace_vectors[HEIGHT][WIDTH][3]; static int trace_vectors_initialized; static Object objects[] = { - {.position={-1.414, -1, -3}, .radius=1, .diffuse={.8, .0, .8}, .specular={.8, .8, .8}, .subtract=0}, - {.position={0, 1.414, -3}, .radius=1, .diffuse={.0, .8, .8}, .specular={.8, .8, .8}, .subtract=0}, - {.position={0, 0, -3}, .radius=1.5, .diffuse={.8, .8, .8}, .specular={.8, .8, .8}, .subtract=1}, - {.position={1.414, -1, -3}, .radius=1, .diffuse={.8, .8, .0}, .specular={.8, .8, .8}, .subtract=0} + {.position={-1.414, -1, -3}, .radius=1, .diffuse={.8, .0, .8}, .specular={.7, .6, .7}, .subtract=0}, + {.position={0, 1.414, -3}, .radius=1, .diffuse={.0, .8, .8}, .specular={.6, .7, .7}, .subtract=0}, + {.position={0, 0, -3}, .radius=1.5, .diffuse={.8, .8, .8}, .specular={.7, .7, .7}, .subtract=1}, + {.position={1.414, -1, -3}, .radius=1, .diffuse={.8, .8, .0}, .specular={.7, .7, .6}, .subtract=0} }; static const Light lights[] = { {.position={-3, 3, -4}, .diffuse={0, .6, .6}}, @@ -61,7 +61,7 @@ static const Light lights[] = { static const float ambient[3] = {0.2, 0.1, 0.1}; static void -trace(const float s[3], const float d[3], float pixel[3], int n, unsigned int mask) { +trace(const float s[3], const float d[3], float pixel[3], int n) { // Reflections in concave objects can go really deep, so we need to limit // the recursion depth. 
if (n > 6) return; @@ -74,7 +74,7 @@ trace(const float s[3], const float d[3], float pixel[3], int n, unsigned int ma for(size_t j = 0; j < LENGTH(objects); ++j) { float r[3], t, y[3]; - if (((1 << j) & mask) || objects[j].subtract) continue; + if (objects[j].subtract) continue; t = sphere_intersect(y, r, s, d, objects[j].position, objects[j].radius, 0); @@ -102,7 +102,7 @@ trace(const float s[3], const float d[3], float pixel[3], int n, unsigned int ma if (nearest_object == -1) return; - trace(nearest_y, nearest_r, pixel, n + 1, 1 << nearest_object); + trace(nearest_y, nearest_r, pixel, n + 1); for (int k = 0; k < 3; ++k) pixel[k] = pixel[k] * objects[nearest_object].specular[k] + ambient[k] * objects[nearest_object].diffuse[k]; @@ -132,7 +132,7 @@ trace_line(int l, unsigned char *buf) { for(int i = 0; i < WIDTH; ++i, buf += 4) { float pixel[3] = { 0, 0, 0 }; - trace(s, trace_vectors[l][i], pixel, 1, 0); + trace(s, trace_vectors[l][i], pixel, 1); buf[0] = MIN(pixel[0], 1.0f) * 255; buf[1] = MIN(pixel[1], 1.0f) * 255; @@ -177,13 +177,13 @@ trace_scene(float time, unsigned char *buf, int threaded) { if (!trace_vectors_initialized) initialize_trace_vectors(); - objects[0].position[0] = (1.5 + 0.35 * sin(1.1 * time)) * cos(0.5 * time); - objects[0].position[1] = (1.5 + 0.35 * sin(1.1 * time)) * sin(0.5 * time); - objects[1].position[0] = (1.5 + 0.35 * sin(1.1 * time)) * cos(0.5 * time + 1/3. * TAU); - objects[1].position[1] = (1.5 + 0.35 * sin(1.1 * time)) * sin(0.5 * time + 1/3. * TAU); - objects[3].position[0] = (1.5 + 0.35 * sin(1.1 * time)) * cos(0.5 * time + 2/3. * TAU); - objects[3].position[1] = (1.5 + 0.35 * sin(1.1 * time)) * sin(0.5 * time + 2/3. * TAU); - objects[2].position[2] = -3 + 0.5 * sin(time * 2.0); + objects[0].position[0] = (1.5 + 0.35 * sin(1.1 * time + 0.0)) * cos(0.5 * time); + objects[0].position[1] = (1.5 + 0.35 * sin(1.1 * time + 2.5)) * sin(0.5 * time); + objects[1].position[0] = (1.5 + 0.35 * sin(1.1 * time + 2.0)) * cos(0.5 * time + 1/3. 
* TAU); + objects[1].position[1] = (1.5 + 0.35 * sin(1.1 * time + 1.5)) * sin(0.5 * time + 1/3. * TAU); + objects[3].position[0] = (1.5 + 0.35 * sin(1.1 * time + 1.0)) * cos(0.5 * time + 2/3. * TAU); + objects[3].position[1] = (1.5 + 0.35 * sin(1.1 * time + 0.5)) * sin(0.5 * time + 2/3. * TAU); + objects[2].position[2] = -3 + 0.2 * sin(time * 1.2); if(threaded) { ThreadArg arg; -- cgit v1.2.3 From dbab279df7ffcf5dd55128a1bc44ea52d7584859 Mon Sep 17 00:00:00 2001 From: Morten Hustveit Date: Fri, 12 Dec 2014 15:32:34 -0800 Subject: main_glut.c: Restrict raster position to inside of target window --- main_glut.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/main_glut.c b/main_glut.c index 1717898..272b17e 100644 --- a/main_glut.c +++ b/main_glut.c @@ -34,7 +34,11 @@ display(void) { unsigned char* buffer = calloc(4, WIDTH * HEIGHT); trace_scene(time, buffer, threaded); glClear(GL_COLOR_BUFFER_BIT); - glRasterPos2d(-(double)WIDTH / viewport_width, -(double)HEIGHT / viewport_height); + auto raster_x = -(double)WIDTH / viewport_width; + auto raster_y = -(double)HEIGHT / viewport_height; + if (raster_x < -1.0) raster_x = -1.0; + if (raster_y < -1.0) raster_y = -1.0; + glRasterPos2d(raster_x, raster_y); glDrawPixels(WIDTH, HEIGHT, GL_BGRA, GL_UNSIGNED_BYTE, buffer); free(buffer); -- cgit v1.2.3 From eefec7a70c96e20e3920fb2adea6c4da77c26170 Mon Sep 17 00:00:00 2001 From: Morten Hustveit Date: Fri, 12 Dec 2014 15:41:38 -0800 Subject: Render to window dimensions --- main_glut.c | 14 +++++--------- main_headless.c | 7 +++++-- ray.c | 49 ++++++++++++++++++++++++++++--------------------- ray.h | 5 +---- 4 files changed, 39 insertions(+), 36 deletions(-) diff --git a/main_glut.c b/main_glut.c index 272b17e..e24ab5e 100644 --- a/main_glut.c +++ b/main_glut.c @@ -31,15 +31,11 @@ display(void) { exit(0); float time = (float)glutGet(GLUT_ELAPSED_TIME) / 1000; - unsigned char* buffer = calloc(4, WIDTH * HEIGHT); - trace_scene(time, buffer, threaded); + unsigned 
char* buffer = calloc(viewport_width * viewport_height, 4); + trace_scene(time, viewport_width, viewport_height, buffer, threaded); glClear(GL_COLOR_BUFFER_BIT); - auto raster_x = -(double)WIDTH / viewport_width; - auto raster_y = -(double)HEIGHT / viewport_height; - if (raster_x < -1.0) raster_x = -1.0; - if (raster_y < -1.0) raster_y = -1.0; - glRasterPos2d(raster_x, raster_y); - glDrawPixels(WIDTH, HEIGHT, GL_BGRA, GL_UNSIGNED_BYTE, buffer); + glRasterPos2d(-1.0, -1.0); + glDrawPixels(viewport_width, viewport_height, GL_BGRA, GL_UNSIGNED_BYTE, buffer); free(buffer); glutSwapBuffers(); @@ -68,7 +64,7 @@ keyboard(unsigned char key, int x, int y) { int main(int argc, char **argv) { - if (init(argc, argv, WIDTH, HEIGHT)) + if (init(argc, argv, 800, 600)) return EXIT_FAILURE; glutDisplayFunc(display); diff --git a/main_headless.c b/main_headless.c index ab38414..0fd5938 100644 --- a/main_headless.c +++ b/main_headless.c @@ -9,15 +9,18 @@ static const size_t kFramesToRender = 100; int main(int argc, char** argv) { + const size_t kWidth = 1000; + const size_t kHeight = 1000; + fprintf(stderr, "Rendering %zu frames\n", kFramesToRender); - unsigned char* buffer = calloc(4, WIDTH * HEIGHT); + unsigned char* buffer = calloc(4, kWidth * kHeight); struct timeval start; gettimeofday(&start, NULL); for (size_t i = 0; i < kFramesToRender; ++i) - trace_scene(i * 0.01f, buffer, 0); + trace_scene(i * 0.01f, kWidth, kHeight, buffer, 0); struct timeval end; gettimeofday(&end, NULL); diff --git a/ray.c b/ray.c index d74492a..4450eba 100644 --- a/ray.c +++ b/ray.c @@ -9,8 +9,6 @@ #include "3dmath.h" -#define BUFFER_SIZE (WIDTH * HEIGHT * 4) - #define LENGTH(array) (sizeof(array) / sizeof(array[0])) #define MAX(x, y) (x > y ? x : y) #define MIN(x, y) (x < y ? 
x : y) @@ -41,12 +39,13 @@ typedef struct { typedef struct { pthread_mutex_t mutex; + int width, height; unsigned char* buffer; long next_line; } ThreadArg; -static float trace_vectors[HEIGHT][WIDTH][3]; -static int trace_vectors_initialized; +static float* trace_vectors; +static int trace_vectors_width, trace_vectors_height; static Object objects[] = { {.position={-1.414, -1, -3}, .radius=1, .diffuse={.8, .0, .8}, .specular={.7, .6, .7}, .subtract=0}, @@ -126,13 +125,13 @@ trace(const float s[3], const float d[3], float pixel[3], int n) { } static void -trace_line(int l, unsigned char *buf) { +trace_line(int l, int width, unsigned char *buf) { static const float s[3] = {0, 0, 8}; - for(int i = 0; i < WIDTH; ++i, buf += 4) { + for(int i = 0; i < width; ++i, buf += 4) { float pixel[3] = { 0, 0, 0 }; - trace(s, trace_vectors[l][i], pixel, 1); + trace(s, &trace_vectors[(l * width + i) * 3], pixel, 1); buf[0] = MIN(pixel[0], 1.0f) * 255; buf[1] = MIN(pixel[1], 1.0f) * 255; @@ -146,11 +145,11 @@ thread(void *arg) { for (;;) { pthread_mutex_lock(&thread_arg->mutex); - if (thread_arg->next_line == HEIGHT) break; + if (thread_arg->next_line == thread_arg->height) break; long line = thread_arg->next_line++; pthread_mutex_unlock(&thread_arg->mutex); - trace_line(line, thread_arg->buffer + line * 4 * WIDTH); + trace_line(line, thread_arg->width, thread_arg->buffer + line * 4 * thread_arg->width); } pthread_mutex_unlock(&thread_arg->mutex); @@ -159,23 +158,29 @@ thread(void *arg) { } static void -initialize_trace_vectors(void) { - for(int y = 0; y < HEIGHT; ++y) { - for(int x = 0; x < WIDTH; ++x) { - float* d = trace_vectors[y][x]; - d[0] = ((float)x / WIDTH - 0.5f) * 0.5f; - d[1] = ((float)y / HEIGHT - 0.5f) * 0.5f * ((float)HEIGHT / WIDTH); +initialize_trace_vectors(int width, int height) { + trace_vectors = calloc(width * height, 3 * sizeof(float)); + trace_vectors_width = width; + trace_vectors_height = height; + for(int y = 0; y < height; ++y) { + for(int x = 0; x < 
width; ++x) { + float* d = &trace_vectors[(y * width + x) * 3]; + d[0] = ((float)x / width - 0.5f) * 0.5f * ((float)width / height); + d[1] = ((float)y / height - 0.5f) * 0.5f; d[2] = -1; normalize(d); } } - trace_vectors_initialized = 1; } void -trace_scene(float time, unsigned char *buf, int threaded) { - if (!trace_vectors_initialized) - initialize_trace_vectors(); +trace_scene(float time, int width, int height, unsigned char *buf, int threaded) { + if (trace_vectors && (trace_vectors_width != width || trace_vectors_height != height)) { + free(trace_vectors); + trace_vectors = 0; + } + if (!trace_vectors) + initialize_trace_vectors(width, height); objects[0].position[0] = (1.5 + 0.35 * sin(1.1 * time + 0.0)) * cos(0.5 * time); objects[0].position[1] = (1.5 + 0.35 * sin(1.1 * time + 2.5)) * sin(0.5 * time); @@ -188,6 +193,8 @@ trace_scene(float time, unsigned char *buf, int threaded) { if(threaded) { ThreadArg arg; memset(&arg, 0, sizeof(arg)); + arg.width = width; + arg.height = height; pthread_mutex_init(&arg.mutex, NULL); arg.buffer = buf; @@ -206,7 +213,7 @@ trace_scene(float time, unsigned char *buf, int threaded) { pthread_join(threads[i], NULL); free(threads); } else { - for(int i = 0; i < HEIGHT; ++i) - trace_line(i, buf + i * 4 * WIDTH); + for(int i = 0; i < height; ++i) + trace_line(i, width, buf + i * 4 * width); } } diff --git a/ray.h b/ray.h index 83b4cf0..fb0be48 100644 --- a/ray.h +++ b/ray.h @@ -1,10 +1,7 @@ #ifndef RAY_H_ #define RAY_H_ 1 -#define WIDTH 1000 -#define HEIGHT 1000 - void -trace_scene(float time, unsigned char *buf, int threaded); +trace_scene(float time, int width, int height, unsigned char *buf, int threaded); #endif // !RAY_H_ -- cgit v1.2.3 From e108aca4afc2f5dc06216b0cf2e0bc3500cad21a Mon Sep 17 00:00:00 2001 From: Morten Hustveit Date: Fri, 12 Dec 2014 15:49:24 -0800 Subject: Re-add center sphere --- ray.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/ray.c b/ray.c index 
4450eba..d1840c2 100644 --- a/ray.c +++ b/ray.c @@ -51,7 +51,8 @@ static Object objects[] = { {.position={-1.414, -1, -3}, .radius=1, .diffuse={.8, .0, .8}, .specular={.7, .6, .7}, .subtract=0}, {.position={0, 1.414, -3}, .radius=1, .diffuse={.0, .8, .8}, .specular={.6, .7, .7}, .subtract=0}, {.position={0, 0, -3}, .radius=1.5, .diffuse={.8, .8, .8}, .specular={.7, .7, .7}, .subtract=1}, - {.position={1.414, -1, -3}, .radius=1, .diffuse={.8, .8, .0}, .specular={.7, .7, .6}, .subtract=0} + {.position={1.414, -1, -3}, .radius=1, .diffuse={.8, .8, .0}, .specular={.7, .7, .6}, .subtract=0}, + {.position={0, 0, -3}, .radius=1.1, .diffuse={.9, .9, .9}, .specular={.9, .9, .9}, .subtract=2} }; static const Light lights[] = { {.position={-3, 3, -4}, .diffuse={0, .6, .6}}, @@ -73,25 +74,27 @@ trace(const float s[3], const float d[3], float pixel[3], int n) { for(size_t j = 0; j < LENGTH(objects); ++j) { float r[3], t, y[3]; - if (objects[j].subtract) continue; + if (objects[j].subtract == 1) continue; t = sphere_intersect(y, r, s, d, objects[j].position, objects[j].radius, 0); if(likely(t <= 0) || t > nearest) continue; - size_t k; - for (k = 0; k < LENGTH(objects); ++k) { - if (!objects[k].subtract) continue; - if (POW2(y[0] - objects[k].position[0]) + POW2(y[1] - objects[k].position[1]) + POW2(y[2] - objects[k].position[2]) > POW2(objects[k].radius)) continue; + if (objects[j].subtract == 0) { + size_t k; + for (k = 0; k < LENGTH(objects); ++k) { + if (!objects[k].subtract) continue; + if (POW2(y[0] - objects[k].position[0]) + POW2(y[1] - objects[k].position[1]) + POW2(y[2] - objects[k].position[2]) > POW2(objects[k].radius)) continue; - t = sphere_intersect(y, r, s, d, objects[k].position, objects[k].radius, 1); + t = sphere_intersect(y, r, s, d, objects[k].position, objects[k].radius, 1); - break; - } + break; + } - if(likely(t <= 0) || t > nearest) - continue; + if(likely(t <= 0) || t > nearest) + continue; + } nearest = t; nearest_object = j; @@ -189,6 +192,7 @@ 
trace_scene(float time, int width, int height, unsigned char *buf, int threaded) objects[3].position[0] = (1.5 + 0.35 * sin(1.1 * time + 1.0)) * cos(0.5 * time + 2/3. * TAU); objects[3].position[1] = (1.5 + 0.35 * sin(1.1 * time + 0.5)) * sin(0.5 * time + 2/3. * TAU); objects[2].position[2] = -3 + 0.2 * sin(time * 1.2); + memcpy(objects[4].position, objects[2].position, sizeof(objects[4].position)); if(threaded) { ThreadArg arg; -- cgit v1.2.3 From a83ff7cd80a9405482272c74b144b9af7ba3bfd1 Mon Sep 17 00:00:00 2001 From: Morten Hustveit Date: Sat, 13 Dec 2014 21:35:27 -0800 Subject: Makefile.am: Use _LDADD for libraries --- Makefile.am | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Makefile.am b/Makefile.am index 2e9d900..aa93cc4 100644 --- a/Makefile.am +++ b/Makefile.am @@ -4,7 +4,9 @@ bin_PROGRAMS = ray ray-headless ray_SOURCES = main_glut.c ray.c 3dmath.c 3dmath.h -ray_LDFLAGS = -lGL -lGLU -lglut -lm -pthread +ray_LDADD = -lGL -lGLU -lglut -lm +ray_LDFLAGS = -pthread ray_headless_SOURCES = main_headless.c ray.c 3dmath.c 3dmath.h -ray_headless_LDFLAGS = -lm -pthread +ray_headless_LDADD = -lm +ray_headless_LDFLAGS = -pthread -- cgit v1.2.3 From 146e7d6cef639abc0e2015534a871a7d2710a889 Mon Sep 17 00:00:00 2001 From: Morten Hustveit Date: Sat, 13 Dec 2014 22:20:27 -0800 Subject: main_headless.c: Use multi-threading --- main_headless.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main_headless.c b/main_headless.c index 0fd5938..ef66058 100644 --- a/main_headless.c +++ b/main_headless.c @@ -20,7 +20,7 @@ main(int argc, char** argv) { gettimeofday(&start, NULL); for (size_t i = 0; i < kFramesToRender; ++i) - trace_scene(i * 0.01f, kWidth, kHeight, buffer, 0); + trace_scene(i * 0.01f, kWidth, kHeight, buffer, 1); struct timeval end; gettimeofday(&end, NULL); -- cgit v1.2.3 From c1a4ed37fe2a1e069a9b827d0012cab38984ee8f Mon Sep 17 00:00:00 2001 From: Morten Hustveit Date: Thu, 5 Nov 2015 13:51:47 -0500 Subject: Add script to 
benchmark using -fprofile-use --- Makefile.am | 2 +- test-profile-arcs.sh | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) create mode 100755 test-profile-arcs.sh diff --git a/Makefile.am b/Makefile.am index aa93cc4..1c47b0c 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,4 +1,4 @@ -AM_CFLAGS = -Wall -Wextra -pedantic -std=c99 -flto +AM_CFLAGS = -Wall -pedantic -std=c99 -flto bin_PROGRAMS = ray ray-headless diff --git a/test-profile-arcs.sh b/test-profile-arcs.sh new file mode 100755 index 0000000..e8d390e --- /dev/null +++ b/test-profile-arcs.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +set -e + +autoreconf -f -i + +echo >&2 "Baseline" +./configure CFLAGS='-O3 -march=native' CC=gcc >/dev/null +make clean >/dev/null +make >/dev/null +./ray-headless + + +echo >&2 "Profiling" +./configure CFLAGS='-O3 -march=native -fprofile-generate' CC=gcc >/dev/null +make clean >/dev/null +make >/dev/null +./ray-headless + +echo >&2 "Using profile" +./configure CFLAGS='-O3 -march=native -fprofile-use -fprofile-correction' CC=gcc >/dev/null +make clean >/dev/null +make >/dev/null +./ray-headless -- cgit v1.2.3