diff options
author | Martin Stensgård <mastensg@users.noreply.github.com> | 2016-11-02 02:22:42 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2016-11-02 02:22:42 +0100 |
commit | 73da8a3ac056cb1c442fc3a24b0148037b180392 (patch) | |
tree | 45ffc7233b8bc234d876cab68b20b0664de35694 | |
parent | 706da965d3c7892ef09b83bdf6120f104faa2ede (diff) | |
parent | c1a4ed37fe2a1e069a9b827d0012cab38984ee8f (diff) |
Merge pull request #1 from mortehu/master
Make a headless entry point for benchmarking purposes
-rw-r--r-- | 3dmath.c | 34 | ||||
-rw-r--r-- | 3dmath.h | 9 | ||||
-rw-r--r-- | Makefile.am | 13 | ||||
-rw-r--r-- | main_glut.c | 78 | ||||
-rw-r--r-- | main_headless.c | 36 | ||||
-rw-r--r-- | ray.c | 290 | ||||
-rw-r--r-- | ray.h | 7 | ||||
-rwxr-xr-x | test-profile-arcs.sh | 24 |
8 files changed, 338 insertions, 153 deletions
@@ -1,25 +1,25 @@ -#include <math.h> +#include "3dmath.h" -#define POW2(x) ((x) * (x)) +#include <math.h> float -dot(float x[3], float y[3]) { +dot(const float x[3], const float y[3]) { return x[0] * y[0] + x[1] * y[1] + x[2] * y[2]; } void normalize(float x[3]) { - float len; - int i; - - len = sqrt(dot(x, x)); + float len = 1.0f / sqrtf(dot(x, x)); - for(i = 0; i < 3; ++i) - x[i] /= len; + x[0] *= len; + x[1] *= len; + x[2] *= len; } float -sphere_intersect(float y[3], float r[3], float s[3], float d[3], float c[3], float R) { +sphere_intersect(float* restrict y, float* restrict r, + const float* restrict s, const float* restrict d, + const float* restrict c, float R, int invert) { int i; float D, n[3], t, v[3]; @@ -31,17 +31,23 @@ sphere_intersect(float y[3], float r[3], float s[3], float d[3], float c[3], flo if(D < 0) return -1; - t = -dot(v, d) - D; + if (invert) + t = -dot(v, d) + D; + else + t = -dot(v, d) - D; + + if (t <= 0) + return -1; for(i = 0; i < 3; ++i) { y[i] = s[i] + t * d[i]; n[i] = y[i] - c[i]; } - normalize(n); + float two_dot_nd_div_sq_n_mag = 2.0f * dot(n, d) / dot(n, n); - for(i = 0; i < 3; ++i) - r[i] = d[i] - 2 * dot(n, d) * n[i]; + for (i = 0; i < 3; ++i) + r[i] = d[i] - two_dot_nd_div_sq_n_mag * n[i]; return t; } @@ -1,3 +1,8 @@ -float dot(float x[3], float y[3]); +#define POW2(x) ((x) * (x)) + +float dot(const float x[3], const float y[3]); void normalize(float x[3]); -float sphere_intersect(float y[3], float r[3], float s[3], float d[3], float c[3], float R); + +float sphere_intersect(float* restrict y, float* restrict r, + const float* restrict s, const float* restrict d, + const float* restrict c, float R, int invert); diff --git a/Makefile.am b/Makefile.am index e67837f..1c47b0c 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,7 +1,12 @@ -AM_CFLAGS = -Wall -Wextra -pedantic -std=c99 +AM_CFLAGS = -Wall -pedantic -std=c99 -flto -bin_PROGRAMS = ray +bin_PROGRAMS = ray ray-headless -ray_LDFLAGS = -lGL -lGLU -lglut -lm -pthread 
-ray_SOURCES = ray.c 3dmath.c 3dmath.h +ray_SOURCES = main_glut.c ray.c 3dmath.c 3dmath.h +ray_LDADD = -lGL -lGLU -lglut -lm +ray_LDFLAGS = -pthread + +ray_headless_SOURCES = main_headless.c ray.c 3dmath.c 3dmath.h +ray_headless_LDADD = -lm +ray_headless_LDFLAGS = -pthread diff --git a/main_glut.c b/main_glut.c new file mode 100644 index 0000000..e24ab5e --- /dev/null +++ b/main_glut.c @@ -0,0 +1,78 @@ +#include <GL/gl.h> +#include <GL/glu.h> +#include <GL/glut.h> + +#include "ray.h" + +static int threaded = 1; +static int viewport_width, viewport_height; + +static int +init(int argc, char **argv, int w, int h) { + glutInit(&argc, argv); + + glutInitWindowPosition(0, 0); + glutInitWindowSize(w, h); + glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE); + glutCreateWindow(argv[0]); + + glDepthMask(0); + glDisable(GL_DEPTH_TEST); + glDisable(GL_BLEND); + + return 0; +} + +static void +display(void) { + static int count = 0; + ++count; + if(count > 10000) + exit(0); + float time = (float)glutGet(GLUT_ELAPSED_TIME) / 1000; + + unsigned char* buffer = calloc(viewport_width * viewport_height, 4); + trace_scene(time, viewport_width, viewport_height, buffer, threaded); + glClear(GL_COLOR_BUFFER_BIT); + glRasterPos2d(-1.0, -1.0); + glDrawPixels(viewport_width, viewport_height, GL_BGRA, GL_UNSIGNED_BYTE, buffer); + free(buffer); + + glutSwapBuffers(); +} + +static void +reshape(int w, int h) { + viewport_width = w; + viewport_height = h; + glViewport(0, 0, w, h); +} + +static void +keyboard(unsigned char key, int x, int y) { + switch(key) { + case 27: + exit(EXIT_SUCCESS); + break; + + case 't': + threaded = !threaded; + break; + } +} + + +int +main(int argc, char **argv) { + if (init(argc, argv, 800, 600)) + return EXIT_FAILURE; + + glutDisplayFunc(display); + glutIdleFunc(display); + glutReshapeFunc(reshape); + glutKeyboardFunc(keyboard); + + glutMainLoop(); + + return EXIT_SUCCESS; +} diff --git a/main_headless.c b/main_headless.c new file mode 100644 index 0000000..ef66058 --- 
/dev/null +++ b/main_headless.c @@ -0,0 +1,36 @@ +#include <stdio.h> +#include <stdlib.h> + +#include <sys/time.h> + +#include "ray.h" + +static const size_t kFramesToRender = 100; + +int +main(int argc, char** argv) { + const size_t kWidth = 1000; + const size_t kHeight = 1000; + + fprintf(stderr, "Rendering %zu frames\n", kFramesToRender); + + unsigned char* buffer = calloc(4, kWidth * kHeight); + + struct timeval start; + gettimeofday(&start, NULL); + + for (size_t i = 0; i < kFramesToRender; ++i) + trace_scene(i * 0.01f, kWidth, kHeight, buffer, 1); + + struct timeval end; + gettimeofday(&end, NULL); + + free(buffer); + + fprintf(stderr, "Average %.2f ms/frame\n", + (1.0e3 * (end.tv_sec - start.tv_sec) + + 1.0e-3 * (end.tv_usec - start.tv_usec)) / + kFramesToRender); + + return EXIT_SUCCESS; +} @@ -1,28 +1,34 @@ +#include "ray.h" + #include <stdio.h> #include <stdlib.h> #include <string.h> #include <math.h> #include <pthread.h> -#include <GL/gl.h> -#include <GL/glu.h> -#include <GL/glut.h> +#include <unistd.h> #include "3dmath.h" -#define WIDTH 1000 -#define HEIGHT 1000 -#define BUFFER_SIZE (WIDTH * HEIGHT * 4) - #define LENGTH(array) (sizeof(array) / sizeof(array[0])) #define MAX(x, y) (x > y ? x : y) #define MIN(x, y) (x < y ? 
x : y) #define TAU 6.28318531 +#if __GNUC__ >= 3 +# define unlikely(cond) __builtin_expect ((cond), 0) +# define likely(cond) __builtin_expect ((cond), 1) +#else +# define unlikely(cond) (cond) +# define likely(cond) (cond) +#endif + typedef struct { float position[3]; float radius; float diffuse[3]; + float specular[3]; + int subtract; } Object; typedef struct { @@ -30,170 +36,188 @@ typedef struct { float diffuse[3]; } Light; -static unsigned char threaded = 0; -static unsigned char buffer[BUFFER_SIZE]; +typedef struct { + pthread_mutex_t mutex; + + int width, height; + unsigned char* buffer; + long next_line; +} ThreadArg; + +static float* trace_vectors; +static int trace_vectors_width, trace_vectors_height; + static Object objects[] = { - {.position={-1.414, -1, -3}, .radius=1, .diffuse={.8, 0, .8}}, - {.position={0, 1.414, -3}, .radius=1, .diffuse={0, .8, .8}}, - {.position={0, 0, -3}, .radius=.25, .diffuse={.8, .8, .8}}, - {.position={1.414, -1, -3}, .radius=1, .diffuse={.8, .8, 0}} + {.position={-1.414, -1, -3}, .radius=1, .diffuse={.8, .0, .8}, .specular={.7, .6, .7}, .subtract=0}, + {.position={0, 1.414, -3}, .radius=1, .diffuse={.0, .8, .8}, .specular={.6, .7, .7}, .subtract=0}, + {.position={0, 0, -3}, .radius=1.5, .diffuse={.8, .8, .8}, .specular={.7, .7, .7}, .subtract=1}, + {.position={1.414, -1, -3}, .radius=1, .diffuse={.8, .8, .0}, .specular={.7, .7, .6}, .subtract=0}, + {.position={0, 0, -3}, .radius=1.1, .diffuse={.9, .9, .9}, .specular={.9, .9, .9}, .subtract=2} }; -static Light lights[] = { +static const Light lights[] = { {.position={-3, 3, -4}, .diffuse={0, .6, .6}}, {.position={0, 30, -4}, .diffuse={1, 1, 1}} }; +static const float ambient[3] = {0.2, 0.1, 0.1}; static void -trace(float s[3], float d[3], float pixel[3], int n) { - int i, j, k, m; - float l[3], r[3], t, y[3]; +trace(const float s[3], const float d[3], float pixel[3], int n) { + // Reflections in concave objects can go really deep, so we need to limit + // the recursion depth. 
+ if (n > 6) return; - for(j = 0; j < LENGTH(objects); ++j) { - t = sphere_intersect(y, r, s, d, objects[j].position, objects[j].radius); + float nearest = HUGE_VAL; + int nearest_object = -1; + float nearest_y[3]; + float nearest_r[3]; - if(t > 0) { - for(m = 0; m < LENGTH(lights); ++m) { - for(i = 0; i < 3; ++i) - l[i] = lights[m].position[i] - y[i]; + for(size_t j = 0; j < LENGTH(objects); ++j) { + float r[3], t, y[3]; - normalize(l); - for(k = 0; k < 3; ++k) - pixel[k] += lights[m].diffuse[k] * objects[j].diffuse[k] * (MAX(dot(l, r), 0)) / (1 << n); + if (objects[j].subtract == 1) continue; - trace(y, r, pixel, n + 1); - } - } - } -} + t = sphere_intersect(y, r, s, d, objects[j].position, objects[j].radius, 0); -static void -trace_line(int l, unsigned char *buf) { - static float s[3] = {0, 0, 0}; - float y = l - HEIGHT / 2; + if(likely(t <= 0) || t > nearest) + continue; - for(int i = 0; i < 4 * WIDTH; i += 4) { - float x = (i / 4) - WIDTH / 2; + if (objects[j].subtract == 0) { + size_t k; + for (k = 0; k < LENGTH(objects); ++k) { + if (!objects[k].subtract) continue; + if (POW2(y[0] - objects[k].position[0]) + POW2(y[1] - objects[k].position[1]) + POW2(y[2] - objects[k].position[2]) > POW2(objects[k].radius)) continue; - float pixel[3]; - memset(pixel, '\0', sizeof(pixel)); + t = sphere_intersect(y, r, s, d, objects[k].position, objects[k].radius, 1); - float d[3]; - d[0] = x / (WIDTH / 2); - d[1] = y / (HEIGHT / 2) * ((float)HEIGHT / (float)WIDTH); - d[2] = -1; + break; + } - normalize(d); - - trace(s, d, pixel, 1); + if(likely(t <= 0) || t > nearest) + continue; + } - for(int j = 0; j < 3; ++j) - buf[i + j] = MIN(255 * pixel[j], 255); + nearest = t; + nearest_object = j; + memcpy(nearest_y, y, sizeof(nearest_y)); + memcpy(nearest_r, r, sizeof(nearest_y)); } -} -static void * -thread(void *arg) { - long line = (long) arg; + if (nearest_object == -1) return; - trace_line(line, buffer + line * 4 * WIDTH); + trace(nearest_y, nearest_r, pixel, n + 1); - 
pthread_exit(NULL); -} + for (int k = 0; k < 3; ++k) + pixel[k] = pixel[k] * objects[nearest_object].specular[k] + ambient[k] * objects[nearest_object].diffuse[k]; -static void -trace_scene(unsigned char *buf) { - if(threaded) { - pthread_attr_t attr; - pthread_attr_init(&attr); - pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); - - pthread_t threads[HEIGHT]; - for(long i = 0; i < HEIGHT; ++i) { - int ret = pthread_create(&threads[i], &attr, thread, (void *)i); - - if(ret) { - fprintf(stderr, "pthread_create(): %d\n", ret); - exit(EXIT_FAILURE); - } - } + for(int m = 0; m < LENGTH(lights); ++m) { + float l[3]; + for(int i = 0; i < 3; ++i) + l[i] = lights[m].position[i] - nearest_y[i]; - void *status; - for(long i = 0; i < HEIGHT; ++i) - pthread_join(threads[i], &status); - } else { - for(int i = 0; i < HEIGHT; ++i) - trace_line(i, buffer + i * 4 * WIDTH); + float lr_dot = dot(l, nearest_r); + if (lr_dot <= 0) continue; + + float scale = lr_dot / sqrtf(dot(l, l)) / (1 << n); + // The cutoff at 0.05 is for artistic reasons; 0.0 would be more + // realistic. + if (scale <= 0.05) continue; + + for(int k = 0; k < 3; ++k) + pixel[k] += lights[m].diffuse[k] * objects[nearest_object].diffuse[k] * scale; } } static void -display(void) { - static int count = 0; - ++count; - if(count > 10000) - exit(0); - float time = (float)glutGet(GLUT_ELAPSED_TIME) / 1000; - - objects[0].position[0] = 1.5 * cos(time); - objects[0].position[1] = 1.5 * sin(time); - objects[1].position[0] = 1.5 * cos(time + 1/3. * TAU); - objects[1].position[1] = 1.5 * sin(time + 1/3. * TAU); - objects[3].position[0] = 1.5 * cos(time + 2/3. * TAU); - objects[3].position[1] = 1.5 * sin(time + 2/3. 
* TAU); - objects[2].position[2] = -3 + 2 * sin(time * 2); - - trace_scene(buffer); - glDrawPixels(WIDTH, HEIGHT, GL_BGRA, GL_UNSIGNED_BYTE, buffer); - glutSwapBuffers(); -} +trace_line(int l, int width, unsigned char *buf) { + static const float s[3] = {0, 0, 8}; -static void -reshape(int w, int h) { - glViewport(0, 0, w, h); -} + for(int i = 0; i < width; ++i, buf += 4) { + float pixel[3] = { 0, 0, 0 }; -static void -keyboard(unsigned char key, int x, int y) { - switch(key) { - case 27: - exit(EXIT_SUCCESS); - break; - case 't': - if(threaded) - threaded = 0; - else - threaded = 1; - break; + trace(s, &trace_vectors[(l * width + i) * 3], pixel, 1); + + buf[0] = MIN(pixel[0], 1.0f) * 255; + buf[1] = MIN(pixel[1], 1.0f) * 255; + buf[2] = MIN(pixel[2], 1.0f) * 255; } } -static int -init(int argc, char **argv, int w, int h) { - glutInit(&argc, argv); +static void * +thread(void *arg) { + ThreadArg* thread_arg = arg; - glutInitWindowPosition(0, 0); - glutInitWindowSize(w, h); - glutInitDisplayMode(GLUT_RGB); - glutCreateWindow(argv[0]); + for (;;) { + pthread_mutex_lock(&thread_arg->mutex); + if (thread_arg->next_line == thread_arg->height) break; + long line = thread_arg->next_line++; + pthread_mutex_unlock(&thread_arg->mutex); + + trace_line(line, thread_arg->width, thread_arg->buffer + line * 4 * thread_arg->width); + } - glDepthMask(0); - glDisable(GL_DEPTH_TEST); - glDisable(GL_BLEND); + pthread_mutex_unlock(&thread_arg->mutex); - return 0; + return NULL; } -int -main(int argc, char **argv) { - if (init(argc, argv, WIDTH, HEIGHT)) - return EXIT_FAILURE; +static void +initialize_trace_vectors(int width, int height) { + trace_vectors = calloc(width * height, 3 * sizeof(float)); + trace_vectors_width = width; + trace_vectors_height = height; + for(int y = 0; y < height; ++y) { + for(int x = 0; x < width; ++x) { + float* d = &trace_vectors[(y * width + x) * 3]; + d[0] = ((float)x / width - 0.5f) * 0.5f * ((float)width / height); + d[1] = ((float)y / height - 0.5f) * 
0.5f; + d[2] = -1; + normalize(d); + } + } +} + +void +trace_scene(float time, int width, int height, unsigned char *buf, int threaded) { + if (trace_vectors && (trace_vectors_width != width || trace_vectors_height != height)) { + free(trace_vectors); + trace_vectors = 0; + } + if (!trace_vectors) + initialize_trace_vectors(width, height); + + objects[0].position[0] = (1.5 + 0.35 * sin(1.1 * time + 0.0)) * cos(0.5 * time); + objects[0].position[1] = (1.5 + 0.35 * sin(1.1 * time + 2.5)) * sin(0.5 * time); + objects[1].position[0] = (1.5 + 0.35 * sin(1.1 * time + 2.0)) * cos(0.5 * time + 1/3. * TAU); + objects[1].position[1] = (1.5 + 0.35 * sin(1.1 * time + 1.5)) * sin(0.5 * time + 1/3. * TAU); + objects[3].position[0] = (1.5 + 0.35 * sin(1.1 * time + 1.0)) * cos(0.5 * time + 2/3. * TAU); + objects[3].position[1] = (1.5 + 0.35 * sin(1.1 * time + 0.5)) * sin(0.5 * time + 2/3. * TAU); + objects[2].position[2] = -3 + 0.2 * sin(time * 1.2); + memcpy(objects[4].position, objects[2].position, sizeof(objects[4].position)); - glutDisplayFunc(display); - glutIdleFunc(display); - glutReshapeFunc(reshape); - glutKeyboardFunc(keyboard); + if(threaded) { + ThreadArg arg; + memset(&arg, 0, sizeof(arg)); + arg.width = width; + arg.height = height; + pthread_mutex_init(&arg.mutex, NULL); + arg.buffer = buf; + + int num_threads = sysconf(_SC_NPROCESSORS_CONF) - 1; + pthread_t* threads = NULL; + if (num_threads > 0) { + threads = calloc(sizeof(*threads), num_threads); + + for (int i = 0; i < num_threads; ++i) + pthread_create(&threads[i], NULL, thread, &arg); + } - glutMainLoop(); + thread(&arg); - return EXIT_SUCCESS; + for(int i = 0; i < num_threads; ++i) + pthread_join(threads[i], NULL); + free(threads); + } else { + for(int i = 0; i < height; ++i) + trace_line(i, width, buf + i * 4 * width); + } } @@ -0,0 +1,7 @@ +#ifndef RAY_H_ +#define RAY_H_ 1 + +void +trace_scene(float time, int width, int height, unsigned char *buf, int threaded); + +#endif // !RAY_H_ diff --git 
a/test-profile-arcs.sh b/test-profile-arcs.sh new file mode 100755 index 0000000..e8d390e --- /dev/null +++ b/test-profile-arcs.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +set -e + +autoreconf -f -i + +echo >&2 "Baseline" +./configure CFLAGS='-O3 -march=native' CC=gcc >/dev/null +make clean >/dev/null +make >/dev/null +./ray-headless + + +echo >&2 "Profiling" +./configure CFLAGS='-O3 -march=native -fprofile-generate' CC=gcc >/dev/null +make clean >/dev/null +make >/dev/null +./ray-headless + +echo >&2 "Using profile" +./configure CFLAGS='-O3 -march=native -fprofile-use -fprofile-correction' CC=gcc >/dev/null +make clean >/dev/null +make >/dev/null +./ray-headless |