/********************************************************************************************** * * rlsw v1.0 - An OpenGL 1.1-style software renderer implementation * * DESCRIPTION: * rlsw is a custom OpenGL 1.1-style implementation on software, intended to provide all * functionality available on rlgl.h library used by raylib, becoming a direct software * rendering replacement for OpenGL 1.1 backend and allowing to run raylib on GPU-less * devices when required * * FEATURES: * - Rendering to custom internal framebuffer with multiple color modes supported: * - Color buffer: RGB - 8-bit (3:3:2) | RGB - 16-bit (5:6:5) | RGB - 24-bit (8:8:8) * - Depth buffer: D - 8-bit (unorm) | D - 16-bit (unorm) | D - 24-bit (unorm) * - Rendering modes supported: POINT, LINES, TRIANGLE, QUADS * - Additional features: Polygon modes, Point width, Line width * - Clipping support for all rendering modes * - Texture features supported: * - All uncompressed texture formats supported by raylib * - Texture Minification/Magnification checks * - Point and Bilinear filtering * - Texture Wrap Modes with separate checks for S/T coordinates * - Vertex Arrays support with direct primitive drawing mode * - Matrix Stack support (Matrix Push/Pop) * - Other GL misc features: * - GL-style getter functions * - Framebuffer resizing * - Perspective correction * - Scissor clipping * - Depth testing * - Blend modes * - Face culling * * ADDITIONAL NOTES: * Check PR for more info: https://github.com/raysan5/raylib/pull/4832 * * CONFIGURATION: * #define RLSW_IMPLEMENTATION * Generates the implementation of the library into the included file * If not defined, the library is in header only mode and can be included in other headers * or source files without problems. But only ONE file should hold the implementation * * #define RLSW_USE_SIMD_INTRINSICS * Detect and use SIMD intrinsics on the host compilation platform * SIMD could improve rendering considerable vectorizing some raster operations * but the target platforms running the compiled program with SIMD enabled * must support the SIMD the program has been built for, making them only * recommended under specific situations and only if the developers know * what are they doing; this flag is not defined by default * * rlsw capabilities could be customized just defining some internal * values before library inclusion (default values listed): * * #define SW_GL_FRAMEBUFFER_COPY_BGRA true * #define SW_GL_BINDING_COPY_TEXTURE true * #define SW_COLOR_BUFFER_BITS 24 * #define SW_DEPTH_BUFFER_BITS 16 * #define SW_MAX_PROJECTION_STACK_SIZE 2 * #define SW_MAX_MODELVIEW_STACK_SIZE 8 * #define SW_MAX_TEXTURE_STACK_SIZE 2 * #define SW_MAX_TEXTURES 128 * * * LICENSE: MIT * * Copyright (c) 2025-2026 Le Juez Victor (@Bigfoot71), reviewed by Ramon Santamaria (@raysan5) * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * **********************************************************************************************/ #ifndef RLSW_H #define RLSW_H #include #include //---------------------------------------------------------------------------------- // Defines and Macros //---------------------------------------------------------------------------------- // Function specifiers definition #ifndef SWAPI #define SWAPI // Functions defined as 'extern' by default (implicit specifiers) #endif #ifndef SW_MALLOC #define SW_MALLOC(sz) malloc(sz) #endif #ifndef SW_REALLOC #define SW_REALLOC(ptr, newSz) realloc(ptr, newSz) #endif #ifndef SW_FREE #define SW_FREE(ptr) free(ptr) #endif #ifndef SW_RESTRICT #ifdef _MSC_VER #define SW_RESTRICT __restrict #else #define SW_RESTRICT restrict #endif #endif #ifndef SW_GL_FRAMEBUFFER_COPY_BGRA #define SW_GL_FRAMEBUFFER_COPY_BGRA true #endif #ifndef SW_COLOR_BUFFER_BITS #define SW_COLOR_BUFFER_BITS 32 //< 32 (rgba), 16 (rgb packed) or 8 (rgb packed) #endif #ifndef SW_DEPTH_BUFFER_BITS #define SW_DEPTH_BUFFER_BITS 16 //< 32, 24 or 16 #endif #ifndef SW_MAX_PROJECTION_STACK_SIZE #define SW_MAX_PROJECTION_STACK_SIZE 2 #endif #ifndef SW_MAX_MODELVIEW_STACK_SIZE #define SW_MAX_MODELVIEW_STACK_SIZE 8 #endif #ifndef SW_MAX_TEXTURE_STACK_SIZE #define SW_MAX_TEXTURE_STACK_SIZE 2 #endif #ifndef SW_MAX_TEXTURES #define SW_MAX_TEXTURES 128 #endif // Under normal circumstances, clipping a polygon can add at most one vertex per clipping plane // Considering the largest polygon involved is a quadrilateral (4 vertices), // and that clipping occurs against both the frustum (6 planes) and the scissors (4 planes), // the maximum number of vertices after clipping is: // 4 (original vertices) + 6 (frustum planes) + 4 (scissors planes) = 14 #ifndef SW_MAX_CLIPPED_POLYGON_VERTICES #define SW_MAX_CLIPPED_POLYGON_VERTICES 14 #endif #ifndef SW_CLIP_EPSILON #define SW_CLIP_EPSILON 1e-4f #endif //---------------------------------------------------------------------------------- // OpenGL Compatibility Types //---------------------------------------------------------------------------------- typedef unsigned int GLenum; typedef unsigned char GLboolean; typedef unsigned int GLbitfield; typedef void GLvoid; typedef signed char GLbyte; typedef short GLshort; typedef int GLint; typedef unsigned char GLubyte; typedef unsigned short GLushort; typedef unsigned int GLuint; typedef int GLsizei; typedef float GLfloat; typedef float GLclampf; typedef double GLdouble; typedef double GLclampd; //---------------------------------------------------------------------------------- // OpenGL Definitions // NOTE: Not used/supported definitions are commented //---------------------------------------------------------------------------------- #define GL_FALSE 0 #define GL_TRUE 1 #define GL_SCISSOR_TEST 0x0C11 #define GL_TEXTURE_2D 0x0DE1 #define GL_DEPTH_TEST 0x0B71 #define GL_CULL_FACE 0x0B44 #define GL_BLEND 0x0BE2 #define GL_VENDOR 0x1F00 #define GL_RENDERER 0x1F01 #define GL_VERSION 0x1F02 #define GL_EXTENSIONS 0x1F03 //#define GL_ATTRIB_STACK_DEPTH 0x0BB0 //#define GL_CLIENT_ATTRIB_STACK_DEPTH 0x0BB1 #define GL_COLOR_CLEAR_VALUE 0x0C22 #define GL_DEPTH_CLEAR_VALUE 0x0B73 //#define GL_COLOR_WRITEMASK 0x0C23 //#define GL_CURRENT_INDEX 0x0B01 #define GL_CURRENT_COLOR 0x0B00 //#define GL_CURRENT_NORMAL 0x0B02 //#define GL_CURRENT_RASTER_COLOR 0x0B04 //#define GL_CURRENT_RASTER_DISTANCE 0x0B09 //#define GL_CURRENT_RASTER_INDEX 0x0B05 //#define GL_CURRENT_RASTER_POSITION 0x0B07 //#define GL_CURRENT_RASTER_TEXTURE_COORDS 0x0B06 //#define GL_CURRENT_RASTER_POSITION_VALID 0x0B08 #define GL_CURRENT_TEXTURE_COORDS 0x0B03 #define GL_POINT_SIZE 0x0B11 #define GL_LINE_WIDTH 0x0B21 //#define GL_INDEX_CLEAR_VALUE 0x0C20 //#define GL_INDEX_MODE 0x0C30 //#define GL_INDEX_WRITEMASK 0x0C21 #define GL_MODELVIEW_MATRIX 0x0BA6 #define GL_MODELVIEW_STACK_DEPTH 0x0BA3 //#define GL_NAME_STACK_DEPTH 0x0D70 #define GL_PROJECTION_MATRIX 0x0BA7 #define GL_PROJECTION_STACK_DEPTH 0x0BA4 //#define GL_RENDER_MODE 0x0C40 //#define GL_RGBA_MODE 0x0C31 #define GL_TEXTURE_MATRIX 0x0BA8 #define GL_TEXTURE_STACK_DEPTH 0x0BA5 #define GL_VIEWPORT 0x0BA2 #define GL_COLOR_BUFFER_BIT 0x00004000 #define GL_DEPTH_BUFFER_BIT 0x00000100 #define GL_MODELVIEW 0x1700 #define GL_PROJECTION 0x1701 #define GL_TEXTURE 0x1702 #define GL_VERTEX_ARRAY 0x8074 #define GL_NORMAL_ARRAY 0x8075 // WARNING: Not implemented (defined for RLGL) #define GL_COLOR_ARRAY 0x8076 //#define GL_INDEX_ARRAY 0x8077 #define GL_TEXTURE_COORD_ARRAY 0x8078 #define GL_POINTS 0x0000 #define GL_LINES 0x0001 //#define GL_LINE_LOOP 0x0002 //#define GL_LINE_STRIP 0x0003 #define GL_TRIANGLES 0x0004 //#define GL_TRIANGLE_STRIP 0x0005 //#define GL_TRIANGLE_FAN 0x0006 #define GL_QUADS 0x0007 //#define GL_QUAD_STRIP 0x0008 //#define GL_POLYGON 0x0009 #define GL_POINT 0x1B00 #define GL_LINE 0x1B01 #define GL_FILL 0x1B02 #define GL_FRONT 0x0404 #define GL_BACK 0x0405 #define GL_ZERO 0 #define GL_ONE 1 #define GL_SRC_COLOR 0x0300 #define GL_ONE_MINUS_SRC_COLOR 0x0301 #define GL_SRC_ALPHA 0x0302 #define GL_ONE_MINUS_SRC_ALPHA 0x0303 #define GL_DST_ALPHA 0x0304 #define GL_ONE_MINUS_DST_ALPHA 0x0305 #define GL_DST_COLOR 0x0306 #define GL_ONE_MINUS_DST_COLOR 0x0307 #define GL_SRC_ALPHA_SATURATE 0x0308 #define GL_NEAREST 0x2600 #define GL_LINEAR 0x2601 #define GL_REPEAT 0x2901 #define GL_CLAMP 0x2900 #define GL_TEXTURE_MAG_FILTER 0x2800 #define GL_TEXTURE_MIN_FILTER 0x2801 #define GL_TEXTURE_WRAP_S 0x2802 #define GL_TEXTURE_WRAP_T 0x2803 #define GL_NO_ERROR 0 #define GL_INVALID_ENUM 0x0500 #define GL_INVALID_VALUE 0x0501 #define GL_INVALID_OPERATION 0x0502 #define GL_STACK_OVERFLOW 0x0503 #define GL_STACK_UNDERFLOW 0x0504 #define GL_OUT_OF_MEMORY 0x0505 #define GL_ALPHA 0x1906 #define GL_LUMINANCE 0x1909 #define GL_LUMINANCE_ALPHA 0x190A #define GL_RGB 0x1907 #define GL_RGBA 0x1908 #define GL_BYTE 0x1400 #define GL_UNSIGNED_BYTE 0x1401 #define GL_SHORT 0x1402 #define GL_UNSIGNED_SHORT 0x1403 #define GL_INT 0x1404 #define GL_UNSIGNED_INT 0x1405 #define GL_FLOAT 0x1406 // OpenGL Definitions NOT USED #define GL_PERSPECTIVE_CORRECTION_HINT 0x0C50 #define GL_PACK_ALIGNMENT 0x0D05 #define GL_UNPACK_ALIGNMENT 0x0CF5 #define GL_LINE_SMOOTH 0x0B20 #define GL_SMOOTH 0x1D01 #define GL_NICEST 0x1102 #define GL_CCW 0x0901 #define GL_CW 0x0900 #define GL_NEVER 0x0200 #define GL_LESS 0x0201 #define GL_EQUAL 0x0202 #define GL_LEQUAL 0x0203 #define GL_GREATER 0x0204 #define GL_NOTEQUAL 0x0205 #define GL_GEQUAL 0x0206 #define GL_ALWAYS 0x0207 //---------------------------------------------------------------------------------- // OpenGL Bindings to rlsw //---------------------------------------------------------------------------------- #define glReadPixels(x, y, w, h, f, t, p) swCopyFramebuffer((x), (y), (w), (h), (f), (t), (p)) #define glEnable(state) swEnable((state)) #define glDisable(state) swDisable((state)) #define glGetFloatv(pname, params) swGetFloatv((pname), (params)) #define glGetString(pname) swGetString((pname)) #define glGetError() swGetError() #define glViewport(x, y, w, h) swViewport((x), (y), (w), (h)) #define glScissor(x, y, w, h) swScissor((x), (y), (w), (h)) #define glClearColor(r, g, b, a) swClearColor((r), (g), (b), (a)) #define glClearDepth(d) swClearDepth((d)) #define glClear(bitmask) swClear((bitmask)) #define glBlendFunc(sfactor, dfactor) swBlendFunc((sfactor), (dfactor)) #define glPolygonMode(face, mode) swPolygonMode((mode)) #define glCullFace(face) swCullFace((face)) #define glPointSize(size) swPointSize((size)) #define glLineWidth(width) swLineWidth((width)) #define glMatrixMode(mode) swMatrixMode((mode)) #define glPushMatrix() swPushMatrix() #define glPopMatrix() swPopMatrix() #define glLoadIdentity() swLoadIdentity() #define glTranslatef(x, y, z) swTranslatef((x), (y), (z)) #define glRotatef(a, x, y, z) swRotatef((a), (x), (y), (z)) #define glScalef(x, y, z) swScalef((x), (y), (z)) #define glMultMatrixf(v) swMultMatrixf((v)) #define glFrustum(l, r, b, t, n, f) swFrustum((l), (r), (b), (t), (n), (f)) #define glOrtho(l, r, b, t, n, f) swOrtho((l), (r), (b), (t), (n), (f)) #define glBegin(mode) swBegin((mode)) #define glEnd() swEnd() #define glVertex2i(x, y) swVertex2i((x), (y)) #define glVertex2f(x, y) swVertex2f((x), (y)) #define glVertex2fv(v) swVertex2fv((v)) #define glVertex3i(x, y, z) swVertex3i((x), (y), (z)) #define glVertex3f(x, y, z) swVertex3f((x), (y), (z)) #define glvertex3fv(v) swVertex3fv((v)) #define glVertex4i(x, y, z, w) swVertex4i((x), (y), (z), (w)) #define glVertex4f(x, y, z, w) swVertex4f((x), (y), (z), (w)) #define glVertex4fv(v) swVertex4fv((v)) #define glColor3ub(r, g, b) swColor3ub((r), (g), (b)) #define glColor3ubv(v) swColor3ubv((v)) #define glColor3f(r, g, b) swColor3f((r), (g), (b)) #define glColor3fv(v) swColor3fv((v)) #define glColor4ub(r, g, b, a) swColor4ub((r), (g), (b), (a)) #define glColor4ubv(v) swColor4ubv((v)) #define glColor4f(r, g, b, a) swColor4f((r), (g), (b), (a)) #define glColor4fv(v) swColor4fv((v)) #define glTexCoord2f(u, v) swTexCoord2f((u), (v)) #define glTexCoord2fv(v) swTexCoord2fv((v)) #define glEnableClientState(t) ((void)(t)) #define glDisableClientState(t) swBindArray((t), 0) #define glVertexPointer(sz, t, s, p) swBindArray(SW_VERTEX_ARRAY, (p)) #define glTexCoordPointer(sz, t, s, p) swBindArray(SW_TEXTURE_COORD_ARRAY, (p)) #define glColorPointer(sz, t, s, p) swBindArray(SW_COLOR_ARRAY, (p)) #define glDrawArrays(m, o, c) swDrawArrays((m), (o), (c)) #define glDrawElements(m,c,t,i) swDrawElements((m),(c),(t),(i)) #define glGenTextures(c, v) swGenTextures((c), (v)) #define glDeleteTextures(c, v) swDeleteTextures((c), (v)) #define glTexImage2D(tr, l, if, w, h, b, f, t, p) swTexImage2D((w), (h), (f), (t), (p)) #define glTexParameteri(tr, pname, param) swTexParameteri((pname), (param)) #define glBindTexture(tr, id) swBindTexture((id)) // OpenGL functions NOT IMPLEMENTED by rlsw #define glDepthMask(X) ((void)(X)) #define glColorMask(X,Y,Z,W) ((void)(X),(void)(Y),(void)(Z),(void)(W)) #define glPixelStorei(X,Y) ((void)(X),(void)(Y)) #define glHint(X,Y) ((void)(X),(void)(Y)) #define glShadeModel(X) ((void)(X)) #define glFrontFace(X) ((void)(X)) #define glDepthFunc(X) ((void)(X)) #define glTexSubImage2D(X,Y,Z,W,A,B,C,D,E) ((void)(X),(void)(Y),(void)(Z),(void)(W),(void)(A),(void)(B),(void)(C),(void)(D),(void)(E)) #define glGetTexImage(X,Y,Z,W,A) ((void)(X),(void)(Y),(void)(Z),(void)(W),(void)(A)) #define glNormal3f(X,Y,Z) ((void)(X),(void)(Y),(void)(Z)) #define glNormal3fv(X) ((void)(X)) #define glNormalPointer(X,Y,Z) ((void)(X),(void)(Y),(void)(Z)) //---------------------------------------------------------------------------------- // Types and Structures Definition //---------------------------------------------------------------------------------- typedef enum { SW_SCISSOR_TEST = GL_SCISSOR_TEST, SW_TEXTURE_2D = GL_TEXTURE_2D, SW_DEPTH_TEST = GL_DEPTH_TEST, SW_CULL_FACE = GL_CULL_FACE, SW_BLEND = GL_BLEND } SWstate; typedef enum { SW_VENDOR = GL_VENDOR, SW_RENDERER = GL_RENDERER, SW_VERSION = GL_VERSION, SW_EXTENSIONS = GL_EXTENSIONS, SW_COLOR_CLEAR_VALUE = GL_COLOR_CLEAR_VALUE, SW_DEPTH_CLEAR_VALUE = GL_DEPTH_CLEAR_VALUE, SW_CURRENT_COLOR = GL_CURRENT_COLOR, SW_CURRENT_TEXTURE_COORDS = GL_CURRENT_TEXTURE_COORDS, SW_POINT_SIZE = GL_POINT_SIZE, SW_LINE_WIDTH = GL_LINE_WIDTH, SW_MODELVIEW_MATRIX = GL_MODELVIEW_MATRIX, SW_MODELVIEW_STACK_DEPTH = GL_MODELVIEW_STACK_DEPTH, SW_PROJECTION_MATRIX = GL_PROJECTION_MATRIX, SW_PROJECTION_STACK_DEPTH = GL_PROJECTION_STACK_DEPTH, SW_TEXTURE_MATRIX = GL_TEXTURE_MATRIX, SW_TEXTURE_STACK_DEPTH = GL_TEXTURE_STACK_DEPTH, SW_VIEWPORT = GL_VIEWPORT } SWget; typedef enum { SW_COLOR_BUFFER_BIT = GL_COLOR_BUFFER_BIT, SW_DEPTH_BUFFER_BIT = GL_DEPTH_BUFFER_BIT } SWbuffer; typedef enum { SW_PROJECTION = GL_PROJECTION, SW_MODELVIEW = GL_MODELVIEW, SW_TEXTURE = GL_TEXTURE } SWmatrix; typedef enum { SW_VERTEX_ARRAY = GL_VERTEX_ARRAY, SW_TEXTURE_COORD_ARRAY = GL_TEXTURE_COORD_ARRAY, SW_COLOR_ARRAY = GL_COLOR_ARRAY } SWarray; typedef enum { SW_POINTS = GL_POINTS, SW_LINES = GL_LINES, SW_TRIANGLES = GL_TRIANGLES, SW_QUADS = GL_QUADS } SWdraw; typedef enum { SW_POINT = GL_POINT, SW_LINE = GL_LINE, SW_FILL = GL_FILL } SWpoly; typedef enum { SW_FRONT = GL_FRONT, SW_BACK = GL_BACK, } SWface; typedef enum { SW_ZERO = GL_ZERO, SW_ONE = GL_ONE, SW_SRC_COLOR = GL_SRC_COLOR, SW_ONE_MINUS_SRC_COLOR = GL_ONE_MINUS_SRC_COLOR, SW_SRC_ALPHA = GL_SRC_ALPHA, SW_ONE_MINUS_SRC_ALPHA = GL_ONE_MINUS_SRC_ALPHA, SW_DST_ALPHA = GL_DST_ALPHA, SW_ONE_MINUS_DST_ALPHA = GL_ONE_MINUS_DST_ALPHA, SW_DST_COLOR = GL_DST_COLOR, SW_ONE_MINUS_DST_COLOR = GL_ONE_MINUS_DST_COLOR, SW_SRC_ALPHA_SATURATE = GL_SRC_ALPHA_SATURATE } SWfactor; typedef enum { SW_LUMINANCE = GL_LUMINANCE, SW_LUMINANCE_ALPHA = GL_LUMINANCE_ALPHA, SW_RGB = GL_RGB, SW_RGBA = GL_RGBA, } SWformat; typedef enum { SW_UNSIGNED_BYTE = GL_UNSIGNED_BYTE, SW_BYTE = GL_BYTE, SW_UNSIGNED_SHORT = GL_UNSIGNED_SHORT, SW_SHORT = GL_SHORT, SW_UNSIGNED_INT = GL_UNSIGNED_INT, SW_INT = GL_INT, SW_FLOAT = GL_FLOAT } SWtype; typedef enum { SW_NEAREST = GL_NEAREST, SW_LINEAR = GL_LINEAR } SWfilter; typedef enum { SW_REPEAT = GL_REPEAT, SW_CLAMP = GL_CLAMP, } SWwrap; typedef enum { SW_TEXTURE_MIN_FILTER = GL_TEXTURE_MIN_FILTER, SW_TEXTURE_MAG_FILTER = GL_TEXTURE_MAG_FILTER, SW_TEXTURE_WRAP_S = GL_TEXTURE_WRAP_S, SW_TEXTURE_WRAP_T = GL_TEXTURE_WRAP_T } SWtexparam; typedef enum { SW_NO_ERROR = GL_NO_ERROR, SW_INVALID_ENUM = GL_INVALID_ENUM, SW_INVALID_VALUE = GL_INVALID_VALUE, SW_STACK_OVERFLOW = GL_STACK_OVERFLOW, SW_STACK_UNDERFLOW = GL_STACK_UNDERFLOW, SW_INVALID_OPERATION = GL_INVALID_OPERATION, } SWerrcode; //------------------------------------------------------------------------------------ // Functions Declaration - Public API //------------------------------------------------------------------------------------ SWAPI bool swInit(int w, int h); SWAPI void swClose(void); SWAPI bool swResizeFramebuffer(int w, int h); SWAPI void swCopyFramebuffer(int x, int y, int w, int h, SWformat format, SWtype type, void *pixels); SWAPI void swBlitFramebuffer(int xDst, int yDst, int wDst, int hDst, int xSrc, int ySrc, int wSrc, int hSrc, SWformat format, SWtype type, void *pixels); SWAPI void swEnable(SWstate state); SWAPI void swDisable(SWstate state); SWAPI void swGetFloatv(SWget name, float *v); SWAPI const char *swGetString(SWget name); SWAPI SWerrcode swGetError(void); SWAPI void swViewport(int x, int y, int width, int height); SWAPI void swScissor(int x, int y, int width, int height); SWAPI void swClearColor(float r, float g, float b, float a); SWAPI void swClearDepth(float depth); SWAPI void swClear(uint32_t bitmask); SWAPI void swBlendFunc(SWfactor sfactor, SWfactor dfactor); SWAPI void swPolygonMode(SWpoly mode); SWAPI void swCullFace(SWface face); SWAPI void swPointSize(float size); SWAPI void swLineWidth(float width); SWAPI void swMatrixMode(SWmatrix mode); SWAPI void swPushMatrix(void); SWAPI void swPopMatrix(void); SWAPI void swLoadIdentity(void); SWAPI void swTranslatef(float x, float y, float z); SWAPI void swRotatef(float angle, float x, float y, float z); SWAPI void swScalef(float x, float y, float z); SWAPI void swMultMatrixf(const float *mat); SWAPI void swFrustum(double left, double right, double bottom, double top, double znear, double zfar); SWAPI void swOrtho(double left, double right, double bottom, double top, double znear, double zfar); SWAPI void swBegin(SWdraw mode); SWAPI void swEnd(void); SWAPI void swVertex2i(int x, int y); SWAPI void swVertex2f(float x, float y); SWAPI void swVertex2fv(const float *v); SWAPI void swVertex3i(int x, int y, int z); SWAPI void swVertex3f(float x, float y, float z); SWAPI void swVertex3fv(const float *v); SWAPI void swVertex4i(int x, int y, int z, int w); SWAPI void swVertex4f(float x, float y, float z, float w); SWAPI void swVertex4fv(const float *v); SWAPI void swColor3ub(uint8_t r, uint8_t g, uint8_t b); SWAPI void swColor3ubv(const uint8_t *v); SWAPI void swColor3f(float r, float g, float b); SWAPI void swColor3fv(const float *v); SWAPI void swColor4ub(uint8_t r, uint8_t g, uint8_t b, uint8_t a); SWAPI void swColor4ubv(const uint8_t *v); SWAPI void swColor4f(float r, float g, float b, float a); SWAPI void swColor4fv(const float *v); SWAPI void swTexCoord2f(float u, float v); SWAPI void swTexCoord2fv(const float *v); SWAPI void swBindArray(SWarray type, void *buffer); SWAPI void swDrawArrays(SWdraw mode, int offset, int count); SWAPI void swDrawElements(SWdraw mode, int count, int type, const void *indices); SWAPI void swGenTextures(int count, uint32_t *textures); SWAPI void swDeleteTextures(int count, uint32_t *textures); SWAPI void swTexImage2D(int width, int height, SWformat format, SWtype type, const void *data); SWAPI void swTexParameteri(int param, int value); SWAPI void swBindTexture(uint32_t id); #endif // RLSW_H /*********************************************************************************** * * RLSW IMPLEMENTATION * ************************************************************************************/ #define RLSW_IMPLEMENTATION #if defined(RLSW_IMPLEMENTATION) #include // Required for: malloc(), free() #include // Required for: NULL, size_t, uint8_t, uint16_t, uint32_t... #include // Required for: sinf(), cosf(), floorf(), fabsf(), sqrtf(), roundf() // Simple log system to avoid printf() calls if required // NOTE: Avoiding those calls, also avoids const strings memory usage #define SW_SUPPORT_LOG_INFO #if defined(SW_SUPPORT_LOG_INFO) //&& defined(_DEBUG) // WARNING: LOG() output required for this tool #include #define SW_LOG(...) printf(__VA_ARGS__) #else #define SW_LOG(...) #endif #if defined(_MSC_VER) #define SW_ALIGN(x) __declspec(align(x)) #elif defined(__GNUC__) || defined(__clang__) #define SW_ALIGN(x) __attribute__((aligned(x))) #else #define SW_ALIGN(x) // Do nothing if not available #endif #if defined(_M_X64) || defined(__x86_64__) #define SW_ARCH_X86_64 #elif defined(_M_IX86) || defined(__i386__) #define SW_ARCH_X86 #elif defined(_M_ARM) || defined(__arm__) #define SW_ARCH_ARM32 #elif defined(_M_ARM64) || defined(__aarch64__) #define SW_ARCH_ARM64 #elif defined(__riscv) #define SW_ARCH_RISCV #endif #if defined(RLSW_USE_SIMD_INTRINSICS) // Check for SIMD vector instructions // NOTE: Compiler is responsible to enable required flags for host device, // supported features are detected at compiler init but varies depending on compiler // TODO: This logic must be reviewed to avoid the inclusion of multiple headers // and enable the higher level of SIMD available #if defined(__FMA__) && defined(__AVX2__) #define SW_HAS_FMA_AVX2 #include #elif defined(__FMA__) && defined(__AVX__) #define SW_HAS_FMA_AVX #include #elif defined(__AVX2__) #define SW_HAS_AVX2 #include #elif defined(__AVX__) #define SW_HAS_AVX #include #endif #if defined(__SSE4_2__) #define SW_HAS_SSE42 #include #elif defined(__SSE4_1__) #define SW_HAS_SSE41 #include #elif defined(__SSSE3__) #define SW_HAS_SSSE3 #include #elif defined(__SSE3__) #define SW_HAS_SSE3 #include #elif defined(__SSE2__) || (defined(_M_AMD64) || defined(_M_X64)) // SSE2 x64 #define SW_HAS_SSE2 #include #elif defined(__SSE__) #define SW_HAS_SSE #include #endif #if defined(__ARM_NEON) || defined(__aarch64__) #if defined(__ARM_FEATURE_FMA) #define SW_HAS_NEON_FMA #else #define SW_HAS_NEON #endif #include #endif #if defined(__riscv_vector) // NOTE: Requires compilation flags: -march=rv64gcv -mabi=lp64d #define SW_HAS_RVV #include #endif #endif // RLSW_USE_SIMD_INTRINSICS #ifdef __cplusplus #define SW_CURLY_INIT(name) name #else #define SW_CURLY_INIT(name) (name) #endif //---------------------------------------------------------------------------------- // Defines and Macros //---------------------------------------------------------------------------------- #define SW_PI 3.14159265358979323846f #define SW_INV_255 0.00392156862745098f // 1.0f/255.0f #define SW_DEG2RAD (SW_PI/180.0f) #define SW_RAD2DEG (180.0f/SW_PI) #define SW_COLOR_PIXEL_SIZE (SW_COLOR_BUFFER_BITS >> 3) #define SW_DEPTH_PIXEL_SIZE (SW_DEPTH_BUFFER_BITS >> 3) #define SW_PIXEL_SIZE (SW_COLOR_PIXEL_SIZE + SW_DEPTH_PIXEL_SIZE) #if (SW_PIXEL_SIZE <= 4) #define SW_PIXEL_ALIGNMENT 4 #else // if (SW_PIXEL_SIZE <= 8) #define SW_PIXEL_ALIGNMENT 8 #endif #if (SW_COLOR_BUFFER_BITS == 8) #define SW_COLOR_TYPE uint8_t #define SW_COLOR_IS_PACKED 1 #define SW_COLOR_PACK_COMP 1 #define SW_PACK_COLOR(r,g,b) ((((uint8_t)((r)*7+0.5f))&0x07)<<5 | (((uint8_t)((g)*7+0.5f))&0x07)<<2 | ((uint8_t)((b)*3+0.5f))&0x03) #define SW_UNPACK_R(p) (((p)>>5)&0x07) #define SW_UNPACK_G(p) (((p)>>2)&0x07) #define SW_UNPACK_B(p) ((p)&0x03) #define SW_SCALE_R(v) ((v)*255+3)/7 #define SW_SCALE_G(v) ((v)*255+3)/7 #define SW_SCALE_B(v) ((v)*255+1)/3 #define SW_TO_FLOAT_R(v) ((v)*(1.0f/7.0f)) #define SW_TO_FLOAT_G(v) ((v)*(1.0f/7.0f)) #define SW_TO_FLOAT_B(v) ((v)*(1.0f/3.0f)) #elif (SW_COLOR_BUFFER_BITS == 16) #define SW_COLOR_TYPE uint16_t #define SW_COLOR_IS_PACKED 1 #define SW_COLOR_PACK_COMP 1 #define SW_PACK_COLOR(r,g,b) ((((uint16_t)((r)*31+0.5f))&0x1F)<<11 | (((uint16_t)((g)*63+0.5f))&0x3F)<<5 | ((uint16_t)((b)*31+0.5f))&0x1F) #define SW_UNPACK_R(p) (((p)>>11)&0x1F) #define SW_UNPACK_G(p) (((p)>>5)&0x3F) #define SW_UNPACK_B(p) ((p)&0x1F) #define SW_SCALE_R(v) ((v)*255+15)/31 #define SW_SCALE_G(v) ((v)*255+31)/63 #define SW_SCALE_B(v) ((v)*255+15)/31 #define SW_TO_FLOAT_R(v) ((v)*(1.0f/31.0f)) #define SW_TO_FLOAT_G(v) ((v)*(1.0f/63.0f)) #define SW_TO_FLOAT_B(v) ((v)*(1.0f/31.0f)) #else // 32 bits #define SW_COLOR_TYPE uint8_t #define SW_COLOR_IS_PACKED 0 #define SW_COLOR_PACK_COMP 4 #endif #if (SW_DEPTH_BUFFER_BITS == 16) #define SW_DEPTH_TYPE uint16_t #define SW_DEPTH_IS_PACKED 1 #define SW_DEPTH_PACK_COMP 1 #define SW_DEPTH_MAX UINT16_MAX #define SW_DEPTH_SCALE (1.0f/UINT16_MAX) #define SW_PACK_DEPTH(d) ((SW_DEPTH_TYPE)((d)*SW_DEPTH_MAX)) #define SW_UNPACK_DEPTH(p) (p) #elif (SW_DEPTH_BUFFER_BITS == 24) #define SW_DEPTH_TYPE uint8_t #define SW_DEPTH_IS_PACKED 0 #define SW_DEPTH_PACK_COMP 3 #define SW_DEPTH_MAX 0xFFFFFFU #define SW_DEPTH_SCALE (1.0f/0xFFFFFFU) #define SW_PACK_DEPTH_0(d) ((uint8_t)(((uint32_t)((d)*SW_DEPTH_MAX)>>16)&0xFFU)) #define SW_PACK_DEPTH_1(d) ((uint8_t)(((uint32_t)((d)*SW_DEPTH_MAX)>>8)&0xFFU)) #define SW_PACK_DEPTH_2(d) ((uint8_t)((uint32_t)((d)*SW_DEPTH_MAX)&0xFFU)) #define SW_UNPACK_DEPTH(p) ((((uint32_t)(p)[0]<<16)|((uint32_t)(p)[1]<<8)|(uint32_t)(p)[2])) #else // 32 bits #define SW_DEPTH_TYPE float #define SW_DEPTH_IS_PACKED 1 #define SW_DEPTH_PACK_COMP 1 #define SW_DEPTH_MAX 1.0f #define SW_DEPTH_SCALE 1.0f #define SW_PACK_DEPTH(d) ((SW_DEPTH_TYPE)(d)) #define SW_UNPACK_DEPTH(p) (p) #endif #define SW_STATE_CHECK(flags) (SW_STATE_CHECK_EX(RLSW.stateFlags, (flags))) #define SW_STATE_CHECK_EX(state, flags) (((state) & (flags)) == (flags)) #define SW_STATE_SCISSOR_TEST (1 << 0) #define SW_STATE_TEXTURE_2D (1 << 1) #define SW_STATE_DEPTH_TEST (1 << 2) #define SW_STATE_CULL_FACE (1 << 3) #define SW_STATE_BLEND (1 << 4) //---------------------------------------------------------------------------------- // Module Types and Structures Definition //---------------------------------------------------------------------------------- // Pixel data format type // NOTE: Enum aligned with raylib PixelFormat typedef enum { SW_PIXELFORMAT_UNKNOWN = 0, SW_PIXELFORMAT_UNCOMPRESSED_GRAYSCALE, // 8 bit per pixel (no alpha) SW_PIXELFORMAT_UNCOMPRESSED_GRAY_ALPHA, // 8*2 bpp (2 channels) SW_PIXELFORMAT_UNCOMPRESSED_R5G6B5, // 16 bpp SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8, // 24 bpp SW_PIXELFORMAT_UNCOMPRESSED_R5G5B5A1, // 16 bpp (1 bit alpha) SW_PIXELFORMAT_UNCOMPRESSED_R4G4B4A4, // 16 bpp (4 bit alpha) SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8A8, // 32 bpp SW_PIXELFORMAT_UNCOMPRESSED_R32, // 32 bpp (1 channel - float) SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32, // 32*3 bpp (3 channels - float) SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32A32, // 32*4 bpp (4 channels - float) SW_PIXELFORMAT_UNCOMPRESSED_R16, // 16 bpp (1 channel - half float) SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16, // 16*3 bpp (3 channels - half float) SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16A16, // 16*4 bpp (4 channels - half float) } sw_pixelformat_t; typedef void (*sw_factor_f)( float *SW_RESTRICT factor, const float *SW_RESTRICT src, const float *SW_RESTRICT dst ); typedef float sw_matrix_t[4*4]; typedef uint16_t sw_half_t; typedef struct { float position[4]; // Position coordinates float texcoord[2]; // Texture coordinates float color[4]; // Color value (RGBA) float homogeneous[4]; // Homogeneous coordinates float screen[2]; // Screen coordinates } sw_vertex_t; typedef struct { uint8_t *pixels; // Texture pixels (RGBA32) int width, height; // Dimensions of the texture int wMinus1, hMinus1; // Dimensions minus one SWfilter minFilter; // Minification filter SWfilter magFilter; // Magnification filter SWwrap sWrap; // texcoord.x wrap mode SWwrap tWrap; // texcoord.y wrap mode float tx; // Texel width float ty; // Texel height } sw_texture_t; // Pixel data type typedef SW_ALIGN(SW_PIXEL_ALIGNMENT) struct { SW_COLOR_TYPE color[SW_COLOR_PACK_COMP]; SW_DEPTH_TYPE depth[SW_DEPTH_PACK_COMP]; #if (SW_PIXEL_SIZE % SW_PIXEL_ALIGNMENT != 0) uint8_t padding[SW_PIXEL_ALIGNMENT - SW_PIXEL_SIZE % SW_PIXEL_ALIGNMENT]; #endif } sw_pixel_t; typedef struct { sw_pixel_t *pixels; int width; int height; int allocSz; } sw_framebuffer_t; typedef struct { sw_framebuffer_t framebuffer; // Main framebuffer sw_pixel_t clearValue; // Clear value of the framebuffer float vpCenter[2]; // Viewport center float vpHalf[2]; // Viewport half dimensions int vpSize[2]; // Viewport dimensions (minus one) int vpMin[2]; // Viewport minimum renderable point (top-left) int vpMax[2]; // Viewport maximum renderable point (bottom-right) int scMin[2]; // Scissor rectangle minimum renderable point (top-left) int scMax[2]; // Scissor rectangle maximum renderable point (bottom-right) float scClipMin[2]; // Scissor rectangle minimum renderable point in clip space float scClipMax[2]; // Scissor rectangle maximum renderable point in clip space uint32_t currentTexture; // Current active texture id struct { float *positions; float *texcoords; uint8_t *colors; } array; struct { float texcoord[2]; float color[4]; } current; sw_vertex_t vertexBuffer[SW_MAX_CLIPPED_POLYGON_VERTICES]; // Buffer used for storing primitive vertices, used for processing and rendering int vertexCounter; // Number of vertices in 'ctx.vertexBuffer' SWdraw drawMode; // Current primitive mode (e.g., lines, triangles) SWpoly polyMode; // Current polygon filling mode (e.g., lines, triangles) int reqVertices; // Number of vertices required for the primitive being drawn float pointRadius; // Rasterized point radius float lineWidth; // Rasterized line width sw_matrix_t stackProjection[SW_MAX_PROJECTION_STACK_SIZE]; // Projection matrix stack for push/pop operations sw_matrix_t stackModelview[SW_MAX_MODELVIEW_STACK_SIZE]; // Modelview matrix stack for push/pop operations sw_matrix_t stackTexture[SW_MAX_TEXTURE_STACK_SIZE]; // Texture matrix stack for push/pop operations uint32_t stackProjectionCounter; // Counter for matrix stack operations uint32_t stackModelviewCounter; // Counter for matrix stack operations uint32_t stackTextureCounter; // Counter for matrix stack operations SWmatrix currentMatrixMode; // Current matrix mode (e.g., sw_MODELVIEW, sw_PROJECTION) sw_matrix_t *currentMatrix; // Pointer to the currently used matrix according to the mode sw_matrix_t matMVP; // Model view projection matrix, calculated and used internally bool isDirtyMVP; // Indicates if the MVP matrix should be rebuilt SWfactor srcFactor; SWfactor dstFactor; sw_factor_f srcFactorFunc; sw_factor_f dstFactorFunc; SWface cullFace; // Faces to cull SWerrcode errCode; // Last error code sw_texture_t *loadedTextures; int loadedTextureCount; uint32_t *freeTextureIds; int freeTextureIdCount; uint32_t stateFlags; } sw_context_t; //---------------------------------------------------------------------------------- // Global Variables Definition //---------------------------------------------------------------------------------- static sw_context_t RLSW = { 0 }; //---------------------------------------------------------------------------------- // Module Functions Declaration //---------------------------------------------------------------------------------- static inline void sw_matrix_id(sw_matrix_t dst) { dst[0] = 1, dst[1] = 0, dst[2] = 0, dst[3] = 0; dst[4] = 0, dst[5] = 1, dst[6] = 0, dst[7] = 0; dst[8] = 0, dst[9] = 0, dst[10] = 1, dst[11] = 0; dst[12] = 0, dst[13] = 0, dst[14] = 0, dst[15] = 1; } static inline void sw_matrix_mul_rst(float *SW_RESTRICT dst, const float *SW_RESTRICT left, const float *SW_RESTRICT right) { float l00 = left[0], l01 = left[1], l02 = left[2], l03 = left[3]; float l10 = left[4], l11 = left[5], l12 = left[6], l13 = left[7]; float l20 = left[8], l21 = left[9], l22 = left[10], l23 = left[11]; float l30 = left[12], l31 = left[13], l32 = left[14], l33 = left[15]; dst[0] = l00*right[0] + l01*right[4] + l02*right[8] + l03*right[12]; dst[4] = l10*right[0] + l11*right[4] + l12*right[8] + l13*right[12]; dst[8] = l20*right[0] + l21*right[4] + l22*right[8] + l23*right[12]; dst[12] = l30*right[0] + l31*right[4] + l32*right[8] + l33*right[12]; dst[1] = l00*right[1] + l01*right[5] + l02*right[9] + l03*right[13]; dst[5] = l10*right[1] + l11*right[5] + l12*right[9] + l13*right[13]; dst[9] = l20*right[1] + l21*right[5] + l22*right[9] + l23*right[13]; dst[13] = l30*right[1] + l31*right[5] + l32*right[9] + l33*right[13]; dst[2] = l00*right[2] + l01*right[6] + l02*right[10] + l03*right[14]; dst[6] = l10*right[2] + l11*right[6] + l12*right[10] + l13*right[14]; dst[10] = l20*right[2] + l21*right[6] + l22*right[10] + l23*right[14]; dst[14] = l30*right[2] + l31*right[6] + l32*right[10] + l33*right[14]; dst[3] = l00*right[3] + l01*right[7] + l02*right[11] + l03*right[15]; dst[7] = l10*right[3] + l11*right[7] + l12*right[11] + l13*right[15]; dst[11] = l20*right[3] + l21*right[7] + l22*right[11] + l23*right[15]; dst[15] = l30*right[3] + l31*right[7] + l32*right[11] + l33*right[15]; } static inline void sw_matrix_mul(sw_matrix_t dst, const sw_matrix_t left, const sw_matrix_t right) { float result[16]; sw_matrix_mul_rst(result, left, right); for (int i = 0; i < 16; i++) dst[i] = result[i]; } static inline float sw_saturate(float x) { union { float f; uint32_t u; } fb; fb.f = x; // Check if x < 0.0f // If sign bit is set (MSB), x is negative if ((fb.u & 0x80000000) != 0) return 0.0f; // Check if x > 1.0f // Works for positive floats: IEEE 754 ordering matches integer ordering if (fb.u > 0x3F800000) return 1.0f; // x is in [0.0f, 1.0f] return x; } static inline float sw_fract(float x) { return (x - floorf(x)); } static inline int sw_clampi(int v, int min, int max) { if (v < min) return min; if (v > max) return max; return v; } static inline void sw_lerp_vertex_PTCH(sw_vertex_t *SW_RESTRICT out, const sw_vertex_t *SW_RESTRICT a, const sw_vertex_t *SW_RESTRICT b, float t) { const float tInv = 1.0f - t; // Position interpolation (4 components) out->position[0] = a->position[0]*tInv + b->position[0]*t; out->position[1] = a->position[1]*tInv + b->position[1]*t; out->position[2] = a->position[2]*tInv + b->position[2]*t; out->position[3] = a->position[3]*tInv + b->position[3]*t; // Texture coordinate interpolation (2 components) out->texcoord[0] = a->texcoord[0]*tInv + b->texcoord[0]*t; out->texcoord[1] = a->texcoord[1]*tInv + b->texcoord[1]*t; // Color interpolation (4 components) out->color[0] = a->color[0]*tInv + b->color[0]*t; out->color[1] = a->color[1]*tInv + b->color[1]*t; out->color[2] = a->color[2]*tInv + b->color[2]*t; out->color[3] = a->color[3]*tInv + b->color[3]*t; // Homogeneous coordinate interpolation (4 components) out->homogeneous[0] = a->homogeneous[0]*tInv + b->homogeneous[0]*t; out->homogeneous[1] = a->homogeneous[1]*tInv + b->homogeneous[1]*t; out->homogeneous[2] = a->homogeneous[2]*tInv + b->homogeneous[2]*t; out->homogeneous[3] = a->homogeneous[3]*tInv + b->homogeneous[3]*t; } static inline void sw_get_vertex_grad_PTCH(sw_vertex_t *SW_RESTRICT out, const sw_vertex_t *SW_RESTRICT a, const sw_vertex_t *SW_RESTRICT b, float scale) { // Calculate gradients for Position out->position[0] = (b->position[0] - a->position[0])*scale; out->position[1] = (b->position[1] - a->position[1])*scale; out->position[2] = (b->position[2] - a->position[2])*scale; out->position[3] = (b->position[3] - a->position[3])*scale; // Calculate gradients for Texture coordinates out->texcoord[0] = (b->texcoord[0] - a->texcoord[0])*scale; out->texcoord[1] = (b->texcoord[1] - a->texcoord[1])*scale; // Calculate gradients for Color out->color[0] = (b->color[0] - a->color[0])*scale; out->color[1] = (b->color[1] - a->color[1])*scale; out->color[2] = (b->color[2] - a->color[2])*scale; out->color[3] = (b->color[3] - a->color[3])*scale; // Calculate gradients for Homogeneous coordinates out->homogeneous[0] = (b->homogeneous[0] - a->homogeneous[0])*scale; out->homogeneous[1] = (b->homogeneous[1] - a->homogeneous[1])*scale; out->homogeneous[2] = (b->homogeneous[2] - a->homogeneous[2])*scale; out->homogeneous[3] = (b->homogeneous[3] - a->homogeneous[3])*scale; } static inline void sw_add_vertex_grad_PTCH(sw_vertex_t *SW_RESTRICT out, const sw_vertex_t *SW_RESTRICT gradients) { // Add gradients to Position out->position[0] += gradients->position[0]; out->position[1] += gradients->position[1]; out->position[2] += gradients->position[2]; out->position[3] += gradients->position[3]; // Add gradients to Texture coordinates out->texcoord[0] += gradients->texcoord[0]; out->texcoord[1] += gradients->texcoord[1]; // Add gradients to Color out->color[0] += gradients->color[0]; out->color[1] += gradients->color[1]; out->color[2] += gradients->color[2]; out->color[3] += gradients->color[3]; // Add gradients to Homogeneous coordinates out->homogeneous[0] += gradients->homogeneous[0]; out->homogeneous[1] += gradients->homogeneous[1]; out->homogeneous[2] += gradients->homogeneous[2]; out->homogeneous[3] += gradients->homogeneous[3]; } static inline void sw_add_vertex_grad_scaled_PTCH( sw_vertex_t *SW_RESTRICT out, const sw_vertex_t *SW_RESTRICT gradients, float scale) { // Add gradients to Position out->position[0] += gradients->position[0]*scale; out->position[1] += gradients->position[1]*scale; out->position[2] += gradients->position[2]*scale; out->position[3] += gradients->position[3]*scale; // Add gradients to Texture coordinates out->texcoord[0] += gradients->texcoord[0]*scale; out->texcoord[1] += gradients->texcoord[1]*scale; // Add gradients to Color out->color[0] += gradients->color[0]*scale; out->color[1] += gradients->color[1]*scale; out->color[2] += gradients->color[2]*scale; out->color[3] += gradients->color[3]*scale; // Add gradients to Homogeneous coordinates out->homogeneous[0] += gradients->homogeneous[0]*scale; out->homogeneous[1] += gradients->homogeneous[1]*scale; out->homogeneous[2] += gradients->homogeneous[2]*scale; out->homogeneous[3] += gradients->homogeneous[3]*scale; } static inline void sw_float_to_unorm8_simd(uint8_t dst[4], const float src[4]) { #if defined(SW_HAS_NEON) float32x4_t values = vld1q_f32(src); float32x4_t scaled = vmulq_n_f32(values, 255.0f); int32x4_t clamped_s32 = vcvtq_s32_f32(scaled); // f32 -> s32 (truncated) int16x4_t narrow16_s = vqmovn_s32(clamped_s32); int16x8_t combined16_s = vcombine_s16(narrow16_s, narrow16_s); uint8x8_t narrow8_u = vqmovun_s16(combined16_s); vst1_lane_u32((uint32_t*)dst, vreinterpret_u32_u8(narrow8_u), 0); #elif defined(SW_HAS_SSE41) __m128 values = _mm_loadu_ps(src); __m128 scaled = _mm_mul_ps(values, _mm_set1_ps(255.0f)); __m128i clamped = _mm_cvtps_epi32(scaled); // f32 -> s32 (truncated) clamped = _mm_packus_epi32(clamped, clamped); // s32 -> u16 (saturated < 0 to 0) clamped = _mm_packus_epi16(clamped, clamped); // u16 -> u8 (saturated > 255 to 255) *(uint32_t*)dst = _mm_cvtsi128_si32(clamped); #elif defined(SW_HAS_SSE2) __m128 values = _mm_loadu_ps(src); __m128 scaled = _mm_mul_ps(values, _mm_set1_ps(255.0f)); __m128i clamped = _mm_cvtps_epi32(scaled); // f32 -> s32 (truncated) clamped = _mm_packs_epi32(clamped, clamped); // s32 -> s16 (saturated) clamped = _mm_packus_epi16(clamped, clamped); // s16 -> u8 (saturated < 0 to 0) *(uint32_t*)dst = _mm_cvtsi128_si32(clamped); #elif defined(SW_HAS_RVV) // TODO: Sample code generated by AI, needs testing and review // NOTE: RVV 1.0 specs define the use of __riscv_ prefix for instrinsic functions size_t vl = __riscv_vsetvl_e32m1(4); // Load up to 4 floats into a vector register vfloat32m1_t vsrc = __riscv_vle32_v_f32m1(src, vl); // Load float32 values // Clamp to [0.0f, 1.0f] vfloat32m1_t vzero = __riscv_vfmv_v_f_f32m1(0.0f, vl); vfloat32m1_t vone = __riscv_vfmv_v_f_f32m1(1.0f, vl); vsrc = __riscv_vfmin_vv_f32m1(vsrc, vone, vl); vsrc = __riscv_vfmax_vv_f32m1(vsrc, vzero, vl); // Multiply by 255.0f and add 0.5f for rounding vfloat32m1_t vscaled = __riscv_vfmul_vf_f32m1(vsrc, 255.0f, vl); vscaled = __riscv_vfadd_vf_f32m1(vscaled, 0.5f, vl); // Convert to unsigned integer (truncate toward zero) vuint32m1_t vu32 = __riscv_vfcvt_xu_f_v_u32m1(vscaled, vl); // Narrow from u32 -> u8 vuint8m1_t vu8 = __riscv_vnclipu_wx_u8m1(vu32, 0, vl); // Round toward zero __riscv_vse8_v_u8m1(dst, vu8, vl); // Store result #else for (int i = 0; i < 4; i++) { float val = src[i]*255.0f; val = (val > 255.0f)? 255.0f : val; val = (val < 0.0f)? 0.0f : val; dst[i] = (uint8_t)val; } #endif } static inline void sw_float_from_unorm8_simd(float dst[4], const uint8_t src[4]) { #if defined(SW_HAS_NEON) uint8x8_t bytes8 = vld1_u8(src); //< Read 8 bytes, faster, but let's hope we're not at the end of the page (unlikely)... uint16x8_t bytes16 = vmovl_u8(bytes8); uint32x4_t ints = vmovl_u16(vget_low_u16(bytes16)); float32x4_t floats = vcvtq_f32_u32(ints); floats = vmulq_n_f32(floats, SW_INV_255); vst1q_f32(dst, floats); #elif defined(SW_HAS_SSE41) __m128i bytes = _mm_cvtsi32_si128(*(const uint32_t *)src); __m128i ints = _mm_cvtepu8_epi32(bytes); __m128 floats = _mm_cvtepi32_ps(ints); floats = _mm_mul_ps(floats, _mm_set1_ps(SW_INV_255)); _mm_storeu_ps(dst, floats); #elif defined(SW_HAS_SSE2) __m128i bytes = _mm_cvtsi32_si128(*(const uint32_t *)src); bytes = _mm_unpacklo_epi8(bytes, _mm_setzero_si128()); __m128i ints = _mm_unpacklo_epi16(bytes, _mm_setzero_si128()); __m128 floats = _mm_cvtepi32_ps(ints); floats = _mm_mul_ps(floats, _mm_set1_ps(SW_INV_255)); _mm_storeu_ps(dst, floats); #elif defined(SW_HAS_RVV) // TODO: Sample code generated by AI, needs testing and review size_t vl = __riscv_vsetvl_e8m1(4); // Set vector length for 8-bit input elements vuint8m1_t vsrc_u8 = __riscv_vle8_v_u8m1(src, vl); // Load 4 unsigned 8-bit integers vuint32m1_t vsrc_u32 = __riscv_vwcvt_xu_u_v_u32m1(vsrc_u8, vl); // Widen to 32-bit unsigned integers vfloat32m1_t vsrc_f32 = __riscv_vfcvt_f_xu_v_f32m1(vsrc_u32, vl); // Convert to float32 vfloat32m1_t vnorm = __riscv_vfmul_vf_f32m1(vsrc_f32, SW_INV_255, vl); // Multiply by 1/255.0 to normalize __riscv_vse32_v_f32m1(dst, vnorm, vl); // Store result #else dst[0] = (float)src[0]*SW_INV_255; dst[1] = (float)src[1]*SW_INV_255; dst[2] = (float)src[2]*SW_INV_255; dst[3] = (float)src[3]*SW_INV_255; #endif } // Half conversion functions static inline uint32_t sw_half_to_float_ui(uint16_t h) { uint32_t s = (uint32_t)(h & 0x8000) << 16; int32_t em = h & 0x7fff; // bias exponent and pad mantissa with 0; 112 is relative exponent bias (127-15) int32_t r = (em + (112 << 10)) << 13; // denormal: flush to zero r = (em < (1 << 10))? 0 : r; // infinity/NaN; note that we preserve NaN payload as a byproduct of unifying inf/nan cases // 112 is an exponent bias fixup; since we already applied it once, applying it twice converts 31 to 255 r += (em >= (31 << 10))? (112 << 23) : 0; return s | r; } static inline float sw_half_to_float(sw_half_t y) { union { float f; uint32_t i; } v = { .i = sw_half_to_float_ui(y) }; return v.f; } static inline uint16_t sw_half_from_float_ui(uint32_t ui) { int32_t s = (ui >> 16) & 0x8000; int32_t em = ui & 0x7fffffff; // Bias exponent and round to nearest; 112 is relative exponent bias (127-15) int32_t h = (em - (112 << 23) + (1 << 12)) >> 13; // Underflow: flush to zero; 113 encodes exponent -14 h = (em < (113 << 23))? 0 : h; // Overflow: infinity; 143 encodes exponent 16 h = (em >= (143 << 23))? 0x7c00 : h; // NaN; note that we convert all types of NaN to qNaN h = (em > (255 << 23))? 0x7e00 : h; return (uint16_t)(s | h); } static inline sw_half_t sw_half_from_float(float i) { union { float f; uint32_t i; } v; v.f = i; return sw_half_from_float_ui(v.i); } // Framebuffer management functions //------------------------------------------------------------------------------------------- static inline bool sw_framebuffer_load(int w, int h) { int size = w*h; RLSW.framebuffer.pixels = SW_MALLOC(sizeof(sw_pixel_t)*size); if (RLSW.framebuffer.pixels == NULL) return false; RLSW.framebuffer.width = w; RLSW.framebuffer.height = h; RLSW.framebuffer.allocSz = size; return true; } static inline bool sw_framebuffer_resize(int w, int h) { int newSize = w*h; if (newSize <= RLSW.framebuffer.allocSz) { RLSW.framebuffer.width = w; RLSW.framebuffer.height = h; return true; } void *newPixels = SW_REALLOC(RLSW.framebuffer.pixels, sizeof(sw_pixel_t)*newSize); if (newPixels == NULL) return false; RLSW.framebuffer.pixels = newPixels; RLSW.framebuffer.width = w; RLSW.framebuffer.height = h; RLSW.framebuffer.allocSz = newSize; return true; } static inline void sw_framebuffer_read_color(float dst[4], const sw_pixel_t *src) { #if SW_COLOR_IS_PACKED SW_COLOR_TYPE pixel = src->color[0]; dst[0] = SW_TO_FLOAT_R(SW_UNPACK_R(pixel)); dst[1] = SW_TO_FLOAT_G(SW_UNPACK_G(pixel)); dst[2] = SW_TO_FLOAT_B(SW_UNPACK_B(pixel)); dst[3] = 1.0f; #else sw_float_from_unorm8_simd(dst, src->color); #endif } static inline void sw_framebuffer_read_color8(uint8_t dst[4], const sw_pixel_t *src) { #if SW_COLOR_IS_PACKED SW_COLOR_TYPE pixel = src->color[0]; dst[0] = SW_SCALE_R(SW_UNPACK_R(pixel)); dst[1] = SW_SCALE_G(SW_UNPACK_G(pixel)); dst[2] = SW_SCALE_B(SW_UNPACK_B(pixel)); dst[3] = 255; #else const SW_COLOR_TYPE *p = src->color; dst[0] = p[0]; dst[1] = p[1]; dst[2] = p[2]; dst[3] = p[3]; #endif } static inline float sw_framebuffer_read_depth(const sw_pixel_t *src) { #if SW_DEPTH_IS_PACKED return src->depth[0]*SW_DEPTH_SCALE; #else return SW_UNPACK_DEPTH(src->depth)*SW_DEPTH_SCALE; #endif } static inline void sw_framebuffer_write_color(sw_pixel_t *dst, const float src[4]) { #if SW_COLOR_IS_PACKED dst->color[0] = SW_PACK_COLOR(src[0], src[1], src[2]); #else sw_float_to_unorm8_simd(dst->color, src); #endif } static inline void sw_framebuffer_write_depth(sw_pixel_t *dst, float depth) { depth = sw_saturate(depth); // REVIEW: An overflow can occur in certain circumstances with clipping, and needs to be reviewed... #if SW_DEPTH_IS_PACKED dst->depth[0] = SW_PACK_DEPTH(depth); #else dst->depth[0] = SW_PACK_DEPTH_0(depth); dst->depth[1] = SW_PACK_DEPTH_1(depth); dst->depth[2] = SW_PACK_DEPTH_2(depth); #endif } static inline void sw_framebuffer_fill_color(sw_pixel_t *ptr, int size, const SW_COLOR_TYPE color[SW_COLOR_PACK_COMP]) { if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { int w = RLSW.scMax[0] - RLSW.scMin[0] + 1; for (int y = RLSW.scMin[1]; y <= RLSW.scMax[1]; y++) { sw_pixel_t *row = ptr + y*RLSW.framebuffer.width + RLSW.scMin[0]; for (int x = 0; x < w; x++, row++) { for (int i = 0; i < SW_COLOR_PACK_COMP; i++) row->color[i] = color[i]; } } } else { for (int i = 0; i < size; i++, ptr++) { for (int j = 0; j < SW_COLOR_PACK_COMP; j++) ptr->color[j] = color[j]; } } } static inline void sw_framebuffer_fill_depth(sw_pixel_t *ptr, int size, const SW_DEPTH_TYPE depth[SW_DEPTH_PACK_COMP]) { if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { int w = RLSW.scMax[0] - RLSW.scMin[0] + 1; for (int y = RLSW.scMin[1]; y <= RLSW.scMax[1]; y++) { sw_pixel_t *row = ptr + y*RLSW.framebuffer.width + RLSW.scMin[0]; for (int x = 0; x < w; x++, row++) { for (int i = 0; i < SW_DEPTH_PACK_COMP; i++) row->depth[i] = depth[i]; } } } else { for (int i = 0; i < size; i++, ptr++) { for (int j = 0; j < SW_DEPTH_PACK_COMP; j++) ptr->depth[j] = depth[j]; } } } static inline void sw_framebuffer_fill(sw_pixel_t *ptr, int size, sw_pixel_t value) { if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { int w = RLSW.scMax[0] - RLSW.scMin[0] + 1; for (int y = RLSW.scMin[1]; y <= RLSW.scMax[1]; y++) { sw_pixel_t *row = ptr + y*RLSW.framebuffer.width + RLSW.scMin[0]; for (int x = 0; x < w; x++, row++) *row = value; } } else { for (int i = 0; i < size; i++, ptr++) *ptr = value; } } static inline void sw_framebuffer_copy_fast(void* dst) { int size = RLSW.framebuffer.width*RLSW.framebuffer.height; const sw_pixel_t *pixels = RLSW.framebuffer.pixels; #if SW_COLOR_BUFFER_BITS == 8 uint8_t *dst8 = (uint8_t*)dst; for (int i = 0; i < size; i++) dst8[i] = pixels[i].color[0]; #elif SW_COLOR_BUFFER_BITS == 16 uint16_t *dst16 = (uint16_t*)dst; for (int i = 0; i < size; i++) dst16[i] = *(uint16_t*)pixels[i].color; #else // 32 bits uint32_t *dst32 = (uint32_t*)dst; #if SW_GL_FRAMEBUFFER_COPY_BGRA for (int i = 0; i < size; i++) { const uint8_t *c = pixels[i].color; dst32[i] = (uint32_t)c[2] | ((uint32_t)c[1] << 8) | ((uint32_t)c[0] << 16) | ((uint32_t)c[3] << 24); } #else // RGBA for (int i = 0; i < size; i++) dst32[i] = *(uint32_t*)pixels[i].color; #endif #endif } #define DEFINE_FRAMEBUFFER_COPY_BEGIN(name, DST_PTR_T) \ static inline void sw_framebuffer_copy_to_##name(int x, int y, int w, int h, DST_PTR_T *dst) \ { \ const int stride = RLSW.framebuffer.width; \ const sw_pixel_t *src = RLSW.framebuffer.pixels + (y*stride + x); \ \ for (int iy = 0; iy < h; iy++) { \ const sw_pixel_t *line = src; \ for (int ix = 0; ix < w; ix++) { \ uint8_t color[4]; \ sw_framebuffer_read_color8(color, line); \ #define DEFINE_FRAMEBUFFER_COPY_END() \ ++line; \ } \ src += stride; \ } \ } DEFINE_FRAMEBUFFER_COPY_BEGIN(GRAYSCALE, uint8_t) { // NTSC grayscale conversion: Y = 0.299R + 0.587G + 0.114B uint8_t gray = (uint8_t)((color[0]*299 + color[1]*587 + color[2]*114 + 500)/1000); *dst++ = gray; } DEFINE_FRAMEBUFFER_COPY_END() DEFINE_FRAMEBUFFER_COPY_BEGIN(GRAYALPHA, uint8_t) { // Convert RGB to grayscale using NTSC formula uint8_t gray = (uint8_t)((color[0]*299 + color[1]*587 + color[2]*114 + 500)/1000); dst[0] = gray; dst[1] = color[3]; // alpha dst += 2; } DEFINE_FRAMEBUFFER_COPY_END() DEFINE_FRAMEBUFFER_COPY_BEGIN(R5G6B5, uint16_t) { // Convert 8-bit RGB to 5:6:5 format uint8_t r5 = (color[0]*31 + 127)/255; uint8_t g6 = (color[1]*63 + 127)/255; uint8_t b5 = (color[2]*31 + 127)/255; #if SW_GL_FRAMEBUFFER_COPY_BGRA uint16_t rgb565 = (b5 << 11) | (g6 << 5) | r5; #else // RGBA uint16_t rgb565 = (r5 << 11) | (g6 << 5) | b5; #endif *dst++ = rgb565; } DEFINE_FRAMEBUFFER_COPY_END() DEFINE_FRAMEBUFFER_COPY_BEGIN(R8G8B8, uint8_t) { #if SW_GL_FRAMEBUFFER_COPY_BGRA dst[0] = color[2]; dst[1] = color[1]; dst[2] = color[0]; #else // RGBA dst[0] = color[0]; dst[1] = color[1]; dst[2] = color[2]; #endif dst += 3; } DEFINE_FRAMEBUFFER_COPY_END() DEFINE_FRAMEBUFFER_COPY_BEGIN(R5G5B5A1, uint16_t) { uint8_t r5 = (color[0]*31 + 127)/255; uint8_t g5 = (color[1]*31 + 127)/255; uint8_t b5 = (color[2]*31 + 127)/255; uint8_t a1 = (color[3] >= 128)? 1 : 0; #if SW_GL_FRAMEBUFFER_COPY_BGRA uint16_t pixel = (b5 << 11) | (g5 << 6) | (r5 << 1) | a1; #else // RGBA uint16_t pixel = (r5 << 11) | (g5 << 6) | (b5 << 1) | a1; #endif *dst++ = pixel; } DEFINE_FRAMEBUFFER_COPY_END() DEFINE_FRAMEBUFFER_COPY_BEGIN(R4G4B4A4, uint16_t) { uint8_t r4 = (color[0]*15 + 127)/255; uint8_t g4 = (color[1]*15 + 127)/255; uint8_t b4 = (color[2]*15 + 127)/255; uint8_t a4 = (color[3]*15 + 127)/255; #if SW_GL_FRAMEBUFFER_COPY_BGRA uint16_t pixel = (b4 << 12) | (g4 << 8) | (r4 << 4) | a4; #else // RGBA uint16_t pixel = (r4 << 12) | (g4 << 8) | (b4 << 4) | a4; #endif *dst++ = pixel; } DEFINE_FRAMEBUFFER_COPY_END() DEFINE_FRAMEBUFFER_COPY_BEGIN(R8G8B8A8, uint8_t) { #if SW_GL_FRAMEBUFFER_COPY_BGRA dst[0] = color[2]; dst[1] = color[1]; dst[2] = color[0]; #else // RGBA dst[0] = color[0]; dst[1] = color[1]; dst[2] = color[2]; #endif dst[3] = color[3]; dst += 4; } DEFINE_FRAMEBUFFER_COPY_END() #define DEFINE_FRAMEBUFFER_BLIT_BEGIN(name, DST_PTR_T) \ static inline void sw_framebuffer_blit_to_##name( \ int xDst, int yDst, int wDst, int hDst, \ int xSrc, int ySrc, int wSrc, int hSrc, \ DST_PTR_T *dst) \ { \ const sw_pixel_t *srcBase = RLSW.framebuffer.pixels; \ const int fbWidth = RLSW.framebuffer.width; \ \ const uint32_t xScale = ((uint32_t)wSrc << 16)/(uint32_t)wDst; \ const uint32_t yScale = ((uint32_t)hSrc << 16)/(uint32_t)hDst; \ \ for (int dy = 0; dy < hDst; dy++) { \ uint32_t yFix = ((uint32_t)ySrc << 16) + dy*yScale; \ int sy = yFix >> 16; \ const sw_pixel_t *srcLine = srcBase + sy*fbWidth + xSrc; \ \ const sw_pixel_t *srcPtr = srcLine; \ for (int dx = 0; dx < wDst; dx++) { \ uint32_t xFix = dx*xScale; \ int sx = xFix >> 16; \ const sw_pixel_t *pixel = srcPtr + sx; \ uint8_t color[4]; \ sw_framebuffer_read_color8(color, pixel); #define DEFINE_FRAMEBUFFER_BLIT_END() \ } \ } \ } DEFINE_FRAMEBUFFER_BLIT_BEGIN(GRAYSCALE, uint8_t) { uint8_t gray = (uint8_t)((color[0]*299 + color[1]*587 + color[2]*114 + 500)/1000); *dst++ = gray; } DEFINE_FRAMEBUFFER_BLIT_END() DEFINE_FRAMEBUFFER_BLIT_BEGIN(GRAYALPHA, uint8_t) { uint8_t gray = (uint8_t)((color[0]*299 + color[1]*587 + color[2]*114 + 500)/1000); dst[0] = gray; dst[1] = color[3]; // alpha dst += 2; } DEFINE_FRAMEBUFFER_BLIT_END() DEFINE_FRAMEBUFFER_BLIT_BEGIN(R5G6B5, uint16_t) { uint8_t r5 = (color[0]*31 + 127)/255; uint8_t g6 = (color[1]*63 + 127)/255; uint8_t b5 = (color[2]*31 + 127)/255; #if SW_GL_FRAMEBUFFER_COPY_BGRA uint16_t rgb565 = (b5 << 11) | (g6 << 5) | r5; #else // RGBA uint16_t rgb565 = (r5 << 11) | (g6 << 5) | b5; #endif *dst++ = rgb565; } DEFINE_FRAMEBUFFER_BLIT_END() DEFINE_FRAMEBUFFER_BLIT_BEGIN(R8G8B8, uint8_t) { #if SW_GL_FRAMEBUFFER_COPY_BGRA dst[0] = color[2]; dst[1] = color[1]; dst[2] = color[0]; #else // RGBA dst[0] = color[0]; dst[1] = color[1]; dst[2] = color[2]; #endif dst += 3; } DEFINE_FRAMEBUFFER_BLIT_END() DEFINE_FRAMEBUFFER_BLIT_BEGIN(R5G5B5A1, uint16_t) { uint8_t r5 = (color[0]*31 + 127)/255; uint8_t g5 = (color[1]*31 + 127)/255; uint8_t b5 = (color[2]*31 + 127)/255; uint8_t a1 = (color[3] >= 128)? 1 : 0; #if SW_GL_FRAMEBUFFER_COPY_BGRA uint16_t pixel = (b5 << 11) | (g5 << 6) | (r5 << 1) | a1; #else // RGBA uint16_t pixel = (r5 << 11) | (g5 << 6) | (b5 << 1) | a1; #endif *dst++ = pixel; } DEFINE_FRAMEBUFFER_BLIT_END() DEFINE_FRAMEBUFFER_BLIT_BEGIN(R4G4B4A4, uint16_t) { uint8_t r4 = (color[0]*15 + 127)/255; uint8_t g4 = (color[1]*15 + 127)/255; uint8_t b4 = (color[2]*15 + 127)/255; uint8_t a4 = (color[3]*15 + 127)/255; #if SW_GL_FRAMEBUFFER_COPY_BGRA uint16_t pixel = (b4 << 12) | (g4 << 8) | (r4 << 4) | a4; #else // RGBA uint16_t pixel = (r4 << 12) | (g4 << 8) | (b4 << 4) | a4; #endif *dst++ = pixel; } DEFINE_FRAMEBUFFER_BLIT_END() DEFINE_FRAMEBUFFER_BLIT_BEGIN(R8G8B8A8, uint8_t) { #if SW_GL_FRAMEBUFFER_COPY_BGRA dst[0] = color[2]; dst[1] = color[1]; dst[2] = color[0]; #else // RGBA dst[0] = color[0]; dst[1] = color[1]; dst[2] = color[2]; #endif dst[3] = color[3]; dst += 4; } DEFINE_FRAMEBUFFER_BLIT_END() //------------------------------------------------------------------------------------------- // Pixel format management functions //------------------------------------------------------------------------------------------- static inline int sw_get_pixel_format(SWformat format, SWtype type) { int channels = 0; int bitsPerChannel = 8; // Default: 8 bits per channel // Determine the number of channels (format) switch (format) { case SW_LUMINANCE: channels = 1; break; case SW_LUMINANCE_ALPHA: channels = 2; break; case SW_RGB: channels = 3; break; case SW_RGBA: channels = 4; break; default: return SW_PIXELFORMAT_UNKNOWN; } // Determine the depth of each channel (type) switch (type) { case SW_UNSIGNED_BYTE: bitsPerChannel = 8; break; case SW_BYTE: bitsPerChannel = 8; break; case SW_UNSIGNED_SHORT: bitsPerChannel = 16; break; case SW_SHORT: bitsPerChannel = 16; break; case SW_UNSIGNED_INT: bitsPerChannel = 32; break; case SW_INT: bitsPerChannel = 32; break; case SW_FLOAT: bitsPerChannel = 32; break; default: return SW_PIXELFORMAT_UNKNOWN; } // Map the format and type to the correct internal format if (bitsPerChannel == 8) { if (channels == 1) return SW_PIXELFORMAT_UNCOMPRESSED_GRAYSCALE; if (channels == 2) return SW_PIXELFORMAT_UNCOMPRESSED_GRAY_ALPHA; if (channels == 3) return SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8; if (channels == 4) return SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8A8; } else if (bitsPerChannel == 16) { if (channels == 1) return SW_PIXELFORMAT_UNCOMPRESSED_R16; if (channels == 3) return SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16; if (channels == 4) return SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16A16; } else if (bitsPerChannel == 32) { if (channels == 1) return SW_PIXELFORMAT_UNCOMPRESSED_R32; if (channels == 3) return SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32; if (channels == 4) return SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32A32; } return SW_PIXELFORMAT_UNKNOWN; } static inline void sw_get_pixel(uint8_t *color, const void *pixels, uint32_t offset, sw_pixelformat_t format) { switch (format) { case SW_PIXELFORMAT_UNCOMPRESSED_GRAYSCALE: { uint8_t gray = ((const uint8_t*)pixels)[offset]; color[0] = gray; color[1] = gray; color[2] = gray; color[3] = 255; break; } case SW_PIXELFORMAT_UNCOMPRESSED_GRAY_ALPHA: { const uint8_t *src = &((const uint8_t*)pixels)[offset*2]; color[0] = src[0]; color[1] = src[0]; color[2] = src[0]; color[3] = src[1]; break; } case SW_PIXELFORMAT_UNCOMPRESSED_R5G6B5: { uint16_t pixel = ((const uint16_t*)pixels)[offset]; color[0] = ((pixel >> 11) & 0x1F)*255/31; // R (5 bits) color[1] = ((pixel >> 5) & 0x3F)*255/63; // G (6 bits) color[2] = (pixel & 0x1F)*255/31; // B (5 bits) color[3] = 255; break; } case SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8: { const uint8_t *src = &((const uint8_t*)pixels)[offset*3]; color[0] = src[0]; color[1] = src[1]; color[2] = src[2]; color[3] = 255; break; } case SW_PIXELFORMAT_UNCOMPRESSED_R5G5B5A1: { uint16_t pixel = ((const uint16_t*)pixels)[offset]; color[0] = ((pixel >> 11) & 0x1F)*255/31; // R (5 bits) color[1] = ((pixel >> 6) & 0x1F)*255/31; // G (5 bits) color[2] = ((pixel >> 1) & 0x1F)*255/31; // B (5 bits) color[3] = (pixel & 0x01)*255; // A (1 bit) break; } case SW_PIXELFORMAT_UNCOMPRESSED_R4G4B4A4: { uint16_t pixel = ((const uint16_t*)pixels)[offset]; color[0] = ((pixel >> 12) & 0x0F)*255/15; // R (4 bits) color[1] = ((pixel >> 8) & 0x0F)*255/15; // G (4 bits) color[2] = ((pixel >> 4) & 0x0F)*255/15; // B (4 bits) color[3] = (pixel & 0x0F)*255/15; // A (4 bits) break; } case SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8A8: { const uint8_t *src = &((const uint8_t*)pixels)[offset*4]; color[0] = src[0]; color[1] = src[1]; color[2] = src[2]; color[3] = src[3]; break; } case SW_PIXELFORMAT_UNCOMPRESSED_R32: { float val = ((const float*)pixels)[offset]; uint8_t gray = (uint8_t)(val*255.0f); color[0] = gray; color[1] = gray; color[2] = gray; color[3] = 255; break; } case SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32: { const float *src = &((const float*)pixels)[offset*3]; color[0] = (uint8_t)(src[0]*255.0f); color[1] = (uint8_t)(src[1]*255.0f); color[2] = (uint8_t)(src[2]*255.0f); color[3] = 255; break; } case SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32A32: { const float *src = &((const float*)pixels)[offset*4]; color[0] = (uint8_t)(src[0]*255.0f); color[1] = (uint8_t)(src[1]*255.0f); color[2] = (uint8_t)(src[2]*255.0f); color[3] = (uint8_t)(src[3]*255.0f); break; } case SW_PIXELFORMAT_UNCOMPRESSED_R16: { uint16_t val = ((const uint16_t*)pixels)[offset]; uint8_t gray = sw_half_to_float(val)*SW_INV_255; color[0] = gray; color[1] = gray; color[2] = gray; color[3] = 255; break; } case SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16: { const uint16_t *src = &((const uint16_t*)pixels)[offset*3]; color[0] = sw_half_to_float(src[0])*SW_INV_255; color[1] = sw_half_to_float(src[1])*SW_INV_255; color[2] = sw_half_to_float(src[2])*SW_INV_255; color[3] = 255; break; } case SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16A16: { const uint16_t *src = &((const uint16_t*)pixels)[offset*4]; color[0] = sw_half_to_float(src[0])*SW_INV_255; color[1] = sw_half_to_float(src[1])*SW_INV_255; color[2] = sw_half_to_float(src[2])*SW_INV_255; color[3] = sw_half_to_float(src[3])*SW_INV_255; break; } case SW_PIXELFORMAT_UNKNOWN: default: { color[0] = 0; color[1] = 0; color[2] = 0; color[3] = 0; break; } } } //------------------------------------------------------------------------------------------- // Texture sampling functionality //------------------------------------------------------------------------------------------- static inline void sw_texture_fetch(float* color, const sw_texture_t* tex, int x, int y) { sw_float_from_unorm8_simd(color, &tex->pixels[4*(y*tex->width + x)]); } static inline void sw_texture_sample_nearest(float *color, const sw_texture_t *tex, float u, float v) { u = (tex->sWrap == SW_REPEAT)? sw_fract(u) : sw_saturate(u); v = (tex->tWrap == SW_REPEAT)? sw_fract(v) : sw_saturate(v); int x = u*tex->width; int y = v*tex->height; sw_texture_fetch(color, tex, x, y); } static inline void sw_texture_sample_linear(float *color, const sw_texture_t *tex, float u, float v) { // TODO: With a bit more cleverness we could clearly reduce the // number of operations here, but for now it works fine float xf = (u*tex->width) - 0.5f; float yf = (v*tex->height) - 0.5f; float fx = sw_fract(xf); float fy = sw_fract(yf); int x0 = (int)xf; int y0 = (int)yf; int x1 = x0 + 1; int y1 = y0 + 1; // NOTE: If the textures are POT we could avoid the division for SW_REPEAT if (tex->sWrap == SW_CLAMP) { x0 = (x0 > tex->wMinus1)? tex->wMinus1 : x0; x1 = (x1 > tex->wMinus1)? tex->wMinus1 : x1; } else { x0 = (x0%tex->width + tex->width)%tex->width; x1 = (x1%tex->width + tex->width)%tex->width; } if (tex->tWrap == SW_CLAMP) { y0 = (y0 > tex->hMinus1)? tex->hMinus1 : y0; y1 = (y1 > tex->hMinus1)? tex->hMinus1 : y1; } else { y0 = (y0%tex->height + tex->height)%tex->height; y1 = (y1%tex->height + tex->height)%tex->height; } float c00[4], c10[4], c01[4], c11[4]; sw_texture_fetch(c00, tex, x0, y0); sw_texture_fetch(c10, tex, x1, y0); sw_texture_fetch(c01, tex, x0, y1); sw_texture_fetch(c11, tex, x1, y1); for (int i = 0; i < 4; i++) { float t = c00[i] + fx*(c10[i] - c00[i]); float b = c01[i] + fx*(c11[i] - c01[i]); color[i] = t + fy*(b - t); } } static inline void sw_texture_sample(float *color, const sw_texture_t *tex, float u, float v, float dUdx, float dUdy, float dVdx, float dVdy) { // Previous method: There is no need to compute the square root // because using the squared value, the comparison remains `L2 > 1.0f*1.0f` //float du = sqrtf(dUdx*dUdx + dUdy*dUdy); //float dv = sqrtf(dVdx*dVdx + dVdy*dVdy); //float L = (du > dv)? du : dv; // Calculate the derivatives for each axis float dU2 = dUdx*dUdx + dUdy*dUdy; float dV2 = dVdx*dVdx + dVdy*dVdy; float L2 = (dU2 > dV2)? dU2 : dV2; SWfilter filter = (L2 > 1.0f)? tex->minFilter : tex->magFilter; switch (filter) { case SW_NEAREST: sw_texture_sample_nearest(color, tex, u, v); break; case SW_LINEAR: sw_texture_sample_linear(color, tex, u, v); break; default: break; } } //------------------------------------------------------------------------------------------- // Color blending functionality //------------------------------------------------------------------------------------------- static inline void sw_factor_zero(float *SW_RESTRICT factor, const float *SW_RESTRICT src, const float *SW_RESTRICT dst) { factor[0] = factor[1] = factor[2] = factor[3] = 0.0f; } static inline void sw_factor_one(float *SW_RESTRICT factor, const float *SW_RESTRICT src, const float *SW_RESTRICT dst) { factor[0] = factor[1] = factor[2] = factor[3] = 1.0f; } static inline void sw_factor_src_color(float *SW_RESTRICT factor, const float *SW_RESTRICT src, const float *SW_RESTRICT dst) { factor[0] = src[0]; factor[1] = src[1]; factor[2] = src[2]; factor[3] = src[3]; } static inline void sw_factor_one_minus_src_color(float *SW_RESTRICT factor, const float *SW_RESTRICT src, const float *SW_RESTRICT dst) { factor[0] = 1.0f - src[0]; factor[1] = 1.0f - src[1]; factor[2] = 1.0f - src[2]; factor[3] = 1.0f - src[3]; } static inline void sw_factor_src_alpha(float *SW_RESTRICT factor, const float *SW_RESTRICT src, const float *SW_RESTRICT dst) { factor[0] = factor[1] = factor[2] = factor[3] = src[3]; } static inline void sw_factor_one_minus_src_alpha(float *SW_RESTRICT factor, const float *SW_RESTRICT src, const float *SW_RESTRICT dst) { float invAlpha = 1.0f - src[3]; factor[0] = factor[1] = factor[2] = factor[3] = invAlpha; } static inline void sw_factor_dst_alpha(float *SW_RESTRICT factor, const float *SW_RESTRICT src, const float *SW_RESTRICT dst) { factor[0] = factor[1] = factor[2] = factor[3] = dst[3]; } static inline void sw_factor_one_minus_dst_alpha(float *SW_RESTRICT factor, const float *SW_RESTRICT src, const float *SW_RESTRICT dst) { float invAlpha = 1.0f - dst[3]; factor[0] = factor[1] = factor[2] = factor[3] = invAlpha; } static inline void sw_factor_dst_color(float *SW_RESTRICT factor, const float *SW_RESTRICT src, const float *SW_RESTRICT dst) { factor[0] = dst[0]; factor[1] = dst[1]; factor[2] = dst[2]; factor[3] = dst[3]; } static inline void sw_factor_one_minus_dst_color(float *SW_RESTRICT factor, const float *SW_RESTRICT src, const float *SW_RESTRICT dst) { factor[0] = 1.0f - dst[0]; factor[1] = 1.0f - dst[1]; factor[2] = 1.0f - dst[2]; factor[3] = 1.0f - dst[3]; } static inline void sw_factor_src_alpha_saturate(float *SW_RESTRICT factor, const float *SW_RESTRICT src, const float *SW_RESTRICT dst) { factor[0] = factor[1] = factor[2] = 1.0f; factor[3] = (src[3] < 1.0f)? src[3] : 1.0f; } static inline void sw_blend_colors(float *SW_RESTRICT dst/*[4]*/, const float *SW_RESTRICT src/*[4]*/) { float srcFactor[4], dstFactor[4]; RLSW.srcFactorFunc(srcFactor, src, dst); RLSW.dstFactorFunc(dstFactor, src, dst); dst[0] = srcFactor[0]*src[0] + dstFactor[0]*dst[0]; dst[1] = srcFactor[1]*src[1] + dstFactor[1]*dst[1]; dst[2] = srcFactor[2]*src[2] + dstFactor[2]*dst[2]; dst[3] = srcFactor[3]*src[3] + dstFactor[3]*dst[3]; } //------------------------------------------------------------------------------------------- // Projection helper functions //------------------------------------------------------------------------------------------- static inline void sw_project_ndc_to_screen(float screen[2], const float ndc[4]) { screen[0] = RLSW.vpCenter[0] + ndc[0]*RLSW.vpHalf[0] + 0.5f; screen[1] = RLSW.vpCenter[1] - ndc[1]*RLSW.vpHalf[1] + 0.5f; } //------------------------------------------------------------------------------------------- // Polygon clipping management //------------------------------------------------------------------------------------------- #define DEFINE_CLIP_FUNC(name, FUNC_IS_INSIDE, FUNC_COMPUTE_T) \ static inline int sw_clip_##name( \ sw_vertex_t output[SW_MAX_CLIPPED_POLYGON_VERTICES], \ const sw_vertex_t input[SW_MAX_CLIPPED_POLYGON_VERTICES], \ int n) \ { \ const sw_vertex_t *prev = &input[n - 1]; \ int prevInside = FUNC_IS_INSIDE(prev->homogeneous); \ int outputCount = 0; \ \ for (int i = 0; i < n; i++) { \ const sw_vertex_t *curr = &input[i]; \ int currInside = FUNC_IS_INSIDE(curr->homogeneous); \ \ /* If transition between interior/exterior, calculate intersection point */ \ if (prevInside != currInside) { \ float t = FUNC_COMPUTE_T(prev->homogeneous, curr->homogeneous); \ sw_lerp_vertex_PTCH(&output[outputCount++], prev, curr, t); \ } \ \ /* If current vertex inside, add it */ \ if (currInside) { \ output[outputCount++] = *curr; \ } \ \ prev = curr; \ prevInside = currInside; \ } \ \ return outputCount; \ } //------------------------------------------------------------------------------------------- // Frustum cliping functions //------------------------------------------------------------------------------------------- #define IS_INSIDE_PLANE_W(h) ((h)[3] >= SW_CLIP_EPSILON) #define IS_INSIDE_PLANE_X_POS(h) ( (h)[0] < (h)[3]) // Exclusive for +X #define IS_INSIDE_PLANE_X_NEG(h) (-(h)[0] < (h)[3]) // Exclusive for -X #define IS_INSIDE_PLANE_Y_POS(h) ( (h)[1] < (h)[3]) // Exclusive for +Y #define IS_INSIDE_PLANE_Y_NEG(h) (-(h)[1] < (h)[3]) // Exclusive for -Y #define IS_INSIDE_PLANE_Z_POS(h) ( (h)[2] <= (h)[3]) // Inclusive for +Z #define IS_INSIDE_PLANE_Z_NEG(h) (-(h)[2] <= (h)[3]) // Inclusive for -Z #define COMPUTE_T_PLANE_W(hPrev, hCurr) ((SW_CLIP_EPSILON - (hPrev)[3])/((hCurr)[3] - (hPrev)[3])) #define COMPUTE_T_PLANE_X_POS(hPrev, hCurr) (((hPrev)[3] - (hPrev)[0])/(((hPrev)[3] - (hPrev)[0]) - ((hCurr)[3] - (hCurr)[0]))) #define COMPUTE_T_PLANE_X_NEG(hPrev, hCurr) (((hPrev)[3] + (hPrev)[0])/(((hPrev)[3] + (hPrev)[0]) - ((hCurr)[3] + (hCurr)[0]))) #define COMPUTE_T_PLANE_Y_POS(hPrev, hCurr) (((hPrev)[3] - (hPrev)[1])/(((hPrev)[3] - (hPrev)[1]) - ((hCurr)[3] - (hCurr)[1]))) #define COMPUTE_T_PLANE_Y_NEG(hPrev, hCurr) (((hPrev)[3] + (hPrev)[1])/(((hPrev)[3] + (hPrev)[1]) - ((hCurr)[3] + (hCurr)[1]))) #define COMPUTE_T_PLANE_Z_POS(hPrev, hCurr) (((hPrev)[3] - (hPrev)[2])/(((hPrev)[3] - (hPrev)[2]) - ((hCurr)[3] - (hCurr)[2]))) #define COMPUTE_T_PLANE_Z_NEG(hPrev, hCurr) (((hPrev)[3] + (hPrev)[2])/(((hPrev)[3] + (hPrev)[2]) - ((hCurr)[3] + (hCurr)[2]))) DEFINE_CLIP_FUNC(w, IS_INSIDE_PLANE_W, COMPUTE_T_PLANE_W) DEFINE_CLIP_FUNC(x_pos, IS_INSIDE_PLANE_X_POS, COMPUTE_T_PLANE_X_POS) DEFINE_CLIP_FUNC(x_neg, IS_INSIDE_PLANE_X_NEG, COMPUTE_T_PLANE_X_NEG) DEFINE_CLIP_FUNC(y_pos, IS_INSIDE_PLANE_Y_POS, COMPUTE_T_PLANE_Y_POS) DEFINE_CLIP_FUNC(y_neg, IS_INSIDE_PLANE_Y_NEG, COMPUTE_T_PLANE_Y_NEG) DEFINE_CLIP_FUNC(z_pos, IS_INSIDE_PLANE_Z_POS, COMPUTE_T_PLANE_Z_POS) DEFINE_CLIP_FUNC(z_neg, IS_INSIDE_PLANE_Z_NEG, COMPUTE_T_PLANE_Z_NEG) //------------------------------------------------------------------------------------------- // Scissor clip functions //------------------------------------------------------------------------------------------- #define COMPUTE_T_SCISSOR_X_MIN(hPrev, hCurr) (((RLSW.scClipMin[0])*(hPrev)[3] - (hPrev)[0])/(((hCurr)[0] - (RLSW.scClipMin[0])*(hCurr)[3]) - ((hPrev)[0] - (RLSW.scClipMin[0])*(hPrev)[3]))) #define COMPUTE_T_SCISSOR_X_MAX(hPrev, hCurr) (((RLSW.scClipMax[0])*(hPrev)[3] - (hPrev)[0])/(((hCurr)[0] - (RLSW.scClipMax[0])*(hCurr)[3]) - ((hPrev)[0] - (RLSW.scClipMax[0])*(hPrev)[3]))) #define COMPUTE_T_SCISSOR_Y_MIN(hPrev, hCurr) (((RLSW.scClipMin[1])*(hPrev)[3] - (hPrev)[1])/(((hCurr)[1] - (RLSW.scClipMin[1])*(hCurr)[3]) - ((hPrev)[1] - (RLSW.scClipMin[1])*(hPrev)[3]))) #define COMPUTE_T_SCISSOR_Y_MAX(hPrev, hCurr) (((RLSW.scClipMax[1])*(hPrev)[3] - (hPrev)[1])/(((hCurr)[1] - (RLSW.scClipMax[1])*(hCurr)[3]) - ((hPrev)[1] - (RLSW.scClipMax[1])*(hPrev)[3]))) #define IS_INSIDE_SCISSOR_X_MIN(h) ((h)[0] >= (RLSW.scClipMin[0])*(h)[3]) #define IS_INSIDE_SCISSOR_X_MAX(h) ((h)[0] <= (RLSW.scClipMax[0])*(h)[3]) #define IS_INSIDE_SCISSOR_Y_MIN(h) ((h)[1] >= (RLSW.scClipMin[1])*(h)[3]) #define IS_INSIDE_SCISSOR_Y_MAX(h) ((h)[1] <= (RLSW.scClipMax[1])*(h)[3]) DEFINE_CLIP_FUNC(scissor_x_min, IS_INSIDE_SCISSOR_X_MIN, COMPUTE_T_SCISSOR_X_MIN) DEFINE_CLIP_FUNC(scissor_x_max, IS_INSIDE_SCISSOR_X_MAX, COMPUTE_T_SCISSOR_X_MAX) DEFINE_CLIP_FUNC(scissor_y_min, IS_INSIDE_SCISSOR_Y_MIN, COMPUTE_T_SCISSOR_Y_MIN) DEFINE_CLIP_FUNC(scissor_y_max, IS_INSIDE_SCISSOR_Y_MAX, COMPUTE_T_SCISSOR_Y_MAX) //------------------------------------------------------------------------------------------- // Main polygon clip function static inline bool sw_polygon_clip(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON_VERTICES], int *vertexCounter) { static sw_vertex_t tmp[SW_MAX_CLIPPED_POLYGON_VERTICES]; int n = *vertexCounter; #define CLIP_AGAINST_PLANE(FUNC_CLIP) \ { \ n = FUNC_CLIP(tmp, polygon, n); \ if (n < 3) \ { \ *vertexCounter = 0; \ return false; \ } \ for (int i = 0; i < n; i++) polygon[i] = tmp[i]; \ } CLIP_AGAINST_PLANE(sw_clip_w); CLIP_AGAINST_PLANE(sw_clip_x_pos); CLIP_AGAINST_PLANE(sw_clip_x_neg); CLIP_AGAINST_PLANE(sw_clip_y_pos); CLIP_AGAINST_PLANE(sw_clip_y_neg); CLIP_AGAINST_PLANE(sw_clip_z_pos); CLIP_AGAINST_PLANE(sw_clip_z_neg); if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { CLIP_AGAINST_PLANE(sw_clip_scissor_x_min); CLIP_AGAINST_PLANE(sw_clip_scissor_x_max); CLIP_AGAINST_PLANE(sw_clip_scissor_y_min); CLIP_AGAINST_PLANE(sw_clip_scissor_y_max); } *vertexCounter = n; return (n >= 3); } // Triangle rendering logic //------------------------------------------------------------------------------------------- static inline bool sw_triangle_face_culling(void) { // NOTE: Face culling is done before clipping to avoid unnecessary computations // To handle triangles crossing the w=0 plane correctly, // we perform the winding order test in homogeneous coordinates directly, // before the perspective division (division by w) // This test determines the orientation of the triangle in the (x,y,w) plane, // which corresponds to the projected 2D winding order sign, // even with negative w values // Preload homogeneous coordinates into local variables const float *h0 = RLSW.vertexBuffer[0].homogeneous; const float *h1 = RLSW.vertexBuffer[1].homogeneous; const float *h2 = RLSW.vertexBuffer[2].homogeneous; // Compute a value proportional to the signed area in the projected 2D plane, // calculated directly using homogeneous coordinates BEFORE division by w. // This is the determinant of the matrix formed by the (x, y, w) components // of the vertices, which correctly captures the winding order in homogeneous // space and its relationship to the projected 2D winding order, even with // negative w values // The determinant formula used here is: // h0.x*(h1.y*h2.w - h2.y*h1.w) + // h1.x*(h2.y*h0.w - h0.y*h2.w) + // h2.x*(h0.y*h1.w - h1.y*h0.w) const float hSgnArea = h0[0]*(h1[1]*h2[3] - h2[1]*h1[3]) + h1[0]*(h2[1]*h0[3] - h0[1]*h2[3]) + h2[0]*(h0[1]*h1[3] - h1[1]*h0[3]); // Discard the triangle if its winding order (determined by the sign // of the homogeneous area/determinant) matches the culled direction // A positive hSgnArea typically corresponds to a counter-clockwise // winding in the projected space when all w > 0. // This test is robust for points with w > 0 or w < 0, correctly // capturing the change in orientation when crossing the w=0 plane // The culling logic remains the same based on the signed area/determinant // A value of 0 for hSgnArea means the points are collinear in (x, y, w) // space, which corresponds to a degenerate triangle projection. // Such triangles are typically not culled by this test (0 < 0 is false, 0 > 0 is false) // and should be handled by the clipper if necessary return (RLSW.cullFace == SW_FRONT)? (hSgnArea < 0) : (hSgnArea > 0); // Cull if winding is "clockwise" : "counter-clockwise" } static inline void sw_triangle_clip_and_project(void) { sw_vertex_t *polygon = RLSW.vertexBuffer; int *vertexCounter = &RLSW.vertexCounter; if (sw_polygon_clip(polygon, vertexCounter)) { // Transformation to screen space and normalization for (int i = 0; i < *vertexCounter; i++) { sw_vertex_t *v = &polygon[i]; // Calculation of the reciprocal of W for normalization // as well as perspective-correct attributes const float wRcp = 1.0f/v->homogeneous[3]; v->homogeneous[3] = wRcp; // Division of XYZ coordinates by weight v->homogeneous[0] *= wRcp; v->homogeneous[1] *= wRcp; v->homogeneous[2] *= wRcp; // Division of texture coordinates (perspective-correct) v->texcoord[0] *= wRcp; v->texcoord[1] *= wRcp; // Division of colors (perspective-correct) v->color[0] *= wRcp; v->color[1] *= wRcp; v->color[2] *= wRcp; v->color[3] *= wRcp; // Transformation to screen space sw_project_ndc_to_screen(v->screen, v->homogeneous); } } } #define DEFINE_TRIANGLE_RASTER_SCANLINE(FUNC_NAME, ENABLE_TEXTURE, ENABLE_DEPTH_TEST, ENABLE_COLOR_BLEND) \ static inline void FUNC_NAME(const sw_texture_t *tex, const sw_vertex_t *start, \ const sw_vertex_t *end, float dUdy, float dVdy) \ { \ /* Gets the start and end coordinates */ \ int xStart = (int)start->screen[0]; \ int xEnd = (int)end->screen[0]; \ \ /* Avoid empty lines */ \ if (xStart == xEnd) return; \ \ /* Compute the subpixel distance to traverse before the first pixel */ \ float xSubstep = 1.0f - sw_fract(start->screen[0]); \ \ /* Compute the inverse horizontal distance along the X axis */ \ float dxRcp = 1.0f/(end->screen[0] - start->screen[0]); \ \ /* Compute the interpolation steps along the X axis */ \ float dZdx = (end->homogeneous[2] - start->homogeneous[2])*dxRcp; \ float dWdx = (end->homogeneous[3] - start->homogeneous[3])*dxRcp; \ \ float dCdx[4] = { \ (end->color[0] - start->color[0])*dxRcp, \ (end->color[1] - start->color[1])*dxRcp, \ (end->color[2] - start->color[2])*dxRcp, \ (end->color[3] - start->color[3])*dxRcp \ }; \ \ float dUdx = 0.0f; \ float dVdx = 0.0f; \ if (ENABLE_TEXTURE) { \ dUdx = (end->texcoord[0] - start->texcoord[0])*dxRcp; \ dVdx = (end->texcoord[1] - start->texcoord[1])*dxRcp; \ } \ \ /* Initializing the interpolation starting values */ \ float z = start->homogeneous[2] + dZdx*xSubstep; \ float w = start->homogeneous[3] + dWdx*xSubstep; \ \ float color[4] = { \ start->color[0] + dCdx[0]*xSubstep, \ start->color[1] + dCdx[1]*xSubstep, \ start->color[2] + dCdx[2]*xSubstep, \ start->color[3] + dCdx[3]*xSubstep \ }; \ \ float u = 0.0f; \ float v = 0.0f; \ if (ENABLE_TEXTURE) { \ u = start->texcoord[0] + dUdx*xSubstep; \ v = start->texcoord[1] + dVdx*xSubstep; \ } \ \ /* Pre-calculate the starting pointers for the framebuffer row */ \ int y = (int)start->screen[1]; \ sw_pixel_t *ptr = RLSW.framebuffer.pixels + y*RLSW.framebuffer.width + xStart; \ \ /* Scanline rasterization */ \ for (int x = xStart; x < xEnd; x++) \ { \ float wRcp = 1.0f/w; \ float srcColor[4] = { \ color[0]*wRcp, \ color[1]*wRcp, \ color[2]*wRcp, \ color[3]*wRcp \ }; \ \ if (ENABLE_DEPTH_TEST) \ { \ /* TODO: Implement different depth funcs? */ \ float depth = sw_framebuffer_read_depth(ptr); \ if (z > depth) goto discard; \ } \ \ /* TODO: Implement depth mask */ \ sw_framebuffer_write_depth(ptr, z); \ \ if (ENABLE_TEXTURE) \ { \ float texColor[4]; \ float s = u*wRcp; \ float t = v*wRcp; \ sw_texture_sample(texColor, tex, s, t, dUdx, dUdy, dVdx, dVdy); \ srcColor[0] *= texColor[0]; \ srcColor[1] *= texColor[1]; \ srcColor[2] *= texColor[2]; \ srcColor[3] *= texColor[3]; \ } \ \ if (ENABLE_COLOR_BLEND) \ { \ float dstColor[4]; \ sw_framebuffer_read_color(dstColor, ptr); \ sw_blend_colors(dstColor, srcColor); \ sw_framebuffer_write_color(ptr, dstColor); \ } \ else \ { \ sw_framebuffer_write_color(ptr, srcColor); \ } \ \ /* Increment the interpolation parameter, UVs, and pointers */ \ discard: \ z += dZdx; \ w += dWdx; \ color[0] += dCdx[0]; \ color[1] += dCdx[1]; \ color[2] += dCdx[2]; \ color[3] += dCdx[3]; \ if (ENABLE_TEXTURE) \ { \ u += dUdx; \ v += dVdx; \ } \ ++ptr; \ } \ } #define DEFINE_TRIANGLE_RASTER(FUNC_NAME, FUNC_SCANLINE, ENABLE_TEXTURE) \ static inline void FUNC_NAME(const sw_vertex_t *v0, const sw_vertex_t *v1, \ const sw_vertex_t *v2, const sw_texture_t *tex) \ { \ /* Swap vertices by increasing y */ \ if (v0->screen[1] > v1->screen[1]) { const sw_vertex_t *tmp = v0; v0 = v1; v1 = tmp; } \ if (v1->screen[1] > v2->screen[1]) { const sw_vertex_t *tmp = v1; v1 = v2; v2 = tmp; } \ if (v0->screen[1] > v1->screen[1]) { const sw_vertex_t *tmp = v0; v0 = v1; v1 = tmp; } \ \ /* Extracting coordinates from the sorted vertices */ \ float x0 = v0->screen[0], y0 = v0->screen[1]; \ float x1 = v1->screen[0], y1 = v1->screen[1]; \ float x2 = v2->screen[0], y2 = v2->screen[1]; \ \ /* Compute height differences */ \ float h02 = y2 - y0; \ float h01 = y1 - y0; \ float h12 = y2 - y1; \ \ if (h02 < 1e-6f) return; \ \ /* Precompute the inverse values without additional checks */ \ float h02Rcp = 1.0f/h02; \ float h01Rcp = (h01 > 1e-6f)? 1.0f/h01 : 0.0f; \ float h12Rcp = (h12 > 1e-6f)? 1.0f/h12 : 0.0f; \ \ /* Pre-calculation of slopes */ \ float dXdy02 = (x2 - x0)*h02Rcp; \ float dXdy01 = (x1 - x0)*h01Rcp; \ float dXdy12 = (x2 - x1)*h12Rcp; \ \ /* Y subpixel correction */ \ float y0Substep = 1.0f - sw_fract(y0); \ float y1Substep = 1.0f - sw_fract(y1); \ \ /* Y bounds (vertical clipping) */ \ int yTop = (int)y0; \ int yMid = (int)y1; \ int yBot = (int)y2; \ \ /* Compute gradients for each side of the triangle */ \ sw_vertex_t dVXdy02, dVXdy01, dVXdy12; \ sw_get_vertex_grad_PTCH(&dVXdy02, v0, v2, h02Rcp); \ sw_get_vertex_grad_PTCH(&dVXdy01, v0, v1, h01Rcp); \ sw_get_vertex_grad_PTCH(&dVXdy12, v1, v2, h12Rcp); \ \ /* Get a copy of vertices for interpolation and apply substep correction */ \ sw_vertex_t vLeft = *v0, vRight = *v0; \ sw_add_vertex_grad_scaled_PTCH(&vLeft, &dVXdy02, y0Substep); \ sw_add_vertex_grad_scaled_PTCH(&vRight, &dVXdy01, y0Substep); \ \ vLeft.screen[0] += dXdy02*y0Substep; \ vRight.screen[0] += dXdy01*y0Substep; \ \ /* Scanline for the upper part of the triangle */ \ for (int y = yTop; y < yMid; y++) \ { \ vLeft.screen[1] = vRight.screen[1] = y; \ \ if (vLeft.screen[0] < vRight.screen[0]) FUNC_SCANLINE(tex, &vLeft, &vRight, dVXdy02.texcoord[0], dVXdy02.texcoord[1]); \ else FUNC_SCANLINE(tex, &vRight, &vLeft, dVXdy02.texcoord[0], dVXdy02.texcoord[1]); \ \ sw_add_vertex_grad_PTCH(&vLeft, &dVXdy02); \ vLeft.screen[0] += dXdy02; \ \ sw_add_vertex_grad_PTCH(&vRight, &dVXdy01); \ vRight.screen[0] += dXdy01; \ } \ \ /* Get a copy of next right for interpolation and apply substep correction */ \ vRight = *v1; \ sw_add_vertex_grad_scaled_PTCH(&vRight, &dVXdy12, y1Substep); \ vRight.screen[0] += dXdy12*y1Substep; \ \ /* Scanline for the lower part of the triangle */ \ for (int y = yMid; y < yBot; y++) \ { \ vLeft.screen[1] = vRight.screen[1] = y; \ \ if (vLeft.screen[0] < vRight.screen[0]) FUNC_SCANLINE(tex, &vLeft, &vRight, dVXdy02.texcoord[0], dVXdy02.texcoord[1]); \ else FUNC_SCANLINE(tex, &vRight, &vLeft, dVXdy02.texcoord[0], dVXdy02.texcoord[1]); \ \ sw_add_vertex_grad_PTCH(&vLeft, &dVXdy02); \ vLeft.screen[0] += dXdy02; \ \ sw_add_vertex_grad_PTCH(&vRight, &dVXdy12); \ vRight.screen[0] += dXdy12; \ } \ } DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline, 0, 0, 0) DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline_TEX, 1, 0, 0) DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline_DEPTH, 0, 1, 0) DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline_BLEND, 0, 0, 1) DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline_TEX_DEPTH, 1, 1, 0) DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline_TEX_BLEND, 1, 0, 1) DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline_DEPTH_BLEND, 0, 1, 1) DEFINE_TRIANGLE_RASTER_SCANLINE(sw_triangle_raster_scanline_TEX_DEPTH_BLEND, 1, 1, 1) DEFINE_TRIANGLE_RASTER(sw_triangle_raster, sw_triangle_raster_scanline, false) DEFINE_TRIANGLE_RASTER(sw_triangle_raster_TEX, sw_triangle_raster_scanline_TEX, true) DEFINE_TRIANGLE_RASTER(sw_triangle_raster_DEPTH, sw_triangle_raster_scanline_DEPTH, false) DEFINE_TRIANGLE_RASTER(sw_triangle_raster_BLEND, sw_triangle_raster_scanline_BLEND, false) DEFINE_TRIANGLE_RASTER(sw_triangle_raster_TEX_DEPTH, sw_triangle_raster_scanline_TEX_DEPTH, true) DEFINE_TRIANGLE_RASTER(sw_triangle_raster_TEX_BLEND, sw_triangle_raster_scanline_TEX_BLEND, true) DEFINE_TRIANGLE_RASTER(sw_triangle_raster_DEPTH_BLEND, sw_triangle_raster_scanline_DEPTH_BLEND, false) DEFINE_TRIANGLE_RASTER(sw_triangle_raster_TEX_DEPTH_BLEND, sw_triangle_raster_scanline_TEX_DEPTH_BLEND, true) static inline void sw_triangle_render(void) { if (RLSW.stateFlags & SW_STATE_CULL_FACE) { if (!sw_triangle_face_culling()) return; } sw_triangle_clip_and_project(); if (RLSW.vertexCounter < 3) return; #define TRIANGLE_RASTER(RASTER_FUNC) \ { \ for (int i = 0; i < RLSW.vertexCounter - 2; i++) \ { \ RASTER_FUNC( \ &RLSW.vertexBuffer[0], \ &RLSW.vertexBuffer[i + 1], \ &RLSW.vertexBuffer[i + 2], \ &RLSW.loadedTextures[RLSW.currentTexture] \ ); \ } \ } uint32_t state = RLSW.stateFlags; if (RLSW.currentTexture == 0) state &= ~SW_STATE_TEXTURE_2D; if ((RLSW.srcFactor == SW_ONE) && (RLSW.dstFactor == SW_ZERO)) state &= ~SW_STATE_BLEND; if (SW_STATE_CHECK_EX(state, SW_STATE_TEXTURE_2D | SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) TRIANGLE_RASTER(sw_triangle_raster_TEX_DEPTH_BLEND) else if (SW_STATE_CHECK_EX(state, SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) TRIANGLE_RASTER(sw_triangle_raster_DEPTH_BLEND) else if (SW_STATE_CHECK_EX(state, SW_STATE_TEXTURE_2D | SW_STATE_BLEND)) TRIANGLE_RASTER(sw_triangle_raster_TEX_BLEND) else if (SW_STATE_CHECK_EX(state, SW_STATE_TEXTURE_2D | SW_STATE_DEPTH_TEST)) TRIANGLE_RASTER(sw_triangle_raster_TEX_DEPTH) else if (SW_STATE_CHECK_EX(state, SW_STATE_BLEND)) TRIANGLE_RASTER(sw_triangle_raster_BLEND) else if (SW_STATE_CHECK_EX(state, SW_STATE_DEPTH_TEST)) TRIANGLE_RASTER(sw_triangle_raster_DEPTH) else if (SW_STATE_CHECK_EX(state, SW_STATE_TEXTURE_2D)) TRIANGLE_RASTER(sw_triangle_raster_TEX) else TRIANGLE_RASTER(sw_triangle_raster) #undef TRIANGLE_RASTER } //------------------------------------------------------------------------------------------- // Quad rendering logic //------------------------------------------------------------------------------------------- static inline bool sw_quad_face_culling(void) { // NOTE: Face culling is done before clipping to avoid unnecessary computations // To handle quads crossing the w=0 plane correctly, // we perform the winding order test in homogeneous coordinates directly, // before the perspective division (division by w) // For a convex quad with vertices P0, P1, P2, P3 in sequential order, // the winding order of the quad is the same as the winding order // of the triangle P0 P1 P2. We use the homogeneous triangle // winding test on this first triangle // Preload homogeneous coordinates into local variables const float *h0 = RLSW.vertexBuffer[0].homogeneous; const float *h1 = RLSW.vertexBuffer[1].homogeneous; const float *h2 = RLSW.vertexBuffer[2].homogeneous; // NOTE: h3 is not needed for this test // const float *h3 = RLSW.vertexBuffer[3].homogeneous; // Compute a value proportional to the signed area of the triangle P0 P1 P2 // in the projected 2D plane, calculated directly using homogeneous coordinates // BEFORE division by w // This is the determinant of the matrix formed by the (x, y, w) components // of the vertices P0, P1, and P2. Its sign correctly indicates the winding order // in homogeneous space and its relationship to the projected 2D winding order, // even with negative w values // The determinant formula used here is: // h0.x*(h1.y*h2.w - h2.y*h1.w) + // h1.x*(h2.y*h0.w - h0.y*h2.w) + // h2.x*(h0.y*h1.w - h1.y*h0.w) const float hSgnArea = h0[0]*(h1[1]*h2[3] - h2[1]*h1[3]) + h1[0]*(h2[1]*h0[3] - h0[1]*h2[3]) + h2[0]*(h0[1]*h1[3] - h1[1]*h0[3]); // Perform face culling based on the winding order determined by the sign // of the homogeneous area/determinant of triangle P0 P1 P2 // This test is robust for points with w > 0 or w < 0 within the triangle, // correctly capturing the change in orientation when crossing the w=0 plane // A positive hSgnArea typically corresponds to a counter-clockwise // winding in the projected space when all w > 0 // A value of 0 for hSgnArea means P0, P1, P2 are collinear in (x, y, w) // space, which corresponds to a degenerate triangle projection // Such quads might also be degenerate or non-planar. They are typically // not culled by this test (0 < 0 is false, 0 > 0 is false) // and should be handled by the clipper if necessary. return (RLSW.cullFace == SW_FRONT)? (hSgnArea < 0.0f) : (hSgnArea > 0.0f); // Cull if winding is "clockwise" : "counter-clockwise" } static inline void sw_quad_clip_and_project(void) { sw_vertex_t *polygon = RLSW.vertexBuffer; int *vertexCounter = &RLSW.vertexCounter; if (sw_polygon_clip(polygon, vertexCounter)) { // Transformation to screen space and normalization for (int i = 0; i < *vertexCounter; i++) { sw_vertex_t *v = &polygon[i]; // Calculation of the reciprocal of W for normalization // as well as perspective-correct attributes const float wRcp = 1.0f/v->homogeneous[3]; v->homogeneous[3] = wRcp; // Division of XYZ coordinates by weight v->homogeneous[0] *= wRcp; v->homogeneous[1] *= wRcp; v->homogeneous[2] *= wRcp; // Division of texture coordinates (perspective-correct) v->texcoord[0] *= wRcp; v->texcoord[1] *= wRcp; // Division of colors (perspective-correct) v->color[0] *= wRcp; v->color[1] *= wRcp; v->color[2] *= wRcp; v->color[3] *= wRcp; // Transformation to screen space sw_project_ndc_to_screen(v->screen, v->homogeneous); } } } static inline bool sw_quad_is_axis_aligned(void) { // Reject quads with perspective projection // The fast path assumes affine (non-perspective) quads, // so we require all vertices to have homogeneous w = 1.0 for (int i = 0; i < 4; i++) { if (RLSW.vertexBuffer[i].homogeneous[3] != 1.0f) return false; } // Epsilon tolerance in screen space (pixels) const float epsilon = 0.5f; // Fetch screen-space positions for the four quad vertices const float *p0 = RLSW.vertexBuffer[0].screen; const float *p1 = RLSW.vertexBuffer[1].screen; const float *p2 = RLSW.vertexBuffer[2].screen; const float *p3 = RLSW.vertexBuffer[3].screen; // Compute edge vectors between consecutive vertices // These define the four sides of the quad in screen space float dx01 = p1[0] - p0[0], dy01 = p1[1] - p0[1]; float dx12 = p2[0] - p1[0], dy12 = p2[1] - p1[1]; float dx23 = p3[0] - p2[0], dy23 = p3[1] - p2[1]; float dx30 = p0[0] - p3[0], dy30 = p0[1] - p3[1]; // Each edge must be either horizontal or vertical within epsilon tolerance // If any edge deviates significantly from either axis, the quad is not axis-aligned if (!((fabsf(dy01) < epsilon) || (fabsf(dx01) < epsilon))) return false; if (!((fabsf(dy12) < epsilon) || (fabsf(dx12) < epsilon))) return false; if (!((fabsf(dy23) < epsilon) || (fabsf(dx23) < epsilon))) return false; if (!((fabsf(dy30) < epsilon) || (fabsf(dx30) < epsilon))) return false; return true; } static inline void sw_quad_sort_cw(const sw_vertex_t* *output) { const sw_vertex_t *input = RLSW.vertexBuffer; // Calculate the centroid of the quad float cx = (input[0].screen[0] + input[1].screen[0] + input[2].screen[0] + input[3].screen[0])*0.25f; float cy = (input[0].screen[1] + input[1].screen[1] + input[2].screen[1] + input[3].screen[1])*0.25f; // Calculate the angle of each vertex relative to the center // and assign them directly to their correct position const sw_vertex_t *corners[4] = { 0 }; for (int i = 0; i < 4; i++) { float dx = input[i].screen[0] - cx; float dy = input[i].screen[1] - cy; // Determine the quadrant (clockwise from top-left) // top-left: dx < 0, dy < 0 // top-right: dx >= 0, dy < 0 // bottom-right: dx >= 0, dy >= 0 // bottom-left: dx < 0, dy >= 0 int idx; if (dy < 0) idx = (dx < 0)? 0 : 1; // Top row else idx = (dx < 0)? 3 : 2; // Bottom row corners[idx] = &input[i]; } output[0] = corners[0]; // top-left output[1] = corners[1]; // top-right output[2] = corners[2]; // bottom-right output[3] = corners[3]; // bottom-left } // TODO: REVIEW: Could a perfectly aligned quad, where one of the four points has a different depth, // still appear perfectly aligned from a certain point of view? // Because in that case, we would still need to perform perspective division for textures and colors... #define DEFINE_QUAD_RASTER_AXIS_ALIGNED(FUNC_NAME, ENABLE_TEXTURE, ENABLE_DEPTH_TEST, ENABLE_COLOR_BLEND) \ static inline void FUNC_NAME(void) \ { \ const sw_vertex_t *sortedVerts[4]; \ sw_quad_sort_cw(sortedVerts); \ \ const sw_vertex_t *v0 = sortedVerts[0]; \ const sw_vertex_t *v1 = sortedVerts[1]; \ const sw_vertex_t *v2 = sortedVerts[2]; \ const sw_vertex_t *v3 = sortedVerts[3]; \ \ /* Screen bounds (axis-aligned) */ \ int xMin = (int)v0->screen[0]; \ int yMin = (int)v0->screen[1]; \ int xMax = (int)v2->screen[0]; \ int yMax = (int)v2->screen[1]; \ \ float w = v2->screen[0] - v0->screen[0]; \ float h = v2->screen[1] - v0->screen[1]; \ \ if ((w == 0) || (h == 0)) return; \ \ float wRcp = (w > 0.0f)? 1.0f/w : 0.0f; \ float hRcp = (h > 0.0f)? 1.0f/h : 0.0f; \ \ /* Subpixel corrections */ \ float xSubstep = 1.0f - sw_fract(v0->screen[0]); \ float ySubstep = 1.0f - sw_fract(v0->screen[1]); \ \ /* Calculation of vertex gradients in X and Y */ \ float dUdx = 0.0f, dVdx = 0.0f; \ float dUdy = 0.0f, dVdy = 0.0f; \ if (ENABLE_TEXTURE) { \ dUdx = (v1->texcoord[0] - v0->texcoord[0])*wRcp; \ dVdx = (v1->texcoord[1] - v0->texcoord[1])*wRcp; \ dUdy = (v3->texcoord[0] - v0->texcoord[0])*hRcp; \ dVdy = (v3->texcoord[1] - v0->texcoord[1])*hRcp; \ } \ \ float dCdx[4], dCdy[4]; \ dCdx[0] = (v1->color[0] - v0->color[0])*wRcp; \ dCdx[1] = (v1->color[1] - v0->color[1])*wRcp; \ dCdx[2] = (v1->color[2] - v0->color[2])*wRcp; \ dCdx[3] = (v1->color[3] - v0->color[3])*wRcp; \ dCdy[0] = (v3->color[0] - v0->color[0])*hRcp; \ dCdy[1] = (v3->color[1] - v0->color[1])*hRcp; \ dCdy[2] = (v3->color[2] - v0->color[2])*hRcp; \ dCdy[3] = (v3->color[3] - v0->color[3])*hRcp; \ \ float dZdx, dZdy; \ dZdx = (v1->homogeneous[2] - v0->homogeneous[2])*wRcp; \ dZdy = (v3->homogeneous[2] - v0->homogeneous[2])*hRcp; \ \ /* Start of quad rasterization */ \ const sw_texture_t *tex; \ if (ENABLE_TEXTURE) tex = &RLSW.loadedTextures[RLSW.currentTexture]; \ \ sw_pixel_t *pixels = RLSW.framebuffer.pixels; \ int wDst = RLSW.framebuffer.width; \ \ float zScanline = v0->homogeneous[2] + dZdx*xSubstep + dZdy*ySubstep; \ float uScanline = v0->texcoord[0] + dUdx*xSubstep + dUdy*ySubstep; \ float vScanline = v0->texcoord[1] + dVdx*xSubstep + dVdy*ySubstep; \ \ float colorScanline[4] = { \ v0->color[0] + dCdx[0]*xSubstep + dCdy[0]*ySubstep, \ v0->color[1] + dCdx[1]*xSubstep + dCdy[1]*ySubstep, \ v0->color[2] + dCdx[2]*xSubstep + dCdy[2]*ySubstep, \ v0->color[3] + dCdx[3]*xSubstep + dCdy[3]*ySubstep \ }; \ \ for (int y = yMin; y < yMax; y++) \ { \ sw_pixel_t *ptr = pixels + y*wDst + xMin; \ \ float z = zScanline; \ float u = uScanline; \ float v = vScanline; \ \ float color[4] = { \ colorScanline[0], \ colorScanline[1], \ colorScanline[2], \ colorScanline[3] \ }; \ \ /* Scanline rasterization */ \ for (int x = xMin; x < xMax; x++) \ { \ /* Pixel color computation */ \ float srcColor[4] = { \ color[0], \ color[1], \ color[2], \ color[3] \ }; \ \ /* Test and write depth */ \ if (ENABLE_DEPTH_TEST) \ { \ /* TODO: Implement different depth funcs? */ \ float depth = sw_framebuffer_read_depth(ptr); \ if (z > depth) goto discard; \ } \ \ /* TODO: Implement depth mask */ \ sw_framebuffer_write_depth(ptr, z); \ \ if (ENABLE_TEXTURE) \ { \ float texColor[4]; \ sw_texture_sample(texColor, tex, u, v, dUdx, dUdy, dVdx, dVdy); \ srcColor[0] *= texColor[0]; \ srcColor[1] *= texColor[1]; \ srcColor[2] *= texColor[2]; \ srcColor[3] *= texColor[3]; \ } \ \ if (ENABLE_COLOR_BLEND) \ { \ float dstColor[4]; \ sw_framebuffer_read_color(dstColor, ptr); \ sw_blend_colors(dstColor, srcColor); \ sw_framebuffer_write_color(ptr, dstColor); \ } \ else sw_framebuffer_write_color(ptr, srcColor); \ \ discard: \ z += dZdx; \ color[0] += dCdx[0]; \ color[1] += dCdx[1]; \ color[2] += dCdx[2]; \ color[3] += dCdx[3]; \ if (ENABLE_TEXTURE) \ { \ u += dUdx; \ v += dVdx; \ } \ ++ptr; \ } \ \ zScanline += dZdy; \ colorScanline[0] += dCdy[0]; \ colorScanline[1] += dCdy[1]; \ colorScanline[2] += dCdy[2]; \ colorScanline[3] += dCdy[3]; \ \ if (ENABLE_TEXTURE) \ { \ uScanline += dUdy; \ vScanline += dVdy; \ } \ } \ } DEFINE_QUAD_RASTER_AXIS_ALIGNED(sw_quad_raster_axis_aligned, 0, 0, 0) DEFINE_QUAD_RASTER_AXIS_ALIGNED(sw_quad_raster_axis_aligned_TEX, 1, 0, 0) DEFINE_QUAD_RASTER_AXIS_ALIGNED(sw_quad_raster_axis_aligned_DEPTH, 0, 1, 0) DEFINE_QUAD_RASTER_AXIS_ALIGNED(sw_quad_raster_axis_aligned_BLEND, 0, 0, 1) DEFINE_QUAD_RASTER_AXIS_ALIGNED(sw_quad_raster_axis_aligned_TEX_DEPTH, 1, 1, 0) DEFINE_QUAD_RASTER_AXIS_ALIGNED(sw_quad_raster_axis_aligned_TEX_BLEND, 1, 0, 1) DEFINE_QUAD_RASTER_AXIS_ALIGNED(sw_quad_raster_axis_aligned_DEPTH_BLEND, 0, 1, 1) DEFINE_QUAD_RASTER_AXIS_ALIGNED(sw_quad_raster_axis_aligned_TEX_DEPTH_BLEND, 1, 1, 1) static inline void sw_quad_render(void) { if (RLSW.stateFlags & SW_STATE_CULL_FACE) { if (!sw_quad_face_culling()) return; } sw_quad_clip_and_project(); if (RLSW.vertexCounter < 3) return; uint32_t state = RLSW.stateFlags; if (RLSW.currentTexture == 0) state &= ~SW_STATE_TEXTURE_2D; if ((RLSW.srcFactor == SW_ONE) && (RLSW.dstFactor == SW_ZERO)) state &= ~SW_STATE_BLEND; if ((RLSW.vertexCounter == 4) && sw_quad_is_axis_aligned()) { if (SW_STATE_CHECK_EX(state, SW_STATE_TEXTURE_2D | SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) sw_quad_raster_axis_aligned_TEX_DEPTH_BLEND(); else if (SW_STATE_CHECK_EX(state, SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) sw_quad_raster_axis_aligned_DEPTH_BLEND(); else if (SW_STATE_CHECK_EX(state, SW_STATE_TEXTURE_2D | SW_STATE_BLEND)) sw_quad_raster_axis_aligned_TEX_BLEND(); else if (SW_STATE_CHECK_EX(state, SW_STATE_TEXTURE_2D | SW_STATE_DEPTH_TEST)) sw_quad_raster_axis_aligned_TEX_DEPTH(); else if (SW_STATE_CHECK_EX(state, SW_STATE_BLEND)) sw_quad_raster_axis_aligned_BLEND(); else if (SW_STATE_CHECK_EX(state, SW_STATE_DEPTH_TEST)) sw_quad_raster_axis_aligned_DEPTH(); else if (SW_STATE_CHECK_EX(state, SW_STATE_TEXTURE_2D)) sw_quad_raster_axis_aligned_TEX(); else sw_quad_raster_axis_aligned(); return; } #define TRIANGLE_RASTER(RASTER_FUNC) \ { \ for (int i = 0; i < RLSW.vertexCounter - 2; i++) \ { \ RASTER_FUNC( \ &RLSW.vertexBuffer[0], \ &RLSW.vertexBuffer[i + 1], \ &RLSW.vertexBuffer[i + 2], \ &RLSW.loadedTextures[RLSW.currentTexture] \ ); \ } \ } if (SW_STATE_CHECK_EX(state, SW_STATE_TEXTURE_2D | SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) TRIANGLE_RASTER(sw_triangle_raster_TEX_DEPTH_BLEND) else if (SW_STATE_CHECK_EX(state, SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) TRIANGLE_RASTER(sw_triangle_raster_DEPTH_BLEND) else if (SW_STATE_CHECK_EX(state, SW_STATE_TEXTURE_2D | SW_STATE_BLEND)) TRIANGLE_RASTER(sw_triangle_raster_TEX_BLEND) else if (SW_STATE_CHECK_EX(state, SW_STATE_TEXTURE_2D | SW_STATE_DEPTH_TEST)) TRIANGLE_RASTER(sw_triangle_raster_TEX_DEPTH) else if (SW_STATE_CHECK_EX(state, SW_STATE_BLEND)) TRIANGLE_RASTER(sw_triangle_raster_BLEND) else if (SW_STATE_CHECK_EX(state, SW_STATE_DEPTH_TEST)) TRIANGLE_RASTER(sw_triangle_raster_DEPTH) else if (SW_STATE_CHECK_EX(state, SW_STATE_TEXTURE_2D)) TRIANGLE_RASTER(sw_triangle_raster_TEX) else TRIANGLE_RASTER(sw_triangle_raster) #undef TRIANGLE_RASTER } //------------------------------------------------------------------------------------------- // Line rendering logic //------------------------------------------------------------------------------------------- static inline bool sw_line_clip_coord(float q, float p, float *t0, float *t1) { if (fabsf(p) < SW_CLIP_EPSILON) { // Check if the line is entirely outside the window if (q < -SW_CLIP_EPSILON) return 0; // Completely outside return 1; // Completely inside or on the edges } const float r = q/p; if (p < 0) { if (r > *t1) return 0; if (r > *t0) *t0 = r; } else { if (r < *t0) return 0; if (r < *t1) *t1 = r; } return 1; } static inline bool sw_line_clip(sw_vertex_t *v0, sw_vertex_t *v1) { float t0 = 0.0f, t1 = 1.0f; float dH[4], dC[4]; for (int i = 0; i < 4; i++) { dH[i] = v1->homogeneous[i] - v0->homogeneous[i]; dC[i] = v1->color[i] - v0->color[i]; } // Clipping Liang-Barsky if (!sw_line_clip_coord(v0->homogeneous[3] - v0->homogeneous[0], -dH[3] + dH[0], &t0, &t1)) return false; if (!sw_line_clip_coord(v0->homogeneous[3] + v0->homogeneous[0], -dH[3] - dH[0], &t0, &t1)) return false; if (!sw_line_clip_coord(v0->homogeneous[3] - v0->homogeneous[1], -dH[3] + dH[1], &t0, &t1)) return false; if (!sw_line_clip_coord(v0->homogeneous[3] + v0->homogeneous[1], -dH[3] - dH[1], &t0, &t1)) return false; if (!sw_line_clip_coord(v0->homogeneous[3] - v0->homogeneous[2], -dH[3] + dH[2], &t0, &t1)) return false; if (!sw_line_clip_coord(v0->homogeneous[3] + v0->homogeneous[2], -dH[3] - dH[2], &t0, &t1)) return false; // Clipping Scissor if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { if (!sw_line_clip_coord(v0->homogeneous[0] - RLSW.scClipMin[0]*v0->homogeneous[3], RLSW.scClipMin[0]*dH[3] - dH[0], &t0, &t1)) return false; if (!sw_line_clip_coord(RLSW.scClipMax[0]*v0->homogeneous[3] - v0->homogeneous[0], dH[0] - RLSW.scClipMax[0]*dH[3], &t0, &t1)) return false; if (!sw_line_clip_coord(v0->homogeneous[1] - RLSW.scClipMin[1]*v0->homogeneous[3], RLSW.scClipMin[1]*dH[3] - dH[1], &t0, &t1)) return false; if (!sw_line_clip_coord(RLSW.scClipMax[1]*v0->homogeneous[3] - v0->homogeneous[1], dH[1] - RLSW.scClipMax[1]*dH[3], &t0, &t1)) return false; } // Interpolation of new coordinates if (t1 < 1.0f) { for (int i = 0; i < 4; i++) { v1->homogeneous[i] = v0->homogeneous[i] + t1*dH[i]; v1->color[i] = v0->color[i] + t1*dC[i]; } } if (t0 > 0.0f) { for (int i = 0; i < 4; i++) { v0->homogeneous[i] += t0*dH[i]; v0->color[i] += t0*dC[i]; } } return true; } static inline bool sw_line_clip_and_project(sw_vertex_t *v0, sw_vertex_t *v1) { if (!sw_line_clip(v0, v1)) return false; // Convert homogeneous coordinates to NDC v0->homogeneous[3] = 1.0f/v0->homogeneous[3]; v1->homogeneous[3] = 1.0f/v1->homogeneous[3]; for (int i = 0; i < 3; i++) { v0->homogeneous[i] *= v0->homogeneous[3]; v1->homogeneous[i] *= v1->homogeneous[3]; } // Convert NDC coordinates to screen space sw_project_ndc_to_screen(v0->screen, v0->homogeneous); sw_project_ndc_to_screen(v1->screen, v1->homogeneous); return true; } #define DEFINE_LINE_RASTER(FUNC_NAME, ENABLE_DEPTH_TEST, ENABLE_COLOR_BLEND) \ static inline void FUNC_NAME(const sw_vertex_t *v0, const sw_vertex_t *v1) \ { \ float x0 = v0->screen[0]; \ float y0 = v0->screen[1]; \ float x1 = v1->screen[0]; \ float y1 = v1->screen[1]; \ \ float dx = x1 - x0; \ float dy = y1 - y0; \ \ /* Compute dominant axis and subpixel offset */ \ float steps, substep; \ if (fabsf(dx) > fabsf(dy)) \ { \ steps = fabsf(dx); \ if (steps < 1.0f) return; \ substep = (dx >= 0.0f)? (1.0f - sw_fract(x0)) : sw_fract(x0); \ } \ else \ { \ steps = fabsf(dy); \ if (steps < 1.0f) return; \ substep = (dy >= 0.0f)? (1.0f - sw_fract(y0)) : sw_fract(y0); \ } \ \ /* Compute per pixel increments */ \ float xInc = dx/steps; \ float yInc = dy/steps; \ float stepRcp = 1.0f/steps; \ \ float zInc = (v1->homogeneous[2] - v0->homogeneous[2])*stepRcp; \ float rInc = (v1->color[0] - v0->color[0])*stepRcp; \ float gInc = (v1->color[1] - v0->color[1])*stepRcp; \ float bInc = (v1->color[2] - v0->color[2])*stepRcp; \ float aInc = (v1->color[3] - v0->color[3])*stepRcp; \ \ /* Initializing the interpolation starting values */ \ float x = x0 + xInc*substep; \ float y = y0 + yInc*substep; \ float z = v0->homogeneous[2] + zInc*substep; \ float r = v0->color[0] + rInc*substep; \ float g = v0->color[1] + gInc*substep; \ float b = v0->color[2] + bInc*substep; \ float a = v0->color[3] + aInc*substep; \ \ const int fbWidth = RLSW.framebuffer.width; \ sw_pixel_t *pixels = RLSW.framebuffer.pixels; \ \ int numPixels = (int)(steps - substep) + 1; \ \ for (int i = 0; i < numPixels; i++) \ { \ /* REVIEW: May require reviewing projection details */ \ int px = (int)(x - 0.5f); \ int py = (int)(y - 0.5f); \ \ sw_pixel_t *ptr = pixels + py*fbWidth + px; \ \ if (ENABLE_DEPTH_TEST) \ { \ float depth = sw_framebuffer_read_depth(ptr); \ if (z > depth) goto discard; \ } \ \ sw_framebuffer_write_depth(ptr, z); \ \ float color[4] = {r, g, b, a}; \ \ if (ENABLE_COLOR_BLEND) \ { \ float dstColor[4]; \ sw_framebuffer_read_color(dstColor, ptr); \ sw_blend_colors(dstColor, color); \ sw_framebuffer_write_color(ptr, dstColor); \ } \ else sw_framebuffer_write_color(ptr, color); \ \ discard: \ x += xInc; y += yInc; z += zInc; \ r += rInc; g += gInc; b += bInc; a += aInc; \ } \ } #define DEFINE_LINE_THICK_RASTER(FUNC_NAME, RASTER_FUNC) \ void FUNC_NAME(const sw_vertex_t *v1, const sw_vertex_t *v2) \ { \ sw_vertex_t tv1, tv2; \ \ int x1 = (int)v1->screen[0]; \ int y1 = (int)v1->screen[1]; \ int x2 = (int)v2->screen[0]; \ int y2 = (int)v2->screen[1]; \ \ int dx = x2 - x1; \ int dy = y2 - y1; \ \ RASTER_FUNC(v1, v2); \ \ if ((dx != 0) && (abs(dy/dx) < 1)) \ { \ int wy = (int)((RLSW.lineWidth - 1.0f)*abs(dx)/sqrtf(dx*dx + dy*dy)); \ wy >>= 1; \ for (int i = 1; i <= wy; i++) \ { \ tv1 = *v1, tv2 = *v2; \ tv1.screen[1] -= i; \ tv2.screen[1] -= i; \ RASTER_FUNC(&tv1, &tv2); \ tv1 = *v1, tv2 = *v2; \ tv1.screen[1] += i; \ tv2.screen[1] += i; \ RASTER_FUNC(&tv1, &tv2); \ } \ } \ else if (dy != 0) \ { \ int wx = (int)((RLSW.lineWidth - 1.0f)*abs(dy)/sqrtf(dx*dx + dy*dy)); \ wx >>= 1; \ for (int i = 1; i <= wx; i++) \ { \ tv1 = *v1, tv2 = *v2; \ tv1.screen[0] -= i; \ tv2.screen[0] -= i; \ RASTER_FUNC(&tv1, &tv2); \ tv1 = *v1, tv2 = *v2; \ tv1.screen[0] += i; \ tv2.screen[0] += i; \ RASTER_FUNC(&tv1, &tv2); \ } \ } \ } DEFINE_LINE_RASTER(sw_line_raster, 0, 0) DEFINE_LINE_RASTER(sw_line_raster_DEPTH, 1, 0) DEFINE_LINE_RASTER(sw_line_raster_BLEND, 0, 1) DEFINE_LINE_RASTER(sw_line_raster_DEPTH_BLEND, 1, 1) DEFINE_LINE_THICK_RASTER(sw_line_thick_raster, sw_line_raster) DEFINE_LINE_THICK_RASTER(sw_line_thick_raster_DEPTH, sw_line_raster_DEPTH) DEFINE_LINE_THICK_RASTER(sw_line_thick_raster_BLEND, sw_line_raster_BLEND) DEFINE_LINE_THICK_RASTER(sw_line_thick_raster_DEPTH_BLEND, sw_line_raster_DEPTH_BLEND) static inline void sw_line_render(sw_vertex_t *vertices) { if (!sw_line_clip_and_project(&vertices[0], &vertices[1])) return; if (RLSW.lineWidth >= 2.0f) { if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) sw_line_thick_raster_DEPTH_BLEND(&vertices[0], &vertices[1]); else if (SW_STATE_CHECK(SW_STATE_BLEND)) sw_line_thick_raster_BLEND(&vertices[0], &vertices[1]); else if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST)) sw_line_thick_raster_DEPTH(&vertices[0], &vertices[1]); else sw_line_thick_raster(&vertices[0], &vertices[1]); } else { if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) sw_line_raster_DEPTH_BLEND(&vertices[0], &vertices[1]); else if (SW_STATE_CHECK(SW_STATE_BLEND)) sw_line_raster_BLEND(&vertices[0], &vertices[1]); else if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST)) sw_line_raster_DEPTH(&vertices[0], &vertices[1]); else sw_line_raster(&vertices[0], &vertices[1]); } } //------------------------------------------------------------------------------------------- // Point rendering logic //------------------------------------------------------------------------------------------- static inline bool sw_point_clip_and_project(sw_vertex_t *v) { if (v->homogeneous[3] != 1.0f) { for (int_fast8_t i = 0; i < 3; i++) { if ((v->homogeneous[i] < -v->homogeneous[3]) || (v->homogeneous[i] > v->homogeneous[3])) return false; } v->homogeneous[3] = 1.0f/v->homogeneous[3]; v->homogeneous[0] *= v->homogeneous[3]; v->homogeneous[1] *= v->homogeneous[3]; v->homogeneous[2] *= v->homogeneous[3]; } sw_project_ndc_to_screen(v->screen, v->homogeneous); const int *min = NULL, *max = NULL; if (RLSW.stateFlags & SW_STATE_SCISSOR_TEST) { min = RLSW.scMin; max = RLSW.scMax; } else { min = RLSW.vpMin; max = RLSW.vpMax; } bool insideX = (v->screen[0] - RLSW.pointRadius < max[0]) && (v->screen[0] + RLSW.pointRadius > min[0]); bool insideY = (v->screen[1] - RLSW.pointRadius < max[1]) && (v->screen[1] + RLSW.pointRadius > min[1]); return (insideX && insideY); } #define DEFINE_POINT_RASTER(FUNC_NAME, ENABLE_DEPTH_TEST, ENABLE_COLOR_BLEND, CHECK_BOUNDS) \ static inline void FUNC_NAME(int x, int y, float z, const float color[4]) \ { \ if (CHECK_BOUNDS == 1) \ { \ if ((x < RLSW.vpMin[0]) || (x >= RLSW.vpMax[0])) return; \ if ((y < RLSW.vpMin[1]) || (y >= RLSW.vpMax[1])) return; \ } \ else if (CHECK_BOUNDS == SW_SCISSOR_TEST) \ { \ if ((x < RLSW.scMin[0]) || (x >= RLSW.scMax[0])) return; \ if ((y < RLSW.scMin[1]) || (y >= RLSW.scMax[1])) return; \ } \ \ int offset = y*RLSW.framebuffer.width + x; \ sw_pixel_t *ptr = RLSW.framebuffer.pixels + offset; \ \ if (ENABLE_DEPTH_TEST) \ { \ float depth = sw_framebuffer_read_depth(ptr); \ if (z > depth) return; \ } \ \ sw_framebuffer_write_depth(ptr, z); \ \ if (ENABLE_COLOR_BLEND) \ { \ float dstColor[4]; \ sw_framebuffer_read_color(dstColor, ptr); \ sw_blend_colors(dstColor, color); \ sw_framebuffer_write_color(ptr, dstColor); \ } \ else sw_framebuffer_write_color(ptr, color); \ } #define DEFINE_POINT_THICK_RASTER(FUNC_NAME, RASTER_FUNC) \ static inline void FUNC_NAME(sw_vertex_t *v) \ { \ int cx = v->screen[0]; \ int cy = v->screen[1]; \ float cz = v->homogeneous[2]; \ int radius = RLSW.pointRadius; \ const float *color = v->color; \ \ int x = 0; \ int y = radius; \ int d = 3 - 2*radius; \ \ while (x <= y) \ { \ for (int i = -x; i <= x; i++) \ { \ RASTER_FUNC(cx + i, cy + y, cz, color); \ RASTER_FUNC(cx + i, cy - y, cz, color); \ } \ for (int i = -y; i <= y; i++) \ { \ RASTER_FUNC(cx + i, cy + x, cz, color); \ RASTER_FUNC(cx + i, cy - x, cz, color); \ } \ if (d > 0) \ { \ y--; \ d = d + 4*(x - y) + 10; \ } \ else d = d + 4*x + 6; \ x++; \ } \ } DEFINE_POINT_RASTER(sw_point_raster, 0, 0, 0) DEFINE_POINT_RASTER(sw_point_raster_DEPTH, 1, 0, 0) DEFINE_POINT_RASTER(sw_point_raster_BLEND, 0, 1, 0) DEFINE_POINT_RASTER(sw_point_raster_DEPTH_BLEND, 1, 1, 0) DEFINE_POINT_RASTER(sw_point_raster_CHECK, 0, 0, 1) DEFINE_POINT_RASTER(sw_point_raster_DEPTH_CHECK, 1, 0, 1) DEFINE_POINT_RASTER(sw_point_raster_BLEND_CHECK, 0, 1, 1) DEFINE_POINT_RASTER(sw_point_raster_DEPTH_BLEND_CHECK, 1, 1, 1) DEFINE_POINT_RASTER(sw_point_raster_CHECK_SCISSOR, 0, 0, SW_SCISSOR_TEST) DEFINE_POINT_RASTER(sw_point_raster_DEPTH_CHECK_SCISSOR, 1, 0, SW_SCISSOR_TEST) DEFINE_POINT_RASTER(sw_point_raster_BLEND_CHECK_SCISSOR, 0, 1, SW_SCISSOR_TEST) DEFINE_POINT_RASTER(sw_point_raster_DEPTH_BLEND_CHECK_SCISSOR, 1, 1, SW_SCISSOR_TEST) DEFINE_POINT_THICK_RASTER(sw_point_thick_raster, sw_point_raster_CHECK) DEFINE_POINT_THICK_RASTER(sw_point_thick_raster_DEPTH, sw_point_raster_DEPTH_CHECK) DEFINE_POINT_THICK_RASTER(sw_point_thick_raster_BLEND, sw_point_raster_BLEND_CHECK) DEFINE_POINT_THICK_RASTER(sw_point_thick_raster_DEPTH_BLEND, sw_point_raster_DEPTH_BLEND_CHECK) DEFINE_POINT_THICK_RASTER(sw_point_thick_raster_SCISSOR, sw_point_raster_CHECK_SCISSOR) DEFINE_POINT_THICK_RASTER(sw_point_thick_raster_DEPTH_SCISSOR, sw_point_raster_DEPTH_CHECK_SCISSOR) DEFINE_POINT_THICK_RASTER(sw_point_thick_raster_BLEND_SCISSOR, sw_point_raster_BLEND_CHECK_SCISSOR) DEFINE_POINT_THICK_RASTER(sw_point_thick_raster_DEPTH_BLEND_SCISSOR, sw_point_raster_DEPTH_BLEND_CHECK_SCISSOR) static inline void sw_point_render(sw_vertex_t *v) { if (!sw_point_clip_and_project(v)) return; if (RLSW.pointRadius >= 1.0f) { if (SW_STATE_CHECK(SW_STATE_SCISSOR_TEST)) { if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) sw_point_thick_raster_DEPTH_BLEND_SCISSOR(v); else if (SW_STATE_CHECK(SW_STATE_BLEND)) sw_point_thick_raster_BLEND_SCISSOR(v); else if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST)) sw_point_thick_raster_DEPTH_SCISSOR(v); else sw_point_thick_raster_SCISSOR(v); } else { if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) sw_point_thick_raster_DEPTH_BLEND(v); else if (SW_STATE_CHECK(SW_STATE_BLEND)) sw_point_thick_raster_BLEND(v); else if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST)) sw_point_thick_raster_DEPTH(v); else sw_point_thick_raster(v); } } else { if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST | SW_STATE_BLEND)) sw_point_raster_DEPTH_BLEND(v->screen[0], v->screen[1], v->homogeneous[2], v->color); else if (SW_STATE_CHECK(SW_STATE_BLEND)) sw_point_raster_BLEND(v->screen[0], v->screen[1], v->homogeneous[2], v->color); else if (SW_STATE_CHECK(SW_STATE_DEPTH_TEST)) sw_point_raster_DEPTH(v->screen[0], v->screen[1], v->homogeneous[2], v->color); else sw_point_raster(v->screen[0], v->screen[1], v->homogeneous[2], v->color); } } //------------------------------------------------------------------------------------------- // Polygon modes rendering logic //------------------------------------------------------------------------------------------- static inline void sw_poly_point_render(void) { for (int i = 0; i < RLSW.vertexCounter; i++) sw_point_render(&RLSW.vertexBuffer[i]); } static inline void sw_poly_line_render(void) { const sw_vertex_t *vertices = RLSW.vertexBuffer; int cm1 = RLSW.vertexCounter - 1; for (int i = 0; i < cm1; i++) { sw_vertex_t verts[2] = { vertices[i], vertices[i + 1] }; sw_line_render(verts); } sw_vertex_t verts[2] = { vertices[cm1], vertices[0] }; sw_line_render(verts); } static inline void sw_poly_fill_render(void) { switch (RLSW.drawMode) { case SW_POINTS: sw_point_render(&RLSW.vertexBuffer[0]); break; case SW_LINES: sw_line_render(RLSW.vertexBuffer); break; case SW_TRIANGLES: sw_triangle_render(); break; case SW_QUADS: sw_quad_render(); break; } } //------------------------------------------------------------------------------------------- // Immediate rendering logic //------------------------------------------------------------------------------------------- void sw_immediate_push_vertex(const float position[4], const float color[4], const float texcoord[2]) { // Copy the attributes in the current vertex sw_vertex_t *vertex = &RLSW.vertexBuffer[RLSW.vertexCounter++]; for (int i = 0; i < 4; i++) { vertex->position[i] = position[i]; if (i < 2) vertex->texcoord[i] = texcoord[i]; vertex->color[i] = color[i]; } // Calculate homogeneous coordinates const float *m = RLSW.matMVP, *v = vertex->position; vertex->homogeneous[0] = m[0]*v[0] + m[4]*v[1] + m[8]*v[2] + m[12]*v[3]; vertex->homogeneous[1] = m[1]*v[0] + m[5]*v[1] + m[9]*v[2] + m[13]*v[3]; vertex->homogeneous[2] = m[2]*v[0] + m[6]*v[1] + m[10]*v[2] + m[14]*v[3]; vertex->homogeneous[3] = m[3]*v[0] + m[7]*v[1] + m[11]*v[2] + m[15]*v[3]; // Immediate rendering of the primitive if the required number is reached if (RLSW.vertexCounter == RLSW.reqVertices) { switch (RLSW.polyMode) { case SW_FILL: sw_poly_fill_render(); break; case SW_LINE: sw_poly_line_render(); break; case SW_POINT: sw_poly_point_render(); break; default: break; } RLSW.vertexCounter = 0; } } //------------------------------------------------------------------------------------------- // Validity check helper functions //------------------------------------------------------------------------------------------- static inline bool sw_is_texture_valid(uint32_t id) { bool valid = true; if (id == 0) valid = false; else if (id >= SW_MAX_TEXTURES) valid = false; else if (RLSW.loadedTextures[id].pixels == NULL) valid = false; return true; } static inline bool sw_is_texture_filter_valid(int filter) { return ((filter == SW_NEAREST) || (filter == SW_LINEAR)); } static inline bool sw_is_texture_wrap_valid(int wrap) { return ((wrap == SW_REPEAT) || (wrap == SW_CLAMP)); } static inline bool sw_is_draw_mode_valid(int mode) { bool result = false; switch (mode) { case SW_POINTS: case SW_LINES: case SW_TRIANGLES: case SW_QUADS: result = true; break; default: break; } return result; } static inline bool sw_is_poly_mode_valid(int mode) { bool result = false; switch (mode) { case SW_POINT: case SW_LINE: case SW_FILL: result = true; break; default: break; } return result; } static inline bool sw_is_face_valid(int face) { return (face == SW_FRONT || face == SW_BACK); } static inline bool sw_is_blend_src_factor_valid(int blend) { bool result = false; switch (blend) { case SW_ZERO: case SW_ONE: case SW_SRC_COLOR: case SW_ONE_MINUS_SRC_COLOR: case SW_SRC_ALPHA: case SW_ONE_MINUS_SRC_ALPHA: case SW_DST_ALPHA: case SW_ONE_MINUS_DST_ALPHA: case SW_DST_COLOR: case SW_ONE_MINUS_DST_COLOR: case SW_SRC_ALPHA_SATURATE: result = true; break; default: break; } return result; } static inline bool sw_is_blend_dst_factor_valid(int blend) { bool result = false; switch (blend) { case SW_ZERO: case SW_ONE: case SW_SRC_COLOR: case SW_ONE_MINUS_SRC_COLOR: case SW_SRC_ALPHA: case SW_ONE_MINUS_SRC_ALPHA: case SW_DST_ALPHA: case SW_ONE_MINUS_DST_ALPHA: case SW_DST_COLOR: case SW_ONE_MINUS_DST_COLOR: result = true; break; default: break; } return result; } //------------------------------------------------------------------------------------------- //---------------------------------------------------------------------------------- // Module Functions Definition //---------------------------------------------------------------------------------- bool swInit(int w, int h) { if (!sw_framebuffer_load(w, h)) { swClose(); return false; } swViewport(0, 0, w, h); swScissor(0, 0, w, h); RLSW.loadedTextures = (sw_texture_t *)SW_MALLOC(SW_MAX_TEXTURES*sizeof(sw_texture_t)); if (RLSW.loadedTextures == NULL) { swClose(); return false; } RLSW.freeTextureIds = (uint32_t *)SW_MALLOC(SW_MAX_TEXTURES*sizeof(uint32_t)); if (RLSW.loadedTextures == NULL) { swClose(); return false; } const float clearColor[4] = { 0.0f, 0.0f, 0.0f, 1.0f }; sw_framebuffer_write_color(&RLSW.clearValue, clearColor); sw_framebuffer_write_depth(&RLSW.clearValue, 1.0f); RLSW.currentMatrixMode = SW_MODELVIEW; RLSW.currentMatrix = &RLSW.stackModelview[0]; sw_matrix_id(RLSW.stackProjection[0]); sw_matrix_id(RLSW.stackModelview[0]); sw_matrix_id(RLSW.stackTexture[0]); sw_matrix_id(RLSW.matMVP); RLSW.stackProjectionCounter = 1; RLSW.stackModelviewCounter = 1; RLSW.stackTextureCounter = 1; RLSW.isDirtyMVP = false; RLSW.current.texcoord[0] = 0.0f; RLSW.current.texcoord[1] = 0.0f; RLSW.current.color[0] = 1.0f; RLSW.current.color[1] = 1.0f; RLSW.current.color[2] = 1.0f; RLSW.current.color[3] = 1.0f; RLSW.srcFactor = SW_SRC_ALPHA; RLSW.dstFactor = SW_ONE_MINUS_SRC_ALPHA; RLSW.srcFactorFunc = sw_factor_src_alpha; RLSW.dstFactorFunc = sw_factor_one_minus_src_alpha; RLSW.polyMode = SW_FILL; RLSW.cullFace = SW_BACK; static uint32_t defaultTex[3*2*2] = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }; RLSW.loadedTextures[0].pixels = (uint8_t*)defaultTex; RLSW.loadedTextures[0].width = 2; RLSW.loadedTextures[0].height = 2; RLSW.loadedTextures[0].wMinus1 = 1; RLSW.loadedTextures[0].hMinus1 = 1; RLSW.loadedTextures[0].minFilter = SW_NEAREST; RLSW.loadedTextures[0].magFilter = SW_NEAREST; RLSW.loadedTextures[0].sWrap = SW_REPEAT; RLSW.loadedTextures[0].tWrap = SW_REPEAT; RLSW.loadedTextures[0].tx = 0.5f; RLSW.loadedTextures[0].ty = 0.5f; RLSW.loadedTextureCount = 1; SW_LOG("INFO: RLSW: Software renderer initialized successfully\n"); #if defined(SW_HAS_FMA_AVX) && defined(SW_HAS_FMA_AVX2) SW_LOG("INFO: RLSW: Using SIMD instructions: FMA AVX\n"); #endif #if defined(SW_HAS_AVX) || defined(SW_HAS_AVX2) SW_LOG("INFO: RLSW: Using SIMD instructions: AVX\n"); #endif #if defined(SW_HAS_SSE) || defined(SW_HAS_SSE2) || defined(SW_HAS_SSE3) || defined(SW_HAS_SSSE3) || defined(SW_HAS_SSE41) || defined(SW_HAS_SSE42) SW_LOG("INFO: RLSW: Using SIMD instructions: SSE\n"); #endif #if defined(SW_HAS_NEON_FMA) || defined(SW_HAS_NEON) SW_LOG("INFO: RLSW: Using SIMD instructions: NEON\n"); #endif #if defined(SW_HAS_RVV) SW_LOG("INFO: RLSW: Using SIMD instructions: RVV\n"); #endif return true; } void swClose(void) { // NOTE: Starts at texture 1, texture 0 does not have to be freed for (int i = 1; i < RLSW.loadedTextureCount; i++) { if (sw_is_texture_valid(i)) { SW_FREE(RLSW.loadedTextures[i].pixels); } } SW_FREE(RLSW.framebuffer.pixels); SW_FREE(RLSW.loadedTextures); SW_FREE(RLSW.freeTextureIds); RLSW = SW_CURLY_INIT(sw_context_t) { 0 }; } bool swResizeFramebuffer(int w, int h) { return sw_framebuffer_resize(w, h); } void swCopyFramebuffer(int x, int y, int w, int h, SWformat format, SWtype type, void *pixels) { sw_pixelformat_t pFormat = (sw_pixelformat_t)sw_get_pixel_format(format, type); if (w <= 0) { RLSW.errCode = SW_INVALID_VALUE; return; } if (h <= 0) { RLSW.errCode = SW_INVALID_VALUE; return; } if (w > RLSW.framebuffer.width) w = RLSW.framebuffer.width; if (h > RLSW.framebuffer.height) h = RLSW.framebuffer.height; x = sw_clampi(x, 0, w); y = sw_clampi(y, 0, h); if ((x >= w) || (y >= h)) return; if ((x == 0) && (y == 0) && (w == RLSW.framebuffer.width) && (h == RLSW.framebuffer.height)) { #if SW_COLOR_BUFFER_BITS == 32 if (pFormat == SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8A8) { sw_framebuffer_copy_fast(pixels); return; } #elif SW_COLOR_BUFFER_BITS == 16 if (pFormat == SW_PIXELFORMAT_UNCOMPRESSED_R5G6B5) { sw_framebuffer_copy_fast(pixels); return; } #endif } switch (pFormat) { case SW_PIXELFORMAT_UNCOMPRESSED_GRAYSCALE: sw_framebuffer_copy_to_GRAYALPHA(x, y, w, h, (uint8_t *)pixels); break; case SW_PIXELFORMAT_UNCOMPRESSED_GRAY_ALPHA: sw_framebuffer_copy_to_GRAYALPHA(x, y, w, h, (uint8_t *)pixels); break; case SW_PIXELFORMAT_UNCOMPRESSED_R5G6B5: sw_framebuffer_copy_to_R5G6B5(x, y, w, h, (uint16_t *)pixels); break; case SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8: sw_framebuffer_copy_to_R8G8B8(x, y, w, h, (uint8_t *)pixels); break; case SW_PIXELFORMAT_UNCOMPRESSED_R5G5B5A1: sw_framebuffer_copy_to_R5G5B5A1(x, y, w, h, (uint16_t *)pixels); break; case SW_PIXELFORMAT_UNCOMPRESSED_R4G4B4A4: sw_framebuffer_copy_to_R4G4B4A4(x, y, w, h, (uint16_t *)pixels); break; //case SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8A8: sw_framebuffer_copy_to_R8G8B8A8(x, y, w, h, (uint8_t *)pixels); break; // Below: not implemented case SW_PIXELFORMAT_UNCOMPRESSED_R32: case SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32: case SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32A32: case SW_PIXELFORMAT_UNCOMPRESSED_R16: case SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16: case SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16A16: default: RLSW.errCode = SW_INVALID_ENUM; break; } } void swBlitFramebuffer(int xDst, int yDst, int wDst, int hDst, int xSrc, int ySrc, int wSrc, int hSrc, SWformat format, SWtype type, void *pixels) { sw_pixelformat_t pFormat = (sw_pixelformat_t)sw_get_pixel_format(format, type); if (wSrc <= 0) { RLSW.errCode = SW_INVALID_VALUE; return; } if (hSrc <= 0) { RLSW.errCode = SW_INVALID_VALUE; return; } if (wSrc > RLSW.framebuffer.width) wSrc = RLSW.framebuffer.width; if (hSrc > RLSW.framebuffer.height) hSrc = RLSW.framebuffer.height; xSrc = sw_clampi(xSrc, 0, wSrc); ySrc = sw_clampi(ySrc, 0, hSrc); // Check if the sizes are identical after clamping the source to avoid unexpected issues // REVIEW: This repeats the operations if true, so we could make a copy function without these checks if (xDst == xSrc && yDst == ySrc && wDst == wSrc && hDst == hSrc) { swCopyFramebuffer(xSrc, ySrc, wSrc, hSrc, format, type, pixels); } switch (pFormat) { case SW_PIXELFORMAT_UNCOMPRESSED_GRAYSCALE: sw_framebuffer_blit_to_GRAYALPHA(xDst, yDst, wDst, hDst, xSrc, ySrc, wSrc, hSrc, (uint8_t *)pixels); break; case SW_PIXELFORMAT_UNCOMPRESSED_GRAY_ALPHA: sw_framebuffer_blit_to_GRAYALPHA(xDst, yDst, wDst, hDst, xSrc, ySrc, wSrc, hSrc, (uint8_t *)pixels); break; case SW_PIXELFORMAT_UNCOMPRESSED_R5G6B5: sw_framebuffer_blit_to_R5G6B5(xDst, yDst, wDst, hDst, xSrc, ySrc, wSrc, hSrc, (uint16_t *)pixels); break; case SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8: sw_framebuffer_blit_to_R8G8B8(xDst, yDst, wDst, hDst, xSrc, ySrc, wSrc, hSrc, (uint8_t *)pixels); break; case SW_PIXELFORMAT_UNCOMPRESSED_R5G5B5A1: sw_framebuffer_blit_to_R5G5B5A1(xDst, yDst, wDst, hDst, xSrc, ySrc, wSrc, hSrc, (uint16_t *)pixels); break; case SW_PIXELFORMAT_UNCOMPRESSED_R4G4B4A4: sw_framebuffer_blit_to_R4G4B4A4(xDst, yDst, wDst, hDst, xSrc, ySrc, wSrc, hSrc, (uint16_t *)pixels); break; case SW_PIXELFORMAT_UNCOMPRESSED_R8G8B8A8: sw_framebuffer_blit_to_R8G8B8A8(xDst, yDst, wDst, hDst, xSrc, ySrc, wSrc, hSrc, (uint8_t *)pixels); break; // Below: not implemented case SW_PIXELFORMAT_UNCOMPRESSED_R32: case SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32: case SW_PIXELFORMAT_UNCOMPRESSED_R32G32B32A32: case SW_PIXELFORMAT_UNCOMPRESSED_R16: case SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16: case SW_PIXELFORMAT_UNCOMPRESSED_R16G16B16A16: default: RLSW.errCode = SW_INVALID_ENUM; break; } } void swEnable(SWstate state) { switch (state) { case SW_SCISSOR_TEST: RLSW.stateFlags |= SW_STATE_SCISSOR_TEST; break; case SW_TEXTURE_2D: RLSW.stateFlags |= SW_STATE_TEXTURE_2D; break; case SW_DEPTH_TEST: RLSW.stateFlags |= SW_STATE_DEPTH_TEST; break; case SW_CULL_FACE: RLSW.stateFlags |= SW_STATE_CULL_FACE; break; case SW_BLEND: RLSW.stateFlags |= SW_STATE_BLEND; break; default: RLSW.errCode = SW_INVALID_ENUM; break; } } void swDisable(SWstate state) { switch (state) { case SW_SCISSOR_TEST: RLSW.stateFlags &= ~SW_STATE_SCISSOR_TEST; break; case SW_TEXTURE_2D: RLSW.stateFlags &= ~SW_STATE_TEXTURE_2D; break; case SW_DEPTH_TEST: RLSW.stateFlags &= ~SW_STATE_DEPTH_TEST; break; case SW_CULL_FACE: RLSW.stateFlags &= ~SW_STATE_CULL_FACE; break; case SW_BLEND: RLSW.stateFlags &= ~SW_STATE_BLEND; break; default: RLSW.errCode = SW_INVALID_ENUM; break; } } void swGetIntegerv(SWget name, int *v) { switch (name) { case SW_MODELVIEW_STACK_DEPTH: *v = SW_MODELVIEW_STACK_DEPTH; break; case SW_PROJECTION_STACK_DEPTH: *v = SW_PROJECTION_STACK_DEPTH; break; case SW_TEXTURE_STACK_DEPTH: *v = SW_TEXTURE_STACK_DEPTH; break; default: RLSW.errCode = SW_INVALID_ENUM; break; } } void swGetFloatv(SWget name, float *v) { switch (name) { case SW_COLOR_CLEAR_VALUE: { sw_framebuffer_read_color(v, &RLSW.clearValue); } break; case SW_DEPTH_CLEAR_VALUE: { v[0] = sw_framebuffer_read_depth(&RLSW.clearValue); } break; case SW_CURRENT_COLOR: { v[0] = RLSW.vertexBuffer[RLSW.vertexCounter - 1].color[0]; v[1] = RLSW.vertexBuffer[RLSW.vertexCounter - 1].color[1]; v[2] = RLSW.vertexBuffer[RLSW.vertexCounter - 1].color[2]; v[3] = RLSW.vertexBuffer[RLSW.vertexCounter - 1].color[3]; } break; case SW_CURRENT_TEXTURE_COORDS: { v[0] = RLSW.vertexBuffer[RLSW.vertexCounter - 1].texcoord[0]; v[1] = RLSW.vertexBuffer[RLSW.vertexCounter - 1].texcoord[1]; } break; case SW_POINT_SIZE: { v[0] = 2.0f*RLSW.pointRadius; } break; case SW_LINE_WIDTH: { v[0] = RLSW.lineWidth; } break; case SW_MODELVIEW_MATRIX: { for (int i = 0; i < 16; i++) v[i] = RLSW.stackModelview[RLSW.stackModelviewCounter - 1][i]; } break; case SW_PROJECTION_MATRIX: { for (int i = 0; i < 16; i++) v[i] = RLSW.stackProjection[RLSW.stackProjectionCounter - 1][i]; } break; case SW_TEXTURE_MATRIX: { for (int i = 0; i < 16; i++) v[i] = RLSW.stackTexture[RLSW.stackTextureCounter - 1][i]; } break; default: RLSW.errCode = SW_INVALID_ENUM; break; } } const char *swGetString(SWget name) { const char *result = NULL; switch (name) { case SW_VENDOR: result = "RLSW Header"; break; case SW_RENDERER: result = "RLSW Software Renderer"; break; case SW_VERSION: result = "RLSW 1.0"; break; case SW_EXTENSIONS: result = "None"; break; default: RLSW.errCode = SW_INVALID_ENUM; break; } return result; } SWerrcode swGetError(void) { SWerrcode ret = RLSW.errCode; RLSW.errCode = SW_NO_ERROR; return ret; } void swViewport(int x, int y, int width, int height) { if ((width < 0) || (height < 0)) { RLSW.errCode = SW_INVALID_VALUE; return; } RLSW.vpSize[0] = width; RLSW.vpSize[1] = height; RLSW.vpHalf[0] = width/2.0f; RLSW.vpHalf[1] = height/2.0f; RLSW.vpCenter[0] = (float)x + RLSW.vpHalf[0]; RLSW.vpCenter[1] = (float)y + RLSW.vpHalf[1]; RLSW.vpMin[0] = sw_clampi(x, 0, RLSW.framebuffer.width - 1); RLSW.vpMin[1] = sw_clampi(y, 0, RLSW.framebuffer.height - 1); RLSW.vpMax[0] = sw_clampi(x + width, 0, RLSW.framebuffer.width - 1); RLSW.vpMax[1] = sw_clampi(y + height, 0, RLSW.framebuffer.height - 1); } void swScissor(int x, int y, int width, int height) { if ((width < 0) || (height < 0)) { RLSW.errCode = SW_INVALID_VALUE; return; } RLSW.scMin[0] = sw_clampi(x, 0, RLSW.framebuffer.width - 1); RLSW.scMin[1] = sw_clampi(y, 0, RLSW.framebuffer.height - 1); RLSW.scMax[0] = sw_clampi(x + width, 0, RLSW.framebuffer.width - 1); RLSW.scMax[1] = sw_clampi(y + height, 0, RLSW.framebuffer.height - 1); RLSW.scClipMin[0] = (2.0f*(float)RLSW.scMin[0]/(float)RLSW.vpSize[0]) - 1.0f; RLSW.scClipMax[0] = (2.0f*(float)RLSW.scMax[0]/(float)RLSW.vpSize[0]) - 1.0f; RLSW.scClipMax[1] = 1.0f - (2.0f*(float)RLSW.scMin[1]/(float)RLSW.vpSize[1]); RLSW.scClipMin[1] = 1.0f - (2.0f*(float)RLSW.scMax[1]/(float)RLSW.vpSize[1]); } void swClearColor(float r, float g, float b, float a) { float v[4] = { r, g, b, a }; sw_framebuffer_write_color(&RLSW.clearValue, v); } void swClearDepth(float depth) { sw_framebuffer_write_depth(&RLSW.clearValue, depth); } void swClear(uint32_t bitmask) { int size = RLSW.framebuffer.width*RLSW.framebuffer.height; if ((bitmask & (SW_COLOR_BUFFER_BIT | SW_DEPTH_BUFFER_BIT)) == (SW_COLOR_BUFFER_BIT | SW_DEPTH_BUFFER_BIT)) { sw_framebuffer_fill(RLSW.framebuffer.pixels, size, RLSW.clearValue); } else if (bitmask & (SW_COLOR_BUFFER_BIT)) { sw_framebuffer_fill_color(RLSW.framebuffer.pixels, size, RLSW.clearValue.color); } else if (bitmask & SW_DEPTH_BUFFER_BIT) { sw_framebuffer_fill_depth(RLSW.framebuffer.pixels, size, RLSW.clearValue.depth); } } void swBlendFunc(SWfactor sfactor, SWfactor dfactor) { if (!sw_is_blend_src_factor_valid(sfactor) || !sw_is_blend_dst_factor_valid(dfactor)) { RLSW.errCode = SW_INVALID_ENUM; return; } RLSW.srcFactor = sfactor; RLSW.dstFactor = dfactor; switch (sfactor) { case SW_ZERO: RLSW.srcFactorFunc = sw_factor_zero; break; case SW_ONE: RLSW.srcFactorFunc = sw_factor_one; break; case SW_SRC_COLOR: RLSW.srcFactorFunc = sw_factor_src_color; break; case SW_ONE_MINUS_SRC_COLOR: RLSW.srcFactorFunc = sw_factor_one_minus_src_color; break; case SW_SRC_ALPHA: RLSW.srcFactorFunc = sw_factor_src_alpha; break; case SW_ONE_MINUS_SRC_ALPHA: RLSW.srcFactorFunc = sw_factor_one_minus_src_alpha; break; case SW_DST_ALPHA: RLSW.srcFactorFunc = sw_factor_dst_alpha; break; case SW_ONE_MINUS_DST_ALPHA: RLSW.srcFactorFunc = sw_factor_one_minus_dst_alpha; break; case SW_DST_COLOR: RLSW.srcFactorFunc = sw_factor_dst_color; break; case SW_ONE_MINUS_DST_COLOR: RLSW.srcFactorFunc = sw_factor_one_minus_dst_color; break; case SW_SRC_ALPHA_SATURATE: RLSW.srcFactorFunc = sw_factor_src_alpha_saturate; break; default: break; } switch (dfactor) { case SW_ZERO: RLSW.dstFactorFunc = sw_factor_zero; break; case SW_ONE: RLSW.dstFactorFunc = sw_factor_one; break; case SW_SRC_COLOR: RLSW.dstFactorFunc = sw_factor_src_color; break; case SW_ONE_MINUS_SRC_COLOR: RLSW.dstFactorFunc = sw_factor_one_minus_src_color; break; case SW_SRC_ALPHA: RLSW.dstFactorFunc = sw_factor_src_alpha; break; case SW_ONE_MINUS_SRC_ALPHA: RLSW.dstFactorFunc = sw_factor_one_minus_src_alpha; break; case SW_DST_ALPHA: RLSW.dstFactorFunc = sw_factor_dst_alpha; break; case SW_ONE_MINUS_DST_ALPHA: RLSW.dstFactorFunc = sw_factor_one_minus_dst_alpha; break; case SW_DST_COLOR: RLSW.dstFactorFunc = sw_factor_dst_color; break; case SW_ONE_MINUS_DST_COLOR: RLSW.dstFactorFunc = sw_factor_one_minus_dst_color; break; case SW_SRC_ALPHA_SATURATE: break; default: break; } } void swPolygonMode(SWpoly mode) { if (!sw_is_poly_mode_valid(mode)) { RLSW.errCode = SW_INVALID_ENUM; return; } RLSW.polyMode = mode; } void swCullFace(SWface face) { if (!sw_is_face_valid(face)) { RLSW.errCode = SW_INVALID_ENUM; return; } RLSW.cullFace = face; } void swPointSize(float size) { RLSW.pointRadius = floorf(size*0.5f); } void swLineWidth(float width) { RLSW.lineWidth = roundf(width); } void swMatrixMode(SWmatrix mode) { switch (mode) { case SW_PROJECTION: RLSW.currentMatrix = &RLSW.stackProjection[RLSW.stackProjectionCounter - 1]; break; case SW_MODELVIEW: RLSW.currentMatrix = &RLSW.stackModelview[RLSW.stackModelviewCounter - 1]; break; case SW_TEXTURE: RLSW.currentMatrix = &RLSW.stackTexture[RLSW.stackTextureCounter - 1]; break; default: RLSW.errCode = SW_INVALID_ENUM; return; } RLSW.currentMatrixMode = mode; } void swPushMatrix(void) { switch (RLSW.currentMatrixMode) { case SW_PROJECTION: { if (RLSW.stackProjectionCounter >= SW_MAX_PROJECTION_STACK_SIZE) { RLSW.errCode = SW_STACK_OVERFLOW; return; } int iOld = RLSW.stackProjectionCounter - 1; int iNew = RLSW.stackProjectionCounter++; for (int i = 0; i < 16; i++) { RLSW.stackProjection[iNew][i] = RLSW.stackProjection[iOld][i]; } RLSW.currentMatrix = &RLSW.stackProjection[iNew]; } break; case SW_MODELVIEW: { if (RLSW.stackModelviewCounter >= SW_MAX_MODELVIEW_STACK_SIZE) { RLSW.errCode = SW_STACK_OVERFLOW; return; } int iOld = RLSW.stackModelviewCounter - 1; int iNew = RLSW.stackModelviewCounter++; for (int i = 0; i < 16; i++) { RLSW.stackModelview[iNew][i] = RLSW.stackModelview[iOld][i]; } RLSW.currentMatrix = &RLSW.stackModelview[iNew]; } break; case SW_TEXTURE: { if (RLSW.stackTextureCounter >= SW_MAX_TEXTURE_STACK_SIZE) { RLSW.errCode = SW_STACK_OVERFLOW; return; } int iOld = RLSW.stackTextureCounter - 1; int iNew = RLSW.stackTextureCounter++; for (int i = 0; i < 16; i++) { RLSW.stackTexture[iNew][i] = RLSW.stackTexture[iOld][i]; } RLSW.currentMatrix = &RLSW.stackTexture[iNew]; } break; default: break; } } void swPopMatrix(void) { switch (RLSW.currentMatrixMode) { case SW_PROJECTION: { if (RLSW.stackProjectionCounter <= 0) { RLSW.errCode = SW_STACK_UNDERFLOW; return; } RLSW.currentMatrix = &RLSW.stackProjection[--RLSW.stackProjectionCounter]; RLSW.isDirtyMVP = true; //< The MVP is considered to have been changed } break; case SW_MODELVIEW: { if (RLSW.stackModelviewCounter <= 0) { RLSW.errCode = SW_STACK_UNDERFLOW; return; } RLSW.currentMatrix = &RLSW.stackModelview[--RLSW.stackModelviewCounter]; RLSW.isDirtyMVP = true; //< The MVP is considered to have been changed } break; case SW_TEXTURE: { if (RLSW.stackTextureCounter <= 0) { RLSW.errCode = SW_STACK_UNDERFLOW; return; } RLSW.currentMatrix = &RLSW.stackTexture[--RLSW.stackTextureCounter]; } break; default: break; } } void swLoadIdentity(void) { sw_matrix_id(*RLSW.currentMatrix); if (RLSW.currentMatrixMode != SW_TEXTURE) RLSW.isDirtyMVP = true; } void swTranslatef(float x, float y, float z) { sw_matrix_t mat; sw_matrix_id(mat); mat[12] = x; mat[13] = y; mat[14] = z; sw_matrix_mul(*RLSW.currentMatrix, mat, *RLSW.currentMatrix); if (RLSW.currentMatrixMode != SW_TEXTURE) RLSW.isDirtyMVP = true; } void swRotatef(float angle, float x, float y, float z) { angle *= SW_DEG2RAD; float lengthSq = x*x + y*y + z*z; if ((lengthSq != 1.0f) && (lengthSq != 0.0f)) { float invLength = 1.0f/sqrtf(lengthSq); x *= invLength; y *= invLength; z *= invLength; } float sinres = sinf(angle); float cosres = cosf(angle); float t = 1.0f - cosres; sw_matrix_t mat; mat[0] = x*x*t + cosres; mat[1] = y*x*t + z*sinres; mat[2] = z*x*t - y*sinres; mat[3] = 0.0f; mat[4] = x*y*t - z*sinres; mat[5] = y*y*t + cosres; mat[6] = z*y*t + x*sinres; mat[7] = 0.0f; mat[8] = x*z*t + y*sinres; mat[9] = y*z*t - x*sinres; mat[10] = z*z*t + cosres; mat[11] = 0.0f; mat[12] = 0.0f; mat[13] = 0.0f; mat[14] = 0.0f; mat[15] = 1.0f; sw_matrix_mul(*RLSW.currentMatrix, mat, *RLSW.currentMatrix); if (RLSW.currentMatrixMode != SW_TEXTURE) RLSW.isDirtyMVP = true; } void swScalef(float x, float y, float z) { sw_matrix_t mat; mat[0] = x, mat[1] = 0, mat[2] = 0, mat[3] = 0; mat[4] = 0, mat[5] = y, mat[6] = 0, mat[7] = 0; mat[8] = 0, mat[9] = 0, mat[10] = z, mat[11] = 0; mat[12] = 0, mat[13] = 0, mat[14] = 0, mat[15] = 1; sw_matrix_mul(*RLSW.currentMatrix, mat, *RLSW.currentMatrix); if (RLSW.currentMatrixMode != SW_TEXTURE) RLSW.isDirtyMVP = true; } void swMultMatrixf(const float *mat) { sw_matrix_mul(*RLSW.currentMatrix, mat, *RLSW.currentMatrix); if (RLSW.currentMatrixMode != SW_TEXTURE) RLSW.isDirtyMVP = true; } void swFrustum(double left, double right, double bottom, double top, double znear, double zfar) { sw_matrix_t mat; double rl = right - left; double tb = top - bottom; double fn = zfar - znear; mat[0] = (float)(znear*2.0)/rl; mat[1] = 0.0f; mat[2] = 0.0f; mat[3] = 0.0f; mat[4] = 0.0f; mat[5] = (float)(znear*2.0)/tb; mat[6] = 0.0f; mat[7] = 0.0f; mat[8] = (float)(right + left)/rl; mat[9] = (float)(top + bottom)/tb; mat[10] = -(float)(zfar + znear)/fn; mat[11] = -1.0f; mat[12] = 0.0f; mat[13] = 0.0f; mat[14] = -(zfar*znear*2.0)/fn; mat[15] = 0.0f; sw_matrix_mul(*RLSW.currentMatrix, *RLSW.currentMatrix, mat); if (RLSW.currentMatrixMode != SW_TEXTURE) RLSW.isDirtyMVP = true; } void swOrtho(double left, double right, double bottom, double top, double znear, double zfar) { sw_matrix_t mat; double rl = right - left; double tb = top - bottom; double fn = zfar - znear; mat[0] = 2.0f/(float)rl; mat[1] = 0.0f; mat[2] = 0.0f; mat[3] = 0.0f; mat[4] = 0.0f; mat[5] = 2.0f/(float)tb; mat[6] = 0.0f; mat[7] = 0.0f; mat[8] = 0.0f; mat[9] = 0.0f; mat[10] = -2.0f/(float)fn; mat[11] = 0.0f; mat[12] = -(float)(left + right)/rl; mat[13] = -(float)(top + bottom)/tb; mat[14] = -(float)(zfar + znear)/fn; mat[15] = 1.0f; sw_matrix_mul(*RLSW.currentMatrix, *RLSW.currentMatrix, mat); if (RLSW.currentMatrixMode != SW_TEXTURE) RLSW.isDirtyMVP = true; } void swBegin(SWdraw mode) { // Check if the draw mode is valid if (!sw_is_draw_mode_valid(mode)) { RLSW.errCode = SW_INVALID_ENUM; return; } // Recalculate the MVP if this is needed if (RLSW.isDirtyMVP) { sw_matrix_mul_rst(RLSW.matMVP, RLSW.stackModelview[RLSW.stackModelviewCounter - 1], RLSW.stackProjection[RLSW.stackProjectionCounter - 1]); RLSW.isDirtyMVP = false; } // Obtain the number of vertices needed for this primitive switch (mode) { case SW_POINTS: RLSW.reqVertices = 1; break; case SW_LINES: RLSW.reqVertices = 2; break; case SW_TRIANGLES: RLSW.reqVertices = 3; break; case SW_QUADS: RLSW.reqVertices = 4; break; } // Initialize required values RLSW.vertexCounter = 0; RLSW.drawMode = mode; } void swEnd(void) { RLSW.drawMode = (SWdraw)0; } void swVertex2i(int x, int y) { const float v[4] = { (float)x, (float)y, 0.0f, 1.0f }; sw_immediate_push_vertex(v, RLSW.current.color, RLSW.current.texcoord); } void swVertex2f(float x, float y) { const float v[4] = { x, y, 0.0f, 1.0f }; sw_immediate_push_vertex(v, RLSW.current.color, RLSW.current.texcoord); } void swVertex2fv(const float *v) { const float v4[4] = { v[0], v[1], 0.0f, 1.0f }; sw_immediate_push_vertex(v4, RLSW.current.color, RLSW.current.texcoord); } void swVertex3i(int x, int y, int z) { const float v[4] = { (float)x, (float)y, (float)z, 1.0f }; sw_immediate_push_vertex(v, RLSW.current.color, RLSW.current.texcoord); } void swVertex3f(float x, float y, float z) { const float v[4] = { x, y, z, 1.0f }; sw_immediate_push_vertex(v, RLSW.current.color, RLSW.current.texcoord); } void swVertex3fv(const float *v) { const float v4[4] = { v[0], v[1], v[2], 1.0f }; sw_immediate_push_vertex(v4, RLSW.current.color, RLSW.current.texcoord); } void swVertex4i(int x, int y, int z, int w) { const float v[4] = { (float)x, (float)y, (float)z, (float)w }; sw_immediate_push_vertex(v, RLSW.current.color, RLSW.current.texcoord); } void swVertex4f(float x, float y, float z, float w) { const float v[4] = { x, y, z, w }; sw_immediate_push_vertex(v, RLSW.current.color, RLSW.current.texcoord); } void swVertex4fv(const float *v) { sw_immediate_push_vertex(v, RLSW.current.color, RLSW.current.texcoord); } void swColor3ub(uint8_t r, uint8_t g, uint8_t b) { float cv[4]; cv[0] = (float)r*SW_INV_255; cv[1] = (float)g*SW_INV_255; cv[2] = (float)b*SW_INV_255; cv[3] = 1.0f; swColor4fv(cv); } void swColor3ubv(const uint8_t *v) { float cv[4]; cv[0] = (float)v[0]*SW_INV_255; cv[1] = (float)v[1]*SW_INV_255; cv[2] = (float)v[2]*SW_INV_255; cv[3] = 1.0f; swColor4fv(cv); } void swColor3f(float r, float g, float b) { float cv[4]; cv[0] = r; cv[1] = g; cv[2] = b; cv[3] = 1.0f; swColor4fv(cv); } void swColor3fv(const float *v) { float cv[4]; cv[0] = v[0]; cv[1] = v[1]; cv[2] = v[2]; cv[3] = 1.0f; swColor4fv(cv); } void swColor4ub(uint8_t r, uint8_t g, uint8_t b, uint8_t a) { float cv[4]; cv[0] = (float)r*SW_INV_255; cv[1] = (float)g*SW_INV_255; cv[2] = (float)b*SW_INV_255; cv[3] = (float)a*SW_INV_255; swColor4fv(cv); } void swColor4ubv(const uint8_t *v) { float cv[4]; cv[0] = (float)v[0]*SW_INV_255; cv[1] = (float)v[1]*SW_INV_255; cv[2] = (float)v[2]*SW_INV_255; cv[3] = (float)v[3]*SW_INV_255; swColor4fv(cv); } void swColor4f(float r, float g, float b, float a) { float cv[4]; cv[0] = r; cv[1] = g; cv[2] = b; cv[3] = a; swColor4fv(cv); } void swColor4fv(const float *v) { for (int i = 0; i < 4; i++) RLSW.current.color[i] = v[i]; } void swTexCoord2f(float u, float v) { const float *m = RLSW.stackTexture[RLSW.stackTextureCounter - 1]; RLSW.current.texcoord[0] = m[0]*u + m[4]*v + m[12]; RLSW.current.texcoord[1] = m[1]*u + m[5]*v + m[13]; } void swTexCoord2fv(const float *v) { const float *m = RLSW.stackTexture[RLSW.stackTextureCounter - 1]; RLSW.current.texcoord[0] = m[0]*v[0] + m[4]*v[1] + m[12]; RLSW.current.texcoord[1] = m[1]*v[0] + m[5]*v[1] + m[13]; } void swBindArray(SWarray type, void *buffer) { switch (type) { case SW_VERTEX_ARRAY: RLSW.array.positions = (float *)buffer; break; case SW_TEXTURE_COORD_ARRAY: RLSW.array.texcoords = (float *)buffer; break; case SW_COLOR_ARRAY: RLSW.array.colors = (uint8_t *)buffer; break; default: break; } } void swDrawArrays(SWdraw mode, int offset, int count) { if (RLSW.array.positions == 0) { RLSW.errCode = SW_INVALID_OPERATION; return; } swBegin(mode); { const float *texMatrix = RLSW.stackTexture[RLSW.stackTextureCounter - 1]; const float *defaultTexcoord = RLSW.current.texcoord; const float *defaultColor = RLSW.current.color; const float *positions = RLSW.array.positions; const float *texcoords = RLSW.array.texcoords; const uint8_t *colors = RLSW.array.colors; int end = offset + count; for (int i = offset; i < end; i++) { float u, v; if (texcoords) { int idx = 2*i; u = texcoords[idx]; v = texcoords[idx + 1]; } else { u = defaultTexcoord[0]; v = defaultTexcoord[1]; } float texcoord[2]; texcoord[0] = texMatrix[0]*u + texMatrix[4]*v + texMatrix[12]; texcoord[1] = texMatrix[1]*u + texMatrix[5]*v + texMatrix[13]; float color[4] = { defaultColor[0], defaultColor[1], defaultColor[2], defaultColor[3] }; if (colors) { int idx = 4*i; color[0] *= (float)colors[idx]*SW_INV_255; color[1] *= (float)colors[idx + 1]*SW_INV_255; color[2] *= (float)colors[idx + 2]*SW_INV_255; color[3] *= (float)colors[idx + 3]*SW_INV_255; } int idx = 3*i; float position[4] = { positions[idx], positions[idx + 1], positions[idx + 2], 1.0f }; sw_immediate_push_vertex(position, color, texcoord); } } swEnd(); } void swDrawElements(SWdraw mode, int count, int type, const void *indices) { if (RLSW.array.positions == 0) { RLSW.errCode = SW_INVALID_OPERATION; return; } if (count < 0) { RLSW.errCode = SW_INVALID_VALUE; return; } const uint8_t *indicesUb = NULL; const uint16_t *indicesUs = NULL; const uint32_t *indicesUi = NULL; switch (type) { case SW_UNSIGNED_BYTE: indicesUb = (const uint8_t *)indices; break; case SW_UNSIGNED_SHORT: indicesUs = (const uint16_t *)indices; break; case SW_UNSIGNED_INT: indicesUi = (const uint32_t *)indices; break; default: RLSW.errCode = SW_INVALID_ENUM; return; } swBegin(mode); { const float *texMatrix = RLSW.stackTexture[RLSW.stackTextureCounter - 1]; const float *defaultTexcoord = RLSW.current.texcoord; const float *defaultColor = RLSW.current.color; const float *positions = RLSW.array.positions; const float *texcoords = RLSW.array.texcoords; const uint8_t *colors = RLSW.array.colors; for (int i = 0; i < count; i++) { int index = indicesUb? indicesUb[i] : (indicesUs? indicesUs[i] : indicesUi[i]); float u, v; if (texcoords) { int idx = 2*index; u = texcoords[idx]; v = texcoords[idx + 1]; } else { u = defaultTexcoord[0]; v = defaultTexcoord[1]; } float texcoord[2]; texcoord[0] = texMatrix[0]*u + texMatrix[4]*v + texMatrix[12]; texcoord[1] = texMatrix[1]*u + texMatrix[5]*v + texMatrix[13]; float color[4] = { defaultColor[0], defaultColor[1], defaultColor[2], defaultColor[3] }; if (colors) { int idx = 4*index; color[0] *= (float)colors[idx]*SW_INV_255; color[1] *= (float)colors[idx + 1]*SW_INV_255; color[2] *= (float)colors[idx + 2]*SW_INV_255; color[3] *= (float)colors[idx + 3]*SW_INV_255; } int idx = 3*index; float position[4] = { positions[idx], positions[idx + 1], positions[idx + 2], 1.0f }; sw_immediate_push_vertex(position, color, texcoord); } } swEnd(); } void swGenTextures(int count, uint32_t *textures) { if ((count == 0) || (textures == NULL)) return; for (int i = 0; i < count; i++) { if (RLSW.loadedTextureCount >= SW_MAX_TEXTURES) { RLSW.errCode = SW_STACK_OVERFLOW; // WARNING: Out of memory, not really stack overflow return; } uint32_t id = 0; if (RLSW.freeTextureIdCount > 0) id = RLSW.freeTextureIds[--RLSW.freeTextureIdCount]; else id = RLSW.loadedTextureCount++; RLSW.loadedTextures[id] = RLSW.loadedTextures[0]; textures[i] = id; } } void swDeleteTextures(int count, uint32_t *textures) { if ((count == 0) || (textures == NULL)) return; for (int i = 0; i < count; i++) { if (!sw_is_texture_valid(textures[i])) { RLSW.errCode = SW_INVALID_VALUE; continue; } SW_FREE(RLSW.loadedTextures[textures[i]].pixels); RLSW.loadedTextures[textures[i]].pixels = NULL; RLSW.freeTextureIds[RLSW.freeTextureIdCount++] = textures[i]; } } void swTexImage2D(int width, int height, SWformat format, SWtype type, const void *data) { uint32_t id = RLSW.currentTexture; if (!sw_is_texture_valid(id)) { RLSW.errCode = SW_INVALID_VALUE; return; } int pixelFormat = sw_get_pixel_format(format, type); if (pixelFormat <= SW_PIXELFORMAT_UNKNOWN) { RLSW.errCode = SW_INVALID_ENUM; return; } sw_texture_t *texture = &RLSW.loadedTextures[id]; int size = width*height; texture->pixels = SW_MALLOC(4*size); if (texture->pixels == NULL) { RLSW.errCode = SW_STACK_OVERFLOW; // WARNING: Out of memory... return; } for (int i = 0; i < size; i++) { uint32_t *dst = &((uint32_t*)texture->pixels)[i]; sw_get_pixel((uint8_t*)dst, data, i, pixelFormat); } texture->width = width; texture->height = height; texture->wMinus1 = width - 1; texture->hMinus1 = height - 1; texture->tx = 1.0f/width; texture->ty = 1.0f/height; } void swTexParameteri(int param, int value) { uint32_t id = RLSW.currentTexture; if (!sw_is_texture_valid(id)) { RLSW.errCode = SW_INVALID_VALUE; return; } sw_texture_t *texture = &RLSW.loadedTextures[id]; switch (param) { case SW_TEXTURE_MIN_FILTER: { if (!sw_is_texture_filter_valid(value)) { RLSW.errCode = SW_INVALID_ENUM; return; } texture->minFilter = (SWfilter)value; } break; case SW_TEXTURE_MAG_FILTER: { if (!sw_is_texture_filter_valid(value)) { RLSW.errCode = SW_INVALID_ENUM; return; } texture->magFilter = (SWfilter)value; } break; case SW_TEXTURE_WRAP_S: { if (!sw_is_texture_wrap_valid(value)) { RLSW.errCode = SW_INVALID_ENUM; return; } texture->sWrap = (SWwrap)value; } break; case SW_TEXTURE_WRAP_T: { if (!sw_is_texture_wrap_valid(value)) { RLSW.errCode = SW_INVALID_ENUM; return; } texture->tWrap = (SWwrap)value; } break; default: RLSW.errCode = SW_INVALID_ENUM; break; } } void swBindTexture(uint32_t id) { if (id >= SW_MAX_TEXTURES) { RLSW.errCode = SW_INVALID_VALUE; return; } if (RLSW.loadedTextures[id].pixels == NULL) { RLSW.errCode = SW_INVALID_OPERATION; return; } RLSW.currentTexture = id; } #endif // RLSW_IMPLEMENTATION