diff --git a/CMakeLists.txt b/CMakeLists.txt index d8200c6..444119b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -61,7 +61,7 @@ add_subdirectory(third_party/imgui) add_subdirectory(third_party/sdl) add_subdirectory(third_party/portaudio) add_subdirectory(third_party/spdlog) -add_subdirectory(third_party/HLSLcc) +add_subdirectory(third_party/slang) # setup portaudio set(PA_USE_ASIO ON CACHE BOOL "" FORCE) @@ -99,7 +99,6 @@ set(SDL_LOCALE OFF CACHE BOOL "" FORCE) set(SDL_MISC OFF CACHE BOOL "" FORCE) set(SDL_MMX OFF CACHE BOOL "" FORCE) set(SDL_OFFSCREEN OFF CACHE BOOL "" FORCE) -set(SDL_OPENGLES OFF CACHE BOOL "" FORCE) set(SDL_POWER OFF CACHE BOOL "" FORCE) set(SDL_RENDER OFF CACHE BOOL "" FORCE) set(SDL_RENDER_D3D OFF CACHE BOOL "" FORCE) @@ -114,17 +113,19 @@ set(SDL_TIMERS OFF CACHE BOOL "" FORCE) set(SDL_VIRTUAL_JOYSTICK OFF CACHE BOOL "" FORCE) set(SDL_TEST_LIBRARY OFF CACHE BOOL "" FORCE) -if (WIN32 OR (UNIX AND NOT APPLE)) - set(SDL_VULKAN ON CACHE BOOL "" FORCE) -else() - set(SDL_VULKAN OFF CACHE BOOL "" FORCE) +set(SDL_VULKAN ON CACHE BOOL "" FORCE) +set(SDL_OPENGL ON CACHE BOOL "" FORCE) +set(SDL_OPENGLES OFF CACHE BOOL "" FORCE) + +if (APPLE) set(SDL_METAL ON CACHE BOOL "" FORCE) +else() + set(SDL_METAL OFF CACHE BOOL "" FORCE) endif() set(SDL_WASAPI OFF CACHE BOOL "" FORCE) set(SDL_XINPUT OFF CACHE BOOL "" FORCE) set(SDL_DISABLE_UNINSTALL ON CACHE BOOL "" FORCE) -set(SDL_OPENGL OFF CACHE BOOL "" FORCE) # setup spdlog diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 13a7136..a0389c1 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -8,10 +8,10 @@ retrieve_files(ALL_FILES) add_library(${PROJECT_NAME} SHARED ${ALL_FILES}) -target_include_directories(${PROJECT_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} spdlog imgui HLSLcc) +target_include_directories(${PROJECT_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} spdlog imgui slang) -target_link_directories(${PROJECT_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} spdlog imgui HLSLcc) 
-target_link_libraries(${PROJECT_NAME} PUBLIC imgui spdlog ${SDL2_LIBRARIES} HLSLcc) +target_link_directories(${PROJECT_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} spdlog imgui) +target_link_libraries(${PROJECT_NAME} PUBLIC imgui spdlog ${SDL2_LIBRARIES} slang) target_precompile_headers(${PROJECT_NAME} PUBLIC extern.h) diff --git a/core/application/application.cpp b/core/application/application.cpp index 94a5299..b516452 100644 --- a/core/application/application.cpp +++ b/core/application/application.cpp @@ -8,6 +8,7 @@ #include "imgui_internal.h" #include "filesystem/stb_image.h" #include "rhi/texture.h" +#include "rhi/opengl/renderer_opengl.h" #include "spdlog/async.h" #include "spdlog/spdlog.h" #include "spdlog/sinks/basic_file_sink.h" @@ -18,6 +19,7 @@ bool g_is_running = true; bool g_exit_requested = false; +slang::IGlobalSession* g_slang_global_session = nullptr; application::~application() { @@ -26,6 +28,8 @@ application::~application() void application::init(window_params in_window_params, int argc, char** argv) { + slang::createGlobalSession(&g_slang_global_session); + try { auto async_file = spdlog::basic_logger_mt("async_file_logger", "logs/log.txt"); @@ -53,20 +57,24 @@ void application::init(window_params in_window_params, int argc, char** argv) bool use_dx11 = false; bool use_dx12 = false; bool use_vulkan = false; + bool use_opengl = false; #else bool use_vulkan = true; + bool use_opengl = false; #endif #if WIN32 command_line::instance().get_arg("dx11", use_dx11); command_line::instance().get_arg("dx12", use_dx12); command_line::instance().get_arg("vulkan", use_vulkan); + command_line::instance().get_arg("opengl", use_opengl); + // only one renderer can be used at a time - const int renderer_count = use_dx11 + use_dx12 + use_vulkan; + const int renderer_count = use_dx11 + use_dx12 + use_vulkan + use_opengl; assert(renderer_count <= 1); // if no renderer is specified, use dx11 - if (!(use_dx11 || use_dx12 || use_vulkan)) + if (!(use_dx11 || use_dx12 || 
use_vulkan || use_opengl)) { use_dx11 = true; } @@ -75,6 +83,11 @@ void application::init(window_params in_window_params, int argc, char** argv) { renderer_ = new renderer_dx11(); } + else if (use_opengl) + { + renderer_ = new renderer_opengl(); + window_flags |= SDL_WINDOW_OPENGL; + } // if (use_dx12) // { // renderer_ = new renderer_dx12(); @@ -90,11 +103,17 @@ void application::init(window_params in_window_params, int argc, char** argv) renderer_ = new renderer_vulkan(); window_flags |= SDL_WINDOW_VULKAN; } + else if (use_opengl) + { + renderer_ = new renderer_opengl(); + window_flags |= SDL_WINDOW_OPENGL; + } #endif // if (!renderer_) // renderer_ = new renderer_null(); - + renderer_->pre_init(); + if (in_window_params.fullscreen) window_flags |= SDL_WINDOW_FULLSCREEN; if (in_window_params.borderless) @@ -121,6 +140,7 @@ void application::init(window_params in_window_params, int argc, char** argv) SDL_ShowWindow(window_); renderer_->init(window_); + renderer_->init_slang(R"(E:\Projects\AronaStudio\Arona\shaders\)"); renderer_->resize(in_window_params.width, in_window_params.height); g_is_running = true; } @@ -147,9 +167,9 @@ int application::run() if (g_exit_requested) break; - renderer_->new_frame(); + renderer_->new_frame(window_); draw_gui(); - renderer_->end_frame(); + renderer_->end_frame(window_); } return 0; } @@ -162,7 +182,7 @@ void application::shutdown() delete renderer_; } -texture* application::load_texture(const std::string& path) const +std::shared_ptr application::load_texture(const std::string& path) const { int width = 0; int height = 0; @@ -177,7 +197,7 @@ texture* application::load_texture(const std::string& path) const return texture; } -texture* application::create_texture(const unsigned char* data, const int width, const int height) const +std::shared_ptr application::create_texture(const unsigned char* data, const int width, const int height) const { return renderer_->create_texture(data, width, height); } diff --git 
a/core/application/application.h b/core/application/application.h index 3c102a8..dc4fb5f 100644 --- a/core/application/application.h +++ b/core/application/application.h @@ -2,12 +2,16 @@ #include #include "SDL.h" #include "imgui.h" +#include "slang.h" class renderer; class texture; +class application; extern bool g_is_running; extern bool g_exit_requested; +extern slang::IGlobalSession* g_slang_global_session; +static application* g_app_instance = nullptr; struct window_params { @@ -26,18 +30,25 @@ struct window_params class CORE_API application { public: - application() = default; + application() + { + g_app_instance = this; + } virtual ~application(); application(const application&) = delete; application(application&&) = delete; + static application* get() + { + return g_app_instance; + } virtual void init(window_params in_window_params, int argc, char** argv); virtual int run(); virtual void shutdown(); virtual void draw_gui() = 0; virtual void init_imgui(ImGuiContext* in_context) = 0; - texture* load_texture(const std::string& path) const; - texture* create_texture(const unsigned char* data, const int width, const int height) const; + std::shared_ptr load_texture(const std::string& path) const; + std::shared_ptr create_texture(const unsigned char* data, const int width, const int height) const; renderer* get_renderer() const { return renderer_; } SDL_Window* get_window() const { return window_; } diff --git a/core/misc/ref_counting.cpp b/core/misc/ref_counting.cpp index f1fc1b2..9832bb0 100644 --- a/core/misc/ref_counting.cpp +++ b/core/misc/ref_counting.cpp @@ -1,2 +1 @@ -#include "E:/Projects/Arona/build/core/CMakeFiles/core.dir/Debug/cmake_pch.hxx" -#include "ref_counting.h" +#include "ref_counting.h" diff --git a/core/rhi/opengl/opengl_def.h b/core/rhi/opengl/opengl_def.h new file mode 100644 index 0000000..f14cadd --- /dev/null +++ b/core/rhi/opengl/opengl_def.h @@ -0,0 +1,51 @@ +#pragma once + +#define CHECK_GL_ERRORS \ +{\ + GLenum Error = 
glGetError();\ + if (Error != 0)\ + spdlog::critical("GL error: 0x{:x}", Error);\ +} + +#define GL_READ_FRAMEBUFFER 0x8CA8 +#define GL_DRAW_FRAMEBUFFER 0x8CA9 +#define GL_READ_FRAMEBUFFER_BINDING 0x8CAA + +#define GL_MAX_COLOR_ATTACHMENTS 0x8CDF +#define GL_COLOR_ATTACHMENT0 0x8CE0 +#define GL_COLOR_ATTACHMENT1 0x8CE1 +#define GL_COLOR_ATTACHMENT2 0x8CE2 +#define GL_COLOR_ATTACHMENT3 0x8CE3 +#define GL_COLOR_ATTACHMENT4 0x8CE4 +#define GL_COLOR_ATTACHMENT5 0x8CE5 +#define GL_COLOR_ATTACHMENT6 0x8CE6 +#define GL_COLOR_ATTACHMENT7 0x8CE7 +#define GL_COLOR_ATTACHMENT8 0x8CE8 +#define GL_COLOR_ATTACHMENT9 0x8CE9 +#define GL_COLOR_ATTACHMENT10 0x8CEA +#define GL_COLOR_ATTACHMENT11 0x8CEB +#define GL_COLOR_ATTACHMENT12 0x8CEC +#define GL_COLOR_ATTACHMENT13 0x8CED +#define GL_COLOR_ATTACHMENT14 0x8CEE +#define GL_COLOR_ATTACHMENT15 0x8CEF +#define GL_COLOR_ATTACHMENT16 0x8CF0 +#define GL_COLOR_ATTACHMENT17 0x8CF1 +#define GL_COLOR_ATTACHMENT18 0x8CF2 +#define GL_COLOR_ATTACHMENT19 0x8CF3 +#define GL_COLOR_ATTACHMENT20 0x8CF4 +#define GL_COLOR_ATTACHMENT21 0x8CF5 +#define GL_COLOR_ATTACHMENT22 0x8CF6 +#define GL_COLOR_ATTACHMENT23 0x8CF7 +#define GL_COLOR_ATTACHMENT24 0x8CF8 +#define GL_COLOR_ATTACHMENT25 0x8CF9 +#define GL_COLOR_ATTACHMENT26 0x8CFA +#define GL_COLOR_ATTACHMENT27 0x8CFB +#define GL_COLOR_ATTACHMENT28 0x8CFC +#define GL_COLOR_ATTACHMENT29 0x8CFD +#define GL_COLOR_ATTACHMENT30 0x8CFE +#define GL_COLOR_ATTACHMENT31 0x8CFF +#define GL_DEPTH_ATTACHMENT 0x8D00 +#define GL_STENCIL_ATTACHMENT 0x8D20 + +#define GL_COMPUTE_SHADER 0x91B9 +#define GL_GEOMETRY_SHADER 0x8DD9 diff --git a/core/rhi/opengl/opengl_func.h b/core/rhi/opengl/opengl_func.h new file mode 100644 index 0000000..a1e8230 --- /dev/null +++ b/core/rhi/opengl/opengl_func.h @@ -0,0 +1,19 @@ +#pragma once + +typedef void (APIENTRYP PFNGLGENFRAMEBUFFERSPROC) (GLsizei n, GLuint *framebuffers); +typedef void (APIENTRYP PFNGLBINDFRAMEBUFFERPROC) (GLenum target, GLuint framebuffer); +typedef void (APIENTRYP 
PFNGLFRAMEBUFFERTEXTURE2DPROC) (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level);
+typedef void (APIENTRYP PFNGLGETTEXIMAGEPROC) (GLenum target, GLint level, GLenum format, GLenum type, void *pixels);
+
+inline PFNGLGENFRAMEBUFFERSPROC glGenFramebuffers;
+inline PFNGLBINDFRAMEBUFFERPROC glBindFramebuffer;
+inline PFNGLFRAMEBUFFERTEXTURE2DPROC glFramebufferTexture2D;
+inline PFNGLGETTEXIMAGEPROC glGetTexImage;
+
+inline void load_opengl_func()
+{
+    glGenFramebuffers = (PFNGLGENFRAMEBUFFERSPROC)imgl3wGetProcAddress("glGenFramebuffers");
+    glBindFramebuffer = (PFNGLBINDFRAMEBUFFERPROC)imgl3wGetProcAddress("glBindFramebuffer");
+    glFramebufferTexture2D = (PFNGLFRAMEBUFFERTEXTURE2DPROC)imgl3wGetProcAddress("glFramebufferTexture2D");
+    glGetTexImage = (PFNGLGETTEXIMAGEPROC)imgl3wGetProcAddress("glGetTexImage");
+}
diff --git a/core/rhi/opengl/render_target_opengl.cpp b/core/rhi/opengl/render_target_opengl.cpp
new file mode 100644
index 0000000..026bedf
--- /dev/null
+++ b/core/rhi/opengl/render_target_opengl.cpp
@@ -0,0 +1,123 @@
+#include "render_target_opengl.h"
+
+#include "opengl_def.h"
+#include "opengl_func.h"
+
+// Creates the framebuffer object and the RGBA colour texture attached to it.
+// NOTE(review): `format` is not consulted; storage is always GL_RGBA / GL_UNSIGNED_BYTE.
+void render_target_opengl::init(int width, int height, texture_format format)
+{
+    // Record the size: lock(), unlock() and on_resize() all work from width_/height_.
+    width_ = width;
+    height_ = height;
+
+    glGenFramebuffers(1, &fbo_);
+    CHECK_GL_ERRORS
+    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo_);
+    CHECK_GL_ERRORS
+
+#if defined(__APPLE__)
+    LockGLContext([NSOpenGLContext currentContext]);
+#endif
+
+    // Create a new OpenGL texture
+    glGenTextures(1, &texture_);
+    CHECK_GL_ERRORS
+
+    glBindTexture(GL_TEXTURE_2D, texture_);
+
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+
+    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr);
+    CHECK_GL_ERRORS
+#if defined(__APPLE__)
+    UnlockGLContext([NSOpenGLContext currentContext]);
+#endif
+
+    glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, texture_, 0);
+    CHECK_GL_ERRORS
+
+    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
+}
+
+// Allocates a CPU staging buffer of width_ * height_ * 4 bytes and, for READ and
+// READ_WRITE, fills it with the current texture contents. Returns nullptr when the
+// allocation fails. The buffer is released again by unlock().
+void* render_target_opengl::lock(lock_state state)
+{
+    // Already locked: hand back the same buffer instead of leaking it.
+    if (locked_texture_ != nullptr)
+        return locked_texture_;
+
+    const size_t byte_count = static_cast<size_t>(width_) * static_cast<size_t>(height_) * 4;
+    locked_texture_ = malloc(byte_count);
+    if (locked_texture_ == nullptr)
+        return nullptr;
+
+    switch (state)
+    {
+    case lock_state::READ:
+    case lock_state::READ_WRITE:
+        // glGetTexImage reads whatever texture is bound to GL_TEXTURE_2D, so bind ours first.
+        glBindTexture(GL_TEXTURE_2D, texture_);
+        glGetTexImage(GL_TEXTURE_2D, 0, GL_RGBA, GL_UNSIGNED_BYTE, locked_texture_);
+        break;
+    case lock_state::WRITE:
+    case lock_state::NONE:
+        break;
+    }
+    return locked_texture_;
+}
+
+// Uploads the staging buffer obtained from lock() into the texture and frees it.
+void render_target_opengl::unlock()
+{
+    // Not locked: a null upload would re-allocate the storage and discard the image.
+    if (locked_texture_ == nullptr)
+        return;
+
+#if defined(__APPLE__)
+    LockGLContext([NSOpenGLContext currentContext]);
+#endif
+
+    glBindTexture(GL_TEXTURE_2D, texture_);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+
+    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width_, height_, 0, GL_RGBA, GL_UNSIGNED_BYTE, locked_texture_);
+
+#if defined(__APPLE__)
+    UnlockGLContext([NSOpenGLContext currentContext]);
+#endif
+
+    free(locked_texture_);
+    locked_texture_ = nullptr;
+}
+
+// Re-allocates the colour texture at the new size.
+void render_target_opengl::on_resize(int width, int height)
+{
+    width_ = width;
+    height_ = height;
+
+    // init() has not run yet, so there is no texture to re-allocate.
+    if (texture_ == 0)
+        return;
+
+#if defined(__APPLE__)
+    LockGLContext([NSOpenGLContext currentContext]);
+#endif
+
+    // The texture object is kept rather than deleted and recreated, so the
+    // framebuffer attachment made in init() keeps pointing at valid storage.
+    glBindTexture(GL_TEXTURE_2D, texture_);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+
+    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width_, height_, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr);
+
+#if defined(__APPLE__)
+    UnlockGLContext([NSOpenGLContext currentContext]);
+#endif
+}
diff --git a/core/rhi/opengl/render_target_opengl.h b/core/rhi/opengl/render_target_opengl.h
new file mode 100644
index 0000000..02cf0d2
--- /dev/null
+++ b/core/rhi/opengl/render_target_opengl.h
@@ -0,0 +1,20 @@
+#pragma once
+#include "imgui_impl_opengl3_loader.h"
+#include "rhi/render_target.h"
+
+class
render_target_opengl : public render_target
+{
+public:
+    void init(int width, int height, texture_format format) override;
+    // ImGui's OpenGL backend binds ImTextureID as a GL texture name, so expose the
+    // colour texture attached to the framebuffer rather than the framebuffer id.
+    ImTextureID get_texture_id() override { return reinterpret_cast<void*>(static_cast<intptr_t>(texture_)); }
+    void* lock(lock_state state) override;
+    void unlock() override;
+protected:
+    void on_resize(int width, int height) override;
+private:
+    GLuint fbo_ = 0;
+    GLuint texture_ = 0;
+    void* locked_texture_ = nullptr;
+};
diff --git a/core/rhi/opengl/renderer_opengl.cpp b/core/rhi/opengl/renderer_opengl.cpp
new file mode 100644
index 0000000..554b77f
--- /dev/null
+++ b/core/rhi/opengl/renderer_opengl.cpp
@@ -0,0 +1,165 @@
+#include "renderer_opengl.h"
+
+#include
+
+#include "imgui_impl_opengl3.h"
+#include "imgui_impl_opengl3_loader.h"
+#include "imgui_impl_sdl3.h"
+#include "opengl_func.h"
+#include "render_target_opengl.h"
+#include "texture_opengl.h"
+#include "application/application.h"
+#include "rhi/shader.h"
+#include "shader/shader_cs_opengl.h"
+#include "shader/shader_gs_opengl.h"
+#include "shader/shader_ps_opengl.h"
+#include "shader/shader_vs_opengl.h"
+
+SDL_GLContext g_gl_context = nullptr;
+
+void renderer_opengl::pre_init()
+{
+#if defined(__APPLE__)
+    SDL_GL_SetAttribute(SDL_GL_CONTEXT_FLAGS, SDL_GL_CONTEXT_FORWARD_COMPATIBLE_FLAG); // Always required on Mac
+#else
+    SDL_GL_SetAttribute(SDL_GL_CONTEXT_FLAGS, 0);
+#endif
+    SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE);
+    SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 4);
+    SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 6);
+    // set sdl using graphics card
+
+
+    // Enable native IME.
+ SDL_SetHint(SDL_HINT_IME_SHOW_UI, "1"); + + // Create window with graphics context + SDL_GL_SetAttribute(SDL_GL_DOUBLEBUFFER, 1); + SDL_GL_SetAttribute(SDL_GL_DEPTH_SIZE, 24); + SDL_GL_SetAttribute(SDL_GL_STENCIL_SIZE, 8); +} + +bool renderer_opengl::init(SDL_Window* window_handle) +{ + if (has_initialized_) + return true; + + const auto glsl_version = "#version 460"; + + g_gl_context = SDL_GL_CreateContext(window_handle); + SDL_GL_MakeCurrent(window_handle, g_gl_context); + SDL_GL_SetSwapInterval(1); // Enable vsync + SDL_ShowWindow(window_handle); + + // Setup Platform/Renderer backends + ImGui_ImplSDL3_InitForOpenGL(window_handle, g_gl_context); + ImGui_ImplOpenGL3_Init(glsl_version); + load_opengl_func(); + + return true; +} + +void renderer_opengl::shutdown() +{ + ImGui_ImplOpenGL3_Shutdown(); + ImGui_ImplSDL3_Shutdown(); + + SDL_GL_DeleteContext(g_gl_context); +} + +Slang::ComPtr renderer_opengl::create_slang_session(const std::string& shader_path) +{ + slang::TargetDesc target_desc; + target_desc.format = SLANG_GLSL; + target_desc.profile = g_slang_global_session->findProfile("glsl_460"); + + const char* search_paths[] = { shader_path.c_str() }; + slang::SessionDesc session_desc; + session_desc.searchPaths = search_paths; + session_desc.searchPathCount = 1; + session_desc.targets = &target_desc; + session_desc.targetCount = 1; + + Slang::ComPtr out; + g_slang_global_session->createSession(session_desc, out.writeRef()); + return out; +} + +std::shared_ptr renderer_opengl::load_shader(const std::string& module_name, const std::string& entry_name) +{ + auto handle = std::make_shared(); + if (!handle->init_slang_module(module_name, entry_name)) + return nullptr; + const auto shader_type = handle->get_shader_type(); + std::shared_ptr out; + switch (shader_type) + { + case SLANG_STAGE_VERTEX: + { + out = std::make_shared(handle); + } + break; + case SLANG_STAGE_GEOMETRY: + { + out = std::make_shared(handle); + } + break; + case SLANG_STAGE_PIXEL: + { + out = 
std::make_shared(handle); + } + break; + case SLANG_STAGE_COMPUTE: + { + out = std::make_shared(handle); + } + break; + default: + spdlog::error("slang: unsupported shader type"); + return nullptr; + } + if (!out->init()) + return nullptr; + return out; +} + +void renderer_opengl::new_frame(SDL_Window* window_handle) +{ + ImGui_ImplOpenGL3_NewFrame(); + ImGui_ImplSDL3_NewFrame(); + ImGui::NewFrame(); +} + +void renderer_opengl::end_frame(SDL_Window* window_handle) +{ + // Rendering + ImGui::Render(); + const auto& io = ImGui::GetIO(); + + glViewport(0, 0, (int)io.DisplaySize.x, (int)io.DisplaySize.y); + glClearColor(clear_color.x * clear_color.w, clear_color.y * clear_color.w, clear_color.z * clear_color.w, clear_color.w); + glClear(GL_COLOR_BUFFER_BIT); + ImGui_ImplOpenGL3_RenderDrawData(ImGui::GetDrawData()); + SDL_GL_SwapWindow(window_handle); +} + +void renderer_opengl::resize(int width, int height) +{ +} + +std::shared_ptr renderer_opengl::create_texture(const unsigned char* data, int width, int height) +{ + auto out = std::make_shared(); + if (!out->init_data(data, width, height)) + { + out = nullptr; + } + return out; +} + +std::shared_ptr renderer_opengl::create_render_target(int width, int height, texture_format format) +{ + const auto target_dx11 = std::make_shared(); + target_dx11->init(width, height, format); + return target_dx11; +} diff --git a/core/rhi/opengl/renderer_opengl.h b/core/rhi/opengl/renderer_opengl.h new file mode 100644 index 0000000..37ce9f2 --- /dev/null +++ b/core/rhi/opengl/renderer_opengl.h @@ -0,0 +1,23 @@ +#pragma once +#include "rhi/renderer.h" +extern SDL_GLContext g_gl_context; + +class renderer_opengl : public renderer +{ +public: + void pre_init() override; + bool init(SDL_Window* window_handle) override; + void shutdown() override; + + Slang::ComPtr create_slang_session(const std::string& shader_path) override; + std::shared_ptr load_shader(const std::string& module_name, const std::string& entry_name) override; + + void 
new_frame(SDL_Window* window_handle) override; + void end_frame(SDL_Window* window_handle) override; + + void resize(int width, int height) override; + std::shared_ptr create_texture(const unsigned char* data, int width, int height) override; + std::shared_ptr create_render_target(int width, int height, texture_format format) override; +private: + bool has_initialized_ = false; +}; diff --git a/core/rhi/opengl/shader/shader_cs_opengl.cpp b/core/rhi/opengl/shader/shader_cs_opengl.cpp new file mode 100644 index 0000000..4147239 --- /dev/null +++ b/core/rhi/opengl/shader/shader_cs_opengl.cpp @@ -0,0 +1 @@ +#include "shader_cs_opengl.h" diff --git a/core/rhi/opengl/shader/shader_cs_opengl.h b/core/rhi/opengl/shader/shader_cs_opengl.h new file mode 100644 index 0000000..93b0ec7 --- /dev/null +++ b/core/rhi/opengl/shader/shader_cs_opengl.h @@ -0,0 +1,10 @@ +#pragma once +#include "shader_opengl.h" +#include "rhi/opengl/opengl_def.h" + +class shader_cs_opengl : public shader_opengl +{ +public: + shader_cs_opengl(const std::shared_ptr& handle) : shader_opengl(handle) {} + GLenum get_shader_type() const override { return GL_COMPUTE_SHADER; } +}; diff --git a/core/rhi/opengl/shader/shader_gs_opengl.cpp b/core/rhi/opengl/shader/shader_gs_opengl.cpp new file mode 100644 index 0000000..127ab8d --- /dev/null +++ b/core/rhi/opengl/shader/shader_gs_opengl.cpp @@ -0,0 +1 @@ +#include "shader_gs_opengl.h" diff --git a/core/rhi/opengl/shader/shader_gs_opengl.h b/core/rhi/opengl/shader/shader_gs_opengl.h new file mode 100644 index 0000000..a3110bc --- /dev/null +++ b/core/rhi/opengl/shader/shader_gs_opengl.h @@ -0,0 +1,11 @@ +#pragma once +#include "shader_opengl.h" +#include "rhi/opengl/opengl_def.h" + +class shader_gs_opengl : public shader_opengl +{ +public: + shader_gs_opengl(const std::shared_ptr& handle) : shader_opengl(handle) {} + + GLenum get_shader_type() const override { return GL_GEOMETRY_SHADER; } +}; diff --git a/core/rhi/opengl/shader/shader_opengl.cpp 
b/core/rhi/opengl/shader/shader_opengl.cpp
new file mode 100644
index 0000000..6dfd392
--- /dev/null
+++ b/core/rhi/opengl/shader/shader_opengl.cpp
@@ -0,0 +1,55 @@
+#include "shader_opengl.h"
+
+#include "imgui_impl_opengl3_loader.h"
+#include "rhi/slang_handle.h"
+
+// Creates the GL shader object for this stage, uploads the GLSL that Slang generated
+// for the entry point and compiles it. Returns false (leaving shader_id_ == 0) when
+// code generation, object creation or compilation fails.
+bool shader_opengl::init()
+{
+    if (!handle_)
+    {
+        spdlog::error("Failed to create shader: no slang handle");
+        return false;
+    }
+
+    // get_entry_point_code() returns nullptr when Slang reported diagnostics.
+    const auto code_blob = handle_->get_entry_point_code();
+    if (!code_blob)
+    {
+        spdlog::error("Failed to create shader: no generated code");
+        return false;
+    }
+
+    shader_id_ = glCreateShader(get_shader_type());
+    if (shader_id_ == 0)
+    {
+        spdlog::error("Failed to create shader");
+        return false;
+    }
+
+    const auto code_array = static_cast<const GLchar*>(code_blob->getBufferPointer());
+    const GLint code_size = static_cast<GLint>(code_blob->getBufferSize());
+    glShaderSource(shader_id_, 1, &code_array, &code_size);
+    // glShaderSource only stores the text; GL_COMPILE_STATUS is meaningless until this runs.
+    glCompileShader(shader_id_);
+
+    GLint compile_status = GL_FALSE;
+    glGetShaderiv(shader_id_, GL_COMPILE_STATUS, &compile_status);
+    if (compile_status == GL_FALSE)
+    {
+        GLint log_length = 0;
+        glGetShaderiv(shader_id_, GL_INFO_LOG_LENGTH, &log_length);
+        // One spare zero-filled element keeps the text terminated even for an empty log.
+        std::vector<GLchar> log(static_cast<size_t>(log_length) + 1, '\0');
+        glGetShaderInfoLog(shader_id_, log_length, nullptr, log.data());
+        spdlog::error("Failed to compile shader: {}", log.data());
+
+        glDeleteShader(shader_id_);
+        shader_id_ = 0;
+        return false;
+    }
+
+    return true;
+}
diff --git a/core/rhi/opengl/shader/shader_opengl.h b/core/rhi/opengl/shader/shader_opengl.h
new file mode 100644
index 0000000..9a22cfa
--- /dev/null
+++ b/core/rhi/opengl/shader/shader_opengl.h
@@ -0,0 +1,17 @@
+#pragma once
+#include "imgui_impl_opengl3_loader.h"
+#include "rhi/shader.h"
+
+// Base for the per-stage OpenGL shaders; derived classes supply the GL stage enum.
+class shader_opengl : public shader
+{
+public:
+    shader_opengl(const std::shared_ptr<slang_handle>& handle) : shader(handle) {}
+
+    bool init() override;
+    [[nodiscard]] virtual GLenum get_shader_type() const = 0;
+    [[nodiscard]] bool is_initialized() const override { return shader_id_ != 0; }
+protected:
+    // 0 means "no shader object"; starts at 0 so is_initialized() is false before init().
+    GLuint shader_id_ = 0;
+};
diff --git a/core/rhi/opengl/shader/shader_ps_opengl.cpp b/core/rhi/opengl/shader/shader_ps_opengl.cpp
new file mode 100644
index 0000000..6b33561
--- /dev/null
+++ b/core/rhi/opengl/shader/shader_ps_opengl.cpp
@@
-0,0 +1 @@ +#include "shader_ps_opengl.h" diff --git a/core/rhi/opengl/shader/shader_ps_opengl.h b/core/rhi/opengl/shader/shader_ps_opengl.h new file mode 100644 index 0000000..944b861 --- /dev/null +++ b/core/rhi/opengl/shader/shader_ps_opengl.h @@ -0,0 +1,9 @@ +#pragma once +#include "shader_opengl.h" + +class shader_ps_opengl : public shader_opengl +{ +public: + shader_ps_opengl(const std::shared_ptr& handle) : shader_opengl(handle) {} + GLenum get_shader_type() const override { return GL_FRAGMENT_SHADER; } +}; diff --git a/core/rhi/opengl/shader/shader_vs_opengl.cpp b/core/rhi/opengl/shader/shader_vs_opengl.cpp new file mode 100644 index 0000000..35008d8 --- /dev/null +++ b/core/rhi/opengl/shader/shader_vs_opengl.cpp @@ -0,0 +1 @@ +#include "shader_vs_opengl.h" diff --git a/core/rhi/opengl/shader/shader_vs_opengl.h b/core/rhi/opengl/shader/shader_vs_opengl.h new file mode 100644 index 0000000..960b0c0 --- /dev/null +++ b/core/rhi/opengl/shader/shader_vs_opengl.h @@ -0,0 +1,10 @@ +#pragma once +#include "shader_opengl.h" + +class shader_vs_opengl : public shader_opengl +{ +public: + shader_vs_opengl(const std::shared_ptr& handle) : shader_opengl(handle) {} + + GLenum get_shader_type() const override { return GL_VERTEX_SHADER; } +}; diff --git a/core/rhi/opengl/texture_opengl.cpp b/core/rhi/opengl/texture_opengl.cpp new file mode 100644 index 0000000..97b5605 --- /dev/null +++ b/core/rhi/opengl/texture_opengl.cpp @@ -0,0 +1,29 @@ +#include "texture_opengl.h" + +#include "opengl_def.h" + +bool texture_opengl::init_data(const unsigned char* data, int width, int height) +{ + width_ = width; + height_ = height; + +#if defined(__APPLE__) + LockGLContext([NSOpenGLContext currentContext]); +#endif + + // Create a new OpenGL texture + glGenTextures(1, &texture_id_); + CHECK_GL_ERRORS + + glBindTexture(GL_TEXTURE_2D, texture_id_); + + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + + 
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, data); + CHECK_GL_ERRORS +#if defined(__APPLE__) + UnlockGLContext([NSOpenGLContext currentContext]); +#endif + return true; +} diff --git a/core/rhi/opengl/texture_opengl.h b/core/rhi/opengl/texture_opengl.h new file mode 100644 index 0000000..6b398a6 --- /dev/null +++ b/core/rhi/opengl/texture_opengl.h @@ -0,0 +1,13 @@ +#pragma once +#include "imgui_impl_opengl3_loader.h" +#include "rhi/texture.h" + +class texture_opengl : public texture +{ +public: + ImTextureID get_texture_id() override { return (void*)static_cast(texture_id_); } + bool init_data(const unsigned char* data, int width, int height) override; + [[nodiscard]] bool is_valid() const override { return texture_id_ != 0; } +private: + GLuint texture_id_ = 0; +}; diff --git a/core/rhi/renderer.cpp b/core/rhi/renderer.cpp index e69de29..1915444 100644 --- a/core/rhi/renderer.cpp +++ b/core/rhi/renderer.cpp @@ -0,0 +1,6 @@ +#include "renderer.h" + +void renderer::init_slang(const std::string& shader_path) +{ + session_ = create_slang_session(shader_path); +} diff --git a/core/rhi/renderer.h b/core/rhi/renderer.h index 19e772a..80c80aa 100644 --- a/core/rhi/renderer.h +++ b/core/rhi/renderer.h @@ -1,25 +1,37 @@ #pragma once #include +#include "imgui.h" +#include "slang_handle.h" + +class shader; class render_target; class texture; +constexpr ImVec4 clear_color = ImVec4(0.45f, 0.55f, 0.60f, 1.00f); +constexpr float clear_color_with_alpha[4] = { clear_color.x * clear_color.w, clear_color.y * clear_color.w, clear_color.z * clear_color.w, clear_color.w }; class renderer { public: virtual ~renderer() = default; + virtual void pre_init() {} virtual bool init(SDL_Window* window_handle) = 0; virtual void shutdown() = 0; + + void init_slang(const std::string& shader_path); + virtual Slang::ComPtr create_slang_session(const std::string& shader_path) = 0; + virtual Slang::ComPtr get_slang_session() { return session_; } + virtual 
std::shared_ptr load_shader(const std::string& module_name, const std::string& entry_name) = 0; - virtual void new_frame() = 0; - virtual void end_frame() = 0; + virtual void new_frame(SDL_Window* window_handle) = 0; + virtual void end_frame(SDL_Window* window_handle) = 0; virtual void resize(int width, int height) = 0; - virtual texture* create_texture(const unsigned char* data, int width, int height) = 0; - virtual render_target* create_render_target(int width, int height, texture_format format) = 0; - virtual bool compile_shader() = 0; + virtual std::shared_ptr create_texture(const unsigned char* data, int width, int height) = 0; + virtual std::shared_ptr create_render_target(int width, int height, texture_format format) = 0; void set_vsync(const bool vsync) { vsync_ = vsync; } protected: + Slang::ComPtr session_; bool vsync_ = true; }; diff --git a/core/rhi/shader.cpp b/core/rhi/shader.cpp new file mode 100644 index 0000000..1ff07ac --- /dev/null +++ b/core/rhi/shader.cpp @@ -0,0 +1 @@ +#include "shader.h" diff --git a/core/rhi/shader.h b/core/rhi/shader.h new file mode 100644 index 0000000..a2695be --- /dev/null +++ b/core/rhi/shader.h @@ -0,0 +1,19 @@ +#pragma once + +class slang_handle; + +class shader +{ +public: + virtual ~shader() = default; + explicit shader(const std::shared_ptr& handle) : handle_(handle) {} + + virtual bool init() { return false; } + [[nodiscard]] virtual bool is_initialized() const = 0; + [[nodiscard]] virtual bool is_valid() const { return handle_ != nullptr && is_initialized(); } + + // param setters + // virtual void set_int(const char* name, int value) = 0; +protected: + std::shared_ptr handle_; +}; diff --git a/core/rhi/slang_handle.cpp b/core/rhi/slang_handle.cpp new file mode 100644 index 0000000..c615e21 --- /dev/null +++ b/core/rhi/slang_handle.cpp @@ -0,0 +1,85 @@ +#include "slang_handle.h" + +#include "renderer.h" +#include "application/application.h" + +bool slang_handle::init_slang_module(const std::string& module_name, 
const std::string& entry_name) +{ + spdlog::info("slang: init slang handle: module \"{}\", entry point \"{}\"", module_name.c_str(), entry_name.c_str()); + + const auto session = application::get()->get_renderer()->get_slang_session(); + + Slang::ComPtr diagnostics; + *module.writeRef() = session->loadModule(module_name.c_str(), diagnostics.writeRef()); + + if (diagnostics) + { + spdlog::error("slang: load module \"{}\" with diagnostics: {}", module_name.c_str(), (const char*)diagnostics->getBufferPointer()); + return false; + } + + auto r = module->findEntryPointByName(entry_name.c_str(), entry_point.writeRef()); + if (r != SLANG_OK) + { + spdlog::error("slang: can't find entry point \"{}\"", entry_name.c_str()); + return false; + } + + slang::IComponentType* components[] = { module, entry_point }; + r = session->createCompositeComponentType(components, 2, program.writeRef()); + if (r != SLANG_OK) + { + spdlog::error("slang: create composite component type failed"); + return false; + } + // get entry point index + for (int i = 0; i < module->getDefinedEntryPointCount(); ++i) + { + Slang::ComPtr temp_entry_point; + module->getDefinedEntryPoint(i, temp_entry_point.writeRef()); + if (temp_entry_point == entry_point) + { + entry_point_index_ = i; + break; + } + } + + slang::ProgramLayout* layout = program->getLayout(target_index); + const auto entry_reflection = layout->getEntryPointByIndex(entry_point_index_); + shader_type_ = entry_reflection->getStage(); + +#if _DEBUG + spdlog::info("slang: shader type: {}", shader_type_); + spdlog::info("================parameters================"); + + for (int i = 0; i < layout->getParameterCount(); ++i) + { + slang::VariableLayoutReflection* reflection = layout->getParameterByIndex(i); + const auto type_reflection = reflection->getTypeLayout()->getType(); + + spdlog::info("{} {} {}; ", i, type_reflection->getName(), reflection->getName()); + } + spdlog::info("================parameters================"); +#endif + + 
spdlog::info("slang: init slang handle successfully"); + return true; +} + +Slang::ComPtr slang_handle::get_entry_point_code() const +{ + Slang::ComPtr diagnostics; + Slang::ComPtr code_blob; + program->getEntryPointCode( + entry_point_index_, + target_index, + code_blob.writeRef(), + diagnostics.writeRef()); + + if (diagnostics) + { + spdlog::error("slang: get entry point code failed: {}", (const char*)diagnostics->getBufferPointer()); + return nullptr; + } + return code_blob; +} diff --git a/core/rhi/slang_handle.h b/core/rhi/slang_handle.h new file mode 100644 index 0000000..64fd88f --- /dev/null +++ b/core/rhi/slang_handle.h @@ -0,0 +1,25 @@ +#pragma once +#include "slang-com-ptr.h" +#include + +class slang_handle +{ +public: + Slang::ComPtr module; + Slang::ComPtr program; + Slang::ComPtr entry_point; + static constexpr int target_index = 0; // only one target + + bool init_slang_module(const std::string& module_name, const std::string& entry_name); + [[nodiscard]] Slang::ComPtr get_entry_point_code() const; + [[nodiscard]] const char* get_entry_point_name() const + { + slang::ProgramLayout* layout = program->getLayout(0); + const auto entry_reflection = layout->getEntryPointByIndex(entry_point_index_); + return entry_reflection->getName(); + } + [[nodiscard]] SlangStage get_shader_type() const { return shader_type_; } +private: + int entry_point_index_ = -1; + SlangStage shader_type_ = SLANG_STAGE_NONE; +}; diff --git a/core/rhi/windows/dx11/dx11_func.h b/core/rhi/windows/dx11/dx11_func.h new file mode 100644 index 0000000..f826660 --- /dev/null +++ b/core/rhi/windows/dx11/dx11_func.h @@ -0,0 +1,27 @@ +#pragma once +#include + +inline HMODULE get_compiler_module() +{ + static HMODULE compiler_dll = nullptr; + + if (compiler_dll == nullptr) + { + // load the system one as the last resort + compiler_dll = LoadLibrary(TEXT("d3dcompiler_47.dll")); + } + return compiler_dll; +} + +// @return pointer to the D3DCompile function +inline pD3DCompile 
get_d3d_compile_func() +{ + static HMODULE compiler_dll = get_compiler_module(); + + if (compiler_dll) + { + return static_cast((void*)GetProcAddress(compiler_dll, "D3DCompile")); + } + + return nullptr; +} diff --git a/core/rhi/windows/dx_format.h b/core/rhi/windows/dx11/dx_format.h similarity index 87% rename from core/rhi/windows/dx_format.h rename to core/rhi/windows/dx11/dx_format.h index 2c55e87..462c128 100644 --- a/core/rhi/windows/dx_format.h +++ b/core/rhi/windows/dx11/dx_format.h @@ -3,7 +3,7 @@ #include "rhi/rhi_defintion.h" -DXGI_FORMAT ToDXFormat(texture_format format) +inline DXGI_FORMAT to_dx_format(texture_format format) { switch (format) { diff --git a/core/rhi/windows/dx11/render_target_dx11.cpp b/core/rhi/windows/dx11/render_target_dx11.cpp index d8b6a05..e4c57c6 100644 --- a/core/rhi/windows/dx11/render_target_dx11.cpp +++ b/core/rhi/windows/dx11/render_target_dx11.cpp @@ -2,8 +2,8 @@ #include +#include "dx_format.h" #include "renderer_dx11.h" -#include "rhi/windows/dx_format.h" render_target_dx11::render_target_dx11() : lock_state_(lock_state::NONE) { @@ -22,7 +22,7 @@ void render_target_dx11::init(int width, int height, texture_format format) texture_desc.Height = height; texture_desc.MipLevels = 1; texture_desc.ArraySize = 1; - texture_desc.Format = ToDXFormat(format); + texture_desc.Format = to_dx_format(format); texture_desc.SampleDesc.Count = 1; texture_desc.SampleDesc.Quality = 0; texture_desc.Usage = D3D11_USAGE_DEFAULT; diff --git a/core/rhi/windows/dx11/renderer_dx11.cpp b/core/rhi/windows/dx11/renderer_dx11.cpp index 298aa1d..86dd808 100644 --- a/core/rhi/windows/dx11/renderer_dx11.cpp +++ b/core/rhi/windows/dx11/renderer_dx11.cpp @@ -8,14 +8,20 @@ #include "imgui_impl_sdl3.h" #include "render_target_dx11.h" #include "texture_dx11.h" -#include "CompilerHlsl/compileHlsl.hpp" -#include "ShaderWriter/VertexWriter.hpp" +#include "application/application.h" +#include "shader/shader_cs_dx11.h" +#include "shader/shader_ds_dx11.h" +#include 
"shader/shader_gs_dx11.h" +#include "shader/shader_hs_dx11.h" +#include "shader/shader_ps_dx11.h" +#include "shader/shader_vs_dx11.h" ref_count_ptr g_d3d11_device; ref_count_ptr g_d3d11_device_context; ref_count_ptr g_d3d11_swap_chain; ref_count_ptr g_main_render_target_view; + renderer_dx11::renderer_dx11() { @@ -50,19 +56,83 @@ void renderer_dx11::shutdown() g_d3d11_swap_chain.safe_release(); } -void renderer_dx11::new_frame() +Slang::ComPtr renderer_dx11::create_slang_session(const std::string& shader_path) +{ + slang::TargetDesc target_desc; + target_desc.format = SLANG_HLSL; + target_desc.profile = g_slang_global_session->findProfile("sm_5_1"); + + const char* search_paths[] = { shader_path.c_str() }; + slang::SessionDesc session_desc; + session_desc.searchPaths = search_paths; + session_desc.searchPathCount = 1; + session_desc.targets = &target_desc; + session_desc.targetCount = 1; + + Slang::ComPtr out; + g_slang_global_session->createSession(session_desc, out.writeRef()); + return out; +} + +std::shared_ptr renderer_dx11::load_shader(const std::string& module_name, const std::string& entry_name) +{ + auto handle = std::make_shared(); + if (!handle->init_slang_module(module_name, entry_name)) + return nullptr; + + const auto shader_type = handle->get_shader_type(); + std::shared_ptr out; + switch (shader_type) + { + case SLANG_STAGE_VERTEX: + { + out = std::make_shared(handle); + } + break; + case SLANG_STAGE_HULL: + { + out = std::make_shared(handle); + } + break; + case SLANG_STAGE_DOMAIN: + { + out = std::make_shared(handle); + } + break; + case SLANG_STAGE_GEOMETRY: + { + out = std::make_shared(handle); + } + break; + case SLANG_STAGE_PIXEL: + { + out = std::make_shared(handle); + } + break; + case SLANG_STAGE_COMPUTE: + { + out = std::make_shared(handle); + } + break; + default: + spdlog::error("slang: unsupported shader type"); + return nullptr; + } + if (!out->init()) + return nullptr; + return out; +} + +void renderer_dx11::new_frame(SDL_Window* 
window_handle) { - // Start the Dear ImGui frame ImGui_ImplDX11_NewFrame(); ImGui_ImplSDL3_NewFrame(); ImGui::NewFrame(); } -void renderer_dx11::end_frame() +void renderer_dx11::end_frame(SDL_Window* window_handle) { - constexpr ImVec4 clear_color = ImVec4(0.45f, 0.55f, 0.60f, 1.00f); - constexpr float clear_color_with_alpha[4] = { clear_color.x * clear_color.w, clear_color.y * clear_color.w, clear_color.z * clear_color.w, clear_color.w }; - + ImGui::Render(); ID3D11RenderTargetView* target_view = g_main_render_target_view.get_reference(); @@ -80,29 +150,23 @@ void renderer_dx11::resize(int width, int height) create_render_target(); } -texture* renderer_dx11::create_texture(const unsigned char* data, const int width, const int height) +std::shared_ptr renderer_dx11::create_texture(const unsigned char* data, const int width, const int height) { - auto out = new texture_dx11(); + auto out = std::make_shared(); if (!out->init_data(data, width, height)) { - delete out; out = nullptr; } return out; } -render_target* renderer_dx11::create_render_target(int width, int height, texture_format format) +std::shared_ptr renderer_dx11::create_render_target(int width, int height, texture_format format) { - const auto target_dx11 = new render_target_dx11(); + const auto target_dx11 = std::make_shared(); target_dx11->init(width, height, format); return target_dx11; } -bool renderer_dx11::compile_shader() -{ - return true; -} - void renderer_dx11::create_render_target() { ref_count_ptr p_back_buffer; diff --git a/core/rhi/windows/dx11/renderer_dx11.h b/core/rhi/windows/dx11/renderer_dx11.h index a6f5d1e..edfe4a6 100644 --- a/core/rhi/windows/dx11/renderer_dx11.h +++ b/core/rhi/windows/dx11/renderer_dx11.h @@ -4,6 +4,9 @@ #include "misc/ref_counting.h" #include "rhi/renderer.h" +#include "slang-com-ptr.h" +#include "slang.h" + extern ref_count_ptr g_d3d11_device; extern ref_count_ptr g_d3d11_device_context; extern ref_count_ptr g_d3d11_swap_chain; @@ -16,13 +19,15 @@ public: bool 
init(SDL_Window* window_handle) override; void shutdown() override; - void new_frame() override; - void end_frame() override; + Slang::ComPtr create_slang_session(const std::string& shader_path) override; + std::shared_ptr load_shader(const std::string& module_name, const std::string& entry_name) override; + + void new_frame(SDL_Window* window_handle) override; + void end_frame(SDL_Window* window_handle) override; void resize(int width, int height) override; - texture* create_texture(const unsigned char* data, int width, int height) override; - render_target* create_render_target(int width, int height, texture_format format) override; - bool compile_shader() override; + std::shared_ptr create_texture(const unsigned char* data, int width, int height) override; + std::shared_ptr create_render_target(int width, int height, texture_format format) override; protected: void create_render_target(); bool create_device(HWND in_hwnd); diff --git a/core/rhi/windows/dx11/shader/shader_cs_dx11.cpp b/core/rhi/windows/dx11/shader/shader_cs_dx11.cpp new file mode 100644 index 0000000..b15f0a6 --- /dev/null +++ b/core/rhi/windows/dx11/shader/shader_cs_dx11.cpp @@ -0,0 +1,10 @@ +#include "shader_cs_dx11.h" + +#include "rhi/slang_handle.h" +#include "rhi/windows/dx11/dx11_func.h" +#include "rhi/windows/dx11/renderer_dx11.h" + +HRESULT shader_cs_dx11::create_shader(ID3DBlob* blob, ID3D11Device* device) +{ + return device->CreateComputeShader(blob->GetBufferPointer(), blob->GetBufferSize(), nullptr, compute_shader_.writeRef()); +} diff --git a/core/rhi/windows/dx11/shader/shader_cs_dx11.h b/core/rhi/windows/dx11/shader/shader_cs_dx11.h new file mode 100644 index 0000000..1bf6fcf --- /dev/null +++ b/core/rhi/windows/dx11/shader/shader_cs_dx11.h @@ -0,0 +1,20 @@ +#pragma once +#include "shader_dx11.h" +#include "slang-com-ptr.h" + +class shader_cs_dx11 : public shader_dx11 +{ +public: + explicit shader_cs_dx11(const std::shared_ptr& handle) + : shader_dx11(handle) + { + } + + HRESULT 
create_shader(ID3DBlob* blob, ID3D11Device* device) override; + + [[nodiscard]] ID3D11DeviceChild* get_shader() override { return compute_shader_; } + [[nodiscard]] const char* get_shader_model() const override { return "cs_5_0"; } + [[nodiscard]] bool is_initialized() const override { return compute_shader_ != nullptr; } +private: + Slang::ComPtr compute_shader_; +}; diff --git a/core/rhi/windows/dx11/shader/shader_ds_dx11.cpp b/core/rhi/windows/dx11/shader/shader_ds_dx11.cpp new file mode 100644 index 0000000..31bbf52 --- /dev/null +++ b/core/rhi/windows/dx11/shader/shader_ds_dx11.cpp @@ -0,0 +1,6 @@ +#include "shader_ds_dx11.h" + +HRESULT shader_ds_dx11::create_shader(ID3DBlob* blob, ID3D11Device* device) +{ + return device->CreateDomainShader(blob->GetBufferPointer(), blob->GetBufferSize(), nullptr, domain_shader_.writeRef()); +} diff --git a/core/rhi/windows/dx11/shader/shader_ds_dx11.h b/core/rhi/windows/dx11/shader/shader_ds_dx11.h new file mode 100644 index 0000000..9f4c43c --- /dev/null +++ b/core/rhi/windows/dx11/shader/shader_ds_dx11.h @@ -0,0 +1,20 @@ +#pragma once +#include "shader_dx11.h" +#include "slang-com-ptr.h" + +class shader_ds_dx11 : public shader_dx11 +{ +public: + explicit shader_ds_dx11(const std::shared_ptr& handle) + : shader_dx11(handle) + { + } + + HRESULT create_shader(ID3DBlob* blob, ID3D11Device* device) override; + + [[nodiscard]] ID3D11DeviceChild* get_shader() override { return domain_shader_; } + [[nodiscard]] const char* get_shader_model() const override { return "ds_5_0"; } + [[nodiscard]] bool is_initialized() const override { return domain_shader_ != nullptr; } +private: + Slang::ComPtr domain_shader_; +}; diff --git a/core/rhi/windows/dx11/shader/shader_dx11.cpp b/core/rhi/windows/dx11/shader/shader_dx11.cpp new file mode 100644 index 0000000..2db52f4 --- /dev/null +++ b/core/rhi/windows/dx11/shader/shader_dx11.cpp @@ -0,0 +1,62 @@ +#include "shader_dx11.h" + +#include + +#include "slang-com-ptr.h" +#include 
"rhi/windows/dx11/dx11_func.h" +#include "rhi/windows/dx11/renderer_dx11.h" + +bool shader_dx11::init() +{ + Slang::ComPtr kernel_blob; + Slang::ComPtr error_blob; + + const auto code_blob = handle_->get_entry_point_code(); + if (!code_blob) + { + spdlog::error("slang: get entry point code failed"); + return false; + } + + const auto compile_func = get_d3d_compile_func(); + if (!compile_func) + { + spdlog::critical("slang: get D3DCompile function failed"); + return false; + } + + unsigned int shader_flags = D3DCOMPILE_ENABLE_STRICTNESS; +#if _DEBUG + shader_flags |= D3DCOMPILE_DEBUG; +#else + shader_flags |= D3DCOMPILE_OPTIMIZATION_LEVEL3; +#endif + + const auto target = "cs_5_0"; + + auto hr = compile_func( + code_blob->getBufferPointer(), + code_blob->getBufferSize(), + nullptr, + nullptr, + nullptr, + handle_->get_entry_point_name(), + target, + shader_flags, + 0, + kernel_blob.writeRef(), + error_blob.writeRef()); + if (FAILED(hr)) + { + spdlog::error("slang: compile shader failed: {}", (const char*)error_blob->GetBufferPointer()); + return false; + } + hr = create_shader(kernel_blob, g_d3d11_device); + if (FAILED(hr)) + { + spdlog::error("slang: create compute shader failed: {:x}", hr); + return false; + } + + return true; +} diff --git a/core/rhi/windows/dx11/shader/shader_dx11.h b/core/rhi/windows/dx11/shader/shader_dx11.h new file mode 100644 index 0000000..5529c76 --- /dev/null +++ b/core/rhi/windows/dx11/shader/shader_dx11.h @@ -0,0 +1,18 @@ +#pragma once +#include "rhi/shader.h" +#include + +class shader_dx11 : public shader +{ +public: + explicit shader_dx11(const std::shared_ptr& handle) + : shader(handle) + { + } + + bool init() override; + virtual HRESULT create_shader(ID3DBlob* blob, ID3D11Device* device) = 0; + + [[nodiscard]] virtual ID3D11DeviceChild* get_shader() = 0; + [[nodiscard]] virtual const char* get_shader_model() const = 0; +}; diff --git a/core/rhi/windows/dx11/shader/shader_gs_dx11.cpp b/core/rhi/windows/dx11/shader/shader_gs_dx11.cpp 
new file mode 100644 index 0000000..b93ca22 --- /dev/null +++ b/core/rhi/windows/dx11/shader/shader_gs_dx11.cpp @@ -0,0 +1,6 @@ +#include "shader_gs_dx11.h" + +HRESULT shader_gs_dx11::create_shader(ID3DBlob* blob, ID3D11Device* device) +{ + return device->CreateGeometryShader(blob->GetBufferPointer(), blob->GetBufferSize(), nullptr, geometry_shader_.writeRef()); +} diff --git a/core/rhi/windows/dx11/shader/shader_gs_dx11.h b/core/rhi/windows/dx11/shader/shader_gs_dx11.h new file mode 100644 index 0000000..ec8f781 --- /dev/null +++ b/core/rhi/windows/dx11/shader/shader_gs_dx11.h @@ -0,0 +1,20 @@ +#pragma once +#include "shader_dx11.h" +#include "slang-com-ptr.h" + +class shader_gs_dx11 : public shader_dx11 +{ +public: + explicit shader_gs_dx11(const std::shared_ptr& handle) + : shader_dx11(handle) + { + } + + HRESULT create_shader(ID3DBlob* blob, ID3D11Device* device) override; + + [[nodiscard]] ID3D11DeviceChild* get_shader() override { return geometry_shader_; } + [[nodiscard]] const char* get_shader_model() const override { return "gs_5_0"; } + [[nodiscard]] bool is_initialized() const override { return geometry_shader_ != nullptr; } +private: + Slang::ComPtr geometry_shader_; +}; diff --git a/core/rhi/windows/dx11/shader/shader_hs_dx11.cpp b/core/rhi/windows/dx11/shader/shader_hs_dx11.cpp new file mode 100644 index 0000000..88ce3c5 --- /dev/null +++ b/core/rhi/windows/dx11/shader/shader_hs_dx11.cpp @@ -0,0 +1,6 @@ +#include "shader_hs_dx11.h" + +HRESULT shader_hs_dx11::create_shader(ID3DBlob* blob, ID3D11Device* device) +{ + return device->CreateHullShader(blob->GetBufferPointer(), blob->GetBufferSize(), nullptr, hull_shader_.writeRef()); +} diff --git a/core/rhi/windows/dx11/shader/shader_hs_dx11.h b/core/rhi/windows/dx11/shader/shader_hs_dx11.h new file mode 100644 index 0000000..0d89bd8 --- /dev/null +++ b/core/rhi/windows/dx11/shader/shader_hs_dx11.h @@ -0,0 +1,20 @@ +#pragma once +#include "shader_dx11.h" +#include "slang-com-ptr.h" + +class shader_hs_dx11 
: public shader_dx11 +{ +public: + explicit shader_hs_dx11(const std::shared_ptr& handle) + : shader_dx11(handle) + { + } + + HRESULT create_shader(ID3DBlob* blob, ID3D11Device* device) override; + + [[nodiscard]] ID3D11DeviceChild* get_shader() override { return hull_shader_; } + [[nodiscard]] const char* get_shader_model() const override { return "hs_5_0"; } + [[nodiscard]] bool is_initialized() const override { return hull_shader_ != nullptr; } +private: + Slang::ComPtr hull_shader_; +}; diff --git a/core/rhi/windows/dx11/shader/shader_ps_dx11.cpp b/core/rhi/windows/dx11/shader/shader_ps_dx11.cpp new file mode 100644 index 0000000..ce0c13d --- /dev/null +++ b/core/rhi/windows/dx11/shader/shader_ps_dx11.cpp @@ -0,0 +1,6 @@ +#include "shader_ps_dx11.h" + +HRESULT shader_ps_dx11::create_shader(ID3DBlob* blob, ID3D11Device* device) +{ + return device->CreatePixelShader(blob->GetBufferPointer(), blob->GetBufferSize(), nullptr, pixel_shader_.writeRef()); +} diff --git a/core/rhi/windows/dx11/shader/shader_ps_dx11.h b/core/rhi/windows/dx11/shader/shader_ps_dx11.h new file mode 100644 index 0000000..bb5365d --- /dev/null +++ b/core/rhi/windows/dx11/shader/shader_ps_dx11.h @@ -0,0 +1,20 @@ +#pragma once +#include "shader_dx11.h" +#include "slang-com-ptr.h" + +class shader_ps_dx11 : public shader_dx11 +{ +public: + explicit shader_ps_dx11(const std::shared_ptr& handle) + : shader_dx11(handle) + { + } + + HRESULT create_shader(ID3DBlob* blob, ID3D11Device* device) override; + + [[nodiscard]] ID3D11DeviceChild* get_shader() override { return pixel_shader_; } + [[nodiscard]] const char* get_shader_model() const override { return "ps_5_0"; } + [[nodiscard]] bool is_initialized() const override { return pixel_shader_ != nullptr; } +private: + Slang::ComPtr pixel_shader_; +}; diff --git a/core/rhi/windows/dx11/shader/shader_vs_dx11.cpp b/core/rhi/windows/dx11/shader/shader_vs_dx11.cpp new file mode 100644 index 0000000..7e148ef --- /dev/null +++ 
b/core/rhi/windows/dx11/shader/shader_vs_dx11.cpp @@ -0,0 +1,6 @@ +#include "shader_vs_dx11.h" + +HRESULT shader_vs_dx11::create_shader(ID3DBlob* blob, ID3D11Device* device) +{ + return device->CreateVertexShader(blob->GetBufferPointer(), blob->GetBufferSize(), nullptr, vertex_shader_.writeRef()); +} diff --git a/core/rhi/windows/dx11/shader/shader_vs_dx11.h b/core/rhi/windows/dx11/shader/shader_vs_dx11.h new file mode 100644 index 0000000..cb53a21 --- /dev/null +++ b/core/rhi/windows/dx11/shader/shader_vs_dx11.h @@ -0,0 +1,20 @@ +#pragma once +#include "shader_dx11.h" +#include "slang-com-ptr.h" + +class shader_vs_dx11 : public shader_dx11 +{ +public: + explicit shader_vs_dx11(const std::shared_ptr& handle) + : shader_dx11(handle) + { + } + + HRESULT create_shader(ID3DBlob* blob, ID3D11Device* device) override; + + [[nodiscard]] ID3D11DeviceChild* get_shader() override { return vertex_shader_; } + [[nodiscard]] const char* get_shader_model() const override { return "vs_5_0"; } + [[nodiscard]] bool is_initialized() const override { return vertex_shader_ != nullptr; } +private: + Slang::ComPtr vertex_shader_; +}; diff --git a/third_party/HLSLcc/.editorconfig b/third_party/HLSLcc/.editorconfig deleted file mode 100644 index 51442de..0000000 --- a/third_party/HLSLcc/.editorconfig +++ /dev/null @@ -1,73 +0,0 @@ -# see http://editorconfig.org/ for docs on this file - -root = true - -[*] -# help with sharing files across os's (i.e. 
network share or through local vm) -end_of_line = lf -#charset temporarily disabled due to bug in VS2017 changing to UTF-8 with BOM (https://favro.com/card/c564ede4ed3337f7b17986b6/Uni-17877) -#charset = utf-8 -trim_trailing_whitespace = true -insert_final_newline = true - -# formattable file extensions (keep in sync with format.ini from unity-meta repo) -# -# Note: We need to split the formattable files configs into shorter duplicate entries (logically grouped) -# due to known issue in VS editorconfig extension where there is a limit of 51 characters (empirically determined). -# see: https://github.com/editorconfig/editorconfig-visualstudio/issues/21 -# -## uncrustify -[*.{c,h,cpp,hpp,m,mm,cc,cs}] -indent_style = space -indent_size = 4 - -## generic formatter (shaders) -[*.{cg,cginc,glslinc,hlsl,shader,y,ypp,yy}] -indent_style = space -indent_size = 4 - -## generic formatter (misc) -[*.{asm,s,S,pch,pchmm,java,sh,uss}] -indent_style = space -indent_size = 4 - -## perltidy -[*.{pl,pm,t,it}] -indent_style = space -indent_size = 4 - -## unity special -[*.{bindings,mem.xml}] -indent_style = space -indent_size = 4 - -# other filetypes we want to overwrite default configuration to preserve the standard -[{Makefile,makefile}] -# TAB characters are part of the Makefile format -indent_style = tab - -[*.{md,markdown}] -# trailing whitespace is significant in markdown (bad choice, bad!) -trim_trailing_whitespace = false - -# keep these and the VS stuff below in sync with .hgeol's CRLF extensions -[*.{vcproj,bat,cmd,xaml,tt,t4,ttinclude}] -end_of_line = crlf - -# this VS-specific stuff is based on experiments to see how VS will modify a file after it has been manually edited. -# the settings are meant to closely match what VS does to minimize unnecessary diffs. this duplicates some settings in * -# but let's be explicit here to be safe (in case someone wants to copy-paste this out to another .editorconfig). 
-[*.{vcxproj,vcxproj.filters,csproj,props,targets}] -indent_style = space -indent_size = 2 -end_of_line = crlf -charset = utf-8-bom -trim_trailing_whitespace = true -insert_final_newline = false -[*.{sln,sln.template}] -indent_style = tab -indent_size = 4 -end_of_line = crlf -charset = utf-8 -trim_trailing_whitespace = true -insert_final_newline = false diff --git a/third_party/HLSLcc/.gitignore b/third_party/HLSLcc/.gitignore deleted file mode 100644 index 4581ef2..0000000 --- a/third_party/HLSLcc/.gitignore +++ /dev/null @@ -1,29 +0,0 @@ -# Compiled Object files -*.slo -*.lo -*.o -*.obj - -# Precompiled Headers -*.gch -*.pch - -# Compiled Dynamic libraries -*.so -*.dylib -*.dll - -# Fortran module files -*.mod -*.smod - -# Compiled Static libraries -*.lai -*.la -*.a -*.lib - -# Executables -*.exe -*.out -*.app diff --git a/third_party/HLSLcc/CMakeLists.txt b/third_party/HLSLcc/CMakeLists.txt deleted file mode 100644 index 176dd6e..0000000 --- a/third_party/HLSLcc/CMakeLists.txt +++ /dev/null @@ -1,51 +0,0 @@ - -cmake_minimum_required(VERSION 3.15) - -project(HLSLcc) -set(CMAKE_CXX_STANDARD 11) - -option(HLSLCC_LIBRARY_SHARED "Build shared library instead of static." 
ON) - -file(GLOB HLSLCC_HDRS "${CMAKE_CURRENT_SOURCE_DIR}/include/*") -set(HLSLCC_SRC - src/ControlFlowGraph.cpp - src/ControlFlowGraphUtils.cpp - src/DataTypeAnalysis.cpp - src/Declaration.cpp - src/decode.cpp - src/HLSLcc.cpp - src/HLSLccToolkit.cpp - src/HLSLCrossCompilerContext.cpp - src/Instruction.cpp - src/LoopTransform.cpp - src/Operand.cpp - src/reflect.cpp - src/Shader.cpp - src/ShaderInfo.cpp - src/toGLSL.cpp - src/toGLSLDeclaration.cpp - src/toGLSLInstruction.cpp - src/toGLSLOperand.cpp - src/toMetal.cpp - src/toMetalDeclaration.cpp - src/toMetalInstruction.cpp - src/toMetalOperand.cpp - src/UseDefineChains.cpp - src/cbstring/bsafe.c - src/cbstring/bstraux.c - src/cbstring/bstrlib.c) - -if(HLSLCC_LIBRARY_SHARED) - add_library(${PROJECT_NAME} SHARED ${HLSLCC_SRC}) -else() - add_library(${PROJECT_NAME} STATIC ${HLSLCC_SRC}) -endif() - -target_include_directories(${PROJECT_NAME} - PUBLIC - ${CMAKE_CURRENT_SOURCE_DIR} - ${CMAKE_CURRENT_SOURCE_DIR}/include - PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR}/src - ${CMAKE_CURRENT_SOURCE_DIR}/src/cbstring - ${CMAKE_CURRENT_SOURCE_DIR}/src/internal_includes) diff --git a/third_party/HLSLcc/README.md b/third_party/HLSLcc/README.md deleted file mode 100644 index 9fe4a5b..0000000 --- a/third_party/HLSLcc/README.md +++ /dev/null @@ -1,54 +0,0 @@ -# HLSLcc -DirectX shader bytecode cross compiler. - -Originally based on https://github.com/James-Jones/HLSLCrossCompiler. - -This library takes DirectX bytecode as input, and translates it into the following languages: -- GLSL (OpenGL 3.2 and later) -- GLSL ES (OpenGL ES 2.0 and later) -- GLSL for Vulkan consumption (as input for Glslang to generate SPIR-V) -- Metal Shading Language - -This library is used to generate all shaders in Unity for OpenGL, OpenGL ES 3.0+, Metal and Vulkan. - -Changes from original HLSLCrossCompiler: -- Codebase changed to C++11, with major code reorganizations. 
-- Support for multiple language output backends (currently ToGLSL and ToMetal) -- Metal language output support -- Temp register type analysis: In DX bytecode the registers are typeless 32-bit 4-vectors. We do code analysis to infer the actual data types (to prevent the need for tons of bitcasts). -- Loop transformation: Detect constructs that look like for-loops and transform them back to their original form -- Support for partial precision variables in HLSL (min16float etc). Do extra analysis pass to infer the intended precision of samplers. -- Reflection interface to retrieve the shader inputs and their types. -- Lots of workarounds for various driver/shader compiler bugs. -- Lots of minor fixes and improvements for correctness -- Lots of Unity-specific tweaks to allow extending HLSL without having to change the D3D compiler itself. - -## Note - -This project is originally integrated into the Unity build systems. However, building this library should be fairly straightforward: just compile `src/*.cpp` (in C++11 mode!) and `src/cbstring/*.c` with the following include paths: - -- include -- src/internal_includes -- src/cbstrinc -- src - -Alternatively, a CMakeLists.txt is provided to build the project using cmake. - -The main entry point is TranslateHLSLFromMem() function in HLSLcc.cpp (taking DX bytecode as input). - - -## Contributors -- Mikko Strandborg -- Juho Oravainen -- David Rogers -- Marton Ekler -- Antti Tapaninen -- Florian Penzkofer -- Alexey Orlov -- Povilas Kanapickas -- Aleksandr Kirillov -- Kay Chang - -## License - -MIT license for HLSLcc itself, BSD license for the bstring library. See license.txt. 
diff --git a/third_party/HLSLcc/include/ShaderInfo.h b/third_party/HLSLcc/include/ShaderInfo.h deleted file mode 100644 index dbaf258..0000000 --- a/third_party/HLSLcc/include/ShaderInfo.h +++ /dev/null @@ -1,510 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include "growing_array.h" -#include - -//Reflection -#define MAX_RESOURCE_BINDINGS 256 - -typedef enum _SHADER_VARIABLE_TYPE -{ - SVT_VOID = 0, - SVT_BOOL = 1, - SVT_INT = 2, - SVT_FLOAT = 3, - SVT_STRING = 4, - SVT_TEXTURE = 5, - SVT_TEXTURE1D = 6, - SVT_TEXTURE2D = 7, - SVT_TEXTURE3D = 8, - SVT_TEXTURECUBE = 9, - SVT_SAMPLER = 10, - SVT_PIXELSHADER = 15, - SVT_VERTEXSHADER = 16, - SVT_UINT = 19, - SVT_UINT8 = 20, - SVT_GEOMETRYSHADER = 21, - SVT_RASTERIZER = 22, - SVT_DEPTHSTENCIL = 23, - SVT_BLEND = 24, - SVT_BUFFER = 25, - SVT_CBUFFER = 26, - SVT_TBUFFER = 27, - SVT_TEXTURE1DARRAY = 28, - SVT_TEXTURE2DARRAY = 29, - SVT_RENDERTARGETVIEW = 30, - SVT_DEPTHSTENCILVIEW = 31, - SVT_TEXTURE2DMS = 32, - SVT_TEXTURE2DMSARRAY = 33, - SVT_TEXTURECUBEARRAY = 34, - SVT_HULLSHADER = 35, - SVT_DOMAINSHADER = 36, - SVT_INTERFACE_POINTER = 37, - SVT_COMPUTESHADER = 38, - SVT_DOUBLE = 39, - SVT_RWTEXTURE1D = 40, - SVT_RWTEXTURE1DARRAY = 41, - SVT_RWTEXTURE2D = 42, - SVT_RWTEXTURE2DARRAY = 43, - SVT_RWTEXTURE3D = 44, - SVT_RWBUFFER = 45, - SVT_BYTEADDRESS_BUFFER = 46, - SVT_RWBYTEADDRESS_BUFFER = 47, - SVT_STRUCTURED_BUFFER = 48, - SVT_RWSTRUCTURED_BUFFER = 49, - SVT_APPEND_STRUCTURED_BUFFER = 50, - SVT_CONSUME_STRUCTURED_BUFFER = 51, - - - // Only used as a marker when analyzing register types - SVT_FORCED_INT = 152, - // Integer that can be either signed or unsigned. Only used as an intermediate step when doing data type analysis - SVT_INT_AMBIGUOUS = 153, - - // Partial precision types. 
Used when doing type analysis - SVT_FLOAT10 = 53, // Seems to be used in constant buffers - SVT_FLOAT16 = 54, - SVT_INT16 = 156, - SVT_INT12 = 157, - SVT_UINT16 = 158, - - SVT_FORCE_DWORD = 0x7fffffff -} SHADER_VARIABLE_TYPE; - -typedef enum _SHADER_VARIABLE_CLASS -{ - SVC_SCALAR = 0, - SVC_VECTOR = (SVC_SCALAR + 1), - SVC_MATRIX_ROWS = (SVC_VECTOR + 1), - SVC_MATRIX_COLUMNS = (SVC_MATRIX_ROWS + 1), - SVC_OBJECT = (SVC_MATRIX_COLUMNS + 1), - SVC_STRUCT = (SVC_OBJECT + 1), - SVC_INTERFACE_CLASS = (SVC_STRUCT + 1), - SVC_INTERFACE_POINTER = (SVC_INTERFACE_CLASS + 1), - SVC_FORCE_DWORD = 0x7fffffff -} SHADER_VARIABLE_CLASS; - - -/////////////////////////////////////// -// Types - -enum TESSELLATOR_PARTITIONING -{ - TESSELLATOR_PARTITIONING_UNDEFINED = 0, - TESSELLATOR_PARTITIONING_INTEGER = 1, - TESSELLATOR_PARTITIONING_POW2 = 2, - TESSELLATOR_PARTITIONING_FRACTIONAL_ODD = 3, - TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN = 4 -}; - -enum TESSELLATOR_OUTPUT_PRIMITIVE -{ - TESSELLATOR_OUTPUT_UNDEFINED = 0, - TESSELLATOR_OUTPUT_POINT = 1, - TESSELLATOR_OUTPUT_LINE = 2, - TESSELLATOR_OUTPUT_TRIANGLE_CW = 3, - TESSELLATOR_OUTPUT_TRIANGLE_CCW = 4 -}; - -typedef enum TESSELLATOR_DOMAIN -{ - TESSELLATOR_DOMAIN_UNDEFINED = 0, - TESSELLATOR_DOMAIN_ISOLINE = 1, - TESSELLATOR_DOMAIN_TRI = 2, - TESSELLATOR_DOMAIN_QUAD = 3 -} TESSELLATOR_DOMAIN; - -enum SPECIAL_NAME -{ - NAME_UNDEFINED = 0, - NAME_POSITION = 1, - NAME_CLIP_DISTANCE = 2, - NAME_CULL_DISTANCE = 3, - NAME_RENDER_TARGET_ARRAY_INDEX = 4, - NAME_VIEWPORT_ARRAY_INDEX = 5, - NAME_VERTEX_ID = 6, - NAME_PRIMITIVE_ID = 7, - NAME_INSTANCE_ID = 8, - NAME_IS_FRONT_FACE = 9, - NAME_SAMPLE_INDEX = 10, - // The following are added for D3D11 - NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR = 11, - NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR = 12, - NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR = 13, - NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR = 14, - NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR = 15, - NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR = 16, - 
NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR = 17, - NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR = 18, - NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR = 19, - NAME_FINAL_TRI_INSIDE_TESSFACTOR = 20, - NAME_FINAL_LINE_DETAIL_TESSFACTOR = 21, - NAME_FINAL_LINE_DENSITY_TESSFACTOR = 22, -}; - - -enum INOUT_COMPONENT_TYPE -{ - INOUT_COMPONENT_UNKNOWN = 0, - INOUT_COMPONENT_UINT32 = 1, - INOUT_COMPONENT_SINT32 = 2, - INOUT_COMPONENT_FLOAT32 = 3 -}; - -enum MIN_PRECISION -{ - MIN_PRECISION_DEFAULT = 0, - MIN_PRECISION_FLOAT_16 = 1, - MIN_PRECISION_FLOAT_2_8 = 2, - MIN_PRECISION_RESERVED = 3, - MIN_PRECISION_SINT_16 = 4, - MIN_PRECISION_UINT_16 = 5, - MIN_PRECISION_ANY_16 = 0xf0, - MIN_PRECISION_ANY_10 = 0xf1 -}; - -enum ResourceType -{ - RTYPE_CBUFFER,//0 - RTYPE_TBUFFER,//1 - RTYPE_TEXTURE,//2 - RTYPE_SAMPLER,//3 - RTYPE_UAV_RWTYPED,//4 - RTYPE_STRUCTURED,//5 - RTYPE_UAV_RWSTRUCTURED,//6 - RTYPE_BYTEADDRESS,//7 - RTYPE_UAV_RWBYTEADDRESS,//8 - RTYPE_UAV_APPEND_STRUCTURED,//9 - RTYPE_UAV_CONSUME_STRUCTURED,//10 - RTYPE_UAV_RWSTRUCTURED_WITH_COUNTER,//11 - RTYPE_COUNT, -}; - -enum ResourceGroup -{ - RGROUP_CBUFFER, - RGROUP_TEXTURE, - RGROUP_SAMPLER, - RGROUP_UAV, - RGROUP_COUNT, -}; - -enum REFLECT_RESOURCE_DIMENSION -{ - REFLECT_RESOURCE_DIMENSION_UNKNOWN = 0, - REFLECT_RESOURCE_DIMENSION_BUFFER = 1, - REFLECT_RESOURCE_DIMENSION_TEXTURE1D = 2, - REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY = 3, - REFLECT_RESOURCE_DIMENSION_TEXTURE2D = 4, - REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY = 5, - REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS = 6, - REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY = 7, - REFLECT_RESOURCE_DIMENSION_TEXTURE3D = 8, - REFLECT_RESOURCE_DIMENSION_TEXTURECUBE = 9, - REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY = 10, - REFLECT_RESOURCE_DIMENSION_BUFFEREX = 11, -}; - -enum REFLECT_RESOURCE_PRECISION -{ - REFLECT_RESOURCE_PRECISION_UNKNOWN = 0, - REFLECT_RESOURCE_PRECISION_LOWP = 1, - REFLECT_RESOURCE_PRECISION_MEDIUMP = 2, - REFLECT_RESOURCE_PRECISION_HIGHP = 3, -}; - -enum RESOURCE_RETURN_TYPE 
-{ - RETURN_TYPE_UNORM = 1, - RETURN_TYPE_SNORM = 2, - RETURN_TYPE_SINT = 3, - RETURN_TYPE_UINT = 4, - RETURN_TYPE_FLOAT = 5, - RETURN_TYPE_MIXED = 6, - RETURN_TYPE_DOUBLE = 7, - RETURN_TYPE_CONTINUED = 8, - RETURN_TYPE_UNUSED = 9, -}; - -typedef std::map HLSLccSamplerPrecisionInfo; - -struct ResourceBinding -{ - std::string name; - ResourceType eType; - uint32_t ui32BindPoint; - uint32_t ui32BindCount; - uint32_t ui32Flags; - uint32_t ui32Space; - uint32_t ui32RangeID; - REFLECT_RESOURCE_DIMENSION eDimension; - RESOURCE_RETURN_TYPE ui32ReturnType; - uint32_t ui32NumSamples; - REFLECT_RESOURCE_PRECISION ePrecision; - int m_SamplerMode; // (SB_SAMPLER_MODE) For samplers, this is the sampler mode this sampler is declared with - - SHADER_VARIABLE_TYPE GetDataType() const - { - switch (ePrecision) - { - case REFLECT_RESOURCE_PRECISION_LOWP: - switch (ui32ReturnType) - { - case RETURN_TYPE_UNORM: - case RETURN_TYPE_SNORM: - case RETURN_TYPE_FLOAT: - return SVT_FLOAT10; - case RETURN_TYPE_SINT: - return SVT_INT16; - case RETURN_TYPE_UINT: - return SVT_UINT16; - default: -// ASSERT(0); - return SVT_FLOAT10; - } - - case REFLECT_RESOURCE_PRECISION_MEDIUMP: - switch (ui32ReturnType) - { - case RETURN_TYPE_UNORM: - case RETURN_TYPE_SNORM: - case RETURN_TYPE_FLOAT: - return SVT_FLOAT16; - case RETURN_TYPE_SINT: - return SVT_INT16; - case RETURN_TYPE_UINT: - return SVT_UINT16; - default: -// ASSERT(0); - return SVT_FLOAT16; - } - - default: - switch (ui32ReturnType) - { - case RETURN_TYPE_UNORM: - case RETURN_TYPE_SNORM: - case RETURN_TYPE_FLOAT: - return SVT_FLOAT; - case RETURN_TYPE_SINT: - return SVT_INT; - case RETURN_TYPE_UINT: - return SVT_UINT; - case RETURN_TYPE_DOUBLE: - return SVT_DOUBLE; - default: -// ASSERT(0); - return SVT_FLOAT; - } - } - } -}; - -struct ShaderVarType -{ - ShaderVarType() : - Class(), - Type(), - Rows(), - Columns(), - Elements(), - MemberCount(), - Offset(), - ParentCount(), - Parent(), - m_IsUsed(false) - {} - - SHADER_VARIABLE_CLASS Class; - 
SHADER_VARIABLE_TYPE Type; - uint32_t Rows; - uint32_t Columns; - uint32_t Elements; - uint32_t MemberCount; - uint32_t Offset; - std::string name; - - uint32_t ParentCount; - struct ShaderVarType * Parent; - //Includes all parent names. - std::string fullName; - - std::vector Members; - - bool m_IsUsed; // If not set, is not used in the shader code - - uint32_t GetMemberCount() const - { - if (Class == SVC_STRUCT) - { - uint32_t res = 0; - std::vector::const_iterator itr; - for (itr = Members.begin(); itr != Members.end(); itr++) - { - res += itr->GetMemberCount(); - } - return res; - } - else - return 1; - } -}; - -struct ShaderVar -{ - std::string name; - int haveDefaultValue; - std::vector pui32DefaultValues; - //Offset/Size in bytes. - uint32_t ui32StartOffset; - uint32_t ui32Size; - - ShaderVarType sType; -}; - -struct ConstantBuffer -{ - std::string name; - - std::vector asVars; - - uint32_t ui32TotalSizeInBytes; - - uint32_t GetMemberCount(bool stripUnused) const - { - uint32_t res = 0; - std::vector::const_iterator itr; - for (itr = asVars.begin(); itr != asVars.end(); itr++) - { - if (stripUnused && !itr->sType.m_IsUsed) - continue; - res += itr->sType.GetMemberCount(); - } - return res; - } -}; - -struct ClassType -{ - std::string name; - uint16_t ui16ID; - uint16_t ui16ConstBufStride; - uint16_t ui16Texture; - uint16_t ui16Sampler; -}; - -struct ClassInstance -{ - std::string name; - uint16_t ui16ID; - uint16_t ui16ConstBuf; - uint16_t ui16ConstBufOffset; - uint16_t ui16Texture; - uint16_t ui16Sampler; -}; - -class Operand; - -class ShaderInfo -{ -public: - - struct InOutSignature - { - std::string semanticName; - uint32_t ui32SemanticIndex; - SPECIAL_NAME eSystemValueType; - INOUT_COMPONENT_TYPE eComponentType; - uint32_t ui32Register; - uint32_t ui32Mask; - uint32_t ui32ReadWriteMask; - - int iRebase; // If mask does not start from zero, this indicates the offset that needs to be subtracted from each swizzle - - uint32_t ui32Stream; - MIN_PRECISION 
eMinPrec; - - std::set isIndexed; // Set of phases where this input/output is part of a index range. - std::map indexStart; // If indexed, contains the start index for the range - std::map index; // If indexed, contains the current index relative to the index start. - }; - - ShaderInfo() : - ui32MajorVersion(), - ui32MinorVersion(), - psResourceBindings(), - psConstantBuffers(), - psThisPointerConstBuffer(), - psClassTypes(), - psClassInstances() - {} - - SHADER_VARIABLE_TYPE GetTextureDataType(uint32_t regNo); - - int GetResourceFromBindingPoint(const ResourceGroup eGroup, const uint32_t ui32BindPoint, const ResourceBinding** ppsOutBinding) const; - - void GetConstantBufferFromBindingPoint(const ResourceGroup eGroup, const uint32_t ui32BindPoint, const ConstantBuffer** ppsConstBuf) const; - - int GetInterfaceVarFromOffset(uint32_t ui32Offset, ShaderVar** ppsShaderVar) const; - - int GetInputSignatureFromRegister(const uint32_t ui32Register, const uint32_t ui32Mask, const InOutSignature** ppsOut, bool allowNull = false) const; - int GetPatchConstantSignatureFromRegister(const uint32_t ui32Register, const uint32_t ui32Mask, const InOutSignature** ppsOut, bool allowNull = false) const; - int GetOutputSignatureFromRegister(const uint32_t ui32Register, - const uint32_t ui32CompMask, - const uint32_t ui32Stream, - const InOutSignature** ppsOut, - bool allowNull = false) const; - - int GetOutputSignatureFromSystemValue(SPECIAL_NAME eSystemValueType, uint32_t ui32SemanticIndex, const InOutSignature** ppsOut) const; - - static ResourceGroup ResourceTypeToResourceGroup(ResourceType); - - static uint32_t GetCBVarSize(const ShaderVarType* psType, bool matrixAsVectors, bool wholeArraySize = false); - - static int GetShaderVarFromOffset(const uint32_t ui32Vec4Offset, - const uint32_t(&pui32Swizzle)[4], - const ConstantBuffer* psCBuf, - const ShaderVarType** ppsShaderVar, - bool* isArray, - std::vector* arrayIndices, - int32_t* pi32Rebase, - uint32_t flags); - - static 
std::string GetShaderVarIndexedFullName(const ShaderVarType* psShaderVar, const std::vector& indices, const std::string& dynamicIndex, bool revertDynamicIndexCalc, bool matrixAsVectors); - - // Apply shader precision information to resource bindings - void AddSamplerPrecisions(HLSLccSamplerPrecisionInfo &info); - - uint32_t ui32MajorVersion; - uint32_t ui32MinorVersion; - - std::vector psInputSignatures; - std::vector psOutputSignatures; - std::vector psPatchConstantSignatures; - - std::vector psResourceBindings; - - std::vector psConstantBuffers; - ConstantBuffer* psThisPointerConstBuffer; - - std::vector psClassTypes; - std::vector psClassInstances; - - //Func table ID to class name ID. - HLSLcc::growing_vector aui32TableIDToTypeID; - - HLSLcc::growing_vector aui32ResourceMap[RGROUP_COUNT]; - - HLSLcc::growing_vector sGroupSharedVarType; - - TESSELLATOR_PARTITIONING eTessPartitioning; - TESSELLATOR_OUTPUT_PRIMITIVE eTessOutPrim; - uint32_t ui32TessInputControlPointCount; - uint32_t ui32TessOutputControlPointCount; - TESSELLATOR_DOMAIN eTessDomain; - bool bEarlyFragmentTests; -}; diff --git a/third_party/HLSLcc/include/UnityInstancingFlexibleArraySize.h b/third_party/HLSLcc/include/UnityInstancingFlexibleArraySize.h deleted file mode 100644 index 70fb308..0000000 --- a/third_party/HLSLcc/include/UnityInstancingFlexibleArraySize.h +++ /dev/null @@ -1,23 +0,0 @@ -#pragma once - -// In Unity, instancing array sizes should be able to be dynamically patched at runtime by defining the macro. 
- -#include -#define UNITY_RUNTIME_INSTANCING_ARRAY_SIZE_MACRO "UNITY_RUNTIME_INSTANCING_ARRAY_SIZE" -#define UNITY_PRETRANSFORM_CONSTANT_NAME "UnityDisplayOrientationPreTransform" - -const unsigned int kArraySizeConstantID = 0; -const unsigned int kPreTransformConstantID = 1; - -// TODO: share with Runtime/GfxDevice/InstancingUtilities.h -inline bool IsUnityInstancingConstantBufferName(const char* cbName) -{ - static const char kInstancedCbNamePrefix[] = "UnityInstancing"; - return strncmp(cbName, kInstancedCbNamePrefix, sizeof(kInstancedCbNamePrefix) - 1) == 0; -} - -inline bool IsPreTransformConstantBufferName(const char* cbName) -{ - static const char kPreTransformCbNamePrefix[] = "UnityDisplayOrientationPreTransformData"; - return strncmp(cbName, kPreTransformCbNamePrefix, sizeof(kPreTransformCbNamePrefix) - 1) == 0; -} diff --git a/third_party/HLSLcc/include/growing_array.h b/third_party/HLSLcc/include/growing_array.h deleted file mode 100644 index d558665..0000000 --- a/third_party/HLSLcc/include/growing_array.h +++ /dev/null @@ -1,45 +0,0 @@ -#pragma once - -namespace HLSLcc -{ - // A vector that automatically grows when written to, fills the intermediate ones with default value. - // Reading from an index returns the default value if attempting to access out of bounds. 
- template class growing_vector - { - public: - growing_vector() : data() {} - - std::vector data; - - T & operator[](std::size_t idx) - { - if (idx >= data.size()) - data.resize((idx + 1) * 2); - return data[idx]; - } - - const T & operator[](std::size_t idx) const - { - static T defaultValue = T(); - if (idx >= data.size()) - return defaultValue; - return data[idx]; - } - }; - - // Same but with bool specialization - template<> class growing_vector - { - public: - growing_vector() : data() {} - - std::vector data; - - std::vector::reference operator[](std::size_t idx) - { - if (idx >= data.size()) - data.resize((idx + 1) * 2, false); - return data[idx]; - } - }; -} diff --git a/third_party/HLSLcc/include/hlslcc.h b/third_party/HLSLcc/include/hlslcc.h deleted file mode 100644 index dc7853a..0000000 --- a/third_party/HLSLcc/include/hlslcc.h +++ /dev/null @@ -1,816 +0,0 @@ -#ifndef HLSLCC_H_ -#define HLSLCC_H_ - -#include -#include -#include -#include - -#if defined(_WIN32) && defined(HLSLCC_DYNLIB) - #define HLSLCC_APIENTRY __stdcall - #if defined(libHLSLcc_EXPORTS) - #define HLSLCC_API __declspec(dllexport) - #else - #define HLSLCC_API __declspec(dllimport) - #endif -#else - #define HLSLCC_APIENTRY - #define HLSLCC_API -#endif - -#include -#include - -typedef enum -{ - LANG_DEFAULT,// Depends on the HLSL shader model. 
- LANG_ES_100, LANG_ES_FIRST = LANG_ES_100, - LANG_ES_300, - LANG_ES_310, LANG_ES_LAST = LANG_ES_310, - LANG_120, LANG_GL_FIRST = LANG_120, - LANG_130, - LANG_140, - LANG_150, - LANG_330, - LANG_400, - LANG_410, - LANG_420, - LANG_430, - LANG_440, LANG_GL_LAST = LANG_440, - LANG_METAL, -} GLLang; - -typedef struct GlExtensions -{ - uint32_t ARB_explicit_attrib_location : 1; - uint32_t ARB_explicit_uniform_location : 1; - uint32_t ARB_shading_language_420pack : 1; - uint32_t OVR_multiview : 1; - uint32_t EXT_shader_framebuffer_fetch : 1; -} GlExtensions; - -#include "ShaderInfo.h" -#include "UnityInstancingFlexibleArraySize.h" - -typedef std::vector TextureSamplerPairs; - -typedef enum INTERPOLATION_MODE -{ - INTERPOLATION_UNDEFINED = 0, - INTERPOLATION_CONSTANT = 1, - INTERPOLATION_LINEAR = 2, - INTERPOLATION_LINEAR_CENTROID = 3, - INTERPOLATION_LINEAR_NOPERSPECTIVE = 4, - INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID = 5, - INTERPOLATION_LINEAR_SAMPLE = 6, - INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE = 7, -} INTERPOLATION_MODE; - -#define PS_FLAG_VERTEX_SHADER 0x1 -#define PS_FLAG_HULL_SHADER 0x2 -#define PS_FLAG_DOMAIN_SHADER 0x4 -#define PS_FLAG_GEOMETRY_SHADER 0x8 -#define PS_FLAG_PIXEL_SHADER 0x10 - -#define TO_FLAG_NONE 0x0 -#define TO_FLAG_INTEGER 0x1 -#define TO_FLAG_NAME_ONLY 0x2 -#define TO_FLAG_DECLARATION_NAME 0x4 -#define TO_FLAG_DESTINATION 0x8 //Operand is being written to by assignment. -#define TO_FLAG_UNSIGNED_INTEGER 0x10 -#define TO_FLAG_DOUBLE 0x20 -// --- TO_AUTO_BITCAST_TO_FLOAT --- -//If the operand is an integer temp variable then this flag -//indicates that the temp has a valid floating point encoding -//and that the current expression expects the operand to be floating point -//and therefore intBitsToFloat must be applied to that variable. 
-#define TO_AUTO_BITCAST_TO_FLOAT 0x40 -#define TO_AUTO_BITCAST_TO_INT 0x80 -#define TO_AUTO_BITCAST_TO_UINT 0x100 -// AUTO_EXPAND flags automatically expand the operand to at least (i/u)vecX -// to match HLSL functionality. -#define TO_AUTO_EXPAND_TO_VEC2 0x200 -#define TO_AUTO_EXPAND_TO_VEC3 0x400 -#define TO_AUTO_EXPAND_TO_VEC4 0x800 -#define TO_FLAG_BOOL 0x1000 -// These flags are only used for Metal: -// Force downscaling of the operand to match -// the other operand (Metal doesn't like mixing halfs with floats) -#define TO_FLAG_FORCE_HALF 0x2000 - -typedef enum -{ - INVALID_SHADER = -1, - PIXEL_SHADER, - VERTEX_SHADER, - GEOMETRY_SHADER, - HULL_SHADER, - DOMAIN_SHADER, - COMPUTE_SHADER, -} SHADER_TYPE; - -// Enum for texture dimension reflection data -typedef enum -{ - TD_FLOAT = 0, - TD_INT, - TD_2D, - TD_3D, - TD_CUBE, - TD_2DSHADOW, - TD_2DARRAY, - TD_CUBEARRAY -} HLSLCC_TEX_DIMENSION; - -// The prefix for all temporary variables used by the generated code. -// Using a texture or uniform name like this will cause conflicts -#define HLSLCC_TEMP_PREFIX "u_xlat" - -typedef std::vector > MemberDefinitions; - -// We store struct definition contents inside a vector of strings -struct StructDefinition -{ - StructDefinition() : m_Members(), m_Dependencies(), m_IsPrinted(false) {} - - MemberDefinitions m_Members; // A vector of strings with the struct members - std::vector m_Dependencies; // A vector of struct names this struct depends on. - bool m_IsPrinted; // Has this struct been printed out yet? -}; - -typedef std::map StructDefinitions; - -// Map of extra function definitions we need to add before the shader body but after the declarations. 
-typedef std::map FunctionDefinitions; - -// A helper class for allocating binding slots -// (because both UAVs and textures use the same slots in Metal, also constant buffers and other buffers etc) -class BindingSlotAllocator -{ - typedef std::map SlotMap; - SlotMap m_Allocations; - uint32_t m_ShaderStageAllocations; -public: - BindingSlotAllocator() : m_Allocations(), m_ShaderStageAllocations(0) - { - for (int i = MAX_RESOURCE_BINDINGS - 1; i >= 0; i--) - m_FreeSlots.push_back(i); - } - - enum BindType - { - ConstantBuffer = 0, - RWBuffer, - Texture, - UAV - }; - - uint32_t GetBindingSlot(uint32_t regNo, BindType type) - { - // The key is regNumber with the bindtype stored to highest 16 bits - uint32_t key = (m_ShaderStageAllocations + regNo) | (uint32_t(type) << 16); - SlotMap::iterator itr = m_Allocations.find(key); - if (itr == m_Allocations.end()) - { - uint32_t slot = m_FreeSlots.back(); - m_FreeSlots.pop_back(); - m_Allocations.insert(std::make_pair(key, slot)); - return slot; - } - return itr->second; - } - - // Func for reserving binding slots with the original reg number. - // Used for fragment shader UAVs (SetRandomWriteTarget etc). - void ReserveBindingSlot(uint32_t regNo, BindType type) - { - uint32_t key = regNo | (uint32_t(type) << 16); - m_Allocations.insert(std::make_pair(key, regNo)); - - // Remove regNo from free slots - for (int i = m_FreeSlots.size() - 1; i >= 0; i--) - { - if (m_FreeSlots[i] == regNo) - { - m_FreeSlots.erase(m_FreeSlots.begin() + i); - return; - } - } - } - - uint32_t PeekFirstFreeSlot() const - { - return m_FreeSlots.back(); - } - - uint32_t SaveTotalShaderStageAllocationsCount() - { - m_ShaderStageAllocations = m_Allocations.size(); - return m_ShaderStageAllocations; - } - -private: - std::vector m_FreeSlots; -}; - -//The shader stages (Vertex, Pixel et al) do not depend on each other -//in HLSL. GLSL is a different story. 
HLSLCrossCompiler requires -//that hull shaders must be compiled before domain shaders, and -//the pixel shader must be compiled before all of the others. -//During compilation the GLSLCrossDependencyData struct will -//carry over any information needed about a different shader stage -//in order to construct valid GLSL shader combinations. - - -//Using GLSLCrossDependencyData is optional. However some shader -//combinations may show link failures, or runtime errors. -class GLSLCrossDependencyData -{ -public: - - struct GLSLBufferBindPointInfo - { - uint32_t slot; - bool known; - }; - - // A container for a single Vulkan resource binding ( pair) - struct VulkanResourceBinding - { - uint32_t set; - uint32_t binding; - }; - - enum GLSLBufferType - { - BufferType_ReadWrite, - BufferType_Constant, - BufferType_SSBO, - BufferType_Texture, - BufferType_UBO, - - BufferType_Count, - BufferType_Generic = BufferType_ReadWrite - }; - -private: - //Required if PixelInterpDependency is true - std::vector pixelInterpolation; - - // Map of varying locations, indexed by varying names. - typedef std::map VaryingLocations; - - static const int MAX_NAMESPACES = 6; // Max namespaces: vert input, hull input, domain input, geom input, ps input, (ps output) - - VaryingLocations varyingLocationsMap[MAX_NAMESPACES]; - uint32_t nextAvailableVaryingLocation[MAX_NAMESPACES]; - - typedef std::map VulkanResourceBindings; - VulkanResourceBindings m_VulkanResourceBindings; - uint32_t m_NextAvailableVulkanResourceBinding[8]; // one per set. - - typedef std::map GLSLResouceBindings; - -public: - GLSLResouceBindings m_GLSLResourceBindings; - uint32_t m_NextAvailableGLSLResourceBinding[BufferType_Count]; // UAV, Constant and Buffers have seperate binding ranges - uint32_t m_StructuredBufferBindPoints[MAX_RESOURCE_BINDINGS]; // for the old style bindings - - inline int GetVaryingNamespace(SHADER_TYPE eShaderType, bool isInput) - { - switch (eShaderType) - { - case VERTEX_SHADER: - return isInput ? 
0 : 1; - - case HULL_SHADER: - return isInput ? 1 : 2; - - case DOMAIN_SHADER: - return isInput ? 2 : 3; - - case GEOMETRY_SHADER: - // The input depends on whether there's a tessellation shader before us - if (isInput) - { - return ui32ProgramStages & PS_FLAG_DOMAIN_SHADER ? 3 : 1; - } - return 4; - - case PIXEL_SHADER: - // The inputs can come from geom shader, domain shader or directly from vertex shader - if (isInput) - { - if (ui32ProgramStages & PS_FLAG_GEOMETRY_SHADER) - { - return 4; - } - else if (ui32ProgramStages & PS_FLAG_DOMAIN_SHADER) - { - return 3; - } - else - { - return 1; - } - } - return 5; // This value never really used - default: - return 0; - } - } - -public: - GLSLCrossDependencyData() - : eTessPartitioning(), - eTessOutPrim(), - fMaxTessFactor(64.0), - numPatchesInThreadGroup(0), - hasControlPoint(false), - hasPatchConstant(false), - ui32ProgramStages(0), - m_ExtBlendModes() - { - memset(nextAvailableVaryingLocation, 0, sizeof(nextAvailableVaryingLocation)); - memset(m_NextAvailableVulkanResourceBinding, 0, sizeof(m_NextAvailableVulkanResourceBinding)); - memset(m_NextAvailableGLSLResourceBinding, 0, sizeof(m_NextAvailableGLSLResourceBinding)); - } - - // Retrieve the location for a varying with a given name. - // If the name doesn't already have an allocated location, allocate one - // and store it into the map. 
- inline uint32_t GetVaryingLocation(const std::string &name, SHADER_TYPE eShaderType, bool isInput, bool keepLocation, uint32_t maxSemanticIndex) - { - int nspace = GetVaryingNamespace(eShaderType, isInput); - VaryingLocations::iterator itr = varyingLocationsMap[nspace].find(name); - if (itr != varyingLocationsMap[nspace].end()) - return itr->second; - - if (keepLocation) - { - // Try to generate consistent varying locations based on the semantic indices in the hlsl source, i.e "TEXCOORD11" gets assigned to layout(location = 11) - - // Inspect last 2 characters in name - size_t len = name.length(); - - if (len > 1) - { - if (isdigit(name[len - 1])) - { - uint32_t index = 0; - if (isdigit(name[len - 2])) - index = atoi(&name[len - 2]); // 2-digits index - else - index = atoi(&name[len - 1]); // 1-digit index - - if (index < 32) // Some platforms only allow 32 varying locations - { - // Check that index is not already used - bool canUseIndex = true; - for (VaryingLocations::iterator it = varyingLocationsMap[nspace].begin(); it != varyingLocationsMap[nspace].end(); ++it) - { - if (it->second == index) - { - canUseIndex = false; - break; - } - } - - if (canUseIndex) - { - varyingLocationsMap[nspace].insert(std::make_pair(name, index)); - return index; - } - } - } - } - - // fallback: pick an unused index (max of already allocated AND of semanticIndices found by SignatureAnalysis - uint32_t maxIndexAlreadyAssigned = 0; - for (VaryingLocations::iterator it = varyingLocationsMap[nspace].begin(); it != varyingLocationsMap[nspace].end(); ++it) - maxIndexAlreadyAssigned = std::max(maxIndexAlreadyAssigned, it->second); - - uint32_t fallbackIndex = std::max(maxIndexAlreadyAssigned + 1, maxSemanticIndex + 1); - varyingLocationsMap[nspace].insert(std::make_pair(name, fallbackIndex)); - return fallbackIndex; - } - else - { - uint32_t newKey = nextAvailableVaryingLocation[nspace]; - nextAvailableVaryingLocation[nspace]++; - varyingLocationsMap[nspace].insert(std::make_pair(name, 
newKey)); - return newKey; - } - } - - // Retrieve the binding for a resource (texture, constant buffer, image) with a given name - // If not found, allocate a new one (in set 0) and return that - // The returned value is a pair of - // If the name contains "hlslcc_set_X_bind_Y", those values (from the first found occurence in the name) - // will be used instead, and all occurences of that string will be removed from name, so name parameter can be modified - // if allocRoomForCounter is true, the following binding number in the same set will be allocated with name + '_counter' - inline VulkanResourceBinding GetVulkanResourceBinding(std::string &name, bool allocRoomForCounter = false, uint32_t preferredSet = 0) - { - // scan for the special marker - const char *marker = "Xhlslcc_set_%d_bind_%dX"; - uint32_t Set = 0, Binding = 0; - size_t startLoc = name.find("Xhlslcc"); - if ((startLoc != std::string::npos) && (sscanf(name.c_str() + startLoc, marker, &Set, &Binding) == 2)) - { - // Get rid of all markers - while ((startLoc = name.find("Xhlslcc")) != std::string::npos) - { - size_t endLoc = name.find('X', startLoc + 1); - if (endLoc == std::string::npos) - break; - name.erase(startLoc, endLoc - startLoc + 1); - } - // Add to map - VulkanResourceBinding newBind = { Set, Binding }; - m_VulkanResourceBindings.insert(std::make_pair(name, newBind)); - if (allocRoomForCounter) - { - VulkanResourceBinding counterBind = { Set, Binding + 1 }; - m_VulkanResourceBindings.insert(std::make_pair(name + "_counter", counterBind)); - } - - return newBind; - } - - VulkanResourceBindings::iterator itr = m_VulkanResourceBindings.find(name); - if (itr != m_VulkanResourceBindings.end()) - return itr->second; - - // Allocate a new one - VulkanResourceBinding newBind = { preferredSet, m_NextAvailableVulkanResourceBinding[preferredSet] }; - m_NextAvailableVulkanResourceBinding[preferredSet]++; - m_VulkanResourceBindings.insert(std::make_pair(name, newBind)); - if (allocRoomForCounter) - { - 
VulkanResourceBinding counterBind = { preferredSet, m_NextAvailableVulkanResourceBinding[preferredSet] }; - m_NextAvailableVulkanResourceBinding[preferredSet]++; - m_VulkanResourceBindings.insert(std::make_pair(name + "_counter", counterBind)); - } - return newBind; - } - - // GLSL Bind point handling logic - // Handles both 'old style' fill around fixed UAV and new style partitioned offsets with fixed UAV locations - - // HLSL has separate register spaces for UAV and structured buffers. GLSL has shared register space for all buffers. - // The aim here is to preserve the UAV buffer bindings as they are and use remaining binding points for structured buffers. - // In this step make m_structuredBufferBindPoints contain increasingly ordered uints starting from zero. - // This is only used when we are doing old style binding setup - void SetupGLSLResourceBindingSlotsIndices() - { - for (uint32_t i = 0; i < MAX_RESOURCE_BINDINGS; i++) - { - m_StructuredBufferBindPoints[i] = i; - } - } - - void RemoveBindPointFromAvailableList(uint32_t bindPoint) - { - for (uint32_t i = 0; i < MAX_RESOURCE_BINDINGS - 1 && m_StructuredBufferBindPoints[i] <= bindPoint; i++) - { - if (m_StructuredBufferBindPoints[i] == bindPoint) // Remove uav binding point from the list by copying array remainder here - { - memcpy(&m_StructuredBufferBindPoints[i], &m_StructuredBufferBindPoints[i + 1], (MAX_RESOURCE_BINDINGS - 1 - i) * sizeof(uint32_t)); - break; - } - } - } - - void ReserveNamedBindPoint(const std::string &name, uint32_t bindPoint, GLSLBufferType type) - { - m_GLSLResourceBindings.insert(std::make_pair(name, bindPoint)); - RemoveBindPointFromAvailableList(bindPoint); - } - - bool ShouldUseBufferSpecificBinding(GLSLBufferType bufferType) - { - return bufferType == BufferType_Constant || bufferType == BufferType_Texture || bufferType == BufferType_UBO; - } - - uint32_t GetGLSLBufferBindPointIndex(GLSLBufferType bufferType) - { - uint32_t binding = -1; - - if 
(ShouldUseBufferSpecificBinding(bufferType)) - { - binding = m_NextAvailableGLSLResourceBinding[bufferType]; - } - else - { - binding = m_StructuredBufferBindPoints[m_NextAvailableGLSLResourceBinding[BufferType_Generic]]; - } - - return binding; - } - - void UpdateResourceBindingIndex(GLSLBufferType bufferType) - { - if (ShouldUseBufferSpecificBinding(bufferType)) - { - m_NextAvailableGLSLResourceBinding[bufferType]++; - } - else - { - m_NextAvailableGLSLResourceBinding[BufferType_Generic]++; - } - } - - inline GLSLBufferBindPointInfo GetGLSLResourceBinding(const std::string &name, GLSLBufferType bufferType) - { - GLSLResouceBindings::iterator itr = m_GLSLResourceBindings.find(name); - if (itr != m_GLSLResourceBindings.end()) - { - return GLSLBufferBindPointInfo{ itr->second, true }; - } - - uint32_t binding = GetGLSLBufferBindPointIndex(bufferType); - UpdateResourceBindingIndex(bufferType); - - m_GLSLResourceBindings.insert(std::make_pair(name, binding)); - - return GLSLBufferBindPointInfo{ binding, false }; - } - - //dcl_tessellator_partitioning and dcl_tessellator_output_primitive appear in hull shader for D3D, - //but they appear on inputs inside domain shaders for GL. - //Hull shader must be compiled before domain so the - //ensure correct partitioning and primitive type information - //can be saved when compiling hull and passed to domain compilation. - TESSELLATOR_PARTITIONING eTessPartitioning; - TESSELLATOR_OUTPUT_PRIMITIVE eTessOutPrim; - float fMaxTessFactor; - int numPatchesInThreadGroup; - bool hasControlPoint; - bool hasPatchConstant; - - // Bitfield for the shader stages this program is going to include (see PS_FLAG_*). - // Needed so we can construct proper shader input and output names - uint32_t ui32ProgramStages; - - std::vector m_ExtBlendModes; // The blend modes (from KHR_blend_equation_advanced) requested for this shader. See ext spec for list. 
- - inline INTERPOLATION_MODE GetInterpolationMode(uint32_t regNo) - { - if (regNo >= pixelInterpolation.size()) - return INTERPOLATION_UNDEFINED; - else - return pixelInterpolation[regNo]; - } - - inline void SetInterpolationMode(uint32_t regNo, INTERPOLATION_MODE mode) - { - if (regNo >= pixelInterpolation.size()) - pixelInterpolation.resize((regNo + 1) * 2, INTERPOLATION_UNDEFINED); - - pixelInterpolation[regNo] = mode; - } - - struct CompareFirst - { - CompareFirst(std::string val) : m_Val(val) {} - bool operator()(const std::pair& elem) const - { - return m_Val == elem.first; - } - - private: - std::string m_Val; - }; - - inline bool IsMemberDeclared(const std::string &name) - { - if (std::find_if(m_SharedFunctionMembers.begin(), m_SharedFunctionMembers.end(), CompareFirst(name)) != m_SharedFunctionMembers.end()) - return true; - return false; - } - - MemberDefinitions m_SharedFunctionMembers; - std::vector m_SharedDependencies; - BindingSlotAllocator m_SharedTextureSlots, m_SharedSamplerSlots; - BindingSlotAllocator m_SharedBufferSlots; - - inline void ClearCrossDependencyData() - { - pixelInterpolation.clear(); - for (int i = 0; i < MAX_NAMESPACES; i++) - { - varyingLocationsMap[i].clear(); - nextAvailableVaryingLocation[i] = 0; - } - m_SharedFunctionMembers.clear(); - m_SharedDependencies.clear(); - } - - bool IsHullShaderInputAlreadyDeclared(const std::string& name) - { - bool isKnown = false; - - for (size_t idx = 0, end = m_hullShaderInputs.size(); idx < end; ++idx) - { - if (m_hullShaderInputs[idx] == name) - { - isKnown = true; - break; - } - } - - return isKnown; - } - - void RecordHullShaderInput(const std::string& name) - { - m_hullShaderInputs.push_back(name); - } - - std::vector m_hullShaderInputs; -}; - -struct GLSLShader -{ - int shaderType; //One of the GL enums. 
- std::string sourceCode; - ShaderInfo reflection; - GLLang GLSLLanguage; - TextureSamplerPairs textureSamplers; // HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS fills this out -}; - -// Interface for retrieving reflection and diagnostics data -class HLSLccReflection -{ -public: - HLSLccReflection() {} - virtual ~HLSLccReflection() {} - - // Called on errors or diagnostic messages - virtual void OnDiagnostics(const std::string &error, int line, bool isError) {} - - virtual void OnInputBinding(const std::string &name, int bindIndex) {} - - // Returns false if this constant buffer is not needed for this shader. This info can be used for pruning unused - // constant buffers and vars from compute shaders where we need broader context than a single kernel to know - // if something can be dropped, as the constant buffers are shared between all kernels in a .compute file. - virtual bool OnConstantBuffer(const std::string &name, size_t bufferSize, size_t memberCount) { return true; } - - // Returns false if this constant var is not needed for this shader. See above. 
- virtual bool OnConstant(const std::string &name, int bindIndex, SHADER_VARIABLE_TYPE cType, int rows, int cols, bool isMatrix, int arraySize, bool isUsed) { return true; } - - virtual void OnConstantBufferBinding(const std::string &name, int bindIndex) {} - virtual void OnTextureBinding(const std::string &name, int bindIndex, int samplerIndex, bool multisampled, HLSLCC_TEX_DIMENSION dim, bool isUAV) {} - virtual void OnBufferBinding(const std::string &name, int bindIndex, bool isUAV) {} - virtual void OnThreadGroupSize(unsigned int xSize, unsigned int ySize, unsigned int zSize) {} - virtual void OnTessellationInfo(uint32_t tessPartitionMode, uint32_t tessOutputWindingOrder, uint32_t tessMaxFactor, uint32_t tessNumPatchesInThreadGroup) {} - virtual void OnTessellationKernelInfo(uint32_t patchKernelBufferCount) {} - - // these are for now metal only (but can be trivially added for other backends if needed) - // they are useful mostly for diagnostics as interim values are actually hidden from user - virtual void OnVertexProgramOutput(const std::string& name, const std::string& semantic, int semanticIndex) {} - virtual void OnBuiltinOutput(SPECIAL_NAME name) {} - virtual void OnFragmentOutputDeclaration(int numComponents, int outputIndex) {} - - - enum AccessType - { - ReadAccess = 1 << 0, - WriteAccess = 1 << 1 - }; - - virtual void OnStorageImage(int bindIndex, unsigned int access) {} -}; - - -/*HLSL constant buffers are treated as default-block unform arrays by default. This is done - to support versions of GLSL which lack ARB_uniform_buffer_object functionality. - Setting this flag causes each one to have its own uniform block. - Note: Currently the nth const buffer will be named UnformBufferN. 
This is likey to change to the original HLSL name in the future.*/ -static const unsigned int HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT = 0x1; - -static const unsigned int HLSLCC_FLAG_ORIGIN_UPPER_LEFT = 0x2; - -static const unsigned int HLSLCC_FLAG_PIXEL_CENTER_INTEGER = 0x4; - -static const unsigned int HLSLCC_FLAG_GLOBAL_CONSTS_NEVER_IN_UBO = 0x8; - -//GS enabled? -//Affects vertex shader (i.e. need to compile vertex shader again to use with/without GS). -//This flag is needed in order for the interfaces between stages to match when GS is in use. -//PS inputs VtxGeoOutput -//GS outputs VtxGeoOutput -//Vs outputs VtxOutput if GS enabled. VtxGeoOutput otherwise. -static const unsigned int HLSLCC_FLAG_GS_ENABLED = 0x10; - -static const unsigned int HLSLCC_FLAG_TESS_ENABLED = 0x20; - -//Either use this flag or glBindFragDataLocationIndexed. -//When set the first pixel shader output is the first input to blend -//equation, the others go to the second input. -static const unsigned int HLSLCC_FLAG_DUAL_SOURCE_BLENDING = 0x40; - -//If set, shader inputs and outputs are declared with their semantic name. -static const unsigned int HLSLCC_FLAG_INOUT_SEMANTIC_NAMES = 0x80; -//If set, shader inputs and outputs are declared with their semantic name appended. -static const unsigned int HLSLCC_FLAG_INOUT_APPEND_SEMANTIC_NAMES = 0x100; - -//If set, combines texture/sampler pairs used together into samplers named "texturename_X_samplername". -static const unsigned int HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS = 0x200; - -//If set, attribute and uniform explicit location qualifiers are disabled (even if the language version supports that) -static const unsigned int HLSLCC_FLAG_DISABLE_EXPLICIT_LOCATIONS = 0x400; - -//If set, global uniforms are not stored in a struct. -static const unsigned int HLSLCC_FLAG_DISABLE_GLOBALS_STRUCT = 0x800; - -//If set, image declarations will always have binding and format qualifiers. 
-static const unsigned int HLSLCC_FLAG_GLES31_IMAGE_QUALIFIERS = 0x1000; - -// If set, treats sampler names ending with _highp, _mediump, and _lowp as sampler precision qualifiers -// Also removes that prefix from generated output -static const unsigned int HLSLCC_FLAG_SAMPLER_PRECISION_ENCODED_IN_NAME = 0x2000; - -// If set, adds location qualifiers to intra-shader varyings. -static const unsigned int HLSLCC_FLAG_SEPARABLE_SHADER_OBJECTS = 0x4000; // NOTE: obsolete flag (behavior enabled by this flag began default in 83a16a1829cf) - -// If set, wraps all uniform buffer declarations in a preprocessor macro #ifdef HLSLCC_ENABLE_UNIFORM_BUFFERS -// so that if that macro is undefined, all UBO declarations will become normal uniforms -static const unsigned int HLSLCC_FLAG_WRAP_UBO = 0x8000; - -// If set, skips all members of the $Globals constant buffer struct that are not referenced in the shader code -static const unsigned int HLSLCC_FLAG_REMOVE_UNUSED_GLOBALS = 0x10000; - -#define HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING "hlslcc_mtx%dx%d" - -// If set, translates all matrix declarations into vec4 arrays (as the DX bytecode treats them), and prefixes the name with 'hlslcc_mtxx' -static const unsigned int HLSLCC_FLAG_TRANSLATE_MATRICES = 0x20000; - -// If set, emits Vulkan-style (set, binding) bindings, also captures that info from any declaration named "_hlslcc_set_X_bind_Y" -// Unless bindings are given explicitly, they are allocated into set 0 (map stored in GLSLCrossDependencyData) -static const unsigned int HLSLCC_FLAG_VULKAN_BINDINGS = 0x40000; - -// If set, metal output will use linear sampler for shadow compares, otherwise point sampler. -static const unsigned int HLSLCC_FLAG_METAL_SHADOW_SAMPLER_LINEAR = 0x80000; - -// If set, avoid emit atomic counter (ARB_shader_atomic_counters) and use atomic functions provided by ARB_shader_storage_buffer_object instead. 
-static const unsigned int HLSLCC_FLAG_AVOID_SHADER_ATOMIC_COUNTERS = 0x100000; - -// Unused 0x200000; - -// If set, this shader uses the GLSL extension EXT_shader_framebuffer_fetch -static const unsigned int HLSLCC_FLAG_SHADER_FRAMEBUFFER_FETCH = 0x400000; - -// Build for Switch. -static const unsigned int HLSLCC_FLAG_NVN_TARGET = 0x800000; - -// If set, generate an instance name for constant buffers. GLSL specs 4.5 disallows uniform variables from different constant buffers sharing the same name -// as long as they are part of the same final linked program. Uniform buffer instance names solve this cross-shader symbol conflict issue. -static const unsigned int HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT_WITH_INSTANCE_NAME = 0x1000000; - -// Massage shader steps into Metal compute kernel from vertex/hull shaders + post-tessellation vertex shader from domain shader -static const unsigned int HLSLCC_FLAG_METAL_TESSELLATION = 0x2000000; - -// Disable fastmath -static const unsigned int HLSLCC_FLAG_DISABLE_FASTMATH = 0x4000000; - -//If set, uniform explicit location qualifiers are enabled (even if the language version doesn't support that) -static const unsigned int HLSLCC_FLAG_FORCE_EXPLICIT_LOCATIONS = 0x8000000; - -// If set, each line of the generated source will be preceded by a comment specifying which DirectX bytecode instruction it maps to -static const unsigned int HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS = 0x10000000; - -// If set, try to generate consistent varying locations based on the semantic indices in the hlsl source, i.e "TEXCOORD11" gets assigned to layout(location = 11) -static const unsigned int HLSLCC_FLAG_KEEP_VARYING_LOCATIONS = 0x20000000; - -// Code generation might vary for mobile targets, or using lower sampler precision than full by default -static const unsigned int HLSLCC_FLAG_MOBILE_TARGET = 0x40000000; - -#ifdef __cplusplus -extern "C" { -#endif - -HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromFile(const char* filename, - unsigned int flags, - 
GLLang language, - const GlExtensions *extensions, - GLSLCrossDependencyData* dependencies, - HLSLccSamplerPrecisionInfo& samplerPrecisions, - HLSLccReflection& reflectionCallbacks, - GLSLShader* result -); - -HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromMem(const char* shader, - unsigned int flags, - GLLang language, - const GlExtensions *extensions, - GLSLCrossDependencyData* dependencies, - HLSLccSamplerPrecisionInfo& samplerPrecisions, - HLSLccReflection& reflectionCallbacks, - GLSLShader* result); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/third_party/HLSLcc/include/hlslcc.hpp b/third_party/HLSLcc/include/hlslcc.hpp deleted file mode 100644 index 67a792a..0000000 --- a/third_party/HLSLcc/include/hlslcc.hpp +++ /dev/null @@ -1,3 +0,0 @@ -extern "C" { -#include "hlslcc.h" -} diff --git a/third_party/HLSLcc/include/pstdint.h b/third_party/HLSLcc/include/pstdint.h deleted file mode 100644 index 5a53278..0000000 --- a/third_party/HLSLcc/include/pstdint.h +++ /dev/null @@ -1,799 +0,0 @@ -/* A portable stdint.h - **************************************************************************** - * BSD License: - **************************************************************************** - * - * Copyright (c) 2005-2011 Paul Hsieh - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - **************************************************************************** - * - * Version 0.1.12 - * - * The ANSI C standard committee, for the C99 standard, specified the - * inclusion of a new standard include file called stdint.h. This is - * a very useful and long desired include file which contains several - * very precise definitions for integer scalar types that is - * critically important for making portable several classes of - * applications including cryptography, hashing, variable length - * integer libraries and so on. But for most developers its likely - * useful just for programming sanity. - * - * The problem is that most compiler vendors have decided not to - * implement the C99 standard, and the next C++ language standard - * (which has a lot more mindshare these days) will be a long time in - * coming and its unknown whether or not it will include stdint.h or - * how much adoption it will have. Either way, it will be a long time - * before all compilers come with a stdint.h and it also does nothing - * for the extremely large number of compilers available today which - * do not include this file, or anything comparable to it. - * - * So that's what this file is all about. 
Its an attempt to build a - * single universal include file that works on as many platforms as - * possible to deliver what stdint.h is supposed to. A few things - * that should be noted about this file: - * - * 1) It is not guaranteed to be portable and/or present an identical - * interface on all platforms. The extreme variability of the - * ANSI C standard makes this an impossibility right from the - * very get go. Its really only meant to be useful for the vast - * majority of platforms that possess the capability of - * implementing usefully and precisely defined, standard sized - * integer scalars. Systems which are not intrinsically 2s - * complement may produce invalid constants. - * - * 2) There is an unavoidable use of non-reserved symbols. - * - * 3) Other standard include files are invoked. - * - * 4) This file may come in conflict with future platforms that do - * include stdint.h. The hope is that one or the other can be - * used with no real difference. - * - * 5) In the current verison, if your platform can't represent - * int32_t, int16_t and int8_t, it just dumps out with a compiler - * error. - * - * 6) 64 bit integers may or may not be defined. Test for their - * presence with the test: #ifdef INT64_MAX or #ifdef UINT64_MAX. - * Note that this is different from the C99 specification which - * requires the existence of 64 bit support in the compiler. If - * this is not defined for your platform, yet it is capable of - * dealing with 64 bits then it is because this file has not yet - * been extended to cover all of your system's capabilities. - * - * 7) (u)intptr_t may or may not be defined. Test for its presence - * with the test: #ifdef PTRDIFF_MAX. If this is not defined - * for your platform, then it is because this file has not yet - * been extended to cover all of your system's capabilities, not - * because its optional. 
- * - * 8) The following might not been defined even if your platform is - * capable of defining it: - * - * WCHAR_MIN - * WCHAR_MAX - * (u)int64_t - * PTRDIFF_MIN - * PTRDIFF_MAX - * (u)intptr_t - * - * 9) The following have not been defined: - * - * WINT_MIN - * WINT_MAX - * - * 10) The criteria for defining (u)int_least(*)_t isn't clear, - * except for systems which don't have a type that precisely - * defined 8, 16, or 32 bit types (which this include file does - * not support anyways). Default definitions have been given. - * - * 11) The criteria for defining (u)int_fast(*)_t isn't something I - * would trust to any particular compiler vendor or the ANSI C - * committee. It is well known that "compatible systems" are - * commonly created that have very different performance - * characteristics from the systems they are compatible with, - * especially those whose vendors make both the compiler and the - * system. Default definitions have been given, but its strongly - * recommended that users never use these definitions for any - * reason (they do *NOT* deliver any serious guarantee of - * improved performance -- not in this file, nor any vendor's - * stdint.h). - * - * 12) The following macros: - * - * PRINTF_INTMAX_MODIFIER - * PRINTF_INT64_MODIFIER - * PRINTF_INT32_MODIFIER - * PRINTF_INT16_MODIFIER - * PRINTF_LEAST64_MODIFIER - * PRINTF_LEAST32_MODIFIER - * PRINTF_LEAST16_MODIFIER - * PRINTF_INTPTR_MODIFIER - * - * are strings which have been defined as the modifiers required - * for the "d", "u" and "x" printf formats to correctly output - * (u)intmax_t, (u)int64_t, (u)int32_t, (u)int16_t, (u)least64_t, - * (u)least32_t, (u)least16_t and (u)intptr_t types respectively. - * PRINTF_INTPTR_MODIFIER is not defined for some systems which - * provide their own stdint.h. PRINTF_INT64_MODIFIER is not - * defined if INT64_MAX is not defined. These are an extension - * beyond what C99 specifies must be in stdint.h. 
- * - * In addition, the following macros are defined: - * - * PRINTF_INTMAX_HEX_WIDTH - * PRINTF_INT64_HEX_WIDTH - * PRINTF_INT32_HEX_WIDTH - * PRINTF_INT16_HEX_WIDTH - * PRINTF_INT8_HEX_WIDTH - * PRINTF_INTMAX_DEC_WIDTH - * PRINTF_INT64_DEC_WIDTH - * PRINTF_INT32_DEC_WIDTH - * PRINTF_INT16_DEC_WIDTH - * PRINTF_INT8_DEC_WIDTH - * - * Which specifies the maximum number of characters required to - * print the number of that type in either hexadecimal or decimal. - * These are an extension beyond what C99 specifies must be in - * stdint.h. - * - * Compilers tested (all with 0 warnings at their highest respective - * settings): Borland Turbo C 2.0, WATCOM C/C++ 11.0 (16 bits and 32 - * bits), Microsoft Visual C++ 6.0 (32 bit), Microsoft Visual Studio - * .net (VC7), Intel C++ 4.0, GNU gcc v3.3.3 - * - * This file should be considered a work in progress. Suggestions for - * improvements, especially those which increase coverage are strongly - * encouraged. - * - * Acknowledgements - * - * The following people have made significant contributions to the - * development and testing of this file: - * - * Chris Howie - * John Steele Scott - * Dave Thorup - * John Dill - * - */ - -#include -#include -#include - -/* - * For gcc with _STDINT_H, fill in the PRINTF_INT*_MODIFIER macros, and - * do nothing else. On the Mac OS X version of gcc this is _STDINT_H_. 
- */ - -#if ((defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L) || (defined(__WATCOMC__) && (defined(_STDINT_H_INCLUDED) || __WATCOMC__ >= 1250)) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_) || defined(__UINT_FAST64_TYPE__)))) && !defined(_PSTDINT_H_INCLUDED) -#include -#define _PSTDINT_H_INCLUDED -# ifndef PRINTF_INT64_MODIFIER -# define PRINTF_INT64_MODIFIER "ll" -# endif -# ifndef PRINTF_INT32_MODIFIER -# define PRINTF_INT32_MODIFIER "l" -# endif -# ifndef PRINTF_INT16_MODIFIER -# define PRINTF_INT16_MODIFIER "h" -# endif -# ifndef PRINTF_INTMAX_MODIFIER -# define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER -# endif -# ifndef PRINTF_INT64_HEX_WIDTH -# define PRINTF_INT64_HEX_WIDTH "16" -# endif -# ifndef PRINTF_INT32_HEX_WIDTH -# define PRINTF_INT32_HEX_WIDTH "8" -# endif -# ifndef PRINTF_INT16_HEX_WIDTH -# define PRINTF_INT16_HEX_WIDTH "4" -# endif -# ifndef PRINTF_INT8_HEX_WIDTH -# define PRINTF_INT8_HEX_WIDTH "2" -# endif -# ifndef PRINTF_INT64_DEC_WIDTH -# define PRINTF_INT64_DEC_WIDTH "20" -# endif -# ifndef PRINTF_INT32_DEC_WIDTH -# define PRINTF_INT32_DEC_WIDTH "10" -# endif -# ifndef PRINTF_INT16_DEC_WIDTH -# define PRINTF_INT16_DEC_WIDTH "5" -# endif -# ifndef PRINTF_INT8_DEC_WIDTH -# define PRINTF_INT8_DEC_WIDTH "3" -# endif -# ifndef PRINTF_INTMAX_HEX_WIDTH -# define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH -# endif -# ifndef PRINTF_INTMAX_DEC_WIDTH -# define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH -# endif - -/* - * Something really weird is going on with Open Watcom. Just pull some of - * these duplicated definitions from Open Watcom's stdint.h file for now. 
- */ - -# if defined(__WATCOMC__) && __WATCOMC__ >= 1250 -# if !defined(INT64_C) -# define INT64_C(x) (x + (INT64_MAX - INT64_MAX)) -# endif -# if !defined(UINT64_C) -# define UINT64_C(x) (x + (UINT64_MAX - UINT64_MAX)) -# endif -# if !defined(INT32_C) -# define INT32_C(x) (x + (INT32_MAX - INT32_MAX)) -# endif -# if !defined(UINT32_C) -# define UINT32_C(x) (x + (UINT32_MAX - UINT32_MAX)) -# endif -# if !defined(INT16_C) -# define INT16_C(x) (x) -# endif -# if !defined(UINT16_C) -# define UINT16_C(x) (x) -# endif -# if !defined(INT8_C) -# define INT8_C(x) (x) -# endif -# if !defined(UINT8_C) -# define UINT8_C(x) (x) -# endif -# if !defined(UINT64_MAX) -# define UINT64_MAX 18446744073709551615ULL -# endif -# if !defined(INT64_MAX) -# define INT64_MAX 9223372036854775807LL -# endif -# if !defined(UINT32_MAX) -# define UINT32_MAX 4294967295UL -# endif -# if !defined(INT32_MAX) -# define INT32_MAX 2147483647L -# endif -# if !defined(INTMAX_MAX) -# define INTMAX_MAX INT64_MAX -# endif -# if !defined(INTMAX_MIN) -# define INTMAX_MIN INT64_MIN -# endif -# endif -#endif - -#ifndef _PSTDINT_H_INCLUDED -#define _PSTDINT_H_INCLUDED - -#ifndef SIZE_MAX -# define SIZE_MAX (~(size_t)0) -#endif - -/* - * Deduce the type assignments from limits.h under the assumption that - * integer sizes in bits are powers of 2, and follow the ANSI - * definitions. 
- */ - -#ifndef UINT8_MAX -# define UINT8_MAX 0xff -#endif -#ifndef uint8_t -# if (UCHAR_MAX == UINT8_MAX) || defined(S_SPLINT_S) -typedef unsigned char uint8_t; -# define UINT8_C(v) ((uint8_t) v) -# else -# error "Platform not supported" -# endif -#endif - -#ifndef INT8_MAX -# define INT8_MAX 0x7f -#endif -#ifndef INT8_MIN -# define INT8_MIN INT8_C(0x80) -#endif -#ifndef int8_t -# if (SCHAR_MAX == INT8_MAX) || defined(S_SPLINT_S) -typedef signed char int8_t; -# define INT8_C(v) ((int8_t) v) -# else -# error "Platform not supported" -# endif -#endif - -#ifndef UINT16_MAX -# define UINT16_MAX 0xffff -#endif -#ifndef uint16_t -#if (UINT_MAX == UINT16_MAX) || defined(S_SPLINT_S) -typedef unsigned int uint16_t; -# ifndef PRINTF_INT16_MODIFIER -# define PRINTF_INT16_MODIFIER "" -# endif -# define UINT16_C(v) ((uint16_t) (v)) -#elif (USHRT_MAX == UINT16_MAX) -typedef unsigned short uint16_t; -# define UINT16_C(v) ((uint16_t) (v)) -# ifndef PRINTF_INT16_MODIFIER -# define PRINTF_INT16_MODIFIER "h" -# endif -#else -#error "Platform not supported" -#endif -#endif - -#ifndef INT16_MAX -# define INT16_MAX 0x7fff -#endif -#ifndef INT16_MIN -# define INT16_MIN INT16_C(0x8000) -#endif -#ifndef int16_t -#if (INT_MAX == INT16_MAX) || defined(S_SPLINT_S) -typedef signed int int16_t; -# define INT16_C(v) ((int16_t) (v)) -# ifndef PRINTF_INT16_MODIFIER -# define PRINTF_INT16_MODIFIER "" -# endif -#elif (SHRT_MAX == INT16_MAX) -typedef signed short int16_t; -# define INT16_C(v) ((int16_t) (v)) -# ifndef PRINTF_INT16_MODIFIER -# define PRINTF_INT16_MODIFIER "h" -# endif -#else -#error "Platform not supported" -#endif -#endif - -#ifndef UINT32_MAX -# define UINT32_MAX (0xffffffffUL) -#endif -#ifndef uint32_t -#if (ULONG_MAX == UINT32_MAX) || defined(S_SPLINT_S) -typedef unsigned long uint32_t; -# define UINT32_C(v) v ## UL -# ifndef PRINTF_INT32_MODIFIER -# define PRINTF_INT32_MODIFIER "l" -# endif -#elif (UINT_MAX == UINT32_MAX) -typedef unsigned int uint32_t; -# ifndef 
PRINTF_INT32_MODIFIER -# define PRINTF_INT32_MODIFIER "" -# endif -# define UINT32_C(v) v ## U -#elif (USHRT_MAX == UINT32_MAX) -typedef unsigned short uint32_t; -# define UINT32_C(v) ((unsigned short) (v)) -# ifndef PRINTF_INT32_MODIFIER -# define PRINTF_INT32_MODIFIER "" -# endif -#else -#error "Platform not supported" -#endif -#endif - -#ifndef INT32_MAX -# define INT32_MAX (0x7fffffffL) -#endif -#ifndef INT32_MIN -# define INT32_MIN INT32_C(0x80000000) -#endif -#ifndef int32_t -#if (LONG_MAX == INT32_MAX) || defined(S_SPLINT_S) -typedef signed long int32_t; -# define INT32_C(v) v ## L -# ifndef PRINTF_INT32_MODIFIER -# define PRINTF_INT32_MODIFIER "l" -# endif -#elif (INT_MAX == INT32_MAX) -typedef signed int int32_t; -# define INT32_C(v) v -# ifndef PRINTF_INT32_MODIFIER -# define PRINTF_INT32_MODIFIER "" -# endif -#elif (SHRT_MAX == INT32_MAX) -typedef signed short int32_t; -# define INT32_C(v) ((short) (v)) -# ifndef PRINTF_INT32_MODIFIER -# define PRINTF_INT32_MODIFIER "" -# endif -#else -#error "Platform not supported" -#endif -#endif - -/* - * The macro stdint_int64_defined is temporarily used to record - * whether or not 64 integer support is available. It must be - * defined for any 64 integer extensions for new platforms that are - * added. 
- */ - -#undef stdint_int64_defined -#if (defined(__STDC__) && defined(__STDC_VERSION__)) || defined(S_SPLINT_S) -# if (__STDC__ && __STDC_VERSION__ >= 199901L) || defined(S_SPLINT_S) -# define stdint_int64_defined -typedef long long int64_t; -typedef unsigned long long uint64_t; -# define UINT64_C(v) v ## ULL -# define INT64_C(v) v ## LL -# ifndef PRINTF_INT64_MODIFIER -# define PRINTF_INT64_MODIFIER "ll" -# endif -# endif -#endif - -#if !defined(stdint_int64_defined) -# if defined(__GNUC__) -# define stdint_int64_defined -__extension__ typedef long long int64_t; -__extension__ typedef unsigned long long uint64_t; -# define UINT64_C(v) v ## ULL -# define INT64_C(v) v ## LL -# ifndef PRINTF_INT64_MODIFIER -# define PRINTF_INT64_MODIFIER "ll" -# endif -# elif defined(__MWERKS__) || defined(__SUNPRO_C) || defined(__SUNPRO_CC) || defined(__APPLE_CC__) || defined(_LONG_LONG) || defined(_CRAYC) || defined(S_SPLINT_S) -# define stdint_int64_defined -typedef long long int64_t; -typedef unsigned long long uint64_t; -# define UINT64_C(v) v ## ULL -# define INT64_C(v) v ## LL -# ifndef PRINTF_INT64_MODIFIER -# define PRINTF_INT64_MODIFIER "ll" -# endif -# elif (defined(__WATCOMC__) && defined(__WATCOM_INT64__)) || (defined(_MSC_VER) && _INTEGRAL_MAX_BITS >= 64) || (defined(__BORLANDC__) && __BORLANDC__ > 0x460) || defined(__alpha) || defined(__DECC) -# define stdint_int64_defined -typedef __int64 int64_t; -typedef unsigned __int64 uint64_t; -# define UINT64_C(v) v ## UI64 -# define INT64_C(v) v ## I64 -# ifndef PRINTF_INT64_MODIFIER -# define PRINTF_INT64_MODIFIER "I64" -# endif -# endif -#endif - -#if !defined(LONG_LONG_MAX) && defined(INT64_C) -# define LONG_LONG_MAX INT64_C (9223372036854775807) -#endif -#ifndef ULONG_LONG_MAX -# define ULONG_LONG_MAX UINT64_C (18446744073709551615) -#endif - -#if !defined(INT64_MAX) && defined(INT64_C) -# define INT64_MAX INT64_C (9223372036854775807) -#endif -#if !defined(INT64_MIN) && defined(INT64_C) -# define INT64_MIN INT64_C 
(-9223372036854775808) -#endif -#if !defined(UINT64_MAX) && defined(INT64_C) -# define UINT64_MAX UINT64_C (18446744073709551615) -#endif - -/* - * Width of hexadecimal for number field. - */ - -#ifndef PRINTF_INT64_HEX_WIDTH -# define PRINTF_INT64_HEX_WIDTH "16" -#endif -#ifndef PRINTF_INT32_HEX_WIDTH -# define PRINTF_INT32_HEX_WIDTH "8" -#endif -#ifndef PRINTF_INT16_HEX_WIDTH -# define PRINTF_INT16_HEX_WIDTH "4" -#endif -#ifndef PRINTF_INT8_HEX_WIDTH -# define PRINTF_INT8_HEX_WIDTH "2" -#endif - -#ifndef PRINTF_INT64_DEC_WIDTH -# define PRINTF_INT64_DEC_WIDTH "20" -#endif -#ifndef PRINTF_INT32_DEC_WIDTH -# define PRINTF_INT32_DEC_WIDTH "10" -#endif -#ifndef PRINTF_INT16_DEC_WIDTH -# define PRINTF_INT16_DEC_WIDTH "5" -#endif -#ifndef PRINTF_INT8_DEC_WIDTH -# define PRINTF_INT8_DEC_WIDTH "3" -#endif - -/* - * Ok, lets not worry about 128 bit integers for now. Moore's law says - * we don't need to worry about that until about 2040 at which point - * we'll have bigger things to worry about. 
- */ - -#ifdef stdint_int64_defined -typedef int64_t intmax_t; -typedef uint64_t uintmax_t; -# define INTMAX_MAX INT64_MAX -# define INTMAX_MIN INT64_MIN -# define UINTMAX_MAX UINT64_MAX -# define UINTMAX_C(v) UINT64_C(v) -# define INTMAX_C(v) INT64_C(v) -# ifndef PRINTF_INTMAX_MODIFIER -# define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER -# endif -# ifndef PRINTF_INTMAX_HEX_WIDTH -# define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH -# endif -# ifndef PRINTF_INTMAX_DEC_WIDTH -# define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH -# endif -#else -typedef int32_t intmax_t; -typedef uint32_t uintmax_t; -# define INTMAX_MAX INT32_MAX -# define UINTMAX_MAX UINT32_MAX -# define UINTMAX_C(v) UINT32_C(v) -# define INTMAX_C(v) INT32_C(v) -# ifndef PRINTF_INTMAX_MODIFIER -# define PRINTF_INTMAX_MODIFIER PRINTF_INT32_MODIFIER -# endif -# ifndef PRINTF_INTMAX_HEX_WIDTH -# define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT32_HEX_WIDTH -# endif -# ifndef PRINTF_INTMAX_DEC_WIDTH -# define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT32_DEC_WIDTH -# endif -#endif - -/* - * Because this file currently only supports platforms which have - * precise powers of 2 as bit sizes for the default integers, the - * least definitions are all trivial. Its possible that a future - * version of this file could have different definitions. 
- */ - -#ifndef stdint_least_defined -typedef int8_t int_least8_t; -typedef uint8_t uint_least8_t; -typedef int16_t int_least16_t; -typedef uint16_t uint_least16_t; -typedef int32_t int_least32_t; -typedef uint32_t uint_least32_t; -# define PRINTF_LEAST32_MODIFIER PRINTF_INT32_MODIFIER -# define PRINTF_LEAST16_MODIFIER PRINTF_INT16_MODIFIER -# define UINT_LEAST8_MAX UINT8_MAX -# define INT_LEAST8_MAX INT8_MAX -# define UINT_LEAST16_MAX UINT16_MAX -# define INT_LEAST16_MAX INT16_MAX -# define UINT_LEAST32_MAX UINT32_MAX -# define INT_LEAST32_MAX INT32_MAX -# define INT_LEAST8_MIN INT8_MIN -# define INT_LEAST16_MIN INT16_MIN -# define INT_LEAST32_MIN INT32_MIN -# ifdef stdint_int64_defined -typedef int64_t int_least64_t; -typedef uint64_t uint_least64_t; -# define PRINTF_LEAST64_MODIFIER PRINTF_INT64_MODIFIER -# define UINT_LEAST64_MAX UINT64_MAX -# define INT_LEAST64_MAX INT64_MAX -# define INT_LEAST64_MIN INT64_MIN -# endif -#endif -#undef stdint_least_defined - -/* - * The ANSI C committee pretending to know or specify anything about - * performance is the epitome of misguided arrogance. The mandate of - * this file is to *ONLY* ever support that absolute minimum - * definition of the fast integer types, for compatibility purposes. - * No extensions, and no attempt to suggest what may or may not be a - * faster integer type will ever be made in this file. Developers are - * warned to stay away from these types when using this or any other - * stdint.h. 
- */ - -typedef int_least8_t int_fast8_t; -typedef uint_least8_t uint_fast8_t; -typedef int_least16_t int_fast16_t; -typedef uint_least16_t uint_fast16_t; -typedef int_least32_t int_fast32_t; -typedef uint_least32_t uint_fast32_t; -#define UINT_FAST8_MAX UINT_LEAST8_MAX -#define INT_FAST8_MAX INT_LEAST8_MAX -#define UINT_FAST16_MAX UINT_LEAST16_MAX -#define INT_FAST16_MAX INT_LEAST16_MAX -#define UINT_FAST32_MAX UINT_LEAST32_MAX -#define INT_FAST32_MAX INT_LEAST32_MAX -#define INT_FAST8_MIN INT_LEAST8_MIN -#define INT_FAST16_MIN INT_LEAST16_MIN -#define INT_FAST32_MIN INT_LEAST32_MIN -#ifdef stdint_int64_defined -typedef int_least64_t int_fast64_t; -typedef uint_least64_t uint_fast64_t; -# define UINT_FAST64_MAX UINT_LEAST64_MAX -# define INT_FAST64_MAX INT_LEAST64_MAX -# define INT_FAST64_MIN INT_LEAST64_MIN -#endif - -#undef stdint_int64_defined - -/* - * Whatever piecemeal, per compiler thing we can do about the wchar_t - * type limits. - */ - -#if defined(__WATCOMC__) || defined(_MSC_VER) || defined(__GNUC__) -# include -# ifndef WCHAR_MIN -# define WCHAR_MIN 0 -# endif -# ifndef WCHAR_MAX -# define WCHAR_MAX ((wchar_t)-1) -# endif -#endif - -/* - * Whatever piecemeal, per compiler/platform thing we can do about the - * (u)intptr_t types and limits. 
- */ - -#if defined(_MSC_VER) && defined(_UINTPTR_T_DEFINED) -# define STDINT_H_UINTPTR_T_DEFINED -#endif - -#ifndef STDINT_H_UINTPTR_T_DEFINED -# if defined(__alpha__) || defined(__ia64__) || defined(__x86_64__) || defined(_WIN64) -# define stdint_intptr_bits 64 -# elif defined(__WATCOMC__) || defined(__TURBOC__) -# if defined(__TINY__) || defined(__SMALL__) || defined(__MEDIUM__) -# define stdint_intptr_bits 16 -# else -# define stdint_intptr_bits 32 -# endif -# elif defined(__i386__) || defined(_WIN32) || defined(WIN32) -# define stdint_intptr_bits 32 -# elif defined(__INTEL_COMPILER) -#error Unknown compiler -# endif - -# ifdef stdint_intptr_bits -# define stdint_intptr_glue3_i(a, b, c) a##b##c -# define stdint_intptr_glue3(a, b, c) stdint_intptr_glue3_i(a,b,c) -# ifndef PRINTF_INTPTR_MODIFIER -# define PRINTF_INTPTR_MODIFIER stdint_intptr_glue3(PRINTF_INT,stdint_intptr_bits,_MODIFIER) -# endif -# ifndef PTRDIFF_MAX -# define PTRDIFF_MAX stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX) -# endif -# ifndef PTRDIFF_MIN -# define PTRDIFF_MIN stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN) -# endif -# ifndef UINTPTR_MAX -# define UINTPTR_MAX stdint_intptr_glue3(UINT,stdint_intptr_bits,_MAX) -# endif -# ifndef INTPTR_MAX -# define INTPTR_MAX stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX) -# endif -# ifndef INTPTR_MIN -# define INTPTR_MIN stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN) -# endif -# ifndef INTPTR_C -# define INTPTR_C(x) stdint_intptr_glue3(INT,stdint_intptr_bits,_C)(x) -# endif -# ifndef UINTPTR_C -# define UINTPTR_C(x) stdint_intptr_glue3(UINT,stdint_intptr_bits,_C)(x) -# endif -typedef stdint_intptr_glue3 (uint, stdint_intptr_bits, _t) uintptr_t; -typedef stdint_intptr_glue3 (int, stdint_intptr_bits, _t) intptr_t; -# else -#error Unknown compiler -# endif -# define STDINT_H_UINTPTR_T_DEFINED -#endif - -/* - * Assumes sig_atomic_t is signed and we have a 2s complement machine. 
- */ - -#ifndef SIG_ATOMIC_MAX -# define SIG_ATOMIC_MAX ((((sig_atomic_t) 1) << (sizeof (sig_atomic_t)*CHAR_BIT-1)) - 1) -#endif - -#endif - -#if defined(__TEST_PSTDINT_FOR_CORRECTNESS) - -/* - * Please compile with the maximum warning settings to make sure macros are not - * defined more than once. - */ - -#include -#include -#include - -#define glue3_aux(x, y, z) x ## y ## z -#define glue3(x, y, z) glue3_aux(x,y,z) - -#define DECLU(bits) glue3(uint,bits,_t) glue3(u,bits,=) glue3(UINT,bits,_C) (0); -#define DECLI(bits) glue3(int,bits,_t) glue3(i,bits,=) glue3(INT,bits,_C) (0); - -#define DECL(us, bits) glue3(DECL,us,) (bits) - -#define TESTUMAX(bits) glue3(u,bits,=) glue3(~,u,bits); if (glue3(UINT,bits,_MAX) glue3(!=,u,bits)) printf ("Something wrong with UINT%d_MAX\n", bits) - -int main() -{ - DECL(I, 8) - DECL(U, 8) - DECL(I, 16) - DECL(U, 16) - DECL(I, 32) - DECL(U, 32) -#ifdef INT64_MAX - DECL(I, 64) - DECL(U, 64) -#endif - intmax_t imax = INTMAX_C(0); - uintmax_t umax = UINTMAX_C(0); - char str0[256], str1[256]; - - sprintf(str0, "%d %x\n", 0, ~0); - - sprintf(str1, "%d %x\n", i8, ~0); - if (0 != strcmp(str0, str1)) printf("Something wrong with i8 : %s\n", str1); - sprintf(str1, "%u %x\n", u8, ~0); - if (0 != strcmp(str0, str1)) printf("Something wrong with u8 : %s\n", str1); - sprintf(str1, "%d %x\n", i16, ~0); - if (0 != strcmp(str0, str1)) printf("Something wrong with i16 : %s\n", str1); - sprintf(str1, "%u %x\n", u16, ~0); - if (0 != strcmp(str0, str1)) printf("Something wrong with u16 : %s\n", str1); - sprintf(str1, "%" PRINTF_INT32_MODIFIER "d %x\n", i32, ~0); - if (0 != strcmp(str0, str1)) printf("Something wrong with i32 : %s\n", str1); - sprintf(str1, "%" PRINTF_INT32_MODIFIER "u %x\n", u32, ~0); - if (0 != strcmp(str0, str1)) printf("Something wrong with u32 : %s\n", str1); -#ifdef INT64_MAX - sprintf(str1, "%" PRINTF_INT64_MODIFIER "d %x\n", i64, ~0); - if (0 != strcmp(str0, str1)) printf("Something wrong with i64 : %s\n", str1); -#endif - 
sprintf(str1, "%" PRINTF_INTMAX_MODIFIER "d %x\n", imax, ~0); - if (0 != strcmp(str0, str1)) printf("Something wrong with imax : %s\n", str1); - sprintf(str1, "%" PRINTF_INTMAX_MODIFIER "u %x\n", umax, ~0); - if (0 != strcmp(str0, str1)) printf("Something wrong with umax : %s\n", str1); - - TESTUMAX(8); - TESTUMAX(16); - TESTUMAX(32); -#ifdef INT64_MAX - TESTUMAX(64); -#endif - - return EXIT_SUCCESS; -} - -#endif diff --git a/third_party/HLSLcc/license.txt b/third_party/HLSLcc/license.txt deleted file mode 100644 index 6e2d4bb..0000000 --- a/third_party/HLSLcc/license.txt +++ /dev/null @@ -1,53 +0,0 @@ - -Original HLSLcc source code Copyright (c) 2012 James Jones -Further improvements Copyright (c) 2014-2016 Unity Technologies -All Rights Reserved. - -Permission is hereby granted, free of charge, to any person obtaining a -copy of this software and associated documentation files (the "Software"), -to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, -and/or sell copies of the Software, and to permit persons to whom the -Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included -in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. - -This software makes use of the bstring library which is provided under the following license: - -Copyright (c) 2002-2008 Paul Hsieh -All rights reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - - Neither the name of bstrlib nor the names of its contributors may be used - to endorse or promote products derived from this software without - specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
\ No newline at end of file diff --git a/third_party/HLSLcc/src/ControlFlowGraph.cpp b/third_party/HLSLcc/src/ControlFlowGraph.cpp deleted file mode 100644 index bf45aae..0000000 --- a/third_party/HLSLcc/src/ControlFlowGraph.cpp +++ /dev/null @@ -1,815 +0,0 @@ -#include "internal_includes/debug.h" -#include "internal_includes/ControlFlowGraph.h" -#include "internal_includes/ControlFlowGraphUtils.h" -#include "internal_includes/Instruction.h" -#include "internal_includes/Operand.h" -#include "internal_includes/HLSLccToolkit.h" -#include - -using namespace HLSLcc::ControlFlow; -using HLSLcc::ForEachOperand; - -const BasicBlock &ControlFlowGraph::Build(const Instruction* firstInstruction, const Instruction* endInstruction) -{ - using std::for_each; - - m_BlockMap.clear(); - m_BlockStorage.clear(); - - // Self-registering into m_BlockStorage so it goes out of the scope when ControlFlowGraph does - BasicBlock *root = new BasicBlock(Utils::GetNextNonLabelInstruction(firstInstruction), *this, NULL, endInstruction); - - // Build the reachable set for each block - bool hadChanges; - do - { - hadChanges = false; - for_each(m_BlockStorage.begin(), m_BlockStorage.end(), [&](const shared_ptr &bb) - { - BasicBlock &b = *bb.get(); - if (b.RebuildReachable()) - { - hadChanges = true; - } - }); - } - while (hadChanges == true); - - return *root; -} - -const BasicBlock *ControlFlowGraph::GetBasicBlockForInstruction(const Instruction *instruction) const -{ - BasicBlockMap::const_iterator itr = m_BlockMap.find(Utils::GetNextNonLabelInstruction(instruction)); - if (itr == m_BlockMap.end()) - return NULL; - - return itr->second; -} - -BasicBlock *ControlFlowGraph::GetBasicBlockForInstruction(const Instruction *instruction) -{ - BasicBlockMap::iterator itr = m_BlockMap.find(Utils::GetNextNonLabelInstruction(instruction)); - if (itr == m_BlockMap.end()) - return NULL; - - return itr->second; -} - -// Generate a basic block. 
Private constructor, can only be constructed from ControlFlowGraph::Build(). -// Auto-registers itself into ControlFlowGraph -BasicBlock::BasicBlock(const Instruction *psFirst, ControlFlowGraph &graph, const Instruction *psPrecedingBlockHead, const Instruction* endInstruction) - : m_Graph(graph) - , m_First(psFirst) - , m_Last(NULL) - , m_End(endInstruction) -{ - m_UEVar.clear(); - m_VarKill.clear(); - m_Preceding.clear(); - m_Succeeding.clear(); - m_DEDef.clear(); - m_Reachable.clear(); - - // Check that we've pruned the labels - ASSERT(psFirst == Utils::GetNextNonLabelInstruction(psFirst)); - - // Insert to block storage, block map and connect to previous block - m_Graph.m_BlockStorage.push_back(shared_ptr(this)); - - bool didInsert = m_Graph.m_BlockMap.insert(std::make_pair(psFirst, this)).second; - ASSERT(didInsert); - - if (psPrecedingBlockHead != NULL) - { - m_Preceding.insert(psPrecedingBlockHead); - BasicBlock *prec = m_Graph.GetBasicBlockForInstruction(psPrecedingBlockHead); - ASSERT(prec != 0); - didInsert = prec->m_Succeeding.insert(psFirst).second; - ASSERT(didInsert); - } - - Build(); -} - -void BasicBlock::Build() -{ - const Instruction *inst = m_First; - while (inst != m_End) - { - // Process sources first - ForEachOperand(inst, inst + 1, FEO_FLAG_SRC_OPERAND | FEO_FLAG_SUBOPERAND, - [this](const Instruction *psInst, const Operand *psOperand, uint32_t ui32OperandType) - { - if (psOperand->eType != OPERAND_TYPE_TEMP) - return; - - uint32_t tempReg = psOperand->ui32RegisterNumber; - uint32_t accessMask = psOperand->GetAccessMask(); - - // Go through each component - for (int k = 0; k < 4; k++) - { - if (!(accessMask & (1 << k))) - continue; - - uint32_t regIdx = tempReg * 4 + k; - // Is this idx already in the kill set, meaning that it's already been re-defined in this basic block? Ignore - if (m_VarKill.find(regIdx) != m_VarKill.end()) - continue; - - // Add to UEVars set. Doesn't matter if it's already there. 
- m_UEVar.insert(regIdx); - } - return; - }); - - // Then the destination operands - ForEachOperand(inst, inst + 1, FEO_FLAG_DEST_OPERAND, - [this](const Instruction *psInst, const Operand *psOperand, uint32_t ui32OperandType) - { - if (psOperand->eType != OPERAND_TYPE_TEMP) - return; - - uint32_t tempReg = psOperand->ui32RegisterNumber; - uint32_t accessMask = psOperand->GetAccessMask(); - - // Go through each component - for (int k = 0; k < 4; k++) - { - if (!(accessMask & (1 << k))) - continue; - - uint32_t regIdx = tempReg * 4 + k; - - // Add to kill set. Dupes are fine, this is a set. - m_VarKill.insert(regIdx); - // Also into the downward definitions. Overwrite the previous definition in this basic block, if any - Definition d(psInst, psOperand); - m_DEDef[regIdx].clear(); - m_DEDef[regIdx].insert(d); - } - return; - }); - - // Check for flow control instructions - bool blockDone = false; - switch (inst->eOpcode) - { - default: - break; - case OPCODE_RET: - // Continue processing, in the case of unreachable code we still need to translate it properly (case 1160309) - // blockDone = true; - break; - case OPCODE_RETC: - // Basic block is done, start a next one. - // There REALLY should be no existing blocks for this one - ASSERT(m_Graph.GetBasicBlockForInstruction(Utils::GetNextNonLabelInstruction(inst + 1)) == NULL); - AddChildBasicBlock(Utils::GetNextNonLabelInstruction(inst + 1)); - blockDone = true; - break; - case OPCODE_LOOP: - case OPCODE_CASE: - case OPCODE_ENDIF: - case OPCODE_ENDSWITCH: - // Not a flow control branch, but need to start a new block anyway. 
- AddChildBasicBlock(Utils::GetNextNonLabelInstruction(inst + 1)); - blockDone = true; - break; - - // Branches - case OPCODE_IF: - case OPCODE_BREAKC: - case OPCODE_CONTINUEC: - { - const Instruction *jumpPoint = Utils::GetJumpPoint(inst); - ASSERT(jumpPoint != NULL); - - // The control branches to the next instruction or jumps to jumpPoint - AddChildBasicBlock(Utils::GetNextNonLabelInstruction(inst + 1)); - AddChildBasicBlock(jumpPoint); - - blockDone = true; - break; - } - case OPCODE_SWITCH: - { - bool sawEndSwitch = false; - bool needConnectToParent = false; - const Instruction *jumpPoint = Utils::GetJumpPoint(inst, &sawEndSwitch, &needConnectToParent); - ASSERT(jumpPoint != NULL); - - while (1) - { - if (!sawEndSwitch || needConnectToParent) - AddChildBasicBlock(jumpPoint); - - if (sawEndSwitch) - break; - - // The -1 is a bit of a hack: we always scroll past all labels so rewind to the last one so we'll know to search for the next label - ASSERT((jumpPoint - 1)->eOpcode == OPCODE_CASE || (jumpPoint - 1)->eOpcode == OPCODE_DEFAULT); - jumpPoint = Utils::GetJumpPoint(jumpPoint - 1, &sawEndSwitch, &needConnectToParent); - ASSERT(jumpPoint != NULL); - } - blockDone = true; - break; - } - - // Non-conditional jumps - case OPCODE_BREAK: - case OPCODE_ELSE: - case OPCODE_CONTINUE: - case OPCODE_ENDLOOP: - { - const Instruction *jumpPoint = Utils::GetJumpPoint(inst); - ASSERT(jumpPoint != NULL); - - AddChildBasicBlock(jumpPoint); - - blockDone = true; - break; - } - } - - if (blockDone) - break; - - inst++; - } - // In initial building phase, just make m_Reachable equal to m_DEDef - m_Reachable = m_DEDef; - - // Tag the end of the basic block - m_Last = std::max(m_First, std::min(inst, m_End - 1)); -// printf("Basic Block %d -> %d\n", (int)m_First->id, (int)m_Last->id); -} - -BasicBlock * BasicBlock::AddChildBasicBlock(const Instruction *psFirst) -{ - // First see if this already exists - BasicBlock *b = m_Graph.GetBasicBlockForInstruction(psFirst); - if (b) - { - 
// Just add dependency and we're done - b->m_Preceding.insert(m_First); - m_Succeeding.insert(psFirst); - return b; - } - // Otherwise create one. Self-registering and self-connecting - return new BasicBlock(psFirst, m_Graph, m_First, m_End); -} - -bool BasicBlock::RebuildReachable() -{ - // Building the Reachable set is an iterative process, where each block gets rebuilt until nothing changes. - // Formula: reachable = this.DEDef union ( each preceding.Reachable() minus this.VarKill()) - - ReachableVariables newReachable = m_DEDef; - bool hasChanges = false; - - // Loop each predecessor - std::for_each(Preceding().begin(), Preceding().end(), [&](const Instruction *instr) - { - const BasicBlock *prec = m_Graph.GetBasicBlockForInstruction(instr); - const ReachableVariables &precReachable = prec->Reachable(); - - // Loop each variable*component - std::for_each(precReachable.begin(), precReachable.end(), [&](const std::pair &itr2) - { - uint32_t regIdx = itr2.first; - const BasicBlock::ReachableDefinitionsPerVariable &defs = itr2.second; - - // Already killed in this block? 
- if (VarKill().find(regIdx) != VarKill().end()) - return; - - // Only do comparisons against current definitions if we've yet to find any changes - BasicBlock::ReachableDefinitionsPerVariable *currReachablePerVar = 0; - if (!hasChanges) - currReachablePerVar = &m_Reachable[regIdx]; - - BasicBlock::ReachableDefinitionsPerVariable &newReachablePerVar = newReachable[regIdx]; - - // Loop each definition - std::for_each(defs.begin(), defs.end(), [&](const BasicBlock::Definition &d) - { - if (!hasChanges) - { - // Check if already there - if (currReachablePerVar->find(d) == currReachablePerVar->end()) - hasChanges = true; - } - newReachablePerVar.insert(d); - }); // definition - }); // variable*component - }); // predecessor - - if (hasChanges) - { - std::swap(m_Reachable, newReachable); - } - - return hasChanges; -} - -void BasicBlock::RVarUnion(ReachableVariables &a, const ReachableVariables &b) -{ - std::for_each(b.begin(), b.end(), [&a](const std::pair &rpvPair) - { - uint32_t regIdx = rpvPair.first; - const ReachableDefinitionsPerVariable &rpv = rpvPair.second; - // No previous definitions for this variable? 
- auto aRPVItr = a.find(regIdx); - if (aRPVItr == a.end()) - { - // Just set the definitions and continue - a[regIdx] = rpv; - return; - } - ReachableDefinitionsPerVariable &aRPV = aRPVItr->second; - aRPV.insert(rpv.begin(), rpv.end()); - }); -} - -#if ENABLE_UNIT_TESTS - -#define UNITY_EXTERNAL_TOOL 1 -#include "Projects/PrecompiledHeaders/UnityPrefix.h" // Needed for defines such as ENABLE_CPP_EXCEPTIONS -#include "Testing.h" // From Runtime/Testing - -UNIT_TEST_SUITE(HLSLcc) -{ - TEST(ControlFlowGraph_Build_Simple_Works) - { - Instruction inst[] = - { - // MOV t0.xyzw, I0.xyzw - Instruction(0, OPCODE_MOV, 0, 0xf, 0xffffffff, 0xf), - Instruction(1, OPCODE_RET) - }; - - ControlFlowGraph cfg; - const BasicBlock &root = cfg.Build(inst, inst + ARRAY_SIZE(inst)); - - CHECK_EQUAL(&inst[0], root.First()); - CHECK_EQUAL(&inst[1], root.Last()); - - CHECK(root.Preceding().empty()); - CHECK(root.Succeeding().empty()); - - CHECK_EQUAL(4, root.VarKill().size()); - - // Check that all components from t0 are killed - CHECK_EQUAL(1, root.VarKill().count(0)); - CHECK_EQUAL(1, root.VarKill().count(1)); - CHECK_EQUAL(1, root.VarKill().count(2)); - CHECK_EQUAL(1, root.VarKill().count(3)); - - CHECK_EQUAL(&inst[0], root.DEDef().find(0)->second.begin()->m_Instruction); - CHECK_EQUAL(&inst[0].asOperands[0], root.DEDef().find(0)->second.begin()->m_Operand); - CHECK_EQUAL(&inst[0], root.DEDef().find(1)->second.begin()->m_Instruction); - CHECK_EQUAL(&inst[0].asOperands[0], root.DEDef().find(1)->second.begin()->m_Operand); - CHECK_EQUAL(&inst[0], root.DEDef().find(2)->second.begin()->m_Instruction); - CHECK_EQUAL(&inst[0].asOperands[0], root.DEDef().find(2)->second.begin()->m_Operand); - CHECK_EQUAL(&inst[0], root.DEDef().find(3)->second.begin()->m_Instruction); - CHECK_EQUAL(&inst[0].asOperands[0], root.DEDef().find(3)->second.begin()->m_Operand); - } - - TEST(ControlFlowGraph_Build_If_Works) - { - Instruction inst[] = - { - // B0 - // 0: MOV t1.xyzw, i0.xyzw - Instruction(0, OPCODE_MOV, 
1, 0xf, 0xffffffff, 0xf), - // 1: MUL t0, t1, t1 - Instruction(1, OPCODE_MUL, 0, 0xf, 1, 0xf, 1, 0xf), - // 2: IF t1.y - Instruction(2, OPCODE_IF, 1, 2), - // B1 - // 3: MOV o0, t0 - Instruction(3, OPCODE_MOV, 0xffffffff, 0xf, 0, 0xf), - // 4: - Instruction(4, OPCODE_ELSE), - // B2 - // 5: MOV o0, t1 - Instruction(5, OPCODE_MOV, 0xffffffff, 0xf, 1, 0xf), - // 6: - Instruction(6, OPCODE_ENDIF), - // B3 - // 7: - Instruction(7, OPCODE_NOP), - // 8: - Instruction(8, OPCODE_RET) - }; - - ControlFlowGraph cfg; - const BasicBlock &root = cfg.Build(inst, inst + ARRAY_SIZE(inst)); - - CHECK_EQUAL(root.First(), &inst[0]); - CHECK_EQUAL(root.Last(), &inst[2]); - - CHECK(root.Preceding().empty()); - - const BasicBlock *b1 = cfg.GetBasicBlockForInstruction(&inst[3]); - const BasicBlock *b2 = cfg.GetBasicBlockForInstruction(&inst[5]); - const BasicBlock *b3 = cfg.GetBasicBlockForInstruction(&inst[7]); - - CHECK(b1 != NULL); - CHECK(b2 != NULL); - CHECK(b3 != NULL); - - CHECK_EQUAL(&inst[3], b1->First()); - CHECK_EQUAL(&inst[5], b2->First()); - CHECK_EQUAL(&inst[7], b3->First()); - - CHECK_EQUAL(&inst[4], b1->Last()); - CHECK_EQUAL(&inst[6], b2->Last()); - CHECK_EQUAL(&inst[8], b3->Last()); - - CHECK_EQUAL(1, root.Succeeding().count(&inst[3])); - CHECK_EQUAL(1, root.Succeeding().count(&inst[5])); - CHECK_EQUAL(2, root.Succeeding().size()); - - CHECK_EQUAL(1, b1->Preceding().size()); - CHECK_EQUAL(1, b1->Preceding().count(&inst[0])); - - CHECK_EQUAL(1, b2->Preceding().size()); - CHECK_EQUAL(1, b2->Preceding().count(&inst[0])); - - CHECK_EQUAL(2, b3->Preceding().size()); - CHECK_EQUAL(0, b3->Preceding().count(&inst[0])); - CHECK_EQUAL(1, b3->Preceding().count(&inst[3])); - CHECK_EQUAL(1, b3->Preceding().count(&inst[5])); - - // The if block must have upwards-exposed t0 - CHECK_EQUAL(1, b1->UEVar().count(0)); - CHECK_EQUAL(1, b1->UEVar().count(1)); - CHECK_EQUAL(1, b1->UEVar().count(2)); - CHECK_EQUAL(1, b1->UEVar().count(3)); - - // The else block must have upwards-exposed t1 - 
CHECK_EQUAL(1, b2->UEVar().count(4)); - CHECK_EQUAL(1, b2->UEVar().count(5)); - CHECK_EQUAL(1, b2->UEVar().count(6)); - CHECK_EQUAL(1, b2->UEVar().count(7)); - - CHECK_EQUAL(8, root.VarKill().size()); - - // Check that all components from t0 and t1 are killed - CHECK_EQUAL(1, root.VarKill().count(0)); - CHECK_EQUAL(1, root.VarKill().count(1)); - CHECK_EQUAL(1, root.VarKill().count(2)); - CHECK_EQUAL(1, root.VarKill().count(3)); - - CHECK_EQUAL(1, root.VarKill().count(4)); - CHECK_EQUAL(1, root.VarKill().count(5)); - CHECK_EQUAL(1, root.VarKill().count(6)); - CHECK_EQUAL(1, root.VarKill().count(7)); - - // The expected downwards-exposed definitions: - // B0: t0, t1 - // B1-B3: none - - CHECK_EQUAL(8, root.DEDef().size()); - CHECK_EQUAL(0, b1->DEDef().size()); - CHECK_EQUAL(0, b2->DEDef().size()); - CHECK_EQUAL(0, b3->DEDef().size()); - - CHECK(root.DEDef() == root.Reachable()); - - CHECK(root.Reachable() == b1->Reachable()); - CHECK(root.Reachable() == b2->Reachable()); - CHECK(root.Reachable() == b3->Reachable()); - } - - TEST(ControlFlowGraph_Build_SwitchCase_Works) - { - Instruction inst[] = - { - // Start B0 - // i0: MOV t0.x, I0.x - Instruction(0, OPCODE_MOV, 0, 1, 0xffffffff, 1), - // i1: MOVE t1.xyz, I0.yzw - Instruction(1, OPCODE_MOV, 1, 7, 0xffffffff, 0xe), - // i2: MOVE t1.w, t0.x - Instruction(2, OPCODE_MOV, 1, 8, 0xffffffff, 0x1), - // i3: MOVE t2, I0 - Instruction(3, OPCODE_MOV, 2, 0xf, 0xffffffff, 0xf), - // i4: SWITCH t0.y - Instruction(4, OPCODE_SWITCH, 1, 2), - // End B0 - // i5: CASE - Instruction(5, OPCODE_CASE), - // i6: DEFAULT - Instruction(6, OPCODE_DEFAULT), - // Start B1 - // i7: MOC t1.z, t0.x - Instruction(7, OPCODE_MOV, 1, 4, 0, 1), - // i8: CASE - Instruction(8, OPCODE_CASE), - // End B1 - // Start B2 - // i9: MOV t1.z, t2.x - Instruction(9, OPCODE_MOV, 1, 4, 2, 1), - // i10: BREAK - Instruction(10, OPCODE_BREAK), - // End B2 - // i11: CASE - Instruction(11, OPCODE_CASE), - // Start B3 - // i12: MOV t1.z, t2.y - Instruction(12, 
OPCODE_MOV, 1, 4, 2, 2), - // i13: BREAKC t0.x - Instruction(13, OPCODE_BREAKC, 0, 1), - // End B3 - // i14: CASE - Instruction(14, OPCODE_CASE), - // Start B4 - // i15: MOV t1.z, t2.z - Instruction(15, OPCODE_MOV, 1, 4, 2, 4), - // i16: ENDSWITCH - Instruction(16, OPCODE_ENDSWITCH), - // End B4 - // Start B5 - // i17: MOV o0, t1 - Instruction(17, OPCODE_MOV, 0xffffffff, 0xf, 1, 0xf), - // i18: RET - Instruction(18, OPCODE_RET) - // End B5 - }; - - ControlFlowGraph cfg; - const BasicBlock &root = cfg.Build(inst, inst + ARRAY_SIZE(inst)); - - CHECK_EQUAL(&inst[0], root.First()); - CHECK_EQUAL(&inst[4], root.Last()); - - const BasicBlock *b1 = cfg.GetBasicBlockForInstruction(&inst[7]); - const BasicBlock *b2 = cfg.GetBasicBlockForInstruction(&inst[9]); - const BasicBlock *b3 = cfg.GetBasicBlockForInstruction(&inst[12]); - const BasicBlock *b4 = cfg.GetBasicBlockForInstruction(&inst[15]); - const BasicBlock *b5 = cfg.GetBasicBlockForInstruction(&inst[17]); - - CHECK(b1 != NULL); - CHECK(b2 != NULL); - CHECK(b3 != NULL); - CHECK(b4 != NULL); - CHECK(b5 != NULL); - - // Check instruction ranges - CHECK_EQUAL(&inst[8], b1->Last()); - CHECK_EQUAL(&inst[10], b2->Last()); - CHECK_EQUAL(&inst[13], b3->Last()); - CHECK_EQUAL(&inst[16], b4->Last()); - CHECK_EQUAL(&inst[18], b5->Last()); - - // Nothing before the root, nothing after b5 - CHECK(root.Preceding().empty()); - CHECK(b5->Succeeding().empty()); - - // Check that all connections are there and no others. 
- - // B0->B1 - // B0->B2 - // B0->B3 - // B0->B4 - CHECK_EQUAL(1, root.Succeeding().count(&inst[7])); - CHECK_EQUAL(1, root.Succeeding().count(&inst[9])); - CHECK_EQUAL(1, root.Succeeding().count(&inst[12])); - CHECK_EQUAL(1, root.Succeeding().count(&inst[15])); - - CHECK_EQUAL(4, root.Succeeding().size()); - - // B1 - - // B1->B2 - CHECK_EQUAL(1, b1->Succeeding().count(&inst[9])); - CHECK_EQUAL(1, b1->Succeeding().size()); - - // B0->B1, reverse - CHECK_EQUAL(1, b1->Preceding().count(&inst[0])); - CHECK_EQUAL(1, b1->Preceding().size()); - - // B2 - - // B2->B5 - CHECK_EQUAL(1, b2->Succeeding().count(&inst[17])); - CHECK_EQUAL(1, b2->Succeeding().size()); - CHECK_EQUAL(1, b2->Preceding().count(&inst[7])); - CHECK_EQUAL(1, b2->Preceding().count(&inst[0])); - CHECK_EQUAL(2, b2->Preceding().size()); - - // B3 - // B3->B4 - // B3->B5 - CHECK_EQUAL(1, b3->Succeeding().count(&inst[15])); - CHECK_EQUAL(1, b3->Succeeding().count(&inst[17])); - CHECK_EQUAL(2, b3->Succeeding().size()); - CHECK_EQUAL(1, b3->Preceding().count(&inst[0])); - CHECK_EQUAL(1, b3->Preceding().size()); - - // B4 - CHECK_EQUAL(1, b4->Succeeding().count(&inst[17])); - CHECK_EQUAL(1, b4->Succeeding().size()); - CHECK_EQUAL(1, b4->Preceding().count(&inst[0])); - CHECK_EQUAL(2, b4->Preceding().size()); - - // B5 - CHECK_EQUAL(0, b5->Succeeding().size()); - CHECK_EQUAL(3, b5->Preceding().size()); //b2, b3, b4 - CHECK_EQUAL(1, b5->Preceding().count(&inst[9])); - CHECK_EQUAL(1, b5->Preceding().count(&inst[12])); - CHECK_EQUAL(1, b5->Preceding().count(&inst[15])); - - - // Verify reachable sets - - CHECK(root.Reachable() == root.DEDef()); - CHECK_EQUAL(9, root.Reachable().size()); - - // B5 should have these reachables: - // t0.x only from b0 - // t1.xy from b0, i1 - // t1.z from b2,i9 + b3,i12 + b4,i15 (the defs from b0 and b1 are killed by b2) - // t1.w from b0, i2 - // t2.xyzw from b0, i3 - - // Cast away const so [] works. 
- BasicBlock::ReachableVariables &r = (BasicBlock::ReachableVariables &)b5->Reachable(); - - CHECK_EQUAL(9, r.size()); - - CHECK_EQUAL(1, r[0].size()); - CHECK_EQUAL(0, r[1].size()); - CHECK_EQUAL(0, r[2].size()); - CHECK_EQUAL(0, r[3].size()); - CHECK_EQUAL(&inst[0], r[0].begin()->m_Instruction); - - CHECK_EQUAL(1, r[4].size()); - CHECK_EQUAL(1, r[5].size()); - CHECK_EQUAL(3, r[6].size()); - CHECK_EQUAL(1, r[7].size()); - - const BasicBlock::ReachableDefinitionsPerVariable &d = r[6]; - BasicBlock::ReachableDefinitionsPerVariable t; - t.insert(BasicBlock::Definition(&inst[9], &inst[9].asOperands[0])); - t.insert(BasicBlock::Definition(&inst[12], &inst[12].asOperands[0])); - t.insert(BasicBlock::Definition(&inst[15], &inst[15].asOperands[0])); - - CHECK(t == d); - - CHECK_EQUAL(1, r[8].size()); - CHECK_EQUAL(1, r[9].size()); - CHECK_EQUAL(1, r[10].size()); - CHECK_EQUAL(1, r[11].size()); - } - - TEST(ControlFlowGraph_Build_Loop_Works) - { - Instruction inst[] = - { - // Start B0 - // i0: MOV t0.x, I0.x - Instruction(0, OPCODE_MOV, 0, 1, 0xffffffff, 1), - // i1: MOVE t1.xy, I0.zw // The .x definition should not make it past the loop, .y should. 
- Instruction(1, OPCODE_MOV, 1, 3, 0xffffffff, 0xc), - // i2: LOOP - Instruction(2, OPCODE_LOOP, 1, 2), - // End B0 -> B1 - // Begin B1 - // i3: MOV t1.x, t0.x - Instruction(3, OPCODE_MOV, 1, 1, 0, 1), - // i4: BREAKC t0.x - Instruction(4, OPCODE_BREAKC, 0, 1), - // End B1 -> B2, B3 - // Begin B2 - // i5: ADD t0.x, t0.y - Instruction(5, OPCODE_ADD, 0, 1, 0, 2), - // i6: MOV t1.x, t0.x // This should never show up as definition - Instruction(6, OPCODE_MOV, 1, 1, 0, 1), - // i7: ENDLOOP - Instruction(7, OPCODE_ENDLOOP), - // End B2 -> B1 - // Start B3 - // i8: MOV O0.x, t1.x - Instruction(8, OPCODE_MOV, 0xffffffff, 1, 1, 1), - // i9: RET - Instruction(9, OPCODE_RET), - // End B3 - }; - - ControlFlowGraph cfg; - const BasicBlock &root = cfg.Build(inst, inst + ARRAY_SIZE(inst)); - - CHECK_EQUAL(&inst[0], root.First()); - CHECK_EQUAL(&inst[2], root.Last()); - - const BasicBlock *b1 = cfg.GetBasicBlockForInstruction(&inst[3]); - const BasicBlock *b2 = cfg.GetBasicBlockForInstruction(&inst[5]); - const BasicBlock *b3 = cfg.GetBasicBlockForInstruction(&inst[8]); - - CHECK(b1 != NULL); - CHECK(b2 != NULL); - CHECK(b3 != NULL); - - // Check instruction ranges - CHECK_EQUAL(&inst[4], b1->Last()); - CHECK_EQUAL(&inst[7], b2->Last()); - CHECK_EQUAL(&inst[9], b3->Last()); - - // Nothing before the root, nothing after b3 - CHECK(root.Preceding().empty()); - CHECK(b3->Succeeding().empty()); - - // Check that all connections are there and no others. 
- - // B0->B1 - CHECK_EQUAL(1, root.Succeeding().count(&inst[3])); - CHECK_EQUAL(1, root.Succeeding().size()); - - // B1 - - // B1->B2 - // B1->B3 - CHECK_EQUAL(1, b1->Succeeding().count(&inst[5])); - CHECK_EQUAL(1, b1->Succeeding().count(&inst[8])); - CHECK_EQUAL(2, b1->Succeeding().size()); - - // B0->B1, reverse - CHECK_EQUAL(1, b1->Preceding().count(&inst[0])); - // We may also come from B2 - CHECK_EQUAL(1, b1->Preceding().count(&inst[5])); - CHECK_EQUAL(2, b1->Preceding().size()); - - // B2 - - // B2->B1 - CHECK_EQUAL(1, b2->Succeeding().count(&inst[3])); - CHECK_EQUAL(1, b2->Succeeding().size()); - CHECK_EQUAL(1, b2->Preceding().count(&inst[3])); - CHECK_EQUAL(1, b2->Preceding().size()); - - // B3 - CHECK_EQUAL(1, b3->Preceding().count(&inst[3])); - CHECK_EQUAL(1, b3->Preceding().size()); - - // Verify reachable sets - - - BasicBlock::ReachableVariables t; - - // B0 DEDef and Reachable - t.clear(); - t[0].insert(BasicBlock::Definition(&inst[0], &inst[0].asOperands[0])); - t[4].insert(BasicBlock::Definition(&inst[1], &inst[1].asOperands[0])); - t[5].insert(BasicBlock::Definition(&inst[1], &inst[1].asOperands[0])); - - CHECK(root.DEDef() == t); - CHECK(root.Reachable() == root.DEDef()); - - // B1 DEDef and Reachable - t.clear(); - t[4].insert(BasicBlock::Definition(&inst[3], &inst[3].asOperands[0])); - CHECK(b1->DEDef() == t); - - t = b1->DEDef(); - // t0.x from i0, t1.y (but not .x) from i1 - t[0].insert(BasicBlock::Definition(&inst[0], &inst[0].asOperands[0])); - t[5].insert(BasicBlock::Definition(&inst[1], &inst[1].asOperands[0])); - - // t0.x from i5, but nothing from i6 - t[0].insert(BasicBlock::Definition(&inst[5], &inst[5].asOperands[0])); - CHECK(b1->Reachable() == t); - - // B2 - t.clear(); - t[0].insert(BasicBlock::Definition(&inst[5], &inst[5].asOperands[0])); - t[4].insert(BasicBlock::Definition(&inst[6], &inst[6].asOperands[0])); - CHECK(b2->DEDef() == t); - - t = b2->DEDef(); - t[5].insert(BasicBlock::Definition(&inst[1], &inst[1].asOperands[0])); 
- - CHECK(b2->Reachable() == t); - - // B3 - t.clear(); - CHECK(b3->DEDef() == t); - // t0.x from i0, t1.y from i1 - t[0].insert(BasicBlock::Definition(&inst[0], &inst[0].asOperands[0])); - t[5].insert(BasicBlock::Definition(&inst[1], &inst[1].asOperands[0])); - - // t1.x from i3 - t[4].insert(BasicBlock::Definition(&inst[3], &inst[3].asOperands[0])); - - // t0.x from i5 - t[0].insert(BasicBlock::Definition(&inst[5], &inst[5].asOperands[0])); - - CHECK(b3->Reachable() == t); - } -} - -#endif diff --git a/third_party/HLSLcc/src/ControlFlowGraphUtils.cpp b/third_party/HLSLcc/src/ControlFlowGraphUtils.cpp deleted file mode 100644 index b74fbed..0000000 --- a/third_party/HLSLcc/src/ControlFlowGraphUtils.cpp +++ /dev/null @@ -1,116 +0,0 @@ -#include "ControlFlowGraphUtils.h" - -#include "internal_includes/debug.h" -#include "internal_includes/Instruction.h" -#include "internal_includes/Operand.h" - - -// Get the next instruction that's not one of CASE, DEFAULT, LOOP, ENDSWITCH -const Instruction *HLSLcc::ControlFlow::Utils::GetNextNonLabelInstruction(const Instruction *psStart, bool *sawEndSwitch /*= 0*/) -{ - const Instruction *inst = psStart; - // Skip CASE/DEFAULT/ENDSWITCH/LOOP labels - while (inst->eOpcode == OPCODE_CASE || inst->eOpcode == OPCODE_DEFAULT || inst->eOpcode == OPCODE_ENDSWITCH || inst->eOpcode == OPCODE_LOOP) - { - // We really shouldn't be seeing ENDSWITCH without sawEndSwitch being set (as in, we're expecting it) - ASSERT(inst->eOpcode != OPCODE_ENDSWITCH || sawEndSwitch != NULL); - if (inst->eOpcode == OPCODE_ENDSWITCH && sawEndSwitch != NULL) - *sawEndSwitch = true; - inst++; - } - return inst; -} - -// For a given flow-control instruction, find the corresponding jump location: -// If the input is OPCODE_IF, then find the next same-level ELSE or ENDIF +1 -// For ELSE, find same level ENDIF + 1 -// For BREAK/BREAKC, find next ENDLOOP or ENDSWITCH + 1 -// For SWITCH, find next same-level CASE/DEFAULT (skip multiple consecutive case/default labels) 
or ENDSWITCH + 1 -// For ENDLOOP, find previous same-level LOOP + 1 -// For CASE/DEFAULT, find next same-level CASE/DEFAULT or ENDSWITCH + 1, skip multiple consecutive case/default labels -// For CONTINUE/C the previous LOOP + 1 -// Note that LOOP/ENDSWITCH itself is nothing but a label but it still starts a new basic block. -// Note that CASE labels fall through. -// Always returns the beginning of the next block, so skip multiple CASE/DEFAULT labels etc. -const Instruction * HLSLcc::ControlFlow::Utils::GetJumpPoint(const Instruction *psStart, bool *sawEndSwitch /*= 0*/, bool *needConnectToParent /* = 0*/) -{ - const Instruction *inst = psStart; - int depth = 0; - OPCODE_TYPE op = psStart->eOpcode; - ASSERT(op == OPCODE_IF || op == OPCODE_ELSE || op == OPCODE_BREAK || op == OPCODE_BREAKC - || op == OPCODE_SWITCH || op == OPCODE_CASE || op == OPCODE_DEFAULT - || op == OPCODE_ENDLOOP || op == OPCODE_CONTINUE || op == OPCODE_CONTINUEC); - - switch (op) - { - default: - ASSERT(0); - break; - case OPCODE_IF: - case OPCODE_ELSE: - while (1) - { - inst++; - if ((inst->eOpcode == OPCODE_ELSE || inst->eOpcode == OPCODE_ENDIF) && (depth == 0)) - { - return GetNextNonLabelInstruction(inst + 1, sawEndSwitch); - } - if (inst->eOpcode == OPCODE_IF) - depth++; - if (inst->eOpcode == OPCODE_ENDIF) - depth--; - } - case OPCODE_BREAK: - case OPCODE_BREAKC: - while (1) - { - inst++; - if ((inst->eOpcode == OPCODE_ENDLOOP || inst->eOpcode == OPCODE_ENDSWITCH) && (depth == 0)) - { - return GetNextNonLabelInstruction(inst + 1, sawEndSwitch); - } - if (inst->eOpcode == OPCODE_SWITCH || inst->eOpcode == OPCODE_LOOP) - depth++; - if (inst->eOpcode == OPCODE_ENDSWITCH || inst->eOpcode == OPCODE_ENDLOOP) - depth--; - } - case OPCODE_CONTINUE: - case OPCODE_CONTINUEC: - case OPCODE_ENDLOOP: - while (1) - { - inst--; - if ((inst->eOpcode == OPCODE_LOOP) && (depth == 0)) - { - return GetNextNonLabelInstruction(inst + 1, sawEndSwitch); - } - if (inst->eOpcode == OPCODE_LOOP) - depth--; - if 
(inst->eOpcode == OPCODE_ENDLOOP) - depth++; - } - case OPCODE_SWITCH: - case OPCODE_CASE: - case OPCODE_DEFAULT: - while (1) - { - inst++; - if ((inst->eOpcode == OPCODE_CASE || inst->eOpcode == OPCODE_DEFAULT || inst->eOpcode == OPCODE_ENDSWITCH) && (depth == 0)) - { - // Note that we'll skip setting sawEndSwitch if inst->eOpcode = OPCODE_ENDSWITCH - // so that BasicBlock::Build can distinguish between there being a direct route - // from SWITCH->ENDSWITCH (CASE followed directly by ENDSWITCH) and not. - - if (inst->eOpcode == OPCODE_ENDSWITCH && sawEndSwitch != 0) - *sawEndSwitch = true; - - return GetNextNonLabelInstruction(inst + 1, needConnectToParent); - } - if (inst->eOpcode == OPCODE_SWITCH) - depth++; - if (inst->eOpcode == OPCODE_ENDSWITCH) - depth--; - } - } - return 0; -} diff --git a/third_party/HLSLcc/src/DataTypeAnalysis.cpp b/third_party/HLSLcc/src/DataTypeAnalysis.cpp deleted file mode 100644 index a53fc5a..0000000 --- a/third_party/HLSLcc/src/DataTypeAnalysis.cpp +++ /dev/null @@ -1,777 +0,0 @@ -#include "internal_includes/debug.h" -#include "internal_includes/tokens.h" -#include "internal_includes/HLSLccToolkit.h" -#include "internal_includes/DataTypeAnalysis.h" -#include "internal_includes/Shader.h" -#include "internal_includes/HLSLCrossCompilerContext.h" -#include "internal_includes/Instruction.h" -#include - - -// Helper function to set the vector type of 1 or more components in a vector -// If the existing values (in vector we're writing to) are all SVT_VOID, just upgrade the value and we're done -// Otherwise, set all the components in the vector that currently are set to that same value OR are now being written to -// to the "highest" type value (ordering int->uint->float) -static void SetVectorType(std::vector &aeTempVecType, uint32_t regBaseIndex, uint32_t componentMask, SHADER_VARIABLE_TYPE eType, int *psMadeProgress) -{ - int i = 0; - - // Expand the mask to include all components that are used, also upgrade type - for (i = 0; i < 4; 
i++) - { - if (aeTempVecType[regBaseIndex + i] != SVT_VOID) - { - componentMask |= (1 << i); - eType = HLSLcc::SelectHigherType(eType, aeTempVecType[regBaseIndex + i]); - } - } - - // Now componentMask contains the components we actually need to update and eType may have been changed to something else. - // Write the results - for (i = 0; i < 4; i++) - { - if (componentMask & (1 << i)) - { - if (aeTempVecType[regBaseIndex + i] != eType) - { - aeTempVecType[regBaseIndex + i] = eType; - if (psMadeProgress) - *psMadeProgress = 1; - } - } - } -} - -static SHADER_VARIABLE_TYPE OperandPrecisionToShaderVariableType(OPERAND_MIN_PRECISION prec, SHADER_VARIABLE_TYPE eDefault) -{ - SHADER_VARIABLE_TYPE eType = eDefault; - switch (prec) - { - case OPERAND_MIN_PRECISION_DEFAULT: - break; - case OPERAND_MIN_PRECISION_SINT_16: - eType = SVT_INT16; - break; - case OPERAND_MIN_PRECISION_UINT_16: - eType = SVT_UINT16; - break; - case OPERAND_MIN_PRECISION_FLOAT_2_8: - eType = SVT_FLOAT10; - break; - case OPERAND_MIN_PRECISION_FLOAT_16: - eType = SVT_FLOAT16; - break; - default: - ASSERT(0); // Catch this to see what's going on. - break; - } - return eType; -} - -static void MarkOperandAs(Operand *psOperand, SHADER_VARIABLE_TYPE eType, std::vector &aeTempVecType) -{ - if (psOperand->eType == OPERAND_TYPE_TEMP) - { - const uint32_t ui32RegIndex = psOperand->ui32RegisterNumber * 4; - uint32_t mask = psOperand->GetAccessMask(); - // Adjust type based on operand precision - eType = OperandPrecisionToShaderVariableType(psOperand->eMinPrecision, eType); - - SetVectorType(aeTempVecType, ui32RegIndex, mask, eType, NULL); - } -} - -static void MarkAllOperandsAs(Instruction* psInst, SHADER_VARIABLE_TYPE eType, std::vector &aeTempVecType) -{ - uint32_t i = 0; - for (i = 0; i < psInst->ui32NumOperands; i++) - { - MarkOperandAs(&psInst->asOperands[i], eType, aeTempVecType); - } -} - -// Mark scalars from CBs. TODO: Do we need to do the same for vec2/3's as well? 
There may be swizzles involved which make it vec4 or something else again. -static void SetCBOperandComponents(HLSLCrossCompilerContext *psContext, Operand *psOperand) -{ - const ConstantBuffer* psCBuf = NULL; - const ShaderVarType* psVarType = NULL; - int32_t rebase = 0; - bool isArray; - - if (psOperand->eType != OPERAND_TYPE_CONSTANT_BUFFER) - return; - - // Ignore selection modes that access more than one component - switch (psOperand->eSelMode) - { - case OPERAND_4_COMPONENT_SELECT_1_MODE: - break; - case OPERAND_4_COMPONENT_SWIZZLE_MODE: - if (!psOperand->IsSwizzleReplicated()) - return; - break; - case OPERAND_4_COMPONENT_MASK_MODE: - return; - } - - psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, psOperand->aui32ArraySizes[0], &psCBuf); - ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], psOperand->aui32Swizzle, psCBuf, &psVarType, &isArray, NULL, &rebase, psContext->flags); - - if (psVarType->Class == SVC_SCALAR) - psOperand->iNumComponents = 1; -} - -struct SetPartialDataTypes -{ - SetPartialDataTypes(SHADER_VARIABLE_TYPE *_aeTempVec) - : m_TempVec(_aeTempVec) - {} - SHADER_VARIABLE_TYPE *m_TempVec; - - template void operator()(ItrType inst, Operand *psOperand, uint32_t ui32OperandType) const - { - uint32_t mask = 0; - SHADER_VARIABLE_TYPE *aeTempVecType = m_TempVec; - SHADER_VARIABLE_TYPE newType; - uint32_t i, reg; - if (psOperand->eType != OPERAND_TYPE_TEMP) - return; - - if (ui32OperandType == FEO_FLAG_SUBOPERAND) - { - // We really shouldn't ever be getting minprecision float indices here - ASSERT(psOperand->eMinPrecision != OPERAND_MIN_PRECISION_FLOAT_16 && psOperand->eMinPrecision != OPERAND_MIN_PRECISION_FLOAT_2_8); - - mask = psOperand->GetAccessMask(); - reg = psOperand->ui32RegisterNumber; - newType = OperandPrecisionToShaderVariableType(psOperand->eMinPrecision, SVT_INT_AMBIGUOUS); - for (i = 0; i < 4; i++) - { - if (!(mask & (1 << i))) - continue; - if (aeTempVecType[reg * 4 + i] == SVT_VOID) - 
aeTempVecType[reg * 4 + i] = newType; - } - return; - } - - if (psOperand->eMinPrecision == OPERAND_MIN_PRECISION_DEFAULT) - return; - - mask = psOperand->GetAccessMask(); - reg = psOperand->ui32RegisterNumber; - newType = OperandPrecisionToShaderVariableType(psOperand->eMinPrecision, SVT_VOID); - ASSERT(newType != SVT_VOID); - for (i = 0; i < 4; i++) - { - if (!(mask & (1 << i))) - continue; - aeTempVecType[reg * 4 + i] = newType; - } - } -}; - -// Write back the temp datatypes into operands. Also mark scalars in constant buffers - -struct WritebackDataTypes -{ - WritebackDataTypes(HLSLCrossCompilerContext *_ctx, SHADER_VARIABLE_TYPE *_aeTempVec) - : m_Context(_ctx) - , m_TempVec(_aeTempVec) - {} - HLSLCrossCompilerContext *m_Context; - SHADER_VARIABLE_TYPE *m_TempVec; - - template void operator()(ItrType inst, Operand *psOperand, uint32_t ui32OperandType) const - { - SHADER_VARIABLE_TYPE *aeTempVecType = m_TempVec; - uint32_t reg, mask, i; - SHADER_VARIABLE_TYPE dtype; - - if (psOperand->eType == OPERAND_TYPE_CONSTANT_BUFFER) - SetCBOperandComponents(m_Context, psOperand); - - if (psOperand->eType != OPERAND_TYPE_TEMP) - return; - - reg = psOperand->ui32RegisterNumber; - mask = psOperand->GetAccessMask(); - dtype = SVT_VOID; - - for (i = 0; i < 4; i++) - { - if (!(mask & (1 << i))) - continue; - - // Check that all components have the same type - ASSERT(dtype == SVT_VOID || dtype == aeTempVecType[reg * 4 + i]); - - dtype = aeTempVecType[reg * 4 + i]; - - ASSERT(dtype != SVT_VOID); - ASSERT(dtype == OperandPrecisionToShaderVariableType(psOperand->eMinPrecision, dtype)); - - psOperand->aeDataType[i] = dtype; - } - } -}; - - -void HLSLcc::DataTypeAnalysis::SetDataTypes(HLSLCrossCompilerContext* psContext, std::vector & instructions, uint32_t ui32TempCount, std::vector &results) -{ - uint32_t i; - Instruction *psFirstInst = &instructions[0]; - Instruction *psInst = psFirstInst; - // Start with void, then move up the chain void->ambiguous int->minprec 
int/uint->int/uint->minprec float->float - std::vector &aeTempVecType = results; - - aeTempVecType.clear(); - aeTempVecType.resize(ui32TempCount * 4, SVT_VOID); - - if (ui32TempCount == 0) - return; - - // Go through the instructions, pick up partial datatypes, because we at least know those for a fact. - // Also set all suboperands to be integers (they're always used as indices) - ForEachOperand(instructions.begin(), instructions.end(), FEO_FLAG_ALL, SetPartialDataTypes(&aeTempVecType[0])); - - // if (psContext->psShader->ui32MajorVersion <= 3) - { - // First pass, do analysis: deduce the data type based on opcodes, fill out aeTempVecType table - // Only ever to int->float promotion (or int->uint), never the other way around - for (i = 0; i < (uint32_t)instructions.size(); ++i, psInst++) - { - if (psInst->ui32NumOperands == 0) - continue; -#ifdef _DEBUG - for (int k = 0; k < (int)psInst->ui32NumOperands; k++) - { - if (psInst->asOperands[k].eType == OPERAND_TYPE_TEMP) - { - ASSERT(psInst->asOperands[k].ui32RegisterNumber < ui32TempCount); - } - } -#endif - - switch (psInst->eOpcode) - { - // All float-only ops - case OPCODE_ADD: - case OPCODE_DERIV_RTX: - case OPCODE_DERIV_RTY: - case OPCODE_DIV: - case OPCODE_DP2: - case OPCODE_DP3: - case OPCODE_DP4: - case OPCODE_EXP: - case OPCODE_FRC: - case OPCODE_LOG: - case OPCODE_MAD: - case OPCODE_MIN: - case OPCODE_MAX: - case OPCODE_MUL: - case OPCODE_ROUND_NE: - case OPCODE_ROUND_NI: - case OPCODE_ROUND_PI: - case OPCODE_ROUND_Z: - case OPCODE_RSQ: - case OPCODE_SAMPLE: - case OPCODE_SAMPLE_C: - case OPCODE_SAMPLE_C_LZ: - case OPCODE_SAMPLE_L: - case OPCODE_SAMPLE_D: - case OPCODE_SAMPLE_B: - case OPCODE_SQRT: - case OPCODE_SINCOS: - case OPCODE_LOD: - case OPCODE_GATHER4: - - case OPCODE_DERIV_RTX_COARSE: - case OPCODE_DERIV_RTX_FINE: - case OPCODE_DERIV_RTY_COARSE: - case OPCODE_DERIV_RTY_FINE: - case OPCODE_GATHER4_C: - case OPCODE_GATHER4_PO: - case OPCODE_GATHER4_PO_C: - case OPCODE_RCP: - - 
MarkAllOperandsAs(psInst, SVT_FLOAT, aeTempVecType); - break; - - // Comparison ops, need to enable possibility for going boolean - case OPCODE_IEQ: - case OPCODE_INE: - MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType); - MarkOperandAs(&psInst->asOperands[1], SVT_INT_AMBIGUOUS, aeTempVecType); - MarkOperandAs(&psInst->asOperands[2], SVT_INT_AMBIGUOUS, aeTempVecType); - break; - - case OPCODE_IF: - case OPCODE_BREAKC: - case OPCODE_CALLC: - case OPCODE_CONTINUEC: - case OPCODE_RETC: - MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType); - break; - - case OPCODE_ILT: - case OPCODE_IGE: - MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType); - MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); - MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType); - break; - - case OPCODE_ULT: - case OPCODE_UGE: - MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType); - MarkOperandAs(&psInst->asOperands[1], SVT_UINT, aeTempVecType); - MarkOperandAs(&psInst->asOperands[2], SVT_UINT, aeTempVecType); - break; - - case OPCODE_AND: - case OPCODE_OR: - MarkOperandAs(&psInst->asOperands[0], SVT_INT_AMBIGUOUS, aeTempVecType); - MarkOperandAs(&psInst->asOperands[1], SVT_BOOL, aeTempVecType); - MarkOperandAs(&psInst->asOperands[2], SVT_BOOL, aeTempVecType); - break; - - // Integer ops that don't care of signedness - case OPCODE_IADD: - case OPCODE_INEG: - case OPCODE_ISHL: - case OPCODE_NOT: - case OPCODE_XOR: - case OPCODE_BUFINFO: - case OPCODE_COUNTBITS: - case OPCODE_FIRSTBIT_HI: - case OPCODE_FIRSTBIT_LO: - case OPCODE_FIRSTBIT_SHI: - case OPCODE_BFI: - case OPCODE_BFREV: - case OPCODE_ATOMIC_AND: - case OPCODE_ATOMIC_OR: - case OPCODE_ATOMIC_XOR: - case OPCODE_ATOMIC_CMP_STORE: - case OPCODE_ATOMIC_IADD: - case OPCODE_IMM_ATOMIC_IADD: - case OPCODE_IMM_ATOMIC_AND: - case OPCODE_IMM_ATOMIC_OR: - case OPCODE_IMM_ATOMIC_XOR: - case OPCODE_IMM_ATOMIC_EXCH: - case OPCODE_IMM_ATOMIC_CMP_EXCH: - - - MarkAllOperandsAs(psInst, 
SVT_INT_AMBIGUOUS, aeTempVecType); - break; - - - // Integer ops - case OPCODE_IMAD: - case OPCODE_IMAX: - case OPCODE_IMIN: - case OPCODE_IMUL: - case OPCODE_ISHR: - case OPCODE_IBFE: - - case OPCODE_ATOMIC_IMAX: - case OPCODE_ATOMIC_IMIN: - case OPCODE_IMM_ATOMIC_IMAX: - case OPCODE_IMM_ATOMIC_IMIN: - MarkAllOperandsAs(psInst, SVT_INT, aeTempVecType); - break; - - - // uint ops - case OPCODE_UDIV: - case OPCODE_UMUL: - case OPCODE_UMAD: - case OPCODE_UMAX: - case OPCODE_UMIN: - case OPCODE_USHR: - case OPCODE_UADDC: - case OPCODE_USUBB: - case OPCODE_ATOMIC_UMAX: - case OPCODE_ATOMIC_UMIN: - case OPCODE_IMM_ATOMIC_UMAX: - case OPCODE_IMM_ATOMIC_UMIN: - case OPCODE_IMM_ATOMIC_ALLOC: - case OPCODE_IMM_ATOMIC_CONSUME: - MarkAllOperandsAs(psInst, SVT_UINT, aeTempVecType); - break; - case OPCODE_UBFE: - MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType); - MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); - MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType); - MarkOperandAs(&psInst->asOperands[3], SVT_UINT, aeTempVecType); - break; - - // Need special handling - case OPCODE_FTOI: - case OPCODE_FTOU: - MarkOperandAs(&psInst->asOperands[0], psInst->eOpcode == OPCODE_FTOI ? SVT_INT : SVT_UINT, aeTempVecType); - MarkOperandAs(&psInst->asOperands[1], SVT_FLOAT, aeTempVecType); - break; - - case OPCODE_GE: - case OPCODE_LT: - case OPCODE_EQ: - case OPCODE_NE: - - MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType); - MarkOperandAs(&psInst->asOperands[1], SVT_FLOAT, aeTempVecType); - MarkOperandAs(&psInst->asOperands[2], SVT_FLOAT, aeTempVecType); - break; - - case OPCODE_ITOF: - case OPCODE_UTOF: - MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType); - MarkOperandAs(&psInst->asOperands[1], psInst->eOpcode == OPCODE_ITOF ? 
SVT_INT : SVT_UINT, aeTempVecType); - break; - - case OPCODE_LD: - case OPCODE_LD_MS: - { - SHADER_VARIABLE_TYPE samplerReturnType = psInst->asOperands[2].aeDataType[0]; - MarkOperandAs(&psInst->asOperands[0], samplerReturnType, aeTempVecType); - MarkOperandAs(&psInst->asOperands[1], SVT_UINT, aeTempVecType); - break; - } - - case OPCODE_MOVC: - MarkOperandAs(&psInst->asOperands[1], SVT_BOOL, aeTempVecType); - break; - - case OPCODE_SWAPC: - MarkOperandAs(&psInst->asOperands[2], SVT_BOOL, aeTempVecType); - break; - - case OPCODE_RESINFO: - // Operand 0 depends on the return type declaration, op 1 is always uint - MarkOperandAs(&psInst->asOperands[1], SVT_UINT, aeTempVecType); - switch (psInst->eResInfoReturnType) - { - default: - case RESINFO_INSTRUCTION_RETURN_FLOAT: - case RESINFO_INSTRUCTION_RETURN_RCPFLOAT: - MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType); - break; - case RESINFO_INSTRUCTION_RETURN_UINT: - MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType); - break; - } - break; - - case OPCODE_SAMPLE_INFO: - // Sample_info uses the same RESINFO_RETURN_TYPE for storage. 0 = float, 1 = uint. - MarkOperandAs(&psInst->asOperands[0], psInst->eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_FLOAT ? SVT_FLOAT : SVT_UINT, aeTempVecType); - break; - - case OPCODE_SAMPLE_POS: - MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType); - break; - - - case OPCODE_LD_UAV_TYPED: - // translates to gvec4 loadImage(gimage i, ivec p). 
- MarkOperandAs(&psInst->asOperands[0], SVT_INT, aeTempVecType); - MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); // ivec p - break; - - case OPCODE_STORE_UAV_TYPED: - // translates to storeImage(gimage i, ivec p, gvec4 data) - MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); // ivec p - MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType); // gvec4 data - break; - - case OPCODE_LD_RAW: - if (psInst->asOperands[2].eType == OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) - MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType); - else - MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType); - MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); - break; - - case OPCODE_STORE_RAW: - if (psInst->asOperands[0].eType == OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) - MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType); - else - MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType); - MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); - break; - - case OPCODE_LD_STRUCTURED: - MarkOperandAs(&psInst->asOperands[0], SVT_INT, aeTempVecType); - MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); - MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType); - break; - - case OPCODE_STORE_STRUCTURED: - MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); - MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType); - MarkOperandAs(&psInst->asOperands[3], SVT_INT, aeTempVecType); - break; - - case OPCODE_F32TOF16: - MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType); - MarkOperandAs(&psInst->asOperands[1], SVT_FLOAT, aeTempVecType); - break; - - case OPCODE_F16TOF32: - MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType); - MarkOperandAs(&psInst->asOperands[1], SVT_UINT, aeTempVecType); - break; - - - // No-operands, should never get here anyway - /* case OPCODE_BREAK: - case OPCODE_CALL: - case OPCODE_CASE: - case OPCODE_CONTINUE: - case 
OPCODE_CUT: - case OPCODE_DEFAULT: - case OPCODE_DISCARD: - case OPCODE_ELSE: - case OPCODE_EMIT: - case OPCODE_EMITTHENCUT: - case OPCODE_ENDIF: - case OPCODE_ENDLOOP: - case OPCODE_ENDSWITCH: - - case OPCODE_LABEL: - case OPCODE_LOOP: - case OPCODE_CUSTOMDATA: - case OPCODE_NOP: - case OPCODE_RET: - case OPCODE_SWITCH: - case OPCODE_DCL_RESOURCE: // DCL* opcodes have - case OPCODE_DCL_CONSTANT_BUFFER: // custom operand formats. - case OPCODE_DCL_SAMPLER: - case OPCODE_DCL_INDEX_RANGE: - case OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY: - case OPCODE_DCL_GS_INPUT_PRIMITIVE: - case OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT: - case OPCODE_DCL_INPUT: - case OPCODE_DCL_INPUT_SGV: - case OPCODE_DCL_INPUT_SIV: - case OPCODE_DCL_INPUT_PS: - case OPCODE_DCL_INPUT_PS_SGV: - case OPCODE_DCL_INPUT_PS_SIV: - case OPCODE_DCL_OUTPUT: - case OPCODE_DCL_OUTPUT_SGV: - case OPCODE_DCL_OUTPUT_SIV: - case OPCODE_DCL_TEMPS: - case OPCODE_DCL_INDEXABLE_TEMP: - case OPCODE_DCL_GLOBAL_FLAGS: - - - case OPCODE_HS_DECLS: // token marks beginning of HS sub-shader - case OPCODE_HS_CONTROL_POINT_PHASE: // token marks beginning of HS sub-shader - case OPCODE_HS_FORK_PHASE: // token marks beginning of HS sub-shader - case OPCODE_HS_JOIN_PHASE: // token marks beginning of HS sub-shader - - case OPCODE_EMIT_STREAM: - case OPCODE_CUT_STREAM: - case OPCODE_EMITTHENCUT_STREAM: - case OPCODE_INTERFACE_CALL: - - - case OPCODE_DCL_STREAM: - case OPCODE_DCL_FUNCTION_BODY: - case OPCODE_DCL_FUNCTION_TABLE: - case OPCODE_DCL_INTERFACE: - - case OPCODE_DCL_INPUT_CONTROL_POINT_COUNT: - case OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT: - case OPCODE_DCL_TESS_DOMAIN: - case OPCODE_DCL_TESS_PARTITIONING: - case OPCODE_DCL_TESS_OUTPUT_PRIMITIVE: - case OPCODE_DCL_HS_MAX_TESSFACTOR: - case OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT: - case OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT: - - case OPCODE_DCL_THREAD_GROUP: - case OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED: - case OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW: - case 
OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED: - case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW: - case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED: - case OPCODE_DCL_RESOURCE_RAW: - case OPCODE_DCL_RESOURCE_STRUCTURED: - case OPCODE_SYNC: - - case OPCODE_EVAL_SNAPPED: - case OPCODE_EVAL_SAMPLE_INDEX: - case OPCODE_EVAL_CENTROID: - - case OPCODE_DCL_GS_INSTANCE_COUNT: - - case OPCODE_ABORT: - case OPCODE_DEBUG_BREAK: - - // Double not supported - case OPCODE_DADD: - case OPCODE_DMAX: - case OPCODE_DMIN: - case OPCODE_DMUL: - case OPCODE_DEQ: - case OPCODE_DGE: - case OPCODE_DLT: - case OPCODE_DNE: - case OPCODE_DMOV: - case OPCODE_DMOVC: - case OPCODE_DTOF: - case OPCODE_FTOD: - */ - - default: - break; - } - } - } - - { - int madeProgress = 0; - // Next go through MOV and MOVC and propagate the data type of whichever parameter we happen to have - do - { - madeProgress = 0; - psInst = psFirstInst; - for (i = 0; i < (uint32_t)instructions.size(); ++i, psInst++) - { - if (psInst->eOpcode == OPCODE_MOV || psInst->eOpcode == OPCODE_MOVC) - { - // Figure out the data type - uint32_t k; - SHADER_VARIABLE_TYPE dataType = SVT_VOID; - int foundImmediate = 0; - for (k = 0; k < psInst->ui32NumOperands; k++) - { - uint32_t mask, j; - if (psInst->eOpcode == OPCODE_MOVC && k == 1) - continue; // Ignore the condition operand, it's always int - - if (psInst->asOperands[k].eType == OPERAND_TYPE_IMMEDIATE32) - { - foundImmediate = 1; - continue; // We don't know the data type of immediates yet, but if this is the only one found, mark as int, it'll get promoted later if needed - } - - if (psInst->asOperands[k].eType != OPERAND_TYPE_TEMP) - { - dataType = psInst->asOperands[k].GetDataType(psContext); - break; - } - - if (psInst->asOperands[k].eModifier != OPERAND_MODIFIER_NONE) - { - // If any modifiers are used in MOV or MOVC, that automatically is treated as float. 
- dataType = SVT_FLOAT; - break; - } - - mask = psInst->asOperands[k].GetAccessMask(); - for (j = 0; j < 4; j++) - { - if (!(mask & (1 << j))) - continue; - if (aeTempVecType[psInst->asOperands[k].ui32RegisterNumber * 4 + j] != SVT_VOID) - { - dataType = HLSLcc::SelectHigherType(dataType, aeTempVecType[psInst->asOperands[k].ui32RegisterNumber * 4 + j]); - } - } - } - - // Use at minimum int type when any operand is immediate. - // Allowing bool could lead into bugs like case 883080 - if (foundImmediate && (dataType == SVT_VOID || dataType == SVT_BOOL)) - dataType = SVT_INT; - - if (dataType != SVT_VOID) - { - // Found data type, write to all operands - // First adjust it to not have precision qualifiers in it - switch (dataType) - { - case SVT_FLOAT10: - case SVT_FLOAT16: - dataType = SVT_FLOAT; - break; - case SVT_INT12: - case SVT_INT16: - dataType = SVT_INT; - break; - case SVT_UINT16: - case SVT_UINT8: - dataType = SVT_UINT; - break; - default: - break; - } - for (k = 0; k < psInst->ui32NumOperands; k++) - { - uint32_t mask; - if (psInst->eOpcode == OPCODE_MOVC && k == 1) - continue; // Ignore the condition operand, it's always int - - if (psInst->asOperands[k].eType != OPERAND_TYPE_TEMP) - continue; - if (psInst->asOperands[k].eMinPrecision != OPERAND_MIN_PRECISION_DEFAULT) - continue; - - mask = psInst->asOperands[k].GetAccessMask(); - SetVectorType(aeTempVecType, psInst->asOperands[k].ui32RegisterNumber * 4, mask, dataType, &madeProgress); - } - } - } - } - } - while (madeProgress != 0); - } - - - // translate forced_int and int_ambiguous back to int - for (i = 0; i < ui32TempCount * 4; i++) - { - if (aeTempVecType[i] == SVT_FORCED_INT || aeTempVecType[i] == SVT_INT_AMBIGUOUS) - aeTempVecType[i] = SVT_INT; - } - - ForEachOperand(instructions.begin(), instructions.end(), FEO_FLAG_ALL, WritebackDataTypes(psContext, &aeTempVecType[0])); - - // Propagate boolean data types over logical operators - bool didProgress = false; - do - { - didProgress = false; - 
std::for_each(instructions.begin(), instructions.end(), [&didProgress, &psContext, &aeTempVecType](Instruction &i) - { - if ((i.eOpcode == OPCODE_AND || i.eOpcode == OPCODE_OR) - && (i.asOperands[1].GetDataType(psContext) == SVT_BOOL && i.asOperands[2].GetDataType(psContext) == SVT_BOOL) - && (i.asOperands[0].eType == OPERAND_TYPE_TEMP && i.asOperands[0].GetDataType(psContext) != SVT_BOOL)) - { - // Check if all uses see only this define - bool isStandalone = true; - std::for_each(i.m_Uses.begin(), i.m_Uses.end(), [&isStandalone](Instruction::Use &u) - { - if (u.m_Op->m_Defines.size() > 1) - isStandalone = false; - }); - - if (isStandalone) - { - didProgress = true; - // Change data type of this and all uses - i.asOperands[0].aeDataType[0] = i.asOperands[0].aeDataType[1] = i.asOperands[0].aeDataType[2] = i.asOperands[0].aeDataType[3] = SVT_BOOL; - uint32_t reg = i.asOperands[0].ui32RegisterNumber; - aeTempVecType[reg * 4 + 0] = aeTempVecType[reg * 4 + 1] = aeTempVecType[reg * 4 + 2] = aeTempVecType[reg * 4 + 3] = SVT_BOOL; - - std::for_each(i.m_Uses.begin(), i.m_Uses.end(), [](Instruction::Use &u) - { - u.m_Op->aeDataType[0] = u.m_Op->aeDataType[1] = u.m_Op->aeDataType[2] = u.m_Op->aeDataType[3] = SVT_BOOL; - }); - } - } - }); - } - while (didProgress); -} diff --git a/third_party/HLSLcc/src/Declaration.cpp b/third_party/HLSLcc/src/Declaration.cpp deleted file mode 100644 index 4171fb3..0000000 --- a/third_party/HLSLcc/src/Declaration.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "internal_includes/Declaration.h" diff --git a/third_party/HLSLcc/src/HLSLCrossCompilerContext.cpp b/third_party/HLSLcc/src/HLSLCrossCompilerContext.cpp deleted file mode 100644 index 7117d81..0000000 --- a/third_party/HLSLcc/src/HLSLCrossCompilerContext.cpp +++ /dev/null @@ -1,350 +0,0 @@ -#include "internal_includes/HLSLCrossCompilerContext.h" -#include "internal_includes/HLSLccToolkit.h" -#include "internal_includes/Shader.h" -#include "internal_includes/DataTypeAnalysis.h" -#include 
"internal_includes/UseDefineChains.h" -#include "internal_includes/Declaration.h" -#include "internal_includes/debug.h" -#include "internal_includes/Translator.h" -#include "internal_includes/ControlFlowGraph.h" -#include "internal_includes/languages.h" -#include "include/hlslcc.h" -#include - -void HLSLCrossCompilerContext::DoDataTypeAnalysis(ShaderPhase *psPhase) -{ - size_t ui32DeclCount = psPhase->psDecl.size(); - uint32_t i; - - psPhase->psTempDeclaration = NULL; - psPhase->ui32OrigTemps = 0; - psPhase->ui32TotalTemps = 0; - - // Retrieve the temp decl count - for (i = 0; i < ui32DeclCount; ++i) - { - if (psPhase->psDecl[i].eOpcode == OPCODE_DCL_TEMPS) - { - psPhase->ui32TotalTemps = psPhase->psDecl[i].value.ui32NumTemps; - psPhase->psTempDeclaration = &psPhase->psDecl[i]; - break; - } - } - - if (psPhase->ui32TotalTemps == 0) - return; - - psPhase->ui32OrigTemps = psPhase->ui32TotalTemps; - - // The split table is a table containing the index of the original register this register was split out from, or 0xffffffff - // Format: lowest 16 bits: original register. bits 16-23: rebase (eg value of 1 means .yzw was changed to .xyz): bits 24-31: component count - psPhase->pui32SplitInfo.clear(); - psPhase->pui32SplitInfo.resize(psPhase->ui32TotalTemps * 2, 0xffffffff); - - // Build use-define chains and split temps based on those. 
- { - DefineUseChains duChains; - UseDefineChains udChains; - - BuildUseDefineChains(psPhase->psInst, psPhase->ui32TotalTemps, duChains, udChains, psPhase->GetCFG()); - - CalculateStandaloneDefinitions(duChains, psPhase->ui32TotalTemps); - - // Only do sampler precision downgrade with pixel shaders on mobile targets / Switch - if (psShader->eShaderType == PIXEL_SHADER && (IsMobileTarget(this) || IsSwitch())) - UpdateSamplerPrecisions(psShader->sInfo, duChains, psPhase->ui32TotalTemps); - - UDSplitTemps(&psPhase->ui32TotalTemps, duChains, udChains, psPhase->pui32SplitInfo); - - WriteBackUsesAndDefines(duChains); - } - - HLSLcc::DataTypeAnalysis::SetDataTypes(this, psPhase->psInst, psPhase->ui32TotalTemps, psPhase->peTempTypes); - - if (psPhase->psTempDeclaration && (psPhase->ui32OrigTemps != psPhase->ui32TotalTemps)) - psPhase->psTempDeclaration->value.ui32NumTemps = psPhase->ui32TotalTemps; -} - -void HLSLCrossCompilerContext::ReserveFramebufferFetchInputs() -{ - if (psShader->eShaderType != PIXEL_SHADER) - return; - - if (!psShader->extensions->EXT_shader_framebuffer_fetch) - return; - - if ((flags & HLSLCC_FLAG_SHADER_FRAMEBUFFER_FETCH) == 0) - return; - - if (!(psShader->eTargetLanguage >= LANG_ES_300 && psShader->eTargetLanguage <= LANG_ES_LAST)) - return; - - if (!psDependencies) - return; - - if (!HaveUniformBindingsAndLocations(psShader->eTargetLanguage, psShader->extensions, flags) && - ((flags & HLSLCC_FLAG_FORCE_EXPLICIT_LOCATIONS) == 0 || (flags & HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS) != 0)) - return; - - // The Adreno GLSL compiler fails to compile shaders that use the same location for textures and inout attachments - // So here we figure out the maximum index of any inout render target and then make sure that we never use those for textures. 
- int maxInOutRenderTargetIndex = -1; - for (const Declaration& decl : psShader->asPhases[0].psDecl) - { - if (decl.eOpcode != OPCODE_DCL_INPUT_PS) - continue; - - const Operand& operand = decl.asOperands[0]; - if (!operand.iPSInOut) - continue; - - const ShaderInfo::InOutSignature* signature = NULL; - if (!psShader->sInfo.GetInputSignatureFromRegister(operand.ui32RegisterNumber, operand.ui32CompMask, &signature, true)) - continue; - - const int index = signature->ui32SemanticIndex; - if (index > maxInOutRenderTargetIndex) - maxInOutRenderTargetIndex = index; - } - - if (maxInOutRenderTargetIndex >= 0) - { - if (maxInOutRenderTargetIndex >= psDependencies->m_NextAvailableGLSLResourceBinding[GLSLCrossDependencyData::BufferType_Texture]) - psDependencies->m_NextAvailableGLSLResourceBinding[GLSLCrossDependencyData::BufferType_Texture] = maxInOutRenderTargetIndex + 1; - } -} - -void HLSLCrossCompilerContext::ClearDependencyData() -{ - switch (psShader->eShaderType) - { - case PIXEL_SHADER: - { - psDependencies->ClearCrossDependencyData(); - break; - } - case HULL_SHADER: - { - psDependencies->eTessPartitioning = TESSELLATOR_PARTITIONING_UNDEFINED; - psDependencies->eTessOutPrim = TESSELLATOR_OUTPUT_UNDEFINED; - break; - } - default: - break; - } -} - -void HLSLCrossCompilerContext::AddIndentation() -{ - int i; - bstring glsl = *currentGLSLString; - for (i = 0; i < indent; ++i) - { - bcatcstr(glsl, " "); - } -} - -bool HLSLCrossCompilerContext::RequireExtension(const std::string &extName) -{ - if (m_EnabledExtensions.find(extName) != m_EnabledExtensions.end()) - return true; - - m_EnabledExtensions.insert(extName); - bformata(extensions, "#extension %s : require\n", extName.c_str()); - return false; -} - -bool HLSLCrossCompilerContext::EnableExtension(const std::string &extName) -{ - if (m_EnabledExtensions.find(extName) != m_EnabledExtensions.end()) - return true; - - m_EnabledExtensions.insert(extName); - bformata(extensions, "#ifdef %s\n", extName.c_str()); - 
bformata(extensions, "#extension %s : enable\n", extName.c_str()); - bcatcstr(extensions, "#endif\n"); - return false; -} - -std::string HLSLCrossCompilerContext::GetDeclaredInputName(const Operand* psOperand, int *piRebase, int iIgnoreRedirect, uint32_t *puiIgnoreSwizzle) const -{ - std::ostringstream oss; - const ShaderInfo::InOutSignature* psIn = NULL; - int regSpace = psOperand->GetRegisterSpace(this); - - if (iIgnoreRedirect == 0) - { - if ((regSpace == 0 && psShader->asPhases[currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe) - || - (regSpace == 1 && psShader->asPhases[currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe)) - { - oss << "phase" << currentPhase << "_Input" << regSpace << "_" << psOperand->ui32RegisterNumber; - if (piRebase) - *piRebase = 0; - return oss.str(); - } - } - - if (regSpace == 0) - psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psIn, true); - else - psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psIn, true); - - if (psIn && piRebase) - *piRebase = psIn->iRebase; - - const std::string patchPrefix = psShader->eTargetLanguage == LANG_METAL ? "patch." : "patch"; - std::string res = ""; - - bool skipPrefix = false; - if (psTranslator->TranslateSystemValue(psOperand, psIn, res, puiIgnoreSwizzle, psShader->aIndexedInput[regSpace][psOperand->ui32RegisterNumber] != 0, true, &skipPrefix, &iIgnoreRedirect)) - { - if (psShader->eTargetLanguage == LANG_METAL && (iIgnoreRedirect == 0) && !skipPrefix) - return inputPrefix + res; - else - return res; - } - - ASSERT(psIn != NULL); - oss << inputPrefix << (regSpace == 1 ? 
patchPrefix : "") << psIn->semanticName << psIn->ui32SemanticIndex; - return oss.str(); -} - -std::string HLSLCrossCompilerContext::GetDeclaredOutputName(const Operand* psOperand, - int* piStream, - uint32_t *puiIgnoreSwizzle, - int *piRebase, - int iIgnoreRedirect) const -{ - std::ostringstream oss; - const ShaderInfo::InOutSignature* psOut = NULL; - int regSpace = psOperand->GetRegisterSpace(this); - - if (iIgnoreRedirect == 0) - { - if ((regSpace == 0 && psShader->asPhases[currentPhase].acOutputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe) - || (regSpace == 1 && psShader->asPhases[currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe)) - { - oss << "phase" << currentPhase << "_Output" << regSpace << "_" << psOperand->ui32RegisterNumber; - if (piRebase) - *piRebase = 0; - return oss.str(); - } - } - - if (regSpace == 0) - psShader->sInfo.GetOutputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), psShader->ui32CurrentVertexOutputStream, &psOut, true); - else - psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psOut, true); - - - if (psOut && piRebase) - *piRebase = psOut->iRebase; - - if (psOut && (psOut->isIndexed.find(currentPhase) != psOut->isIndexed.end())) - { - // Need to route through temp output variable - oss << "phase" << currentPhase << "_Output" << regSpace << "_" << psOut->indexStart.find(currentPhase)->second; - if (!psOperand->m_SubOperands[0].get()) - { - oss << "[" << psOperand->ui32RegisterNumber << "]"; - } - if (piRebase) - *piRebase = 0; - return oss.str(); - } - - const std::string patchPrefix = psShader->eTargetLanguage == LANG_METAL ? "patch." 
: "patch"; - std::string res = ""; - - if (psTranslator->TranslateSystemValue(psOperand, psOut, res, puiIgnoreSwizzle, psShader->aIndexedOutput[regSpace][psOperand->ui32RegisterNumber], false, NULL, &iIgnoreRedirect)) - { - // clip/cull planes will always have interim variable, as HLSL operates on float4 but we need to size output accordingly with actual planes count - // with tessellation factor buffers, a separate buffer from output is used. for some reason TranslateSystemValue return *outSkipPrefix = true - // for ALL system vars and then we simply ignore it here, so opt to modify iIgnoreRedirect for these special cases - - if (psShader->eTargetLanguage == LANG_METAL && regSpace == 0 && (iIgnoreRedirect == 0)) - return outputPrefix + res; - else if (psShader->eTargetLanguage == LANG_METAL && (iIgnoreRedirect == 0)) - return patchPrefix + res; - else - return res; - } - ASSERT(psOut != NULL); - - oss << outputPrefix << (regSpace == 1 ? patchPrefix : "") << psOut->semanticName << psOut->ui32SemanticIndex; - return oss.str(); -} - -bool HLSLCrossCompilerContext::OutputNeedsDeclaring(const Operand* psOperand, const int count) -{ - char compMask = (char)psOperand->ui32CompMask; - int regSpace = psOperand->GetRegisterSpace(this); - uint32_t startIndex = psOperand->ui32RegisterNumber + (psShader->ui32CurrentVertexOutputStream * 1024); // Assume less than 1K input streams - ASSERT(psShader->ui32CurrentVertexOutputStream < 4); - - // First check for various builtins, mostly depth-output ones. - if (psShader->eShaderType == PIXEL_SHADER) - { - if (psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL || - psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL) - { - return true; - } - - if (psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH) - { - // GL doesn't need declaration, Metal does. 
- return psShader->eTargetLanguage == LANG_METAL; - } - } - - // Needs declaring if any of the components hasn't been already declared - if ((compMask & ~psShader->acOutputDeclared[regSpace][startIndex]) != 0) - { - int offset; - const ShaderInfo::InOutSignature* psSignature = NULL; - - if (psOperand->eSpecialName == NAME_UNDEFINED) - { - // Need to fetch the actual comp mask - if (regSpace == 0) - psShader->sInfo.GetOutputSignatureFromRegister( - psOperand->ui32RegisterNumber, - psOperand->ui32CompMask, - psShader->ui32CurrentVertexOutputStream, - &psSignature); - else - psShader->sInfo.GetPatchConstantSignatureFromRegister( - psOperand->ui32RegisterNumber, - psOperand->ui32CompMask, - &psSignature); - - compMask = (char)psSignature->ui32Mask; - } - for (offset = 0; offset < count; offset++) - { - psShader->acOutputDeclared[regSpace][startIndex + offset] |= compMask; - } - - if (psSignature && (psSignature->semanticName == "PSIZE") && (psShader->eTargetLanguage != LANG_METAL)) - { - // gl_PointSize, doesn't need declaring. TODO: Metal doesn't have pointsize at all? 
- return false; - } - - return true; - } - - return false; -} - -bool HLSLCrossCompilerContext::IsVulkan() const -{ - return (flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0; -} - -bool HLSLCrossCompilerContext::IsSwitch() const -{ - return (flags & HLSLCC_FLAG_NVN_TARGET) != 0; -} diff --git a/third_party/HLSLcc/src/HLSLcc.cpp b/third_party/HLSLcc/src/HLSLcc.cpp deleted file mode 100644 index 4592d8c..0000000 --- a/third_party/HLSLcc/src/HLSLcc.cpp +++ /dev/null @@ -1,250 +0,0 @@ -#include "hlslcc.h" - -#include -#include -#include "internal_includes/HLSLCrossCompilerContext.h" -#include "internal_includes/toGLSL.h" -#include "internal_includes/toMetal.h" -#include "internal_includes/Shader.h" -#include "internal_includes/decode.h" - - -#ifndef GL_VERTEX_SHADER_ARB -#define GL_VERTEX_SHADER_ARB 0x8B31 -#endif -#ifndef GL_FRAGMENT_SHADER_ARB -#define GL_FRAGMENT_SHADER_ARB 0x8B30 -#endif -#ifndef GL_GEOMETRY_SHADER -#define GL_GEOMETRY_SHADER 0x8DD9 -#endif -#ifndef GL_TESS_EVALUATION_SHADER -#define GL_TESS_EVALUATION_SHADER 0x8E87 -#endif -#ifndef GL_TESS_CONTROL_SHADER -#define GL_TESS_CONTROL_SHADER 0x8E88 -#endif -#ifndef GL_COMPUTE_SHADER -#define GL_COMPUTE_SHADER 0x91B9 -#endif - -static bool CheckConstantBuffersNoDuplicateNames(const std::vector& buffers, HLSLccReflection& reflectionCallbacks) -{ - uint32_t count = buffers.size(); - for (uint32_t i = 0; i < count; ++i) - { - const ConstantBuffer& lhs = buffers[i]; - for (uint32_t j = i + 1; j < count; ++j) - { - const ConstantBuffer& rhs = buffers[j]; - if (lhs.name == rhs.name) - { - std::ostringstream oss; - oss << "Duplicate constant buffer declaration: " << lhs.name; - reflectionCallbacks.OnDiagnostics(oss.str(), 0, true); - return false; - } - } - } - - return true; -} - -HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromMem(const char* shader, - unsigned int flags, - GLLang language, - const GlExtensions *extensions, - GLSLCrossDependencyData* dependencies, - HLSLccSamplerPrecisionInfo& samplerPrecisions, - 
HLSLccReflection& reflectionCallbacks, - GLSLShader* result) -{ - uint32_t* tokens; - char* glslcstr = NULL; - int GLSLShaderType = GL_FRAGMENT_SHADER_ARB; - int success = 0; - uint32_t i; - - tokens = (uint32_t*)shader; - - std::auto_ptr psShader(DecodeDXBC(tokens, flags)); - - if (psShader.get()) - { - Shader* shader = psShader.get(); - if (!CheckConstantBuffersNoDuplicateNames(shader->sInfo.psConstantBuffers, reflectionCallbacks)) - return 0; - - HLSLCrossCompilerContext sContext(reflectionCallbacks); - - // Add shader precisions from the list - psShader->sInfo.AddSamplerPrecisions(samplerPrecisions); - - if (psShader->ui32MajorVersion <= 3) - { - flags &= ~HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS; - } - -#ifdef _DEBUG - flags |= HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS; -#endif - - sContext.psShader = shader; - sContext.flags = flags; - - // If dependencies == NULL, we'll create a dummy object for it so that there's always something there. - std::auto_ptr depPtr(NULL); - if (dependencies == NULL) - { - depPtr.reset(new GLSLCrossDependencyData()); - sContext.psDependencies = depPtr.get(); - sContext.psDependencies->SetupGLSLResourceBindingSlotsIndices(); - } - else - sContext.psDependencies = dependencies; - - for (i = 0; i < psShader->asPhases.size(); ++i) - { - psShader->asPhases[i].hasPostShaderCode = 0; - } - - if (language == LANG_METAL) - { - // Geometry shader is not supported - if (psShader->eShaderType == GEOMETRY_SHADER) - { - result->sourceCode = ""; - return 0; - } - ToMetal translator(&sContext); - if (!translator.Translate()) - { - bdestroy(sContext.glsl); - for (i = 0; i < psShader->asPhases.size(); ++i) - { - bdestroy(psShader->asPhases[i].postShaderCode); - bdestroy(psShader->asPhases[i].earlyMain); - } - - return 0; - } - } - else - { - ToGLSL translator(&sContext); - language = translator.SetLanguage(language); - translator.SetExtensions(extensions); - if (!translator.Translate()) - { - bdestroy(sContext.glsl); - for (i = 0; i < 
psShader->asPhases.size(); ++i) - { - bdestroy(psShader->asPhases[i].postShaderCode); - bdestroy(psShader->asPhases[i].earlyMain); - } - - return 0; - } - } - - switch (psShader->eShaderType) - { - case VERTEX_SHADER: - { - GLSLShaderType = GL_VERTEX_SHADER_ARB; - break; - } - case GEOMETRY_SHADER: - { - GLSLShaderType = GL_GEOMETRY_SHADER; - break; - } - case DOMAIN_SHADER: - { - GLSLShaderType = GL_TESS_EVALUATION_SHADER; - break; - } - case HULL_SHADER: - { - GLSLShaderType = GL_TESS_CONTROL_SHADER; - break; - } - case COMPUTE_SHADER: - { - GLSLShaderType = GL_COMPUTE_SHADER; - break; - } - default: - { - break; - } - } - - glslcstr = bstr2cstr(sContext.glsl, '\0'); - result->sourceCode = glslcstr; - bcstrfree(glslcstr); - - bdestroy(sContext.glsl); - for (i = 0; i < psShader->asPhases.size(); ++i) - { - bdestroy(psShader->asPhases[i].postShaderCode); - bdestroy(psShader->asPhases[i].earlyMain); - } - - result->reflection = psShader->sInfo; - - result->textureSamplers = psShader->textureSamplers; - - success = 1; - } - - shader = 0; - tokens = 0; - - /* Fill in the result struct */ - - result->shaderType = GLSLShaderType; - result->GLSLLanguage = language; - - return success; -} - -HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromFile(const char* filename, - unsigned int flags, - GLLang language, - const GlExtensions *extensions, - GLSLCrossDependencyData* dependencies, - HLSLccSamplerPrecisionInfo& samplerPrecisions, - HLSLccReflection& reflectionCallbacks, - GLSLShader* result) -{ - FILE* shaderFile; - int length; - size_t readLength; - std::vector shader; - int success = 0; - - shaderFile = fopen(filename, "rb"); - - if (!shaderFile) - { - return 0; - } - - fseek(shaderFile, 0, SEEK_END); - length = ftell(shaderFile); - fseek(shaderFile, 0, SEEK_SET); - - shader.resize(length + 1); - - readLength = fread(&shader[0], 1, length, shaderFile); - - fclose(shaderFile); - shaderFile = 0; - - shader[readLength] = '\0'; - - success = TranslateHLSLFromMem(&shader[0], 
flags, language, extensions, dependencies, samplerPrecisions, reflectionCallbacks, result); - - return success; -} diff --git a/third_party/HLSLcc/src/HLSLccToolkit.cpp b/third_party/HLSLcc/src/HLSLccToolkit.cpp deleted file mode 100644 index d081f2d..0000000 --- a/third_party/HLSLcc/src/HLSLccToolkit.cpp +++ /dev/null @@ -1,574 +0,0 @@ -#include "internal_includes/HLSLccToolkit.h" -#include "internal_includes/debug.h" -#include "internal_includes/toGLSLOperand.h" -#include "internal_includes/HLSLCrossCompilerContext.h" -#include "internal_includes/Shader.h" -#include "internal_includes/languages.h" -#include "include/UnityInstancingFlexibleArraySize.h" -#include -#include - -namespace HLSLcc -{ - uint32_t GetNumberBitsSet(uint32_t a) - { - // Calculate number of bits in a - // Taken from https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSet64 - // Works only up to 14 bits (we're only using up to 4) - return (a * 0x200040008001ULL & 0x111111111111111ULL) % 0xf; - } - - uint32_t SVTTypeToFlag(const SHADER_VARIABLE_TYPE eType) - { - if (eType == SVT_FLOAT16) - { - return TO_FLAG_FORCE_HALF; - } - if (eType == SVT_UINT || eType == SVT_UINT16) - { - return TO_FLAG_UNSIGNED_INTEGER; - } - else if (eType == SVT_INT || eType == SVT_INT16 || eType == SVT_INT12) - { - return TO_FLAG_INTEGER; - } - else if (eType == SVT_BOOL) - { - return TO_FLAG_BOOL; - } - else - { - return TO_FLAG_NONE; - } - } - - SHADER_VARIABLE_TYPE TypeFlagsToSVTType(const uint32_t typeflags) - { - if (typeflags & TO_FLAG_FORCE_HALF) - return SVT_FLOAT16; - if (typeflags & (TO_FLAG_INTEGER | TO_AUTO_BITCAST_TO_INT)) - return SVT_INT; - if (typeflags & (TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_BITCAST_TO_UINT)) - return SVT_UINT; - if (typeflags & TO_FLAG_BOOL) - return SVT_BOOL; - return SVT_FLOAT; - } - - const char * GetConstructorForTypeGLSL(const HLSLCrossCompilerContext *context, const SHADER_VARIABLE_TYPE eType, const int components, bool useGLSLPrecision) - { - static const char * const 
uintTypes[] = { " ", "uint", "uvec2", "uvec3", "uvec4" }; - static const char * const uint16Types[] = { " ", "mediump uint", "mediump uvec2", "mediump uvec3", "mediump uvec4" }; - static const char * const intTypes[] = { " ", "int", "ivec2", "ivec3", "ivec4" }; - static const char * const int16Types[] = { " ", "mediump int", "mediump ivec2", "mediump ivec3", "mediump ivec4" }; - static const char * const int12Types[] = { " ", "lowp int", "lowp ivec2", "lowp ivec3", "lowp ivec4" }; - static const char * const floatTypes[] = { " ", "float", "vec2", "vec3", "vec4" }; - static const char * const float16Types[] = { " ", "mediump float", "mediump vec2", "mediump vec3", "mediump vec4" }; - static const char * const float10Types[] = { " ", "lowp float", "lowp vec2", "lowp vec3", "lowp vec4" }; - static const char * const boolTypes[] = { " ", "bool", "bvec2", "bvec3", "bvec4" }; - - ASSERT(components >= 1 && components <= 4); - bool emitLowp = EmitLowp(context); - - switch (eType) - { - case SVT_UINT: - return HaveUnsignedTypes(context->psShader->eTargetLanguage) ? uintTypes[components] : intTypes[components]; - case SVT_UINT16: - return useGLSLPrecision ? uint16Types[components] : uintTypes[components]; - case SVT_INT: - return intTypes[components]; - case SVT_INT16: - return useGLSLPrecision ? int16Types[components] : intTypes[components]; - case SVT_INT12: - return useGLSLPrecision ? (emitLowp ? int12Types[components] : int16Types[components]) : intTypes[components]; - case SVT_FLOAT: - return floatTypes[components]; - case SVT_FLOAT16: - return useGLSLPrecision ? float16Types[components] : floatTypes[components]; - case SVT_FLOAT10: - return useGLSLPrecision ? (emitLowp ? 
float10Types[components] : float16Types[components]) : floatTypes[components]; - case SVT_BOOL: - return boolTypes[components]; - default: - ASSERT(0); - return " "; - } - } - - const char * GetConstructorForTypeMetal(const SHADER_VARIABLE_TYPE eType, const int components) - { - static const char * const uintTypes[] = { " ", "uint", "uint2", "uint3", "uint4" }; - static const char * const ushortTypes[] = { " ", "ushort", "ushort2", "ushort3", "ushort4" }; - static const char * const intTypes[] = { " ", "int", "int2", "int3", "int4" }; - static const char * const shortTypes[] = { " ", "short", "short2", "short3", "short4" }; - static const char * const floatTypes[] = { " ", "float", "float2", "float3", "float4" }; - static const char * const halfTypes[] = { " ", "half", "half2", "half3", "half4" }; - static const char * const boolTypes[] = { " ", "bool", "bool2", "bool3", "bool4" }; - - ASSERT(components >= 1 && components <= 4); - - switch (eType) - { - case SVT_UINT: - return uintTypes[components]; - case SVT_UINT16: - case SVT_UINT8: // there is not uint8 in metal so treat it as ushort - return ushortTypes[components]; - case SVT_INT: - return intTypes[components]; - case SVT_INT16: - case SVT_INT12: - return shortTypes[components]; - case SVT_FLOAT: - return floatTypes[components]; - case SVT_FLOAT16: - case SVT_FLOAT10: - return halfTypes[components]; - case SVT_BOOL: - return boolTypes[components]; - default: - ASSERT(0); - return " "; - } - } - - const char * GetConstructorForType(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eType, const int components, bool useGLSLPrecision /* = true*/) - { - if (psContext->psShader->eTargetLanguage == LANG_METAL) - return GetConstructorForTypeMetal(eType, components); - else - return GetConstructorForTypeGLSL(psContext, eType, components, useGLSLPrecision); - } - - std::string GetMatrixTypeName(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eBaseType, const int columns, const 
int rows) - { - std::string result; - std::ostringstream oss; - if (psContext->psShader->eTargetLanguage == LANG_METAL) - { - switch (eBaseType) - { - case SVT_FLOAT: - oss << "float" << columns << "x" << rows; - break; - case SVT_FLOAT16: - case SVT_FLOAT10: - oss << "half" << columns << "x" << rows; - break; - default: - ASSERT(0); - break; - } - } - else - { - switch (eBaseType) - { - case SVT_FLOAT: - oss << "mat" << columns << "x" << rows; - break; - case SVT_FLOAT16: - oss << "mediump mat" << columns << "x" << rows; - break; - case SVT_FLOAT10: - oss << "lowp mat" << columns << "x" << rows; - break; - default: - ASSERT(0); - break; - } - } - result = oss.str(); - return result; - } - - void AddSwizzleUsingElementCount(bstring dest, uint32_t count) - { - if (count == 4) - return; - if (count) - { - bcatcstr(dest, "."); - bcatcstr(dest, "x"); - count--; - } - if (count) - { - bcatcstr(dest, "y"); - count--; - } - if (count) - { - bcatcstr(dest, "z"); - count--; - } - if (count) - { - bcatcstr(dest, "w"); - count--; - } - } - - // Calculate the bits set in mask - int WriteMaskToComponentCount(uint32_t writeMask) - { - // In HLSL bytecode writemask 0 also means everything - if (writeMask == 0) - return 4; - - return (int)GetNumberBitsSet(writeMask); - } - - uint32_t BuildComponentMaskFromElementCount(int count) - { - // Translate numComponents into bitmask - // 1 -> 1, 2 -> 3, 3 -> 7 and 4 -> 15 - return (1 << count) - 1; - } - - // Returns true if we can do direct assignment between types (mostly for mediump<->highp floats etc) - bool DoAssignmentDataTypesMatch(SHADER_VARIABLE_TYPE dest, SHADER_VARIABLE_TYPE src) - { - if (src == dest) - return true; - - if ((dest == SVT_FLOAT || dest == SVT_FLOAT10 || dest == SVT_FLOAT16) && - (src == SVT_FLOAT || src == SVT_FLOAT10 || src == SVT_FLOAT16)) - return true; - - if ((dest == SVT_INT || dest == SVT_INT12 || dest == SVT_INT16) && - (src == SVT_INT || src == SVT_INT12 || src == SVT_INT16)) - return true; - - if ((dest 
== SVT_UINT || dest == SVT_UINT16) && - (src == SVT_UINT || src == SVT_UINT16)) - return true; - - return false; - } - - uint32_t ResourceReturnTypeToFlag(const RESOURCE_RETURN_TYPE eType) - { - if (eType == RETURN_TYPE_SINT) - { - return TO_FLAG_INTEGER; - } - else if (eType == RETURN_TYPE_UINT) - { - return TO_FLAG_UNSIGNED_INTEGER; - } - else - { - return TO_FLAG_NONE; - } - } - - SHADER_VARIABLE_TYPE ResourceReturnTypeToSVTType(const RESOURCE_RETURN_TYPE eType, const REFLECT_RESOURCE_PRECISION ePrec) - { - if (eType == RETURN_TYPE_SINT) - { - switch (ePrec) - { - default: - return SVT_INT; - case REFLECT_RESOURCE_PRECISION_LOWP: - return SVT_INT12; - case REFLECT_RESOURCE_PRECISION_MEDIUMP: - return SVT_INT16; - } - } - else if (eType == RETURN_TYPE_UINT) - { - switch (ePrec) - { - default: - return SVT_UINT; - case REFLECT_RESOURCE_PRECISION_LOWP: - return SVT_UINT8; - case REFLECT_RESOURCE_PRECISION_MEDIUMP: - return SVT_UINT16; - } - } - else - { - switch (ePrec) - { - default: - return SVT_FLOAT; - case REFLECT_RESOURCE_PRECISION_LOWP: - return SVT_FLOAT10; - case REFLECT_RESOURCE_PRECISION_MEDIUMP: - return SVT_FLOAT16; - } - } - } - - RESOURCE_RETURN_TYPE SVTTypeToResourceReturnType(SHADER_VARIABLE_TYPE type) - { - switch (type) - { - case SVT_INT: - case SVT_INT12: - case SVT_INT16: - return RETURN_TYPE_SINT; - case SVT_UINT: - case SVT_UINT16: - return RETURN_TYPE_UINT; - case SVT_FLOAT: - case SVT_FLOAT10: - case SVT_FLOAT16: - return RETURN_TYPE_FLOAT; - default: - return RETURN_TYPE_UNUSED; - } - } - - REFLECT_RESOURCE_PRECISION SVTTypeToPrecision(SHADER_VARIABLE_TYPE type) - { - switch (type) - { - case SVT_INT: - case SVT_UINT: - case SVT_FLOAT: - return REFLECT_RESOURCE_PRECISION_HIGHP; - case SVT_INT16: - case SVT_UINT16: - case SVT_FLOAT16: - return REFLECT_RESOURCE_PRECISION_MEDIUMP; - case SVT_INT12: - case SVT_FLOAT10: - case SVT_UINT8: - return REFLECT_RESOURCE_PRECISION_LOWP; - default: - return REFLECT_RESOURCE_PRECISION_UNKNOWN; - } - } - 
- uint32_t ElemCountToAutoExpandFlag(uint32_t elemCount) - { - return TO_AUTO_EXPAND_TO_VEC2 << (elemCount - 2); - } - - // Returns true if the operation is commutative - bool IsOperationCommutative(int eOpCode) - { - switch ((OPCODE_TYPE)eOpCode) - { - case OPCODE_DADD: - case OPCODE_IADD: - case OPCODE_ADD: - case OPCODE_MUL: - case OPCODE_IMUL: - case OPCODE_OR: - case OPCODE_AND: - return true; - default: - return false; - } - } - - // Returns true if operands are identical, only cares about temp registers currently. - bool AreTempOperandsIdentical(const Operand * psA, const Operand * psB) - { - if (!psA || !psB) - return 0; - - if (psA->eType != OPERAND_TYPE_TEMP || psB->eType != OPERAND_TYPE_TEMP) - return 0; - - if (psA->eModifier != psB->eModifier) - return 0; - - if (psA->iNumComponents != psB->iNumComponents) - return 0; - - if (psA->ui32RegisterNumber != psB->ui32RegisterNumber) - return 0; - - if (psA->eSelMode != psB->eSelMode) - return 0; - - if (psA->eSelMode == OPERAND_4_COMPONENT_MASK_MODE && psA->ui32CompMask != psB->ui32CompMask) - return 0; - - if (psA->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE && psA->aui32Swizzle[0] != psB->aui32Swizzle[0]) - return 0; - - if (psA->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE && std::equal(&psA->aui32Swizzle[0], &psA->aui32Swizzle[4], &psB->aui32Swizzle[0])) - return 0; - - return 1; - } - - bool IsAddOneInstruction(const Instruction *psInst) - { - if (psInst->eOpcode != OPCODE_IADD) - return false; - if (psInst->asOperands[0].eType != OPERAND_TYPE_TEMP) - return false; - - if (psInst->asOperands[1].eType == OPERAND_TYPE_TEMP) - { - if (psInst->asOperands[1].ui32RegisterNumber != psInst->asOperands[0].ui32RegisterNumber) - return false; - if (psInst->asOperands[2].eType != OPERAND_TYPE_IMMEDIATE32) - return false; - - if (*(int *)&psInst->asOperands[2].afImmediates[0] != 1) - return false; - } - else - { - if (psInst->asOperands[1].eType != OPERAND_TYPE_IMMEDIATE32) - return false; - if 
(psInst->asOperands[2].eType != OPERAND_TYPE_TEMP) - return false; - - if (psInst->asOperands[2].ui32RegisterNumber != psInst->asOperands[0].ui32RegisterNumber) - return false; - - if (*(int *)&psInst->asOperands[1].afImmediates[0] != 1) - return false; - } - return true; - } - - int GetNumTextureDimensions(int /* RESOURCE_DIMENSION */ eResDim) - { - switch ((RESOURCE_DIMENSION)eResDim) - { - case RESOURCE_DIMENSION_TEXTURE1D: - return 1; - case RESOURCE_DIMENSION_TEXTURE2D: - case RESOURCE_DIMENSION_TEXTURE2DMS: - case RESOURCE_DIMENSION_TEXTURE1DARRAY: - case RESOURCE_DIMENSION_TEXTURECUBE: - return 2; - case RESOURCE_DIMENSION_TEXTURE3D: - case RESOURCE_DIMENSION_TEXTURE2DARRAY: - case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: - case RESOURCE_DIMENSION_TEXTURECUBEARRAY: - return 3; - default: - ASSERT(0); - break; - } - return 0; - } - - // Returns the "more important" type of a and b, currently int < uint < float - SHADER_VARIABLE_TYPE SelectHigherType(SHADER_VARIABLE_TYPE a, SHADER_VARIABLE_TYPE b) - { -#define DO_CHECK(type) if( a == type || b == type ) return type - - // Priority ordering - DO_CHECK(SVT_FLOAT16); - DO_CHECK(SVT_FLOAT10); - DO_CHECK(SVT_UINT16); - DO_CHECK(SVT_UINT8); - DO_CHECK(SVT_INT16); - DO_CHECK(SVT_INT12); - DO_CHECK(SVT_FORCED_INT); - DO_CHECK(SVT_FLOAT); - DO_CHECK(SVT_UINT); - DO_CHECK(SVT_INT); - DO_CHECK(SVT_INT_AMBIGUOUS); - -#undef DO_CHECK - // After these just rely on ordering. - return a > b ? 
a : b; - } - - // Returns true if a direct constructor can convert src->dest - bool CanDoDirectCast(const HLSLCrossCompilerContext *context, SHADER_VARIABLE_TYPE src, SHADER_VARIABLE_TYPE dest) - { - // uint<->int<->bool conversions possible - if ((src == SVT_INT || src == SVT_UINT || src == SVT_BOOL || src == SVT_INT12 || src == SVT_INT16 || src == SVT_UINT16) && - (dest == SVT_INT || dest == SVT_UINT || dest == SVT_BOOL || dest == SVT_INT12 || dest == SVT_INT16 || dest == SVT_UINT16)) - return true; - - // float<->double possible - if ((src == SVT_FLOAT || src == SVT_DOUBLE || src == SVT_FLOAT16 || src == SVT_FLOAT10) && - (dest == SVT_FLOAT || dest == SVT_DOUBLE || dest == SVT_FLOAT16 || dest == SVT_FLOAT10)) - return true; - - if (context->psShader->eTargetLanguage == LANG_METAL) - { - // avoid compiler error: cannot use as_type to cast from 'half' to 'unsigned int' or 'int', types of different size - if ((src == SVT_FLOAT16 || src == SVT_FLOAT10) && (dest == SVT_UINT || dest == SVT_INT)) - return true; - } - - return false; - } - - bool IsUnityFlexibleInstancingBuffer(const ConstantBuffer* psCBuf) - { - return psCBuf != NULL && psCBuf->asVars.size() == 1 - && psCBuf->asVars[0].sType.Class == SVC_STRUCT && psCBuf->asVars[0].sType.Elements == 2 - && IsUnityInstancingConstantBufferName(psCBuf->name.c_str()); - } - -#ifndef fpcheck -#ifdef _MSC_VER -#define fpcheck(x) (_isnan(x) || !_finite(x)) -#else -#define fpcheck(x) (std::isnan(x) || std::isinf(x)) -#endif -#endif // #ifndef fpcheck - - // Helper function to print floats with full precision - void PrintFloat(bstring b, float f) - { - bstring temp; - int ePos; - int pointPos; - - temp = bformat("%.9g", f); - ePos = bstrchrp(temp, 'e', 0); - pointPos = bstrchrp(temp, '.', 0); - - bconcat(b, temp); - bdestroy(temp); - - if (ePos < 0 && pointPos < 0 && !fpcheck(f)) - bcatcstr(b, ".0"); - } - - bstring GetEarlyMain(HLSLCrossCompilerContext *psContext) - { - bstring *oldString = psContext->currentGLSLString; - 
bstring *str = &psContext->psShader->asPhases[psContext->currentPhase].earlyMain; - int indent = psContext->indent; - - if (psContext->psShader->eTargetLanguage == LANG_METAL && !psContext->indent) - ++psContext->indent; - - psContext->currentGLSLString = str; - psContext->AddIndentation(); - psContext->currentGLSLString = oldString; - psContext->indent = indent; - - return *str; - } - - bstring GetPostShaderCode(HLSLCrossCompilerContext *psContext) - { - bstring *oldString = psContext->currentGLSLString; - bstring *str = &psContext->psShader->asPhases[psContext->currentPhase].postShaderCode; - int indent = psContext->indent; - - if (psContext->psShader->eTargetLanguage == LANG_METAL && !psContext->indent) - ++psContext->indent; - - psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode = 1; - - psContext->currentGLSLString = str; - psContext->AddIndentation(); - psContext->currentGLSLString = oldString; - psContext->indent = indent; - - return *str; - } -} diff --git a/third_party/HLSLcc/src/HLSLccTypes.natvis b/third_party/HLSLcc/src/HLSLccTypes.natvis deleted file mode 100644 index 6dd7c23..0000000 --- a/third_party/HLSLcc/src/HLSLccTypes.natvis +++ /dev/null @@ -1,10 +0,0 @@ - - - - {{ id={id} op={eOpcode} o0={asOperands[0]}, o1={asOperands[1]}}} - - - {{ type={eType}, reg={ui32RegisterNumber} }} - - - diff --git a/third_party/HLSLcc/src/Instruction.cpp b/third_party/HLSLcc/src/Instruction.cpp deleted file mode 100644 index ee384cd..0000000 --- a/third_party/HLSLcc/src/Instruction.cpp +++ /dev/null @@ -1,349 +0,0 @@ -#include "internal_includes/Instruction.h" -#include "internal_includes/debug.h" -#include "include/ShaderInfo.h" - -// Returns the result swizzle operand for an instruction, or NULL if all src operands have swizzles -static Operand *GetSrcSwizzleOperand(Instruction *psInst) -{ - switch (psInst->eOpcode) - { - case OPCODE_DP2: - case OPCODE_DP3: - case OPCODE_DP4: - case OPCODE_NOP: - case OPCODE_SWAPC: - case OPCODE_SAMPLE_C: - 
case OPCODE_SAMPLE_C_LZ: - ASSERT(0); - return NULL; - - // Normal arithmetics, all srcs have swizzles - case OPCODE_ADD: - case OPCODE_AND: - case OPCODE_DERIV_RTX: - case OPCODE_DERIV_RTX_COARSE: - case OPCODE_DERIV_RTX_FINE: - case OPCODE_DERIV_RTY: - case OPCODE_DERIV_RTY_COARSE: - case OPCODE_DERIV_RTY_FINE: - case OPCODE_DIV: - case OPCODE_EQ: - case OPCODE_EXP: - case OPCODE_FRC: - case OPCODE_FTOI: - case OPCODE_FTOU: - case OPCODE_GE: - case OPCODE_IADD: - case OPCODE_IEQ: - case OPCODE_IGE: - case OPCODE_ILT: - case OPCODE_IMAD: - case OPCODE_IMAX: - case OPCODE_IMIN: - case OPCODE_IMUL: - case OPCODE_INE: - case OPCODE_INEG: - case OPCODE_ITOF: - case OPCODE_LOG: - case OPCODE_LT: - case OPCODE_MAD: - case OPCODE_MAX: - case OPCODE_MIN: - case OPCODE_MOV: - case OPCODE_MUL: - case OPCODE_NE: - case OPCODE_NOT: - case OPCODE_OR: - case OPCODE_ROUND_NE: - case OPCODE_ROUND_NI: - case OPCODE_ROUND_PI: - case OPCODE_ROUND_Z: - case OPCODE_RSQ: - case OPCODE_SINCOS: - case OPCODE_SQRT: - case OPCODE_UDIV: - case OPCODE_UGE: - case OPCODE_ULT: - case OPCODE_UMAD: - case OPCODE_UMAX: - case OPCODE_UMIN: - case OPCODE_UMUL: - case OPCODE_UTOF: - case OPCODE_XOR: - - case OPCODE_BFI: - case OPCODE_BFREV: - case OPCODE_COUNTBITS: - case OPCODE_DADD: - case OPCODE_DDIV: - case OPCODE_DEQ: - case OPCODE_DFMA: - case OPCODE_DGE: - case OPCODE_DLT: - case OPCODE_DMAX: - case OPCODE_DMIN: - case OPCODE_DMUL: - case OPCODE_DMOV: - case OPCODE_DNE: - case OPCODE_DRCP: - case OPCODE_DTOF: - case OPCODE_F16TOF32: - case OPCODE_F32TOF16: - case OPCODE_FIRSTBIT_HI: - case OPCODE_FIRSTBIT_LO: - case OPCODE_FIRSTBIT_SHI: - case OPCODE_FTOD: - case OPCODE_IBFE: - case OPCODE_RCP: - case OPCODE_UADDC: - case OPCODE_UBFE: - case OPCODE_USUBB: - case OPCODE_MOVC: - case OPCODE_DMOVC: - return NULL; - - // Special cases: - case OPCODE_GATHER4: - case OPCODE_GATHER4_C: - case OPCODE_LD: - case OPCODE_LD_MS: - case OPCODE_LOD: - case OPCODE_LD_UAV_TYPED: - case OPCODE_LD_RAW: - case 
OPCODE_SAMPLE: - case OPCODE_SAMPLE_B: - case OPCODE_SAMPLE_L: - case OPCODE_SAMPLE_D: - case OPCODE_RESINFO: - return &psInst->asOperands[2]; - - case OPCODE_GATHER4_PO: - case OPCODE_GATHER4_PO_C: - case OPCODE_LD_STRUCTURED: - return &psInst->asOperands[3]; - - case OPCODE_SAMPLE_INFO: - return &psInst->asOperands[1]; - - case OPCODE_ISHL: - case OPCODE_ISHR: - case OPCODE_USHR: - // sm4 variant has single component selection on src1 -> only src0 has swizzle - if (psInst->asOperands[2].eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE) - return &psInst->asOperands[1]; - else // whereas sm5 variant has swizzle also on src1 - return NULL; - - default: - ASSERT(0); - return NULL; - } -} - -// Tweak the source operands of an instruction so that the rebased write mask will still work -static void DoSrcOperandRebase(Operand *psOperand, uint32_t rebase) -{ - uint32_t i; - switch (psOperand->eSelMode) - { - default: - case OPERAND_4_COMPONENT_MASK_MODE: - ASSERT(psOperand->ui32CompMask == 0 || psOperand->ui32CompMask == OPERAND_4_COMPONENT_MASK_ALL); - - // Special case for immediates, they do not have swizzles - if (psOperand->eType == OPERAND_TYPE_IMMEDIATE32) - { - if (psOperand->iNumComponents > 1) - std::copy(&psOperand->afImmediates[rebase], &psOperand->afImmediates[4], &psOperand->afImmediates[0]); - return; - } - if (psOperand->eType == OPERAND_TYPE_IMMEDIATE64) - { - if (psOperand->iNumComponents > 1) - std::copy(&psOperand->adImmediates[rebase], &psOperand->adImmediates[4], &psOperand->adImmediates[0]); - return; - } - - // Need to change this to swizzle - psOperand->eSelMode = OPERAND_4_COMPONENT_SWIZZLE_MODE; - psOperand->ui32Swizzle = 0; - for (i = 0; i < 4 - rebase; i++) - psOperand->aui32Swizzle[i] = i + rebase; - for (; i < 4; i++) - psOperand->aui32Swizzle[i] = rebase; // The first actual input. 
- break; - case OPERAND_4_COMPONENT_SELECT_1_MODE: - // Nothing to do - break; - case OPERAND_4_COMPONENT_SWIZZLE_MODE: - for (i = rebase; i < 4; i++) - psOperand->aui32Swizzle[i - rebase] = psOperand->aui32Swizzle[i]; - break; - } -} - -void Instruction::ChangeOperandTempRegister(Operand *psOperand, uint32_t oldReg, uint32_t newReg, uint32_t compMask, uint32_t flags, uint32_t rebase) -{ - uint32_t i = 0; - uint32_t accessMask = 0; - int isDestination = 0; - Operand *psSwizzleOperand = NULL; - - if (flags & UD_CHANGE_SUBOPERANDS) - { - for (i = 0; i < MAX_SUB_OPERANDS; i++) - { - if (psOperand->m_SubOperands[i].get()) - ChangeOperandTempRegister(psOperand->m_SubOperands[i].get(), oldReg, newReg, compMask, UD_CHANGE_ALL, rebase); - } - } - - if ((flags & UD_CHANGE_MAIN_OPERAND) == 0) - return; - - if (psOperand->eType != OPERAND_TYPE_TEMP) - return; - - if (psOperand->ui32RegisterNumber != oldReg) - return; - - accessMask = psOperand->GetAccessMask(); - // If this operation touches other components than the one(s) we're splitting, skip it - if ((accessMask & (~compMask)) != 0) - { - // Verify that we've not messed up in reachability analysis. - // This would mean that we've encountered an instruction that accesses - // a component in multi-component mode and we're supposed to treat it as single-use only. - // Now that we track operands we can bring this back - ASSERT((accessMask & compMask) == 0); - return; - } - -#if 0 - printf("Updating operand %d with access mask %X\n", (int)psOperand->id, accessMask); -#endif - psOperand->ui32RegisterNumber = newReg; - - if (rebase == 0) - return; - - // Update component mask. Note that we don't need to do anything to the suboperands. They do not affect destination writemask. 
- switch (psOperand->eSelMode) - { - case OPERAND_4_COMPONENT_MASK_MODE: - { - uint32_t oldMask = psOperand->ui32CompMask; - if (oldMask == 0) - oldMask = OPERAND_4_COMPONENT_MASK_ALL; - - // Check that we're not losing any information - ASSERT((oldMask >> rebase) << rebase == oldMask); - psOperand->ui32CompMask = (oldMask >> rebase); - break; - } - case OPERAND_4_COMPONENT_SELECT_1_MODE: - ASSERT(psOperand->aui32Swizzle[0] >= rebase); - psOperand->aui32Swizzle[0] -= rebase; - break; - case OPERAND_4_COMPONENT_SWIZZLE_MODE: - { - for (i = 0; i < 4; i++) - { - // Note that this rebase is different from the one done for source operands - ASSERT(psOperand->aui32Swizzle[i] >= rebase); - psOperand->aui32Swizzle[i] -= rebase; - } - break; - } - default: - ASSERT(0); - } - - // Tweak operand datatypes - std::copy(&psOperand->aeDataType[rebase], &psOperand->aeDataType[4], &psOperand->aeDataType[0]); - - // If this operand is a destination, we'll need to tweak sources as well - for (i = 0; i < ui32FirstSrc; i++) - { - if (psOperand == &asOperands[i]) - { - isDestination = 1; - break; - } - } - - if (isDestination == 0) - return; - - // Nasty corner case of 2 destinations, not supported if both targets are written - ASSERT((ui32FirstSrc < 2) || (asOperands[0].eType == OPERAND_TYPE_NULL) || (asOperands[1].eType == OPERAND_TYPE_NULL)); - - // If we made it this far, we're rebasing a destination temp (and the only destination), need to tweak sources depending on the instruction - switch (eOpcode) - { - // The opcodes that do not need tweaking: - case OPCODE_DP2: - case OPCODE_DP3: - case OPCODE_DP4: - case OPCODE_BUFINFO: - case OPCODE_SAMPLE_C: - case OPCODE_SAMPLE_C_LZ: - return; - - default: - psSwizzleOperand = GetSrcSwizzleOperand(this); // Null means tweak all source operands - if (psSwizzleOperand) - { - DoSrcOperandRebase(psSwizzleOperand, rebase); - return; - } - else - { - for (i = ui32FirstSrc; i < ui32NumOperands; i++) - { - DoSrcOperandRebase(&asOperands[i], 
rebase); - } - } - return; - } -} - -// Returns nonzero if psInst is a sample instruction and the sampler has medium or low precision -bool Instruction::IsPartialPrecisionSamplerInstruction(const ShaderInfo &info, OPERAND_MIN_PRECISION *pType) const -{ - const Operand *op; - const ResourceBinding *psBinding = NULL; - OPERAND_MIN_PRECISION sType = OPERAND_MIN_PRECISION_DEFAULT; - switch (eOpcode) - { - default: - return false; - case OPCODE_SAMPLE: - case OPCODE_SAMPLE_B: - case OPCODE_SAMPLE_L: - case OPCODE_SAMPLE_D: - case OPCODE_SAMPLE_C: - case OPCODE_SAMPLE_C_LZ: - break; - } - - op = &asOperands[3]; - ASSERT(op->eType == OPERAND_TYPE_SAMPLER); - - info.GetResourceFromBindingPoint(RGROUP_SAMPLER, op->ui32RegisterNumber, &psBinding); - if (!psBinding) - { - /* Try to look from texture group */ - info.GetResourceFromBindingPoint(RGROUP_TEXTURE, op->ui32RegisterNumber, &psBinding); - } - - sType = Operand::ResourcePrecisionToOperandPrecision(psBinding ? psBinding->ePrecision : REFLECT_RESOURCE_PRECISION_UNKNOWN); - - if (sType == OPERAND_MIN_PRECISION_DEFAULT) - return false; - - if (pType) - *pType = sType; - - return true; -} diff --git a/third_party/HLSLcc/src/LoopTransform.cpp b/third_party/HLSLcc/src/LoopTransform.cpp deleted file mode 100644 index e3ba6e6..0000000 --- a/third_party/HLSLcc/src/LoopTransform.cpp +++ /dev/null @@ -1,370 +0,0 @@ -#include "src/internal_includes/HLSLCrossCompilerContext.h" -#include "src/internal_includes/LoopTransform.h" -#include "src/internal_includes/Shader.h" -#include "src/internal_includes/debug.h" -#include -#include -#include - -namespace HLSLcc -{ - struct LoopInfo - { - public: - LoopInfo() : m_StartLoop(0), m_EndLoop(0), m_ExitPoints(), m_IsSwitch(false) {} - - Instruction * m_StartLoop; // OPCODE_LOOP - Instruction * m_EndLoop; // OPCODE_ENDLOOP that matches the LOOP above. 
- std::vector m_ExitPoints; // Any BREAK/RET/BREAKC instructions within the same loop depth - bool m_IsSwitch; // True if this is a switch-case and not a LOOP/ENDLOOP pair. Used as a helper when parsing. - }; - - typedef std::list Loops; - - // Build a loopinfo array of all the loops in this shader phase - void BuildLoopInfo(ShaderPhase &phase, Loops &res) - { - using namespace std; - res.clear(); - - // A stack of loopinfo elements (stored in res) - list loopStack; - - // Storage for dummy LoopInfo elements to be used for switch-cases. We don't want them cluttering the Loops list so store them here. - list dummyLIForSwitches; - - for (std::vector::iterator instItr = phase.psInst.begin(); instItr != phase.psInst.end(); instItr++) - { - Instruction *i = &*instItr; - - if (i->eOpcode == OPCODE_LOOP) - { - LoopInfo *currLoopInfo = &*res.insert(res.end(), LoopInfo()); - currLoopInfo->m_StartLoop = i; - loopStack.push_front(currLoopInfo); - } - else if (i->eOpcode == OPCODE_ENDLOOP) - { - ASSERT(!loopStack.empty()); - LoopInfo *li = *loopStack.begin(); - loopStack.pop_front(); - li->m_EndLoop = i; - } - else if (i->eOpcode == OPCODE_SWITCH) - { - // Create a dummy entry into the stack - LoopInfo *li = &*dummyLIForSwitches.insert(dummyLIForSwitches.end(), LoopInfo()); - li->m_IsSwitch = true; - loopStack.push_front(li); - } - else if (i->eOpcode == OPCODE_ENDSWITCH) - { - ASSERT(!loopStack.empty()); - LoopInfo *li = *loopStack.begin(); - loopStack.pop_front(); - ASSERT(li->m_IsSwitch); - } - else if (i->eOpcode == OPCODE_BREAK || i->eOpcode == OPCODE_BREAKC) - { - // Get the current loopstack head - ASSERT(!loopStack.empty()); - LoopInfo *li = *loopStack.begin(); - // Ignore breaks from switch-cases - if (!li->m_IsSwitch) - { - li->m_ExitPoints.push_back(i); - } - } - } - } - - // Returns true if the given instruction is a non-vectorized int or uint comparison instruction that reads from at least one temp and writes to a temp - static bool 
IsScalarTempComparisonInstruction(const Instruction *i) - { - switch (i->eOpcode) - { - default: - return false; - case OPCODE_IGE: - case OPCODE_ILT: - case OPCODE_IEQ: - case OPCODE_INE: - case OPCODE_UGE: - case OPCODE_ULT: - break; - } - - if (i->asOperands[0].eType != OPERAND_TYPE_TEMP) - return false; - - int tempOp = -1; - if (i->asOperands[1].eType == OPERAND_TYPE_TEMP) - tempOp = 1; - else if (i->asOperands[2].eType == OPERAND_TYPE_TEMP) - tempOp = 2; - - // Also reject comparisons where we compare temp.x vs temp.y - if (i->asOperands[1].eType == OPERAND_TYPE_TEMP && i->asOperands[2].eType == OPERAND_TYPE_TEMP && i->asOperands[1].ui32RegisterNumber == i->asOperands[2].ui32RegisterNumber) - return false; - - if (tempOp == -1) - return false; - - if (i->asOperands[0].GetNumSwizzleElements() != 1) - return false; - - return true; - } - - // Returns true iff both instructions perform identical operation. For the purposes of Loop transformation, we only consider operations of type tX = tX imm32 - static bool AreInstructionsIdentical(const Instruction *a, const Instruction *b) - { - if (a->eOpcode != b->eOpcode) - return false; - ASSERT(a->ui32NumOperands == b->ui32NumOperands); - uint32_t dstReg = 0; - if (a->asOperands[0].eType != OPERAND_TYPE_TEMP) - return false; - dstReg = a->asOperands[0].ui32RegisterNumber; - - for (uint32_t i = 0; i < a->ui32NumOperands; i++) - { - const Operand &aop = a->asOperands[i]; - const Operand &bop = b->asOperands[i]; - if (aop.eType != bop.eType) - return false; - - if (aop.GetAccessMask() != bop.GetAccessMask()) - return false; - - if (aop.GetNumSwizzleElements() != 1) - return false; - - if (aop.eType == OPERAND_TYPE_TEMP) - { - if (aop.ui32RegisterNumber != bop.ui32RegisterNumber) - return false; - if (aop.ui32RegisterNumber != dstReg) - return false; - } - else if (aop.eType == OPERAND_TYPE_IMMEDIATE32) - { - if (memcmp(aop.afImmediates, bop.afImmediates, 4 * sizeof(float)) != 0) - return false; - } - } - return true; - } - 
- // Attempt to transform a single loop into a for-statement - static void AttemptLoopTransform(HLSLCrossCompilerContext *psContext, ShaderPhase &phase, LoopInfo &li) - { - // In order to transform a loop into a for, the following has to hold: - // - The loop must start with a comparison instruction where one of the src operands is a temp (induction variable), followed by OPCODE_BREAKC. - // - The loop must end with an arithmetic operation (SUB or ADD) where the dest operand is the same temp as one of the sources in the comparison instruction above - // Additionally, if the loop induction variable is initialized before the start of the loop and it has only uses inside the LOOP/ENDLOOP pair, we can declare that inside the for statement. - // Also, the loop induction variable must be standalone (as in, never used as part of a larger vector) - - Instruction *cmpInst = li.m_StartLoop + 1; - - if (!IsScalarTempComparisonInstruction(cmpInst)) - return; - - Instruction *breakInst = li.m_StartLoop + 2; - if (breakInst->eOpcode != OPCODE_BREAKC) - return; - if (breakInst->asOperands[0].eType != OPERAND_TYPE_TEMP) - return; - if (breakInst->asOperands[0].ui32RegisterNumber != cmpInst->asOperands[0].ui32RegisterNumber) - return; - - // Check that the comparison result isn't used anywhere else - if (cmpInst->m_Uses.size() != 1) - return; - - ASSERT(cmpInst->m_Uses[0].m_Inst == breakInst); - - // Ok, at least we have the comparison + breakc combo at top. Try to find the induction variable - uint32_t inductionVarIdx = 0; - - Instruction *lastInst = li.m_EndLoop - 1; - if (lastInst->eOpcode != OPCODE_IADD) - return; - if (lastInst->asOperands[0].eType != OPERAND_TYPE_TEMP) - return; - - if (lastInst->asOperands[0].GetNumSwizzleElements() != 1) - return; - - uint32_t indVar = lastInst->asOperands[0].ui32RegisterNumber; - // Verify that the induction variable actually matches. 
- if (cmpInst->asOperands[1].eType == OPERAND_TYPE_TEMP && cmpInst->asOperands[1].ui32RegisterNumber == indVar) - inductionVarIdx = 1; - else if (cmpInst->asOperands[2].eType == OPERAND_TYPE_TEMP && cmpInst->asOperands[2].ui32RegisterNumber == indVar) - inductionVarIdx = 2; - else - return; - - // Verify that we also read from the induction variable in the last instruction - if (!((lastInst->asOperands[1].eType == OPERAND_TYPE_TEMP && lastInst->asOperands[1].ui32RegisterNumber == indVar) || - (lastInst->asOperands[2].eType == OPERAND_TYPE_TEMP && lastInst->asOperands[2].ui32RegisterNumber == indVar))) - return; - - // Nvidia compiler bug workaround: The shader compiler tries to be smart and unrolls constant loops, - // but then fails miserably if the loop variable is used as an index to UAV loads/stores or some other cases ("array access too complex") - // This is also triggered when the driver optimizer sees "simple enough" arithmetics (whatever that is) done on the loop variable before indexing. - // So, disable for-loop transformation altogether whenever we see a UAV load or store inside a loop. - if (psContext->psShader->eTargetLanguage >= LANG_400 && psContext->psShader->eTargetLanguage < LANG_GL_LAST && !psContext->IsVulkan()) - { - for (auto itr = li.m_StartLoop; itr != li.m_EndLoop; itr++) - { - switch (itr->eOpcode) - { - case OPCODE_LD_RAW: - case OPCODE_LD_STRUCTURED: - case OPCODE_LD_UAV_TYPED: - case OPCODE_STORE_RAW: - case OPCODE_STORE_STRUCTURED: - case OPCODE_STORE_UAV_TYPED: - return; // Nope, can't do a for, not even a partial one. - default: - break; - } - } - } - - // One more thing to check: The comparison input may only see 1 definition that originates from inside the loop range: the one in lastInst. - // Anything else means that there's a continue statement, or another break/breakc and that means that lastInst wouldn't get called. - // Of course, if all those instructions are identical, then it's fine. 
- // Ideally, if there's only one definition that's from outside the loop range, then we can use that as the initializer, as well. - - Instruction *initializer = NULL; - std::vector definitionsOutsideRange; - std::vector definitionsInsideRange; - std::for_each(cmpInst->asOperands[inductionVarIdx].m_Defines.begin(), cmpInst->asOperands[inductionVarIdx].m_Defines.end(), [&](const Operand::Define &def) - { - if (def.m_Inst < li.m_StartLoop || def.m_Inst > li.m_EndLoop) - definitionsOutsideRange.push_back(&def); - else - definitionsInsideRange.push_back(&def); - }); - - if (definitionsInsideRange.size() != 1) - { - // All definitions must be identical - for (std::vector::iterator itr = definitionsInsideRange.begin() + 1; itr != definitionsInsideRange.end(); itr++) - { - if (!AreInstructionsIdentical((*itr)->m_Inst, definitionsInsideRange[0]->m_Inst)) - return; - } - } - - ASSERT(definitionsOutsideRange.size() > 0); - if (definitionsOutsideRange.size() == 1) - initializer = definitionsOutsideRange[0]->m_Inst; - - // Initializer must only write to one component - if (initializer && initializer->asOperands[0].GetNumSwizzleElements() != 1) - initializer = 0; - // Initializer data type must be int or uint - if (initializer) - { - SHADER_VARIABLE_TYPE dataType = initializer->asOperands[0].GetDataType(psContext); - if (dataType != SVT_INT && dataType != SVT_UINT) - return; - } - - // Check that the initializer is only used within the range so we can move it to for statement - if (initializer) - { - bool hasUsesOutsideRange = false; - std::for_each(initializer->m_Uses.begin(), initializer->m_Uses.end(), [&](const Instruction::Use &u) - { - if (u.m_Inst < li.m_StartLoop || u.m_Inst > li.m_EndLoop) - hasUsesOutsideRange = true; - }); - // Has outside uses? 
we cannot pull that up to the for statement - if (hasUsesOutsideRange) - initializer = 0; - } - - // Check that the loop adder instruction only has uses inside the loop range, otherwise we cannot move the initializer either - if (initializer) - { - bool cannotDoInitializer = false; - for (auto itr = lastInst->m_Uses.begin(); itr != lastInst->m_Uses.end(); itr++) - { - const Instruction::Use &u = *itr; - if (u.m_Inst < li.m_StartLoop || u.m_Inst > li.m_EndLoop) - { - cannotDoInitializer = true; - break; - } - // Also check that the uses are not vector ops (temp splitting has already pulled everything to .x if this is a standalone var) - if (u.m_Op->GetAccessMask() != 1) - { - cannotDoInitializer = true; - break; - } - } - // Has outside uses? we cannot pull that up to the for statement - if (cannotDoInitializer) - initializer = 0; - } - - - if (initializer) - { - // We can declare the initializer in the for loop header, allocate a new number for it and change all uses into that. - uint32_t newRegister = phase.m_NextFreeTempRegister++; - li.m_StartLoop->m_InductorRegister = newRegister; - std::for_each(initializer->m_Uses.begin(), initializer->m_Uses.end(), [newRegister](const Instruction::Use &u) - { - u.m_Op->m_ForLoopInductorName = newRegister; - }); - // Also tweak the destinations for cmpInst, and lastInst - if (cmpInst->asOperands[1].eType == OPERAND_TYPE_TEMP && cmpInst->asOperands[1].ui32RegisterNumber == initializer->asOperands[0].ui32RegisterNumber) - cmpInst->asOperands[1].m_ForLoopInductorName = newRegister; - else - cmpInst->asOperands[2].m_ForLoopInductorName = newRegister; - - if (lastInst->asOperands[1].eType == OPERAND_TYPE_TEMP && lastInst->asOperands[1].ui32RegisterNumber == initializer->asOperands[0].ui32RegisterNumber) - lastInst->asOperands[1].m_ForLoopInductorName = newRegister; - else - lastInst->asOperands[2].m_ForLoopInductorName = newRegister; - - lastInst->asOperands[0].m_ForLoopInductorName = newRegister; - 
initializer->asOperands[0].m_ForLoopInductorName = newRegister; - } - - // This loop can be transformed to for-loop. Do the necessary magicks. - li.m_StartLoop->m_LoopInductors[0] = initializer; - li.m_StartLoop->m_LoopInductors[1] = cmpInst; - li.m_StartLoop->m_LoopInductors[2] = breakInst; - li.m_StartLoop->m_LoopInductors[3] = lastInst; - - if (initializer) - initializer->m_SkipTranslation = true; - cmpInst->m_SkipTranslation = true; - breakInst->m_SkipTranslation = true; - lastInst->m_SkipTranslation = true; - } - - void DoLoopTransform(HLSLCrossCompilerContext *psContext, ShaderPhase &phase) - { - Loops loops; - BuildLoopInfo(phase, loops); - - std::for_each(loops.begin(), loops.end(), [&phase, psContext](LoopInfo &li) - { - // Some sanity checks: start and end points must be initialized, we shouldn't have any switches here, and each loop must have at least one exit point - // Also that there's at least 2 instructions in loop body - ASSERT(li.m_StartLoop != 0); - ASSERT(li.m_EndLoop != 0); - ASSERT(li.m_EndLoop > li.m_StartLoop + 2); - ASSERT(!li.m_IsSwitch); - ASSERT(!li.m_ExitPoints.empty()); - AttemptLoopTransform(psContext, phase, li); - }); - } -} diff --git a/third_party/HLSLcc/src/Operand.cpp b/third_party/HLSLcc/src/Operand.cpp deleted file mode 100644 index 9d9bf23..0000000 --- a/third_party/HLSLcc/src/Operand.cpp +++ /dev/null @@ -1,641 +0,0 @@ -#include "internal_includes/Operand.h" -#include "internal_includes/debug.h" -#include "internal_includes/HLSLccToolkit.h" -#include "internal_includes/Shader.h" -#include "internal_includes/HLSLCrossCompilerContext.h" -#include "internal_includes/Instruction.h" - -uint32_t Operand::GetAccessMask() const -{ - int i; - uint32_t accessMask = 0; - // NOTE: Destination writemask can (AND DOES) affect access from sources, but we do it conservatively for now. 
- switch (eSelMode) - { - default: - case OPERAND_4_COMPONENT_MASK_MODE: - // Update access mask - accessMask = ui32CompMask; - if (accessMask == 0) - accessMask = OPERAND_4_COMPONENT_MASK_ALL; - break; - - case OPERAND_4_COMPONENT_SWIZZLE_MODE: - accessMask = 0; - for (i = 0; i < 4; i++) - accessMask |= 1 << (aui32Swizzle[i]); - break; - - case OPERAND_4_COMPONENT_SELECT_1_MODE: - accessMask = 1 << (aui32Swizzle[0]); - break; - } - ASSERT(accessMask != 0); - return accessMask; -} - -int Operand::GetMaxComponent() const -{ - if (iWriteMaskEnabled && - iNumComponents == 4) - { - //Component Mask - if (eSelMode == OPERAND_4_COMPONENT_MASK_MODE) - { - if (ui32CompMask != 0 && ui32CompMask != (OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z | OPERAND_4_COMPONENT_MASK_W)) - { - if (ui32CompMask & OPERAND_4_COMPONENT_MASK_W) - { - return 4; - } - if (ui32CompMask & OPERAND_4_COMPONENT_MASK_Z) - { - return 3; - } - if (ui32CompMask & OPERAND_4_COMPONENT_MASK_Y) - { - return 2; - } - if (ui32CompMask & OPERAND_4_COMPONENT_MASK_X) - { - return 1; - } - } - } - else - //Component Swizzle - if (eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) - { - if (ui32Swizzle == NO_SWIZZLE) - return 4; - - uint32_t res = 0; - for (int i = 0; i < 4; i++) - { - res = std::max(aui32Swizzle[i], res); - } - return (int)res + 1; - } - else if (eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE) - { - return 1; - } - } - - return 4; -} - -//Single component repeated -//e..g .wwww -bool Operand::IsSwizzleReplicated() const -{ - if (iWriteMaskEnabled && - iNumComponents == 4) - { - if (eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) - { - if (ui32Swizzle == WWWW_SWIZZLE || - ui32Swizzle == ZZZZ_SWIZZLE || - ui32Swizzle == YYYY_SWIZZLE || - ui32Swizzle == XXXX_SWIZZLE) - { - return true; - } - } - } - return false; -} - -// Get the number of elements returned by operand, taking additional component mask into account -uint32_t Operand::GetNumSwizzleElements(uint32_t 
_ui32CompMask /* = OPERAND_4_COMPONENT_MASK_ALL */) const -{ - uint32_t count = 0; - - switch (eType) - { - case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP: - case OPERAND_TYPE_INPUT_THREAD_ID: - case OPERAND_TYPE_INPUT_THREAD_GROUP_ID: - // Adjust component count and break to more processing - ((Operand *)this)->iNumComponents = 3; - break; - case OPERAND_TYPE_IMMEDIATE32: - case OPERAND_TYPE_IMMEDIATE64: - case OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL: - case OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL: - case OPERAND_TYPE_OUTPUT_DEPTH: - { - // Translate numComponents into bitmask - // 1 -> 1, 2 -> 3, 3 -> 7 and 4 -> 15 - uint32_t compMask = (1 << iNumComponents) - 1; - - compMask &= _ui32CompMask; - // Calculate bits left in compMask - return HLSLcc::GetNumberBitsSet(compMask); - } - default: - { - break; - } - } - - if (iWriteMaskEnabled && - iNumComponents != 1) - { - //Component Mask - if (eSelMode == OPERAND_4_COMPONENT_MASK_MODE) - { - uint32_t compMask = ui32CompMask; - if (compMask == 0) - compMask = OPERAND_4_COMPONENT_MASK_ALL; - compMask &= _ui32CompMask; - - if (compMask == OPERAND_4_COMPONENT_MASK_ALL) - return 4; - - if (compMask & OPERAND_4_COMPONENT_MASK_X) - { - count++; - } - if (compMask & OPERAND_4_COMPONENT_MASK_Y) - { - count++; - } - if (compMask & OPERAND_4_COMPONENT_MASK_Z) - { - count++; - } - if (compMask & OPERAND_4_COMPONENT_MASK_W) - { - count++; - } - } - else - //Component Swizzle - if (eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) - { - uint32_t i; - for (i = 0; i < 4; ++i) - { - if ((_ui32CompMask & (1 << i)) == 0) - continue; - - count++; - } - } - else if (eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE) - { - if (aui32Swizzle[0] == OPERAND_4_COMPONENT_X && (_ui32CompMask & OPERAND_4_COMPONENT_MASK_X)) - { - count++; - } - else if (aui32Swizzle[0] == OPERAND_4_COMPONENT_Y && (_ui32CompMask & OPERAND_4_COMPONENT_MASK_Y)) - { - count++; - } - else if (aui32Swizzle[0] == OPERAND_4_COMPONENT_Z && (_ui32CompMask & OPERAND_4_COMPONENT_MASK_Z)) - { 
- count++; - } - else if (aui32Swizzle[0] == OPERAND_4_COMPONENT_W && (_ui32CompMask & OPERAND_4_COMPONENT_MASK_W)) - { - count++; - } - } - - //Component Select 1 - } - - if (!count) - { - // Translate numComponents into bitmask - // 1 -> 1, 2 -> 3, 3 -> 7 and 4 -> 15 - uint32_t compMask = (1 << iNumComponents) - 1; - - compMask &= _ui32CompMask; - // Calculate bits left in compMask - return HLSLcc::GetNumberBitsSet(compMask); - } - - return count; -} - -// Returns 0 if the register used by the operand is per-vertex, or 1 if per-patch -int Operand::GetRegisterSpace(SHADER_TYPE eShaderType, SHADER_PHASE_TYPE eShaderPhaseType) const -{ - if (eShaderType != HULL_SHADER && eShaderType != DOMAIN_SHADER) - return 0; - - if (eShaderType == HULL_SHADER && eShaderPhaseType == HS_CTRL_POINT_PHASE) - return 0; - - if (eShaderType == DOMAIN_SHADER && eType == OPERAND_TYPE_OUTPUT) - return 0; - - if (eType == OPERAND_TYPE_INPUT_CONTROL_POINT || eType == OPERAND_TYPE_OUTPUT_CONTROL_POINT) - return 0; - - return 1; -} - -int Operand::GetRegisterSpace(const HLSLCrossCompilerContext *psContext) const -{ - return GetRegisterSpace(psContext->psShader->eShaderType, psContext->psShader->asPhases[psContext->currentPhase].ePhase); -} - -SHADER_VARIABLE_TYPE Operand::GetDataType(HLSLCrossCompilerContext* psContext, SHADER_VARIABLE_TYPE ePreferredTypeForImmediates /* = SVT_INT */) const -{ - // indexable temps (temp arrays) are always float - if (eType == OPERAND_TYPE_INDEXABLE_TEMP) - return SVT_FLOAT; - - // The min precision qualifier overrides all of the stuff below - switch (eMinPrecision) - { - case OPERAND_MIN_PRECISION_FLOAT_16: - return SVT_FLOAT16; - case OPERAND_MIN_PRECISION_FLOAT_2_8: - return SVT_FLOAT10; - case OPERAND_MIN_PRECISION_SINT_16: - return SVT_INT16; - case OPERAND_MIN_PRECISION_UINT_16: - return SVT_UINT16; - default: - break; - } - - switch (eType) - { - case OPERAND_TYPE_TEMP: - { - SHADER_VARIABLE_TYPE eCurrentType = SVT_FLOAT; - int i = 0; - - if (eSelMode 
== OPERAND_4_COMPONENT_SELECT_1_MODE) - { - return aeDataType[aui32Swizzle[0]]; - } - if (eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) - { - if (ui32Swizzle == (NO_SWIZZLE)) - { - return aeDataType[0]; - } - - return aeDataType[aui32Swizzle[0]]; - } - - if (eSelMode == OPERAND_4_COMPONENT_MASK_MODE) - { - uint32_t mask = ui32CompMask; - if (!mask) - { - mask = OPERAND_4_COMPONENT_MASK_ALL; - } - for (; i < 4; ++i) - { - if (mask & (1 << i)) - { - eCurrentType = aeDataType[i]; - break; - } - } - -#ifdef _DEBUG - //Check if all elements have the same basic type. - for (; i < 4; ++i) - { - if (mask & (1 << i)) - { - if (eCurrentType != aeDataType[i]) - { - ASSERT(0); - } - } - } -#endif - return eCurrentType; - } - - ASSERT(0); - - break; - } - case OPERAND_TYPE_OUTPUT: - { - const uint32_t ui32Register = ui32RegisterNumber; - int regSpace = GetRegisterSpace(psContext); - const ShaderInfo::InOutSignature* psOut = NULL; - - if (regSpace == 0) - psContext->psShader->sInfo.GetOutputSignatureFromRegister(ui32Register, GetAccessMask(), psContext->psShader->ui32CurrentVertexOutputStream, - &psOut); - else - { - psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Register, GetAccessMask(), &psOut, true); - if (!psOut) - return SVT_FLOAT; - } - - ASSERT(psOut != NULL); - if (psOut->eMinPrec != MIN_PRECISION_DEFAULT) - { - switch (psOut->eMinPrec) - { - default: - ASSERT(0); - break; - case MIN_PRECISION_FLOAT_16: - return SVT_FLOAT16; - case MIN_PRECISION_FLOAT_2_8: - if (psContext->psShader->eTargetLanguage == LANG_METAL) - return SVT_FLOAT16; - else - return SVT_FLOAT10; - case MIN_PRECISION_SINT_16: - return SVT_INT16; - case MIN_PRECISION_UINT_16: - return SVT_UINT16; - } - } - if (psOut->eComponentType == INOUT_COMPONENT_UINT32) - { - return SVT_UINT; - } - else if (psOut->eComponentType == INOUT_COMPONENT_SINT32) - { - return SVT_INT; - } - return SVT_FLOAT; - break; - } - case OPERAND_TYPE_INPUT: - case OPERAND_TYPE_INPUT_PATCH_CONSTANT: - case 
OPERAND_TYPE_INPUT_CONTROL_POINT: - { - const uint32_t ui32Register = aui32ArraySizes[iIndexDims - 1]; - int regSpace = GetRegisterSpace(psContext); - const ShaderInfo::InOutSignature* psIn = NULL; - - if (regSpace == 0) - { - if (psContext->psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[ui32Register] != 0) - return SVT_FLOAT; // All combined inputs are stored as floats - psContext->psShader->sInfo.GetInputSignatureFromRegister(ui32Register, GetAccessMask(), - &psIn); - } - else - { - if (psContext->psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[ui32Register] != 0) - return SVT_FLOAT; // All combined inputs are stored as floats - psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Register, GetAccessMask(), &psIn); - } - - ASSERT(psIn != NULL); - - switch (eSpecialName) - { - //UINT in DX, INT in GL. - case NAME_PRIMITIVE_ID: - case NAME_VERTEX_ID: - case NAME_INSTANCE_ID: - case NAME_RENDER_TARGET_ARRAY_INDEX: - case NAME_VIEWPORT_ARRAY_INDEX: - case NAME_SAMPLE_INDEX: - return (psContext->psShader->eTargetLanguage == LANG_METAL) ? SVT_UINT : SVT_INT; - - case NAME_IS_FRONT_FACE: - return SVT_UINT; - - case NAME_POSITION: - case NAME_CLIP_DISTANCE: - case NAME_CULL_DISTANCE: - return SVT_FLOAT; - - default: - break; - // fall through - } - - if (psIn->eSystemValueType == NAME_IS_FRONT_FACE) - return SVT_UINT; - - //UINT in DX, INT in GL. - if (psIn->eSystemValueType == NAME_PRIMITIVE_ID || - psIn->eSystemValueType == NAME_VERTEX_ID || - psIn->eSystemValueType == NAME_INSTANCE_ID || - psIn->eSystemValueType == NAME_RENDER_TARGET_ARRAY_INDEX || - psIn->eSystemValueType == NAME_VIEWPORT_ARRAY_INDEX || - psIn->eSystemValueType == NAME_SAMPLE_INDEX) - return (psContext->psShader->eTargetLanguage == LANG_METAL) ? 
SVT_UINT : SVT_INT; - - if (psIn->eMinPrec != MIN_PRECISION_DEFAULT) - { - switch (psIn->eMinPrec) - { - default: - ASSERT(0); - break; - case MIN_PRECISION_FLOAT_16: - return SVT_FLOAT16; - case MIN_PRECISION_FLOAT_2_8: - if (psContext->psShader->eTargetLanguage == LANG_METAL) - return SVT_FLOAT16; - else - return SVT_FLOAT10; - case MIN_PRECISION_SINT_16: - return SVT_INT16; - case MIN_PRECISION_UINT_16: - return SVT_UINT16; - } - } - - if (psIn->eComponentType == INOUT_COMPONENT_UINT32) - { - return SVT_UINT; - } - else if (psIn->eComponentType == INOUT_COMPONENT_SINT32) - { - return SVT_INT; - } - return SVT_FLOAT; - break; - } - case OPERAND_TYPE_CONSTANT_BUFFER: - { - const ConstantBuffer* psCBuf = NULL; - const ShaderVarType* psVarType = NULL; - int32_t rebase = -1; - bool isArray; - psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, aui32ArraySizes[0], &psCBuf); - if (psCBuf) - { - int foundVar = ShaderInfo::GetShaderVarFromOffset(aui32ArraySizes[1], aui32Swizzle, psCBuf, &psVarType, &isArray, NULL, &rebase, psContext->flags); - if (foundVar) - return psVarType->Type; - - ASSERT(0); - } - else - ASSERT(0); - break; - } - case OPERAND_TYPE_IMMEDIATE32: - { - return ePreferredTypeForImmediates; - } - - case OPERAND_TYPE_IMMEDIATE64: - { - return SVT_DOUBLE; - } - - case OPERAND_TYPE_INPUT_THREAD_ID: - case OPERAND_TYPE_INPUT_THREAD_GROUP_ID: - case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP: - case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED: - { - return SVT_UINT; - } - case OPERAND_TYPE_SPECIAL_ADDRESS: - case OPERAND_TYPE_SPECIAL_LOOPCOUNTER: - case OPERAND_TYPE_INPUT_FORK_INSTANCE_ID: - case OPERAND_TYPE_INPUT_PRIMITIVEID: - { - return SVT_INT; - } - case OPERAND_TYPE_INPUT_GS_INSTANCE_ID: - { - return SVT_UINT; - } - case OPERAND_TYPE_OUTPUT_COVERAGE_MASK: - { - return SVT_INT; - } - case OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID: - { - return SVT_INT; - } - case OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER: // constant array is floats 
everywhere except on vulkan - { - return psContext->IsVulkan() ? SVT_UINT : SVT_FLOAT; - } - - default: - { - return SVT_FLOAT; - } - } - - return SVT_FLOAT; -} - -OPERAND_MIN_PRECISION Operand::ResourcePrecisionToOperandPrecision(REFLECT_RESOURCE_PRECISION ePrec) -{ - switch (ePrec) - { - default: - case REFLECT_RESOURCE_PRECISION_UNKNOWN: - case REFLECT_RESOURCE_PRECISION_LOWP: - return OPERAND_MIN_PRECISION_FLOAT_2_8; - case REFLECT_RESOURCE_PRECISION_MEDIUMP: - return OPERAND_MIN_PRECISION_FLOAT_16; - case REFLECT_RESOURCE_PRECISION_HIGHP: - return OPERAND_MIN_PRECISION_DEFAULT; - } -} - -int Operand::GetNumInputElements(const HLSLCrossCompilerContext *psContext) const -{ - const ShaderInfo::InOutSignature *psSig = NULL; - int regSpace = GetRegisterSpace(psContext); - - switch (eType) - { - case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED: - case OPERAND_TYPE_INPUT_FORK_INSTANCE_ID: - case OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID: - return 1; - case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP: - case OPERAND_TYPE_INPUT_THREAD_ID: - case OPERAND_TYPE_INPUT_THREAD_GROUP_ID: - case OPERAND_TYPE_INPUT_DOMAIN_POINT: - return 3; - default: - break; - } - - if (regSpace == 0) - psContext->psShader->sInfo.GetInputSignatureFromRegister(ui32RegisterNumber, GetAccessMask(), &psSig); - else - psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32RegisterNumber, GetAccessMask(), &psSig); - - ASSERT(psSig != NULL); - - return HLSLcc::GetNumberBitsSet(psSig->ui32Mask); -} - -Operand* Operand::GetDynamicIndexOperand(HLSLCrossCompilerContext *psContext, const ShaderVarType* psVar, bool isAoS, bool *needsIndexCalcRevert) const -{ - Operand *psDynIndexOp = m_SubOperands[0].get(); - if (psDynIndexOp == NULL) - psDynIndexOp = m_SubOperands[1].get(); - - *needsIndexCalcRevert = false; - if (psDynIndexOp != NULL && isAoS) - { - // if dynamically indexing array of structs, try using the original index var before the float4 address calc - bool indexVarFound = false; - 
*needsIndexCalcRevert = true; - Instruction *psDynIndexOrigin = psDynIndexOp->m_Defines[0].m_Inst; - Operand *asOps = psDynIndexOrigin->asOperands; - Operand *psOriginOp = NULL; - - // DXBC always addresses as float4, find the address calculation - - // Special case where struct is float4 size, no extra calc is done - if (ShaderInfo::GetCBVarSize(psVar->Parent, true) <= 16) // matrixAsVectors arg does not matter here as with matrices the size will go over the limit anyway - { - indexVarFound = true; - *needsIndexCalcRevert = false; - } - else if (psDynIndexOrigin->eOpcode == OPCODE_IMUL) - { - // check which one of the src operands is the original index - if ((asOps[2].eType == OPERAND_TYPE_TEMP || asOps[2].eType == OPERAND_TYPE_INPUT || asOps[2].eType == OPERAND_TYPE_CONSTANT_BUFFER) && asOps[3].eType == OPERAND_TYPE_IMMEDIATE32) - psOriginOp = &asOps[2]; - else if ((asOps[3].eType == OPERAND_TYPE_TEMP || asOps[3].eType == OPERAND_TYPE_INPUT || asOps[3].eType == OPERAND_TYPE_CONSTANT_BUFFER) && asOps[2].eType == OPERAND_TYPE_IMMEDIATE32) - psOriginOp = &asOps[3]; - } - else if (psDynIndexOrigin->eOpcode == OPCODE_ISHL) - { - if (asOps[2].eType == OPERAND_TYPE_IMMEDIATE32 && asOps[1].eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) - psOriginOp = &asOps[0]; - else if (asOps[2].eType == OPERAND_TYPE_IMMEDIATE32) - psOriginOp = &asOps[1]; - } - - if (psOriginOp != NULL) - { - indexVarFound = true; - - // Check if the mul dest is not the same temp as the src. 
Also check that the temp - // does not have multiple uses (which could override the value) - // -> we can use src straight and no index revert calc is needed - if ((psOriginOp->eType == OPERAND_TYPE_INPUT) - || ((psOriginOp->ui32RegisterNumber != psDynIndexOp->ui32RegisterNumber || psOriginOp->GetDataType(psContext) != psDynIndexOp->GetDataType(psContext)) - && (!psOriginOp->m_Defines.empty()) && psOriginOp->m_Defines[0].m_Inst->m_Uses.size() == 1)) - { - psDynIndexOp = psOriginOp; - *needsIndexCalcRevert = false; - } - } - - // Atm we support only this very basic case of dynamic indexing array of structs. - // Return error if something else is encountered. - if (!indexVarFound) - psContext->m_Reflection.OnDiagnostics("Unsupported dynamic indexing scheme on constant buffer vars.", 0, true); - } - - return psDynIndexOp; -} diff --git a/third_party/HLSLcc/src/Shader.cpp b/third_party/HLSLcc/src/Shader.cpp deleted file mode 100644 index 6f62ffe..0000000 --- a/third_party/HLSLcc/src/Shader.cpp +++ /dev/null @@ -1,989 +0,0 @@ -#include "internal_includes/Shader.h" -#include "internal_includes/debug.h" -#include -#include "internal_includes/Instruction.h" -#include "internal_includes/Declaration.h" -#include "internal_includes/HLSLccToolkit.h" - -uint32_t Shader::GetTempComponentCount(SHADER_VARIABLE_TYPE eType, uint32_t ui32Reg) const -{ - switch (eType) - { - case SVT_FLOAT: - return psFloatTempSizes[ui32Reg]; - case SVT_FLOAT16: - return psFloat16TempSizes[ui32Reg]; - case SVT_FLOAT10: - return psFloat10TempSizes[ui32Reg]; - case SVT_INT: - return psIntTempSizes[ui32Reg]; - case SVT_INT16: - return psInt16TempSizes[ui32Reg]; - case SVT_INT12: - return psInt12TempSizes[ui32Reg]; - case SVT_UINT: - return psUIntTempSizes[ui32Reg]; - case SVT_UINT16: - return psUInt16TempSizes[ui32Reg]; - case SVT_DOUBLE: - return psDoubleTempSizes[ui32Reg]; - case SVT_BOOL: - return psBoolTempSizes[ui32Reg]; - default: - ASSERT(0); - } - return 0; -} - -void 
Shader::ConsolidateHullTempVars() -{ - uint32_t i, phase; - uint32_t numTemps = 0; - for (phase = 0; phase < asPhases.size(); phase++) - { - for (i = 0; i < asPhases[phase].psDecl.size(); i++) - { - if (asPhases[phase].psDecl[i].eOpcode == OPCODE_DCL_TEMPS) - { - if (asPhases[phase].psDecl[i].value.ui32NumTemps > numTemps) - numTemps = asPhases[phase].psDecl[i].value.ui32NumTemps; - asPhases[phase].psDecl[i].value.ui32NumTemps = 0; - } - } - } - // Now we have the max temps, write it back to the first one we see. - for (phase = 0; phase < asPhases.size(); phase++) - { - for (i = 0; i < asPhases[phase].psDecl.size(); i++) - { - if (asPhases[phase].psDecl[i].eOpcode == OPCODE_DCL_TEMPS) - { - asPhases[phase].psDecl[i].value.ui32NumTemps = numTemps; - return; - } - } - } -} - -// Image (RWTexture in HLSL) declaration op does not provide enough info about the format and accessing. -// Go through all image declarations and instructions accessing it to see if it is readonly/writeonly. -// While doing that we also get the number of components expected in the image format. -// Also resolve access flags for other UAVs as well. No component count resolving for them. 
-void ShaderPhase::ResolveUAVProperties(const ShaderInfo& sInfo) -{ - Declaration *psFirstDeclaration = &psDecl[0]; - - uint32_t ui32NumDeclarations = (uint32_t)psDecl.size(); - Instruction *psFirstInstruction = &psInst[0]; - uint32_t ui32NumInstructions = (uint32_t)psInst.size(); - - if (ui32NumDeclarations == 0 || ui32NumInstructions == 0) - return; - - Declaration *psLastDeclaration = psFirstDeclaration + ui32NumDeclarations - 1; - Instruction *psLastInstruction = psFirstInstruction + ui32NumInstructions - 1; - Declaration *psDecl; - - for (psDecl = psFirstDeclaration; psDecl <= psLastDeclaration; psDecl++) - { - Instruction *psInst; - uint32_t uavReg; - if (psDecl->eOpcode != OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED && - psDecl->eOpcode != OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED && - psDecl->eOpcode != OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW) - continue; - - uavReg = psDecl->asOperands[0].ui32RegisterNumber; - - for (psInst = psFirstInstruction; psInst <= psLastInstruction; psInst++) - { - uint32_t opIndex; - uint32_t accessFlags; - uint32_t numComponents; - - switch (psInst->eOpcode) - { - case OPCODE_LD_UAV_TYPED: - opIndex = 2; - accessFlags = ACCESS_FLAG_READ; - numComponents = psInst->asOperands[0].GetNumSwizzleElements(); // get component count from the write target - break; - - case OPCODE_STORE_UAV_TYPED: - ASSERT(psInst->asOperands[0].eType == OPERAND_TYPE_UNORDERED_ACCESS_VIEW); - opIndex = 0; - accessFlags = ACCESS_FLAG_WRITE; - numComponents = 0; // store op does not contribute on the component count resolving - break; - - case OPCODE_ATOMIC_CMP_STORE: - case OPCODE_ATOMIC_AND: - case OPCODE_ATOMIC_IADD: - case OPCODE_ATOMIC_OR: - case OPCODE_ATOMIC_XOR: - case OPCODE_ATOMIC_IMIN: - case OPCODE_ATOMIC_UMIN: - case OPCODE_ATOMIC_IMAX: - case OPCODE_ATOMIC_UMAX: - opIndex = 0; - accessFlags = ACCESS_FLAG_READ | ACCESS_FLAG_WRITE | ACCESS_FLAG_ATOMIC; - numComponents = 1; - break; - - case OPCODE_IMM_ATOMIC_AND: - case OPCODE_IMM_ATOMIC_IADD: - case 
OPCODE_IMM_ATOMIC_IMAX: - case OPCODE_IMM_ATOMIC_IMIN: - case OPCODE_IMM_ATOMIC_UMAX: - case OPCODE_IMM_ATOMIC_UMIN: - case OPCODE_IMM_ATOMIC_OR: - case OPCODE_IMM_ATOMIC_XOR: - case OPCODE_IMM_ATOMIC_EXCH: - case OPCODE_IMM_ATOMIC_CMP_EXCH: - opIndex = 1; - accessFlags = ACCESS_FLAG_READ | ACCESS_FLAG_WRITE | ACCESS_FLAG_ATOMIC; - numComponents = 1; - break; - - // The rest of the ops here are only for buffer UAVs. No need for component count resolving. - case OPCODE_LD_STRUCTURED: - opIndex = 3; - accessFlags = ACCESS_FLAG_READ; - numComponents = 0; - break; - - case OPCODE_STORE_STRUCTURED: - opIndex = 0; - accessFlags = ACCESS_FLAG_WRITE; - numComponents = 0; - break; - - case OPCODE_LD_RAW: - opIndex = 2; - accessFlags = ACCESS_FLAG_READ; - numComponents = 0; - break; - - case OPCODE_STORE_RAW: - opIndex = 0; - accessFlags = ACCESS_FLAG_WRITE; - numComponents = 0; - break; - - case OPCODE_IMM_ATOMIC_ALLOC: - case OPCODE_IMM_ATOMIC_CONSUME: - opIndex = 1; - accessFlags = ACCESS_FLAG_READ | ACCESS_FLAG_WRITE | ACCESS_FLAG_ATOMIC; - numComponents = 0; - break; - - default: - continue; - } - - // Buffer loads can also happen on non-uav. Skip those. 
- if (psInst->asOperands[opIndex].eType != OPERAND_TYPE_UNORDERED_ACCESS_VIEW) - continue; - - // Check the instruction is operating on the declared uav - if (psInst->asOperands[opIndex].ui32RegisterNumber != uavReg) - continue; - - psDecl->sUAV.ui32AccessFlags |= accessFlags; - - // get the max components accessed, but only for typed (texture) UAVs - if (numComponents > psDecl->sUAV.ui32NumComponents && psDecl->eOpcode == OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED) - { - psDecl->sUAV.ui32NumComponents = numComponents; - } - } - - if (psDecl->eOpcode == OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED) - { - const ResourceBinding* psBinding = 0; - if (sInfo.GetResourceFromBindingPoint(RGROUP_UAV, uavReg, &psBinding)) - { - // component count is stored in flags as 2 bits, 00: vec1, 01: vec2, 10: vec3, 11: vec4 - psDecl->sUAV.ui32NumComponents = ((psBinding->ui32Flags >> 2) & 3) + 1; - } - } - } -} - -static void GatherOperandAccessMasks(const Operand *psOperand, char *destTable) -{ - int i; - uint32_t reg; - for (i = 0; i < MAX_SUB_OPERANDS; i++) - { - if (psOperand->m_SubOperands[i].get()) - GatherOperandAccessMasks(psOperand->m_SubOperands[i].get(), destTable); - } - - if (psOperand->eType != OPERAND_TYPE_TEMP) - return; - - reg = psOperand->ui32RegisterNumber & 0xffff; // We add 0x10000 to all newly created ones earlier - - destTable[reg] |= (char)psOperand->GetAccessMask(); -} - -// Coalesce the split temps back based on their original temp register. Keep uint/int/float operations separate -static void CoalesceTemps(Shader *psShader, ShaderPhase *psPhase, uint32_t ui32MaxOrigTemps) -{ - // Just move all operations back to their original registers, but keep the data type assignments. 
- uint32_t i, k; - Instruction *psLastInstruction = &psPhase->psInst[psPhase->psInst.size() - 1]; - std::vector opAccessMasks; - - // First move all newly created temps to high enough so they won't overlap with the rebased ones - - Instruction *inst = &psPhase->psInst[0]; - - if (psPhase->psInst.size() == 0 || psPhase->ui32OrigTemps == 0) - return; - - while (inst <= psLastInstruction) - { - // Update all operands and their suboperands - for (i = psPhase->ui32OrigTemps; i < psPhase->ui32TotalTemps; i++) - { - for (k = 0; k < inst->ui32NumOperands; k++) - inst->ChangeOperandTempRegister(&inst->asOperands[k], i, 0x10000 + i, OPERAND_4_COMPONENT_MASK_ALL, UD_CHANGE_ALL, 0); - } - inst++; - } - - // Prune the original registers, rebase if necessary - opAccessMasks.clear(); - opAccessMasks.resize(psPhase->ui32TotalTemps, 0); - inst = &psPhase->psInst[0]; - while (inst <= psLastInstruction) - { - for (k = 0; k < inst->ui32NumOperands; k++) - GatherOperandAccessMasks(&inst->asOperands[k], &opAccessMasks[0]); - inst++; - } - - for (i = 0; i < psPhase->ui32TotalTemps; i++) - { - uint32_t rebase, count; - uint32_t newReg = i; - uint32_t origReg = i; - int needsMoving = 0; - SHADER_VARIABLE_TYPE dataType; - - // Figure out rebase and count - rebase = 0; - count = 0; - if (i < psPhase->ui32OrigTemps) - { - // One of the original registers - k = opAccessMasks[i]; - if (k == 0) - continue; - - while ((k & 1) == 0) - { - rebase++; - k = k >> 1; - } - while (k != 0) - { - count++; - k = k >> 1; - } - newReg = i + ui32MaxOrigTemps * rebase; - if (rebase != 0) - needsMoving = 1; - } - else - { - // Newly created split registers, read info from table - // Read the count and rebase from split info table - count = (psPhase->pui32SplitInfo[i] >> 24) & 0xff; - rebase = (psPhase->pui32SplitInfo[i] >> 16) & 0xff; - origReg = 0x10000 + i; - newReg = (psPhase->pui32SplitInfo[i]) & 0xffff; - while (psPhase->pui32SplitInfo[newReg] != 0xffffffff) - newReg = (psPhase->pui32SplitInfo[newReg]) & 
0xffff; - - // If count is 4, verify that we have both first and last bit set - ASSERT(count != 4 || (opAccessMasks[i] & 9) == 9); - - newReg = newReg + ui32MaxOrigTemps * rebase; - - // Don't rebase again - rebase = 0; - needsMoving = 1; - } - - if (needsMoving) - { - // printf("Moving reg %d to %d, count %d rebase %d\n", origReg, newReg, count, rebase); - - // Move directly to correct location - inst = &psPhase->psInst[0]; - while (inst <= psLastInstruction) - { - for (k = 0; k < inst->ui32NumOperands; k++) - inst->ChangeOperandTempRegister(&inst->asOperands[k], origReg, newReg, OPERAND_4_COMPONENT_MASK_ALL, UD_CHANGE_ALL, rebase); - inst++; - } - } - // Mark the count - dataType = psPhase->peTempTypes[i * 4 + rebase]; - switch (dataType) - { - default: - ASSERT(0); - break; - case SVT_BOOL: - psShader->psBoolTempSizes[newReg] = std::max(psShader->psBoolTempSizes[newReg], (char)count); - break; - case SVT_FLOAT: - psShader->psFloatTempSizes[newReg] = std::max(psShader->psFloatTempSizes[newReg], (char)count); - break; - case SVT_FLOAT16: - psShader->psFloat16TempSizes[newReg] = std::max(psShader->psFloat16TempSizes[newReg], (char)count); - break; - case SVT_FLOAT10: - psShader->psFloat10TempSizes[newReg] = std::max(psShader->psFloat10TempSizes[newReg], (char)count); - break; - case SVT_INT: - psShader->psIntTempSizes[newReg] = std::max(psShader->psIntTempSizes[newReg], (char)count); - break; - case SVT_INT16: - psShader->psInt16TempSizes[newReg] = std::max(psShader->psInt16TempSizes[newReg], (char)count); - break; - case SVT_INT12: - psShader->psInt12TempSizes[newReg] = std::max(psShader->psInt12TempSizes[newReg], (char)count); - break; - case SVT_UINT: - psShader->psUIntTempSizes[newReg] = std::max(psShader->psUIntTempSizes[newReg], (char)count); - break; - case SVT_UINT16: - psShader->psUInt16TempSizes[newReg] = std::max(psShader->psUInt16TempSizes[newReg], (char)count); - break; - case SVT_DOUBLE: - psShader->psDoubleTempSizes[newReg] = 
std::max(psShader->psDoubleTempSizes[newReg], (char)count); - break; - } - } -} - -// Mark whether the temp registers are used per each data type. -void Shader::PruneTempRegisters() -{ - uint32_t k; - uint32_t maxOrigTemps = 0; - uint32_t maxTotalTemps = 0; - // First find the total amount of temps - for (k = 0; k < asPhases.size(); k++) - { - ShaderPhase *psPhase = &asPhases[k]; - maxOrigTemps = std::max(maxOrigTemps, psPhase->ui32OrigTemps); - maxTotalTemps = std::max(maxTotalTemps, psPhase->ui32TotalTemps); - } - - if (maxTotalTemps == 0) - return; // splitarrays are nulls, no need to free - - // Allocate and zero-initialize arrays for each temp sizes. *4 is for every possible rebase - psIntTempSizes.clear(); - psIntTempSizes.resize(maxOrigTemps * 4, 0); - psInt12TempSizes.clear(); - psInt12TempSizes.resize(maxOrigTemps * 4, 0); - psInt16TempSizes.clear(); - psInt16TempSizes.resize(maxOrigTemps * 4, 0); - psUIntTempSizes.clear(); - psUIntTempSizes.resize(maxOrigTemps * 4, 0); - psUInt16TempSizes.clear(); - psUInt16TempSizes.resize(maxOrigTemps * 4, 0); - psFloatTempSizes.clear(); - psFloatTempSizes.resize(maxOrigTemps * 4, 0); - psFloat16TempSizes.clear(); - psFloat16TempSizes.resize(maxOrigTemps * 4, 0); - psFloat10TempSizes.clear(); - psFloat10TempSizes.resize(maxOrigTemps * 4, 0); - psDoubleTempSizes.clear(); - psDoubleTempSizes.resize(maxOrigTemps * 4, 0); - psBoolTempSizes.clear(); - psBoolTempSizes.resize(maxOrigTemps * 4, 0); - - for (k = 0; k < asPhases.size(); k++) - { - ShaderPhase *psPhase = &asPhases[k]; - CoalesceTemps(this, psPhase, maxOrigTemps); - if (psPhase->psTempDeclaration) - psPhase->psTempDeclaration->value.ui32NumTemps = maxOrigTemps * 4; - } -} - -static void DoSignatureAnalysis(std::vector &psSignatures, std::vector &outTable) -{ - // Fill the char, 2 bits per component so that each 2 bits encode the following info: - // 0: unused OR used by the first signature we happened to see - // 1: used by the second signature - // 2: used by the 
third sig - // 3: used by the fourth sig. - - // The counters for each input/output/patch. Start with 8 registers, grow as needed - std::vector counters(8, (unsigned char)0); - outTable.clear(); - outTable.resize(8, (unsigned char)0); - - size_t i; - for (i = 0; i < psSignatures.size(); i++) - { - ShaderInfo::InOutSignature *psSig = &psSignatures[i]; - char currCounter; - char mask; - ASSERT(psSig != NULL); - - // We'll skip SV_Depth and others that put -1 to the register. - if (psSig->ui32Register == 0xffffffffu) - continue; - - // Make sure there's enough room in the table - if (psSig->ui32Register >= counters.size()) - { - counters.resize(psSig->ui32Register * 2, 0); - outTable.resize(psSig->ui32Register * 2, 0); - } - - // Apply counter value to masked items - currCounter = counters[psSig->ui32Register]; - // Duplicate counter bits - currCounter = currCounter | (currCounter << 2) | (currCounter << 4) | (currCounter << 6); - // Widen the mask - mask = (unsigned char)psSig->ui32Mask; - mask = ((mask & 8) << 3) | ((mask & 4) << 2) | ((mask & 2) << 1) | (mask & 1); - mask = mask | (mask << 1); - // Write output - outTable[psSig->ui32Register] |= (currCounter & mask); - // Update counter - counters[psSig->ui32Register]++; - } -} - -void Shader::DoIOOverlapOperand(ShaderPhase *psPhase, Operand *psOperand) -{ - uint32_t i; - uint32_t regSpace = psOperand->GetRegisterSpace(eShaderType, psPhase->ePhase); - unsigned char *redirectTable = NULL; - unsigned char redir = 0; - unsigned char firstFound = 0; - uint32_t mask; - - for (i = 0; i < MAX_SUB_OPERANDS; i++) - if (psOperand->m_SubOperands[i].get()) - DoIOOverlapOperand(psPhase, psOperand->m_SubOperands[i].get()); - - - switch (psOperand->eType) - { - case OPERAND_TYPE_INPUT: - case OPERAND_TYPE_INPUT_CONTROL_POINT: - case OPERAND_TYPE_INPUT_PATCH_CONSTANT: - redirectTable = regSpace == 0 ? 
&psPhase->acInputNeedsRedirect[0] : &psPhase->acPatchConstantsNeedsRedirect[0]; - break; - - case OPERAND_TYPE_OUTPUT: - case OPERAND_TYPE_OUTPUT_CONTROL_POINT: - redirectTable = regSpace == 0 ? &psPhase->acOutputNeedsRedirect[0] : &psPhase->acPatchConstantsNeedsRedirect[0]; - break; - - default: - // Not a input or output, nothing to do here - return; - } - - redir = redirectTable[psOperand->ui32RegisterNumber]; - - if (redir == 0xff) // Already found overlap? - return; - - mask = psOperand->GetAccessMask(); - i = 0; - // Find the first mask bit set. - while ((mask & (1 << i)) == 0) - i++; - - firstFound = (redir >> (i * 2)) & 3; - for (; i < 4; i++) - { - unsigned char sig; - if ((mask & (1 << i)) == 0) - continue; - - sig = (redir >> (i * 2)) & 3; - // All set bits must access the same signature - if (sig != firstFound) - { - redirectTable[psOperand->ui32RegisterNumber] = 0xff; - return; - } - } -} - -static void PruneRedirectEntry(unsigned char &itr) -{ - if (itr != 0xff) - itr = 0; -} - -// Check if inputs and outputs are accessed across semantic boundaries -// as in, 2x texcoord vec2's are packed together as vec4 but still accessed together. 
-void Shader::AnalyzeIOOverlap() -{ - uint32_t i, k; - std::vector outData; - DoSignatureAnalysis(sInfo.psInputSignatures, outData); - - // Now data has the values, copy them to all phases - for (i = 0; i < asPhases.size(); i++) - asPhases[i].acInputNeedsRedirect = outData; - - DoSignatureAnalysis(sInfo.psOutputSignatures, outData); - for (i = 0; i < asPhases.size(); i++) - asPhases[i].acOutputNeedsRedirect = outData; - - DoSignatureAnalysis(sInfo.psPatchConstantSignatures, outData); - for (i = 0; i < asPhases.size(); i++) - asPhases[i].acPatchConstantsNeedsRedirect = outData; - - // Now walk through all operands and suboperands in all instructions and write 0xff to the dest (cannot occur otherwise) - // if we're crossing signature borders - for (i = 0; i < asPhases.size(); i++) - { - ShaderPhase *psPhase = &asPhases[i]; - for (k = 0; k < psPhase->psInst.size(); k++) - { - Instruction *psInst = &psPhase->psInst[k]; - uint32_t j; - for (j = 0; j < psInst->ui32NumOperands; j++) - DoIOOverlapOperand(psPhase, &psInst->asOperands[j]); - } - - // Now prune all tables from anything except 0xff. 
- std::for_each(psPhase->acInputNeedsRedirect.begin(), psPhase->acInputNeedsRedirect.end(), PruneRedirectEntry); - std::for_each(psPhase->acOutputNeedsRedirect.begin(), psPhase->acOutputNeedsRedirect.end(), PruneRedirectEntry); - std::for_each(psPhase->acPatchConstantsNeedsRedirect.begin(), psPhase->acPatchConstantsNeedsRedirect.end(), PruneRedirectEntry); - } -} - -void Shader::SetMaxSemanticIndex() -{ - for (std::vector::iterator it = sInfo.psInputSignatures.begin(); it != sInfo.psInputSignatures.end(); ++it) - maxSemanticIndex = std::max(maxSemanticIndex, it->ui32SemanticIndex); - - for (std::vector::iterator it = sInfo.psOutputSignatures.begin(); it != sInfo.psOutputSignatures.end(); ++it) - maxSemanticIndex = std::max(maxSemanticIndex, it->ui32SemanticIndex); - - for (std::vector::iterator it = sInfo.psPatchConstantSignatures.begin(); it != sInfo.psPatchConstantSignatures.end(); ++it) - maxSemanticIndex = std::max(maxSemanticIndex, it->ui32SemanticIndex); -} - -// In DX bytecode, all const arrays are vec4's, and all arrays are stuffed to one large array. -// Luckily, each chunk is always accessed with suboperand plus (in ui32RegisterNumber) -// So do an analysis pass. Also trim the vec4's into smaller formats if the extra components are never read. -void ShaderPhase::PruneConstArrays() -{ - using namespace std; - auto customDataItr = find_if(psDecl.begin(), psDecl.end(), [](const Declaration &d) { return d.eOpcode == OPCODE_CUSTOMDATA; }); - // Not found? We're done. 
- if (customDataItr == psDecl.end()) - return; - - // Store the original declaration - m_ConstantArrayInfo.m_OrigDeclaration = &(*customDataItr); - - // Loop through each operand and pick up usage masks - HLSLcc::ForEachOperand(psInst.begin(), psInst.end(), FEO_FLAG_ALL, [this](const std::vector::iterator &psInst, const Operand *psOperand, uint32_t ui32OperandType) - { - using namespace std; - if (psOperand->eType == OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER) - { - uint32_t accessMask = psOperand->GetAccessMask(); - uint32_t offset = psOperand->ui32RegisterNumber; - - // Update the chunk access mask - - // Find all existing entries that have anything common with the access mask - auto cbrange = m_ConstantArrayInfo.m_Chunks.equal_range(offset); - vector matchingEntries; - for (auto itr = cbrange.first; itr != cbrange.second; itr++) - { - if ((itr->second.m_AccessMask & accessMask) != 0) - { - matchingEntries.push_back(itr); - } - } - - if (matchingEntries.empty()) - { - // Not found, create new entry - m_ConstantArrayInfo.m_Chunks.insert(make_pair(offset, ConstantArrayChunk(0u, accessMask, (Operand *)psOperand))); - } - else if (matchingEntries.size() == 1) - { - // Update access mask of the one existing entry - matchingEntries[0]->second.m_AccessMask |= accessMask; - matchingEntries[0]->second.m_UseSites.push_back((Operand *)psOperand); - } - else - { - // Multiple entries with (now) overlapping mask. Merge to the first one. 
- ChunkMap::iterator tgt = matchingEntries[0]; - tgt->second.m_AccessMask |= accessMask; - tgt->second.m_UseSites.push_back((Operand *)psOperand); - ChunkMap &chunks = m_ConstantArrayInfo.m_Chunks; - for_each(matchingEntries.begin() + 1, matchingEntries.end(), [&tgt, &chunks](ChunkMap::iterator itr) - { - tgt->second.m_AccessMask |= itr->second.m_AccessMask; - chunks.erase(itr); - }); - } - } - }); - - // Figure out how large each chunk is by finding the next chunk that uses any bits from the current mask (or the max size if not found) - - uint32_t totalSize = (uint32_t)m_ConstantArrayInfo.m_OrigDeclaration->asImmediateConstBuffer.size(); - for (auto chunk = m_ConstantArrayInfo.m_Chunks.begin(); chunk != m_ConstantArrayInfo.m_Chunks.end(); chunk++) - { - // Find the next chunk that shares any bits in the access mask - auto nextItr = find_if(m_ConstantArrayInfo.m_Chunks.lower_bound(chunk->first + 1), m_ConstantArrayInfo.m_Chunks.end(), [&chunk](ChunkMap::value_type &itr) - { - return (chunk->second.m_AccessMask & itr.second.m_AccessMask) != 0; - }); - - // Not found? Must continue until the end of array - if (nextItr == m_ConstantArrayInfo.m_Chunks.end()) - chunk->second.m_Size = totalSize - chunk->first; - else - { - // Otherwise we know the chunk size directly. - chunk->second.m_Size = nextItr->first - chunk->first; - } - - // Do rebase on the operands if necessary - chunk->second.m_Rebase = 0; - uint32_t t = chunk->second.m_AccessMask; - ASSERT(t != 0); - while ((t & 1) == 0) - { - chunk->second.m_Rebase++; - t >>= 1; - } - uint32_t rebase = chunk->second.m_Rebase; - uint32_t componentCount = 0; - while (t != 0) - { - componentCount++; - t >>= 1; - } - chunk->second.m_ComponentCount = componentCount; - - for_each(chunk->second.m_UseSites.begin(), chunk->second.m_UseSites.end(), [&rebase, &componentCount](Operand *op) - { - // Store the rebase value to each operand and do the actual rebase. 
- op->m_Rebase = rebase; - op->m_Size = componentCount; - - if (rebase != 0) - { - // Update component mask. Note that we don't need to do anything to the suboperands. They do not affect destination writemask. - switch (op->eSelMode) - { - case OPERAND_4_COMPONENT_MASK_MODE: - { - uint32_t oldMask = op->ui32CompMask; - if (oldMask == 0) - oldMask = OPERAND_4_COMPONENT_MASK_ALL; - - // Check that we're not losing any information - ASSERT((oldMask >> rebase) << rebase == oldMask); - op->ui32CompMask = (oldMask >> rebase); - break; - } - case OPERAND_4_COMPONENT_SELECT_1_MODE: - ASSERT(op->aui32Swizzle[0] >= rebase); - op->aui32Swizzle[0] -= rebase; - break; - case OPERAND_4_COMPONENT_SWIZZLE_MODE: - { - for (int i = 0; i < 4; i++) - { - // Note that this rebase is different from the one done for source operands - ASSERT(op->aui32Swizzle[i] >= rebase); - op->aui32Swizzle[i] -= rebase; - } - break; - } - default: - ASSERT(0); - } - } - }); - } - - - // We'll do the actual declaration and pruning later on, now that we have the info stored up. -} - -HLSLcc::ControlFlow::ControlFlowGraph &ShaderPhase::GetCFG() -{ - if (!m_CFGInitialized) - { - m_CFG.Build(psInst.data(), psInst.data() + psInst.size()); - m_CFGInitialized = true; - } - - return m_CFG; -} - -void ShaderPhase::UnvectorizeImmMoves() -{ - // NOTE must be called before datatype analysis and other analysis phases are done, as the pointers won't match anymore - // (we insert new instructions there) - using namespace std; - vector nInst; - // Reserve 1.5x space - nInst.reserve(psInst.size() * 3 / 2); - - for_each(psInst.begin(), psInst.end(), [&](Instruction &i) - { - if (i.eOpcode != OPCODE_MOV || i.asOperands[0].eType != OPERAND_TYPE_TEMP || i.asOperands[1].eType != OPERAND_TYPE_IMMEDIATE32 || i.asOperands[0].GetNumSwizzleElements() == 1) - { - nInst.push_back(i); - return; - } - // Ok, found one to unvectorize. 
- ASSERT(i.asOperands[0].eSelMode == OPERAND_4_COMPONENT_MASK_MODE); - uint32_t mask = i.asOperands[0].ui32CompMask; - for (uint32_t j = 0; j < 4; j++) - { - if ((mask & (1 << j)) == 0) - continue; - - Instruction ni = i; - ni.asOperands[0].ui32CompMask = (1 << j); - nInst.push_back(ni); - } - }); - psInst.clear(); - psInst.swap(nInst); -} - -void ShaderPhase::ExpandSWAPCs() -{ - // First find the DCL_TEMPS declaration - auto dcitr = std::find_if(psDecl.begin(), psDecl.end(), [](const Declaration &decl) -> bool { return decl.eOpcode == OPCODE_DCL_TEMPS; }); - if (dcitr == psDecl.end()) - { - // No temp declaration? Probably we won't have SWAPC either, then. - return; - } - Declaration &tmpDecl = *dcitr; - - uint32_t extraTemp = 0; - bool extraTempAllocated = false; - - // Parse through instructions, open up SWAPCs if necessary - while (1) - { - // Need to find from top every time, because we're inserting stuff into the vector - auto swapItr = std::find_if(psInst.begin(), psInst.end(), [](const Instruction &inst) -> bool { return inst.eOpcode == OPCODE_SWAPC; }); - if (swapItr == psInst.end()) - break; - - // Ok swapItr now points to a SWAPC instruction that we'll have to split up like this (from MSDN): - -/* swapc dest0[.mask], - dest1[.mask], - src0[.swizzle], - src1[.swizzle], - src2[.swizzle] - - expands to : - - movc temp[dest0s mask], - src0[.swizzle], - src2[.swizzle], src1[.swizzle] - - movc dest1[.mask], - src0[.swizzle], - src1[.swizzle], src2[.swizzle] - - mov dest0.mask, temp -*/ - // Allocate a new temp, if not already done - if (!extraTempAllocated) - { - extraTemp = tmpDecl.value.ui32NumTemps++; - extraTempAllocated = true; - } - - Instruction origSwapInst; -#if _DEBUG - origSwapInst.id = swapItr->id; -#endif - std::swap(*swapItr, origSwapInst); // Store the original swapc for reading - - // OP 1: MOVC temp[dest0 mask], src0, src2, stc1 - swapItr->eOpcode = OPCODE_MOVC; - swapItr->ui32NumOperands = 4; - swapItr->ui32FirstSrc = 1; - 
swapItr->asOperands[0] = origSwapInst.asOperands[0]; - swapItr->asOperands[0].eType = OPERAND_TYPE_TEMP; - swapItr->asOperands[0].ui32RegisterNumber = extraTemp; - // mask is already fine - swapItr->asOperands[1] = origSwapInst.asOperands[2]; // src0 - swapItr->asOperands[2] = origSwapInst.asOperands[4]; // src2 - swapItr->asOperands[3] = origSwapInst.asOperands[3]; // src1 - // swapItr is already in the psInst vector. - - Instruction newInst[2] = { Instruction(), Instruction() }; - // OP 2: MOVC dest1, src0, src1, src2 - newInst[0].eOpcode = OPCODE_MOVC; - newInst[0].ui32NumOperands = 4; - newInst[0].ui32FirstSrc = 1; - newInst[0].asOperands[0] = origSwapInst.asOperands[1]; // dest1 - newInst[0].asOperands[1] = origSwapInst.asOperands[2]; // src0 - newInst[0].asOperands[2] = origSwapInst.asOperands[3]; // src1 - newInst[0].asOperands[3] = origSwapInst.asOperands[4]; // src2 -#if _DEBUG - newInst[0].id = swapItr->id; -#endif - - // OP 3: mov dest0.mask, temp - newInst[1].eOpcode = OPCODE_MOV; - newInst[1].ui32NumOperands = 2; - newInst[1].ui32FirstSrc = 1; - newInst[1].asOperands[0] = origSwapInst.asOperands[0]; // dest 0 - // First copy dest0 to src as well to get the mask set up correctly - newInst[1].asOperands[1] = origSwapInst.asOperands[0]; // dest 0; - // Then overwrite with temp reg - newInst[1].asOperands[1].eType = OPERAND_TYPE_TEMP; - newInst[1].asOperands[1].ui32RegisterNumber = extraTemp; -#if _DEBUG - newInst[1].id = swapItr->id; -#endif - - // Insert the new instructions to the vector - psInst.insert(swapItr + 1, newInst, newInst + 2); - } -} - -void Shader::ExpandSWAPCs() -{ - // Just call ExpandSWAPCs for each phase - for (int i = 0; i < asPhases.size(); i++) - { - asPhases[i].ExpandSWAPCs(); - } -} - -void Shader::ForcePositionToHighp() -{ - // Only sensible in vertex shaders (TODO: is this an issue in tessellation shaders? Do we even care?) 
- if (eShaderType != VERTEX_SHADER) - return; - - ShaderPhase &phase = asPhases[0]; - - // Find the output declaration - std::vector::iterator itr = std::find_if(phase.psDecl.begin(), phase.psDecl.end(), [this](const Declaration &decl) -> bool - { - if (decl.eOpcode == OPCODE_DCL_OUTPUT_SIV) - { - const SPECIAL_NAME specialName = decl.asOperands[0].eSpecialName; - if (specialName == NAME_POSITION || - specialName == NAME_UNDEFINED) // This might be SV_Position (because d3dcompiler is weird). - { - const ShaderInfo::InOutSignature *sig = NULL; - sInfo.GetOutputSignatureFromRegister(decl.asOperands[0].ui32RegisterNumber, decl.asOperands[0].GetAccessMask(), 0, &sig); - ASSERT(sig != NULL); - if ((sig->eSystemValueType == NAME_POSITION || sig->semanticName == "POS") && sig->ui32SemanticIndex == 0) - { - ((ShaderInfo::InOutSignature *)sig)->eMinPrec = MIN_PRECISION_DEFAULT; - return true; - } - } - return false; - } - else if (decl.eOpcode == OPCODE_DCL_OUTPUT) - { - const ShaderInfo::InOutSignature *sig = NULL; - sInfo.GetOutputSignatureFromRegister(decl.asOperands[0].ui32RegisterNumber, decl.asOperands[0].GetAccessMask(), 0, &sig); - ASSERT(sig != NULL); - if ((sig->eSystemValueType == NAME_POSITION || sig->semanticName == "POS") && sig->ui32SemanticIndex == 0) - { - ((ShaderInfo::InOutSignature *)sig)->eMinPrec = MIN_PRECISION_DEFAULT; - return true; - } - return false; - } - return false; - }); - - // Do nothing if we don't find suitable output. This may well be INTERNALTESSPOS for tessellation etc. 
- if (itr == phase.psDecl.end()) - return; - - uint32_t outputPosReg = itr->asOperands[0].ui32RegisterNumber; - - HLSLcc::ForEachOperand(phase.psInst.begin(), phase.psInst.end(), FEO_FLAG_DEST_OPERAND, [outputPosReg](std::vector::iterator itr, Operand *op, uint32_t flags) - { - if (op->eType == OPERAND_TYPE_OUTPUT && op->ui32RegisterNumber == outputPosReg) - op->eMinPrecision = OPERAND_MIN_PRECISION_DEFAULT; - }); -} - -void Shader::FindUnusedGlobals(uint32_t flags) -{ - for (int i = 0; i < asPhases.size(); i++) - { - ShaderPhase &phase = asPhases[i]; - - // Loop through every operand and pick up usages - HLSLcc::ForEachOperand(phase.psInst.begin(), phase.psInst.end(), FEO_FLAG_SRC_OPERAND | FEO_FLAG_SUBOPERAND, [&](std::vector::iterator inst, Operand *op, uint32_t flags) - { - // Not a constant buffer read? continue - if (op->eType != OPERAND_TYPE_CONSTANT_BUFFER) - return; - - const uint32_t ui32BindingPoint = op->aui32ArraySizes[0]; - const ConstantBuffer *psCBuf = NULL; - sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, ui32BindingPoint, &psCBuf); - - if (!psCBuf) - return; - - // Get all the struct members that can be reached from this usage: - uint32_t mask = op->GetAccessMask(); - for (uint32_t k = 0; k < 4; k++) - { - if ((mask & (1 << k)) == 0) - continue; - - uint32_t tmpSwizzle[4] = {k, k, k, k}; - int rebase; - bool isArray; - - ShaderVarType *psVarType = NULL; - - ShaderInfo::GetShaderVarFromOffset(op->aui32ArraySizes[1], tmpSwizzle, psCBuf, (const ShaderVarType**)&psVarType, &isArray, NULL, &rebase, flags); - - // Mark as used. Also all parents. 
- while (psVarType) - { - psVarType->m_IsUsed = true; - psVarType = psVarType->Parent; - } - } - }); - } -} diff --git a/third_party/HLSLcc/src/ShaderInfo.cpp b/third_party/HLSLcc/src/ShaderInfo.cpp deleted file mode 100644 index 554f202..0000000 --- a/third_party/HLSLcc/src/ShaderInfo.cpp +++ /dev/null @@ -1,520 +0,0 @@ -#include "ShaderInfo.h" -#include "internal_includes/debug.h" -#include "internal_includes/tokens.h" -#include "Operand.h" -#include -#include -#include - - -SHADER_VARIABLE_TYPE ShaderInfo::GetTextureDataType(uint32_t regNo) -{ - const ResourceBinding* psBinding = 0; - int found; - found = GetResourceFromBindingPoint(RGROUP_TEXTURE, regNo, &psBinding); - ASSERT(found != 0); - return psBinding->GetDataType(); -} - -void ShaderInfo::GetConstantBufferFromBindingPoint(const ResourceGroup eGroup, const uint32_t ui32BindPoint, const ConstantBuffer** ppsConstBuf) const -{ - ASSERT(ui32MajorVersion > 3); - *ppsConstBuf = &psConstantBuffers[aui32ResourceMap[eGroup][ui32BindPoint]]; -} - -int ShaderInfo::GetResourceFromBindingPoint(const ResourceGroup eGroup, uint32_t const ui32BindPoint, const ResourceBinding** ppsOutBinding) const -{ - size_t i; - const size_t ui32NumBindings = psResourceBindings.size(); - const ResourceBinding* psBindings = &psResourceBindings[0]; - - for (i = 0; i < ui32NumBindings; ++i) - { - if (ResourceTypeToResourceGroup(psBindings[i].eType) == eGroup) - { - if (ui32BindPoint >= psBindings[i].ui32BindPoint && ui32BindPoint < (psBindings[i].ui32BindPoint + psBindings[i].ui32BindCount)) - { - *ppsOutBinding = psBindings + i; - return 1; - } - } - } - return 0; -} - -int ShaderInfo::GetInterfaceVarFromOffset(uint32_t ui32Offset, ShaderVar** ppsShaderVar) const -{ - size_t i; - const size_t ui32NumVars = psThisPointerConstBuffer->asVars.size(); - - for (i = 0; i < ui32NumVars; ++i) - { - if (ui32Offset >= psThisPointerConstBuffer->asVars[i].ui32StartOffset && - ui32Offset < (psThisPointerConstBuffer->asVars[i].ui32StartOffset + 
psThisPointerConstBuffer->asVars[i].ui32Size)) - { - *ppsShaderVar = &psThisPointerConstBuffer->asVars[i]; - return 1; - } - } - return 0; -} - -int ShaderInfo::GetInputSignatureFromRegister(const uint32_t ui32Register, const uint32_t ui32Mask, const InOutSignature** ppsOut, bool allowNull /* == false */) const -{ - size_t i; - const size_t ui32NumVars = psInputSignatures.size(); - - for (i = 0; i < ui32NumVars; ++i) - { - if ((ui32Register == psInputSignatures[i].ui32Register) && (((~psInputSignatures[i].ui32Mask) & ui32Mask) == 0)) - { - *ppsOut = &psInputSignatures[i]; - return 1; - } - } - ASSERT(allowNull); - return 0; -} - -int ShaderInfo::GetPatchConstantSignatureFromRegister(const uint32_t ui32Register, const uint32_t ui32Mask, const InOutSignature** ppsOut, bool allowNull /* == false */) const -{ - size_t i; - const size_t ui32NumVars = psPatchConstantSignatures.size(); - - for (i = 0; i < ui32NumVars; ++i) - { - if ((ui32Register == psPatchConstantSignatures[i].ui32Register) && (((~psPatchConstantSignatures[i].ui32Mask) & ui32Mask) == 0)) - { - *ppsOut = &psPatchConstantSignatures[i]; - return 1; - } - } - - // There are situations (especially when using dcl_indexrange) where the compiler happily writes outside the actual masks. 
- // In those situations just take the last signature that uses that register (it's typically the "highest" one) - for (i = ui32NumVars - 1; i-- > 0;) - { - if (ui32Register == psPatchConstantSignatures[i].ui32Register) - { - *ppsOut = &psPatchConstantSignatures[i]; - return 1; - } - } - - ASSERT(allowNull); - return 0; -} - -int ShaderInfo::GetOutputSignatureFromRegister(const uint32_t ui32Register, - const uint32_t ui32CompMask, - const uint32_t ui32Stream, - const InOutSignature** ppsOut, - bool allowNull /* = false */) const -{ - size_t i; - const size_t ui32NumVars = psOutputSignatures.size(); - ASSERT(ui32CompMask != 0); - - for (i = 0; i < ui32NumVars; ++i) - { - if (ui32Register == psOutputSignatures[i].ui32Register && - (ui32CompMask & psOutputSignatures[i].ui32Mask) && - ui32Stream == psOutputSignatures[i].ui32Stream) - { - *ppsOut = &psOutputSignatures[i]; - return 1; - } - } - ASSERT(allowNull); - return 0; -} - -int ShaderInfo::GetOutputSignatureFromSystemValue(SPECIAL_NAME eSystemValueType, uint32_t ui32SemanticIndex, const InOutSignature** ppsOut) const -{ - size_t i; - const size_t ui32NumVars = psOutputSignatures.size(); - - for (i = 0; i < ui32NumVars; ++i) - { - if (eSystemValueType == psOutputSignatures[i].eSystemValueType && - ui32SemanticIndex == psOutputSignatures[i].ui32SemanticIndex) - { - *ppsOut = &psOutputSignatures[i]; - return 1; - } - } - ASSERT(0); - return 0; -} - -uint32_t ShaderInfo::GetCBVarSize(const ShaderVarType* psType, bool matrixAsVectors, bool wholeArraySize) -{ - // Default is regular matrices, vectors and scalars - uint32_t size = psType->Columns * psType->Rows * 4; - - // Struct size is calculated from the offset and size of its last member. - // Need to take into account that members could be arrays. 
- if (psType->Class == SVC_STRUCT) - { - size = psType->Members.back().Offset + GetCBVarSize(&psType->Members.back(), matrixAsVectors, true); - } - // Matrices represented as vec4 arrays have special size calculation - else if (matrixAsVectors) - { - if (psType->Class == SVC_MATRIX_ROWS) - { - size = psType->Rows * 16; - } - else if (psType->Class == SVC_MATRIX_COLUMNS) - { - size = psType->Columns * 16; - } - } - - if (wholeArraySize && psType->Elements > 1) - { - uint32_t paddedSize = ((size + 15) / 16) * 16; // Arrays are padded to float4 size - size = (psType->Elements - 1) * paddedSize + size; // Except the last element - } - - return size; -} - -static const ShaderVarType* IsOffsetInType(const ShaderVarType* psType, - uint32_t parentOffset, - uint32_t offsetToFind, - bool* isArray, - std::vector* arrayIndices, - int32_t* pi32Rebase, - uint32_t flags) -{ - uint32_t thisOffset = parentOffset + psType->Offset; - uint32_t thisSize = ShaderInfo::GetCBVarSize(psType, (flags & HLSLCC_FLAG_TRANSLATE_MATRICES) != 0); - uint32_t paddedSize = ((thisSize + 15) / 16) * 16; - uint32_t arraySize = thisSize; - - // Array elements are padded to align on vec4 size, except for the last one - if (psType->Elements) - arraySize = (paddedSize * (psType->Elements - 1)) + thisSize; - - if ((offsetToFind >= thisOffset) && - offsetToFind < (thisOffset + arraySize)) - { - *isArray = false; - if (psType->Class == SVC_STRUCT) - { - if (psType->Elements > 1 && arrayIndices != NULL) - arrayIndices->push_back((offsetToFind - thisOffset) / thisSize); - - // Need to bring offset back to element zero in case of array of structs - uint32_t offsetInStruct = (offsetToFind - thisOffset) % paddedSize; - uint32_t m = 0; - - for (m = 0; m < psType->MemberCount; ++m) - { - const ShaderVarType* psMember = &psType->Members[m]; - - const ShaderVarType* foundType = IsOffsetInType(psMember, thisOffset, thisOffset + offsetInStruct, isArray, arrayIndices, pi32Rebase, flags); - if (foundType != NULL) - return 
foundType; - } - } - // Check for array of scalars or vectors (both take up 16 bytes per element). - // Matrices are also treated as arrays of vectors. - else if ((psType->Class == SVC_MATRIX_ROWS || psType->Class == SVC_MATRIX_COLUMNS) || - ((psType->Class == SVC_SCALAR || psType->Class == SVC_VECTOR) && psType->Elements > 1)) - { - *isArray = true; - if (arrayIndices != NULL) - arrayIndices->push_back((offsetToFind - thisOffset) / 16); - } - else if (psType->Class == SVC_VECTOR) - { - //Check for vector starting at a non-vec4 offset. - - // cbuffer $Globals - // { - // - // float angle; // Offset: 0 Size: 4 - // float2 angle2; // Offset: 4 Size: 8 - // - // } - - //cb0[0].x = angle - //cb0[0].yzyy = angle2.xyxx - - //Rebase angle2 so that .y maps to .x, .z maps to .y - - pi32Rebase[0] = thisOffset % 16; - } - - return psType; - } - return NULL; -} - -int ShaderInfo::GetShaderVarFromOffset(const uint32_t ui32Vec4Offset, - const uint32_t(&pui32Swizzle)[4], - const ConstantBuffer* psCBuf, - const ShaderVarType** ppsShaderVar, // Output the found var - bool* isArray, // Output bool that tells if the found var is an array - std::vector* arrayIndices, // Output vector of array indices in order from root parent to the found var - int32_t* pi32Rebase, // Output swizzle rebase - uint32_t flags) -{ - size_t i; - - uint32_t ui32ByteOffset = ui32Vec4Offset * 16; - - //Swizzle can point to another variable. In the example below - //cbUIUpdates.g_uMaxFaces would be cb1[2].z. The scalars are combined - //into vectors. psCBuf->ui32NumVars will be 3. 
- - // cbuffer cbUIUpdates - // { - // float g_fLifeSpan; // Offset: 0 Size: 4 - // float g_fLifeSpanVar; // Offset: 4 Size: 4 [unused] - // float g_fRadiusMin; // Offset: 8 Size: 4 [unused] - // float g_fRadiusMax; // Offset: 12 Size: 4 [unused] - // float g_fGrowTime; // Offset: 16 Size: 4 [unused] - // float g_fStepSize; // Offset: 20 Size: 4 - // float g_fTurnRate; // Offset: 24 Size: 4 - // float g_fTurnSpeed; // Offset: 28 Size: 4 [unused] - // float g_fLeafRate; // Offset: 32 Size: 4 - // float g_fShrinkTime; // Offset: 36 Size: 4 [unused] - // uint g_uMaxFaces; // Offset: 40 Size: 4 - // } - if (pui32Swizzle[0] == OPERAND_4_COMPONENT_Y) - { - ui32ByteOffset += 4; - } - else if (pui32Swizzle[0] == OPERAND_4_COMPONENT_Z) - { - ui32ByteOffset += 8; - } - else if (pui32Swizzle[0] == OPERAND_4_COMPONENT_W) - { - ui32ByteOffset += 12; - } - - const size_t ui32NumVars = psCBuf->asVars.size(); - - for (i = 0; i < ui32NumVars; ++i) - { - ppsShaderVar[0] = IsOffsetInType(&psCBuf->asVars[i].sType, psCBuf->asVars[i].ui32StartOffset, ui32ByteOffset, isArray, arrayIndices, pi32Rebase, flags); - - if (ppsShaderVar[0] != NULL) - return 1; - } - return 0; -} - -// Patches the fullName of the var with given array indices. Does not insert the indexing for the var itself if it is an array. -// Searches for brackets and inserts indices one by one. -std::string ShaderInfo::GetShaderVarIndexedFullName(const ShaderVarType* psShaderVar, const std::vector& indices, const std::string& dynamicIndex, bool revertDynamicIndexCalc, bool matrixAsVectors) -{ - std::ostringstream oss; - size_t prevpos = 0; - size_t pos = psShaderVar->fullName.find('[', 0); - uint32_t i = 0; - while (pos != std::string::npos) - { - pos++; - oss << psShaderVar->fullName.substr(prevpos, pos - prevpos); - - // Add possibly given dynamic index for the root array. 
- if (i == 0 && !dynamicIndex.empty()) - { - oss << dynamicIndex; - - // if we couldn't use original index temp, revert the float4 address calc here - if (revertDynamicIndexCalc) - { - const ShaderVarType* psRootVar = psShaderVar; - while (psRootVar->Parent != NULL) - psRootVar = psRootVar->Parent; - - uint32_t thisSize = (GetCBVarSize(psRootVar, matrixAsVectors) + 15) / 16; // size in float4 - oss << " / " << thisSize; - } - - if (!indices.empty() && indices[i] != 0) - oss << " + " << indices[i]; - } - else if (i < indices.size()) - oss << indices[i]; - - prevpos = pos; - i++; - pos = psShaderVar->fullName.find('[', prevpos); - } - oss << psShaderVar->fullName.substr(prevpos); - - return oss.str(); -} - -ResourceGroup ShaderInfo::ResourceTypeToResourceGroup(ResourceType eType) -{ - switch (eType) - { - case RTYPE_CBUFFER: - return RGROUP_CBUFFER; - - case RTYPE_SAMPLER: - return RGROUP_SAMPLER; - - case RTYPE_TEXTURE: - case RTYPE_BYTEADDRESS: - case RTYPE_STRUCTURED: - return RGROUP_TEXTURE; - - case RTYPE_UAV_RWTYPED: - case RTYPE_UAV_RWSTRUCTURED: - case RTYPE_UAV_RWBYTEADDRESS: - case RTYPE_UAV_APPEND_STRUCTURED: - case RTYPE_UAV_CONSUME_STRUCTURED: - case RTYPE_UAV_RWSTRUCTURED_WITH_COUNTER: - return RGROUP_UAV; - - case RTYPE_TBUFFER: - ASSERT(0); // Need to find out which group this belongs to - return RGROUP_TEXTURE; - default: - break; - } - - ASSERT(0); - return RGROUP_CBUFFER; -} - -static inline std::string GetTextureNameFromSamplerName(const std::string& samplerIn) -{ - ASSERT(samplerIn.compare(0, 7, "sampler") == 0); - - // please note that we do not have hard rules about how sampler names should be structured - // what's more they can even skip texture name (but that should be handled separately) - // how do we try to deduce the texture name: we remove known tokens, and take the leftmost (first) "word" - // note that we want to support c-style naming (with underscores for spaces) - // as it is pretty normal to have texture name starting with 
underscore - // we bind underscores "to the right" - - // note that we want sampler state to be case insensitive - // while checking for a match could be done with strncasecmp/_strnicmp - // windows is missing case-insensetive "find substring" (strcasestr), so we transform to lowercase instead - std::string sampler = samplerIn; - for (std::string::iterator i = sampler.begin(), in = sampler.end(); i != in; ++i) - *i = std::tolower(*i); - - struct Token { const char* str; int len; }; - #define TOKEN(s) { s, (int)strlen(s) } - Token token[] = { - TOKEN("compare"), - TOKEN("point"), TOKEN("trilinear"), TOKEN("linear"), - TOKEN("clamp"), TOKEN("clampu"), TOKEN("clampv"), TOKEN("clampw"), - TOKEN("repeat"), TOKEN("repeatu"), TOKEN("repeatv"), TOKEN("repeatw"), - TOKEN("mirror"), TOKEN("mirroru"), TOKEN("mirrorv"), TOKEN("mirrorw"), - TOKEN("mirroronce"), TOKEN("mirroronceu"), TOKEN("mirroroncev"), TOKEN("mirroroncew"), - }; - #undef TOKEN - - const char* s = sampler.c_str(); - for (int texNameStart = 7; s[texNameStart];) - { - // skip underscores and find the potential beginning of a token - int tokenStart = texNameStart, tokenEnd = -1; - while (s[tokenStart] == '_') - ++tokenStart; - - // check token list for matches - for (int i = 0, n = sizeof(token) / sizeof(token[0]); i < n && tokenEnd < 0; ++i) - if (strncmp(s + tokenStart, token[i].str, token[i].len) == 0) - tokenEnd = tokenStart + token[i].len; - - if (tokenEnd < 0) - { - // we have found texture name - - // find next token - int nextTokenStart = sampler.length(); - for (int i = 0, n = sizeof(token) / sizeof(token[0]); i < n; ++i) - { - // again: note that we want to be case insensitive - const int pos = sampler.find(token[i].str, tokenStart); - - if (pos != std::string::npos && pos < nextTokenStart) - nextTokenStart = pos; - } - - // check preceeding underscores, but only if we have found an actual token (not the end of the string) - if (nextTokenStart < sampler.length()) - { - while (nextTokenStart > tokenStart 
&& s[nextTokenStart - 1] == '_') - --nextTokenStart; - } - - // note that we return the substring of the initial sampler name to preserve case - return samplerIn.substr(texNameStart, nextTokenStart - texNameStart); - } - else - { - // we have found known token - texNameStart = tokenEnd; - } - } - - // if we ended up here, the texture name is missing - return ""; -} - -// note that we dont have the means right now to have unit tests in hlslcc, so we do poor man testing below -// AddSamplerPrecisions is called once for every program, so it is easy to uncomment and test -static inline void Test_GetTextureNameFromSamplerName() -{ - #define CHECK(s, t) ASSERT(GetTextureNameFromSamplerName(std::string(s)) == std::string(t)) - - CHECK("sampler_point_clamp", ""); - CHECK("sampler_point_clamp_Tex", "_Tex"); - CHECK("sampler_point_clamp_Tex__", "_Tex__"); - CHECK("sampler_______point_Tex", "_Tex"); - - CHECK("samplerPointClamp", ""); - CHECK("samplerPointClamp_Tex", "_Tex"); - CHECK("samplerPointClamp_Tex__", "_Tex__"); - - CHECK("samplerPointTexClamp", "Tex"); - CHECK("samplerPoint_TexClamp", "_Tex"); - CHECK("samplerPoint_Tex_Clamp", "_Tex"); - - #undef CHECK -} - -void ShaderInfo::AddSamplerPrecisions(HLSLccSamplerPrecisionInfo &info) -{ - if (info.empty()) - return; - -#if _DEBUG && 0 - Test_GetTextureNameFromSamplerName(); -#endif - - for (size_t i = 0; i < psResourceBindings.size(); i++) - { - ResourceBinding *rb = &psResourceBindings[i]; - if (rb->eType != RTYPE_SAMPLER && rb->eType != RTYPE_TEXTURE && rb->eType != RTYPE_UAV_RWTYPED) - continue; - - // Try finding the exact match - HLSLccSamplerPrecisionInfo::iterator j = info.find(rb->name); - - // If match not found, check if name has "sampler" prefix (DX11 style sampler case) - // then we try to recover texture name from sampler name - if (j == info.end() && rb->name.compare(0, 7, "sampler") == 0) - j = info.find(GetTextureNameFromSamplerName(rb->name)); - - // note that if we didnt find the respective texture, we 
cannot say anything about sampler precision - // currently it will become "unknown" resulting in half format, even if we sample with it the texture explicitly marked as float - // TODO: should we somehow allow overriding it? - if (j != info.end()) - rb->ePrecision = j->second; - } -} diff --git a/third_party/HLSLcc/src/UseDefineChains.cpp b/third_party/HLSLcc/src/UseDefineChains.cpp deleted file mode 100644 index f6f7e89..0000000 --- a/third_party/HLSLcc/src/UseDefineChains.cpp +++ /dev/null @@ -1,814 +0,0 @@ -#include "internal_includes/UseDefineChains.h" -#include "internal_includes/debug.h" -#include "internal_includes/Instruction.h" - -#include "internal_includes/ControlFlowGraph.h" -#include "internal_includes/debug.h" -#include "internal_includes/HLSLccToolkit.h" -#include - -using HLSLcc::ForEachOperand; - -#define DEBUG_UDCHAINS 0 - -#if DEBUG_UDCHAINS -// Debug mode -static void UDCheckConsistencyDUChain(uint32_t idx, DefineUseChains &psDUChains, UseDefineChains &psUDChains, ActiveDefinitions &activeDefinitions) -{ - DefineUseChain::iterator du = psDUChains[idx].begin(); - UseDefineChain::iterator ud = psUDChains[idx].begin(); - while (du != psDUChains[idx].end()) - { - ASSERT(du->index == idx % 4); - // Check that the definition actually writes to idx - { - uint32_t tempReg = idx / 4; - uint32_t offs = idx - (tempReg * 4); - uint32_t accessMask = 1 << offs; - uint32_t i; - int found = 0; - for (i = 0; i < du->psInst->ui32FirstSrc; i++) - { - if (du->psInst->asOperands[i].eType == OPERAND_TYPE_TEMP) - { - if (du->psInst->asOperands[i].ui32RegisterNumber == tempReg) - { - uint32_t writeMask = GetOperandWriteMask(&du->psInst->asOperands[i]); - if (writeMask & accessMask) - { - ASSERT(writeMask == du->writeMask); - found = 1; - break; - } - } - } - } - ASSERT(found); - } - - // Check that each usage of each definition also is found in the use-define chain - UsageSet::iterator ul = du->usages.begin(); - while (ul != du->usages.end()) - { - // Search for the 
usage in the chain - UseDefineChain::iterator use = ud; - while (use != psUDChains[idx].end() && &*use != *ul) - use++; - ASSERT(use != psUDChains[idx].end()); - ASSERT(&*use == *ul); - - // Check that the mapping back is also found - ASSERT(std::find(use->defines.begin(), use->defines.end(), &*du) != use->defines.end()); - - ul++; - } - - du++; - } -} - -static void UDCheckConsistencyUDChain(uint32_t idx, DefineUseChains &psDUChains, UseDefineChains &psUDChains, ActiveDefinitions &activeDefinitions) -{ - DefineUseChain::iterator du = psDUChains[idx].begin(); - UseDefineChain::iterator ud = psUDChains[idx].begin(); - while (ud != psUDChains[idx].end()) - { - // Check that each definition of each usage also is found in the define-use chain - DefineSet::iterator dl = ud->defines.begin(); - ASSERT(ud->psOp->ui32RegisterNumber == idx / 4); - ASSERT(ud->index == idx % 4); - while (dl != ud->defines.end()) - { - // Search for the definition in the chain - DefineUseChain::iterator def = du; - while (def != psDUChains[idx].end() && &*def != *dl) - def++; - ASSERT(def != psDUChains[idx].end()); - ASSERT(&*def == *dl); - - // Check that the mapping back is also found - ASSERT(std::find(def->usages.begin(), def->usages.end(), &*ud) != def->usages.end()); - - dl++; - } - ud++; - } -} - -static void UDCheckConsistency(uint32_t tempRegs, DefineUseChains &psDUChains, UseDefineChains &psUDChains, ActiveDefinitions &activeDefinitions) -{ - uint32_t i; - for (i = 0; i < tempRegs * 4; i++) - { - UDCheckConsistencyDUChain(i, psDUChains, psUDChains, activeDefinitions); - UDCheckConsistencyUDChain(i, psDUChains, psUDChains, activeDefinitions); - } -} - -#define printf_console printf - -#endif - -using namespace HLSLcc::ControlFlow; -using std::for_each; - -static DefineUseChainEntry *GetOrCreateDefinition(const BasicBlock::Definition &def, DefineUseChain &psDUChain, uint32_t index) -{ - // Try to find an existing entry - auto itr = std::find_if(psDUChain.begin(), psDUChain.end(), 
[&](const DefineUseChainEntry &de) - { - return de.psInst == def.m_Instruction && de.psOp == def.m_Operand; - }); - - if (itr != psDUChain.end()) - { - return &(*itr); - } - - // Not found, create - psDUChain.push_front(DefineUseChainEntry()); - DefineUseChainEntry &de = *psDUChain.begin(); - - de.psInst = (Instruction *)def.m_Instruction; - de.psOp = (Operand *)def.m_Operand; - de.index = index; - de.writeMask = def.m_Operand->GetAccessMask(); - de.psSiblings[index] = &de; - - return &de; -} - -// Do flow control analysis on the instructions and build the define-use and use-define chains -void BuildUseDefineChains(std::vector &instructions, uint32_t ui32NumTemps, DefineUseChains &psDUChain, UseDefineChains &psUDChain, HLSLcc::ControlFlow::ControlFlowGraph &cfg) -{ - ActiveDefinitions lastSeenDefinitions(ui32NumTemps * 4, NULL); // Array of pointers to the currently active definition for each temp - - psDUChain.clear(); - psUDChain.clear(); - - for (uint32_t i = 0; i < ui32NumTemps * 4; i++) - { - psUDChain.insert(std::make_pair(i, UseDefineChain())); - psDUChain.insert(std::make_pair(i, DefineUseChain())); - } - - const ControlFlowGraph::BasicBlockStorage &blocks = cfg.AllBlocks(); - - // Loop through each block, first calculate the union of all the reachables of all preceding blocks - // and then build on that as we go along the basic block instructions - for_each(blocks.begin(), blocks.end(), [&](const HLSLcc::shared_ptr &bptr) - { - const BasicBlock &b = *bptr.get(); - BasicBlock::ReachableVariables rvars; - for_each(b.Preceding().begin(), b.Preceding().end(), [&](const Instruction *precBlock) - { - const BasicBlock &b = *cfg.GetBasicBlockForInstruction(precBlock); - BasicBlock::RVarUnion(rvars, b.Reachable()); - }); - - // Now we have a Reachable set for the beginning of this block in rvars. 
Loop through all instructions and their operands and pick up uses and definitions - for (const Instruction *inst = b.First(); inst <= b.Last(); inst++) - { - // Process sources first - ForEachOperand(inst, inst + 1, FEO_FLAG_SRC_OPERAND | FEO_FLAG_SUBOPERAND, - [&](const Instruction *psInst, const Operand *psOperand, uint32_t ui32OperandType) - { - if (psOperand->eType != OPERAND_TYPE_TEMP) - return; - - uint32_t tempReg = psOperand->ui32RegisterNumber; - uint32_t accessMask = psOperand->GetAccessMask(); - - // Go through each component - for (int k = 0; k < 4; k++) - { - if (!(accessMask & (1 << k))) - continue; - - uint32_t regIdx = tempReg * 4 + k; - - // Add an use for all visible definitions - psUDChain[regIdx].push_front(UseDefineChainEntry()); - UseDefineChainEntry &ue = *psUDChain[regIdx].begin(); - ue.psInst = (Instruction *)psInst; - ue.psOp = (Operand *)psOperand; - ue.accessMask = accessMask; - ue.index = k; - ue.psSiblings[k] = &ue; - // ue.siblings will be filled out later. 
- - BasicBlock::ReachableDefinitionsPerVariable& rpv = rvars[regIdx]; - for_each(rpv.begin(), rpv.end(), [&](const BasicBlock::Definition &def) - { - DefineUseChainEntry *duentry = GetOrCreateDefinition(def, psDUChain[regIdx], k); - ue.defines.insert(duentry); - duentry->usages.insert(&ue); - }); - } - return; - }); - - // Then the destination operands - ForEachOperand(inst, inst + 1, FEO_FLAG_DEST_OPERAND, - [&](const Instruction *psInst, const Operand *psOperand, uint32_t ui32OperandType) - { - if (psOperand->eType != OPERAND_TYPE_TEMP) - return; - - uint32_t tempReg = psOperand->ui32RegisterNumber; - uint32_t accessMask = psOperand->GetAccessMask(); - - // Go through each component - for (int k = 0; k < 4; k++) - { - if (!(accessMask & (1 << k))) - continue; - - uint32_t regIdx = tempReg * 4 + k; - - // Overwrite whatever's in rvars; they are killed by this - rvars[regIdx].clear(); - rvars[regIdx].insert(BasicBlock::Definition(psInst, psOperand)); - - // Make sure the definition gets created even though it doesn't have any uses at all - // (happens when sampling a texture but not all channels are used etc). - GetOrCreateDefinition(BasicBlock::Definition(psInst, psOperand), psDUChain[regIdx], k); - } - return; - }); - } - }); - - // Connect the siblings for all uses and definitions - for_each(psUDChain.begin(), psUDChain.end(), [&](std::pair &udpair) - { - UseDefineChain &ud = udpair.second; - // Clear out the bottom 2 bits to get the actual base reg - uint32_t baseReg = udpair.first & ~(3); - - for_each(ud.begin(), ud.end(), [&](UseDefineChainEntry &ue) - { - ASSERT(baseReg / 4 == ue.psOp->ui32RegisterNumber); - - // Go through each component - for (int k = 0; k < 4; k++) - { - // Skip components that we don't access, or the one that's our own - if (!(ue.accessMask & (1 << k)) || ue.index == k) - continue; - - // Find the corresponding sibling. We can uniquely identify it by the operand pointer alone. 
- UseDefineChain::iterator siblItr = std::find_if(psUDChain[baseReg + k].begin(), psUDChain[baseReg + k].end(), [&](const UseDefineChainEntry &_sibl) -> bool { return _sibl.psOp == ue.psOp; }); - ASSERT(siblItr != psUDChain[baseReg + k].end()); - UseDefineChainEntry &sibling = *siblItr; - ue.psSiblings[k] = &sibling; - } - }); - }); - - // Same for definitions - for_each(psDUChain.begin(), psDUChain.end(), [&](std::pair &dupair) - { - DefineUseChain &du = dupair.second; - // Clear out the bottom 2 bits to get the actual base reg - uint32_t baseReg = dupair.first & ~(3); - - for_each(du.begin(), du.end(), [&](DefineUseChainEntry &de) - { - ASSERT(baseReg / 4 == de.psOp->ui32RegisterNumber); - - // Go through each component - for (int k = 0; k < 4; k++) - { - // Skip components that we don't access, or the one that's our own - if (!(de.writeMask & (1 << k)) || de.index == k) - continue; - - // Find the corresponding sibling. We can uniquely identify it by the operand pointer alone. - DefineUseChain::iterator siblItr = std::find_if(psDUChain[baseReg + k].begin(), psDUChain[baseReg + k].end(), [&](const DefineUseChainEntry &_sibl) -> bool { return _sibl.psOp == de.psOp; }); - ASSERT(siblItr != psDUChain[baseReg + k].end()); - DefineUseChainEntry &sibling = *siblItr; - de.psSiblings[k] = &sibling; - } - }); - }); - -#if DEBUG_UDCHAINS - UDCheckConsistency(ui32NumTemps, psDUChain, psUDChain, lastSeenDefinitions); -#endif -} - -typedef std::vector SplitDefinitions; - -// Split out a define to use a new temp register -static void UDDoSplit(SplitDefinitions &defs, uint32_t *psNumTemps, DefineUseChains &psDUChains, UseDefineChains &psUDChains, std::vector &pui32SplitTable) -{ - uint32_t newReg = *psNumTemps; - uint32_t oldReg = defs[0]->psOp->ui32RegisterNumber; - uint32_t accessMask = defs[0]->writeMask; - uint32_t i, u32def; - uint32_t rebase, count; - uint32_t splitTableValue; - - ASSERT(defs.size() > 0); - for (i = 1; i < defs.size(); i++) - { - 
ASSERT(defs[i]->psOp->ui32RegisterNumber == oldReg); - accessMask |= defs[i]->writeMask; - } - - - (*psNumTemps)++; - - -#if DEBUG_UDCHAINS - UDCheckConsistency((*psNumTemps) - 1, psDUChains, psUDChains, ActiveDefinitions()); -#endif - ASSERT(accessMask != 0 && accessMask <= 0xf); - // Calculate rebase value and component count - rebase = 0; - count = 0; - i = accessMask; - while ((i & 1) == 0) - { - rebase++; - i = i >> 1; - } - while (i != 0) - { - count++; - i = i >> 1; - } - - // Make sure there's enough room in the split table - if (pui32SplitTable.size() <= newReg) - { - size_t newSize = pui32SplitTable.size() * 2; - pui32SplitTable.resize(newSize, 0xffffffff); - } - - // Set the original temp of the new register - { - uint32_t origTemp = oldReg; - while (pui32SplitTable[origTemp] != 0xffffffff) - origTemp = pui32SplitTable[origTemp] & 0xffff; - - ASSERT(rebase < 4); - ASSERT(count <= 4); - splitTableValue = (count << 24) | (rebase << 16) | origTemp; - - pui32SplitTable[newReg] = splitTableValue; - } - - // Insert the new temps to the map - for (i = newReg * 4; i < newReg * 4 + 4; i++) - { - psUDChains.insert(std::make_pair(i, UseDefineChain())); - psDUChains.insert(std::make_pair(i, DefineUseChain())); - } - - for (u32def = 0; u32def < defs.size(); u32def++) - { - DefineUseChainEntry *defineToSplit = defs[u32def]; - uint32_t oldIdx = defineToSplit->index; -#if DEBUG_UDCHAINS - printf("Split def at instruction %d (reg %d -> %d, access %X, rebase %d, count: %d)\n", (int)defineToSplit->psInst->id, oldReg, newReg, accessMask, rebase, count); -#endif - - // We may have moved the opcodes already because of multiple defines pointing to the same op - if (defineToSplit->psOp->ui32RegisterNumber != newReg) - { - ASSERT(defineToSplit->psOp->ui32RegisterNumber == oldReg); - // Update the declaration operand - // Don't change possible suboperands as they are sources - defineToSplit->psInst->ChangeOperandTempRegister(defineToSplit->psOp, oldReg, newReg, accessMask, 
UD_CHANGE_MAIN_OPERAND, rebase); - } - - defineToSplit->writeMask >>= rebase; - defineToSplit->index -= rebase; - // Change the temp register number for all usages - UsageSet::iterator ul = defineToSplit->usages.begin(); - while (ul != defineToSplit->usages.end()) - { - // Already updated by one of the siblings? Skip. - if ((*ul)->psOp->ui32RegisterNumber != newReg) - { - ASSERT((*ul)->psOp->ui32RegisterNumber == oldReg); - (*ul)->psInst->ChangeOperandTempRegister((*ul)->psOp, oldReg, newReg, accessMask, UD_CHANGE_MAIN_OPERAND, rebase); - } - - // Update the UD chain - { - UseDefineChain::iterator udLoc = psUDChains[oldReg * 4 + oldIdx].begin(); - while (udLoc != psUDChains[oldReg * 4 + oldIdx].end()) - { - if (&*udLoc == *ul) - { - // Move to new list - psUDChains[newReg * 4 + oldIdx - rebase].splice(psUDChains[newReg * 4 + oldIdx - rebase].begin(), psUDChains[oldReg * 4 + oldIdx], udLoc); - - if (rebase > 0) - { - (*ul)->accessMask >>= rebase; - (*ul)->index -= rebase; - memmove((*ul)->psSiblings, (*ul)->psSiblings + rebase, (4 - rebase) * sizeof(UseDefineChain *)); - } - break; - } - udLoc++; - } - } - - ul++; - } - - // Move the define out of the old chain (if its still there) - { - // Find the define in the old chain - DefineUseChain::iterator duLoc = psDUChains[oldReg * 4 + oldIdx].begin(); - while (duLoc != psDUChains[oldReg * 4 + oldIdx].end() && ((&*duLoc) != defineToSplit)) - { - duLoc++; - } - ASSERT(duLoc != psDUChains[oldReg * 4 + oldIdx].end()); - { - // Move directly to new chain - psDUChains[newReg * 4 + oldIdx - rebase].splice(psDUChains[newReg * 4 + oldIdx - rebase].begin(), psDUChains[oldReg * 4 + oldIdx], duLoc); - if (rebase != 0) - { - memmove(defineToSplit->psSiblings, defineToSplit->psSiblings + rebase, (4 - rebase) * sizeof(DefineUseChain *)); - } - } - } - } - -#if DEBUG_UDCHAINS - UDCheckConsistency(*psNumTemps, psDUChains, psUDChains, ActiveDefinitions()); -#endif -} - -// Adds a define and all its siblings to the list, checking 
duplicates -static void AddDefineToList(SplitDefinitions &defs, DefineUseChainEntry *newDef) -{ - uint32_t k; - for (k = 0; k < 4; k++) - { - if (newDef->psSiblings[k]) - { - DefineUseChainEntry *defToAdd = newDef->psSiblings[k]; - uint32_t m; - int defFound = 0; - for (m = 0; m < defs.size(); m++) - { - if (defs[m] == defToAdd) - { - defFound = 1; - break; - } - } - if (defFound == 0) - { - defs.push_back(newDef->psSiblings[k]); - } - } - } -} - -// Check if a set of definitions can be split and does the split. Returns nonzero if a split took place -static int AttemptSplitDefinitions(SplitDefinitions &defs, uint32_t *psNumTemps, DefineUseChains &psDUChains, UseDefineChains &psUDChains, std::vector &pui32SplitTable) -{ - uint32_t reg; - uint32_t combinedMask; - uint32_t i, k, u32def; - int canSplit = 1; - DefineUseChain::iterator du; - int hasLeftoverDefinitions = 0; - // Initial checks: all definitions must: - // Access the same register - // Have at least one definition in any of the 4 register slots that isn't included - if (defs.empty()) - return 0; - - reg = defs[0]->psOp->ui32RegisterNumber; - combinedMask = defs[0]->writeMask; - for (i = 1; i < defs.size(); i++) - { - if (reg != defs[i]->psOp->ui32RegisterNumber) - return 0; - - combinedMask |= defs[i]->writeMask; - } - for (i = 0; i < 4; i++) - { - du = psDUChains[reg * 4 + i].begin(); - while (du != psDUChains[reg * 4 + i].end()) - { - int defFound = 0; - for (k = 0; k < defs.size(); k++) - { - if (&*du == defs[k]) - { - defFound = 1; - break; - } - } - if (defFound == 0) - { - hasLeftoverDefinitions = 1; - break; - } - du++; - } - if (hasLeftoverDefinitions) - break; - } - // We'd be splitting the entire register and all its definitions, no point in that. - if (hasLeftoverDefinitions == 0) - return 0; - - // Check all the definitions. Any of them must not have any usages that see any definitions not in our defs array. 
- for (u32def = 0; u32def < defs.size(); u32def++) - { - DefineUseChainEntry *def = defs[u32def]; - - UsageSet::iterator ul = def->usages.begin(); - while (ul != def->usages.end()) - { - uint32_t j; - - // Check that we only read a subset of the combined writemask - if (((*ul)->accessMask & (~combinedMask)) != 0) - { - // Do an additional attempt, pick up all the sibling definitions as well - // Only do this if we have the space in the definitions table - for (j = 0; j < 4; j++) - { - if (((*ul)->accessMask & (1 << j)) == 0) - continue; - AddDefineToList(defs, *(*ul)->psSiblings[j]->defines.begin()); - } - return AttemptSplitDefinitions(defs, psNumTemps, psDUChains, psUDChains, pui32SplitTable); - } - - // It must have at least one declaration - ASSERT(!(*ul)->defines.empty()); - - // Check that all siblings for the usage use one of the definitions - for (j = 0; j < 4; j++) - { - uint32_t m; - int defineFound = 0; - if (((*ul)->accessMask & (1 << j)) == 0) - continue; - - ASSERT((*ul)->psSiblings[j] != NULL); - ASSERT(!(*ul)->psSiblings[j]->defines.empty()); - - // Check that all definitions for this usage are found from the definitions table - DefineSet::iterator dl = (*ul)->psSiblings[j]->defines.begin(); - while (dl != (*ul)->psSiblings[j]->defines.end()) - { - defineFound = 0; - for (m = 0; m < defs.size(); m++) - { - if (*dl == defs[m]) - { - defineFound = 1; - break; - } - } - if (defineFound == 0) - { - // Add this define and all its siblings to the table and try again - AddDefineToList(defs, *dl); - return AttemptSplitDefinitions(defs, psNumTemps, psDUChains, psUDChains, pui32SplitTable); - } - - dl++; - } - - if (defineFound == 0) - { - canSplit = 0; - break; - } - } - if (canSplit == 0) - break; - - // This'll do, check next usage - ul++; - } - if (canSplit == 0) - break; - } - if (canSplit) - { - UDDoSplit(defs, psNumTemps, psDUChains, psUDChains, pui32SplitTable); - return 1; - } - return 0; -} - -// Do temp splitting based on use-define chains -void 
UDSplitTemps(uint32_t *psNumTemps, DefineUseChains &psDUChains, UseDefineChains &psUDChains, std::vector &pui32SplitTable) -{ - // Algorithm overview: - // Take each definition and look at all its usages. If all usages only see this definition (and this is not the only definition for this variable), - // split it out. - uint32_t i; - uint32_t tempsAtStart = *psNumTemps; // We don't need to try to analyze the newly created ones, they're unsplittable by definition - for (i = 0; i < tempsAtStart * 4; i++) - { - // No definitions? - if (psDUChains[i].empty()) - continue; - - DefineUseChain::iterator du = psDUChains[i].begin(); - // Ok we have multiple definitions for a temp, check them through - while (du != psDUChains[i].end()) - { - SplitDefinitions sd; - AddDefineToList(sd, &*du); - du++; - // If we split, we'll have to start from the beginning of this chain because du might no longer be in this chain - if (AttemptSplitDefinitions(sd, psNumTemps, psDUChains, psUDChains, pui32SplitTable)) - { - du = psDUChains[i].begin(); - } - } - } -} - -// Returns true if all the usages of this definitions are instructions that deal with floating point data -static bool HasOnlyFloatUsages(DefineUseChain::iterator du) -{ - UsageSet::iterator itr = du->usages.begin(); - for (; itr != du->usages.end(); itr++) - { - Instruction *psInst = (*itr)->psInst; - - if ((*itr)->psOp->eMinPrecision != OPERAND_MIN_PRECISION_DEFAULT) - return false; - - switch (psInst->eOpcode) - { - case OPCODE_ADD: - case OPCODE_MUL: - case OPCODE_MOV: - case OPCODE_MAD: - case OPCODE_DIV: - case OPCODE_LOG: - case OPCODE_EXP: - case OPCODE_MAX: - case OPCODE_MIN: - case OPCODE_DP2: - case OPCODE_DP2ADD: - case OPCODE_DP3: - case OPCODE_DP4: - case OPCODE_RSQ: - case OPCODE_SQRT: - break; - default: - return false; - } - } - return true; -} - -// Based on the sampler precisions, downgrade the definitions if possible. 
-void UpdateSamplerPrecisions(const ShaderInfo &info, DefineUseChains &psDUChains, uint32_t ui32NumTemps) -{ - uint32_t madeProgress = 0; - do - { - uint32_t i; - madeProgress = 0; - for (i = 0; i < ui32NumTemps * 4; i++) - { - DefineUseChain::iterator du = psDUChains[i].begin(); - while (du != psDUChains[i].end()) - { - OPERAND_MIN_PRECISION sType = OPERAND_MIN_PRECISION_DEFAULT; - if (du->psInst->IsPartialPrecisionSamplerInstruction(info, &sType) - && du->psInst->asOperands[0].eType == OPERAND_TYPE_TEMP - && du->psInst->asOperands[0].eMinPrecision == OPERAND_MIN_PRECISION_DEFAULT - && du->isStandalone - && HasOnlyFloatUsages(du)) - { - uint32_t sibl; - // Ok we can change the precision. - ASSERT(du->psOp->eType == OPERAND_TYPE_TEMP); - ASSERT(sType != OPERAND_MIN_PRECISION_DEFAULT); - du->psOp->eMinPrecision = sType; - - // Update all the uses of all the siblings - for (sibl = 0; sibl < 4; sibl++) - { - if (!du->psSiblings[sibl]) - continue; - - UsageSet::iterator ul = du->psSiblings[sibl]->usages.begin(); - while (ul != du->psSiblings[sibl]->usages.end()) - { - ASSERT((*ul)->psOp->eMinPrecision == OPERAND_MIN_PRECISION_DEFAULT || - (*ul)->psOp->eMinPrecision == sType); - // We may well write this multiple times to the same op but that's fine. 
- (*ul)->psOp->eMinPrecision = sType; - - ul++; - } - } - madeProgress = 1; - } - du++; - } - } - } - while (madeProgress != 0); -} - -void CalculateStandaloneDefinitions(DefineUseChains &psDUChains, uint32_t ui32NumTemps) -{ - uint32_t i; - for (i = 0; i < ui32NumTemps * 4; i++) - { - DefineUseChain::iterator du = psDUChains[i].begin(); - while (du != psDUChains[i].end()) - { - uint32_t sibl; - int isStandalone = 1; - if (du->isStandalone) - { - du++; - continue; - } - - for (sibl = 0; sibl < 4; sibl++) - { - if (!du->psSiblings[sibl]) - continue; - - UsageSet::iterator ul = du->psSiblings[sibl]->usages.begin(); - while (ul != du->psSiblings[sibl]->usages.end()) - { - uint32_t k; - ASSERT(!(*ul)->defines.empty()); - - // Need to check that all the siblings of this usage only see this definition's corresponding sibling - for (k = 0; k < 4; k++) - { - if (!(*ul)->psSiblings[k]) - continue; - - if ((*ul)->psSiblings[k]->defines.size() > 1 - || *(*ul)->psSiblings[k]->defines.begin() != du->psSiblings[k]) - { - isStandalone = 0; - break; - } - } - if (isStandalone == 0) - break; - - ul++; - } - if (isStandalone == 0) - break; - } - - if (isStandalone) - { - // Yep, mark it - for (sibl = 0; sibl < 4; sibl++) - { - if (!du->psSiblings[sibl]) - continue; - du->psSiblings[sibl]->isStandalone = 1; - } - } - du++; - } - } -} - -// Write the uses and defines back to Instruction and Operand member lists. 
-void WriteBackUsesAndDefines(DefineUseChains &psDUChains) -{ - using namespace std; - // Loop through the whole data structure, and write usages and defines to Instructions and Operands as we see them - for_each(psDUChains.begin(), psDUChains.end(), [](const DefineUseChains::value_type &itr) - { - const DefineUseChain &duChain = itr.second; - for_each(duChain.begin(), duChain.end(), [](const DefineUseChain::value_type &du) - { - for_each(du.usages.begin(), du.usages.end(), [&du](const UseDefineChainEntry *usage) - { - // Update instruction use list - du.psInst->m_Uses.push_back(Instruction::Use(usage->psInst, usage->psOp)); - // And the usage's definition - usage->psOp->m_Defines.push_back(Operand::Define(du.psInst, du.psOp)); - }); - }); - }); -} diff --git a/third_party/HLSLcc/src/cbstring/bsafe.c b/third_party/HLSLcc/src/cbstring/bsafe.c deleted file mode 100644 index 6503761..0000000 --- a/third_party/HLSLcc/src/cbstring/bsafe.c +++ /dev/null @@ -1,87 +0,0 @@ -/* - * This source file is part of the bstring string library. This code was - * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause - * BSD open source license or GPL v2.0. Refer to the accompanying documentation - * for details on usage and license. - */ - -/* - * bsafe.c - * - * This is an optional module that can be used to help enforce a safety - * standard based on pervasive usage of bstrlib. This file is not necessarily - * portable, however, it has been tested to work correctly with Intel's C/C++ - * compiler, WATCOM C/C++ v11.x and Microsoft Visual C++. 
- */ - -#include -#include -#include "bsafe.h" - -#if 0 -static int bsafeShouldExit = 1; - -char * strcpy(char *dst, const char *src); -char * strcat(char *dst, const char *src); - -char * strcpy(char *dst, const char *src) -{ - dst = dst; - src = src; - fprintf(stderr, "bsafe error: strcpy() is not safe, use bstrcpy instead.\n"); - if (bsafeShouldExit) exit(-1); - return NULL; -} - -char * strcat(char *dst, const char *src) -{ - dst = dst; - src = src; - fprintf(stderr, "bsafe error: strcat() is not safe, use bstrcat instead.\n"); - if (bsafeShouldExit) exit(-1); - return NULL; -} - -#if !defined(__GNUC__) && (!defined(_MSC_VER) || (_MSC_VER <= 1310)) -char * (gets)(char * buf) { - buf = buf; - fprintf(stderr, "bsafe error: gets() is not safe, use bgets.\n"); - if (bsafeShouldExit) exit(-1); - return NULL; -} -#endif - -char * (strncpy)(char *dst, const char *src, size_t n) { - dst = dst; - src = src; - n = n; - fprintf(stderr, "bsafe error: strncpy() is not safe, use bmidstr instead.\n"); - if (bsafeShouldExit) exit(-1); - return NULL; -} - -char * (strncat)(char *dst, const char *src, size_t n) { - dst = dst; - src = src; - n = n; - fprintf(stderr, "bsafe error: strncat() is not safe, use bstrcat then btrunc\n\tor cstr2tbstr, btrunc then bstrcat instead.\n"); - if (bsafeShouldExit) exit(-1); - return NULL; -} - -char * (strtok)(char *s1, const char *s2) { - s1 = s1; - s2 = s2; - fprintf(stderr, "bsafe error: strtok() is not safe, use bsplit or bsplits instead.\n"); - if (bsafeShouldExit) exit(-1); - return NULL; -} - -char * (strdup)(const char *s) { - s = s; - fprintf(stderr, "bsafe error: strdup() is not safe, use bstrcpy.\n"); - if (bsafeShouldExit) exit(-1); - return NULL; -} - -#endif diff --git a/third_party/HLSLcc/src/cbstring/bsafe.h b/third_party/HLSLcc/src/cbstring/bsafe.h deleted file mode 100644 index d921917..0000000 --- a/third_party/HLSLcc/src/cbstring/bsafe.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * This source file is part of the bstring string 
library. This code was - * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause - * BSD open source license or GPL v2.0. Refer to the accompanying documentation - * for details on usage and license. - */ - -/* - * bsafe.h - * - * This is an optional module that can be used to help enforce a safety - * standard based on pervasive usage of bstrlib. This file is not necessarily - * portable, however, it has been tested to work correctly with Intel's C/C++ - * compiler, WATCOM C/C++ v11.x and Microsoft Visual C++. - */ - -#ifndef BSTRLIB_BSAFE_INCLUDE -#define BSTRLIB_BSAFE_INCLUDE - -#ifdef __cplusplus -extern "C" { -#endif - -#if !defined(__GNUC__) && (!defined(_MSC_VER) || (_MSC_VER <= 1310)) -/* This is caught in the linker, so its not necessary for gcc. */ -extern char * (gets)(char * buf); -#endif - -extern char * (strncpy)(char *dst, const char *src, size_t n); -extern char * (strncat)(char *dst, const char *src, size_t n); -extern char * (strtok)(char *s1, const char *s2); -extern char * (strdup)(const char *s); - -#undef strcpy -#undef strcat -#define strcpy(a, b) bsafe_strcpy(a,b) -#define strcat(a, b) bsafe_strcat(a,b) - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/third_party/HLSLcc/src/cbstring/bstraux.c b/third_party/HLSLcc/src/cbstring/bstraux.c deleted file mode 100644 index 34cb3d3..0000000 --- a/third_party/HLSLcc/src/cbstring/bstraux.c +++ /dev/null @@ -1,1273 +0,0 @@ -/* - * This source file is part of the bstring string library. This code was - * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause - * BSD open source license or GPL v2.0. Refer to the accompanying documentation - * for details on usage and license. - */ - -/* - * bstraux.c - * - * This file is not necessarily part of the core bstring library itself, but - * is just an auxilliary module which includes miscellaneous or trivial - * functions. 
- */ - -#include -#include -#include -#include -#include -#include "bstrlib.h" -#include "bstraux.h" - -/* bstring bTail (bstring b, int n) - * - * Return with a string of the last n characters of b. - */ -bstring bTail(bstring b, int n) -{ - if (b == NULL || n < 0 || (b->mlen < b->slen && b->mlen > 0)) return NULL; - if (n >= b->slen) return bstrcpy(b); - return bmidstr(b, b->slen - n, n); -} - -/* bstring bHead (bstring b, int n) - * - * Return with a string of the first n characters of b. - */ -bstring bHead(bstring b, int n) -{ - if (b == NULL || n < 0 || (b->mlen < b->slen && b->mlen > 0)) return NULL; - if (n >= b->slen) return bstrcpy(b); - return bmidstr(b, 0, n); -} - -/* int bFill (bstring a, char c, int len) - * - * Fill a given bstring with the character in parameter c, for a length n. - */ -int bFill(bstring b, char c, int len) -{ - if (b == NULL || len < 0 || (b->mlen < b->slen && b->mlen > 0)) return -__LINE__; - b->slen = 0; - return bsetstr(b, len, NULL, c); -} - -/* int bReplicate (bstring b, int n) - * - * Replicate the contents of b end to end n times and replace it in b. - */ -int bReplicate(bstring b, int n) -{ - return bpattern(b, n * b->slen); -} - -/* int bReverse (bstring b) - * - * Reverse the contents of b in place. - */ -int bReverse(bstring b) -{ - int i, n, m; - unsigned char t; - - if (b == NULL || b->slen < 0 || b->mlen < b->slen) return -__LINE__; - n = b->slen; - if (2 <= n) - { - m = ((unsigned)n) >> 1; - n--; - for (i = 0; i < m; i++) - { - t = b->data[n - i]; - b->data[n - i] = b->data[i]; - b->data[i] = t; - } - } - return 0; -} - -/* int bInsertChrs (bstring b, int pos, int len, unsigned char c, unsigned char fill) - * - * Insert a repeated sequence of a given character into the string at - * position pos for a length len. 
- */ -int bInsertChrs(bstring b, int pos, int len, unsigned char c, unsigned char fill) -{ - if (b == NULL || b->slen < 0 || b->mlen < b->slen || pos < 0 || len <= 0) return -__LINE__; - - if (pos > b->slen - && 0 > bsetstr(b, pos, NULL, fill)) return -__LINE__; - - if (0 > balloc(b, b->slen + len)) return -__LINE__; - if (pos < b->slen) memmove(b->data + pos + len, b->data + pos, b->slen - pos); - memset(b->data + pos, c, len); - b->slen += len; - b->data[b->slen] = (unsigned char)'\0'; - return BSTR_OK; -} - -/* int bJustifyLeft (bstring b, int space) - * - * Left justify a string. - */ -int bJustifyLeft(bstring b, int space) -{ - int j, i, s, t; - unsigned char c = (unsigned char)space; - - if (b == NULL || b->slen < 0 || b->mlen < b->slen) return -__LINE__; - if (space != (int)c) return BSTR_OK; - - for (s = j = i = 0; i < b->slen; i++) - { - t = s; - s = c != (b->data[j] = b->data[i]); - j += (t | s); - } - if (j > 0 && b->data[j - 1] == c) j--; - - b->data[j] = (unsigned char)'\0'; - b->slen = j; - return BSTR_OK; -} - -/* int bJustifyRight (bstring b, int width, int space) - * - * Right justify a string to within a given width. - */ -int bJustifyRight(bstring b, int width, int space) -{ - int ret; - if (width <= 0) return -__LINE__; - if (0 > (ret = bJustifyLeft(b, space))) return ret; - if (b->slen <= width) - return bInsertChrs(b, 0, width - b->slen, (unsigned char)space, (unsigned char)space); - return BSTR_OK; -} - -/* int bJustifyCenter (bstring b, int width, int space) - * - * Center a string's non-white space characters to within a given width by - * inserting whitespaces at the beginning. 
- */ -int bJustifyCenter(bstring b, int width, int space) -{ - int ret; - if (width <= 0) return -__LINE__; - if (0 > (ret = bJustifyLeft(b, space))) return ret; - if (b->slen <= width) - return bInsertChrs(b, 0, (width - b->slen + 1) >> 1, (unsigned char)space, (unsigned char)space); - return BSTR_OK; -} - -/* int bJustifyMargin (bstring b, int width, int space) - * - * Stretch a string to flush against left and right margins by evenly - * distributing additional white space between words. If the line is too - * long to be margin justified, it is left justified. - */ -int bJustifyMargin(bstring b, int width, int space) -{ - struct bstrList * sl; - int i, l, c; - - if (b == NULL || b->slen < 0 || b->mlen == 0 || b->mlen < b->slen) return -__LINE__; - if (NULL == (sl = bsplit(b, (unsigned char)space))) return -__LINE__; - for (l = c = i = 0; i < sl->qty; i++) - { - if (sl->entry[i]->slen > 0) - { - c++; - l += sl->entry[i]->slen; - } - } - - if (l + c >= width || c < 2) - { - bstrListDestroy(sl); - return bJustifyLeft(b, space); - } - - b->slen = 0; - for (i = 0; i < sl->qty; i++) - { - if (sl->entry[i]->slen > 0) - { - if (b->slen > 0) - { - int s = (width - l + (c / 2)) / c; - bInsertChrs(b, b->slen, s, (unsigned char)space, (unsigned char)space); - l += s; - } - bconcat(b, sl->entry[i]); - c--; - if (c <= 0) break; - } - } - - bstrListDestroy(sl); - return BSTR_OK; -} - -static size_t readNothing(void *buff, size_t elsize, size_t nelem, void *parm) -{ - return 0; /* Immediately indicate EOF. */ -} - -/* struct bStream * bsFromBstr (const_bstring b); - * - * Create a bStream whose contents are a copy of the bstring passed in. - * This allows the use of all the bStream APIs with bstrings. - */ -struct bStream * bsFromBstr(const_bstring b) -{ - struct bStream * s = bsopen((bNread)readNothing, NULL); - bsunread(s, b); /* Push the bstring data into the empty bStream. 
*/ - return s; -} - -static size_t readRef(void *buff, size_t elsize, size_t nelem, void *parm) -{ - struct tagbstring * t = (struct tagbstring *)parm; - size_t tsz = elsize * nelem; - - if (tsz > (size_t)t->slen) tsz = (size_t)t->slen; - if (tsz > 0) - { - memcpy(buff, t->data, tsz); - t->slen -= (int)tsz; - t->data += tsz; - return tsz / elsize; - } - return 0; -} - -/* The "by reference" version of the above function. This function puts - * a number of restrictions on the call site (the passed in struct - * tagbstring *will* be modified by this function, and the source data - * must remain alive and constant for the lifetime of the bStream). - * Hence it is not presented as an extern. - */ -static struct bStream * bsFromBstrRef(struct tagbstring * t) -{ - if (!t) return NULL; - return bsopen((bNread)readRef, t); -} - -/* char * bStr2NetStr (const_bstring b) - * - * Convert a bstring to a netstring. See - * http://cr.yp.to/proto/netstrings.txt for a description of netstrings. - * Note: 1) The value returned should be freed with a call to bcstrfree() at - * the point when it will no longer be referenced to avoid a memory - * leak. - * 2) If the returned value is non-NULL, then it also '\0' terminated - * in the character position one past the "," terminator. - */ -char * bStr2NetStr(const_bstring b) -{ - char strnum[sizeof(b->slen) * 3 + 1]; - bstring s; - unsigned char * buff; - - if (b == NULL || b->data == NULL || b->slen < 0) return NULL; - sprintf(strnum, "%d:", b->slen); - if (NULL == (s = bfromcstr(strnum)) - || bconcat(s, b) == BSTR_ERR || bconchar(s, (char)',') == BSTR_ERR) - { - bdestroy(s); - return NULL; - } - buff = s->data; - bcstrfree((char *)s); - return (char *)buff; -} - -/* bstring bNetStr2Bstr (const char * buf) - * - * Convert a netstring to a bstring. See - * http://cr.yp.to/proto/netstrings.txt for a description of netstrings. - * Note that the terminating "," *must* be present, however a following '\0' - * is *not* required. 
- */ -bstring bNetStr2Bstr(const char * buff) -{ - int i, x; - bstring b; - if (buff == NULL) return NULL; - x = 0; - for (i = 0; buff[i] != ':'; i++) - { - unsigned int v = buff[i] - '0'; - if (v > 9 || x > ((INT_MAX - (signed int)v) / 10)) return NULL; - x = (x * 10) + v; - } - - /* This thing has to be properly terminated */ - if (buff[i + 1 + x] != ',') return NULL; - - if (NULL == (b = bfromcstr(""))) return NULL; - if (balloc(b, x + 1) != BSTR_OK) - { - bdestroy(b); - return NULL; - } - memcpy(b->data, buff + i + 1, x); - b->data[x] = (unsigned char)'\0'; - b->slen = x; - return b; -} - -static char b64ETable[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; - -/* bstring bBase64Encode (const_bstring b) - * - * Generate a base64 encoding. See: RFC1341 - */ -bstring bBase64Encode(const_bstring b) -{ - int i, c0, c1, c2, c3; - bstring out; - - if (b == NULL || b->slen < 0 || b->data == NULL) return NULL; - - out = bfromcstr(""); - for (i = 0; i + 2 < b->slen; i += 3) - { - if (i && ((i % 57) == 0)) - { - if (bconchar(out, (char)'\015') < 0 || bconchar(out, (char)'\012') < 0) - { - bdestroy(out); - return NULL; - } - } - c0 = b->data[i] >> 2; - c1 = ((b->data[i] << 4) | - (b->data[i + 1] >> 4)) & 0x3F; - c2 = ((b->data[i + 1] << 2) | - (b->data[i + 2] >> 6)) & 0x3F; - c3 = b->data[i + 2] & 0x3F; - if (bconchar(out, b64ETable[c0]) < 0 || - bconchar(out, b64ETable[c1]) < 0 || - bconchar(out, b64ETable[c2]) < 0 || - bconchar(out, b64ETable[c3]) < 0) - { - bdestroy(out); - return NULL; - } - } - - if (i && ((i % 57) == 0)) - { - if (bconchar(out, (char)'\015') < 0 || bconchar(out, (char)'\012') < 0) - { - bdestroy(out); - return NULL; - } - } - - switch (i + 2 - b->slen) - { - case 0: c0 = b->data[i] >> 2; - c1 = ((b->data[i] << 4) | - (b->data[i + 1] >> 4)) & 0x3F; - c2 = (b->data[i + 1] << 2) & 0x3F; - if (bconchar(out, b64ETable[c0]) < 0 || - bconchar(out, b64ETable[c1]) < 0 || - bconchar(out, b64ETable[c2]) < 0 || - bconchar(out, (char)'=') 
< 0) - { - bdestroy(out); - return NULL; - } - break; - case 1: c0 = b->data[i] >> 2; - c1 = (b->data[i] << 4) & 0x3F; - if (bconchar(out, b64ETable[c0]) < 0 || - bconchar(out, b64ETable[c1]) < 0 || - bconchar(out, (char)'=') < 0 || - bconchar(out, (char)'=') < 0) - { - bdestroy(out); - return NULL; - } - break; - case 2: break; - } - - return out; -} - -#define B64_PAD (-2) -#define B64_ERR (-1) - -static int base64DecodeSymbol(unsigned char alpha) -{ - if ((alpha >= 'A') && (alpha <= 'Z')) return (int)(alpha - 'A'); - else if ((alpha >= 'a') && (alpha <= 'z')) - return 26 + (int)(alpha - 'a'); - else if ((alpha >= '0') && (alpha <= '9')) - return 52 + (int)(alpha - '0'); - else if (alpha == '+') return 62; - else if (alpha == '/') return 63; - else if (alpha == '=') return B64_PAD; - else return B64_ERR; -} - -/* bstring bBase64DecodeEx (const_bstring b, int * boolTruncError) - * - * Decode a base64 block of data. All MIME headers are assumed to have been - * removed. See: RFC1341 - */ -bstring bBase64DecodeEx(const_bstring b, int * boolTruncError) -{ - int i, v; - unsigned char c0, c1, c2; - bstring out; - - if (b == NULL || b->slen < 0 || b->data == NULL) return NULL; - if (boolTruncError) *boolTruncError = 0; - out = bfromcstr(""); - i = 0; - for (;;) - { - do - { - if (i >= b->slen) return out; - if (b->data[i] == '=') /* Bad "too early" truncation */ - { - if (boolTruncError) - { - *boolTruncError = 1; - return out; - } - bdestroy(out); - return NULL; - } - v = base64DecodeSymbol(b->data[i]); - i++; - } - while (v < 0); - c0 = (unsigned char)(v << 2); - do - { - if (i >= b->slen || b->data[i] == '=') /* Bad "too early" truncation */ - { - if (boolTruncError) - { - *boolTruncError = 1; - return out; - } - bdestroy(out); - return NULL; - } - v = base64DecodeSymbol(b->data[i]); - i++; - } - while (v < 0); - c0 |= (unsigned char)(v >> 4); - c1 = (unsigned char)(v << 4); - do - { - if (i >= b->slen) - { - if (boolTruncError) - { - *boolTruncError = 1; - return 
out; - } - bdestroy(out); - return NULL; - } - if (b->data[i] == '=') - { - i++; - if (i >= b->slen || b->data[i] != '=' || bconchar(out, c0) < 0) - { - if (boolTruncError) - { - *boolTruncError = 1; - return out; - } - bdestroy(out); /* Missing "=" at the end. */ - return NULL; - } - return out; - } - v = base64DecodeSymbol(b->data[i]); - i++; - } - while (v < 0); - c1 |= (unsigned char)(v >> 2); - c2 = (unsigned char)(v << 6); - do - { - if (i >= b->slen) - { - if (boolTruncError) - { - *boolTruncError = 1; - return out; - } - bdestroy(out); - return NULL; - } - if (b->data[i] == '=') - { - if (bconchar(out, c0) < 0 || bconchar(out, c1) < 0) - { - if (boolTruncError) - { - *boolTruncError = 1; - return out; - } - bdestroy(out); - return NULL; - } - if (boolTruncError) *boolTruncError = 0; - return out; - } - v = base64DecodeSymbol(b->data[i]); - i++; - } - while (v < 0); - c2 |= (unsigned char)(v); - if (bconchar(out, c0) < 0 || - bconchar(out, c1) < 0 || - bconchar(out, c2) < 0) - { - if (boolTruncError) - { - *boolTruncError = -1; - return out; - } - bdestroy(out); - return NULL; - } - } -} - -#define UU_DECODE_BYTE(b) (((b) == (signed int)'`') ? 
0 : (b) - (signed int)' ') - -struct bUuInOut -{ - bstring src, dst; - int * badlines; -}; - -#define UU_MAX_LINELEN 45 - -static int bUuDecLine(void * parm, int ofs, int len) -{ - struct bUuInOut * io = (struct bUuInOut *)parm; - bstring s = io->src; - bstring t = io->dst; - int i, llen, otlen, ret, c0, c1, c2, c3, d0, d1, d2, d3; - - if (len == 0) return 0; - llen = UU_DECODE_BYTE(s->data[ofs]); - ret = 0; - - otlen = t->slen; - - if (((unsigned)llen) > UU_MAX_LINELEN) - { - ret = -__LINE__; - goto bl; - } - - llen += t->slen; - - for (i = 1; i < s->slen && t->slen < llen; i += 4) - { - unsigned char outoctet[3]; - c0 = UU_DECODE_BYTE(d0 = (int)bchare(s, i + ofs + 0, ' ' - 1)); - c1 = UU_DECODE_BYTE(d1 = (int)bchare(s, i + ofs + 1, ' ' - 1)); - c2 = UU_DECODE_BYTE(d2 = (int)bchare(s, i + ofs + 2, ' ' - 1)); - c3 = UU_DECODE_BYTE(d3 = (int)bchare(s, i + ofs + 3, ' ' - 1)); - - if (((unsigned)(c0 | c1) >= 0x40)) - { - if (!ret) ret = -__LINE__; - if (d0 > 0x60 || (d0 < (' ' - 1) && !isspace(d0)) || - d1 > 0x60 || (d1 < (' ' - 1) && !isspace(d1))) - { - t->slen = otlen; - goto bl; - } - c0 = c1 = 0; - } - outoctet[0] = (unsigned char)((c0 << 2) | ((unsigned)c1 >> 4)); - if (t->slen + 1 >= llen) - { - if (0 > bconchar(t, (char)outoctet[0])) return -__LINE__; - break; - } - if ((unsigned)c2 >= 0x40) - { - if (!ret) ret = -__LINE__; - if (d2 > 0x60 || (d2 < (' ' - 1) && !isspace(d2))) - { - t->slen = otlen; - goto bl; - } - c2 = 0; - } - outoctet[1] = (unsigned char)((c1 << 4) | ((unsigned)c2 >> 2)); - if (t->slen + 2 >= llen) - { - if (0 > bcatblk(t, outoctet, 2)) return -__LINE__; - break; - } - if ((unsigned)c3 >= 0x40) - { - if (!ret) ret = -__LINE__; - if (d3 > 0x60 || (d3 < (' ' - 1) && !isspace(d3))) - { - t->slen = otlen; - goto bl; - } - c3 = 0; - } - outoctet[2] = (unsigned char)((c2 << 6) | ((unsigned)c3)); - if (0 > bcatblk(t, outoctet, 3)) return -__LINE__; - } - if (t->slen < llen) - { - if (0 == ret) ret = -__LINE__; - t->slen = otlen; - } -bl:; - if 
(ret && io->badlines) - { - (*io->badlines)++; - return 0; - } - return ret; -} - -/* bstring bUuDecodeEx (const_bstring src, int * badlines) - * - * Performs a UUDecode of a block of data. If there are errors in the - * decoding, they are counted up and returned in "badlines", if badlines is - * not NULL. It is assumed that the "begin" and "end" lines have already - * been stripped off. The potential security problem of writing the - * filename in the begin line is something that is beyond the scope of a - * portable library. - */ - -#ifdef _MSC_VER -#pragma warning(disable:4204) -#endif - -bstring bUuDecodeEx(const_bstring src, int * badlines) -{ - struct tagbstring t; - struct bStream * s; - struct bStream * d; - bstring b; - - if (!src) return NULL; - t = *src; /* Short lifetime alias to header of src */ - s = bsFromBstrRef(&t); /* t is undefined after this */ - if (!s) return NULL; - d = bsUuDecode(s, badlines); - b = bfromcstralloc(256, ""); - if (NULL == b || 0 > bsread(b, d, INT_MAX)) - { - bdestroy(b); - bsclose(d); - bsclose(s); - return NULL; - } - return b; -} - -struct bsUuCtx -{ - struct bUuInOut io; - struct bStream * sInp; -}; - -static size_t bsUuDecodePart(void *buff, size_t elsize, size_t nelem, void *parm) -{ - static struct tagbstring eol = bsStatic("\r\n"); - struct bsUuCtx * luuCtx = (struct bsUuCtx *)parm; - size_t tsz; - int l, lret; - - if (NULL == buff || NULL == parm) return 0; - tsz = elsize * nelem; - -CheckInternalBuffer:; - /* If internal buffer has sufficient data, just output it */ - if (((size_t)luuCtx->io.dst->slen) > tsz) - { - memcpy(buff, luuCtx->io.dst->data, tsz); - bdelete(luuCtx->io.dst, 0, (int)tsz); - return nelem; - } - -DecodeMore:; - if (0 <= (l = binchr(luuCtx->io.src, 0, &eol))) - { - int ol = 0; - struct tagbstring t; - bstring s = luuCtx->io.src; - luuCtx->io.src = &t; - - do - { - if (l > ol) - { - bmid2tbstr(t, s, ol, l - ol); - lret = bUuDecLine(&luuCtx->io, 0, t.slen); - if (0 > lret) - { - luuCtx->io.src = s; 
- goto Done; - } - } - ol = l + 1; - if (((size_t)luuCtx->io.dst->slen) > tsz) break; - l = binchr(s, ol, &eol); - } - while (BSTR_ERR != l); - bdelete(s, 0, ol); - luuCtx->io.src = s; - goto CheckInternalBuffer; - } - - if (BSTR_ERR != bsreada(luuCtx->io.src, luuCtx->sInp, bsbufflength(luuCtx->sInp, BSTR_BS_BUFF_LENGTH_GET))) - { - goto DecodeMore; - } - - bUuDecLine(&luuCtx->io, 0, luuCtx->io.src->slen); - -Done:; - /* Output any lingering data that has been translated */ - if (((size_t)luuCtx->io.dst->slen) > 0) - { - if (((size_t)luuCtx->io.dst->slen) > tsz) goto CheckInternalBuffer; - memcpy(buff, luuCtx->io.dst->data, luuCtx->io.dst->slen); - tsz = luuCtx->io.dst->slen / elsize; - luuCtx->io.dst->slen = 0; - if (tsz > 0) return tsz; - } - - /* Deallocate once EOF becomes triggered */ - bdestroy(luuCtx->io.dst); - bdestroy(luuCtx->io.src); - free(luuCtx); - return 0; -} - -/* bStream * bsUuDecode (struct bStream * sInp, int * badlines) - * - * Creates a bStream which performs the UUDecode of an an input stream. If - * there are errors in the decoding, they are counted up and returned in - * "badlines", if badlines is not NULL. It is assumed that the "begin" and - * "end" lines have already been stripped off. The potential security - * problem of writing the filename in the begin line is something that is - * beyond the scope of a portable library. 
- */ - -struct bStream * bsUuDecode(struct bStream * sInp, int * badlines) -{ - struct bsUuCtx * luuCtx = (struct bsUuCtx *)malloc(sizeof(struct bsUuCtx)); - struct bStream * sOut; - - if (NULL == luuCtx) return NULL; - - luuCtx->io.src = bfromcstr(""); - luuCtx->io.dst = bfromcstr(""); - if (NULL == luuCtx->io.dst || NULL == luuCtx->io.src) - { - CleanUpFailureToAllocate :; - bdestroy(luuCtx->io.dst); - bdestroy(luuCtx->io.src); - free(luuCtx); - return NULL; - } - luuCtx->io.badlines = badlines; - if (badlines) *badlines = 0; - - luuCtx->sInp = sInp; - - sOut = bsopen((bNread)bsUuDecodePart, luuCtx); - if (NULL == sOut) goto CleanUpFailureToAllocate; - return sOut; -} - -#define UU_ENCODE_BYTE(b) (char) (((b) == 0) ? '`' : ((b) + ' ')) - -/* bstring bUuEncode (const_bstring src) - * - * Performs a UUEncode of a block of data. The "begin" and "end" lines are - * not appended. - */ -bstring bUuEncode(const_bstring src) -{ - bstring out; - int i, j, jm; - unsigned int c0, c1, c2; - if (src == NULL || src->slen < 0 || src->data == NULL) return NULL; - if ((out = bfromcstr("")) == NULL) return NULL; - for (i = 0; i < src->slen; i += UU_MAX_LINELEN) - { - if ((jm = i + UU_MAX_LINELEN) > src->slen) jm = src->slen; - if (bconchar(out, UU_ENCODE_BYTE(jm - i)) < 0) - { - bstrFree(out); - break; - } - for (j = i; j < jm; j += 3) - { - c0 = (unsigned int)bchar(src, j); - c1 = (unsigned int)bchar(src, j + 1); - c2 = (unsigned int)bchar(src, j + 2); - if (bconchar(out, UU_ENCODE_BYTE((c0 & 0xFC) >> 2)) < 0 || - bconchar(out, UU_ENCODE_BYTE(((c0 & 0x03) << 4) | ((c1 & 0xF0) >> 4))) < 0 || - bconchar(out, UU_ENCODE_BYTE(((c1 & 0x0F) << 2) | ((c2 & 0xC0) >> 6))) < 0 || - bconchar(out, UU_ENCODE_BYTE((c2 & 0x3F))) < 0) - { - bstrFree(out); - goto End; - } - } - if (bconchar(out, (char)'\r') < 0 || bconchar(out, (char)'\n') < 0) - { - bstrFree(out); - break; - } - } -End:; - return out; -} - -/* bstring bYEncode (const_bstring src) - * - * Performs a YEncode of a block of data. 
No header or tail info is - * appended. See: http://www.yenc.org/whatis.htm and - * http://www.yenc.org/yenc-draft.1.3.txt - */ -bstring bYEncode(const_bstring src) -{ - int i; - bstring out; - unsigned char c; - - if (src == NULL || src->slen < 0 || src->data == NULL) return NULL; - if ((out = bfromcstr("")) == NULL) return NULL; - for (i = 0; i < src->slen; i++) - { - c = (unsigned char)(src->data[i] + 42); - if (c == '=' || c == '\0' || c == '\r' || c == '\n') - { - if (0 > bconchar(out, (char)'=')) - { - bdestroy(out); - return NULL; - } - c += (unsigned char)64; - } - if (0 > bconchar(out, c)) - { - bdestroy(out); - return NULL; - } - } - return out; -} - -/* bstring bYDecode (const_bstring src) - * - * Performs a YDecode of a block of data. See: - * http://www.yenc.org/whatis.htm and http://www.yenc.org/yenc-draft.1.3.txt - */ -#define MAX_OB_LEN (64) - -bstring bYDecode(const_bstring src) -{ - int i; - bstring out; - unsigned char c; - unsigned char octetbuff[MAX_OB_LEN]; - int obl; - - if (src == NULL || src->slen < 0 || src->data == NULL) return NULL; - if ((out = bfromcstr("")) == NULL) return NULL; - - obl = 0; - - for (i = 0; i < src->slen; i++) - { - if ('=' == (c = src->data[i])) /* The = escape mode */ - { - i++; - if (i >= src->slen) - { - bdestroy(out); - return NULL; - } - c = (unsigned char)(src->data[i] - 64); - } - else - { - if ('\0' == c) - { - bdestroy(out); - return NULL; - } - - /* Extraneous CR/LFs are to be ignored. 
*/ - if (c == '\r' || c == '\n') continue; - } - - octetbuff[obl] = (unsigned char)((int)c - 42); - obl++; - - if (obl >= MAX_OB_LEN) - { - if (0 > bcatblk(out, octetbuff, obl)) - { - bdestroy(out); - return NULL; - } - obl = 0; - } - } - - if (0 > bcatblk(out, octetbuff, obl)) - { - bdestroy(out); - out = NULL; - } - return out; -} - -/* bstring bStrfTime (const char * fmt, const struct tm * timeptr) - * - * Takes a format string that is compatible with strftime and a struct tm - * pointer, formats the time according to the format string and outputs - * the bstring as a result. Note that if there is an early generation of a - * '\0' character, the bstring will be truncated to this end point. - */ -bstring bStrfTime(const char * fmt, const struct tm * timeptr) -{ -#if defined(__TURBOC__) && !defined(__BORLANDC__) - static struct tagbstring ns = bsStatic("bStrfTime Not supported"); - fmt = fmt; - timeptr = timeptr; - return &ns; -#else - bstring buff; - int n; - size_t r; - - if (fmt == NULL) return NULL; - - /* Since the length is not determinable beforehand, a search is - performed using the truncating "strftime" call on increasing - potential sizes for the output result. */ - - if ((n = (int)(2 * strlen(fmt))) < 16) n = 16; - buff = bfromcstralloc(n + 2, ""); - - for (;;) - { - if (BSTR_OK != balloc(buff, n + 2)) - { - bdestroy(buff); - return NULL; - } - - r = strftime((char *)buff->data, n + 1, fmt, timeptr); - - if (r > 0) - { - buff->slen = (int)r; - break; - } - - n += n; - } - - return buff; -#endif -} - -/* int bSetCstrChar (bstring a, int pos, char c) - * - * Sets the character at position pos to the character c in the bstring a. - * If the character c is NUL ('\0') then the string is truncated at this - * point. Note: this does not enable any other '\0' character in the bstring - * as terminator indicator for the string. pos must be in the position - * between 0 and b->slen inclusive, otherwise BSTR_ERR will be returned. 
- */ -int bSetCstrChar(bstring b, int pos, char c) -{ - if (NULL == b || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen) - return BSTR_ERR; - if (pos < 0 || pos > b->slen) return BSTR_ERR; - - if (pos == b->slen) - { - if ('\0' != c) return bconchar(b, c); - return 0; - } - - b->data[pos] = (unsigned char)c; - if ('\0' == c) b->slen = pos; - - return 0; -} - -/* int bSetChar (bstring b, int pos, char c) - * - * Sets the character at position pos to the character c in the bstring a. - * The string is not truncated if the character c is NUL ('\0'). pos must - * be in the position between 0 and b->slen inclusive, otherwise BSTR_ERR - * will be returned. - */ -int bSetChar(bstring b, int pos, char c) -{ - if (NULL == b || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen) - return BSTR_ERR; - if (pos < 0 || pos > b->slen) return BSTR_ERR; - - if (pos == b->slen) - { - return bconchar(b, c); - } - - b->data[pos] = (unsigned char)c; - return 0; -} - -#define INIT_SECURE_INPUT_LENGTH (256) - -/* bstring bSecureInput (int maxlen, int termchar, - * bNgetc vgetchar, void * vgcCtx) - * - * Read input from an abstracted input interface, for a length of at most - * maxlen characters. If maxlen <= 0, then there is no length limit put - * on the input. The result is terminated early if vgetchar() return EOF - * or the user specified value termchar. 
- * - */ -bstring bSecureInput(int maxlen, int termchar, bNgetc vgetchar, void * vgcCtx) -{ - int i, m, c; - bstring b, t; - - if (!vgetchar) return NULL; - - b = bfromcstralloc(INIT_SECURE_INPUT_LENGTH, ""); - if ((c = UCHAR_MAX + 1) == termchar) c++; - - for (i = 0;; i++) - { - if (termchar == c || (maxlen > 0 && i >= maxlen)) c = EOF; - else c = vgetchar(vgcCtx); - - if (EOF == c) break; - - if (i + 1 >= b->mlen) - { - /* Double size, but deal with unusual case of numeric - overflows */ - - if ((m = b->mlen << 1) <= b->mlen && - (m = b->mlen + 1024) <= b->mlen && - (m = b->mlen + 16) <= b->mlen && - (m = b->mlen + 1) <= b->mlen) t = NULL; - else t = bfromcstralloc(m, ""); - - if (t) memcpy(t->data, b->data, i); - bSecureDestroy(b); /* Cleanse previous buffer */ - b = t; - if (!b) return b; - } - - b->data[i] = (unsigned char)c; - } - - b->slen = i; - b->data[i] = (unsigned char)'\0'; - return b; -} - -#define BWS_BUFF_SZ (1024) - -struct bwriteStream -{ - bstring buff; /* Buffer for underwrites */ - void * parm; /* The stream handle for core stream */ - bNwrite writeFn; /* fwrite work-a-like fnptr for core stream */ - int isEOF; /* track stream's EOF state */ - int minBuffSz; -}; - -/* struct bwriteStream * bwsOpen (bNwrite writeFn, void * parm) - * - * Wrap a given open stream (described by a fwrite work-a-like function - * pointer and stream handle) into an open bwriteStream suitable for write - * streaming functions. 
- */ -struct bwriteStream * bwsOpen(bNwrite writeFn, void * parm) -{ - struct bwriteStream * ws; - - if (NULL == writeFn) return NULL; - ws = (struct bwriteStream *)malloc(sizeof(struct bwriteStream)); - if (ws) - { - if (NULL == (ws->buff = bfromcstr(""))) - { - free(ws); - ws = NULL; - } - else - { - ws->parm = parm; - ws->writeFn = writeFn; - ws->isEOF = 0; - ws->minBuffSz = BWS_BUFF_SZ; - } - } - return ws; -} - -#define internal_bwswriteout(ws, b) {\ - if ((b)->slen > 0) { \ - if (1 != (ws->writeFn ((b)->data, (b)->slen, 1, ws->parm))) { \ - ws->isEOF = 1; \ - return BSTR_ERR; \ - } \ - } \ -} - -/* int bwsWriteFlush (struct bwriteStream * ws) - * - * Force any pending data to be written to the core stream. - */ -int bwsWriteFlush(struct bwriteStream * ws) -{ - if (NULL == ws || ws->isEOF || 0 >= ws->minBuffSz || - NULL == ws->writeFn || NULL == ws->buff) return BSTR_ERR; - internal_bwswriteout(ws, ws->buff); - ws->buff->slen = 0; - return 0; -} - -/* int bwsWriteBstr (struct bwriteStream * ws, const_bstring b) - * - * Send a bstring to a bwriteStream. If the stream is at EOF BSTR_ERR is - * returned. Note that there is no deterministic way to determine the exact - * cut off point where the core stream stopped accepting data. 
- */ -int bwsWriteBstr(struct bwriteStream * ws, const_bstring b) -{ - struct tagbstring t; - int l; - - if (NULL == ws || NULL == b || NULL == ws->buff || - ws->isEOF || 0 >= ws->minBuffSz || NULL == ws->writeFn) - return BSTR_ERR; - - /* Buffer prepacking optimization */ - if (b->slen > 0 && ws->buff->mlen - ws->buff->slen > b->slen) - { - static struct tagbstring empty = bsStatic(""); - if (0 > bconcat(ws->buff, b)) return BSTR_ERR; - return bwsWriteBstr(ws, &empty); - } - - if (0 > (l = ws->minBuffSz - ws->buff->slen)) - { - internal_bwswriteout(ws, ws->buff); - ws->buff->slen = 0; - l = ws->minBuffSz; - } - - if (b->slen < l) return bconcat(ws->buff, b); - - if (0 > bcatblk(ws->buff, b->data, l)) return BSTR_ERR; - internal_bwswriteout(ws, ws->buff); - ws->buff->slen = 0; - - bmid2tbstr(t, (bstring)b, l, b->slen); - - if (t.slen >= ws->minBuffSz) - { - internal_bwswriteout(ws, &t); - return 0; - } - - return bassign(ws->buff, &t); -} - -/* int bwsWriteBlk (struct bwriteStream * ws, void * blk, int len) - * - * Send a block of data a bwriteStream. If the stream is at EOF BSTR_ERR is - * returned. - */ -int bwsWriteBlk(struct bwriteStream * ws, void * blk, int len) -{ - struct tagbstring t; - if (NULL == blk || len < 0) return BSTR_ERR; - blk2tbstr(t, blk, len); - return bwsWriteBstr(ws, &t); -} - -/* int bwsIsEOF (const struct bwriteStream * ws) - * - * Returns 0 if the stream is currently writable, 1 if the core stream has - * responded by not accepting the previous attempted write. - */ -int bwsIsEOF(const struct bwriteStream * ws) -{ - if (NULL == ws || NULL == ws->buff || 0 > ws->minBuffSz || - NULL == ws->writeFn) return BSTR_ERR; - return ws->isEOF; -} - -/* int bwsBuffLength (struct bwriteStream * ws, int sz) - * - * Set the length of the buffer used by the bwsStream. If sz is zero, the - * length is not set. This function returns with the previous length. 
- */ -int bwsBuffLength(struct bwriteStream * ws, int sz) -{ - int oldSz; - if (ws == NULL || sz < 0) return BSTR_ERR; - oldSz = ws->minBuffSz; - if (sz > 0) ws->minBuffSz = sz; - return oldSz; -} - -/* void * bwsClose (struct bwriteStream * s) - * - * Close the bwriteStream, and return the handle to the stream that was - * originally used to open the given stream. Note that even if the stream - * is at EOF it still needs to be closed with a call to bwsClose. - */ -void * bwsClose(struct bwriteStream * ws) -{ - void * parm; - if (NULL == ws || NULL == ws->buff || 0 >= ws->minBuffSz || - NULL == ws->writeFn) return NULL; - bwsWriteFlush(ws); - parm = ws->parm; - ws->parm = NULL; - ws->minBuffSz = -1; - ws->writeFn = NULL; - bstrFree(ws->buff); - free(ws); - return parm; -} diff --git a/third_party/HLSLcc/src/cbstring/bstraux.h b/third_party/HLSLcc/src/cbstring/bstraux.h deleted file mode 100644 index e31929f..0000000 --- a/third_party/HLSLcc/src/cbstring/bstraux.h +++ /dev/null @@ -1,112 +0,0 @@ -/* - * This source file is part of the bstring string library. This code was - * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause - * BSD open source license or GPL v2.0. Refer to the accompanying documentation - * for details on usage and license. - */ - -/* - * bstraux.h - * - * This file is not a necessary part of the core bstring library itself, but - * is just an auxilliary module which includes miscellaneous or trivial - * functions. 
- */ - -#ifndef BSTRAUX_INCLUDE -#define BSTRAUX_INCLUDE - -#include -#include "bstrlib.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* Safety mechanisms */ -#define bstrDeclare(b) bstring (b) = NULL; -#define bstrFree(b) {if ((b) != NULL && (b)->slen >= 0 && (b)->mlen >= (b)->slen) { bdestroy (b); (b) = NULL; }} - -/* Backward compatibilty with previous versions of Bstrlib */ -#define bAssign(a, b) ((bassign)((a), (b))) -#define bSubs(b, pos, len, a, c) ((breplace)((b),(pos),(len),(a),(unsigned char)(c))) -#define bStrchr(b, c) ((bstrchr)((b), (c))) -#define bStrchrFast(b, c) ((bstrchr)((b), (c))) -#define bCatCstr(b, s) ((bcatcstr)((b), (s))) -#define bCatBlk(b, s, len) ((bcatblk)((b),(s),(len))) -#define bCatStatic(b, s) bCatBlk ((b), ("" s ""), sizeof (s) - 1) -#define bTrunc(b, n) ((btrunc)((b), (n))) -#define bReplaceAll(b, find, repl, pos) ((bfindreplace)((b),(find),(repl),(pos))) -#define bUppercase(b) ((btoupper)(b)) -#define bLowercase(b) ((btolower)(b)) -#define bCaselessCmp(a, b) ((bstricmp)((a), (b))) -#define bCaselessNCmp(a, b, n) ((bstrnicmp)((a), (b), (n))) -#define bBase64Decode(b) (bBase64DecodeEx ((b), NULL)) -#define bUuDecode(b) (bUuDecodeEx ((b), NULL)) - -/* Unusual functions */ -extern struct bStream * bsFromBstr(const_bstring b); -extern bstring bTail(bstring b, int n); -extern bstring bHead(bstring b, int n); -extern int bSetCstrChar(bstring a, int pos, char c); -extern int bSetChar(bstring b, int pos, char c); -extern int bFill(bstring a, char c, int len); -extern int bReplicate(bstring b, int n); -extern int bReverse(bstring b); -extern int bInsertChrs(bstring b, int pos, int len, unsigned char c, unsigned char fill); -extern bstring bStrfTime(const char * fmt, const struct tm * timeptr); -#define bAscTime(t) (bStrfTime ("%c\n", (t))) -#define bCTime(t) ((t) ? 
bAscTime (localtime (t)) : NULL) - -/* Spacing formatting */ -extern int bJustifyLeft(bstring b, int space); -extern int bJustifyRight(bstring b, int width, int space); -extern int bJustifyMargin(bstring b, int width, int space); -extern int bJustifyCenter(bstring b, int width, int space); - -/* Esoteric standards specific functions */ -extern char * bStr2NetStr(const_bstring b); -extern bstring bNetStr2Bstr(const char * buf); -extern bstring bBase64Encode(const_bstring b); -extern bstring bBase64DecodeEx(const_bstring b, int * boolTruncError); -extern struct bStream * bsUuDecode(struct bStream * sInp, int * badlines); -extern bstring bUuDecodeEx(const_bstring src, int * badlines); -extern bstring bUuEncode(const_bstring src); -extern bstring bYEncode(const_bstring src); -extern bstring bYDecode(const_bstring src); - -/* Writable stream */ -typedef int (* bNwrite) (const void * buf, size_t elsize, size_t nelem, void * parm); - -struct bwriteStream * bwsOpen(bNwrite writeFn, void * parm); -int bwsWriteBstr(struct bwriteStream * stream, const_bstring b); -int bwsWriteBlk(struct bwriteStream * stream, void * blk, int len); -int bwsWriteFlush(struct bwriteStream * stream); -int bwsIsEOF(const struct bwriteStream * stream); -int bwsBuffLength(struct bwriteStream * stream, int sz); -void * bwsClose(struct bwriteStream * stream); - -/* Security functions */ -#define bSecureDestroy(b) { \ -bstring bstr__tmp = (b); \ - if (bstr__tmp && bstr__tmp->mlen > 0 && bstr__tmp->data) { \ - (void) memset (bstr__tmp->data, 0, (size_t) bstr__tmp->mlen); \ - bdestroy (bstr__tmp); \ - } \ -} -#define bSecureWriteProtect(t) { \ - if ((t).mlen >= 0) { \ - if ((t).mlen > (t).slen)) { \ - (void) memset ((t).data + (t).slen, 0, (size_t) (t).mlen - (t).slen); \ - } \ - (t).mlen = -1; \ - } \ -} -extern bstring bSecureInput(int maxlen, int termchar, - bNgetc vgetchar, void * vgcCtx); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/third_party/HLSLcc/src/cbstring/bstrlib.c 
b/third_party/HLSLcc/src/cbstring/bstrlib.c deleted file mode 100644 index e1a8590..0000000 --- a/third_party/HLSLcc/src/cbstring/bstrlib.c +++ /dev/null @@ -1,3280 +0,0 @@ -/* - * This source file is part of the bstring string library. This code was - * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause - * BSD open source license or GPL v2.0. Refer to the accompanying documentation - * for details on usage and license. - */ - -/* - * bstrlib.c - * - * This file is the core module for implementing the bstring functions. - */ - -#include -#include -#include -#include -#include -#include -#include "bstrlib.h" - -/* Optionally include a mechanism for debugging memory */ - -#if defined(MEMORY_DEBUG) || defined(BSTRLIB_MEMORY_DEBUG) -#include "memdbg.h" -#endif - -#ifndef bstr__alloc -#define bstr__alloc(x) malloc (x) -#endif - -#ifndef bstr__free -#define bstr__free(p) free (p) -#endif - -#ifndef bstr__realloc -#define bstr__realloc(p, x) realloc ((p), (x)) -#endif - -#ifndef bstr__memcpy -#define bstr__memcpy(d, s, l) memcpy ((d), (s), (l)) -#endif - -#ifndef bstr__memmove -#define bstr__memmove(d, s, l) memmove ((d), (s), (l)) -#endif - -#ifndef bstr__memset -#define bstr__memset(d, c, l) memset ((d), (c), (l)) -#endif - -#ifndef bstr__memcmp -#define bstr__memcmp(d, c, l) memcmp ((d), (c), (l)) -#endif - -#ifndef bstr__memchr -#define bstr__memchr(s, c, l) memchr ((s), (c), (l)) -#endif - -/* Just a length safe wrapper for memmove. */ - -#define bBlockCopy(D, S, L) { if ((L) > 0) bstr__memmove ((D),(S),(L)); } - -/* Compute the snapped size for a given requested size. By snapping to powers - of 2 like this, repeated reallocations are avoided. 
*/ -static int snapUpSize(int i) -{ - if (i < 8) - { - i = 8; - } - else - { - unsigned int j; - j = (unsigned int)i; - - j |= (j >> 1); - j |= (j >> 2); - j |= (j >> 4); - j |= (j >> 8); /* Ok, since int >= 16 bits */ -#if (UINT_MAX != 0xffff) - j |= (j >> 16); /* For 32 bit int systems */ -#if (UINT_MAX > 0xffffffffUL) - j |= (j >> 32); /* For 64 bit int systems */ -#endif -#endif - /* Least power of two greater than i */ - j++; - if ((int)j >= i) i = (int)j; - } - return i; -} - -/* int balloc (bstring b, int len) - * - * Increase the size of the memory backing the bstring b to at least len. - */ -int balloc(bstring b, int olen) -{ - int len; - if (b == NULL || b->data == NULL || b->slen < 0 || b->mlen <= 0 || - b->mlen < b->slen || olen <= 0) - { - return BSTR_ERR; - } - - if (olen >= b->mlen) - { - unsigned char * x; - - if ((len = snapUpSize(olen)) <= b->mlen) return BSTR_OK; - - /* Assume probability of a non-moving realloc is 0.125 */ - if (7 * b->mlen < 8 * b->slen) - { - /* If slen is close to mlen in size then use realloc to reduce - the memory defragmentation */ - - reallocStrategy:; - - x = (unsigned char *)bstr__realloc(b->data, (size_t)len); - if (x == NULL) - { - /* Since we failed, try allocating the tighest possible - allocation */ - - if (NULL == (x = (unsigned char *)bstr__realloc(b->data, (size_t)(len = olen)))) - { - return BSTR_ERR; - } - } - } - else - { - /* If slen is not close to mlen then avoid the penalty of copying - the extra bytes that are allocated, but not considered part of - the string */ - - if (NULL == (x = (unsigned char *)bstr__alloc((size_t)len))) - { - /* Perhaps there is no available memory for the two - allocations to be in memory at once */ - - goto reallocStrategy; - } - else - { - if (b->slen) bstr__memcpy((char *)x, (char *)b->data, (size_t)b->slen); - bstr__free(b->data); - } - } - b->data = x; - b->mlen = len; - b->data[b->slen] = (unsigned char)'\0'; - } - - return BSTR_OK; -} - -/* int ballocmin (bstring b, int 
len) - * - * Set the size of the memory backing the bstring b to len or b->slen+1, - * whichever is larger. Note that repeated use of this function can degrade - * performance. - */ -int ballocmin(bstring b, int len) -{ - unsigned char * s; - - if (b == NULL || b->data == NULL || (b->slen + 1) < 0 || b->mlen <= 0 || - b->mlen < b->slen || len <= 0) - { - return BSTR_ERR; - } - - if (len < b->slen + 1) len = b->slen + 1; - - if (len != b->mlen) - { - s = (unsigned char *)bstr__realloc(b->data, (size_t)len); - if (NULL == s) return BSTR_ERR; - s[b->slen] = (unsigned char)'\0'; - b->data = s; - b->mlen = len; - } - - return BSTR_OK; -} - -/* bstring bfromcstr (const char * str) - * - * Create a bstring which contains the contents of the '\0' terminated char * - * buffer str. - */ -bstring bfromcstr(const char * str) -{ - bstring b; - int i; - size_t j; - - if (str == NULL) return NULL; - j = (strlen)(str); - i = snapUpSize((int)(j + (2 - (j != 0)))); - if (i <= (int)j) return NULL; - - b = (bstring)bstr__alloc(sizeof(struct tagbstring)); - if (NULL == b) return NULL; - b->slen = (int)j; - if (NULL == (b->data = (unsigned char *)bstr__alloc(b->mlen = i))) - { - bstr__free(b); - return NULL; - } - - bstr__memcpy(b->data, str, j + 1); - return b; -} - -/* bstring bfromcstralloc (int mlen, const char * str) - * - * Create a bstring which contains the contents of the '\0' terminated char * - * buffer str. The memory buffer backing the string is at least len - * characters in length. 
- */ -bstring bfromcstralloc(int mlen, const char * str) -{ - bstring b; - int i; - size_t j; - - if (str == NULL) return NULL; - j = (strlen)(str); - i = snapUpSize((int)(j + (2 - (j != 0)))); - if (i <= (int)j) return NULL; - - b = (bstring)bstr__alloc(sizeof(struct tagbstring)); - if (b == NULL) return NULL; - b->slen = (int)j; - if (i < mlen) i = mlen; - - if (NULL == (b->data = (unsigned char *)bstr__alloc(b->mlen = i))) - { - bstr__free(b); - return NULL; - } - - bstr__memcpy(b->data, str, j + 1); - return b; -} - -/* bstring blk2bstr (const void * blk, int len) - * - * Create a bstring which contains the content of the block blk of length - * len. - */ -bstring blk2bstr(const void * blk, int len) -{ - bstring b; - int i; - - if (blk == NULL || len < 0) return NULL; - b = (bstring)bstr__alloc(sizeof(struct tagbstring)); - if (b == NULL) return NULL; - b->slen = len; - - i = len + (2 - (len != 0)); - i = snapUpSize(i); - - b->mlen = i; - - b->data = (unsigned char *)bstr__alloc((size_t)b->mlen); - if (b->data == NULL) - { - bstr__free(b); - return NULL; - } - - if (len > 0) bstr__memcpy(b->data, blk, (size_t)len); - b->data[len] = (unsigned char)'\0'; - - return b; -} - -/* char * bstr2cstr (const_bstring s, char z) - * - * Create a '\0' terminated char * buffer which is equal to the contents of - * the bstring s, except that any contained '\0' characters are converted - * to the character in z. This returned value should be freed with a - * bcstrfree () call, by the calling application. - */ -char * bstr2cstr(const_bstring b, char z) -{ - int i, l; - char * r; - - if (b == NULL || b->slen < 0 || b->data == NULL) return NULL; - l = b->slen; - r = (char *)bstr__alloc((size_t)(l + 1)); - if (r == NULL) return r; - - for (i = 0; i < l; i++) - { - r[i] = (char)((b->data[i] == '\0') ? z : (char)(b->data[i])); - } - - r[l] = (unsigned char)'\0'; - - return r; -} - -/* int bcstrfree (char * s) - * - * Frees a C-string generated by bstr2cstr (). 
This is normally unnecessary - * since it just wraps a call to bstr__free (), however, if bstr__alloc () - * and bstr__free () have been redefined as a macros within the bstrlib - * module (via defining them in memdbg.h after defining - * BSTRLIB_MEMORY_DEBUG) with some difference in behaviour from the std - * library functions, then this allows a correct way of freeing the memory - * that allows higher level code to be independent from these macro - * redefinitions. - */ -int bcstrfree(char * s) -{ - if (s) - { - bstr__free(s); - return BSTR_OK; - } - return BSTR_ERR; -} - -/* int bconcat (bstring b0, const_bstring b1) - * - * Concatenate the bstring b1 to the bstring b0. - */ -int bconcat(bstring b0, const_bstring b1) -{ - int len, d; - bstring aux = (bstring)b1; - - if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL) return BSTR_ERR; - - d = b0->slen; - len = b1->slen; - if ((d | (b0->mlen - d) | len | (d + len)) < 0) return BSTR_ERR; - - if (b0->mlen <= d + len + 1) - { - ptrdiff_t pd = b1->data - b0->data; - if (0 <= pd && pd < b0->mlen) - { - if (NULL == (aux = bstrcpy(b1))) return BSTR_ERR; - } - if (balloc(b0, d + len + 1) != BSTR_OK) - { - if (aux != b1) bdestroy(aux); - return BSTR_ERR; - } - } - - bBlockCopy(&b0->data[d], &aux->data[0], (size_t)len); - b0->data[d + len] = (unsigned char)'\0'; - b0->slen = d + len; - if (aux != b1) bdestroy(aux); - return BSTR_OK; -} - -/* int bconchar (bstring b, char c) -/ * - * Concatenate the single character c to the bstring b. - */ -int bconchar(bstring b, char c) -{ - int d; - - if (b == NULL) return BSTR_ERR; - d = b->slen; - if ((d | (b->mlen - d)) < 0 || balloc(b, d + 2) != BSTR_OK) return BSTR_ERR; - b->data[d] = (unsigned char)c; - b->data[d + 1] = (unsigned char)'\0'; - b->slen++; - return BSTR_OK; -} - -/* int bcatcstr (bstring b, const char * s) - * - * Concatenate a char * string to a bstring. 
- */ -int bcatcstr(bstring b, const char * s) -{ - char * d; - int i, l; - - if (b == NULL || b->data == NULL || b->slen < 0 || b->mlen < b->slen - || b->mlen <= 0 || s == NULL) return BSTR_ERR; - - /* Optimistically concatenate directly */ - l = b->mlen - b->slen; - d = (char *)&b->data[b->slen]; - for (i = 0; i < l; i++) - { - if ((*d++ = *s++) == '\0') - { - b->slen += i; - return BSTR_OK; - } - } - b->slen += i; - - /* Need to explicitely resize and concatenate tail */ - return bcatblk(b, (const void *)s, (int)strlen(s)); -} - -/* int bcatblk (bstring b, const void * s, int len) - * - * Concatenate a fixed length buffer to a bstring. - */ -int bcatblk(bstring b, const void * s, int len) -{ - int nl; - - if (b == NULL || b->data == NULL || b->slen < 0 || b->mlen < b->slen - || b->mlen <= 0 || s == NULL || len < 0) return BSTR_ERR; - - if (0 > (nl = b->slen + len)) return BSTR_ERR; /* Overflow? */ - if (b->mlen <= nl && 0 > balloc(b, nl + 1)) return BSTR_ERR; - - bBlockCopy(&b->data[b->slen], s, (size_t)len); - b->slen = nl; - b->data[nl] = (unsigned char)'\0'; - return BSTR_OK; -} - -/* bstring bstrcpy (const_bstring b) - * - * Create a copy of the bstring b. - */ -bstring bstrcpy(const_bstring b) -{ - bstring b0; - int i, j; - - /* Attempted to copy an invalid string? 
*/ - if (b == NULL || b->slen < 0 || b->data == NULL) return NULL; - - b0 = (bstring)bstr__alloc(sizeof(struct tagbstring)); - if (b0 == NULL) - { - /* Unable to allocate memory for string header */ - return NULL; - } - - i = b->slen; - j = snapUpSize(i + 1); - - b0->data = (unsigned char *)bstr__alloc(j); - if (b0->data == NULL) - { - j = i + 1; - b0->data = (unsigned char *)bstr__alloc(j); - if (b0->data == NULL) - { - /* Unable to allocate memory for string data */ - bstr__free(b0); - return NULL; - } - } - - b0->mlen = j; - b0->slen = i; - - if (i) bstr__memcpy((char *)b0->data, (char *)b->data, i); - b0->data[b0->slen] = (unsigned char)'\0'; - - return b0; -} - -/* int bassign (bstring a, const_bstring b) - * - * Overwrite the string a with the contents of string b. - */ -int bassign(bstring a, const_bstring b) -{ - if (b == NULL || b->data == NULL || b->slen < 0) - return BSTR_ERR; - if (b->slen != 0) - { - if (balloc(a, b->slen) != BSTR_OK) return BSTR_ERR; - bstr__memmove(a->data, b->data, b->slen); - } - else - { - if (a == NULL || a->data == NULL || a->mlen < a->slen || - a->slen < 0 || a->mlen == 0) - return BSTR_ERR; - } - a->data[b->slen] = (unsigned char)'\0'; - a->slen = b->slen; - return BSTR_OK; -} - -/* int bassignmidstr (bstring a, const_bstring b, int left, int len) - * - * Overwrite the string a with the middle of contents of string b - * starting from position left and running for a length len. left and - * len are clamped to the ends of b as with the function bmidstr. 
- */ -int bassignmidstr(bstring a, const_bstring b, int left, int len) -{ - if (b == NULL || b->data == NULL || b->slen < 0) - return BSTR_ERR; - - if (left < 0) - { - len += left; - left = 0; - } - - if (len > b->slen - left) len = b->slen - left; - - if (a == NULL || a->data == NULL || a->mlen < a->slen || - a->slen < 0 || a->mlen == 0) - return BSTR_ERR; - - if (len > 0) - { - if (balloc(a, len) != BSTR_OK) return BSTR_ERR; - bstr__memmove(a->data, b->data + left, len); - a->slen = len; - } - else - { - a->slen = 0; - } - a->data[a->slen] = (unsigned char)'\0'; - return BSTR_OK; -} - -/* int bassigncstr (bstring a, const char * str) - * - * Overwrite the string a with the contents of char * string str. Note that - * the bstring a must be a well defined and writable bstring. If an error - * occurs BSTR_ERR is returned however a may be partially overwritten. - */ -int bassigncstr(bstring a, const char * str) -{ - int i; - size_t len; - if (a == NULL || a->data == NULL || a->mlen < a->slen || - a->slen < 0 || a->mlen == 0 || NULL == str) - return BSTR_ERR; - - for (i = 0; i < a->mlen; i++) - { - if ('\0' == (a->data[i] = str[i])) - { - a->slen = i; - return BSTR_OK; - } - } - - a->slen = i; - len = strlen(str + i); - if (len > INT_MAX || i + len + 1 > INT_MAX || - 0 > balloc(a, (int)(i + len + 1))) return BSTR_ERR; - bBlockCopy(a->data + i, str + i, (size_t)len + 1); - a->slen += (int)len; - return BSTR_OK; -} - -/* int bassignblk (bstring a, const void * s, int len) - * - * Overwrite the string a with the contents of the block (s, len). Note that - * the bstring a must be a well defined and writable bstring. If an error - * occurs BSTR_ERR is returned and a is not overwritten. 
- */ -int bassignblk(bstring a, const void * s, int len) -{ - if (a == NULL || a->data == NULL || a->mlen < a->slen || - a->slen < 0 || a->mlen == 0 || NULL == s || len + 1 < 1) - return BSTR_ERR; - if (len + 1 > a->mlen && 0 > balloc(a, len + 1)) return BSTR_ERR; - bBlockCopy(a->data, s, (size_t)len); - a->data[len] = (unsigned char)'\0'; - a->slen = len; - return BSTR_OK; -} - -/* int btrunc (bstring b, int n) - * - * Truncate the bstring to at most n characters. - */ -int btrunc(bstring b, int n) -{ - if (n < 0 || b == NULL || b->data == NULL || b->mlen < b->slen || - b->slen < 0 || b->mlen <= 0) return BSTR_ERR; - if (b->slen > n) - { - b->slen = n; - b->data[n] = (unsigned char)'\0'; - } - return BSTR_OK; -} - -#define upcase(c) (toupper ((unsigned char) c)) -#define downcase(c) (tolower ((unsigned char) c)) -#define wspace(c) (isspace ((unsigned char) c)) - -/* int btoupper (bstring b) - * - * Convert contents of bstring to upper case. - */ -int btoupper(bstring b) -{ - int i, len; - if (b == NULL || b->data == NULL || b->mlen < b->slen || - b->slen < 0 || b->mlen <= 0) return BSTR_ERR; - for (i = 0, len = b->slen; i < len; i++) - { - b->data[i] = (unsigned char)upcase(b->data[i]); - } - return BSTR_OK; -} - -/* int btolower (bstring b) - * - * Convert contents of bstring to lower case. - */ -int btolower(bstring b) -{ - int i, len; - if (b == NULL || b->data == NULL || b->mlen < b->slen || - b->slen < 0 || b->mlen <= 0) return BSTR_ERR; - for (i = 0, len = b->slen; i < len; i++) - { - b->data[i] = (unsigned char)downcase(b->data[i]); - } - return BSTR_OK; -} - -/* int bstricmp (const_bstring b0, const_bstring b1) - * - * Compare two strings without differentiating between case. The return - * value is the difference of the values of the characters where the two - * strings first differ after lower case transformation, otherwise 0 is - * returned indicating that the strings are equal. 
If the lengths are - * different, then a difference from 0 is given, but if the first extra - * character is '\0', then it is taken to be the value UCHAR_MAX+1. - */ -int bstricmp(const_bstring b0, const_bstring b1) -{ - int i, v, n; - - if (bdata(b0) == NULL || b0->slen < 0 || - bdata(b1) == NULL || b1->slen < 0) return SHRT_MIN; - if ((n = b0->slen) > b1->slen) n = b1->slen; - else if (b0->slen == b1->slen && b0->data == b1->data) return BSTR_OK; - - for (i = 0; i < n; i++) - { - v = (char)downcase(b0->data[i]) - - (char)downcase(b1->data[i]); - if (0 != v) return v; - } - - if (b0->slen > n) - { - v = (char)downcase(b0->data[n]); - if (v) return v; - return UCHAR_MAX + 1; - } - if (b1->slen > n) - { - v = -(char)downcase(b1->data[n]); - if (v) return v; - return -(int)(UCHAR_MAX + 1); - } - return BSTR_OK; -} - -/* int bstrnicmp (const_bstring b0, const_bstring b1, int n) - * - * Compare two strings without differentiating between case for at most n - * characters. If the position where the two strings first differ is - * before the nth position, the return value is the difference of the values - * of the characters, otherwise 0 is returned. If the lengths are different - * and less than n characters, then a difference from 0 is given, but if the - * first extra character is '\0', then it is taken to be the value - * UCHAR_MAX+1. 
- */ -int bstrnicmp(const_bstring b0, const_bstring b1, int n) -{ - int i, v, m; - - if (bdata(b0) == NULL || b0->slen < 0 || - bdata(b1) == NULL || b1->slen < 0 || n < 0) return SHRT_MIN; - m = n; - if (m > b0->slen) m = b0->slen; - if (m > b1->slen) m = b1->slen; - - if (b0->data != b1->data) - { - for (i = 0; i < m; i++) - { - v = (char)downcase(b0->data[i]); - v -= (char)downcase(b1->data[i]); - if (v != 0) return b0->data[i] - b1->data[i]; - } - } - - if (n == m || b0->slen == b1->slen) return BSTR_OK; - - if (b0->slen > m) - { - v = (char)downcase(b0->data[m]); - if (v) return v; - return UCHAR_MAX + 1; - } - - v = -(char)downcase(b1->data[m]); - if (v) return v; - return -(int)(UCHAR_MAX + 1); -} - -/* int biseqcaseless (const_bstring b0, const_bstring b1) - * - * Compare two strings for equality without differentiating between case. - * If the strings differ other than in case, 0 is returned, if the strings - * are the same, 1 is returned, if there is an error, -1 is returned. If - * the length of the strings are different, this function is O(1). '\0' - * termination characters are not treated in any special way. - */ -int biseqcaseless(const_bstring b0, const_bstring b1) -{ - int i, n; - - if (bdata(b0) == NULL || b0->slen < 0 || - bdata(b1) == NULL || b1->slen < 0) return BSTR_ERR; - if (b0->slen != b1->slen) return BSTR_OK; - if (b0->data == b1->data || b0->slen == 0) return 1; - for (i = 0, n = b0->slen; i < n; i++) - { - if (b0->data[i] != b1->data[i]) - { - unsigned char c = (unsigned char)downcase(b0->data[i]); - if (c != (unsigned char)downcase(b1->data[i])) return 0; - } - } - return 1; -} - -/* int bisstemeqcaselessblk (const_bstring b0, const void * blk, int len) - * - * Compare beginning of string b0 with a block of memory of length len - * without differentiating between case for equality. 
If the beginning of b0 - * differs from the memory block other than in case (or if b0 is too short), - * 0 is returned, if the strings are the same, 1 is returned, if there is an - * error, -1 is returned. '\0' characters are not treated in any special - * way. - */ -int bisstemeqcaselessblk(const_bstring b0, const void * blk, int len) -{ - int i; - - if (bdata(b0) == NULL || b0->slen < 0 || NULL == blk || len < 0) - return BSTR_ERR; - if (b0->slen < len) return BSTR_OK; - if (b0->data == (const unsigned char *)blk || len == 0) return 1; - - for (i = 0; i < len; i++) - { - if (b0->data[i] != ((const unsigned char *)blk)[i]) - { - if (downcase(b0->data[i]) != - downcase(((const unsigned char *)blk)[i])) return 0; - } - } - return 1; -} - -/* - * int bltrimws (bstring b) - * - * Delete whitespace contiguous from the left end of the string. - */ -int bltrimws(bstring b) -{ - int i, len; - - if (b == NULL || b->data == NULL || b->mlen < b->slen || - b->slen < 0 || b->mlen <= 0) return BSTR_ERR; - - for (len = b->slen, i = 0; i < len; i++) - { - if (!wspace(b->data[i])) - { - return bdelete(b, 0, i); - } - } - - b->data[0] = (unsigned char)'\0'; - b->slen = 0; - return BSTR_OK; -} - -/* - * int brtrimws (bstring b) - * - * Delete whitespace contiguous from the right end of the string. - */ -int brtrimws(bstring b) -{ - int i; - - if (b == NULL || b->data == NULL || b->mlen < b->slen || - b->slen < 0 || b->mlen <= 0) return BSTR_ERR; - - for (i = b->slen - 1; i >= 0; i--) - { - if (!wspace(b->data[i])) - { - if (b->mlen > i) b->data[i + 1] = (unsigned char)'\0'; - b->slen = i + 1; - return BSTR_OK; - } - } - - b->data[0] = (unsigned char)'\0'; - b->slen = 0; - return BSTR_OK; -} - -/* - * int btrimws (bstring b) - * - * Delete whitespace contiguous from both ends of the string. 
- */ -int btrimws(bstring b) -{ - int i, j; - - if (b == NULL || b->data == NULL || b->mlen < b->slen || - b->slen < 0 || b->mlen <= 0) return BSTR_ERR; - - for (i = b->slen - 1; i >= 0; i--) - { - if (!wspace(b->data[i])) - { - if (b->mlen > i) b->data[i + 1] = (unsigned char)'\0'; - b->slen = i + 1; - for (j = 0; wspace(b->data[j]); j++) - { - } - return bdelete(b, 0, j); - } - } - - b->data[0] = (unsigned char)'\0'; - b->slen = 0; - return BSTR_OK; -} - -/* int biseq (const_bstring b0, const_bstring b1) - * - * Compare the string b0 and b1. If the strings differ, 0 is returned, if - * the strings are the same, 1 is returned, if there is an error, -1 is - * returned. If the length of the strings are different, this function is - * O(1). '\0' termination characters are not treated in any special way. - */ -int biseq(const_bstring b0, const_bstring b1) -{ - if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL || - b0->slen < 0 || b1->slen < 0) return BSTR_ERR; - if (b0->slen != b1->slen) return BSTR_OK; - if (b0->data == b1->data || b0->slen == 0) return 1; - return !bstr__memcmp(b0->data, b1->data, b0->slen); -} - -/* int bisstemeqblk (const_bstring b0, const void * blk, int len) - * - * Compare beginning of string b0 with a block of memory of length len for - * equality. If the beginning of b0 differs from the memory block (or if b0 - * is too short), 0 is returned, if the strings are the same, 1 is returned, - * if there is an error, -1 is returned. '\0' characters are not treated in - * any special way. 
- */ -int bisstemeqblk(const_bstring b0, const void * blk, int len) -{ - int i; - - if (bdata(b0) == NULL || b0->slen < 0 || NULL == blk || len < 0) - return BSTR_ERR; - if (b0->slen < len) return BSTR_OK; - if (b0->data == (const unsigned char *)blk || len == 0) return 1; - - for (i = 0; i < len; i++) - { - if (b0->data[i] != ((const unsigned char *)blk)[i]) return BSTR_OK; - } - return 1; -} - -/* int biseqcstr (const_bstring b, const char *s) - * - * Compare the bstring b and char * string s. The C string s must be '\0' - * terminated at exactly the length of the bstring b, and the contents - * between the two must be identical with the bstring b with no '\0' - * characters for the two contents to be considered equal. This is - * equivalent to the condition that their current contents will be always be - * equal when comparing them in the same format after converting one or the - * other. If the strings are equal 1 is returned, if they are unequal 0 is - * returned and if there is a detectable error BSTR_ERR is returned. - */ -int biseqcstr(const_bstring b, const char * s) -{ - int i; - if (b == NULL || s == NULL || b->data == NULL || b->slen < 0) return BSTR_ERR; - for (i = 0; i < b->slen; i++) - { - if (s[i] == '\0' || b->data[i] != (unsigned char)s[i]) return BSTR_OK; - } - return s[i] == '\0'; -} - -/* int biseqcstrcaseless (const_bstring b, const char *s) - * - * Compare the bstring b and char * string s. The C string s must be '\0' - * terminated at exactly the length of the bstring b, and the contents - * between the two must be identical except for case with the bstring b with - * no '\0' characters for the two contents to be considered equal. This is - * equivalent to the condition that their current contents will be always be - * equal ignoring case when comparing them in the same format after - * converting one or the other. 
If the strings are equal, except for case, - * 1 is returned, if they are unequal regardless of case 0 is returned and - * if there is a detectable error BSTR_ERR is returned. - */ -int biseqcstrcaseless(const_bstring b, const char * s) -{ - int i; - if (b == NULL || s == NULL || b->data == NULL || b->slen < 0) return BSTR_ERR; - for (i = 0; i < b->slen; i++) - { - if (s[i] == '\0' || - (b->data[i] != (unsigned char)s[i] && - downcase(b->data[i]) != (unsigned char)downcase(s[i]))) - return BSTR_OK; - } - return s[i] == '\0'; -} - -/* int bstrcmp (const_bstring b0, const_bstring b1) - * - * Compare the string b0 and b1. If there is an error, SHRT_MIN is returned, - * otherwise a value less than or greater than zero, indicating that the - * string pointed to by b0 is lexicographically less than or greater than - * the string pointed to by b1 is returned. If the the string lengths are - * unequal but the characters up until the length of the shorter are equal - * then a value less than, or greater than zero, indicating that the string - * pointed to by b0 is shorter or longer than the string pointed to by b1 is - * returned. 0 is returned if and only if the two strings are the same. If - * the length of the strings are different, this function is O(n). Like its - * standard C library counter part strcmp, the comparison does not proceed - * past any '\0' termination characters encountered. 
- */ -int bstrcmp(const_bstring b0, const_bstring b1) -{ - int i, v, n; - - if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL || - b0->slen < 0 || b1->slen < 0) return SHRT_MIN; - n = b0->slen; if (n > b1->slen) n = b1->slen; - if (b0->slen == b1->slen && (b0->data == b1->data || b0->slen == 0)) - return BSTR_OK; - - for (i = 0; i < n; i++) - { - v = ((char)b0->data[i]) - ((char)b1->data[i]); - if (v != 0) return v; - if (b0->data[i] == (unsigned char)'\0') return BSTR_OK; - } - - if (b0->slen > n) return 1; - if (b1->slen > n) return -1; - return BSTR_OK; -} - -/* int bstrncmp (const_bstring b0, const_bstring b1, int n) - * - * Compare the string b0 and b1 for at most n characters. If there is an - * error, SHRT_MIN is returned, otherwise a value is returned as if b0 and - * b1 were first truncated to at most n characters then bstrcmp was called - * with these new strings are paremeters. If the length of the strings are - * different, this function is O(n). Like its standard C library counter - * part strcmp, the comparison does not proceed past any '\0' termination - * characters encountered. - */ -int bstrncmp(const_bstring b0, const_bstring b1, int n) -{ - int i, v, m; - - if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL || - b0->slen < 0 || b1->slen < 0) return SHRT_MIN; - m = n; - if (m > b0->slen) m = b0->slen; - if (m > b1->slen) m = b1->slen; - - if (b0->data != b1->data) - { - for (i = 0; i < m; i++) - { - v = ((char)b0->data[i]) - ((char)b1->data[i]); - if (v != 0) return v; - if (b0->data[i] == (unsigned char)'\0') return BSTR_OK; - } - } - - if (n == m || b0->slen == b1->slen) return BSTR_OK; - - if (b0->slen > m) return 1; - return -1; -} - -/* bstring bmidstr (const_bstring b, int left, int len) - * - * Create a bstring which is the substring of b starting from position left - * and running for a length len (clamped by the end of the bstring b.) If - * b is detectably invalid, then NULL is returned. 
The section described - * by (left, len) is clamped to the boundaries of b. - */ -bstring bmidstr(const_bstring b, int left, int len) -{ - if (b == NULL || b->slen < 0 || b->data == NULL) return NULL; - - if (left < 0) - { - len += left; - left = 0; - } - - if (len > b->slen - left) len = b->slen - left; - - if (len <= 0) return bfromcstr(""); - return blk2bstr(b->data + left, len); -} - -/* int bdelete (bstring b, int pos, int len) - * - * Removes characters from pos to pos+len-1 inclusive and shifts the tail of - * the bstring starting from pos+len to pos. len must be positive for this - * call to have any effect. The section of the string described by (pos, - * len) is clamped to boundaries of the bstring b. - */ -int bdelete(bstring b, int pos, int len) -{ - /* Clamp to left side of bstring */ - if (pos < 0) - { - len += pos; - pos = 0; - } - - if (len < 0 || b == NULL || b->data == NULL || b->slen < 0 || - b->mlen < b->slen || b->mlen <= 0) - return BSTR_ERR; - if (len > 0 && pos < b->slen) - { - if (pos + len >= b->slen) - { - b->slen = pos; - } - else - { - bBlockCopy((char *)(b->data + pos), - (char *)(b->data + pos + len), - b->slen - (pos + len)); - b->slen -= len; - } - b->data[b->slen] = (unsigned char)'\0'; - } - return BSTR_OK; -} - -/* int bdestroy (bstring b) - * - * Free up the bstring. Note that if b is detectably invalid or not writable - * then no action is performed and BSTR_ERR is returned. Like a freed memory - * allocation, dereferences, writes or any other action on b after it has - * been bdestroyed is undefined. - */ -int bdestroy(bstring b) -{ - if (b == NULL || b->slen < 0 || b->mlen <= 0 || b->mlen < b->slen || - b->data == NULL) - return BSTR_ERR; - - bstr__free(b->data); - - /* In case there is any stale usage, there is one more chance to - notice this error. 
*/ - - b->slen = -1; - b->mlen = -__LINE__; - b->data = NULL; - - bstr__free(b); - return BSTR_OK; -} - -/* int binstr (const_bstring b1, int pos, const_bstring b2) - * - * Search for the bstring b2 in b1 starting from position pos, and searching - * forward. If it is found then return with the first position where it is - * found, otherwise return BSTR_ERR. Note that this is just a brute force - * string searcher that does not attempt clever things like the Boyer-Moore - * search algorithm. Because of this there are many degenerate cases where - * this can take much longer than it needs to. - */ -int binstr(const_bstring b1, int pos, const_bstring b2) -{ - int j, ii, ll, lf; - unsigned char * d0; - unsigned char c0; - register unsigned char * d1; - register unsigned char c1; - register int i; - - if (b1 == NULL || b1->data == NULL || b1->slen < 0 || - b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR; - if (b1->slen == pos) return (b2->slen == 0) ? pos : BSTR_ERR; - if (b1->slen < pos || pos < 0) return BSTR_ERR; - if (b2->slen == 0) return pos; - - /* No space to find such a string? 
*/ - if ((lf = b1->slen - b2->slen + 1) <= pos) return BSTR_ERR; - - /* An obvious alias case */ - if (b1->data == b2->data && pos == 0) return 0; - - i = pos; - - d0 = b2->data; - d1 = b1->data; - ll = b2->slen; - - /* Peel off the b2->slen == 1 case */ - c0 = d0[0]; - if (1 == ll) - { - for (; i < lf; i++) - if (c0 == d1[i]) return i; - return BSTR_ERR; - } - - c1 = c0; - j = 0; - lf = b1->slen - 1; - - ii = -1; - if (i < lf) - do - { - /* Unrolled current character test */ - if (c1 != d1[i]) - { - if (c1 != d1[1 + i]) - { - i += 2; - continue; - } - i++; - } - - /* Take note if this is the start of a potential match */ - if (0 == j) ii = i; - - /* Shift the test character down by one */ - j++; - i++; - - /* If this isn't past the last character continue */ - if (j < ll) - { - c1 = d0[j]; - continue; - } - - N0:; - - /* If no characters mismatched, then we matched */ - if (i == ii + j) return ii; - - /* Shift back to the beginning */ - i -= j; - j = 0; - c1 = c0; - } - while (i < lf); - - /* Deal with last case if unrolling caused a misalignment */ - if (i == lf && ll == j + 1 && c1 == d1[i]) goto N0; - - return BSTR_ERR; -} - -/* int binstrr (const_bstring b1, int pos, const_bstring b2) - * - * Search for the bstring b2 in b1 starting from position pos, and searching - * backward. If it is found then return with the first position where it is - * found, otherwise return BSTR_ERR. Note that this is just a brute force - * string searcher that does not attempt clever things like the Boyer-Moore - * search algorithm. Because of this there are many degenerate cases where - * this can take much longer than it needs to. 
- */ -int binstrr(const_bstring b1, int pos, const_bstring b2) -{ - int j, i, l; - unsigned char * d0, * d1; - - if (b1 == NULL || b1->data == NULL || b1->slen < 0 || - b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR; - if (b1->slen == pos && b2->slen == 0) return pos; - if (b1->slen < pos || pos < 0) return BSTR_ERR; - if (b2->slen == 0) return pos; - - /* Obvious alias case */ - if (b1->data == b2->data && pos == 0 && b2->slen <= b1->slen) return 0; - - i = pos; - if ((l = b1->slen - b2->slen) < 0) return BSTR_ERR; - - /* If no space to find such a string then snap back */ - if (l + 1 <= i) i = l; - j = 0; - - d0 = b2->data; - d1 = b1->data; - l = b2->slen; - - for (;;) - { - if (d0[j] == d1[i + j]) - { - j++; - if (j >= l) return i; - } - else - { - i--; - if (i < 0) break; - j = 0; - } - } - - return BSTR_ERR; -} - -/* int binstrcaseless (const_bstring b1, int pos, const_bstring b2) - * - * Search for the bstring b2 in b1 starting from position pos, and searching - * forward but without regard to case. If it is found then return with the - * first position where it is found, otherwise return BSTR_ERR. Note that - * this is just a brute force string searcher that does not attempt clever - * things like the Boyer-Moore search algorithm. Because of this there are - * many degenerate cases where this can take much longer than it needs to. - */ -int binstrcaseless(const_bstring b1, int pos, const_bstring b2) -{ - int j, i, l, ll; - unsigned char * d0, * d1; - - if (b1 == NULL || b1->data == NULL || b1->slen < 0 || - b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR; - if (b1->slen == pos) return (b2->slen == 0) ? pos : BSTR_ERR; - if (b1->slen < pos || pos < 0) return BSTR_ERR; - if (b2->slen == 0) return pos; - - l = b1->slen - b2->slen + 1; - - /* No space to find such a string? 
*/ - if (l <= pos) return BSTR_ERR; - - /* An obvious alias case */ - if (b1->data == b2->data && pos == 0) return BSTR_OK; - - i = pos; - j = 0; - - d0 = b2->data; - d1 = b1->data; - ll = b2->slen; - - for (;;) - { - if (d0[j] == d1[i + j] || downcase(d0[j]) == downcase(d1[i + j])) - { - j++; - if (j >= ll) return i; - } - else - { - i++; - if (i >= l) break; - j = 0; - } - } - - return BSTR_ERR; -} - -/* int binstrrcaseless (const_bstring b1, int pos, const_bstring b2) - * - * Search for the bstring b2 in b1 starting from position pos, and searching - * backward but without regard to case. If it is found then return with the - * first position where it is found, otherwise return BSTR_ERR. Note that - * this is just a brute force string searcher that does not attempt clever - * things like the Boyer-Moore search algorithm. Because of this there are - * many degenerate cases where this can take much longer than it needs to. - */ -int binstrrcaseless(const_bstring b1, int pos, const_bstring b2) -{ - int j, i, l; - unsigned char * d0, * d1; - - if (b1 == NULL || b1->data == NULL || b1->slen < 0 || - b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR; - if (b1->slen == pos && b2->slen == 0) return pos; - if (b1->slen < pos || pos < 0) return BSTR_ERR; - if (b2->slen == 0) return pos; - - /* Obvious alias case */ - if (b1->data == b2->data && pos == 0 && b2->slen <= b1->slen) return BSTR_OK; - - i = pos; - if ((l = b1->slen - b2->slen) < 0) return BSTR_ERR; - - /* If no space to find such a string then snap back */ - if (l + 1 <= i) i = l; - j = 0; - - d0 = b2->data; - d1 = b1->data; - l = b2->slen; - - for (;;) - { - if (d0[j] == d1[i + j] || downcase(d0[j]) == downcase(d1[i + j])) - { - j++; - if (j >= l) return i; - } - else - { - i--; - if (i < 0) break; - j = 0; - } - } - - return BSTR_ERR; -} - -/* int bstrchrp (const_bstring b, int c, int pos) - * - * Search for the character c in b forwards from the position pos - * (inclusive). 
- */ -int bstrchrp(const_bstring b, int c, int pos) -{ - unsigned char * p; - - if (b == NULL || b->data == NULL || b->slen <= pos || pos < 0) return BSTR_ERR; - p = (unsigned char *)bstr__memchr((b->data + pos), (unsigned char)c, (b->slen - pos)); - if (p) return (int)(p - b->data); - return BSTR_ERR; -} - -/* int bstrrchrp (const_bstring b, int c, int pos) - * - * Search for the character c in b backwards from the position pos in string - * (inclusive). - */ -int bstrrchrp(const_bstring b, int c, int pos) -{ - int i; - - if (b == NULL || b->data == NULL || b->slen <= pos || pos < 0) return BSTR_ERR; - for (i = pos; i >= 0; i--) - { - if (b->data[i] == (unsigned char)c) return i; - } - return BSTR_ERR; -} - -#if !defined(BSTRLIB_AGGRESSIVE_MEMORY_FOR_SPEED_TRADEOFF) -#define LONG_LOG_BITS_QTY (3) -#define LONG_BITS_QTY (1 << LONG_LOG_BITS_QTY) -#define LONG_TYPE unsigned char - -#define CFCLEN ((1 << CHAR_BIT) / LONG_BITS_QTY) -struct charField { LONG_TYPE content[CFCLEN]; }; -#define testInCharField(cf, c) ((cf)->content[(c) >> LONG_LOG_BITS_QTY] & (((long)1) << ((c) & (LONG_BITS_QTY-1)))) -#define setInCharField(cf, idx) {\ - unsigned int c = (unsigned int) (idx); \ - (cf)->content[c >> LONG_LOG_BITS_QTY] |= (LONG_TYPE) (1ul << (c & (LONG_BITS_QTY-1))); \ -} - -#else - -#define CFCLEN (1 << CHAR_BIT) -struct charField { unsigned char content[CFCLEN]; }; -#define testInCharField(cf, c) ((cf)->content[(unsigned char) (c)]) -#define setInCharField(cf, idx) (cf)->content[(unsigned int) (idx)] = ~0 - -#endif - -/* Convert a bstring to charField */ -static int buildCharField(struct charField * cf, const_bstring b) -{ - int i; - if (b == NULL || b->data == NULL || b->slen <= 0) return BSTR_ERR; - memset((void *)cf->content, 0, sizeof(struct charField)); - for (i = 0; i < b->slen; i++) - { - setInCharField(cf, b->data[i]); - } - return BSTR_OK; -} - -static void invertCharField(struct charField * cf) -{ - int i; - for (i = 0; i < CFCLEN; i++) - cf->content[i] = 
~cf->content[i]; -} - -/* Inner engine for binchr */ -static int binchrCF(const unsigned char * data, int len, int pos, const struct charField * cf) -{ - int i; - for (i = pos; i < len; i++) - { - unsigned char c = (unsigned char)data[i]; - if (testInCharField(cf, c)) return i; - } - return BSTR_ERR; -} - -/* int binchr (const_bstring b0, int pos, const_bstring b1); - * - * Search for the first position in b0 starting from pos or after, in which - * one of the characters in b1 is found and return it. If such a position - * does not exist in b0, then BSTR_ERR is returned. - */ -int binchr(const_bstring b0, int pos, const_bstring b1) -{ - struct charField chrs; - if (pos < 0 || b0 == NULL || b0->data == NULL || - b0->slen <= pos) return BSTR_ERR; - if (1 == b1->slen) return bstrchrp(b0, b1->data[0], pos); - if (0 > buildCharField(&chrs, b1)) return BSTR_ERR; - return binchrCF(b0->data, b0->slen, pos, &chrs); -} - -/* Inner engine for binchrr */ -static int binchrrCF(const unsigned char * data, int pos, const struct charField * cf) -{ - int i; - for (i = pos; i >= 0; i--) - { - unsigned int c = (unsigned int)data[i]; - if (testInCharField(cf, c)) return i; - } - return BSTR_ERR; -} - -/* int binchrr (const_bstring b0, int pos, const_bstring b1); - * - * Search for the last position in b0 no greater than pos, in which one of - * the characters in b1 is found and return it. If such a position does not - * exist in b0, then BSTR_ERR is returned. 
- */ -int binchrr(const_bstring b0, int pos, const_bstring b1) -{ - struct charField chrs; - if (pos < 0 || b0 == NULL || b0->data == NULL || b1 == NULL || - b0->slen < pos) return BSTR_ERR; - if (pos == b0->slen) pos--; - if (1 == b1->slen) return bstrrchrp(b0, b1->data[0], pos); - if (0 > buildCharField(&chrs, b1)) return BSTR_ERR; - return binchrrCF(b0->data, pos, &chrs); -} - -/* int bninchr (const_bstring b0, int pos, const_bstring b1); - * - * Search for the first position in b0 starting from pos or after, in which - * none of the characters in b1 is found and return it. If such a position - * does not exist in b0, then BSTR_ERR is returned. - */ -int bninchr(const_bstring b0, int pos, const_bstring b1) -{ - struct charField chrs; - if (pos < 0 || b0 == NULL || b0->data == NULL || - b0->slen <= pos) return BSTR_ERR; - if (buildCharField(&chrs, b1) < 0) return BSTR_ERR; - invertCharField(&chrs); - return binchrCF(b0->data, b0->slen, pos, &chrs); -} - -/* int bninchrr (const_bstring b0, int pos, const_bstring b1); - * - * Search for the last position in b0 no greater than pos, in which none of - * the characters in b1 is found and return it. If such a position does not - * exist in b0, then BSTR_ERR is returned. - */ -int bninchrr(const_bstring b0, int pos, const_bstring b1) -{ - struct charField chrs; - if (pos < 0 || b0 == NULL || b0->data == NULL || - b0->slen < pos) return BSTR_ERR; - if (pos == b0->slen) pos--; - if (buildCharField(&chrs, b1) < 0) return BSTR_ERR; - invertCharField(&chrs); - return binchrrCF(b0->data, pos, &chrs); -} - -/* int bsetstr (bstring b0, int pos, bstring b1, unsigned char fill) - * - * Overwrite the string b0 starting at position pos with the string b1. If - * the position pos is past the end of b0, then the character "fill" is - * appended as necessary to make up the gap between the end of b0 and pos. - * If b1 is NULL, it behaves as if it were a 0-length string. 
- */ -int bsetstr(bstring b0, int pos, const_bstring b1, unsigned char fill) -{ - int d, newlen; - ptrdiff_t pd; - bstring aux = (bstring)b1; - - if (pos < 0 || b0 == NULL || b0->slen < 0 || NULL == b0->data || - b0->mlen < b0->slen || b0->mlen <= 0) return BSTR_ERR; - if (b1 != NULL && (b1->slen < 0 || b1->data == NULL)) return BSTR_ERR; - - d = pos; - - /* Aliasing case */ - if (NULL != aux) - { - if ((pd = (ptrdiff_t)(b1->data - b0->data)) >= 0 && pd < (ptrdiff_t)b0->mlen) - { - if (NULL == (aux = bstrcpy(b1))) return BSTR_ERR; - } - d += aux->slen; - } - - /* Increase memory size if necessary */ - if (balloc(b0, d + 1) != BSTR_OK) - { - if (aux != b1) bdestroy(aux); - return BSTR_ERR; - } - - newlen = b0->slen; - - /* Fill in "fill" character as necessary */ - if (pos > newlen) - { - bstr__memset(b0->data + b0->slen, (int)fill, (size_t)(pos - b0->slen)); - newlen = pos; - } - - /* Copy b1 to position pos in b0. */ - if (aux != NULL) - { - bBlockCopy((char *)(b0->data + pos), (char *)aux->data, aux->slen); - if (aux != b1) bdestroy(aux); - } - - /* Indicate the potentially increased size of b0 */ - if (d > newlen) newlen = d; - - b0->slen = newlen; - b0->data[newlen] = (unsigned char)'\0'; - - return BSTR_OK; -} - -/* int binsert (bstring b1, int pos, bstring b2, unsigned char fill) - * - * Inserts the string b2 into b1 at position pos. If the position pos is - * past the end of b1, then the character "fill" is appended as necessary to - * make up the gap between the end of b1 and pos. Unlike bsetstr, binsert - * does not allow b2 to be NULL. 
- */ -int binsert(bstring b1, int pos, const_bstring b2, unsigned char fill) -{ - int d, l; - ptrdiff_t pd; - bstring aux = (bstring)b2; - - if (pos < 0 || b1 == NULL || b2 == NULL || b1->slen < 0 || - b2->slen < 0 || b1->mlen < b1->slen || b1->mlen <= 0) return BSTR_ERR; - - /* Aliasing case */ - if ((pd = (ptrdiff_t)(b2->data - b1->data)) >= 0 && pd < (ptrdiff_t)b1->mlen) - { - if (NULL == (aux = bstrcpy(b2))) return BSTR_ERR; - } - - /* Compute the two possible end pointers */ - d = b1->slen + aux->slen; - l = pos + aux->slen; - if ((d | l) < 0) return BSTR_ERR; - - if (l > d) - { - /* Inserting past the end of the string */ - if (balloc(b1, l + 1) != BSTR_OK) - { - if (aux != b2) bdestroy(aux); - return BSTR_ERR; - } - bstr__memset(b1->data + b1->slen, (int)fill, (size_t)(pos - b1->slen)); - b1->slen = l; - } - else - { - /* Inserting in the middle of the string */ - if (balloc(b1, d + 1) != BSTR_OK) - { - if (aux != b2) bdestroy(aux); - return BSTR_ERR; - } - bBlockCopy(b1->data + l, b1->data + pos, d - l); - b1->slen = d; - } - bBlockCopy(b1->data + pos, aux->data, aux->slen); - b1->data[b1->slen] = (unsigned char)'\0'; - if (aux != b2) bdestroy(aux); - return BSTR_OK; -} - -/* int breplace (bstring b1, int pos, int len, bstring b2, - * unsigned char fill) - * - * Replace a section of a string from pos for a length len with the string b2. - * fill is used is pos > b1->slen. - */ -int breplace(bstring b1, int pos, int len, const_bstring b2, - unsigned char fill) -{ - int pl, ret; - ptrdiff_t pd; - bstring aux = (bstring)b2; - - if (pos < 0 || len < 0 || (pl = pos + len) < 0 || b1 == NULL || - b2 == NULL || b1->data == NULL || b2->data == NULL || - b1->slen < 0 || b2->slen < 0 || b1->mlen < b1->slen || - b1->mlen <= 0) return BSTR_ERR; - - /* Straddles the end? 
*/ - if (pl >= b1->slen) - { - if ((ret = bsetstr(b1, pos, b2, fill)) < 0) return ret; - if (pos + b2->slen < b1->slen) - { - b1->slen = pos + b2->slen; - b1->data[b1->slen] = (unsigned char)'\0'; - } - return ret; - } - - /* Aliasing case */ - if ((pd = (ptrdiff_t)(b2->data - b1->data)) >= 0 && pd < (ptrdiff_t)b1->slen) - { - if (NULL == (aux = bstrcpy(b2))) return BSTR_ERR; - } - - if (aux->slen > len) - { - if (balloc(b1, b1->slen + aux->slen - len) != BSTR_OK) - { - if (aux != b2) bdestroy(aux); - return BSTR_ERR; - } - } - - if (aux->slen != len) bstr__memmove(b1->data + pos + aux->slen, b1->data + pos + len, b1->slen - (pos + len)); - bstr__memcpy(b1->data + pos, aux->data, aux->slen); - b1->slen += aux->slen - len; - b1->data[b1->slen] = (unsigned char)'\0'; - if (aux != b2) bdestroy(aux); - return BSTR_OK; -} - -/* - * findreplaceengine is used to implement bfindreplace and - * bfindreplacecaseless. It works by breaking the three cases of - * expansion, reduction and replacement, and solving each of these - * in the most efficient way possible. - */ - -typedef int (*instr_fnptr) (const_bstring s1, int pos, const_bstring s2); - -#define INITIAL_STATIC_FIND_INDEX_COUNT 32 - -static int findreplaceengine(bstring b, const_bstring find, const_bstring repl, int pos, instr_fnptr instr) -{ - int i, ret, slen, mlen, delta, acc; - int * d; - int static_d[INITIAL_STATIC_FIND_INDEX_COUNT + 1]; /* This +1 is unnecessary, but it shuts up LINT. 
*/ - ptrdiff_t pd; - bstring auxf = (bstring)find; - bstring auxr = (bstring)repl; - - if (b == NULL || b->data == NULL || find == NULL || - find->data == NULL || repl == NULL || repl->data == NULL || - pos < 0 || find->slen <= 0 || b->mlen < 0 || b->slen > b->mlen || - b->mlen <= 0 || b->slen < 0 || repl->slen < 0) return BSTR_ERR; - if (pos > b->slen - find->slen) return BSTR_OK; - - /* Alias with find string */ - pd = (ptrdiff_t)(find->data - b->data); - if ((ptrdiff_t)(pos - find->slen) < pd && pd < (ptrdiff_t)b->slen) - { - if (NULL == (auxf = bstrcpy(find))) return BSTR_ERR; - } - - /* Alias with repl string */ - pd = (ptrdiff_t)(repl->data - b->data); - if ((ptrdiff_t)(pos - repl->slen) < pd && pd < (ptrdiff_t)b->slen) - { - if (NULL == (auxr = bstrcpy(repl))) - { - if (auxf != find) bdestroy(auxf); - return BSTR_ERR; - } - } - - delta = auxf->slen - auxr->slen; - - /* in-place replacement since find and replace strings are of equal - length */ - if (delta == 0) - { - while ((pos = instr(b, pos, auxf)) >= 0) - { - bstr__memcpy(b->data + pos, auxr->data, auxr->slen); - pos += auxf->slen; - } - if (auxf != find) bdestroy(auxf); - if (auxr != repl) bdestroy(auxr); - return BSTR_OK; - } - - /* shrinking replacement since auxf->slen > auxr->slen */ - if (delta > 0) - { - acc = 0; - - while ((i = instr(b, pos, auxf)) >= 0) - { - if (acc && i > pos) - bstr__memmove(b->data + pos - acc, b->data + pos, i - pos); - if (auxr->slen) - bstr__memcpy(b->data + i - acc, auxr->data, auxr->slen); - acc += delta; - pos = i + auxf->slen; - } - - if (acc) - { - i = b->slen; - if (i > pos) - bstr__memmove(b->data + pos - acc, b->data + pos, i - pos); - b->slen -= acc; - b->data[b->slen] = (unsigned char)'\0'; - } - - if (auxf != find) bdestroy(auxf); - if (auxr != repl) bdestroy(auxr); - return BSTR_OK; - } - - /* expanding replacement since find->slen < repl->slen. Its a lot - more complicated. 
This works by first finding all the matches and - storing them to a growable array, then doing at most one resize of - the destination bstring and then performing the direct memory transfers - of the string segment pieces to form the final result. The growable - array of matches uses a deferred doubling reallocing strategy. What - this means is that it starts as a reasonably fixed sized auto array in - the hopes that many if not most cases will never need to grow this - array. But it switches as soon as the bounds of the array will be - exceeded. An extra find result is always appended to this array that - corresponds to the end of the destination string, so slen is checked - against mlen - 1 rather than mlen before resizing. - */ - - mlen = INITIAL_STATIC_FIND_INDEX_COUNT; - d = (int *)static_d; /* Avoid malloc for trivial/initial cases */ - acc = slen = 0; - - while ((pos = instr(b, pos, auxf)) >= 0) - { - if (slen >= mlen - 1) - { - int sl, *t; - - mlen += mlen; - sl = sizeof(int *) * mlen; - if (static_d == d) d = NULL; /* static_d cannot be realloced */ - if (mlen <= 0 || sl < mlen || NULL == (t = (int *)bstr__realloc(d, sl))) - { - ret = BSTR_ERR; - goto done; - } - if (NULL == d) bstr__memcpy(t, static_d, sizeof(static_d)); - d = t; - } - d[slen] = pos; - slen++; - acc -= delta; - pos += auxf->slen; - if (pos < 0 || acc < 0) - { - ret = BSTR_ERR; - goto done; - } - } - - /* slen <= INITIAL_STATIC_INDEX_COUNT-1 or mlen-1 here. */ - d[slen] = b->slen; - - if (BSTR_OK == (ret = balloc(b, b->slen + acc + 1))) - { - b->slen += acc; - for (i = slen - 1; i >= 0; i--) - { - int s, l; - s = d[i] + auxf->slen; - l = d[i + 1] - s; /* d[slen] may be accessed here. 
*/ - if (l) - { - bstr__memmove(b->data + s + acc, b->data + s, l); - } - if (auxr->slen) - { - bstr__memmove(b->data + s + acc - auxr->slen, - auxr->data, auxr->slen); - } - acc += delta; - } - b->data[b->slen] = (unsigned char)'\0'; - } - -done:; - if (static_d == d) d = NULL; - bstr__free(d); - if (auxf != find) bdestroy(auxf); - if (auxr != repl) bdestroy(auxr); - return ret; -} - -/* int bfindreplace (bstring b, const_bstring find, const_bstring repl, - * int pos) - * - * Replace all occurrences of a find string with a replace string after a - * given point in a bstring. - */ -int bfindreplace(bstring b, const_bstring find, const_bstring repl, int pos) -{ - return findreplaceengine(b, find, repl, pos, binstr); -} - -/* int bfindreplacecaseless (bstring b, const_bstring find, const_bstring repl, - * int pos) - * - * Replace all occurrences of a find string, ignoring case, with a replace - * string after a given point in a bstring. - */ -int bfindreplacecaseless(bstring b, const_bstring find, const_bstring repl, int pos) -{ - return findreplaceengine(b, find, repl, pos, binstrcaseless); -} - -/* int binsertch (bstring b, int pos, int len, unsigned char fill) - * - * Inserts the character fill repeatedly into b at position pos for a - * length len. If the position pos is past the end of b, then the - * character "fill" is appended as necessary to make up the gap between the - * end of b and the position pos + len. 
- */ -int binsertch(bstring b, int pos, int len, unsigned char fill) -{ - int d, l, i; - - if (pos < 0 || b == NULL || b->slen < 0 || b->mlen < b->slen || - b->mlen <= 0 || len < 0) return BSTR_ERR; - - /* Compute the two possible end pointers */ - d = b->slen + len; - l = pos + len; - if ((d | l) < 0) return BSTR_ERR; - - if (l > d) - { - /* Inserting past the end of the string */ - if (balloc(b, l + 1) != BSTR_OK) return BSTR_ERR; - pos = b->slen; - b->slen = l; - } - else - { - /* Inserting in the middle of the string */ - if (balloc(b, d + 1) != BSTR_OK) return BSTR_ERR; - for (i = d - 1; i >= l; i--) - { - b->data[i] = b->data[i - len]; - } - b->slen = d; - } - - for (i = pos; i < l; i++) - b->data[i] = fill; - b->data[b->slen] = (unsigned char)'\0'; - return BSTR_OK; -} - -/* int bpattern (bstring b, int len) - * - * Replicate the bstring, b in place, end to end repeatedly until it - * surpasses len characters, then chop the result to exactly len characters. - * This function operates in-place. The function will return with BSTR_ERR - * if b is NULL or of length 0, otherwise BSTR_OK is returned. - */ -int bpattern(bstring b, int len) -{ - int i, d; - - d = blength(b); - if (d <= 0 || len < 0 || balloc(b, len + 1) != BSTR_OK) return BSTR_ERR; - if (len > 0) - { - if (d == 1) return bsetstr(b, len, NULL, b->data[0]); - for (i = d; i < len; i++) - b->data[i] = b->data[i - d]; - } - b->data[len] = (unsigned char)'\0'; - b->slen = len; - return BSTR_OK; -} - -#define BS_BUFF_SZ (1024) - -/* int breada (bstring b, bNread readPtr, void * parm) - * - * Use a finite buffer fread-like function readPtr to concatenate to the - * bstring b the entire contents of file-like source data in a roughly - * efficient way. - */ -int breada(bstring b, bNread readPtr, void * parm) -{ - int i, l, n; - - if (b == NULL || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen || - b->mlen <= 0 || readPtr == NULL) return BSTR_ERR; - - i = b->slen; - for (n = i + 16;; n += ((n < BS_BUFF_SZ) ? 
n : BS_BUFF_SZ)) - { - if (BSTR_OK != balloc(b, n + 1)) return BSTR_ERR; - l = (int)readPtr((void *)(b->data + i), 1, n - i, parm); - i += l; - b->slen = i; - if (i < n) break; - } - - b->data[i] = (unsigned char)'\0'; - return BSTR_OK; -} - -/* bstring bread (bNread readPtr, void * parm) - * - * Use a finite buffer fread-like function readPtr to create a bstring - * filled with the entire contents of file-like source data in a roughly - * efficient way. - */ -bstring bread(bNread readPtr, void * parm) -{ - bstring buff; - - if (0 > breada(buff = bfromcstr(""), readPtr, parm)) - { - bdestroy(buff); - return NULL; - } - return buff; -} - -/* int bassigngets (bstring b, bNgetc getcPtr, void * parm, char terminator) - * - * Use an fgetc-like single character stream reading function (getcPtr) to - * obtain a sequence of characters which are concatenated to the end of the - * bstring b. The stream read is terminated by the passed in terminator - * parameter. - * - * If getcPtr returns with a negative number, or the terminator character - * (which is appended) is read, then the stream reading is halted and the - * function returns with a partial result in b. If there is an empty partial - * result, 1 is returned. If no characters are read, or there is some other - * detectable error, BSTR_ERR is returned. 
- */ -int bassigngets(bstring b, bNgetc getcPtr, void * parm, char terminator) -{ - int c, d, e; - - if (b == NULL || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen || - b->mlen <= 0 || getcPtr == NULL) return BSTR_ERR; - d = 0; - e = b->mlen - 2; - - while ((c = getcPtr(parm)) >= 0) - { - if (d > e) - { - b->slen = d; - if (balloc(b, d + 2) != BSTR_OK) return BSTR_ERR; - e = b->mlen - 2; - } - b->data[d] = (unsigned char)c; - d++; - if (c == terminator) break; - } - - b->data[d] = (unsigned char)'\0'; - b->slen = d; - - return d == 0 && c < 0; -} - -/* int bgetsa (bstring b, bNgetc getcPtr, void * parm, char terminator) - * - * Use an fgetc-like single character stream reading function (getcPtr) to - * obtain a sequence of characters which are concatenated to the end of the - * bstring b. The stream read is terminated by the passed in terminator - * parameter. - * - * If getcPtr returns with a negative number, or the terminator character - * (which is appended) is read, then the stream reading is halted and the - * function returns with a partial result concatentated to b. If there is - * an empty partial result, 1 is returned. If no characters are read, or - * there is some other detectable error, BSTR_ERR is returned. 
- */ -int bgetsa(bstring b, bNgetc getcPtr, void * parm, char terminator) -{ - int c, d, e; - - if (b == NULL || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen || - b->mlen <= 0 || getcPtr == NULL) return BSTR_ERR; - d = b->slen; - e = b->mlen - 2; - - while ((c = getcPtr(parm)) >= 0) - { - if (d > e) - { - b->slen = d; - if (balloc(b, d + 2) != BSTR_OK) return BSTR_ERR; - e = b->mlen - 2; - } - b->data[d] = (unsigned char)c; - d++; - if (c == terminator) break; - } - - b->data[d] = (unsigned char)'\0'; - b->slen = d; - - return d == 0 && c < 0; -} - -/* bstring bgets (bNgetc getcPtr, void * parm, char terminator) - * - * Use an fgetc-like single character stream reading function (getcPtr) to - * obtain a sequence of characters which are concatenated into a bstring. - * The stream read is terminated by the passed in terminator function. - * - * If getcPtr returns with a negative number, or the terminator character - * (which is appended) is read, then the stream reading is halted and the - * result obtained thus far is returned. If no characters are read, or - * there is some other detectable error, NULL is returned. - */ -bstring bgets(bNgetc getcPtr, void * parm, char terminator) -{ - bstring buff; - - if (0 > bgetsa(buff = bfromcstr(""), getcPtr, parm, terminator) || 0 >= buff->slen) - { - bdestroy(buff); - buff = NULL; - } - return buff; -} - -struct bStream -{ - bstring buff; /* Buffer for over-reads */ - void * parm; /* The stream handle for core stream */ - bNread readFnPtr; /* fread compatible fnptr for core stream */ - int isEOF; /* track file's EOF state */ - int maxBuffSz; -}; - -/* struct bStream * bsopen (bNread readPtr, void * parm) - * - * Wrap a given open stream (described by a fread compatible function - * pointer and stream handle) into an open bStream suitable for the bstring - * library streaming functions. 
- */ -struct bStream * bsopen(bNread readPtr, void * parm) -{ - struct bStream * s; - - if (readPtr == NULL) return NULL; - s = (struct bStream *)bstr__alloc(sizeof(struct bStream)); - if (s == NULL) return NULL; - s->parm = parm; - s->buff = bfromcstr(""); - s->readFnPtr = readPtr; - s->maxBuffSz = BS_BUFF_SZ; - s->isEOF = 0; - return s; -} - -/* int bsbufflength (struct bStream * s, int sz) - * - * Set the length of the buffer used by the bStream. If sz is zero, the - * length is not set. This function returns with the previous length. - */ -int bsbufflength(struct bStream * s, int sz) -{ - int oldSz; - if (s == NULL || sz < 0) return BSTR_ERR; - oldSz = s->maxBuffSz; - if (sz > 0) s->maxBuffSz = sz; - return oldSz; -} - -int bseof(const struct bStream * s) -{ - if (s == NULL || s->readFnPtr == NULL) return BSTR_ERR; - return s->isEOF && (s->buff->slen == 0); -} - -/* void * bsclose (struct bStream * s) - * - * Close the bStream, and return the handle to the stream that was originally - * used to open the given stream. - */ -void * bsclose(struct bStream * s) -{ - void * parm; - if (s == NULL) return NULL; - s->readFnPtr = NULL; - if (s->buff) bdestroy(s->buff); - s->buff = NULL; - parm = s->parm; - s->parm = NULL; - s->isEOF = 1; - bstr__free(s); - return parm; -} - -/* int bsreadlna (bstring r, struct bStream * s, char terminator) - * - * Read a bstring terminated by the terminator character or the end of the - * stream from the bStream (s) and return it into the parameter r. This - * function may read additional characters from the core stream that are not - * returned, but will be retained for subsequent read operations. 
- */ -int bsreadlna(bstring r, struct bStream * s, char terminator) -{ - int i, l, ret, rlo; - char * b; - struct tagbstring x; - - if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0 || - r->slen < 0 || r->mlen < r->slen) return BSTR_ERR; - l = s->buff->slen; - if (BSTR_OK != balloc(s->buff, s->maxBuffSz + 1)) return BSTR_ERR; - b = (char *)s->buff->data; - x.data = (unsigned char *)b; - - /* First check if the current buffer holds the terminator */ - b[l] = terminator; /* Set sentinel */ - for (i = 0; b[i] != terminator; i++) - ; - if (i < l) - { - x.slen = i + 1; - ret = bconcat(r, &x); - s->buff->slen = l; - if (BSTR_OK == ret) bdelete(s->buff, 0, i + 1); - return BSTR_OK; - } - - rlo = r->slen; - - /* If not then just concatenate the entire buffer to the output */ - x.slen = l; - if (BSTR_OK != bconcat(r, &x)) return BSTR_ERR; - - /* Perform direct in-place reads into the destination to allow for - the minimum of data-copies */ - for (;;) - { - if (BSTR_OK != balloc(r, r->slen + s->maxBuffSz + 1)) return BSTR_ERR; - b = (char *)(r->data + r->slen); - l = (int)s->readFnPtr(b, 1, s->maxBuffSz, s->parm); - if (l <= 0) - { - r->data[r->slen] = (unsigned char)'\0'; - s->buff->slen = 0; - s->isEOF = 1; - /* If nothing was read return with an error message */ - return BSTR_ERR & -(r->slen == rlo); - } - b[l] = terminator; /* Set sentinel */ - for (i = 0; b[i] != terminator; i++) - ; - if (i < l) break; - r->slen += l; - } - - /* Terminator found, push over-read back to buffer */ - i++; - r->slen += i; - s->buff->slen = l - i; - bstr__memcpy(s->buff->data, b + i, l - i); - r->data[r->slen] = (unsigned char)'\0'; - return BSTR_OK; -} - -/* int bsreadlnsa (bstring r, struct bStream * s, bstring term) - * - * Read a bstring terminated by any character in the term string or the end - * of the stream from the bStream (s) and return it into the parameter r. 
- * This function may read additional characters from the core stream that - * are not returned, but will be retained for subsequent read operations. - */ -int bsreadlnsa(bstring r, struct bStream * s, const_bstring term) -{ - int i, l, ret, rlo; - unsigned char * b; - struct tagbstring x; - struct charField cf; - - if (s == NULL || s->buff == NULL || r == NULL || term == NULL || - term->data == NULL || r->mlen <= 0 || r->slen < 0 || - r->mlen < r->slen) return BSTR_ERR; - if (term->slen == 1) return bsreadlna(r, s, term->data[0]); - if (term->slen < 1 || buildCharField(&cf, term)) return BSTR_ERR; - - l = s->buff->slen; - if (BSTR_OK != balloc(s->buff, s->maxBuffSz + 1)) return BSTR_ERR; - b = (unsigned char *)s->buff->data; - x.data = b; - - /* First check if the current buffer holds the terminator */ - b[l] = term->data[0]; /* Set sentinel */ - for (i = 0; !testInCharField(&cf, b[i]); i++) - ; - if (i < l) - { - x.slen = i + 1; - ret = bconcat(r, &x); - s->buff->slen = l; - if (BSTR_OK == ret) bdelete(s->buff, 0, i + 1); - return BSTR_OK; - } - - rlo = r->slen; - - /* If not then just concatenate the entire buffer to the output */ - x.slen = l; - if (BSTR_OK != bconcat(r, &x)) return BSTR_ERR; - - /* Perform direct in-place reads into the destination to allow for - the minimum of data-copies */ - for (;;) - { - if (BSTR_OK != balloc(r, r->slen + s->maxBuffSz + 1)) return BSTR_ERR; - b = (unsigned char *)(r->data + r->slen); - l = (int)s->readFnPtr(b, 1, s->maxBuffSz, s->parm); - if (l <= 0) - { - r->data[r->slen] = (unsigned char)'\0'; - s->buff->slen = 0; - s->isEOF = 1; - /* If nothing was read return with an error message */ - return BSTR_ERR & -(r->slen == rlo); - } - - b[l] = term->data[0]; /* Set sentinel */ - for (i = 0; !testInCharField(&cf, b[i]); i++) - ; - if (i < l) break; - r->slen += l; - } - - /* Terminator found, push over-read back to buffer */ - i++; - r->slen += i; - s->buff->slen = l - i; - bstr__memcpy(s->buff->data, b + i, l - i); - 
r->data[r->slen] = (unsigned char)'\0'; - return BSTR_OK; -} - -/* int bsreada (bstring r, struct bStream * s, int n) - * - * Read a bstring of length n (or, if it is fewer, as many bytes as is - * remaining) from the bStream. This function may read additional - * characters from the core stream that are not returned, but will be - * retained for subsequent read operations. This function will not read - * additional characters from the core stream beyond virtual stream pointer. - */ -int bsreada(bstring r, struct bStream * s, int n) -{ - int l, ret, orslen; - char * b; - struct tagbstring x; - - if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0 - || r->slen < 0 || r->mlen < r->slen || n <= 0) return BSTR_ERR; - - n += r->slen; - if (n <= 0) return BSTR_ERR; - - l = s->buff->slen; - - orslen = r->slen; - - if (0 == l) - { - if (s->isEOF) return BSTR_ERR; - if (r->mlen > n) - { - l = (int)s->readFnPtr(r->data + r->slen, 1, n - r->slen, s->parm); - if (0 >= l || l > n - r->slen) - { - s->isEOF = 1; - return BSTR_ERR; - } - r->slen += l; - r->data[r->slen] = (unsigned char)'\0'; - return 0; - } - } - - if (BSTR_OK != balloc(s->buff, s->maxBuffSz + 1)) return BSTR_ERR; - b = (char *)s->buff->data; - x.data = (unsigned char *)b; - - do - { - if (l + r->slen >= n) - { - x.slen = n - r->slen; - ret = bconcat(r, &x); - s->buff->slen = l; - if (BSTR_OK == ret) bdelete(s->buff, 0, x.slen); - return BSTR_ERR & -(r->slen == orslen); - } - - x.slen = l; - if (BSTR_OK != bconcat(r, &x)) break; - - l = n - r->slen; - if (l > s->maxBuffSz) l = s->maxBuffSz; - - l = (int)s->readFnPtr(b, 1, l, s->parm); - } - while (l > 0); - if (l < 0) l = 0; - if (l == 0) s->isEOF = 1; - s->buff->slen = l; - return BSTR_ERR & -(r->slen == orslen); -} - -/* int bsreadln (bstring r, struct bStream * s, char terminator) - * - * Read a bstring terminated by the terminator character or the end of the - * stream from the bStream (s) and return it into the parameter r. 
This - * function may read additional characters from the core stream that are not - * returned, but will be retained for subsequent read operations. - */ -int bsreadln(bstring r, struct bStream * s, char terminator) -{ - if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0) - return BSTR_ERR; - if (BSTR_OK != balloc(s->buff, s->maxBuffSz + 1)) return BSTR_ERR; - r->slen = 0; - return bsreadlna(r, s, terminator); -} - -/* int bsreadlns (bstring r, struct bStream * s, bstring term) - * - * Read a bstring terminated by any character in the term string or the end - * of the stream from the bStream (s) and return it into the parameter r. - * This function may read additional characters from the core stream that - * are not returned, but will be retained for subsequent read operations. - */ -int bsreadlns(bstring r, struct bStream * s, const_bstring term) -{ - if (s == NULL || s->buff == NULL || r == NULL || term == NULL - || term->data == NULL || r->mlen <= 0) return BSTR_ERR; - if (term->slen == 1) return bsreadln(r, s, term->data[0]); - if (term->slen < 1) return BSTR_ERR; - if (BSTR_OK != balloc(s->buff, s->maxBuffSz + 1)) return BSTR_ERR; - r->slen = 0; - return bsreadlnsa(r, s, term); -} - -/* int bsread (bstring r, struct bStream * s, int n) - * - * Read a bstring of length n (or, if it is fewer, as many bytes as is - * remaining) from the bStream. This function may read additional - * characters from the core stream that are not returned, but will be - * retained for subsequent read operations. This function will not read - * additional characters from the core stream beyond virtual stream pointer. 
- */ -int bsread(bstring r, struct bStream * s, int n) -{ - if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0 - || n <= 0) return BSTR_ERR; - if (BSTR_OK != balloc(s->buff, s->maxBuffSz + 1)) return BSTR_ERR; - r->slen = 0; - return bsreada(r, s, n); -} - -/* int bsunread (struct bStream * s, const_bstring b) - * - * Insert a bstring into the bStream at the current position. These - * characters will be read prior to those that actually come from the core - * stream. - */ -int bsunread(struct bStream * s, const_bstring b) -{ - if (s == NULL || s->buff == NULL) return BSTR_ERR; - return binsert(s->buff, 0, b, (unsigned char)'?'); -} - -/* int bspeek (bstring r, const struct bStream * s) - * - * Return the currently buffered characters from the bStream that will be - * read prior to reads from the core stream. - */ -int bspeek(bstring r, const struct bStream * s) -{ - if (s == NULL || s->buff == NULL) return BSTR_ERR; - return bassign(r, s->buff); -} - -/* bstring bjoin (const struct bstrList * bl, const_bstring sep); - * - * Join the entries of a bstrList into one bstring by sequentially - * concatenating them with the sep string in between. If there is an error - * NULL is returned, otherwise a bstring with the correct result is returned. - */ -bstring bjoin(const struct bstrList * bl, const_bstring sep) -{ - bstring b; - int i, c, v; - - if (bl == NULL || bl->qty < 0) return NULL; - if (sep != NULL && (sep->slen < 0 || sep->data == NULL)) return NULL; - - for (i = 0, c = 1; i < bl->qty; i++) - { - v = bl->entry[i]->slen; - if (v < 0) return NULL; /* Invalid input */ - c += v; - if (c < 0) return NULL; /* Wrap around ?? 
*/ - } - - if (sep != NULL) c += (bl->qty - 1) * sep->slen; - - b = (bstring)bstr__alloc(sizeof(struct tagbstring)); - if (NULL == b) return NULL; /* Out of memory */ - b->data = (unsigned char *)bstr__alloc(c); - if (b->data == NULL) - { - bstr__free(b); - return NULL; - } - - b->mlen = c; - b->slen = c - 1; - - for (i = 0, c = 0; i < bl->qty; i++) - { - if (i > 0 && sep != NULL) - { - bstr__memcpy(b->data + c, sep->data, sep->slen); - c += sep->slen; - } - v = bl->entry[i]->slen; - bstr__memcpy(b->data + c, bl->entry[i]->data, v); - c += v; - } - b->data[c] = (unsigned char)'\0'; - return b; -} - -#define BSSSC_BUFF_LEN (256) - -/* int bssplitscb (struct bStream * s, const_bstring splitStr, - * int (* cb) (void * parm, int ofs, const_bstring entry), void * parm) - * - * Iterate the set of disjoint sequential substrings read from a stream - * divided by any of the characters in splitStr. An empty splitStr causes - * the whole stream to be iterated once. - * - * Note: At the point of calling the cb function, the bStream pointer is - * pointed exactly at the position right after having read the split - * character. The cb function can act on the stream by causing the bStream - * pointer to move, and bssplitscb will continue by starting the next split - * at the position of the pointer after the return from cb. - * - * However, if the cb causes the bStream s to be destroyed then the cb must - * return with a negative value, otherwise bssplitscb will continue in an - * undefined manner. 
- */ -int bssplitscb(struct bStream * s, const_bstring splitStr, - int (* cb)(void * parm, int ofs, const_bstring entry), void * parm) -{ - struct charField chrs; - bstring buff; - int i, p, ret; - - if (cb == NULL || s == NULL || s->readFnPtr == NULL - || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR; - - if (NULL == (buff = bfromcstr(""))) return BSTR_ERR; - - if (splitStr->slen == 0) - { - while (bsreada(buff, s, BSSSC_BUFF_LEN) >= 0) - ; - if ((ret = cb(parm, 0, buff)) > 0) - ret = 0; - } - else - { - buildCharField(&chrs, splitStr); - ret = p = i = 0; - for (;;) - { - if (i >= buff->slen) - { - bsreada(buff, s, BSSSC_BUFF_LEN); - if (i >= buff->slen) - { - if (0 < (ret = cb(parm, p, buff))) ret = 0; - break; - } - } - if (testInCharField(&chrs, buff->data[i])) - { - struct tagbstring t; - unsigned char c; - - blk2tbstr(t, buff->data + i + 1, buff->slen - (i + 1)); - if ((ret = bsunread(s, &t)) < 0) break; - buff->slen = i; - c = buff->data[i]; - buff->data[i] = (unsigned char)'\0'; - if ((ret = cb(parm, p, buff)) < 0) break; - buff->data[i] = c; - buff->slen = 0; - p += i + 1; - i = -1; - } - i++; - } - } - - bdestroy(buff); - return ret; -} - -/* int bssplitstrcb (struct bStream * s, const_bstring splitStr, - * int (* cb) (void * parm, int ofs, const_bstring entry), void * parm) - * - * Iterate the set of disjoint sequential substrings read from a stream - * divided by the entire substring splitStr. An empty splitStr causes - * each character of the stream to be iterated. - * - * Note: At the point of calling the cb function, the bStream pointer is - * pointed exactly at the position right after having read the split - * character. The cb function can act on the stream by causing the bStream - * pointer to move, and bssplitscb will continue by starting the next split - * at the position of the pointer after the return from cb. 
- * - * However, if the cb causes the bStream s to be destroyed then the cb must - * return with a negative value, otherwise bssplitscb will continue in an - * undefined manner. - */ -int bssplitstrcb(struct bStream * s, const_bstring splitStr, - int (* cb)(void * parm, int ofs, const_bstring entry), void * parm) -{ - bstring buff; - int i, p, ret; - - if (cb == NULL || s == NULL || s->readFnPtr == NULL - || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR; - - if (splitStr->slen == 1) return bssplitscb(s, splitStr, cb, parm); - - if (NULL == (buff = bfromcstr(""))) return BSTR_ERR; - - if (splitStr->slen == 0) - { - for (i = 0; bsreada(buff, s, BSSSC_BUFF_LEN) >= 0; i++) - { - if ((ret = cb(parm, 0, buff)) < 0) - { - bdestroy(buff); - return ret; - } - buff->slen = 0; - } - return BSTR_OK; - } - else - { - ret = p = i = 0; - for (i = p = 0;;) - { - if ((ret = binstr(buff, 0, splitStr)) >= 0) - { - struct tagbstring t; - blk2tbstr(t, buff->data, ret); - i = ret + splitStr->slen; - if ((ret = cb(parm, p, &t)) < 0) break; - p += i; - bdelete(buff, 0, i); - } - else - { - bsreada(buff, s, BSSSC_BUFF_LEN); - if (bseof(s)) - { - if ((ret = cb(parm, p, buff)) > 0) ret = 0; - break; - } - } - } - } - - bdestroy(buff); - return ret; -} - -/* int bstrListCreate (void) - * - * Create a bstrList. - */ -struct bstrList * bstrListCreate(void) -{ - struct bstrList * sl = (struct bstrList *)bstr__alloc(sizeof(struct bstrList)); - if (sl) - { - sl->entry = (bstring *)bstr__alloc(1 * sizeof(bstring)); - if (!sl->entry) - { - bstr__free(sl); - sl = NULL; - } - else - { - sl->qty = 0; - sl->mlen = 1; - } - } - return sl; -} - -/* int bstrListDestroy (struct bstrList * sl) - * - * Destroy a bstrList that has been created by bsplit, bsplits or bstrListCreate. 
- */ -int bstrListDestroy(struct bstrList * sl) -{ - int i; - if (sl == NULL || sl->qty < 0) return BSTR_ERR; - for (i = 0; i < sl->qty; i++) - { - if (sl->entry[i]) - { - bdestroy(sl->entry[i]); - sl->entry[i] = NULL; - } - } - sl->qty = -1; - sl->mlen = -1; - bstr__free(sl->entry); - sl->entry = NULL; - bstr__free(sl); - return BSTR_OK; -} - -/* int bstrListAlloc (struct bstrList * sl, int msz) - * - * Ensure that there is memory for at least msz number of entries for the - * list. - */ -int bstrListAlloc(struct bstrList * sl, int msz) -{ - bstring * l; - int smsz; - size_t nsz; - if (!sl || msz <= 0 || !sl->entry || sl->qty < 0 || sl->mlen <= 0 || sl->qty > sl->mlen) return BSTR_ERR; - if (sl->mlen >= msz) return BSTR_OK; - smsz = snapUpSize(msz); - nsz = ((size_t)smsz) * sizeof(bstring); - if (nsz < (size_t)smsz) return BSTR_ERR; - l = (bstring *)bstr__realloc(sl->entry, nsz); - if (!l) - { - smsz = msz; - nsz = ((size_t)smsz) * sizeof(bstring); - l = (bstring *)bstr__realloc(sl->entry, nsz); - if (!l) return BSTR_ERR; - } - sl->mlen = smsz; - sl->entry = l; - return BSTR_OK; -} - -/* int bstrListAllocMin (struct bstrList * sl, int msz) - * - * Try to allocate the minimum amount of memory for the list to include at - * least msz entries or sl->qty whichever is greater. 
- */ -int bstrListAllocMin(struct bstrList * sl, int msz) -{ - bstring * l; - size_t nsz; - if (!sl || msz <= 0 || !sl->entry || sl->qty < 0 || sl->mlen <= 0 || sl->qty > sl->mlen) return BSTR_ERR; - if (msz < sl->qty) msz = sl->qty; - if (sl->mlen == msz) return BSTR_OK; - nsz = ((size_t)msz) * sizeof(bstring); - if (nsz < (size_t)msz) return BSTR_ERR; - l = (bstring *)bstr__realloc(sl->entry, nsz); - if (!l) return BSTR_ERR; - sl->mlen = msz; - sl->entry = l; - return BSTR_OK; -} - -/* int bsplitcb (const_bstring str, unsigned char splitChar, int pos, - * int (* cb) (void * parm, int ofs, int len), void * parm) - * - * Iterate the set of disjoint sequential substrings over str divided by the - * character in splitChar. - * - * Note: Non-destructive modification of str from within the cb function - * while performing this split is not undefined. bsplitcb behaves in - * sequential lock step with calls to cb. I.e., after returning from a cb - * that return a non-negative integer, bsplitcb continues from the position - * 1 character after the last detected split character and it will halt - * immediately if the length of str falls below this point. However, if the - * cb function destroys str, then it *must* return with a negative value, - * otherwise bsplitcb will continue in an undefined manner. 
- */ -int bsplitcb(const_bstring str, unsigned char splitChar, int pos, - int (* cb)(void * parm, int ofs, int len), void * parm) -{ - int i, p, ret; - - if (cb == NULL || str == NULL || pos < 0 || pos > str->slen) - return BSTR_ERR; - - p = pos; - do - { - for (i = p; i < str->slen; i++) - { - if (str->data[i] == splitChar) break; - } - if ((ret = cb(parm, p, i - p)) < 0) return ret; - p = i + 1; - } - while (p <= str->slen); - return BSTR_OK; -} - -/* int bsplitscb (const_bstring str, const_bstring splitStr, int pos, - * int (* cb) (void * parm, int ofs, int len), void * parm) - * - * Iterate the set of disjoint sequential substrings over str divided by any - * of the characters in splitStr. An empty splitStr causes the whole str to - * be iterated once. - * - * Note: Non-destructive modification of str from within the cb function - * while performing this split is not undefined. bsplitscb behaves in - * sequential lock step with calls to cb. I.e., after returning from a cb - * that return a non-negative integer, bsplitscb continues from the position - * 1 character after the last detected split character and it will halt - * immediately if the length of str falls below this point. However, if the - * cb function destroys str, then it *must* return with a negative value, - * otherwise bsplitscb will continue in an undefined manner. 
- */ -int bsplitscb(const_bstring str, const_bstring splitStr, int pos, - int (* cb)(void * parm, int ofs, int len), void * parm) -{ - struct charField chrs; - int i, p, ret; - - if (cb == NULL || str == NULL || pos < 0 || pos > str->slen - || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR; - if (splitStr->slen == 0) - { - if ((ret = cb(parm, 0, str->slen)) > 0) ret = 0; - return ret; - } - - if (splitStr->slen == 1) - return bsplitcb(str, splitStr->data[0], pos, cb, parm); - - buildCharField(&chrs, splitStr); - - p = pos; - do - { - for (i = p; i < str->slen; i++) - { - if (testInCharField(&chrs, str->data[i])) break; - } - if ((ret = cb(parm, p, i - p)) < 0) return ret; - p = i + 1; - } - while (p <= str->slen); - return BSTR_OK; -} - -/* int bsplitstrcb (const_bstring str, const_bstring splitStr, int pos, - * int (* cb) (void * parm, int ofs, int len), void * parm) - * - * Iterate the set of disjoint sequential substrings over str divided by the - * substring splitStr. An empty splitStr causes the whole str to be - * iterated once. - * - * Note: Non-destructive modification of str from within the cb function - * while performing this split is not undefined. bsplitstrcb behaves in - * sequential lock step with calls to cb. I.e., after returning from a cb - * that return a non-negative integer, bsplitscb continues from the position - * 1 character after the last detected split character and it will halt - * immediately if the length of str falls below this point. However, if the - * cb function destroys str, then it *must* return with a negative value, - * otherwise bsplitscb will continue in an undefined manner. 
- */ -int bsplitstrcb(const_bstring str, const_bstring splitStr, int pos, - int (* cb)(void * parm, int ofs, int len), void * parm) -{ - int i, p, ret; - - if (cb == NULL || str == NULL || pos < 0 || pos > str->slen - || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR; - - if (0 == splitStr->slen) - { - for (i = pos; i < str->slen; i++) - { - if ((ret = cb(parm, i, 1)) < 0) return ret; - } - return BSTR_OK; - } - - if (splitStr->slen == 1) - return bsplitcb(str, splitStr->data[0], pos, cb, parm); - - for (i = p = pos; i <= str->slen - splitStr->slen; i++) - { - if (0 == bstr__memcmp(splitStr->data, str->data + i, splitStr->slen)) - { - if ((ret = cb(parm, p, i - p)) < 0) return ret; - i += splitStr->slen; - p = i; - } - } - if ((ret = cb(parm, p, str->slen - p)) < 0) return ret; - return BSTR_OK; -} - -struct genBstrList -{ - bstring b; - struct bstrList * bl; -}; - -static int bscb(void * parm, int ofs, int len) -{ - struct genBstrList * g = (struct genBstrList *)parm; - if (g->bl->qty >= g->bl->mlen) - { - int mlen = g->bl->mlen * 2; - bstring * tbl; - - while (g->bl->qty >= mlen) - { - if (mlen < g->bl->mlen) return BSTR_ERR; - mlen += mlen; - } - - tbl = (bstring *)bstr__realloc(g->bl->entry, sizeof(bstring) * mlen); - if (tbl == NULL) return BSTR_ERR; - - g->bl->entry = tbl; - g->bl->mlen = mlen; - } - - g->bl->entry[g->bl->qty] = bmidstr(g->b, ofs, len); - g->bl->qty++; - return BSTR_OK; -} - -/* struct bstrList * bsplit (const_bstring str, unsigned char splitChar) - * - * Create an array of sequential substrings from str divided by the character - * splitChar. 
- */ -struct bstrList * bsplit(const_bstring str, unsigned char splitChar) -{ - struct genBstrList g; - - if (str == NULL || str->data == NULL || str->slen < 0) return NULL; - - g.bl = (struct bstrList *)bstr__alloc(sizeof(struct bstrList)); - if (g.bl == NULL) return NULL; - g.bl->mlen = 4; - g.bl->entry = (bstring *)bstr__alloc(g.bl->mlen * sizeof(bstring)); - if (NULL == g.bl->entry) - { - bstr__free(g.bl); - return NULL; - } - - g.b = (bstring)str; - g.bl->qty = 0; - if (bsplitcb(str, splitChar, 0, bscb, &g) < 0) - { - bstrListDestroy(g.bl); - return NULL; - } - return g.bl; -} - -/* struct bstrList * bsplitstr (const_bstring str, const_bstring splitStr) - * - * Create an array of sequential substrings from str divided by the entire - * substring splitStr. - */ -struct bstrList * bsplitstr(const_bstring str, const_bstring splitStr) -{ - struct genBstrList g; - - if (str == NULL || str->data == NULL || str->slen < 0) return NULL; - - g.bl = (struct bstrList *)bstr__alloc(sizeof(struct bstrList)); - if (g.bl == NULL) return NULL; - g.bl->mlen = 4; - g.bl->entry = (bstring *)bstr__alloc(g.bl->mlen * sizeof(bstring)); - if (NULL == g.bl->entry) - { - bstr__free(g.bl); - return NULL; - } - - g.b = (bstring)str; - g.bl->qty = 0; - if (bsplitstrcb(str, splitStr, 0, bscb, &g) < 0) - { - bstrListDestroy(g.bl); - return NULL; - } - return g.bl; -} - -/* struct bstrList * bsplits (const_bstring str, bstring splitStr) - * - * Create an array of sequential substrings from str divided by any of the - * characters in splitStr. An empty splitStr causes a single entry bstrList - * containing a copy of str to be returned. 
- */ -struct bstrList * bsplits(const_bstring str, const_bstring splitStr) -{ - struct genBstrList g; - - if (str == NULL || str->slen < 0 || str->data == NULL || - splitStr == NULL || splitStr->slen < 0 || splitStr->data == NULL) - return NULL; - - g.bl = (struct bstrList *)bstr__alloc(sizeof(struct bstrList)); - if (g.bl == NULL) return NULL; - g.bl->mlen = 4; - g.bl->entry = (bstring *)bstr__alloc(g.bl->mlen * sizeof(bstring)); - if (NULL == g.bl->entry) - { - bstr__free(g.bl); - return NULL; - } - g.b = (bstring)str; - g.bl->qty = 0; - - if (bsplitscb(str, splitStr, 0, bscb, &g) < 0) - { - bstrListDestroy(g.bl); - return NULL; - } - return g.bl; -} - -#if defined(__TURBOC__) && !defined(__BORLANDC__) -# ifndef BSTRLIB_NOVSNP -# define BSTRLIB_NOVSNP -# endif -#endif - -/* Give WATCOM C/C++, MSVC some latitude for their non-support of vsnprintf */ -#if defined(__WATCOMC__) || defined(_MSC_VER) -#define exvsnprintf(r, b, n, f, a) {r = _vsnprintf (b,n,f,a);} -#else -#ifdef BSTRLIB_NOVSNP -/* This is just a hack. If you are using a system without a vsnprintf, it is - not recommended that bformat be used at all. */ -#define exvsnprintf(r, b, n, f, a) {vsprintf (b,f,a); r = -1;} -#define START_VSNBUFF (256) -#else - -#ifdef __GNUC__ -/* Something is making gcc complain about this prototype not being here, so - I've just gone ahead and put it in. */ -//extern int vsnprintf (char *buf, size_t count, const char *format, va_list arg); -#endif - -#define exvsnprintf(r, b, n, f, a) {r = vsnprintf (b,n,f,a);} -#endif -#endif - -#if !defined(BSTRLIB_NOVSNP) - -#ifndef START_VSNBUFF -#define START_VSNBUFF (16) -#endif - -/* On IRIX vsnprintf returns n-1 when the operation would overflow the target - buffer, WATCOM and MSVC both return -1, while C99 requires that the - returned value be exactly what the length would be if the buffer would be - large enough. 
This leads to the idea that if the return value is larger - than n, then changing n to the return value will reduce the number of - iterations required. */ - -/* int bformata (bstring b, const char * fmt, ...) - * - * After the first parameter, it takes the same parameters as printf (), but - * rather than outputting results to stdio, it appends the results to - * a bstring which contains what would have been output. Note that if there - * is an early generation of a '\0' character, the bstring will be truncated - * to this end point. - */ -int bformata(bstring b, const char * fmt, ...) -{ - va_list arglist; - bstring buff; - int n, r; - - if (b == NULL || fmt == NULL || b->data == NULL || b->mlen <= 0 - || b->slen < 0 || b->slen > b->mlen) return BSTR_ERR; - - /* Since the length is not determinable beforehand, a search is - performed using the truncating "vsnprintf" call (to avoid buffer - overflows) on increasing potential sizes for the output result. */ - - if ((n = (int)(2 * strlen(fmt))) < START_VSNBUFF) n = START_VSNBUFF; - if (NULL == (buff = bfromcstralloc(n + 2, ""))) - { - n = 1; - if (NULL == (buff = bfromcstralloc(n + 2, ""))) return BSTR_ERR; - } - - for (;;) - { - va_start(arglist, fmt); - exvsnprintf(r, (char *)buff->data, n + 1, fmt, arglist); - va_end(arglist); - - buff->data[n] = (unsigned char)'\0'; - buff->slen = (int)(strlen)((char *)buff->data); - - if (buff->slen < n) break; - - if (r > n) n = r; else n += n; - - if (BSTR_OK != balloc(buff, n + 2)) - { - bdestroy(buff); - return BSTR_ERR; - } - } - - r = bconcat(b, buff); - bdestroy(buff); - return r; -} - -/* int bassignformat (bstring b, const char * fmt, ...) - * - * After the first parameter, it takes the same parameters as printf (), but - * rather than outputting results to stdio, it outputs the results to - * the bstring parameter b. Note that if there is an early generation of a - * '\0' character, the bstring will be truncated to this end point. 
- */ -int bassignformat(bstring b, const char * fmt, ...) -{ - va_list arglist; - bstring buff; - int n, r; - - if (b == NULL || fmt == NULL || b->data == NULL || b->mlen <= 0 - || b->slen < 0 || b->slen > b->mlen) return BSTR_ERR; - - /* Since the length is not determinable beforehand, a search is - performed using the truncating "vsnprintf" call (to avoid buffer - overflows) on increasing potential sizes for the output result. */ - - if ((n = (int)(2 * strlen(fmt))) < START_VSNBUFF) n = START_VSNBUFF; - if (NULL == (buff = bfromcstralloc(n + 2, ""))) - { - n = 1; - if (NULL == (buff = bfromcstralloc(n + 2, ""))) return BSTR_ERR; - } - - for (;;) - { - va_start(arglist, fmt); - exvsnprintf(r, (char *)buff->data, n + 1, fmt, arglist); - va_end(arglist); - - buff->data[n] = (unsigned char)'\0'; - buff->slen = (int)(strlen)((char *)buff->data); - - if (buff->slen < n) break; - - if (r > n) n = r; else n += n; - - if (BSTR_OK != balloc(buff, n + 2)) - { - bdestroy(buff); - return BSTR_ERR; - } - } - - r = bassign(b, buff); - bdestroy(buff); - return r; -} - -/* bstring bformat (const char * fmt, ...) - * - * Takes the same parameters as printf (), but rather than outputting results - * to stdio, it forms a bstring which contains what would have been output. - * Note that if there is an early generation of a '\0' character, the - * bstring will be truncated to this end point. - */ -bstring bformat(const char * fmt, ...) -{ - va_list arglist; - bstring buff; - int n, r; - - if (fmt == NULL) return NULL; - - /* Since the length is not determinable beforehand, a search is - performed using the truncating "vsnprintf" call (to avoid buffer - overflows) on increasing potential sizes for the output result. 
*/ - - if ((n = (int)(2 * strlen(fmt))) < START_VSNBUFF) n = START_VSNBUFF; - if (NULL == (buff = bfromcstralloc(n + 2, ""))) - { - n = 1; - if (NULL == (buff = bfromcstralloc(n + 2, ""))) return NULL; - } - - for (;;) - { - va_start(arglist, fmt); - exvsnprintf(r, (char *)buff->data, n + 1, fmt, arglist); - va_end(arglist); - - buff->data[n] = (unsigned char)'\0'; - buff->slen = (int)(strlen)((char *)buff->data); - - if (buff->slen < n) break; - - if (r > n) n = r; else n += n; - - if (BSTR_OK != balloc(buff, n + 2)) - { - bdestroy(buff); - return NULL; - } - } - - return buff; -} - -/* int bvcformata (bstring b, int count, const char * fmt, va_list arglist) - * - * The bvcformata function formats data under control of the format control - * string fmt and attempts to append the result to b. The fmt parameter is - * the same as that of the printf function. The variable argument list is - * replaced with arglist, which has been initialized by the va_start macro. - * The size of the appended output is upper bounded by count. If the - * required output exceeds count, the string b is not augmented with any - * contents and a value below BSTR_ERR is returned. If a value below -count - * is returned then it is recommended that the negative of this value be - * used as an update to the count in a subsequent pass. On other errors, - * such as running out of memory, parameter errors or numeric wrap around - * BSTR_ERR is returned. BSTR_OK is returned when the output is successfully - * generated and appended to b. - * - * Note: There is no sanity checking of arglist, and this function is - * destructive of the contents of b from the b->slen point onward. If there - * is an early generation of a '\0' character, the bstring will be truncated - * to this end point. 
- */ -int bvcformata(bstring b, int count, const char * fmt, va_list arg) -{ - int n, r, l; - - if (b == NULL || fmt == NULL || count <= 0 || b->data == NULL - || b->mlen <= 0 || b->slen < 0 || b->slen > b->mlen) return BSTR_ERR; - - if (count > (n = b->slen + count) + 2) return BSTR_ERR; - if (BSTR_OK != balloc(b, n + 2)) return BSTR_ERR; - - exvsnprintf(r, (char *)b->data + b->slen, count + 2, fmt, arg); - - /* Did the operation complete successfully within bounds? */ - for (l = b->slen; l <= n; l++) - { - if ('\0' == b->data[l]) - { - b->slen = l; - return BSTR_OK; - } - } - - /* Abort, since the buffer was not large enough. The return value - tries to help set what the retry length should be. */ - - b->data[b->slen] = '\0'; - if (r > count + 1) /* Does r specify a particular target length? */ - { - n = r; - } - else - { - n = count + count; /* If not, just double the size of count */ - if (count > n) n = INT_MAX; - } - n = -n; - - if (n > BSTR_ERR - 1) n = BSTR_ERR - 1; - return n; -} - -#endif diff --git a/third_party/HLSLcc/src/cbstring/bstrlib.h b/third_party/HLSLcc/src/cbstring/bstrlib.h deleted file mode 100644 index 5ea8454..0000000 --- a/third_party/HLSLcc/src/cbstring/bstrlib.h +++ /dev/null @@ -1,306 +0,0 @@ -/* - * This source file is part of the bstring string library. This code was - * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause - * BSD open source license or GPL v2.0. Refer to the accompanying documentation - * for details on usage and license. - */ - -/* - * bstrlib.h - * - * This file is the header file for the core module for implementing the - * bstring functions. 
- */ - -#ifndef BSTRLIB_INCLUDE -#define BSTRLIB_INCLUDE - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include -#include -#include - -#if !defined(BSTRLIB_VSNP_OK) && !defined(BSTRLIB_NOVSNP) -# if defined(__TURBOC__) && !defined(__BORLANDC__) -# define BSTRLIB_NOVSNP -# endif -#endif - -#define BSTR_ERR (-1) -#define BSTR_OK (0) -#define BSTR_BS_BUFF_LENGTH_GET (0) - -typedef struct tagbstring * bstring; -typedef const struct tagbstring * const_bstring; - -/* Copy functions */ -#define cstr2bstr bfromcstr -extern bstring bfromcstr(const char * str); -extern bstring bfromcstralloc(int mlen, const char * str); -extern bstring blk2bstr(const void * blk, int len); -extern char * bstr2cstr(const_bstring s, char z); -extern int bcstrfree(char * s); -extern bstring bstrcpy(const_bstring b1); -extern int bassign(bstring a, const_bstring b); -extern int bassignmidstr(bstring a, const_bstring b, int left, int len); -extern int bassigncstr(bstring a, const char * str); -extern int bassignblk(bstring a, const void * s, int len); - -/* Destroy function */ -extern int bdestroy(bstring b); - -/* Space allocation hinting functions */ -extern int balloc(bstring s, int len); -extern int ballocmin(bstring b, int len); - -/* Substring extraction */ -extern bstring bmidstr(const_bstring b, int left, int len); - -/* Various standard manipulations */ -extern int bconcat(bstring b0, const_bstring b1); -extern int bconchar(bstring b0, char c); -extern int bcatcstr(bstring b, const char * s); -extern int bcatblk(bstring b, const void * s, int len); -extern int binsert(bstring s1, int pos, const_bstring s2, unsigned char fill); -extern int binsertch(bstring s1, int pos, int len, unsigned char fill); -extern int breplace(bstring b1, int pos, int len, const_bstring b2, unsigned char fill); -extern int bdelete(bstring s1, int pos, int len); -extern int bsetstr(bstring b0, int pos, const_bstring b1, unsigned char fill); -extern int btrunc(bstring b, int n); - -/* Scan/search 
functions */ -extern int bstricmp(const_bstring b0, const_bstring b1); -extern int bstrnicmp(const_bstring b0, const_bstring b1, int n); -extern int biseqcaseless(const_bstring b0, const_bstring b1); -extern int bisstemeqcaselessblk(const_bstring b0, const void * blk, int len); -extern int biseq(const_bstring b0, const_bstring b1); -extern int bisstemeqblk(const_bstring b0, const void * blk, int len); -extern int biseqcstr(const_bstring b, const char * s); -extern int biseqcstrcaseless(const_bstring b, const char * s); -extern int bstrcmp(const_bstring b0, const_bstring b1); -extern int bstrncmp(const_bstring b0, const_bstring b1, int n); -extern int binstr(const_bstring s1, int pos, const_bstring s2); -extern int binstrr(const_bstring s1, int pos, const_bstring s2); -extern int binstrcaseless(const_bstring s1, int pos, const_bstring s2); -extern int binstrrcaseless(const_bstring s1, int pos, const_bstring s2); -extern int bstrchrp(const_bstring b, int c, int pos); -extern int bstrrchrp(const_bstring b, int c, int pos); -#define bstrchr(b, c) bstrchrp ((b), (c), 0) -#define bstrrchr(b, c) bstrrchrp ((b), (c), blength(b)-1) -extern int binchr(const_bstring b0, int pos, const_bstring b1); -extern int binchrr(const_bstring b0, int pos, const_bstring b1); -extern int bninchr(const_bstring b0, int pos, const_bstring b1); -extern int bninchrr(const_bstring b0, int pos, const_bstring b1); -extern int bfindreplace(bstring b, const_bstring find, const_bstring repl, int pos); -extern int bfindreplacecaseless(bstring b, const_bstring find, const_bstring repl, int pos); - -/* List of string container functions */ -struct bstrList -{ - int qty, mlen; - bstring * entry; -}; -extern struct bstrList * bstrListCreate(void); -extern int bstrListDestroy(struct bstrList * sl); -extern int bstrListAlloc(struct bstrList * sl, int msz); -extern int bstrListAllocMin(struct bstrList * sl, int msz); - -/* String split and join functions */ -extern struct bstrList * bsplit(const_bstring str, 
unsigned char splitChar); -extern struct bstrList * bsplits(const_bstring str, const_bstring splitStr); -extern struct bstrList * bsplitstr(const_bstring str, const_bstring splitStr); -extern bstring bjoin(const struct bstrList * bl, const_bstring sep); -extern int bsplitcb(const_bstring str, unsigned char splitChar, int pos, - int (* cb)(void * parm, int ofs, int len), void * parm); -extern int bsplitscb(const_bstring str, const_bstring splitStr, int pos, - int (* cb)(void * parm, int ofs, int len), void * parm); -extern int bsplitstrcb(const_bstring str, const_bstring splitStr, int pos, - int (* cb)(void * parm, int ofs, int len), void * parm); - -/* Miscellaneous functions */ -extern int bpattern(bstring b, int len); -extern int btoupper(bstring b); -extern int btolower(bstring b); -extern int bltrimws(bstring b); -extern int brtrimws(bstring b); -extern int btrimws(bstring b); - -/* <*>printf format functions */ -#if !defined(BSTRLIB_NOVSNP) -extern bstring bformat(const char * fmt, ...); -extern int bformata(bstring b, const char * fmt, ...); -extern int bassignformat(bstring b, const char * fmt, ...); -extern int bvcformata(bstring b, int count, const char * fmt, va_list arglist); - -#define bvformata(ret, b, fmt, lastarg) { \ -bstring bstrtmp_b = (b); \ -const char * bstrtmp_fmt = (fmt); \ -int bstrtmp_r = BSTR_ERR, bstrtmp_sz = 16; \ - for (;;) { \ - va_list bstrtmp_arglist; \ - va_start (bstrtmp_arglist, lastarg); \ - bstrtmp_r = bvcformata (bstrtmp_b, bstrtmp_sz, bstrtmp_fmt, bstrtmp_arglist); \ - va_end (bstrtmp_arglist); \ - if (bstrtmp_r >= 0) { /* Everything went ok */ \ - bstrtmp_r = BSTR_OK; \ - break; \ - } else if (-bstrtmp_r <= bstrtmp_sz) { /* A real error? 
*/ \ - bstrtmp_r = BSTR_ERR; \ - break; \ - } \ - bstrtmp_sz = -bstrtmp_r; /* Doubled or target size */ \ - } \ - ret = bstrtmp_r; \ -} - -#endif - -typedef int (*bNgetc) (void *parm); -typedef size_t (* bNread) (void *buff, size_t elsize, size_t nelem, void *parm); - -/* Input functions */ -extern bstring bgets(bNgetc getcPtr, void * parm, char terminator); -extern bstring bread(bNread readPtr, void * parm); -extern int bgetsa(bstring b, bNgetc getcPtr, void * parm, char terminator); -extern int bassigngets(bstring b, bNgetc getcPtr, void * parm, char terminator); -extern int breada(bstring b, bNread readPtr, void * parm); - -/* Stream functions */ -extern struct bStream * bsopen(bNread readPtr, void * parm); -extern void * bsclose(struct bStream * s); -extern int bsbufflength(struct bStream * s, int sz); -extern int bsreadln(bstring b, struct bStream * s, char terminator); -extern int bsreadlns(bstring r, struct bStream * s, const_bstring term); -extern int bsread(bstring b, struct bStream * s, int n); -extern int bsreadlna(bstring b, struct bStream * s, char terminator); -extern int bsreadlnsa(bstring r, struct bStream * s, const_bstring term); -extern int bsreada(bstring b, struct bStream * s, int n); -extern int bsunread(struct bStream * s, const_bstring b); -extern int bspeek(bstring r, const struct bStream * s); -extern int bssplitscb(struct bStream * s, const_bstring splitStr, - int (* cb)(void * parm, int ofs, const_bstring entry), void * parm); -extern int bssplitstrcb(struct bStream * s, const_bstring splitStr, - int (* cb)(void * parm, int ofs, const_bstring entry), void * parm); -extern int bseof(const struct bStream * s); - -struct tagbstring -{ - int mlen; - int slen; - unsigned char * data; -}; - -/* Accessor macros */ -#define blengthe(b, e) (((b) == (void *)0 || (b)->slen < 0) ? (int)(e) : ((b)->slen)) -#define blength(b) (blengthe ((b), 0)) -#define bdataofse(b, o, e) (((b) == (void *)0 || (b)->data == (void*)0) ? 
(char *)(e) : ((char *)(b)->data) + (o)) -#define bdataofs(b, o) (bdataofse ((b), (o), (void *)0)) -#define bdatae(b, e) (bdataofse (b, 0, e)) -#define bdata(b) (bdataofs (b, 0)) -#define bchare(b, p, e) ((((unsigned)(p)) < (unsigned)blength(b)) ? ((b)->data[(p)]) : (e)) -#define bchar(b, p) bchare ((b), (p), '\0') - -/* Static constant string initialization macro */ -#define bsStaticMlen(q, m) {(m), (int) sizeof(q)-1, (unsigned char *) ("" q "")} -#if defined(_MSC_VER) -/* There are many versions of MSVC which emit __LINE__ as a non-constant. */ -# define bsStatic(q) bsStaticMlen(q,-32) -#endif -#ifndef bsStatic -# define bsStatic(q) bsStaticMlen(q,-__LINE__) -#endif - -/* Static constant block parameter pair */ -#define bsStaticBlkParms(q) ((void *)("" q "")), ((int) sizeof(q)-1) - -/* Reference building macros */ -#define cstr2tbstr btfromcstr -#define btfromcstr(t, s) { \ - (t).data = (unsigned char *) (s); \ - (t).slen = ((t).data) ? ((int) (strlen) ((char *)(t).data)) : 0; \ - (t).mlen = -1; \ -} -#define blk2tbstr(t, s, l) { \ - (t).data = (unsigned char *) (s); \ - (t).slen = l; \ - (t).mlen = -1; \ -} -#define btfromblk(t, s, l) blk2tbstr(t,s,l) -#define bmid2tbstr(t, b, p, l) { \ - const_bstring bstrtmp_s = (b); \ - if (bstrtmp_s && bstrtmp_s->data && bstrtmp_s->slen >= 0) { \ - int bstrtmp_left = (p); \ - int bstrtmp_len = (l); \ - if (bstrtmp_left < 0) { \ - bstrtmp_len += bstrtmp_left; \ - bstrtmp_left = 0; \ - } \ - if (bstrtmp_len > bstrtmp_s->slen - bstrtmp_left) \ - bstrtmp_len = bstrtmp_s->slen - bstrtmp_left; \ - if (bstrtmp_len <= 0) { \ - (t).data = (unsigned char *)""; \ - (t).slen = 0; \ - } else { \ - (t).data = bstrtmp_s->data + bstrtmp_left; \ - (t).slen = bstrtmp_len; \ - } \ - } else { \ - (t).data = (unsigned char *)""; \ - (t).slen = 0; \ - } \ - (t).mlen = -__LINE__; \ -} -#define btfromblkltrimws(t, s, l) { \ - int bstrtmp_idx = 0, bstrtmp_len = (l); \ - unsigned char * bstrtmp_s = (s); \ - if (bstrtmp_s && bstrtmp_len >= 0) { \ - 
for (; bstrtmp_idx < bstrtmp_len; bstrtmp_idx++) { \ - if (!isspace (bstrtmp_s[bstrtmp_idx])) break; \ - } \ - } \ - (t).data = bstrtmp_s + bstrtmp_idx; \ - (t).slen = bstrtmp_len - bstrtmp_idx; \ - (t).mlen = -__LINE__; \ -} -#define btfromblkrtrimws(t, s, l) { \ - int bstrtmp_len = (l) - 1; \ - unsigned char * bstrtmp_s = (s); \ - if (bstrtmp_s && bstrtmp_len >= 0) { \ - for (; bstrtmp_len >= 0; bstrtmp_len--) { \ - if (!isspace (bstrtmp_s[bstrtmp_len])) break; \ - } \ - } \ - (t).data = bstrtmp_s; \ - (t).slen = bstrtmp_len + 1; \ - (t).mlen = -__LINE__; \ -} -#define btfromblktrimws(t, s, l) { \ - int bstrtmp_idx = 0, bstrtmp_len = (l) - 1; \ - unsigned char * bstrtmp_s = (s); \ - if (bstrtmp_s && bstrtmp_len >= 0) { \ - for (; bstrtmp_idx <= bstrtmp_len; bstrtmp_idx++) { \ - if (!isspace (bstrtmp_s[bstrtmp_idx])) break; \ - } \ - for (; bstrtmp_len >= bstrtmp_idx; bstrtmp_len--) { \ - if (!isspace (bstrtmp_s[bstrtmp_len])) break; \ - } \ - } \ - (t).data = bstrtmp_s + bstrtmp_idx; \ - (t).slen = bstrtmp_len + 1 - bstrtmp_idx; \ - (t).mlen = -__LINE__; \ -} - -/* Write protection macros */ -#define bwriteprotect(t) { if ((t).mlen >= 0) (t).mlen = -1; } -#define bwriteallow(t) { if ((t).mlen == -1) (t).mlen = (t).slen + ((t).slen == 0); } -#define biswriteprotected(t) ((t).mlen <= 0) - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/third_party/HLSLcc/src/cbstring/bstrlib.txt b/third_party/HLSLcc/src/cbstring/bstrlib.txt deleted file mode 100644 index bf48491..0000000 --- a/third_party/HLSLcc/src/cbstring/bstrlib.txt +++ /dev/null @@ -1,3202 +0,0 @@ -Better String library ---------------------- - -by Paul Hsieh - -The bstring library is an attempt to provide improved string processing -functionality to the C and C++ language. At the heart of the bstring library -(Bstrlib for short) is the management of "bstring"s which are a significant -improvement over '\0' terminated char buffers. 
- -=============================================================================== - -Motivation ----------- - -The standard C string library has serious problems: - - 1) Its use of '\0' to denote the end of the string means knowing a - string's length is O(n) when it could be O(1). - 2) It imposes an interpretation for the character value '\0'. - 3) gets() always exposes the application to a buffer overflow. - 4) strtok() modifies the string its parsing and thus may not be usable in - programs which are re-entrant or multithreaded. - 5) fgets has the unusual semantic of ignoring '\0's that occur before - '\n's are consumed. - 6) There is no memory management, and actions performed such as strcpy, - strcat and sprintf are common places for buffer overflows. - 7) strncpy() doesn't '\0' terminate the destination in some cases. - 8) Passing NULL to C library string functions causes an undefined NULL - pointer access. - 9) Parameter aliasing (overlapping, or self-referencing parameters) - within most C library functions has undefined behavior. - 10) Many C library string function calls take integer parameters with - restricted legal ranges. Parameters passed outside these ranges are - not typically detected and cause undefined behavior. - -So the desire is to create an alternative string library that does not suffer -from the above problems and adds in the following functionality: - - 1) Incorporate string functionality seen from other languages. - a) MID$() - from BASIC - b) split()/join() - from Python - c) string/char x n - from Perl - 2) Implement analogs to functions that combine stream IO and char buffers - without creating a dependency on stream IO functionality. - 3) Implement the basic text editor-style functions insert, delete, find, - and replace. - 4) Implement reference based sub-string access (as a generalization of - pointer arithmetic.) - 5) Implement runtime write protection for strings. - -There is also a desire to avoid "API-bloat". 
So functionality that can be -implemented trivially in other functionality is omitted. So there is no -left$() or right$() or reverse() or anything like that as part of the core -functionality. - -Explaining Bstrings -------------------- - -A bstring is basically a header which wraps a pointer to a char buffer. Lets -start with the declaration of a struct tagbstring: - - struct tagbstring { - int mlen; - int slen; - unsigned char * data; - }; - -This definition is considered exposed, not opaque (though it is neither -necessary nor recommended that low level maintenance of bstrings be performed -whenever the abstract interfaces are sufficient). The mlen field (usually) -describes a lower bound for the memory allocated for the data field. The -slen field describes the exact length for the bstring. The data field is a -single contiguous buffer of unsigned chars. Note that the existence of a '\0' -character in the unsigned char buffer pointed to by the data field does not -necessarily denote the end of the bstring. - -To be a well formed modifiable bstring the mlen field must be at least the -length of the slen field, and slen must be non-negative. Furthermore, the -data field must point to a valid buffer in which access to the first mlen -characters has been acquired. So the minimal check for correctness is: - - (slen >= 0 && mlen >= slen && data != NULL) - -bstrings returned by bstring functions can be assumed to be either NULL or -satisfy the above property. (When bstrings are only readable, the mlen >= -slen restriction is not required; this is discussed later in this section.) -A bstring itself is just a pointer to a struct tagbstring: - - typedef struct tagbstring * bstring; - -Note that use of the prefix "tag" in struct tagbstring is required to work -around the inconsistency between C and C++'s struct namespace usage. This -definition is also considered exposed. - -Bstrlib basically manages bstrings allocated as a header and an associated -data-buffer. 
Since the implementation is exposed, they can also be -constructed manually. Functions which mutate bstrings assume that the header -and data buffer have been malloced; the bstring library may perform free() or -realloc() on both the header and data buffer of any bstring parameter. -Functions which return bstring's create new bstrings. The string memory is -freed by a bdestroy() call (or using the bstrFree macro). - -The following related typedef is also provided: - - typedef const struct tagbstring * const_bstring; - -which is also considered exposed. These are directly bstring compatible (no -casting required) but are just used for parameters which are meant to be -non-mutable. So in general, bstring parameters which are read as input but -not meant to be modified will be declared as const_bstring, and bstring -parameters which may be modified will be declared as bstring. This convention -is recommended for user written functions as well. - -Since bstrings maintain interoperability with C library char-buffer style -strings, all functions which modify, update or create bstrings also append a -'\0' character into the position slen + 1. This trailing '\0' character is -not required for bstrings input to the bstring functions; this is provided -solely as a convenience for interoperability with standard C char-buffer -functionality. - -Analogs for the ANSI C string library functions have been created when they -are necessary, but have also been left out when they are not. In particular -there are no functions analogous to fwrite, or puts just for the purposes of -bstring. The ->data member of any string is exposed, and therefore can be -used just as easily as char buffers for C functions which read strings. 
- -For those that wish to hand construct bstrings, the following should be kept -in mind: - - 1) While bstrlib can accept constructed bstrings without terminating - '\0' characters, the rest of the C language string library will not - function properly on such non-terminated strings. This is obvious - but must be kept in mind. - 2) If it is intended that a constructed bstring be written to by the - bstring library functions then the data portion should be allocated - by the malloc function and the slen and mlen fields should be entered - properly. The struct tagbstring header is not reallocated, and only - freed by bdestroy. - 3) Writing arbitrary '\0' characters at various places in the string - will not modify its length as perceived by the bstring library - functions. In fact, '\0' is a legitimate non-terminating character - for a bstring to contain. - 4) For read only parameters, bstring functions do not check the mlen. - I.e., the minimal correctness requirements are reduced to: - - (slen >= 0 && data != NULL) - -Better pointer arithmetic -------------------------- - -One built-in feature of '\0' terminated char * strings, is that its very easy -and fast to obtain a reference to the tail of any string using pointer -arithmetic. Bstrlib does one better by providing a way to get a reference to -any substring of a bstring (or any other length delimited block of memory.) -So rather than just having pointer arithmetic, with bstrlib one essentially -has segment arithmetic. This is achieved using the macro blk2tbstr() which -builds a reference to a block of memory and the macro bmid2tbstr() which -builds a reference to a segment of a bstring. Bstrlib also includes -functions for direct consumption of memory blocks into bstrings, namely -bcatblk () and blk2bstr (). 
- -One scenario where this can be extremely useful is when string contains many -substrings which one would like to pass as read-only reference parameters to -some string consuming function without the need to allocate entire new -containers for the string data. More concretely, imagine parsing a command -line string whose parameters are space delimited. This can only be done for -tails of the string with '\0' terminated char * strings. - -Improved NULL semantics and error handling ------------------------------------------- - -Unless otherwise noted, if a NULL pointer is passed as a bstring or any other -detectably illegal parameter, the called function will return with an error -indicator (either NULL or BSTR_ERR) rather than simply performing a NULL -pointer access, or having undefined behavior. - -To illustrate the value of this, consider the following example: - - strcpy (p = malloc (13 * sizeof (char)), "Hello,"); - strcat (p, " World"); - -This is not correct because malloc may return NULL (due to an out of memory -condition), and the behaviour of strcpy is undefined if either of its -parameters are NULL. However: - - bstrcat (p = bfromcstr ("Hello,"), q = bfromcstr (" World")); - bdestroy (q); - -is well defined, because if either p or q are assigned NULL (indicating a -failure to allocate memory) both bstrcat and bdestroy will recognize it and -perform no detrimental action. - -Note that it is not necessary to check any of the members of a returned -bstring for internal correctness (in particular the data member does not need -to be checked against NULL when the header is non-NULL), since this is -assured by the bstring library itself. - -bStreams --------- - -In addition to the bgets and bread functions, bstrlib can abstract streams -with a high performance read only stream called a bStream. 
In general, the -idea is to open a core stream (with something like fopen) then pass its -handle as well as a bNread function pointer (like fread) to the bsopen -function which will return a handle to an open bStream. Then the functions -bsread, bsreadln or bsreadlns can be called to read portions of the stream. -Finally, the bsclose function is called to close the bStream -- it will -return a handle to the original (core) stream. So bStreams, essentially, -wrap other streams. - -The bStreams have two main advantages over the bgets and bread (as well as -fgets/ungetc) paradigms: - -1) Improved functionality via the bunread function which allows a stream to - unread characters, giving the bStream stack-like functionality if so - desired. -2) A very high performance bsreadln function. The C library function fgets() - (and the bgets function) can typically be written as a loop on top of - fgetc(), thus paying all of the overhead costs of calling fgetc on a per - character basis. bsreadln will read blocks at a time, thus amortizing the - overhead of fread calls over many characters at once. - -However, clearly bStreams are suboptimal or unusable for certain kinds of -streams (stdin) or certain usage patterns (a few spotty, or non-sequential -reads from a slow stream.) For those situations, using bgets will be more -appropriate. - -The semantics of bStreams allows practical construction of layerable data -streams. What this means is that by writing a bNread compatible function on -top of a bStream, one can construct a new bStream on top of it. This can be -useful for writing multi-pass parsers that don't actually read the entire -input more than once and don't require the use of intermediate storage. - -Aliasing --------- - -Aliasing occurs when a function is given two parameters which point to data -structures which overlap in the memory they occupy. 
While this does not -disturb read only functions, for many libraries this can make functions that -write to these memory locations malfunction. This is a common problem of the -C standard library and especially the string functions in the C standard -library. - -The C standard string library is entirely char by char oriented (as is -bstring) which makes conforming implementations alias safe for some -scenarios. However no actual detection of aliasing is typically performed, -so it is easy to find cases where the aliasing will cause anomolous or -undesirable behaviour (consider: strcat (p, p).) The C99 standard includes -the "restrict" pointer modifier which allows the compiler to document and -assume a no-alias condition on usage. However, only the most trivial cases -can be caught (if at all) by the compiler at compile time, and thus there is -no actual enforcement of non-aliasing. - -Bstrlib, by contrast, permits aliasing and is completely aliasing safe, in -the C99 sense of aliasing. That is to say, under the assumption that -pointers of incompatible types from distinct objects can never alias, bstrlib -is completely aliasing safe. (In practice this means that the data buffer -portion of any bstring and header of any bstring are assumed to never alias.) -With the exception of the reference building macros, the library behaves as -if all read-only parameters are first copied and replaced by temporary -non-aliased parameters before any writing to any output bstring is performed -(though actual copying is extremely rarely ever done.) - -Besides being a useful safety feature, bstring searching/comparison -functions can improve to O(1) execution when aliasing is detected. - -Note that aliasing detection and handling code in Bstrlib is generally -extremely cheap. There is almost never any appreciable performance penalty -for using aliased parameters. 
- -Reenterancy ------------ - -Nearly every function in Bstrlib is a leaf function, and is completely -reenterable with the exception of writing to common bstrings. The split -functions which use a callback mechanism requires only that the source string -not be destroyed by the callback function unless the callback function returns -with an error status (note that Bstrlib functions which return an error do -not modify the string in any way.) The string can in fact be modified by the -callback and the behaviour is deterministic. See the documentation of the -various split functions for more details. - -Undefined scenarios -------------------- - -One of the basic important premises for Bstrlib is to not to increase the -propogation of undefined situations from parameters that are otherwise legal -in of themselves. In particular, except for extremely marginal cases, usages -of bstrings that use the bstring library functions alone cannot lead to any -undefined action. But due to C/C++ language and library limitations, there -is no way to define a non-trivial library that is completely without -undefined operations. All such possible undefined operations are described -below: - -1) bstrings or struct tagbstrings that are not explicitely initialized cannot - be passed as a parameter to any bstring function. -2) The members of the NULL bstring cannot be accessed directly. (Though all - APIs and macros detect the NULL bstring.) -3) A bstring whose data member has not been obtained from a malloc or - compatible call and which is write accessible passed as a writable - parameter will lead to undefined results. (i.e., do not writeAllow any - constructed bstrings unless the data portion has been obtained from the - heap.) -4) If the headers of two strings alias but are not identical (which can only - happen via a defective manual construction), then passing them to a - bstring function in which one is writable is not defined. 
-5) If the mlen member is larger than the actual accessible length of the data - member for a writable bstring, or if the slen member is larger than the - readable length of the data member for a readable bstring, then the - corresponding bstring operations are undefined. -6) Any bstring definition whose header or accessible data portion has been - assigned to inaccessible or otherwise illegal memory clearly cannot be - acted upon by the bstring library in any way. -7) Destroying the source of an incremental split from within the callback - and not returning with a negative value (indicating that it should abort) - will lead to undefined behaviour. (Though *modifying* or adjusting the - state of the source data, even if those modification fail within the - bstrlib API, has well defined behavior.) -8) Modifying a bstring which is write protected by direct access has - undefined behavior. - -While this may seem like a long list, with the exception of invalid uses of -the writeAllow macro, and source destruction during an iterative split -without an accompanying abort, no usage of the bstring API alone can cause -any undefined scenario to occurr. I.e., the policy of restricting usage of -bstrings to the bstring API can significantly reduce the risk of runtime -errors (in practice it should eliminate them) related to string manipulation -due to undefined action. - -C++ wrapper ------------ - -A C++ wrapper has been created to enable bstring functionality for C++ in the -most natural (for C++ programers) way possible. The mandate for the C++ -wrapper is different from the base C bstring library. Since the C++ language -has far more abstracting capabilities, the CBString structure is considered -fully abstracted -- i.e., hand generated CBStrings are not supported (though -conversion from a struct tagbstring is allowed) and all detectable errors are -manifest as thrown exceptions. - -- The C++ class definitions are all under the namespace Bstrlib. 
bstrwrap.h - enables this namespace (with a using namespace Bstrlib; directive at the - end) unless the macro BSTRLIB_DONT_ASSUME_NAMESPACE has been defined before - it is included. - -- Erroneous accesses results in an exception being thrown. The exception - parameter is of type "struct CBStringException" which is derived from - std::exception if STL is used. A verbose description of the error message - can be obtained from the what() method. - -- CBString is a C++ structure derived from a struct tagbstring. An address - of a CBString cast to a bstring must not be passed to bdestroy. The bstring - C API has been made C++ safe and can be used directly in a C++ project. - -- It includes constructors which can take a char, '\0' terminated char - buffer, tagbstring, (char, repeat-value), a length delimited buffer or a - CBStringList to initialize it. - -- Concatenation is performed with the + and += operators. Comparisons are - done with the ==, !=, <, >, <= and >= operators. Note that == and != use - the biseq call, while <, >, <= and >= use bstrcmp. - -- CBString's can be directly cast to const character buffers. - -- CBString's can be directly cast to double, float, int or unsigned int so - long as the CBString are decimal representations of those types (otherwise - an exception will be thrown). Converting the other way should be done with - the format(a) method(s). - -- CBString contains the length, character and [] accessor methods. The - character and [] accessors are aliases of each other. If the bounds for - the string are exceeded, an exception is thrown. To avoid the overhead for - this check, first cast the CBString to a (const char *) and use [] to - dereference the array as normal. Note that the character and [] accessor - methods allows both reading and writing of individual characters. 
- -- The methods: format, formata, find, reversefind, findcaseless, - reversefindcaseless, midstr, insert, insertchrs, replace, findreplace, - findreplacecaseless, remove, findchr, nfindchr, alloc, toupper, tolower, - gets, read are analogous to the functions that can be found in the C API. - -- The caselessEqual and caselessCmp methods are analogous to biseqcaseless - and bstricmp functions respectively. - -- Note that just like the bformat function, the format and formata methods do - not automatically cast CBStrings into char * strings for "%s"-type - substitutions: - - CBString w("world"); - CBString h("Hello"); - CBString hw; - - /* The casts are necessary */ - hw.format ("%s, %s", (const char *)h, (const char *)w); - -- The methods trunc and repeat have been added instead of using pattern. - -- ltrim, rtrim and trim methods have been added. These remove characters - from a given character string set (defaulting to the whitespace characters) - from either the left, right or both ends of the CBString, respectively. - -- The method setsubstr is also analogous in functionality to bsetstr, except - that it cannot be passed NULL. Instead the method fill and the fill-style - constructor have been supplied to enable this functionality. - -- The writeprotect(), writeallow() and iswriteprotected() methods are - analogous to the bwriteprotect(), bwriteallow() and biswriteprotected() - macros in the C API. Write protection semantics in CBString are stronger - than with the C API in that indexed character assignment is checked for - write protection. However, unlike with the C API, a write protected - CBString can be destroyed by the destructor. - -- CBStream is a C++ structure which wraps a struct bStream (its not derived - from it, since destruction is slightly different). It is constructed by - passing in a bNread function pointer and a stream parameter cast to void *. 
- This structure includes methods for detecting eof, setting the buffer - length, reading the whole stream or reading entries line by line or block - by block, an unread function, and a peek function. - -- If STL is available, the CBStringList structure is derived from a vector of - CBString with various split methods. The split method has been overloaded - to accept either a character or CBString as the second parameter (when the - split parameter is a CBString any character in that CBString is used as a - separator). The splitstr method takes a CBString as a substring separator. - Joins can be performed via a CBString constructor which takes a - CBStringList as a parameter, or just using the CBString::join() method. - -- If there is proper support for std::iostreams, then the >> and << operators - and the getline() function have been added (with semantics the same as - those for std::string). - -Multithreading --------------- - -A mutable bstring is kind of analogous to a small (two entry) linked list -allocated by malloc, with all aliasing completely under programmer control. -I.e., manipulation of one bstring will never affect any other distinct -bstring unless explicitly constructed to do so by the programmer via hand -construction or via building a reference. Bstrlib also does not use any -static or global storage, so there are no hidden unremovable race conditions. -Bstrings are also clearly not inherently thread local. So just like -char *'s, bstrings can be passed around from thread to thread and shared and -so on, so long as modifications to a bstring correspond to some kind of -exclusive access lock as should be expected (or if the bstring is read-only, -which can be enforced by bstring write protection) for any sort of shared -object in a multithreaded environment. - -Bsafe module ------------- - -For convenience, a bsafe module has been included.
The idea is that if this -module is included, inadvertent usage of the most dangerous C functions will -be overridden and lead to an immediate run time abort. Of course, it should -be emphasized that usage of this module is completely optional. The -intention is essentially to provide an option for creating project safety -rules which can be enforced mechanically rather than socially. This is -useful for larger, or open development projects where it's more difficult to -enforce social rules or "coding conventions". - -Problems not solved -------------------- - -Bstrlib is written for the C and C++ languages, which have inherent weaknesses -that cannot be easily solved: - -1. Memory leaks: Forgetting to call bdestroy on a bstring that is about to be - unreferenced, just as forgetting to call free on a heap buffer that is - about to be unreferenced. Though bstrlib itself is leak free. -2. Read before write usage: In C, declaring an auto bstring does not - automatically fill it with legal/valid contents. This problem has been - somewhat mitigated in C++. (The bstrDeclare and bstrFree macros from - bstraux can be used to help mitigate this problem.) - -Other problems not addressed: - -3. Built-in mutex usage to automatically avoid all bstring internal race - conditions in multitasking environments: The problem with trying to - implement such things at this low a level is that it is typically more - efficient to use locks in higher level primitives. There is also no - platform independent way to implement locks or mutexes. -4. Unicode/widecharacter support. - -Note that except for spotty support of wide characters, the default C -standard library does not address any of these problems either. - -Configurable compilation options --------------------------------- - -All configuration options are meant solely for the purpose of compiler -compatibility.
Configuration options are not meant to change the semantics -or capabilities of the library, except where it is unavoidable. - -Since some C++ compilers don't include the Standard Template Library and some -have the option of disabling exception handling, a number of macros can be -used to conditionally compile support for each of these: - -BSTRLIB_CAN_USE_STL - - - defining this will enable the use of the Standard Template Library. - Defining BSTRLIB_CAN_USE_STL overrides the BSTRLIB_CANNOT_USE_STL macro. - -BSTRLIB_CANNOT_USE_STL - - - defining this will disable the use of the Standard Template Library. - Defining BSTRLIB_CAN_USE_STL overrides the BSTRLIB_CANNOT_USE_STL macro. - -BSTRLIB_CAN_USE_IOSTREAM - - - defining this will enable the use of streams from class std. Defining - BSTRLIB_CAN_USE_IOSTREAM overrides the BSTRLIB_CANNOT_USE_IOSTREAM macro. - -BSTRLIB_CANNOT_USE_IOSTREAM - - - defining this will disable the use of streams from class std. Defining - BSTRLIB_CAN_USE_IOSTREAM overrides the BSTRLIB_CANNOT_USE_IOSTREAM macro. - -BSTRLIB_THROWS_EXCEPTIONS - - - defining this will enable the exception handling within bstring. - Defining BSTRLIB_THROWS_EXCEPTIONS overrides the - BSTRLIB_DOESNT_THROW_EXCEPTIONS macro. - -BSTRLIB_DOESNT_THROW_EXCEPTIONS - - - defining this will disable the exception handling within bstring. - Defining BSTRLIB_THROWS_EXCEPTIONS overrides the - BSTRLIB_DOESNT_THROW_EXCEPTIONS macro. - -Note that these macros must be defined consistently throughout all modules -that use CBStrings including bstrwrap.cpp. - -Some older C compilers do not support functions such as vsnprintf. This is -handled by the following macro variables: - -BSTRLIB_NOVSNP - - - defining this indicates that the compiler does not support vsnprintf. - This will cause bformat and bformata to not be declared. Note that - for some compilers, such as Turbo C, this is set automatically. - Defining BSTRLIB_NOVSNP overrides the BSTRLIB_VSNP_OK macro.
- -BSTRLIB_VSNP_OK - - - defining this will disable the autodetection of compilers that do not - support vsnprintf. - Defining BSTRLIB_NOVSNP overrides the BSTRLIB_VSNP_OK macro. - -Semantic compilation options ----------------------------- - -Bstrlib comes with very few compilation options for changing the semantics of -the library. These are described below. - -BSTRLIB_DONT_ASSUME_NAMESPACE - - - Defining this before including bstrwrap.h will disable the automatic - enabling of the Bstrlib namespace for the C++ declarations. - -BSTRLIB_DONT_USE_VIRTUAL_DESTRUCTOR - - - Defining this will make the CBString destructor non-virtual. - -BSTRLIB_MEMORY_DEBUG - - - Defining this will cause the bstrlib modules bstrlib.c and bstrwrap.cpp - to invoke a #include "memdbg.h". memdbg.h has to be supplied by the user. - -Note that these macros must be defined consistently throughout all modules -that use bstrings or CBStrings including bstrlib.c, bstraux.c and -bstrwrap.cpp. - -=============================================================================== - -Files ------ - -bstrlib.c - C implementation of bstring functions. -bstrlib.h - C header file for bstring functions. -bstraux.c - C example that implements trivial additional functions. -bstraux.h - C header for bstraux.c -bstest.c - C unit/regression test for bstrlib.c - -bstrwrap.cpp - C++ implementation of CBString. -bstrwrap.h - C++ header file for CBString. -test.cpp - C++ unit/regression test for bstrwrap.cpp - -bsafe.c - C runtime stubs to abort usage of unsafe C functions. -bsafe.h - C header file for bsafe.c functions. - -C projects need only include bstrlib.h and compile/link bstrlib.c to use the -bstring library. C++ projects need to additionally include bstrwrap.h and -compile/link bstrwrap.cpp. For both, there may be a need to make choices -about feature configuration as described in the "Configurable compilation -options" in the section above.
- -Other files that are included in this archive are: - -license.txt - The 3 clause BSD license for Bstrlib -gpl.txt - The GPL version 2 -security.txt - A security statement useful for auditing Bstrlib -porting.txt - A guide to porting Bstrlib -bstrlib.txt - This file - -=============================================================================== - -The functions -------------- - - extern bstring bfromcstr (const char * str); - - Take a standard C library style '\0' terminated char buffer and generate - a bstring with the same contents as the char buffer. If an error occurs - NULL is returned. - - So for example: - - bstring b = bfromcstr ("Hello"); - if (!b) { - fprintf (stderr, "Out of memory"); - } else { - puts ((char *) b->data); - } - - .......................................................................... - - extern bstring bfromcstralloc (int mlen, const char * str); - - Create a bstring which contains the contents of the '\0' terminated - char * buffer str. The memory buffer backing the bstring is at least - mlen characters in length. If an error occurs NULL is returned. - - So for example: - - bstring b = bfromcstralloc (64, someCstr); - if (b) b->data[63] = 'x'; - - The idea is that this will set the 64th character of b to 'x' if it is at - least 64 characters long otherwise do nothing. And we know this is well - defined so long as b was successfully created, since it will have been - allocated with at least 64 characters. - - .......................................................................... - - extern bstring blk2bstr (const void * blk, int len); - - Create a bstring whose contents are described by the contiguous buffer - pointed to by blk with a length of len bytes. Note that this function - creates a copy of the data in blk, rather than simply referencing it. - Compare with the blk2tbstr macro. If an error occurs NULL is returned. - - ..........................................................................
- - extern char * bstr2cstr (const_bstring s, char z); - - Create a '\0' terminated char buffer which contains the contents of the - bstring s, except that any contained '\0' characters are converted to the - character in z. This returned value should be freed with bcstrfree(), by - the caller. If an error occurs NULL is returned. - - .......................................................................... - - extern int bcstrfree (char * s); - - Frees a C-string generated by bstr2cstr (). This is normally unnecessary - since it just wraps a call to free (), however, if malloc () and free () - have been redefined as a macros within the bstrlib module (via macros in - the memdbg.h backdoor) with some difference in behaviour from the std - library functions, then this allows a correct way of freeing the memory - that allows higher level code to be independent from these macro - redefinitions. - - .......................................................................... - - extern bstring bstrcpy (const_bstring b1); - - Make a copy of the passed in bstring. The copied bstring is returned if - there is no error, otherwise NULL is returned. - - .......................................................................... - - extern int bassign (bstring a, const_bstring b); - - Overwrite the bstring a with the contents of bstring b. Note that the - bstring a must be a well defined and writable bstring. If an error - occurs BSTR_ERR is returned and a is not overwritten. - - .......................................................................... - - int bassigncstr (bstring a, const char * str); - - Overwrite the string a with the contents of char * string str. Note that - the bstring a must be a well defined and writable bstring. If an error - occurs BSTR_ERR is returned and a may be partially overwritten. - - .......................................................................... 
- - int bassignblk (bstring a, const void * s, int len); - - Overwrite the string a with the contents of the block (s, len). Note that - the bstring a must be a well defined and writable bstring. If an error - occurs BSTR_ERR is returned and a is not overwritten. - - .......................................................................... - - extern int bassignmidstr (bstring a, const_bstring b, int left, int len); - - Overwrite the bstring a with the middle of contents of bstring b - starting from position left and running for a length len. left and - len are clamped to the ends of b as with the function bmidstr. Note that - the bstring a must be a well defined and writable bstring. If an error - occurs BSTR_ERR is returned and a is not overwritten. - - .......................................................................... - - extern bstring bmidstr (const_bstring b, int left, int len); - - Create a bstring which is the substring of b starting from position left - and running for a length len (clamped by the end of the bstring b.) If - there was no error, the value of this constructed bstring is returned - otherwise NULL is returned. - - .......................................................................... - - extern int bdelete (bstring s1, int pos, int len); - - Removes characters from pos to pos+len-1 and shifts the tail of the - bstring starting from pos+len to pos. len must be positive for this call - to have any effect. The section of the bstring described by (pos, len) - is clamped to boundaries of the bstring b. The value BSTR_OK is returned - if the operation is successful, otherwise BSTR_ERR is returned. - - .......................................................................... - - extern int bconcat (bstring b0, const_bstring b1); - - Concatenate the bstring b1 to the end of bstring b0. The value BSTR_OK - is returned if the operation is successful, otherwise BSTR_ERR is - returned. 
- - .......................................................................... - - extern int bconchar (bstring b, char c); - - Concatenate the character c to the end of bstring b. The value BSTR_OK - is returned if the operation is successful, otherwise BSTR_ERR is - returned. - - .......................................................................... - - extern int bcatcstr (bstring b, const char * s); - - Concatenate the char * string s to the end of bstring b. The value - BSTR_OK is returned if the operation is successful, otherwise BSTR_ERR is - returned. - - .......................................................................... - - extern int bcatblk (bstring b, const void * s, int len); - - Concatenate a fixed length buffer (s, len) to the end of bstring b. The - value BSTR_OK is returned if the operation is successful, otherwise - BSTR_ERR is returned. - - .......................................................................... - - extern int biseq (const_bstring b0, const_bstring b1); - - Compare the bstring b0 and b1 for equality. If the bstrings differ, 0 - is returned, if the bstrings are the same, 1 is returned, if there is an - error, -1 is returned. If the length of the bstrings are different, this - function has O(1) complexity. Contained '\0' characters are not treated - as a termination character. - - Note that the semantics of biseq are not completely compatible with - bstrcmp because of its different treatment of the '\0' character. - - .......................................................................... - - extern int bisstemeqblk (const_bstring b, const void * blk, int len); - - Compare beginning of bstring b0 with a block of memory of length len for - equality. If the beginning of b0 differs from the memory block (or if b0 - is too short), 0 is returned, if the bstrings are the same, 1 is returned, - if there is an error, -1 is returned. - - .......................................................................... 
- - extern int biseqcaseless (const_bstring b0, const_bstring b1); - - Compare two bstrings for equality without differentiating between case. - If the bstrings differ other than in case, 0 is returned, if the bstrings - are the same, 1 is returned, if there is an error, -1 is returned. If - the length of the bstrings are different, this function is O(1). '\0' - termination characters are not treated in any special way. - - .......................................................................... - - extern int bisstemeqcaselessblk (const_bstring b0, const void * blk, int len); - - Compare beginning of bstring b0 with a block of memory of length len - without differentiating between case for equality. If the beginning of b0 - differs from the memory block other than in case (or if b0 is too short), - 0 is returned, if the bstrings are the same, 1 is returned, if there is an - error, -1 is returned. - - .......................................................................... - - extern int biseqcstr (const_bstring b, const char *s); - - Compare the bstring b and char * bstring s. The C string s must be '\0' - terminated at exactly the length of the bstring b, and the contents - between the two must be identical with the bstring b with no '\0' - characters for the two contents to be considered equal. This is - equivalent to the condition that their current contents will be always be - equal when comparing them in the same format after converting one or the - other. If they are equal 1 is returned, if they are unequal 0 is - returned and if there is a detectable error BSTR_ERR is returned. - - .......................................................................... - - extern int biseqcstrcaseless (const_bstring b, const char *s); - - Compare the bstring b and char * string s. 
The C string s must be '\0' - terminated at exactly the length of the bstring b, and the contents - between the two must be identical except for case with the bstring b with - no '\0' characters for the two contents to be considered equal. This is - equivalent to the condition that their current contents will be always be - equal ignoring case when comparing them in the same format after - converting one or the other. If they are equal, except for case, 1 is - returned, if they are unequal regardless of case 0 is returned and if - there is a detectable error BSTR_ERR is returned. - - .......................................................................... - - extern int bstrcmp (const_bstring b0, const_bstring b1); - - Compare the bstrings b0 and b1 for ordering. If there is an error, - SHRT_MIN is returned, otherwise a value less than or greater than zero, - indicating that the bstring pointed to by b0 is lexicographically less - than or greater than the bstring pointed to by b1 is returned. If the - bstring lengths are unequal but the characters up until the length of the - shorter are equal then a value less than, or greater than zero, - indicating that the bstring pointed to by b0 is shorter or longer than the - bstring pointed to by b1 is returned. 0 is returned if and only if the - two bstrings are the same. If the length of the bstrings are different, - this function is O(n). Like its standard C library counter part, the - comparison does not proceed past any '\0' termination characters - encountered. - - The seemingly odd error return value, merely provides slightly more - granularity than the undefined situation given in the C library function - strcmp. The function otherwise behaves very much like strcmp(). - - Note that the semantics of bstrcmp are not completely compatible with - biseq because of its different treatment of the '\0' termination - character. - - .......................................................................... 
- - extern int bstrncmp (const_bstring b0, const_bstring b1, int n); - - Compare the bstrings b0 and b1 for ordering for at most n characters. If - there is an error, SHRT_MIN is returned, otherwise a value is returned as - if b0 and b1 were first truncated to at most n characters then bstrcmp - was called with these new bstrings as parameters. If the length of the - bstrings are different, this function is O(n). Like its standard C - library counterpart, the comparison does not proceed past any '\0' - termination characters encountered. - - The seemingly odd error return value, merely provides slightly more - granularity than the undefined situation given in the C library function - strncmp. The function otherwise behaves very much like strncmp(). - - .......................................................................... - - extern int bstricmp (const_bstring b0, const_bstring b1); - - Compare two bstrings without differentiating between case. The return - value is the difference of the values of the characters where the two - bstrings first differ, otherwise 0 is returned indicating that the - bstrings are equal. If the lengths are different, then a difference from - 0 is given, but if the first extra character is '\0', then it is taken to - be the value UCHAR_MAX+1. - - .......................................................................... - - extern int bstrnicmp (const_bstring b0, const_bstring b1, int n); - - Compare two bstrings without differentiating between case for at most n - characters. If the position where the two bstrings first differ is - before the nth position, the return value is the difference of the values - of the characters, otherwise 0 is returned. If the lengths are different - and less than n characters, then a difference from 0 is given, but if the - first extra character is '\0', then it is taken to be the value - UCHAR_MAX+1. - - ..........................................................................
- - extern int bdestroy (bstring b); - - Deallocate the bstring passed. Passing NULL in as a parameter will have - no effect. Note that both the header and the data portion of the bstring - will be freed. No other bstring function which modifies one of its - parameters will free or reallocate the header. Because of this, in - general, bdestroy cannot be called on any declared struct tagbstring even - if it is not write protected. A bstring which is write protected cannot - be destroyed via the bdestroy call. Any attempt to do so will result in - no action taken, and BSTR_ERR will be returned. - - Note to C++ users: Passing in a CBString cast to a bstring will lead to - undefined behavior (free will be called on the header, rather than the - CBString destructor.) Instead just use the ordinary C++ language - facilities to dealloc a CBString. - - .......................................................................... - - extern int binstr (const_bstring s1, int pos, const_bstring s2); - - Search for the bstring s2 in s1 starting at position pos and looking in a - forward (increasing) direction. If it is found then it returns with the - first position after pos where it is found, otherwise it returns BSTR_ERR. - The algorithm used is brute force; O(m*n). - - .......................................................................... - - extern int binstrr (const_bstring s1, int pos, const_bstring s2); - - Search for the bstring s2 in s1 starting at position pos and looking in a - backward (decreasing) direction. If it is found then it returns with the - first position after pos where it is found, otherwise return BSTR_ERR. - Note that the current position at pos is tested as well -- so to be - disjoint from a previous forward search it is recommended that the - position be backed up (decremented) by one position. The algorithm used - is brute force; O(m*n). - - .......................................................................... 
- - extern int binstrcaseless (const_bstring s1, int pos, const_bstring s2); - - Search for the bstring s2 in s1 starting at position pos and looking in a - forward (increasing) direction but without regard to case. If it is - found then it returns with the first position after pos where it is - found, otherwise it returns BSTR_ERR. The algorithm used is brute force; - O(m*n). - - .......................................................................... - - extern int binstrrcaseless (const_bstring s1, int pos, const_bstring s2); - - Search for the bstring s2 in s1 starting at position pos and looking in a - backward (decreasing) direction but without regard to case. If it is - found then it returns with the first position after pos where it is - found, otherwise return BSTR_ERR. Note that the current position at pos - is tested as well -- so to be disjoint from a previous forward search it - is recommended that the position be backed up (decremented) by one - position. The algorithm used is brute force; O(m*n). - - .......................................................................... - - extern int binchr (const_bstring b0, int pos, const_bstring b1); - - Search for the first position in b0 starting from pos or after, in which - one of the characters in b1 is found. This function has an execution - time of O(b0->slen + b1->slen). If such a position does not exist in b0, - then BSTR_ERR is returned. - - .......................................................................... - - extern int binchrr (const_bstring b0, int pos, const_bstring b1); - - Search for the last position in b0 no greater than pos, in which one of - the characters in b1 is found. This function has an execution time - of O(b0->slen + b1->slen). If such a position does not exist in b0, - then BSTR_ERR is returned. - - .......................................................................... 
- - extern int bninchr (const_bstring b0, int pos, const_bstring b1); - - Search for the first position in b0 starting from pos or after, in which - none of the characters in b1 is found and return it. This function has - an execution time of O(b0->slen + b1->slen). If such a position does - not exist in b0, then BSTR_ERR is returned. - - .......................................................................... - - extern int bninchrr (const_bstring b0, int pos, const_bstring b1); - - Search for the last position in b0 no greater than pos, in which none of - the characters in b1 is found and return it. This function has an - execution time of O(b0->slen + b1->slen). If such a position does not - exist in b0, then BSTR_ERR is returned. - - .......................................................................... - - extern int bstrchr (const_bstring b, int c); - - Search for the character c in the bstring b forwards from the start of - the bstring. Returns the position of the found character or BSTR_ERR if - it is not found. - - NOTE: This has been implemented as a macro on top of bstrchrp (). - - .......................................................................... - - extern int bstrrchr (const_bstring b, int c); - - Search for the character c in the bstring b backwards from the end of the - bstring. Returns the position of the found character or BSTR_ERR if it is - not found. - - NOTE: This has been implemented as a macro on top of bstrrchrp (). - - .......................................................................... - - extern int bstrchrp (const_bstring b, int c, int pos); - - Search for the character c in b forwards from the position pos - (inclusive). Returns the position of the found character or BSTR_ERR if - it is not found. - - .......................................................................... 
- - extern int bstrrchrp (const_bstring b, int c, int pos); - - Search for the character c in b backwards from the position pos in bstring - (inclusive). Returns the position of the found character or BSTR_ERR if - it is not found. - - .......................................................................... - - extern int bsetstr (bstring b0, int pos, const_bstring b1, unsigned char fill); - - Overwrite the bstring b0 starting at position pos with the bstring b1. If - the position pos is past the end of b0, then the character "fill" is - appended as necessary to make up the gap between the end of b0 and pos. - If b1 is NULL, it behaves as if it were a 0-length bstring. The value - BSTR_OK is returned if the operation is successful, otherwise BSTR_ERR is - returned. - - .......................................................................... - - extern int binsert (bstring s1, int pos, const_bstring s2, unsigned char fill); - - Inserts the bstring s2 into s1 at position pos. If the position pos is - past the end of s1, then the character "fill" is appended as necessary to - make up the gap between the end of s1 and pos. The value BSTR_OK is - returned if the operation is successful, otherwise BSTR_ERR is returned. - - .......................................................................... - - extern int binsertch (bstring s1, int pos, int len, unsigned char fill); - - Inserts the character fill repeatedly into s1 at position pos for a - length len. If the position pos is past the end of s1, then the - character "fill" is appended as necessary to make up the gap between the - end of s1 and the position pos + len (exclusive). The value BSTR_OK is - returned if the operation is successful, otherwise BSTR_ERR is returned. - - .......................................................................... 
- - extern int breplace (bstring b1, int pos, int len, const_bstring b2, - unsigned char fill); - - Replace a section of a bstring from pos for a length len with the bstring - b2. If the position pos is past the end of b1 then the character "fill" - is appended as necessary to make up the gap between the end of b1 and - pos. - - .......................................................................... - - extern int bfindreplace (bstring b, const_bstring find, - const_bstring replace, int position); - - Replace all occurrences of the find substring with a replace bstring - after a given position in the bstring b. The find bstring must have a - length > 0 otherwise BSTR_ERR is returned. This function does not - perform recursive per character replacement; that is to say successive - searches resume at the position after the last replace. - - So for example: - - bfindreplace (a0 = bfromcstr("aabaAb"), a1 = bfromcstr("a"), - a2 = bfromcstr("aa"), 0); - - Should result in changing a0 to "aaaabaaAb". - - This function performs exactly (b->slen - position) bstring comparisons, - and data movement is bounded above by character volume equivalent to size - of the output bstring. - - .......................................................................... - - extern int bfindreplacecaseless (bstring b, const_bstring find, - const_bstring replace, int position); - - Replace all occurrences of the find substring, ignoring case, with a - replace bstring after a given position in the bstring b. The find bstring - must have a length > 0 otherwise BSTR_ERR is returned. This function - does not perform recursive per character replacement; that is to say - successive searches resume at the position after the last replace. - - So for example: - - bfindreplacecaseless (a0 = bfromcstr("AAbaAb"), a1 = bfromcstr("a"), - a2 = bfromcstr("aa"), 0); - - Should result in changing a0 to "aaaabaaaab". 
- - This function performs exactly (b->slen - position) bstring comparisons, - and data movement is bounded above by character volume equivalent to size - of the output bstring. - - .......................................................................... - - extern int balloc (bstring b, int length); - - Increase the allocated memory backing the data buffer for the bstring b - to a length of at least length. If the memory backing the bstring b is - already large enough, no action is performed. This has no effect on the - bstring b that is visible to the bstring API. Usually this function will - only be used when a minimum buffer size is required coupled with a direct - access to the ->data member of the bstring structure. - - Be warned that like any other bstring function, the bstring must be well - defined upon entry to this function. I.e., doing something like: - - b->slen *= 2; /* ?? Most likely incorrect */ - balloc (b, b->slen); - - is invalid, and should be implemented as: - - int t; - if (BSTR_OK == balloc (b, t = (b->slen * 2))) b->slen = t; - - This function will return with BSTR_ERR if b is not detected as a valid - bstring or length is not greater than 0, otherwise BSTR_OK is returned. - - .......................................................................... - - extern int ballocmin (bstring b, int length); - - Change the amount of memory backing the bstring b to at least length. - This operation will never truncate the bstring data including the - extra terminating '\0' and thus will not decrease the length to less than - b->slen + 1. Note that repeated use of this function may cause - performance problems (realloc may be called on the bstring more than - the O(log(INT_MAX)) times). This function will return with BSTR_ERR if b - is not detected as a valid bstring or length is not greater than 0, - otherwise BSTR_OK is returned.
- - So for example: - - if (BSTR_OK == ballocmin (b, 64)) b->data[63] = 'x'; - - The idea is that this will set the 64th character of b to 'x' if it is at - least 64 characters long otherwise do nothing. And we know this is well - defined so long as the ballocmin call was successful, since it will - ensure that b has been allocated with at least 64 characters. - - .......................................................................... - - int btrunc (bstring b, int n); - - Truncate the bstring to at most n characters. This function will return - with BSTR_ERR if b is not detected as a valid bstring or n is less than - 0, otherwise BSTR_OK is returned. - - .......................................................................... - - extern int bpattern (bstring b, int len); - - Replicate the starting bstring, b, end to end repeatedly until it - surpasses len characters, then chop the result to exactly len characters. - This function operates in-place. This function will return with BSTR_ERR - if b is NULL or of length 0, otherwise BSTR_OK is returned. - - .......................................................................... - - extern int btoupper (bstring b); - - Convert contents of bstring to upper case. This function will return with - BSTR_ERR if b is NULL or of length 0, otherwise BSTR_OK is returned. - - .......................................................................... - - extern int btolower (bstring b); - - Convert contents of bstring to lower case. This function will return with - BSTR_ERR if b is NULL or of length 0, otherwise BSTR_OK is returned. - - .......................................................................... - - extern int bltrimws (bstring b); - - Delete whitespace contiguous from the left end of the bstring. This - function will return with BSTR_ERR if b is NULL or of length 0, otherwise - BSTR_OK is returned. - - ..........................................................................
- - extern int brtrimws (bstring b); - - Delete whitespace contiguous from the right end of the bstring. This - function will return with BSTR_ERR if b is NULL or of length 0, otherwise - BSTR_OK is returned. - - .......................................................................... - - extern int btrimws (bstring b); - - Delete whitespace contiguous from both ends of the bstring. This function - will return with BSTR_ERR if b is NULL or of length 0, otherwise BSTR_OK - is returned. - - .......................................................................... - - extern int bstrListCreate (void); - - Create an empty struct bstrList. The struct bstrList output structure is - declared as follows: - - struct bstrList { - int qty, mlen; - bstring * entry; - }; - - The entry field actually is an array with qty number entries. The mlen - record counts the maximum number of bstring's for which there is memory - in the entry record. - - The Bstrlib API does *NOT* include a comprehensive set of functions for - full management of struct bstrList in an abstracted way. The reason for - this is because aliasing semantics of the list are best left to the user - of this function, and performance varies wildly depending on the - assumptions made. For a complete list of bstring data type it is - recommended that the C++ public std::vector be used, since its - semantics and usage are more standard. - - .......................................................................... - - extern int bstrListDestroy (struct bstrList * sl); - - Destroy a struct bstrList structure that was returned by the bsplit - function. Note that this will destroy each bstring in the ->entry array - as well. See bstrListCreate() above for structure of struct bstrList. - - .......................................................................... - - extern int bstrListAlloc (struct bstrList * sl, int msz); - - Ensure that there is memory for at least msz number of entries for the - list.
- - .......................................................................... - - extern int bstrListAllocMin (struct bstrList * sl, int msz); - - Try to allocate the minimum amount of memory for the list to include at - least msz entries or sl->qty whichever is greater. - - .......................................................................... - - extern struct bstrList * bsplit (bstring str, unsigned char splitChar); - - Create an array of sequential substrings from str divided by the - character splitChar. Successive occurrences of the splitChar will be - divided by empty bstring entries, following the semantics from the Python - programming language. To reclaim the memory from this output structure, - bstrListDestroy () should be called. See bstrListCreate() above for - structure of struct bstrList. - - .......................................................................... - - extern struct bstrList * bsplits (bstring str, const_bstring splitStr); - - Create an array of sequential substrings from str divided by any - character contained in splitStr. An empty splitStr causes a single entry - bstrList containing a copy of str to be returned. See bstrListCreate() - above for structure of struct bstrList. - - .......................................................................... - - extern struct bstrList * bsplitstr (bstring str, const_bstring splitStr); - - Create an array of sequential substrings from str divided by the entire - substring splitStr. An empty splitStr causes a single entry bstrList - containing a copy of str to be returned. See bstrListCreate() above for - structure of struct bstrList. - - .......................................................................... - - extern bstring bjoin (const struct bstrList * bl, const_bstring sep); - - Join the entries of a bstrList into one bstring by sequentially - concatenating them with the sep bstring in between. If sep is NULL, it - is treated as if it were the empty bstring. 
Note that: - - bjoin (l = bsplit (b, s->data[0]), s); - - should result in a copy of b, if s->slen is 1. If there is an error NULL - is returned, otherwise a bstring with the correct result is returned. - See bstrListCreate() above for structure of struct bstrList. - - .......................................................................... - - extern int bsplitcb (const_bstring str, unsigned char splitChar, int pos, - int (* cb) (void * parm, int ofs, int len), void * parm); - - Iterate the set of disjoint sequential substrings over str starting at - position pos divided by the character splitChar. The parm passed to - bsplitcb is passed on to cb. If the function cb returns a value < 0, - then further iterating is halted and this value is returned by bsplitcb. - - Note: Non-destructive modification of str from within the cb function - while performing this split is not undefined. bsplitcb behaves in - sequential lock step with calls to cb. I.e., after returning from a cb - that return a non-negative integer, bsplitcb continues from the position - 1 character after the last detected split character and it will halt - immediately if the length of str falls below this point. However, if the - cb function destroys str, then it *must* return with a negative value, - otherwise bsplitcb will continue in an undefined manner. - - This function is provided as an incremental alternative to bsplit that is - abortable and which does not impose additional memory allocation. - - .......................................................................... - - extern int bsplitscb (const_bstring str, const_bstring splitStr, int pos, - int (* cb) (void * parm, int ofs, int len), void * parm); - - Iterate the set of disjoint sequential substrings over str starting at - position pos divided by any of the characters in splitStr. An empty - splitStr causes the whole str to be iterated once. The parm passed to - bsplitcb is passed on to cb. 
If the function cb returns a value < 0, - then further iterating is halted and this value is returned by bsplitcb. - - Note: Non-destructive modification of str from within the cb function - while performing this split is not undefined. bsplitscb behaves in - sequential lock step with calls to cb. I.e., after returning from a cb - that return a non-negative integer, bsplitscb continues from the position - 1 character after the last detected split character and it will halt - immediately if the length of str falls below this point. However, if the - cb function destroys str, then it *must* return with a negative value, - otherwise bsplitscb will continue in an undefined manner. - - This function is provided as an incremental alternative to bsplits that - is abortable and which does not impose additional memory allocation. - - .......................................................................... - - extern int bsplitstrcb (const_bstring str, const_bstring splitStr, int pos, - int (* cb) (void * parm, int ofs, int len), void * parm); - - Iterate the set of disjoint sequential substrings over str starting at - position pos divided by the entire substring splitStr. An empty splitStr - causes each character of str to be iterated. The parm passed to bsplitcb - is passed on to cb. If the function cb returns a value < 0, then further - iterating is halted and this value is returned by bsplitcb. - - Note: Non-destructive modification of str from within the cb function - while performing this split is not undefined. bsplitstrcb behaves in - sequential lock step with calls to cb. I.e., after returning from a cb - that return a non-negative integer, bsplitstrcb continues from the position - 1 character after the last detected split character and it will halt - immediately if the length of str falls below this point. However, if the - cb function destroys str, then it *must* return with a negative value, - otherwise bsplitscb will continue in an undefined manner. 
- - This function is provided as an incremental alternative to bsplitstr that - is abortable and which does not impose additional memory allocation. - - .......................................................................... - - extern bstring bformat (const char * fmt, ...); - - Takes the same parameters as printf (), but rather than outputting - results to stdio, it forms a bstring which contains what would have been - output. Note that if there is an early generation of a '\0' character, - the bstring will be truncated to this end point. - - Note that %s format tokens correspond to '\0' terminated char * buffers, - not bstrings. To print a bstring, first dereference data element of the - bstring: - - /* b1->data needs to be '\0' terminated, so tagbstrings generated - by blk2tbstr () might not be suitable. */ - b0 = bformat ("Hello, %s", b1->data); - - Note that if the BSTRLIB_NOVSNP macro has been set when bstrlib has been - compiled the bformat function is not present. - - .......................................................................... - - extern int bformata (bstring b, const char * fmt, ...); - - In addition to the initial output buffer b, bformata takes the same - parameters as printf (), but rather than outputting results to stdio, it - appends the results to the initial bstring parameter. Note that if - there is an early generation of a '\0' character, the bstring will be - truncated to this end point. - - Note that %s format tokens correspond to '\0' terminated char * buffers, - not bstrings. To print a bstring, first dereference data element of the - bstring: - - /* b1->data needs to be '\0' terminated, so tagbstrings generated - by blk2tbstr () might not be suitable. */ - bformata (b0 = bfromcstr ("Hello"), ", %s", b1->data); - - Note that if the BSTRLIB_NOVSNP macro has been set when bstrlib has been - compiled the bformata function is not present. - - ..........................................................................
- - extern int bassignformat (bstring b, const char * fmt, ...); - - After the first parameter, it takes the same parameters as printf (), but - rather than outputting results to stdio, it outputs the results to - the bstring parameter b. Note that if there is an early generation of a - '\0' character, the bstring will be truncated to this end point. - - Note that %s format tokens correspond to '\0' terminated char * buffers, - not bstrings. To print a bstring, first dereference data element of the - bstring: - - /* b1->data needs to be '\0' terminated, so tagbstrings generated - by blk2tbstr () might not be suitable. */ - bassignformat (b0 = bfromcstr ("Hello"), ", %s", b1->data); - - Note that if the BSTRLIB_NOVSNP macro has been set when bstrlib has been - compiled the bassignformat function is not present. - - .......................................................................... - - extern int bvcformata (bstring b, int count, const char * fmt, va_list arglist); - - The bvcformata function formats data under control of the format control - string fmt and attempts to append the result to b. The fmt parameter is - the same as that of the printf function. The variable argument list is - replaced with arglist, which has been initialized by the va_start macro. - The size of the output is upper bounded by count. If the required output - exceeds count, the string b is not augmented with any contents and a value - below BSTR_ERR is returned. If a value below -count is returned then it - is recommended that the negative of this value be used as an update to the - count in a subsequent pass. On other errors, such as running out of - memory, parameter errors or numeric wrap around BSTR_ERR is returned. - BSTR_OK is returned when the output is successfully generated and - appended to b. - - Note: There is no sanity checking of arglist, and this function is - destructive of the contents of b from the b->slen point onward.
If there - is an early generation of a '\0' character, the bstring will be truncated - to this end point. - - Although this function is part of the external API for Bstrlib, the - interface and semantics (length limitations, and unusual return codes) - are fairly atypical. The real purpose for this function is to provide an - engine for the bvformata macro. - - Note that if the BSTRLIB_NOVSNP macro has been set when bstrlib has been - compiled the bvcformata function is not present. - - .......................................................................... - - extern bstring bread (bNread readPtr, void * parm); - typedef size_t (* bNread) (void *buff, size_t elsize, size_t nelem, - void *parm); - - Read an entire stream into a bstring, verbatim. The readPtr function - pointer is compatible with fread semantics, except that it need not obtain - the stream data from a file. The intention is that parm would contain - the stream data context/state required (similar to the role of the FILE* - I/O stream parameter of fread.) - - Abstracting the block read function allows for block devices other than - file streams to be read if desired. Note that there is an ANSI - compatibility issue if "fread" is used directly; see the ANSI issues - section below. - - .......................................................................... - - extern int breada (bstring b, bNread readPtr, void * parm); - - Read an entire stream and append it to a bstring, verbatim. Behaves - like bread, except that it appends its results to the bstring b. - BSTR_ERR is returned on error, otherwise 0 is returned. - - .......................................................................... - - extern bstring bgets (bNgetc getcPtr, void * parm, char terminator); - typedef int (* bNgetc) (void * parm); - - Read a bstring from a stream. As many bytes as is necessary are read - until the terminator is consumed or no more characters are available from - the stream.
If read from the stream, the terminator character will be - appended to the end of the returned bstring. The getcPtr function must - have the same semantics as the fgetc C library function (i.e., returning - an integer whose value is negative when there are no more characters - available, otherwise the value of the next available unsigned character - from the stream.) The intention is that parm would contain the stream - data context/state required (similar to the role of the FILE* I/O stream - parameter of fgets.) If no characters are read, or there is some other - detectable error, NULL is returned. - - bgets will never call the getcPtr function more often than necessary to - construct its output (including a single call, if required, to determine - that the stream contains no more characters.) - - Abstracting the character stream function and terminator character allows - for different stream devices and string formats other than '\n' - terminated lines in a file if desired (consider \032 terminated email - messages, in a UNIX mailbox for example.) - - For files, this function can be used analogously as fgets as follows: - - fp = fopen ( ... ); - if (fp) b = bgets ((bNgetc) fgetc, fp, '\n'); - - (Note that only one terminator character can be used, and that '\0' is - not assumed to terminate the stream in addition to the terminator - character. This is consistent with the semantics of fgets.) - - .......................................................................... - - extern int bgetsa (bstring b, bNgetc getcPtr, void * parm, char terminator); - - Read from a stream and concatenate to a bstring. Behaves like bgets, - except that it appends its results to the bstring b. The value 1 is - returned if no characters are read before a negative result is returned - from getcPtr. Otherwise BSTR_ERR is returned on error, and 0 is returned - in other normal cases. - - ..........................................................................
- - extern int bassigngets (bstring b, bNgetc getcPtr, void * parm, char terminator); - - Read from a stream and concatenate to a bstring. Behaves like bgets, - except that it assigns the results to the bstring b. The value 1 is - returned if no characters are read before a negative result is returned - from getcPtr. Otherwise BSTR_ERR is returned on error, and 0 is returned - in other normal cases. - - .......................................................................... - - extern struct bStream * bsopen (bNread readPtr, void * parm); - - Wrap a given open stream (described by a fread compatible function - pointer and stream handle) into an open bStream suitable for the bstring - library streaming functions. - - .......................................................................... - - extern void * bsclose (struct bStream * s); - - Close the bStream, and return the handle to the stream that was - originally used to open the given stream. If s is NULL or detectably - invalid, NULL will be returned. - - .......................................................................... - - extern int bsbufflength (struct bStream * s, int sz); - - Set the length of the buffer used by the bStream. If sz is the macro - BSTR_BS_BUFF_LENGTH_GET (which is 0), the length is not set. If s is - NULL or sz is negative, the function will return with BSTR_ERR, otherwise - this function returns with the previous length. - - .......................................................................... - - extern int bsreadln (bstring r, struct bStream * s, char terminator); - - Read a bstring terminated by the terminator character or the end of the - stream from the bStream (s) and return it into the parameter r. The - matched terminator, if found, appears at the end of the line read. If - the stream has been exhausted of all available data, before any can be - read, BSTR_ERR is returned. 
This function may read additional characters - into the stream buffer from the core stream that are not returned, but - will be retained for subsequent read operations. When reading from high - speed streams, this function can perform significantly faster than bgets. - - .......................................................................... - - extern int bsreadlna (bstring r, struct bStream * s, char terminator); - - Read a bstring terminated by the terminator character or the end of the - stream from the bStream (s) and concatenate it to the parameter r. The - matched terminator, if found, appears at the end of the line read. If - the stream has been exhausted of all available data, before any can be - read, BSTR_ERR is returned. This function may read additional characters - into the stream buffer from the core stream that are not returned, but - will be retained for subsequent read operations. When reading from high - speed streams, this function can perform significantly faster than bgets. - - .......................................................................... - - extern int bsreadlns (bstring r, struct bStream * s, bstring terminators); - - Read a bstring terminated by any character in the terminators bstring or - the end of the stream from the bStream (s) and return it into the - parameter r. This function may read additional characters from the core - stream that are not returned, but will be retained for subsequent read - operations. - - .......................................................................... - - extern int bsreadlnsa (bstring r, struct bStream * s, bstring terminators); - - Read a bstring terminated by any character in the terminators bstring or - the end of the stream from the bStream (s) and concatenate it to the - parameter r. If the stream has been exhausted of all available data, - before any can be read, BSTR_ERR is returned. 
This function may read - additional characters from the core stream that are not returned, but - will be retained for subsequent read operations. - - .......................................................................... - - extern int bsread (bstring r, struct bStream * s, int n); - - Read a bstring of length n (or, if it is fewer, as many bytes as is - remaining) from the bStream. This function will read the minimum - required number of additional characters from the core stream. When the - stream is at the end of the file BSTR_ERR is returned, otherwise BSTR_OK - is returned. - - .......................................................................... - - extern int bsreada (bstring r, struct bStream * s, int n); - - Read a bstring of length n (or, if it is fewer, as many bytes as is - remaining) from the bStream and concatenate it to the parameter r. This - function will read the minimum required number of additional characters - from the core stream. When the stream is at the end of the file BSTR_ERR - is returned, otherwise BSTR_OK is returned. - - .......................................................................... - - extern int bsunread (struct bStream * s, const_bstring b); - - Insert a bstring into the bStream at the current position. These - characters will be read prior to those that actually come from the core - stream. - - .......................................................................... - - extern int bspeek (bstring r, const struct bStream * s); - - Return the number of currently buffered characters from the bStream that - will be read prior to reads from the core stream, and append it to the - parameter r. - - ..........................................................................
- - extern int bssplitscb (struct bStream * s, const_bstring splitStr, - int (* cb) (void * parm, int ofs, const_bstring entry), void * parm); - - Iterate the set of disjoint sequential substrings over the stream s - divided by any character from the bstring splitStr. The parm passed to - bssplitscb is passed on to cb. If the function cb returns a value < 0, - then further iterating is halted and this return value is returned by - bssplitscb. - - Note: At the point of calling the cb function, the bStream pointer is - pointed exactly at the position right after having read the split - character. The cb function can act on the stream by causing the bStream - pointer to move, and bssplitscb will continue by starting the next split - at the position of the pointer after the return from cb. - - However, if the cb causes the bStream s to be destroyed then the cb must - return with a negative value, otherwise bssplitscb will continue in an - undefined manner. - - This function is provided as way to incrementally parse through a file - or other generic stream that in total size may otherwise exceed the - practical or desired memory available. As with the other split callback - based functions this is abortable and does not impose additional memory - allocation. - - .......................................................................... - - extern int bssplitstrcb (struct bStream * s, const_bstring splitStr, - int (* cb) (void * parm, int ofs, const_bstring entry), void * parm); - - Iterate the set of disjoint sequential substrings over the stream s - divided by the entire substring splitStr. The parm passed to - bssplitstrcb is passed on to cb. If the function cb returns a - value < 0, then further iterating is halted and this return value is - returned by bssplitstrcb. - - Note: At the point of calling the cb function, the bStream pointer is - pointed exactly at the position right after having read the split - character. 
The cb function can act on the stream by causing the bStream - pointer to move, and bssplitstrcb will continue by starting the next - split at the position of the pointer after the return from cb. - - However, if the cb causes the bStream s to be destroyed then the cb must - return with a negative value, otherwise bssplitstrcb will continue in an - undefined manner. - - This function is provided as way to incrementally parse through a file - or other generic stream that in total size may otherwise exceed the - practical or desired memory available. As with the other split callback - based functions this is abortable and does not impose additional memory - allocation. - - .......................................................................... - - extern int bseof (const struct bStream * s); - - Return the de facto "EOF" (end of file) state of a stream (1 if the - bStream is in an EOF state, 0 if not, and BSTR_ERR if stream is closed or - detectably erroneous.) When the readPtr callback returns a value <= 0 - the stream reaches its "EOF" state. Note that bsunread with non-empty - content will essentially turn off this state, and the stream will not be - in its "EOF" state so long as it is possible to read more data out of it. - - Also note that the semantics of bseof() are slightly different from - something like feof(). I.e., reaching the end of the stream does not - necessarily guarantee that bseof() will return with a value indicating - that this has happened. bseof() will only return indicating that it has - reached the "EOF" and an attempt has been made to read past the end of - the bStream. - -The macros ----------- - - The macros described below are shown in a prototype form indicating their - intended usage. Note that the parameters passed to these macros will be - referenced multiple times. As with all macros, programmer care is - required to guard against unintended side effects.
- - int blengthe (const_bstring b, int err); - - Returns the length of the bstring. If the bstring is NULL err is - returned. - - .......................................................................... - - int blength (const_bstring b); - - Returns the length of the bstring. If the bstring is NULL, the length - returned is 0. - - .......................................................................... - - int bchare (const_bstring b, int p, int c); - - Returns the p'th character of the bstring b. If the position p refers to - a position that does not exist in the bstring or the bstring is NULL, - then c is returned. - - .......................................................................... - - char bchar (const_bstring b, int p); - - Returns the p'th character of the bstring b. If the position p refers to - a position that does not exist in the bstring or the bstring is NULL, - then '\0' is returned. - - .......................................................................... - - char * bdatae (bstring b, char * err); - - Returns the char * data portion of the bstring b. If b is NULL, err is - returned. - - .......................................................................... - - char * bdata (bstring b); - - Returns the char * data portion of the bstring b. If b is NULL, NULL is - returned. - - .......................................................................... - - char * bdataofse (bstring b, int ofs, char * err); - - Returns the char * data portion of the bstring b offset by ofs. If b is - NULL, err is returned. - - .......................................................................... - - char * bdataofs (bstring b, int ofs); - - Returns the char * data portion of the bstring b offset by ofs. If b is - NULL, NULL is returned. - - .......................................................................... 
- - struct tagbstring var = bsStatic ("..."); - - The bsStatic macro allows for static declarations of literal string - constants as struct tagbstring structures. The resulting tagbstring does - not need to be freed or destroyed. Note that this macro is only well - defined for string literal arguments. For more general string pointers, - use the btfromcstr macro. - - The resulting struct tagbstring is permanently write protected. Attempts - to write to this struct tagbstring from any bstrlib function will lead to - BSTR_ERR being returned. Invoking the bwriteallow macro onto this struct - tagbstring has no effect. - - .......................................................................... - - <- bsStaticBlkParms ("...") - - The bsStaticBlkParms macro emits a pair of comma separated parameters - corresponding to the block parameters for the block functions in Bstrlib - (i.e., blk2bstr, bcatblk, blk2tbstr, bisstemeqblk, bisstemeqcaselessblk.) - Note that this macro is only well defined for string literal arguments. - - Examples: - - bstring b = blk2bstr (bsStaticBlkParms ("Fast init. ")); - bcatblk (b, bsStaticBlkParms ("No frills fast concatenation.")); - - These are faster than using bfromcstr() and bcatcstr() respectively - because the length of the inline string is known as a compile time - constant. Also note that separate struct tagbstring declarations for - holding the output of a bsStatic() macro are not required. - - .......................................................................... - - void btfromcstr (struct tagbstring& t, const char * s); - - Fill in the tagbstring t with the '\0' terminated char buffer s. This - action is purely reference oriented; no memory management is done. The - data member is just assigned s, and slen is assigned the strlen of s. - The s parameter is accessed exactly once in this macro. - - The resulting struct tagbstring is initially write protected.
Attempts - to write to this struct tagbstring in a write protected state from any - bstrlib function will lead to BSTR_ERR being returned. Invoke the - bwriteallow on this struct tagbstring to make it writeable (though this - requires that s be obtained from a function compatible with malloc.) - - .......................................................................... - - void btfromblk (struct tagbstring& t, void * s, int len); - - Fill in the tagbstring t with the data buffer s with length len. This - action is purely reference oriented; no memory management is done. The - data member of t is just assigned s, and slen is assigned len. Note that - the buffer is not appended with a '\0' character. The s and len - parameters are accessed exactly once each in this macro. - - The resulting struct tagbstring is initially write protected. Attempts - to write to this struct tagbstring in a write protected state from any - bstrlib function will lead to BSTR_ERR being returned. Invoke the - bwriteallow on this struct tagbstring to make it writeable (though this - requires that s be obtained from a function compatible with malloc.) - - .......................................................................... - - void btfromblkltrimws (struct tagbstring& t, void * s, int len); - - Fill in the tagbstring t with the data buffer s with length len after it - has been left trimmed. This action is purely reference oriented; no - memory management is done. The data member of t is just assigned to a - pointer inside the buffer s. Note that the buffer is not appended with a - '\0' character. The s and len parameters are accessed exactly once each - in this macro. - - The resulting struct tagbstring is permanently write protected. Attempts - to write to this struct tagbstring from any bstrlib function will lead to - BSTR_ERR being returned. Invoking the bwriteallow macro onto this struct - tagbstring has no effect. 
- - .......................................................................... - - void btfromblkrtrimws (struct tagbstring& t, void * s, int len); - - Fill in the tagbstring t with the data buffer s with length len after it - has been right trimmed. This action is purely reference oriented; no - memory management is done. The data member of t is just assigned to a - pointer inside the buffer s. Note that the buffer is not appended with a - '\0' character. The s and len parameters are accessed exactly once each - in this macro. - - The resulting struct tagbstring is permanently write protected. Attempts - to write to this struct tagbstring from any bstrlib function will lead to - BSTR_ERR being returned. Invoking the bwriteallow macro onto this struct - tagbstring has no effect. - - .......................................................................... - - void btfromblktrimws (struct tagbstring& t, void * s, int len); - - Fill in the tagbstring t with the data buffer s with length len after it - has been left and right trimmed. This action is purely reference - oriented; no memory management is done. The data member of t is just - assigned to a pointer inside the buffer s. Note that the buffer is not - appended with a '\0' character. The s and len parameters are accessed - exactly once each in this macro. - - The resulting struct tagbstring is permanently write protected. Attempts - to write to this struct tagbstring from any bstrlib function will lead to - BSTR_ERR being returned. Invoking the bwriteallow macro onto this struct - tagbstring has no effect. - - .......................................................................... - - void bmid2tbstr (struct tagbstring& t, bstring b, int pos, int len); - - Fill the tagbstring t with the substring from b, starting from position - pos with a length len. The segment is clamped by the boundaries of - the bstring b. This action is purely reference oriented; no memory - management is done. 
Note that the buffer is not appended with a '\0' - character. Note that the t parameter to this macro may be accessed - multiple times. Note that the contents of t will become undefined - if the contents of b change or are destroyed. - - The resulting struct tagbstring is permanently write protected. Attempts - to write to this struct tagbstring in a write protected state from any - bstrlib function will lead to BSTR_ERR being returned. Invoking the - bwriteallow macro on this struct tagbstring will have no effect. - - .......................................................................... - - void bvformata (int& ret, bstring b, const char * format, lastarg); - - Append the bstring b with printf like formatting with the format control - string, and the arguments taken from the ... list of arguments after - lastarg passed to the containing function. If the containing function - does not have ... parameters or lastarg is not the last named parameter - before the ... then the results are undefined. If successful, the - results are appended to b and BSTR_OK is assigned to ret. Otherwise - BSTR_ERR is assigned to ret. - - Example: - - void dbgerror (FILE * fp, const char * fmt, ...) { - int ret; - bstring b; - bvformata (ret, b = bfromcstr ("DBG: "), fmt, fmt); - if (BSTR_OK == ret) fputs ((char *) bdata (b), fp); - bdestroy (b); - } - - Note that if the BSTRLIB_NOVSNP macro was set when bstrlib had been - compiled the bvformata macro will not link properly. If the - BSTRLIB_NOVSNP macro has been set, the bvformata macro will not be - available. - - .......................................................................... - - void bwriteprotect (struct tagbstring& t); - - Disallow bstring from being written to via the bstrlib API. Attempts to - write to the resulting tagbstring from any bstrlib function will lead to - BSTR_ERR being returned. - - Note: bstrings which are write protected cannot be destroyed via bdestroy. 
- - Note to C++ users: Setting a CBString as write protected will not prevent - it from being destroyed by the destructor. - - .......................................................................... - - void bwriteallow (struct tagbstring& t); - - Allow bstring to be written to via the bstrlib API. Note that such an - action makes the bstring both writable and destroyable. If the bstring is - not legitimately writable (as is the case for struct tagbstrings - initialized with a bsStatic value), the results of this are undefined. - - Note that invoking the bwriteallow macro may increase the number of - reallocs by one more than necessary for every call to bwriteallow - interleaved with any bstring API which writes to this bstring. - - .......................................................................... - - int biswriteprotected (struct tagbstring& t); - - Returns 1 if the bstring is write protected, otherwise 0 is returned. - -=============================================================================== - -The bstest module ------------------ - -The bstest module is just a unit test for the bstrlib module. For correct -implementations of bstrlib, it should execute with 0 failures being reported. -This test should be utilized if modifications/customizations to bstrlib have -been performed. It tests each core bstrlib function with bstrings of every -mode (read-only, NULL, static and mutable) and ensures that the expected -semantics are observed (including results that should indicate an error). It -also tests for aliasing support. Passing bstest is a necessary but not a -sufficient condition for ensuring the correctness of the bstrlib module. - - -The test module ---------------- - -The test module is just a unit test for the bstrwrap module. For correct -implementations of bstrwrap, it should execute with 0 failures being -reported. This test should be utilized if modifications/customizations to -bstrwrap have been performed. 
It tests each core bstrwrap function with -CBStrings write protected or not and ensures that the expected semantics are -observed (including expected exceptions.) Note that exceptions cannot be -disabled to run this test. Passing test is a necessary but not a sufficient -condition for ensuring the correctness of the bstrwrap module. - -=============================================================================== - -Using Bstring and CBString as an alternative to the C library -------------------------------------------------------------- - -First let us give a table of C library functions and the alternative bstring -functions and CBString methods that should be used instead of them. - -C-library Bstring alternative CBString alternative ---------- ------------------- -------------------- -gets bgets ::gets -strcpy bassign = operator -strncpy bassignmidstr ::midstr -strcat bconcat += operator -strncat bconcat + btrunc += operator + ::trunc -strtok bsplit, bsplits ::split -sprintf b(assign)format ::format -snprintf b(assign)format + btrunc ::format + ::trunc -vsprintf bvformata bvformata - -vsnprintf bvformata + btrunc bvformata + btrunc -vfprintf bvformata + fputs use bvformata + fputs -strcmp biseq, bstrcmp comparison operators. -strncmp bstrncmp, memcmp bstrncmp, memcmp -strlen ->slen, blength ::length -strdup bstrcpy constructor -strset bpattern ::fill -strstr binstr ::find -strpbrk binchr ::findchr -stricmp bstricmp cast & use bstricmp -strlwr btolower cast & use btolower -strupr btoupper cast & use btoupper -strrev bReverse (aux module) cast & use bReverse -strchr bstrchr cast & use bstrchr -strspnp use strspn use strspn -ungetc bsunread bsunread - -The top 9 C functions listed here are troublesome in that they impose memory -management in the calling function. The Bstring and CBstring interfaces have -built-in memory management, so there is far less code with far less potential -for buffer overrun problems. 
strtok can only be reliably called as a "leaf" -calculation, since it (quite bizarrely) maintains hidden internal state. And -gets is well known to be broken no matter what. The Bstrlib alternatives do -not suffer from those sorts of problems. - -The substitute for strncat can be performed with higher performance by using -the blk2tbstr macro to create a presized second operand for bconcat. - -C-library Bstring alternative CBString alternative ---------- ------------------- -------------------- -strspn strspn acceptable strspn acceptable -strcspn strcspn acceptable strcspn acceptable -strnset strnset acceptable strnset acceptable -printf printf acceptable printf acceptable -puts puts acceptable puts acceptable -fprintf fprintf acceptable fprintf acceptable -fputs fputs acceptable fputs acceptable -memcmp memcmp acceptable memcmp acceptable - -Remember that Bstring (and CBstring) functions will automatically append the -'\0' character to the character data buffer. So by simply accessing the data -buffer directly, ordinary C string library functions can be called directly -on them. Note that bstrcmp is not the same as memcmp in exactly the same way -that strcmp is not the same as memcmp. - -C-library Bstring alternative CBString alternative ---------- ------------------- -------------------- -fread balloc + fread ::alloc + fread -fgets balloc + fgets ::alloc + fgets - -These are odd ones because of the exact sizing of the buffer required. The -Bstring and CBString alternatives requires that the buffers are forced to -hold at least the prescribed length, then just use fread or fgets directly. -However, typically the automatic memory management of Bstring and CBstring -will make the typical use of fgets and fread to read specifically sized -strings unnecessary. - -Implementation Choices ----------------------- - -Overhead: -......... - -The bstring library has more overhead versus straight char buffers for most -functions. 
This overhead is essentially just the memory management and -string header allocation. This overhead usually only shows up for small -string manipulations. The performance loss has to be considered in -light of the following: - -1) What would be the performance loss of trying to write this management - code in one's own application? -2) Since the bstring library source code is given, a sufficiently powerful - modern inlining globally optimizing compiler can remove function call - overhead. - -Since the data type is exposed, a developer can replace any unsatisfactory -function with their own inline implementation. And that is besides the main -point of what the better string library is mainly meant to provide. Any -overhead lost has to be compared against the value of the safe abstraction -for coupling memory management and string functionality. - -Performance of the C interface: -............................... - -The algorithms used have performance advantages versus the analogous C -library functions. For example: - -1. bfromcstr/blk2bstr/bstrcpy versus strcpy/strdup. By using memmove instead - of strcpy, the break condition of the copy loop is based on an independent - counter (that should be allocated in a register) rather than having to - check the results of the load. Modern out-of-order executing CPUs can - parallelize the final branch mis-predict penalty with the loading of the - source string. Some CPUs will also tend to have better built-in hardware - support for counted memory moves than load-compare-store. (This is a - minor, but non-zero gain.) -2. biseq versus strcmp. If the strings are unequal in length, biseq will - return in O(1) time. If the strings are aliased, or have aliased data - buffers, biseq will return in O(1) time. strcmp will always be O(k), - where k is the length of the common prefix or the whole string if they are - identical. -3. ->slen versus strlen.
->slen is obviously always O(1), while strlen is - always O(n) where n is the length of the string. -4. bconcat versus strcat. Both rely on precomputing the length of the - destination string argument, which will favor the bstring library. On - iterated concatenations the performance difference can be enormous. -5. bsreadln versus fgets. The bsreadln function reads large blocks at a time - from the given stream, then parses out lines from the buffers directly. - Some C libraries will implement fgets as a loop over single fgetc calls. - Testing indicates that the bsreadln approach can be several times faster - for fast stream devices (such as a file that has been entirely cached.) -6. bsplits/bsplitscb versus strspn. Accelerators for the set of match - characters are generated only once. -7. binstr versus strstr. The binstr implementation unrolls the loops to - help reduce loop overhead. This will matter if the target string is - long and source string is not found very early in the target string. - With strstr, while it is possible to unroll the source contents, it is - not possible to do so with the destination contents in a way that is - effective because every destination character must be tested against - '\0' before proceeding to the next character. -8. bReverse versus strrev. The C function must find the end of the string - first before swapping character pairs. -9. bstrrchr versus no comparable C function. It's not hard to write some C - code to search for a character from the end going backwards. But there - is no way to do this without computing the length of the string with - strlen. - -Practical testing indicates that in general Bstrlib is never significantly -slower than the C library for common operations, while very often having a -performance advantage that ranges from significant to massive.
Even for -functions like b(n)inchr versus str(c)spn() (where, in theory, there is no -advantage for the Bstrlib architecture) the performance of Bstrlib is vastly -superior to most tested C library implementations. - -Some of Bstrlib's extra functionality also lead to inevitable performance -advantages over typical C solutions. For example, using the blk2tbstr macro, -one can (in O(1) time) generate an internal substring by reference while not -disturbing the original string. If disturbing the original string is not an -option, typically, a comparable char * solution would have to make a copy of -the substring to provide similar functionality. Another example is reverse -character set scanning -- the str(c)spn functions only scan in a forward -direction which can complicate some parsing algorithms. - -Where high performance char * based algorithms are available, Bstrlib can -still leverage them by accessing the ->data field on bstrings. So -realistically Bstrlib can never be significantly slower than any standard -'\0' terminated char * based solutions. - -Performance of the C++ interface: -................................. - -The C++ interface has been designed with an emphasis on abstraction and safety -first. However, since it is substantially a wrapper for the C bstring -functions, for longer strings the performance comments described in the -"Performance of the C interface" section above still apply. Note that the -(CBString *) type can be directly cast to a (bstring) type, and passed as -parameters to the C functions (though a CBString must never be passed to -bdestroy.) - -Probably the most controversial choice is performing full bounds checking on -the [] operator. 
This decision was made because 1) the fast alternative of -not bounds checking is still available by first casting the CBString to a -(const char *) buffer or to a (struct tagbstring) then dereferencing .data and -2) because the lack of bounds checking is seen as one of the main weaknesses -of C/C++ versus other languages. This check being done on every access leads -to individual character extraction being actually slower than other languages -in this one respect (other languages' compilers will normally dedicate more -resources on hoisting or removing bounds checking as necessary) but otherwise -brings C++ up to the level of other languages in terms of functionality. - -It is common for other C++ libraries to leverage the abstractions provided by -C++ to use reference counting and "copy on write" policies. While these -techniques can speed up some scenarios, they impose a problem with respect to -thread safety. bstrings and CBStrings can be properly protected with -"per-object" mutexes, meaning that two bstrlib calls can be made and execute -simultaneously, so long as the bstrings and CBstrings are distinct. With a -reference count and alias before copy on write policy, global mutexes are -required that prevent multiple calls to the strings library to execute -simultaneously regardless of whether or not the strings represent the same -string. - -One interesting trade off in CBString is that the default constructor is not -trivial. I.e., it always prepares a ready to use memory buffer. The purpose -is to ensure that there is a uniform internal composition for any functioning -CBString that is compatible with bstrings. It also means that the other -methods in the class are not forced to perform "late initialization" checks. -In the end it means that construction of CBStrings is slower than other -comparable C++ string classes. Initial testing, however, indicates that -CBString outperforms std::string and MFC's CString, for example, in all other -operations.
So to work around this weakness it is recommended that CBString -declarations be pushed outside of inner loops. - -Practical testing indicates that with the exception of the caveats given -above (constructors and safe index character manipulations) the C++ API for -Bstrlib generally outperforms popular standard C++ string classes. Amongst -the standard libraries and compilers, the quality of concatenation operations -varies wildly and very little care has gone into search functions. Bstrlib -dominates those performance benchmarks. - -Memory management: -.................. - -The bstring functions which write and modify bstrings will automatically -reallocate the backing memory for the char buffer whenever it is required to -grow. The algorithm for resizing chosen is to snap up to sizes that are a -power of two which are sufficient to hold the intended new size. Memory -reallocation is not performed when the required size of the buffer is -decreased. This behavior can be relied on, and is necessary to make the -behaviour of balloc deterministic. This trades off additional memory usage -for decreasing the frequency for required reallocations: - -1. For any bstring whose size never exceeds n, its buffer is not ever - reallocated more than log_2(n) times for its lifetime. -2. For any bstring whose size never exceeds n, its buffer is never more than - 2*(n+1) in length. (The extra characters beyond 2*n are to allow for the - implicit '\0' which is always added by the bstring modifying functions.) - -Decreasing the buffer size when the string decreases in size would violate 1) -above and in real world case lead to pathological heap thrashing. Similarly, -allocating more tightly than "least power of 2 greater than necessary" would -lead to a violation of 1) and have the same potential for heap thrashing. - -Property 2) needs emphasizing. 
Although the memory allocated is always a -power of 2, for a bstring that grows linearly in size, its buffer memory also -grows linearly, not exponentially. The reason is that the amount of extra -space increases with each reallocation, which decreases the frequency of -future reallocations. - -Obviously, given that bstring writing functions may reallocate the data -buffer backing the target bstring, one should not attempt to cache the data -buffer address and use it after such bstring functions have been called. -This includes making reference struct tagbstrings which alias to a writable -bstring. - -balloc or bfromcstralloc can be used to preallocate the minimum amount of -space used for a given bstring. This will reduce even further the number of -times the data portion is reallocated. If the length of the string is never -more than one less than the memory length then there will be no further -reallocations. - -Note that invoking the bwriteallow macro may increase the number of reallocs -by one more than necessary for every call to bwriteallow interleaved with any -bstring API which writes to this bstring. - -The library does not use any mechanism for automatic clean up for the C API. -Thus explicit clean up via calls to bdestroy() are required to avoid memory -leaks. - -Constant and static tagbstrings: -................................ - -A struct tagbstring can be write protected from any bstrlib function using -the bwriteprotect macro. A write protected struct tagbstring can then be -reset to being writable via the bwriteallow macro. There is, of course, no -protection from attempts to directly access the bstring members. Modifying a -bstring which is write protected by direct access has undefined behavior. - -static struct tagbstrings can be declared via the bsStatic macro. They are -considered permanently unwritable. Such struct tagbstrings's are declared -such that attempts to write to it are not well defined. 
Invoking either -bwriteallow or bwriteprotect on static struct tagbstrings has no effect. - -struct tagbstring's initialized via btfromcstr or blk2tbstr are protected by -default but can be made writeable via the bwriteallow macro. If bwriteallow -is called on such struct tagbstring's, it is the programmer's responsibility -to ensure that: - -1) the buffer supplied was allocated from the heap. -2) bdestroy is not called on this tagbstring (unless the header itself has - also been allocated from the heap.) -3) free is called on the buffer to reclaim its memory. - -bwriteallow and bwriteprotect can be invoked on ordinary bstrings (they have -to be dereferenced with the (*) operator to get the levels of indirection -correct) to give them write protection. - -Buffer declaration: -................... - -The memory buffer is actually declared "unsigned char *" instead of "char *". -The reason for this is to trigger compiler warnings whenever uncasted char -buffers are assigned to the data portion of a bstring. This will draw more -diligent programmers into taking a second look at the code where they -have carelessly left off the typically required cast. (Research from -AT&T/Lucent indicates that additional programmer eyeballs is one of the most -effective mechanisms at ferreting out bugs.) - -Function pointers: -.................. - -The bgets, bread and bStream functions use function pointers to obtain -strings from data streams. The function pointer declarations have been -specifically chosen to be compatible with the fgetc and fread functions. -While this may seem to be a convoluted way of implementing fgets and fread -style functionality, it has been specifically designed this way to ensure -that there is no dependency on a single narrowly defined set of device -interfaces, such as just stream I/O. In the embedded world, its quite -possible to have environments where such interfaces may not exist in the -standard C library form. 
Furthermore, the generalization that this opens up -allows for more sophisticated uses for these functions (performing an fgets -like function on a socket, for example.) By using function pointers, it also -allows such abstract stream interfaces to be created using the bstring library -itself while not creating a circular dependency. - -Use of int's for sizes: -....................... - -This is just a recognition that 16bit platforms with requirements for strings -that are larger than 64K and 32bit+ platforms with requirements for strings -that are larger than 4GB are pretty marginal. The main focus is for 32bit -platforms, and emerging 64bit platforms with reasonable < 4GB string -requirements. Using ints allows for negative values which has meaning -internally to bstrlib. - -Semantic consideration: -....................... - -Certain care needs to be taken when copying and aliasing bstrings. A bstring -is essentially a pointer type which points to a multipart abstract data -structure. Thus usage, and lifetime of bstrings have semantics that follow -these considerations. For example: - - bstring a, b; - struct tagbstring t; - - a = bfromcstr("Hello"); /* Create new bstring and copy "Hello" into it. */ - b = a; /* Alias b to the contents of a. */ - t = *a; /* Create a current instance pseudo-alias of a. */ - bconcat (a, b); /* Double a and b, t is now undefined. */ - bdestroy (a); /* Destroy the contents of both a and b. */ - -Variables of type bstring are really just references that point to real -bstring objects. The equal operator (=) creates aliases, and the asterisk -dereference operator (*) creates a kind of alias to the current instance (which -is generally not useful for any purpose.) Using bstrcpy() is the correct way -of creating duplicate instances. The ampersand operator (&) is useful for -creating aliases to struct tagbstrings (remembering that constructed struct -tagbstrings are not writable by default.) 
- -CBStrings use complete copy semantics for the equal operator (=), and thus do -not have these sorts of issues. - -Debugging: -.......... - -Bstrings have a simple, exposed definition and construction, and the library -itself is open source. So most debugging is going to be fairly straight- -forward. But the memory for bstrings comes from the heap, which can often be -corrupted indirectly, and it might not be obvious what has happened even from -direct examination of the contents in a debugger or a core dump. There are -some tools such as Purify, Insure++ and Electric Fence which can help solve -such problems, however another common approach is to directly instrument the -calls to malloc, realloc, calloc, free, memcpy, memmove and/or other calls -by overriding them with macro definitions. - -Although the user could hack on the Bstrlib sources directly as necessary to -perform such an instrumentation, Bstrlib comes with a built-in mechanism for -doing this. By defining the macro BSTRLIB_MEMORY_DEBUG and providing an -include file named memdbg.h this will force the core Bstrlib modules to -attempt to include this file. In such a file, macros could be defined which -override Bstrlib's usage of the C standard library. - -Rather than calling malloc, realloc, free, memcpy or memmove directly, Bstrlib -emits the macros bstr__alloc, bstr__realloc, bstr__free, bstr__memcpy and -bstr__memmove in their place respectively. By default these macros are simply -assigned to be equivalent to their corresponding C standard library function -call. However, if they are given earlier macro definitions (via the back -door include file) they will not be given their default definition. In this -way Bstrlib's interface to the standard library can be changed but without -having to directly redefine or link standard library symbols (both of which -are not strictly ANSI C compliant.)
- -An example definition might include: - - #define bstr__alloc(sz) X_malloc ((sz), __LINE__, __FILE__) - -which might help contextualize heap entries in a debugging environment. - -The NULL parameter and sanity checking of bstrings is part of the Bstrlib -API, and thus Bstrlib itself does not present any different modes which would -correspond to "Debug" or "Release" modes. Bstrlib always contains mechanisms -which one might think of as debugging features, but retains the performance -and small memory footprint one would normally associate with release mode -code. - -Integration Microsoft's Visual Studio debugger: -............................................... - -Microsoft's Visual Studio debugger has a capability of customizable mouse -float over data type descriptions. This is accomplished by editing the -AUTOEXP.DAT file to include the following: - - ; new for CBString - tagbstring =slen=<slen> mlen=<mlen> <data,st> - Bstrlib::CBStringList =count=<size()> - -In Visual C++ 6.0 this file is located in the directory: - - C:\Program Files\Microsoft Visual Studio\Common\MSDev98\Bin - -and in Visual Studio .NET 2003 it's located here: - - C:\Program Files\Microsoft Visual Studio .NET 2003\Common7\Packages\Debugger - -This will improve the ability of debugging with Bstrlib under Visual Studio. - -Security --------- - -Bstrlib does not come with explicit security features outside of its fairly -comprehensive error detection, coupled with its strict semantic support. -That is to say that certain common security problems, such as buffer overrun, -constant overwrite, arbitrary truncation etc, are far less likely to happen -inadvertently. Where it does help, Bstrlib maximizes its advantage by -providing developers a simple adoption path that lets them leave less secure -string mechanisms behind. The library will not leave developers wanting, so -they will be less likely to add new code using a less secure string library -to add functionality that might be missing from Bstrlib.
- -That said there are a number of security ideas not addressed by Bstrlib: - -1. Race condition exploitation (i.e., verifying a string's contents, then -raising the privilege level and execute it as a shell command as two -non-atomic steps) is well beyond the scope of what Bstrlib can provide. It -should be noted that MFC's built-in string mutex actually does not solve this -problem either -- it just removes immediate data corruption as a possible -outcome of such exploit attempts (it can be argued that this is worse, since -it will leave no trace of the exploitation). In general race conditions have -to be dealt with by careful design and implementation; it cannot be assisted -by a string library. - -2. Any kind of access control or security attributes to prevent usage in -dangerous interfaces such as system(). Perl includes a "trust" attribute -which can be endowed upon strings that are intended to be passed to such -dangerous interfaces. However, Perl's solution reflects its own limitations --- notably that it is not a strongly typed language. In the example code for -Bstrlib, there is a module called taint.cpp. It demonstrates how to write a -simple wrapper class for managing "untainted" or trusted strings using the -type system to prevent questionable mixing of ordinary untrusted strings with -untainted ones then passing them to dangerous interfaces. In this way the -security correctness of the code reduces to auditing the direct usages of -dangerous interfaces or promotions of tainted strings to untainted ones. - -3. Encryption of string contents is way beyond the scope of Bstrlib. -Maintaining encrypted string contents in the futile hopes of thwarting things -like using system-level debuggers to examine sensitive string data is likely -to be a wasted effort (imagine a debugger that runs at a higher level than a -virtual processor where the application runs). 
For more standard encryption -usages, since the bstring contents are simply binary blocks of data, this -should pose no problem for usage with other standard encryption libraries. - -Compatibility -------------- - -The Better String Library is known to compile and function correctly with the -following compilers: - - - Microsoft Visual C++ - - Watcom C/C++ - - Intel's C/C++ compiler (Windows) - - The GNU C/C++ compiler (cygwin and Linux on PPC64) - - Borland C - - Turbo C - -Setting of configuration options should be unnecessary for these compilers -(unless exceptions are being disabled or STLport has been added to WATCOM -C/C++). Bstrlib has been developed with an emphasis on portability. As such -porting it to other compilers should be straight forward. This package -includes a porting guide (called porting.txt) which explains what issues may -exist for porting Bstrlib to different compilers and environments. - -ANSI issues ------------ - -1. The function pointer types bNgetc and bNread have prototypes which are very -similar to, but not exactly the same as fgetc and fread respectively. -Basically the FILE * parameter is replaced by void *. The purpose of this -was to allow one to create other functions with fgetc and fread like -semantics without being tied to ANSI C's file streaming mechanism. I.e., one -could very easily adapt it to sockets, or simply reading a block of memory, -or procedurally generated strings (for fractal generation, for example.) - -The problem is that invoking the functions (bNgetc)fgetc and (bNread)fread is -not technically legal in ANSI C. The reason being that the compiler is only -able to coerce the function pointers themselves into the target type, however -are unable to perform any cast (implicit or otherwise) on the parameters -passed once invoked. I.e., if internally void * and FILE * need some kind of -mechanical coercion, the compiler will not properly perform this conversion -and thus lead to undefined behavior. 
- -Apparently a platform from Data General called "Eclipse" and another from -Tandem called "NonStop" have a different representation for pointers to bytes -and pointers to words, for example, where coercion via casting is necessary. -(Actual confirmation of the existence of such machines is hard to come by, so -it is prudent to be skeptical about this information.) However, this is not -an issue for any known contemporary platforms. One may conclude that such -platforms are effectively apocryphal even if they do exist. - -To correctly work around this problem to the satisfaction of the ANSI -limitations, one needs to create wrapper functions for fgetc and/or -fread with the prototypes of bNgetc and/or bNread respectively which perform -no other action other than to explicitly cast the void * parameter to a -FILE *, and simply pass the remaining parameters straight to the function -pointer call. - -The wrappers themselves are trivial: - - size_t freadWrap (void * buff, size_t esz, size_t eqty, void * parm) { - return fread (buff, esz, eqty, (FILE *) parm); - } - - int fgetcWrap (void * parm) { - return fgetc ((FILE *) parm); - } - -These have not been supplied in bstrlib or bstraux to prevent unnecessary -linking with file I/O functions. - -2. vsnprintf is not available on all compilers. Because of this, the bformat -and bformata functions (and format and formata methods) are not guaranteed to -work properly. For those compilers that don't have vsnprintf, the -BSTRLIB_NOVSNP macro should be set before compiling bstrlib, and the format -functions/method will be disabled. - -The more recent ANSI C standards have specified the required inclusion of a -vsnprintf function. - -3. The bstrlib function names are not unique in the first 6 characters. This -is only an issue for older C compiler environments which do not store more -than 6 characters for function names. - -4. The bsafe module defines macros and function names which are part of the -C library.
This simply overrides the definition as expected on all platforms -tested, however it is not sanctioned by the ANSI standard. This module is -clearly optional and should be omitted on platforms which disallow its -undefined semantics. - -In practice the real issue is that some compilers in some modes of operation -can/will inline these standard library functions on a module by module basis -as they appear in each. The linker will thus have no opportunity to override -the implementation of these functions for those cases. This can lead to -inconsistent behaviour of the bsafe module on different platforms and -compilers. - -=============================================================================== - -Comparison with Microsoft's CString class ------------------------------------------ - -Although developed independently, CBStrings have very similar functionality to -Microsoft's CString class. However, the bstring library has significant -advantages over CString: - -1. Bstrlib is a C-library as well as a C++ library (using the C++ wrapper). - - - Thus it is compatible with more programming environments and - available to a wider population of programmers. - -2. The internal structure of a bstring is considered exposed. - - - A single contiguous block of data can be cut into read-only pieces by - simply creating headers, without allocating additional memory to create - reference copies of each of these sub-strings. - - In this way, using bstrings in a totally abstracted way becomes a choice - rather than an imposition. Further this choice can be made differently - at different layers of applications that use it. - -3. Static declaration support precludes the need for constructor - invocation. - - - Allows for static declarations of constant strings that has no - additional constructor overhead. - -4. Bstrlib is not attached to another library. 
- - - Bstrlib is designed to be easily plugged into any other library - collection, without dependencies on other libraries or paradigms (such - as "MFC".) - -The bstring library also comes with a few additional functions that are not -available in the CString class: - - - bsetstr - - bsplit - - bread - - breplace (this is different from CString::Replace()) - - Writable indexed characters (for example a[i]='x') - -Interestingly, although Microsoft did implement mid$(), left$() and right$() -functional analogues (these are functions from GWBASIC) they seem to have -forgotten that mid$() could be also used to write into the middle of a string. -This functionality exists in Bstrlib with the bsetstr() and breplace() -functions. - -Among the disadvantages of Bstrlib is that there is no special support for -localization or wide characters. Such things are considered beyond the scope -of what bstrings are trying to deliver. CString essentially supports the -older UCS-2 version of Unicode via widechar_t as an application-wide compile -time switch. - -CString's also use built-in mechanisms for ensuring thread safety under all -situations. While this makes writing thread safe code that much easier, this -built-in safety feature has a price -- the inner loops of each CString method -runs in its own critical section (grabbing and releasing a light weight mutex -on every operation.) The usual way to decrease the impact of a critical -section performance penalty is to amortize more operations per critical -section. But since the implementation of CStrings is fixed as a one critical -section per-operation cost, there is no way to leverage this common -performance enhancing idea. - -The search facilities in Bstrlib are comparable to those in MFC's CString -class, though it is missing locale specific collation. 
But because Bstrlib -is interoperable with C's char buffers, it will allow programmers to write -their own string searching mechanism (such as Boyer-Moore), or be able to -choose from a variety of available existing string searching libraries (such -as those for regular expressions) without difficulty. - -Microsoft used a very non-ANSI conforming trick in its implementation to -allow printf() to use the "%s" specifier to output a CString correctly. This -can be convenient, but it is inherently not portable. CBString requires an -explicit cast, while bstring requires the data member to be dereferenced. -Microsoft's own documentation recommends casting, instead of relying on this -feature. - -Comparison with C++'s std::string ---------------------------------- - -This is the C++ language's standard STL based string class. - -1. There is no C implementation. -2. The [] operator is not bounds checked. -3. Missing a lot of useful functions like printf-like formatting. -4. Some sub-standard std::string implementations (SGI) are necessarily unsafe - to use with multithreading. -5. Limited by STL's std::iostream which in turn is limited by ifstream which - can only take input from files. (Compare to CBStream's API which can take - abstracted input.) -6. Extremely uneven performance across implementations. - -Comparison with ISO C TR 24731 proposal ---------------------------------------- - -Following the ISO C99 standard, Microsoft has proposed a group of C library -extensions which are supposedly "safer and more secure". This proposal is -expected to be adopted by the ISO C standard which follows C99. - -The proposal reveals itself to be very similar to Microsoft's "StrSafe" -library. The functions are basically the same as other standard C library -string functions except that destination parameters are paired with an -additional length parameter of type rsize_t. rsize_t is the same as size_t, -however, the range is checked to make sure it's between 1 and RSIZE_MAX.
Like -Bstrlib, the functions perform a "parameter check". Unlike Bstrlib, when a -parameter check fails, rather than simply outputting accumulatable error -statuses, they call a user settable global error function handler, and upon -return of control perform no (additional) detrimental action. The proposal -covers basic string functions as well as a few non-reentrant functions -(asctime, ctime, and strtok). - -1. Still based solely on char * buffers (and therefore strlen() and strcat() - is still O(n), and there are no faster streq() comparison functions.) -2. No growable string semantics. -3. Requires manual buffer length synchronization in the source code. -4. No attempt to enhance functionality of the C library. -5. Introduces a new error scenario (strings exceeding RSIZE_MAX length). - -The hope is that by exposing the buffer length requirements there will be -fewer buffer overrun errors. However, the error modes are really just -transformed, rather than removed. The real problem of buffer overflows is -that they all happen as a result of erroneous programming. So forcing -programmers to manually deal with buffer limits, will make them more aware of -the problem but doesn't remove the possibility of erroneous programming. So -a programmer that erroneously mixes up the rsize_t parameters is no better off -than a programmer that introduces potential buffer overflows through other -more typical lapses. So at best this may reduce the rate of erroneous -programming, rather than making any attempt at removing failure modes. - -The error handler can discriminate between types of failures, but does not -take into account any callsite context. So the problem is that the error is -going to be manifest in a piece of code, but there is no pointer to that -code.
It would seem that passing in the call site __FILE__, __LINE__ as -parameters would be very useful, but the API clearly doesn't support such a -thing (it would increase code bloat even more than the extra length -parameter does, and would require macro tricks to implement). - -The Bstrlib C API takes the position that error handling needs to be done at -the callsite, and just tries to make it as painless as possible. Furthermore, -error modes are removed by supporting auto-growing strings and aliasing. For -capturing errors in more central code fragments, Bstrlib's C++ API uses -exception handling extensively, which is superior to the leaf-only error -handler approach. - -Comparison with Managed String Library CERT proposal ----------------------------------------------------- - -The main webpage for the managed string library: -http://www.cert.org/secure-coding/managedstring.html - -Robert Seacord at CERT has proposed a C string library that he calls the -"Managed String Library" for C. Like Bstrlib, it introduces a new type -which is called a managed string. The structure of a managed string -(string_m) is like a struct tagbstring but missing the length field. This -internal structure is considered opaque. The length is, like the C standard -library, always computed on the fly by searching for a terminating NUL on -every operation that requires it. So it suffers from every performance -problem that the C standard library suffers from. Interoperating with C -string APIs (like printf, fopen, or anything else that takes a string -parameter) requires copying to additionally allocated buffers that have to -be manually freed -- this makes this library probably slower and more -cumbersome than any other string library in existence. - -The library gives a fully populated error status as the return value of every -string function. The hope is to be able to diagnose all problems -specifically from the return code alone.
Comparing this to Bstrlib, which -always returns one consistent error message, might make it seem that Bstrlib -would be harder to debug; but this is not true. With Bstrlib, if an error -occurs there is always enough information from just knowing there was an error -and examining the parameters to deduce exactly what kind of error has -happened. The managed string library thus gives up nested function calls -while achieving little benefit, while Bstrlib does not. - -One interesting feature that "managed strings" has is the idea of data -sanitization via character set whitelisting. That is to say, a globally -definable filter that makes any attempt to put invalid characters into strings -lead to an error and not modify the string. The author gives the following -example: - - // create valid char set - if (retValue = strcreate_m(&str1, "abc") ) { - fprintf( - stderr, - "Error %d from strcreate_m.\n", - retValue - ); - } - if (retValue = setcharset(str1)) { - fprintf( - stderr, - "Error %d from setcharset().\n", - retValue - ); - } - if (retValue = strcreate_m(&str1, "aabbccabc")) { - fprintf( - stderr, - "Error %d from strcreate_m.\n", - retValue - ); - } - // create string with invalid char set - if (retValue = strcreate_m(&str1, "abbccdabc")) { - fprintf( - stderr, - "Error %d from strcreate_m.\n", - retValue - ); - } - -Which we can compare with a more Bstrlib way of doing things: - - bstring bCreateWithFilter (const char * cstr, const_bstring filter) { - bstring b = bfromcstr (cstr); - if (BSTR_ERR != bninchr (b, filter) && NULL != b) { - fprintf (stderr, "Filter violation.\n"); - bdestroy (b); - b = NULL; - } - return b; - } - - struct tagbstring charFilter = bsStatic ("abc"); - bstring str1 = bCreateWithFilter ("aabbccabc", &charFilter); - bstring str2 = bCreateWithFilter ("aabbccdabc", &charFilter); - -The first thing we should notice is that with the Bstrlib approach you can -have different filters for different strings if necessary.
Furthermore, -selecting a charset filter in the Managed String Library is uni-contextual. -That is to say, there can only be one such filter active for the entire -program, which means its usage is not well defined for intermediate library -usage (a library that uses it will interfere with user code that uses it, and -vice versa.) It is also likely to be poorly defined in multi-threading -environments. - -There is also a question as to whether the data sanitization filter is checked -on every operation, or just on creation operations. Since the charset can be -set arbitrarily at run time, it might be set *after* some managed strings have -been created. This would seem to imply that all functions should run this -additional check every time if there is an attempt to enforce this. This -would make things tremendously slow. On the other hand, if it is assumed that -only creates and other operations that take char *'s as input need be checked -because the charset was only supposed to be called once at and before any -other managed string was created, then one can see that it's easy to cover -Bstrlib with equivalent functionality via a few wrapper calls such as the -example given above. - -And finally we have to question the value of sanitization in the first place. -For example, for httpd servers, there is generally a requirement that the -URLs parsed have some form that avoids undesirable translation to local file -system filenames or resources. The problem is that the way URLs can be -encoded, it must be completely parsed and translated to know if it is using -certain invalid character combinations. That is to say, merely filtering -each character one at a time is not necessarily the right way to ensure that -a string has safe contents. - -In the article that describes this proposal, it is claimed that it fairly -closely approximates the existing C API semantics.
On this point we should -compare this "closeness" with Bstrlib: - - Bstrlib Managed String Library - ------- ---------------------- - -Pointer arithmetic Segment arithmetic N/A - -Use in C Std lib ->data, or bdata{e} getstr_m(x,*) ... free(x) - -String literals bsStatic, bsStaticBlk strcreate_m() - -Transparency Complete None - -It's pretty clear that the semantic mapping from C strings to Bstrlib is fairly -straightforward, and that in general semantic capabilities are the same or -superior in Bstrlib. On the other hand the Managed String Library is either -missing semantics or changes things fairly significantly. - -Comparison with Annexia's c2lib library ---------------------------------------- - -This library is available at: -http://www.annexia.org/freeware/c2lib - -1. Still based solely on char * buffers (and therefore strlen() and strcat() - is still O(n), and there are no faster streq() comparison functions.) - Their suggestion that alternatives which wrap the string data type (such as - bstring does) imposes a difficulty in interoperating with the C language's - ordinary C string library is not founded. -2. Introduction of memory (and vector?) abstractions imposes a learning - curve, and some kind of memory usage policy that is outside of the strings - themselves (and therefore must be maintained by the developer.) -3. The API is massive, and filled with all sorts of trivial (pjoin) and - controversial (pmatch -- regular expressions are not sufficiently - standardized, and there is a very large difference in performance between - compiled and non-compiled, REs) functions. Bstrlib takes a decidedly - minimal approach -- none of the functionality in c2lib is difficult or - challenging to implement on top of Bstrlib (except the regex stuff, which - is going to be difficult, and controversial no matter what.) -4. Understanding why c2lib is the way it is pretty much requires a working - knowledge of Perl.
bstrlib requires only knowledge of the C string library - while providing just a very select few worthwhile extras. -5. It is attached to a lot of cruft like a matrix math library (that doesn't - include any functions for getting the determinant, eigenvectors, - eigenvalues, the matrix inverse, test for singularity, test for - orthogonality, a Gram-Schmidt orthogonalization, LU decomposition ... I - mean why bother?) - -Convincing a development house to use c2lib is likely quite difficult. It -introduces too much, while not being part of any kind of standards body. The -code must therefore be trusted, or maintained by those that use it. While -bstring offers nothing more on this front, since it's so much smaller, covers -far less in terms of scope, and will typically improve string performance, -the barrier to usage should be much smaller. - -Comparison with stralloc/qmail ------------------------------- - -More information about this library can be found here: -http://www.canonical.org/~kragen/stralloc.html or here: -http://cr.yp.to/lib/stralloc.html - -1. Library is very very minimal. A little too minimal. -2. Untargeted source parameters are not declared const. -3. Slightly different expected emphasis (like _cats function which takes an - ordinary C string char buffer as a parameter.) It's clear that the - remainder of the C string library is still required to perform more - useful string operations. - -The struct declaration for their string header is essentially the same as that -for bstring. But it's clear that this was a quickly written hack whose goals -are clearly a subset of what Bstrlib supplies. For anyone who is served by -stralloc, Bstrlib is a complete substitute that just adds more functionality. - -stralloc actually uses the interesting policy that a NULL data pointer -indicates an empty string. In this way, non-static empty strings can be -declared without construction.
This advantage is minimal, since static empty -bstrings can be declared inline without construction, and if the string needs -to be written to it should be constructed from an empty string (or its first -initializer) in any event. - -wxString class --------------- - -This is the string class used in the wxWindows project. A description of -wxString can be found here: -http://www.wxwindows.org/manuals/2.4.2/wx368.htm#wxstring - -This C++ library is similar to CBString. However, it is littered with -trivial functions (IsAscii, UpperCase, RemoveLast etc.) - -1. There is no C implementation. -2. The memory management strategy is to allocate a bounded fixed amount of - additional space on each resize, meaning that it does not have the - log_2(n) property that Bstrlib has (it will thrash very easily, cause - massive fragmentation in common heap implementations, and can easily be a - common source of performance problems). -3. The library uses a "copy on write" strategy, meaning that it has to deal - with multithreading problems. - -Vstr ----- - -This is a highly orthogonal C string library with an emphasis on -networking/realtime programming. It can be found here: -http://www.and.org/vstr/ - -1. The convoluted internal structure does not contain a '\0' char * compatible - buffer, so interoperability with the C library is a non-starter. -2. The API and implementation is very large (owing to its orthogonality) and - can lead to difficulty in understanding its exact functionality. -3. An obvious dependency on gnu tools (confusing make configure step) -4. Uses a reference counting system, meaning that it is not likely to be - thread safe. - -The implementation has an extreme emphasis on performance for nontrivial -actions (adds, inserts and deletes are all constant or roughly O(#operations) -time) following the "zero copy" principle.
This trades off performance of -trivial functions (character access, char buffer access/coercion, alias -detection) which become significantly slower, as well as incremental -accumulative costs for its searching/parsing functions. Whether or not Vstr -wins any particular performance benchmark will depend a lot on the benchmark, -but it should handily win on some, while losing dreadfully on others. - -The learning curve for Vstr is very steep, and it doesn't come with any -obvious way to build for Windows or other platforms without gnu tools. At -least one mechanism (the iterator) introduces a new undefined scenario -(writing to a Vstr while iterating through it.) Vstr has a very large -footprint, and is very ambitious in its total functionality. Vstr has no C++ -API. - -Vstr usage requires context initialization via vstr_init() which must be run -in a thread-local context. Given the totally reference based architecture -this means that sharing Vstrings across threads is not well defined, or at -least not safe from race conditions. This API is clearly geared to the older -standard of fork() style multitasking in UNIX, and is not safely transportable -to modern shared memory multithreading available in Linux and Windows. There -is no portable external solution making the library thread safe (since it -requires a mutex around each Vstr context -- not each string.) - -In the documentation for this library, a big deal is made of its self hosted -s(n)printf-like function. This is an issue for older compilers that don't -include vsnprintf(), but also an issue because Vstr has a slow conversion to -'\0' terminated char * mechanism. That is to say, using "%s" to format data -that originates from Vstr would be slow without some sort of native function -to do so. Bstrlib sidesteps the issue by relying on what snprintf-like -functionality does exist and having a high performance conversion to a char * -compatible string so that "%s" can be used directly.
- -Str Library ------------ - -This is a fairly extensive string library, that includes full unicode support -and is targeted at the goal of outperforming MFC and STL. The architecture, -similarly to MFC's CStrings, is a copy on write reference counting mechanism. - -http://www.utilitycode.com/str/default.aspx - -1. Commercial. -2. C++ only. - -This library, like Vstr, uses a ref counting system. There is only so deeply -I can analyze it, since I don't have a license for it. However, performance -improvements over MFC's and STL, doesn't seem like a sufficient reason to -move your source base to it. For example, in the future, Microsoft may -improve the performance of CString. - -It should be pointed out that performance testing of Bstrlib has indicated -that its relative performance advantage versus MFC's CString and STL's -std::string is at least as high as that for the Str library. - -libmib astrings ---------------- - -A handful of functional extensions to the C library that add dynamic string -functionality. -http://www.mibsoftware.com/libmib/astring/ - -This package basically references strings through char ** pointers and assumes -they are pointing to the top of an allocated heap entry (or NULL, in which -case memory will be newly allocated from the heap.) So it's still up to the user -to mix and match the older C string functions with these functions whenever -pointer arithmetic is used (i.e., there is no leveraging of the type system -to assert semantic differences between references and base strings as Bstrlib -does since no new types are introduced.) Unlike Bstrlib, exact string length -meta data is not stored, thus requiring a strlen() call on *every* string -writing operation. The library is very small, covering only a handful of C's -functions. - -While this is better than nothing, it is clearly slower than even the -standard C library, less safe and less functional than Bstrlib.
- -To explain the advantage of using libmib, their website shows an example of -how dangerous C code: - - char buf[256]; - char *pszExtraPath = ";/usr/local/bin"; - - strcpy(buf,getenv("PATH")); /* oops! could overrun! */ - strcat(buf,pszExtraPath); /* Could overrun as well! */ - - printf("Checking...%s\n",buf); /* Some printfs overrun too! */ - -is avoided using libmib: - - char *pasz = 0; /* Must initialize to 0 */ - char *paszOut = 0; - char *pszExtraPath = ";/usr/local/bin"; - - if (!astrcpy(&pasz,getenv("PATH"))) /* malloc error */ exit(-1); - if (!astrcat(&pasz,pszExtraPath)) /* malloc error */ exit(-1); - - /* Finally, a "limitless" printf! we can use */ - asprintf(&paszOut,"Checking...%s\n",pasz);fputs(paszOut,stdout); - - astrfree(&pasz); /* Can use free(pasz) also. */ - astrfree(&paszOut); - -However, compare this to Bstrlib: - - bstring b, out; - - bcatcstr (b = bfromcstr (getenv ("PATH")), ";/usr/local/bin"); - out = bformat ("Checking...%s\n", bdatae (b, "")); - /* if (out && b) */ fputs (bdatae (out, ""), stdout); - bdestroy (b); - bdestroy (out); - -Besides being shorter, we can see that error handling can be deferred right -to the very end. Also, unlike the above two versions, if getenv() returns -with NULL, the Bstrlib version will not exhibit undefined behavior. -Initialization starts with the relevant content rather than an extra -autoinitialization step. - -libclc ------- - -An attempt to add to the standard C library with a number of common useful -functions, including additional string functions. -http://libclc.sourceforge.net/ - -1. Uses standard char * buffer, and adopts C 99's usage of "restrict" to pass - the responsibility to guard against aliasing to the programmer. -2. Adds no safety or memory management whatsoever. -3. Most of the supplied string functions are completely trivial. - -The goals of libclc and Bstrlib are clearly quite different. - -fireString ----------- - -http://firestuff.org/ - -1. 
Uses standard char * buffer, and adopts C 99's usage of "restrict" to pass - the responsibility to guard against aliasing to the programmer. -2. Mixes char * and length wrapped buffers (estr) functions, doubling the API - size, with safety limited to only half of the functions. - -Firestring was originally just a wrapper of char * functionality with extra -length parameters. However, it has been augmented with the inclusion of the -estr type which has similar functionality to stralloc. But firestring does -not nearly cover the functional scope of Bstrlib. - -Safe C String Library ---------------------- - -A library written for the purpose of increasing safety and power to C's string -handling capabilities. -http://www.zork.org/safestr/safestr.html - -1. While the safestr_* functions are safe in and of themselves, interoperating - with char * string has dangerous unsafe modes of operation. -2. The architecture of safestr's causes the base pointer to change. Thus, - it's not practical/safe to store a safestr in multiple locations if any - single instance can be manipulated. -3. Dependent on an additional error handling library. -4. Uses reference counting, meaning that it is either not thread safe or - slow and not portable. - -I think the idea of reallocating (and hence potentially changing) the base -pointer is a serious design flaw that is fatal to this architecture. True -safety is obtained by having automatic handling of all common scenarios -without creating implicit constraints on the user. - -Because of its automatic temporary clean up system, it cannot use "const" -semantics on input arguments. Interesting anomalies such as: - - safestr_t s, t; - s = safestr_replace (t = SAFESTR_TEMP ("This is a test"), - SAFESTR_TEMP (" "), SAFESTR_TEMP (".")); - /* t is now undefined. */ - -are possible.
If one defines a function which takes a safestr_t as a -parameter, then the function would not know whether or not the safestr_t is -defined after it passes it to a safestr library function. The author-recommended -method for working around this problem is to examine the -attributes of the safestr_t within the function which is to modify any of -its parameters and play games with its reference count. I think, therefore, -that the whole SAFESTR_TEMP idea is also fatally broken. - -The library implements immutability, optional non-resizability, and a "trust" -flag. This trust flag is interesting, and suggests that applying any -arbitrary sequence of safestr_* function calls on any set of trusted strings -will result in a trusted string. It seems to me, however, that if one wanted -to implement a trusted string semantic, one might do so by actually creating -a different *type* and only implement the subset of string functions that are -deemed safe (i.e., user input would be excluded, for example.) This, in -essence, would allow the compiler to enforce trust propagation at compile -time rather than run time. Non-resizability is also interesting, however, -it seems marginal (i.e., to want a string that cannot be resized, yet can be -modified and yet where a fixed sized buffer is undesirable.) - -=============================================================================== - -Examples --------- - - Dumping a line numbered file: - - FILE * fp; - int i, ret; - struct bstrList * lines; - struct tagbstring prefix = bsStatic ("-> "); - - if (NULL != (fp = fopen ("bstrlib.txt", "rb"))) { - bstring b = bread ((bNread) fread, fp); - fclose (fp); - if (NULL != (lines = bsplit (b, '\n'))) { - for (i=0; i < lines->qty; i++) { - binsert (lines->entry[i], 0, &prefix, '?'); - printf ("%04d: %s\n", i, bdatae (lines->entry[i], "NULL")); - } - bstrListDestroy (lines); - } - bdestroy (b); - } - -For numerous other examples, see bstraux.c, bstraux.h and the example archive.
- -=============================================================================== - -License -------- - -This is a fork of The Better String Library, licensed under the 3-clause BSD -license (see the accompanying license.txt). The original work is available under -either the 3 clause BSD license or the Gnu Public License version 2 at the option -of the user. - -=============================================================================== - -Acknowledgements ----------------- - -The following individuals have made significant contributions to the design -and testing of the Better String Library: - -Bjorn Augestad -Clint Olsen -Darryl Bleau -Fabian Cenedese -Graham Wideman -Ignacio Burgueno -International Business Machines Corporation -Ira Mica -John Kortink -Manuel Woelker -Marcel van Kervinck -Michael Hsieh -Richard A. Smith -Simon Ekstrom -Wayne Scott - -=============================================================================== diff --git a/third_party/HLSLcc/src/cbstring/license.txt b/third_party/HLSLcc/src/cbstring/license.txt deleted file mode 100644 index cf78a98..0000000 --- a/third_party/HLSLcc/src/cbstring/license.txt +++ /dev/null @@ -1,29 +0,0 @@ -Copyright (c) 2002-2008 Paul Hsieh -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - - Neither the name of bstrlib nor the names of its contributors may be used - to endorse or promote products derived from this software without - specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. - diff --git a/third_party/HLSLcc/src/cbstring/porting.txt b/third_party/HLSLcc/src/cbstring/porting.txt deleted file mode 100644 index 11d8d13..0000000 --- a/third_party/HLSLcc/src/cbstring/porting.txt +++ /dev/null @@ -1,172 +0,0 @@ -Better String library Porting Guide ------------------------------------ - -by Paul Hsieh - -The bstring library is an attempt to provide improved string processing -functionality to the C and C++ language. At the heart of the bstring library -is the management of "bstring"s which are a significant improvement over '\0' -terminated char buffers. See the accompanying documenation file bstrlib.txt -for more information. - -=============================================================================== - -Identifying the Compiler ------------------------- - -Bstrlib has been tested on the following compilers: - - Microsoft Visual C++ - Watcom C/C++ (32 bit flat) - Intel's C/C++ compiler (on Windows) - The GNU C/C++ compiler (on Windows/Linux on x86 and PPC64) - Borland C++ - Turbo C - -There are slight differences in these compilers which requires slight -differences in the implementation of Bstrlib. 
These are accomodated in the -same sources using #ifdef/#if defined() on compiler specific macros. To -port Bstrlib to a new compiler not listed above, it is recommended that the -same strategy be followed. If you are unaware of the compiler specific -identifying preprocessor macro for your compiler you might find it here: - -http://predef.sourceforge.net/precomp.html - -Note that Intel C/C++ on Windows sets the Microsoft identifier: _MSC_VER. - -16-bit vs. 32-bit vs. 64-bit Systems ------------------------------------- - -Bstrlib has been architected to deal with strings of length between 0 and -INT_MAX (inclusive). Since the values of int are never higher than size_t -there will be no issue here. Note that on most 64-bit systems int is 32-bit. - -Dependency on The C-Library ---------------------------- - -Bstrlib uses the functions memcpy, memmove, malloc, realloc, free and -vsnprintf. Many free standing C compiler implementations that have a mode in -which the C library is not available will typically not include these -functions which will make porting Bstrlib to it onerous. Bstrlib is not -designed for such bare bones compiler environments. This usually includes -compilers that target ROM environments. - -Porting Issues --------------- - -Bstrlib has been written completely in ANSI/ISO C and ISO C++, however, there -are still a few porting issues. These are described below. - -1. The vsnprintf () function. - -Unfortunately, the earlier ANSI/ISO C standards did not include this function. -If the compiler of interest does not support this function then the -BSTRLIB_NOVSNP should be defined via something like: - - #if !defined (BSTRLIB_VSNP_OK) && !defined (BSTRLIB_NOVSNP) - # if defined (__TURBOC__) || defined (__COMPILERVENDORSPECIFICMACRO__) - # define BSTRLIB_NOVSNP - # endif - #endif - -which appears at the top of bstrlib.h. Note that the bformat(a) functions -will not be declared or implemented if the BSTRLIB_NOVSNP macro is set. 
If -the compiler has renamed vsnprintf() to some other named function, then -search for the definition of the exvsnprintf macro in bstrlib.c file and be -sure its defined appropriately: - - #if defined (__COMPILERVENDORSPECIFICMACRO__) - # define exvsnprintf(r,b,n,f,a) {r=__compiler_specific_vsnprintf(b,n,f,a);} - #else - # define exvsnprintf(r,b,n,f,a) {r=vsnprintf(b,n,f,a);} - #endif - -Take notice of the return value being captured in the variable r. It is -assumed that r exceeds n if and only if the underlying vsnprintf function has -determined what the true maximal output length would be for output if the -buffer were large enough to hold it. Non-modern implementations must output a -lesser number (the macro can and should be modified to ensure this). - -2. Weak C++ compiler. - -C++ is a much more complicated language to implement than C. This has lead -to varying quality of compiler implementations. The weaknesses isolated in -the initial ports are inclusion of the Standard Template Library, -std::iostream and exception handling. By default it is assumed that the C++ -compiler supports all of these things correctly. If your compiler does not -support one or more of these define the corresponding macro: - - BSTRLIB_CANNOT_USE_STL - BSTRLIB_CANNOT_USE_IOSTREAM - BSTRLIB_DOESNT_THROW_EXCEPTIONS - -The compiler specific detected macro should be defined at the top of -bstrwrap.h in the Configuration defines section. Note that these disabling -macros can be overrided with the associated enabling macro if a subsequent -version of the compiler gains support. (For example, its possible to rig -up STLport to provide STL support for WATCOM C/C++, so -DBSTRLIB_CAN_USE_STL -can be passed in as a compiler option.) - -3. The bsafe module, and reserved words. - -The bsafe module is in gross violation of the ANSI/ISO C standard in the -sense that it redefines what could be implemented as reserved words on a -given compiler. 
The typical problem is that a compiler may inline some of the -functions and thus not be properly overridden by the definitions in the bsafe -module. It is also possible that a compiler may prohibit the redefinitions in -the bsafe module. Compiler specific action will be required to deal with -these situations. - -Platform Specific Files ------------------------ - -The makefiles for the examples are basically setup of for particular -environments for each platform. In general these makefiles are not portable -and should be constructed as necessary from scratch for each platform. - -Testing a port --------------- - -To test that a port compiles correctly do the following: - -1. Build a sample project that includes the bstrlib, bstraux, bstrwrap, and - bsafe modules. -2. Compile bstest against the bstrlib module. -3. Run bstest and ensure that 0 errors are reported. -4. Compile test against the bstrlib and bstrwrap modules. -5. Run test and ensure that 0 errors are reported. -6. Compile each of the examples (except for the "re" example, which may be - complicated and is not a real test of bstrlib and except for the mfcbench - example which is Windows specific.) -7. Run each of the examples. - -The builds must have 0 errors, and should have the absolute minimum number of -warnings (in most cases can be reduced to 0.) The result of execution should -be essentially identical on each platform. - -Performance ------------ - -Different CPU and compilers have different capabilities in terms of -performance. It is possible for Bstrlib to assume performance -characteristics that a platform doesn't have (since it was primarily -developed on just one platform). The goal of Bstrlib is to provide very good -performance on all platforms regardless of this but without resorting to -extreme measures (such as using assembly language, or non-portable intrinsics -or library extensions.) - -There are two performance benchmarks that can be found in the example/ -directory. 
They are: cbench.c and cppbench.cpp. These are variations and -expansions of a benchmark for another string library. They don't cover all -string functionality, but do include the most basic functions which will be -common in most string manipulation kernels. - -............................................................................... - -Feedback --------- - -In all cases, you may email issues found to the primary author of Bstrlib at -the email address: websnarf@users.sourceforge.net - -=============================================================================== diff --git a/third_party/HLSLcc/src/cbstring/security.txt b/third_party/HLSLcc/src/cbstring/security.txt deleted file mode 100644 index 9761409..0000000 --- a/third_party/HLSLcc/src/cbstring/security.txt +++ /dev/null @@ -1,221 +0,0 @@ -Better String library Security Statement ----------------------------------------- - -by Paul Hsieh - -=============================================================================== - -Introduction ------------- - -The Better String library (hereafter referred to as Bstrlib) is an attempt to -provide improved string processing functionality to the C and C++ languages. -At the heart of the Bstrlib is the management of "bstring"s which are a -significant improvement over '\0' terminated char buffers. See the -accompanying documenation file bstrlib.txt for more information. - -DISCLAIMER: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND -CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT -NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; -OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, -WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR -OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF -ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -Like any software, there is always a possibility of failure due to a flawed -implementation. Nevertheless a good faith effort has been made to minimize -such flaws in Bstrlib. Also, use of Bstrlib by itself will not make an -application secure or free from implementation failures. However, it is the -author's conviction that use of Bstrlib can greatly facilitate the creation -of software meeting the highest possible standards of security. - -Part of the reason why this document has been created, is for the purpose of -security auditing, or the creation of further "Statements on Security" for -software that is created that uses Bstrlib. An auditor may check the claims -below against Bstrlib, and use this as a basis for analysis of software which -uses Bstrlib. - -=============================================================================== - -Statement on Security ---------------------- - -This is a document intended to give consumers of the Better String Library -who are interested in security an idea of where the Better String Library -stands on various security issues. Any deviation observed in the actual -library itself from the descriptions below should be considered an -implementation error, not a design flaw. - -This statement is not an analytical proof of correctness or an outline of one -but rather an assertion similar to a scientific claim or hypothesis. 
By use, -testing and open independent examination (otherwise known as scientific -falsifiability), the credibility of the claims made below can rise to the -level of an established theory. - -Common security issues: -....................... - -1. Buffer Overflows - -The Bstrlib API allows the programmer a way to deal with strings without -having to deal with the buffers containing them. Ordinary usage of the -Bstrlib API itself makes buffer overflows impossible. - -Furthermore, the Bstrlib API has a superset of basic string functionality as -compared to the C library's char * functions, C++'s std::string class and -Microsoft's MFC based CString class. It also has abstracted mechanisms for -dealing with IO. This is important as it gives developers a way of migrating -all their code from a functionality point of view. - -2. Memory size overflow/wrap around attack - -Bstrlib is, by design, impervious to memory size overflow attacks. The -reason is it is resiliant to length overflows is that bstring lengths are -bounded above by INT_MAX, instead of ~(size_t)0. So length addition -overflows cause a wrap around of the integer value making them negative -causing balloc() to fail before an erroneous operation can occurr. Attempted -conversions of char * strings which may have lengths greater than INT_MAX are -detected and the conversion is aborted. - -It is unknown if this property holds on machines that don't represent -integers as 2s complement. It is recommended that Bstrlib be carefully -auditted by anyone using a system which is not 2s complement based. - -3. Constant string protection - -Bstrlib implements runtime enforced constant and read-only string semantics. -I.e., bstrings which are declared as constant via the bsStatic() macro cannot -be modified or deallocated directly through the Bstrlib API, and this cannot -be subverted by casting or other type coercion. This is independent of the -use of the const_bstring data type. 
- -The Bstrlib C API uses the type const_bstring to specify bstring parameters -whose contents do not change. Although the C language cannot enforce this, -this is nevertheless guaranteed by the implementation of the Bstrlib library -of C functions. The C++ API enforces the const attribute on CBString types -correctly. - -4. Aliased bstring support - -Bstrlib detects and supports aliased parameter management throughout the API. -The kind of aliasing that is allowed is the one where pointers of the same -basic type may be pointing to overlapping objects (this is the assumption the -ANSI C99 specification makes.) Each function behaves as if all read-only -parameters were copied to temporaries which are used in their stead before -the function is enacted (it rarely actually does this). No function in the -Bstrlib uses the "restrict" parameter attribute from the ANSI C99 -specification. - -5. Information leaking - -In bstraux.h, using the semantically equivalent macros bSecureDestroy() and -bSecureWriteProtect() in place of bdestroy() and bwriteprotect() respectively -will ensure that stale data does not linger in the heap's free space after -strings have been released back to memory. Created bstrings or CBStrings -are not linked to anything external to themselves, and thus cannot expose -deterministic data leaking. If a bstring is resized, the preimage may exist -as a copy that is released to the heap. Thus for sensitive data, the bstring -should be sufficiently presized before manipulated so that it is not resized. -bSecureInput() has been supplied in bstraux.c, which can be used to obtain -input securely without any risk of leaving any part of the input image in the -heap except for the allocated bstring that is returned. - -6. Memory leaking - -Bstrlib can be built using memdbg.h enabled via the BSTRLIB_MEMORY_DEBUG -macro. 
User generated definitions for malloc, realloc and free can then be -supplied which can implement special strategies for memory corruption -detection or memory leaking. Otherwise, bstrlib does not do anything out of -the ordinary to attempt to deal with the standard problem of memory leaking -(i.e., losing references to allocated memory) when programming in the C and -C++ languages. However, it does not compound the problem any more than exists -either, as it doesn't have any intrinsic inescapable leaks in it. Bstrlib -does not preclude the use of automatic garbage collection mechanisms such as -the Boehm garbage collector. - -7. Encryption - -Bstrlib does not present any built-in encryption mechanism. However, it -supports full binary contents in its data buffers, so any standard block -based encryption mechanism can make direct use of bstrings/CBStrings for -buffer management. - -8. Double freeing - -Freeing a pointer that is already free is an extremely rare, but nevertheless -a potentially ruthlessly corrupting operation (its possible to cause Win 98 to -reboot, by calling free mulitiple times on already freed data using the WATCOM -CRT.) Bstrlib invalidates the bstring header data before freeing, so that in -many cases a double free will be detected and an error will be reported -(though this behaviour is not guaranteed and should not be relied on). - -Using bstrFree pervasively (instead of bdestroy) can lead to somewhat -improved invalid free avoidance (it is completely safe whenever bstring -instances are only stored in unique variables). 
For example: - - struct tagbstring hw = bsStatic ("Hello, world"); - bstring cpHw = bstrcpy (&hw); - - #ifdef NOT_QUITE_AS_SAFE - bdestroy (cpHw); /* Never fail */ - bdestroy (cpHw); /* Error sometimes detected at runtime */ - bdestroy (&hw); /* Error detected at run time */ - #else - bstrFree (cpHw); /* Never fail */ - bstrFree (cpHw); /* Will do nothing */ - bstrFree (&hw); /* Will lead to a compile time error */ - #endif - -9. Resource based denial of service - -bSecureInput() has been supplied in bstraux.c. It has an optional upper limit -for input length. But unlike fgets(), it is also easily determined if the -buffer has been truncated early. In this way, a program can set an upper limit -on input sizes while still allowing for implementing context specific -truncation semantics (i.e., does the program consume but dump the extra -input, or does it consume it in later inputs?) - -10. Mixing char *'s and bstrings - -The bstring and char * representations are not identical. So there is a risk -when converting back and forth that data may lost. Essentially bstrings can -contain '\0' as a valid non-terminating character, while char * strings -cannot and in fact must use the character as a terminator. The risk of data -loss is very low, since: - - A) the simple method of only using bstrings in a char * semantically - compatible way is both easy to achieve and pervasively supported. - B) obtaining '\0' content in a string is either deliberate or indicative - of another, likely more serious problem in the code. - C) the library comes with various functions which deal with this issue - (namely: bfromcstr(), bstr2cstr (), and bSetCstrChar ()) - -Marginal security issues: -......................... - -11. 8-bit versus 9-bit portability - -Bstrlib uses CHAR_BIT and other limits.h constants to the maximum extent -possible to avoid portability problems. However, Bstrlib has not been tested -on any system that does not represent char as 8-bits. 
So whether or not it -works on 9-bit systems is an open question. It is recommended that Bstrlib be -carefully auditted by anyone using a system in which CHAR_BIT is not 8. - -12. EBCDIC/ASCII/UTF-8 data representation attacks. - -Bstrlib uses ctype.h functions to ensure that it remains portable to non- -ASCII systems. It also checks range to make sure it is well defined even for -data that ANSI does not define for the ctype functions. - -Obscure issues: -............... - -13. Data attributes - -There is no support for a Perl-like "taint" attribute, however, an example of -how to do this using C++'s type system is given as an example. - diff --git a/third_party/HLSLcc/src/decode.cpp b/third_party/HLSLcc/src/decode.cpp deleted file mode 100644 index b0622b3..0000000 --- a/third_party/HLSLcc/src/decode.cpp +++ /dev/null @@ -1,1635 +0,0 @@ -#include "internal_includes/tokens.h" -#include "internal_includes/decode.h" -#include "stdlib.h" -#include "stdio.h" -#include "internal_includes/reflect.h" -#include "internal_includes/debug.h" -#include "internal_includes/toGLSLOperand.h" -#include "internal_includes/Shader.h" -#include "internal_includes/Instruction.h" -#include "internal_includes/Declaration.h" - -#define FOURCC(a, b, c, d) ((uint32_t)(uint8_t)(a) | ((uint32_t)(uint8_t)(b) << 8) | ((uint32_t)(uint8_t)(c) << 16) | ((uint32_t)(uint8_t)(d) << 24 )) -enum { FOURCC_DXBC = FOURCC('D', 'X', 'B', 'C') }; //DirectX byte code -enum { FOURCC_SHDR = FOURCC('S', 'H', 'D', 'R') }; //Shader model 4 code -enum { FOURCC_SHEX = FOURCC('S', 'H', 'E', 'X') }; //Shader model 5 code -enum { FOURCC_RDEF = FOURCC('R', 'D', 'E', 'F') }; //Resource definition (e.g. 
constant buffers) -enum { FOURCC_ISGN = FOURCC('I', 'S', 'G', 'N') }; //Input signature -enum { FOURCC_IFCE = FOURCC('I', 'F', 'C', 'E') }; //Interface (for dynamic linking) -enum { FOURCC_OSGN = FOURCC('O', 'S', 'G', 'N') }; //Output signature -enum { FOURCC_PSGN = FOURCC('P', 'C', 'S', 'G') }; //Patch-constant signature - -enum { FOURCC_ISG1 = FOURCC('I', 'S', 'G', '1') }; //Input signature with Stream and MinPrecision -enum { FOURCC_OSG1 = FOURCC('O', 'S', 'G', '1') }; //Output signature with Stream and MinPrecision -enum { FOURCC_OSG5 = FOURCC('O', 'S', 'G', '5') }; //Output signature with Stream -enum { FOURCC_PSG1 = FOURCC('P', 'S', 'G', '1') }; //Patch constant signature with MinPrecision - -enum { FOURCC_STAT = FOURCC('S', 'T', 'A', 'T') }; // Chunks that we ignore -enum { FOURCC_SFI0 = FOURCC('S', 'F', 'I', '0') }; // Chunks that we ignore - - -typedef struct DXBCContainerHeaderTAG -{ - unsigned fourcc; - uint32_t unk[4]; - uint32_t one; - uint32_t totalSize; - uint32_t chunkCount; -} DXBCContainerHeader; - -typedef struct DXBCChunkHeaderTAG -{ - unsigned fourcc; - unsigned size; -} DXBCChunkHeader; - -#ifdef _DEBUG -static uint64_t operandID = 0; -static uint64_t instructionID = 0; -#endif - -void DecodeNameToken(const uint32_t* pui32NameToken, Operand* psOperand) -{ - psOperand->eSpecialName = DecodeOperandSpecialName(*pui32NameToken); - switch (psOperand->eSpecialName) - { - case NAME_UNDEFINED: - { - psOperand->specialName = "undefined"; - break; - } - case NAME_POSITION: - { - psOperand->specialName = "position"; - break; - } - case NAME_CLIP_DISTANCE: - { - psOperand->specialName = "clipDistance"; - break; - } - case NAME_CULL_DISTANCE: - { - psOperand->specialName = "cullDistance"; - break; - } - case NAME_RENDER_TARGET_ARRAY_INDEX: - { - psOperand->specialName = "renderTargetArrayIndex"; - break; - } - case NAME_VIEWPORT_ARRAY_INDEX: - { - psOperand->specialName = "viewportArrayIndex"; - break; - } - case NAME_VERTEX_ID: - { - 
psOperand->specialName = "vertexID"; - break; - } - case NAME_PRIMITIVE_ID: - { - psOperand->specialName = "primitiveID"; - break; - } - case NAME_INSTANCE_ID: - { - psOperand->specialName = "instanceID"; - break; - } - case NAME_IS_FRONT_FACE: - { - psOperand->specialName = "isFrontFace"; - break; - } - case NAME_SAMPLE_INDEX: - { - psOperand->specialName = "sampleIndex"; - break; - } - //For the quadrilateral domain, there are 6 factors (4 sides, 2 inner). - case NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: - case NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: - case NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR: - case NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR: - - //For the triangular domain, there are 4 factors (3 sides, 1 inner) - case NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_TRI_INSIDE_TESSFACTOR: - - //For the isoline domain, there are 2 factors (detail and density). - case NAME_FINAL_LINE_DETAIL_TESSFACTOR: - case NAME_FINAL_LINE_DENSITY_TESSFACTOR: - { - psOperand->specialName = "tessFactor"; - break; - } - default: - { - ASSERT(0); - break; - } - } -} - -// Find the declaration of the texture described by psTextureOperand and -// mark it as a shadow type. (e.g. 
accessed via sampler2DShadow rather than sampler2D) -static void MarkTextureAsShadow(ShaderInfo* psShaderInfo, std::vector &declarations, const Operand* psTextureOperand) -{ - ASSERT(psTextureOperand->eType == OPERAND_TYPE_RESOURCE); - - for (std::vector::iterator psDecl = declarations.begin(); psDecl != declarations.end(); psDecl++) - { - if (psDecl->eOpcode == OPCODE_DCL_RESOURCE) - { - if (psDecl->asOperands[0].eType == OPERAND_TYPE_RESOURCE && - psDecl->asOperands[0].ui32RegisterNumber == psTextureOperand->ui32RegisterNumber) - { - psDecl->ui32IsShadowTex = 1; - break; - } - } - } -} - -static void MarkTextureSamplerPair(ShaderInfo* psShaderInfo, std::vector & declarations, const Operand* psTextureOperand, const Operand* psSamplerOperand, TextureSamplerPairs& samplers) -{ - ASSERT(psTextureOperand->eType == OPERAND_TYPE_RESOURCE); - ASSERT(psSamplerOperand->eType == OPERAND_TYPE_SAMPLER); - - for (std::vector::iterator psDecl = declarations.begin(); psDecl != declarations.end(); psDecl++) - { - if (psDecl->eOpcode == OPCODE_DCL_RESOURCE) - { - if (psDecl->asOperands[0].eType == OPERAND_TYPE_RESOURCE && - psDecl->asOperands[0].ui32RegisterNumber == psTextureOperand->ui32RegisterNumber) - { - // psDecl is the texture resource referenced by psTextureOperand - - // add psSamplerOperand->ui32RegisterNumber to list of samplers that use this texture - // set::insert returns a pair of which .second tells whether a new element was actually added - if (psDecl->samplersUsed.insert(psSamplerOperand->ui32RegisterNumber).second) - { - // Record the TEX_with_SMP string in the TextureSamplerPair array that we return to the client - std::string combinedname = TextureSamplerName(psShaderInfo, psTextureOperand->ui32RegisterNumber, psSamplerOperand->ui32RegisterNumber, psDecl->ui32IsShadowTex); - samplers.push_back(combinedname); - } - break; - } - } - } -} - -uint32_t DecodeOperand(const uint32_t *pui32Tokens, Operand* psOperand) -{ - int i; - uint32_t ui32NumTokens = 1; - 
OPERAND_NUM_COMPONENTS eNumComponents; - -#ifdef _DEBUG - psOperand->id = operandID++; -#endif - - //Some defaults - psOperand->iWriteMaskEnabled = 1; - psOperand->iGSInput = 0; - psOperand->iPSInOut = 0; - psOperand->aeDataType[0] = SVT_FLOAT; - psOperand->aeDataType[1] = SVT_FLOAT; - psOperand->aeDataType[2] = SVT_FLOAT; - psOperand->aeDataType[3] = SVT_FLOAT; - - psOperand->iExtended = DecodeIsOperandExtended(*pui32Tokens); - - - psOperand->eModifier = OPERAND_MODIFIER_NONE; - psOperand->m_SubOperands[0].reset(); - psOperand->m_SubOperands[1].reset(); - psOperand->m_SubOperands[2].reset(); - - psOperand->eMinPrecision = OPERAND_MIN_PRECISION_DEFAULT; - - /* Check if this instruction is extended. If it is, - * we need to print the information first */ - if (psOperand->iExtended) - { - /* OperandToken1 is the second token */ - ui32NumTokens++; - - if (DecodeExtendedOperandType(pui32Tokens[1]) == EXTENDED_OPERAND_MODIFIER) - { - psOperand->eModifier = DecodeExtendedOperandModifier(pui32Tokens[1]); - psOperand->eMinPrecision = (OPERAND_MIN_PRECISION)DecodeOperandMinPrecision(pui32Tokens[1]); - } - } - - psOperand->iIndexDims = DecodeOperandIndexDimension(*pui32Tokens); - psOperand->eType = DecodeOperandType(*pui32Tokens); - - psOperand->ui32RegisterNumber = 0; - - eNumComponents = DecodeOperandNumComponents(*pui32Tokens); - - if (psOperand->eType == OPERAND_TYPE_INPUT_GS_INSTANCE_ID) - { - eNumComponents = OPERAND_1_COMPONENT; - psOperand->aeDataType[0] = SVT_UINT; - } - - switch (eNumComponents) - { - case OPERAND_1_COMPONENT: - { - psOperand->iNumComponents = 1; - break; - } - case OPERAND_4_COMPONENT: - { - psOperand->iNumComponents = 4; - break; - } - default: - { - psOperand->iNumComponents = 0; - break; - } - } - - if (psOperand->iWriteMaskEnabled && - psOperand->iNumComponents == 4) - { - psOperand->eSelMode = DecodeOperand4CompSelMode(*pui32Tokens); - - if (psOperand->eSelMode == OPERAND_4_COMPONENT_MASK_MODE) - { - psOperand->ui32CompMask = 
DecodeOperand4CompMask(*pui32Tokens); - } - else if (psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) - { - psOperand->ui32Swizzle = DecodeOperand4CompSwizzle(*pui32Tokens); - - if (psOperand->ui32Swizzle != NO_SWIZZLE) - { - psOperand->aui32Swizzle[0] = DecodeOperand4CompSwizzleSource(*pui32Tokens, 0); - psOperand->aui32Swizzle[1] = DecodeOperand4CompSwizzleSource(*pui32Tokens, 1); - psOperand->aui32Swizzle[2] = DecodeOperand4CompSwizzleSource(*pui32Tokens, 2); - psOperand->aui32Swizzle[3] = DecodeOperand4CompSwizzleSource(*pui32Tokens, 3); - } - else - { - psOperand->aui32Swizzle[0] = OPERAND_4_COMPONENT_X; - psOperand->aui32Swizzle[1] = OPERAND_4_COMPONENT_Y; - psOperand->aui32Swizzle[2] = OPERAND_4_COMPONENT_Z; - psOperand->aui32Swizzle[3] = OPERAND_4_COMPONENT_W; - } - } - else if (psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE) - { - psOperand->aui32Swizzle[0] = DecodeOperand4CompSel1(*pui32Tokens); - } - } - - if (psOperand->eType == OPERAND_TYPE_IMMEDIATE32) - { - for (i = 0; i < psOperand->iNumComponents; ++i) - { - psOperand->afImmediates[i] = *((float*)(&pui32Tokens[ui32NumTokens])); - ui32NumTokens++; - } - } - else if (psOperand->eType == OPERAND_TYPE_IMMEDIATE64) - { - for (i = 0; i < psOperand->iNumComponents; ++i) - { - psOperand->adImmediates[i] = *((double*)(&pui32Tokens[ui32NumTokens])); - ui32NumTokens += 2; - } - } - - // Used only for Metal - if (psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL || psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL || psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH) - { - psOperand->ui32RegisterNumber = 0; - psOperand->ui32CompMask = 1; - } - - for (i = 0; i < psOperand->iIndexDims; ++i) - { - OPERAND_INDEX_REPRESENTATION eRep = DecodeOperandIndexRepresentation(i , *pui32Tokens); - - psOperand->eIndexRep[i] = eRep; - - psOperand->aui32ArraySizes[i] = 0; - psOperand->ui32RegisterNumber = 0; - - switch (eRep) - { - case OPERAND_INDEX_IMMEDIATE32: - { - 
psOperand->ui32RegisterNumber = *(pui32Tokens + ui32NumTokens); - psOperand->aui32ArraySizes[i] = psOperand->ui32RegisterNumber; - break; - } - case OPERAND_INDEX_RELATIVE: - { - psOperand->m_SubOperands[i].reset(new Operand()); - DecodeOperand(pui32Tokens + ui32NumTokens, psOperand->m_SubOperands[i].get()); - - ui32NumTokens++; - break; - } - case OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: - { - psOperand->ui32RegisterNumber = *(pui32Tokens + ui32NumTokens); - psOperand->aui32ArraySizes[i] = psOperand->ui32RegisterNumber; - - ui32NumTokens++; - - psOperand->m_SubOperands[i].reset(new Operand()); - DecodeOperand(pui32Tokens + ui32NumTokens, psOperand->m_SubOperands[i].get()); - - ui32NumTokens++; - break; - } - default: - { - ASSERT(0); - break; - } - } - - // Indices should be ints - switch (eRep) - { - case OPERAND_INDEX_IMMEDIATE32: - case OPERAND_INDEX_RELATIVE: - case OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: - { - int j = 0; - for (; j < psOperand->iNumComponents; j++) - { - psOperand->aeDataType[j] = SVT_INT; - } - break; - } - default: - { - break; - } - } - ui32NumTokens++; - } - - psOperand->specialName = ""; - - return ui32NumTokens; -} - -const uint32_t* DecodeDeclaration(Shader* psShader, const uint32_t* pui32Token, Declaration* psDecl, ShaderPhase *psPhase) -{ - uint32_t ui32TokenLength = DecodeInstructionLength(*pui32Token); - const uint32_t bExtended = DecodeIsOpcodeExtended(*pui32Token); - const OPCODE_TYPE eOpcode = DecodeOpcodeType(*pui32Token); - uint32_t ui32OperandOffset = 1; - - if (eOpcode < NUM_OPCODES && eOpcode >= 0) - { - psShader->aiOpcodeUsed[eOpcode] = 1; - } - - psDecl->eOpcode = eOpcode; - - psDecl->ui32IsShadowTex = 0; - - if (bExtended) - { - ui32OperandOffset = 2; - } - - switch (eOpcode) - { - case OPCODE_DCL_RESOURCE: // DCL* opcodes have - { - psDecl->value.eResourceDimension = DecodeResourceDimension(*pui32Token); - psDecl->ui32NumOperands = 1; - DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); - break; - } 
- case OPCODE_DCL_CONSTANT_BUFFER: // custom operand formats. - { - psDecl->ui32NumOperands = 1; - DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); - break; - } - case OPCODE_DCL_SAMPLER: - { - psDecl->ui32NumOperands = 1; - psDecl->value.eSamplerMode = DecodeSamplerMode(*pui32Token); - - DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); - break; - } - case OPCODE_DCL_INDEX_RANGE: - { - int regSpace = 0; - psDecl->ui32NumOperands = 1; - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); - psDecl->value.ui32IndexRange = pui32Token[ui32OperandOffset]; - - regSpace = psDecl->asOperands[0].GetRegisterSpace(psShader->eShaderType, psPhase->ePhase); - if (psDecl->asOperands[0].eType == OPERAND_TYPE_INPUT) - { - uint32_t i; - const uint32_t indexRange = psDecl->value.ui32IndexRange; - const uint32_t reg = psDecl->asOperands[0].ui32RegisterNumber; - - psShader->aIndexedInput[regSpace][reg] = indexRange; - psShader->aIndexedInputParents[regSpace][reg] = reg; - - //-1 means don't declare this input because it falls in - //the range of an already declared array. 
- for (i = reg + 1; i < reg + indexRange; ++i) - { - psShader->aIndexedInput[regSpace][i] = -1; - psShader->aIndexedInputParents[regSpace][i] = reg; - } - } - - if (psDecl->asOperands[0].eType == OPERAND_TYPE_OUTPUT) - { - psShader->aIndexedOutput[regSpace][psDecl->asOperands[0].ui32RegisterNumber] = true; - } - break; - } - case OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY: - { - psDecl->value.eOutputPrimitiveTopology = DecodeGSOutputPrimitiveTopology(*pui32Token); - break; - } - case OPCODE_DCL_GS_INPUT_PRIMITIVE: - { - psDecl->value.eInputPrimitive = DecodeGSInputPrimitive(*pui32Token); - break; - } - case OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT: - { - psDecl->value.ui32MaxOutputVertexCount = pui32Token[1]; - break; - } - case OPCODE_DCL_TESS_PARTITIONING: - { - psDecl->value.eTessPartitioning = DecodeTessPartitioning(*pui32Token); - break; - } - case OPCODE_DCL_TESS_DOMAIN: - { - psDecl->value.eTessDomain = DecodeTessDomain(*pui32Token); - break; - } - case OPCODE_DCL_TESS_OUTPUT_PRIMITIVE: - { - psDecl->value.eTessOutPrim = DecodeTessOutPrim(*pui32Token); - break; - } - case OPCODE_DCL_THREAD_GROUP: - { - psDecl->value.aui32WorkGroupSize[0] = pui32Token[1]; - psDecl->value.aui32WorkGroupSize[1] = pui32Token[2]; - psDecl->value.aui32WorkGroupSize[2] = pui32Token[3]; - break; - } - case OPCODE_DCL_INPUT: - { - psDecl->ui32NumOperands = 1; - DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); - break; - } - case OPCODE_DCL_INPUT_SIV: - { - psDecl->ui32NumOperands = 1; - DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); - if (psShader->eShaderType == PIXEL_SHADER) - { - psDecl->value.eInterpolation = DecodeInterpolationMode(*pui32Token); - } - break; - } - case OPCODE_DCL_INPUT_PS: - { - psDecl->ui32NumOperands = 1; - psDecl->value.eInterpolation = DecodeInterpolationMode(*pui32Token); - Operand* psOperand = &psDecl->asOperands[0]; - DecodeOperand(pui32Token + ui32OperandOffset, psOperand); - - ShaderInfo::InOutSignature *psSig = 
NULL; - psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->ui32CompMask, (const ShaderInfo::InOutSignature**)&psSig); - - /* UNITY_FRAMEBUFFER_FETCH_AVAILABLE - special case mapping for inout color. - - In the fragment shader, setting inout var : SV_Target would result to - compiler error, unless SV_Target is defined to COLOR semantic for compatibility - reasons. Unfortunately, we still need to have a clear distinction between - vertex shader COLOR output and SV_Target, so the following workaround abuses - the fact that semantic names are case insensitive and preprocessor macros - are not. The resulting HLSL bytecode has semantics in case preserving form, - helps code generator to do extra work required for framebuffer fetch - - See also HLSLSupport.cginc - */ - if (psSig->eSystemValueType == NAME_UNDEFINED && - psSig->semanticName.size() == 5 && !strncmp(psSig->semanticName.c_str(), "CoLoR", 5)) - { - // Rename into something more readable, matches output - psSig->semanticName.replace(0, 9, "SV_Target"); - psOperand->iPSInOut = 1; - } - - break; - } - case OPCODE_DCL_INPUT_SGV: - case OPCODE_DCL_INPUT_PS_SGV: - { - psDecl->ui32NumOperands = 1; - DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); - DecodeNameToken(pui32Token + 3, &psDecl->asOperands[0]); - break; - } - case OPCODE_DCL_INPUT_PS_SIV: - { - psDecl->ui32NumOperands = 1; - psDecl->value.eInterpolation = DecodeInterpolationMode(*pui32Token); - DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); - DecodeNameToken(pui32Token + 3, &psDecl->asOperands[0]); - break; - } - case OPCODE_DCL_OUTPUT: - { - psDecl->ui32NumOperands = 1; - DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); - break; - } - case OPCODE_DCL_OUTPUT_SGV: - { - break; - } - case OPCODE_DCL_OUTPUT_SIV: - { - psDecl->ui32NumOperands = 1; - DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); - DecodeNameToken(pui32Token + 3, 
&psDecl->asOperands[0]); - break; - } - case OPCODE_DCL_TEMPS: - { - psDecl->value.ui32NumTemps = *(pui32Token + ui32OperandOffset); - break; - } - case OPCODE_DCL_INDEXABLE_TEMP: - { - psDecl->sIdxTemp.ui32RegIndex = *(pui32Token + ui32OperandOffset); - psDecl->sIdxTemp.ui32RegCount = *(pui32Token + ui32OperandOffset + 1); - psDecl->sIdxTemp.ui32RegComponentSize = *(pui32Token + ui32OperandOffset + 2); - break; - } - case OPCODE_DCL_GLOBAL_FLAGS: - { - psDecl->value.ui32GlobalFlags = DecodeGlobalFlags(*pui32Token); - break; - } - case OPCODE_DCL_INTERFACE: - { - uint32_t func = 0, numClassesImplementingThisInterface, arrayLen, interfaceID; - interfaceID = pui32Token[ui32OperandOffset]; - ui32OperandOffset++; - psDecl->ui32TableLength = pui32Token[ui32OperandOffset]; - ui32OperandOffset++; - - numClassesImplementingThisInterface = DecodeInterfaceTableLength(*(pui32Token + ui32OperandOffset)); - arrayLen = DecodeInterfaceArrayLength(*(pui32Token + ui32OperandOffset)); - - ui32OperandOffset++; - - psDecl->value.iface.ui32InterfaceID = interfaceID; - psDecl->value.iface.ui32NumFuncTables = numClassesImplementingThisInterface; - psDecl->value.iface.ui32ArraySize = arrayLen; - - psShader->funcPointer[interfaceID].ui32NumBodiesPerTable = psDecl->ui32TableLength; - - for (; func < numClassesImplementingThisInterface; ++func) - { - uint32_t ui32FuncTable = *(pui32Token + ui32OperandOffset); - psShader->aui32FuncTableToFuncPointer[ui32FuncTable] = interfaceID; - - psShader->funcPointer[interfaceID].aui32FuncTables[func] = ui32FuncTable; - ui32OperandOffset++; - } - - break; - } - case OPCODE_DCL_FUNCTION_BODY: - { - psDecl->ui32NumOperands = 1; - DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); - break; - } - case OPCODE_DCL_FUNCTION_TABLE: - { - uint32_t ui32Func; - const uint32_t ui32FuncTableID = pui32Token[ui32OperandOffset++]; - const uint32_t ui32NumFuncsInTable = pui32Token[ui32OperandOffset++]; - - for (ui32Func = 0; ui32Func < 
ui32NumFuncsInTable; ++ui32Func) - { - const uint32_t ui32FuncBodyID = pui32Token[ui32OperandOffset++]; - - psShader->aui32FuncBodyToFuncTable[ui32FuncBodyID] = ui32FuncTableID; - - psShader->funcTable[ui32FuncTableID].aui32FuncBodies[ui32Func] = ui32FuncBodyID; - } - -// OpcodeToken0 is followed by a DWORD that represents the function table -// identifier and another DWORD (TableLength) that gives the number of -// functions in the table. -// -// This is followed by TableLength DWORDs which are function body indices. -// - - break; - } - case OPCODE_DCL_INPUT_CONTROL_POINT_COUNT: - { - psDecl->value.ui32MaxOutputVertexCount = DecodeOutputControlPointCount(*pui32Token); - break; - } - case OPCODE_HS_DECLS: - { - break; - } - case OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT: - { - psDecl->value.ui32MaxOutputVertexCount = DecodeOutputControlPointCount(*pui32Token); - break; - } - case OPCODE_HS_JOIN_PHASE: - case OPCODE_HS_FORK_PHASE: - case OPCODE_HS_CONTROL_POINT_PHASE: - { - break; - } - case OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT: - case OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT: - { - psDecl->value.ui32HullPhaseInstanceCount = pui32Token[1]; - psPhase->ui32InstanceCount = psDecl->value.ui32HullPhaseInstanceCount; - break; - } - case OPCODE_CUSTOMDATA: - { - ui32TokenLength = pui32Token[1]; - { -// int iTupleSrc = 0, iTupleDest = 0; - //const uint32_t ui32ConstCount = pui32Token[1] - 2; - //const uint32_t ui32TupleCount = (ui32ConstCount / 4); - - const uint32_t ui32NumVec4 = (ui32TokenLength - 2) / 4; - - ICBVec4 const *pVec4Array = (ICBVec4 const *)(void*)(pui32Token + 2); - - /* must be a multiple of 4 */ - ASSERT(((ui32TokenLength - 2) % 4) == 0); - - psDecl->asImmediateConstBuffer.assign(pVec4Array, pVec4Array + ui32NumVec4); - - psDecl->ui32NumOperands = ui32NumVec4; - } - break; - } - case OPCODE_DCL_HS_MAX_TESSFACTOR: - { - psDecl->value.fMaxTessFactor = *((float*)&pui32Token[1]); - break; - } - case OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED: - { - 
psDecl->ui32NumOperands = 2; - psDecl->value.eResourceDimension = DecodeResourceDimension(*pui32Token); - psDecl->sUAV.ui32GloballyCoherentAccess = DecodeAccessCoherencyFlags(*pui32Token); - psDecl->sUAV.bCounter = 0; - psDecl->ui32BufferStride = 4; - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); - psDecl->sUAV.Type = DecodeResourceReturnType(0, pui32Token[ui32OperandOffset]); - break; - } - case OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW: - { - psDecl->ui32NumOperands = 1; - psDecl->sUAV.ui32GloballyCoherentAccess = DecodeAccessCoherencyFlags(*pui32Token); - psDecl->sUAV.bCounter = 0; - psDecl->ui32BufferStride = 4; - DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); - //This should be a RTYPE_UAV_RWBYTEADDRESS buffer. It is memory backed by - //a shader storage buffer whose is unknown at compile time. - break; - } - case OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED: - { - const ResourceBinding* psBinding = NULL; - const ConstantBuffer* psBuffer = NULL; - - psDecl->ui32NumOperands = 1; - psDecl->sUAV.ui32GloballyCoherentAccess = DecodeAccessCoherencyFlags(*pui32Token); - psDecl->sUAV.bCounter = 0; - DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); - - psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, &psBinding); - psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_UAV, psBinding->ui32BindPoint, &psBuffer); - psDecl->ui32BufferStride = psBuffer->ui32TotalSizeInBytes; - - switch (psBinding->eType) - { - case RTYPE_UAV_RWSTRUCTURED_WITH_COUNTER: - case RTYPE_UAV_APPEND_STRUCTURED: - case RTYPE_UAV_CONSUME_STRUCTURED: - psDecl->sUAV.bCounter = 1; - break; - default: - break; - } - break; - } - case OPCODE_DCL_RESOURCE_STRUCTURED: - { - const ResourceBinding* psBinding = NULL; - const ConstantBuffer* psBuffer = NULL; - psDecl->ui32NumOperands = 1; - DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); - - 
psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber, &psBinding); - psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_TEXTURE, psBinding->ui32BindPoint, &psBuffer); - psDecl->ui32BufferStride = psBuffer->ui32TotalSizeInBytes; - break; - } - case OPCODE_DCL_RESOURCE_RAW: - { - psDecl->ui32NumOperands = 1; - psDecl->ui32BufferStride = 4; - DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); - break; - } - case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED: - { - psDecl->ui32NumOperands = 1; - psDecl->sUAV.ui32GloballyCoherentAccess = 0; - - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); - - psDecl->sTGSM.ui32Stride = pui32Token[ui32OperandOffset++]; - psDecl->sTGSM.ui32Count = pui32Token[ui32OperandOffset++]; - break; - } - case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW: - { - psDecl->ui32NumOperands = 1; - psDecl->sUAV.ui32GloballyCoherentAccess = 0; - - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); - - psDecl->sTGSM.ui32Stride = 4; - psDecl->sTGSM.ui32Count = pui32Token[ui32OperandOffset++]; - break; - } - case OPCODE_DCL_STREAM: - { - psDecl->ui32NumOperands = 1; - DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); - break; - } - case OPCODE_DCL_GS_INSTANCE_COUNT: - { - psDecl->ui32NumOperands = 0; - psDecl->value.ui32GSInstanceCount = pui32Token[1]; - break; - } - default: - { - //Reached end of declarations - return 0; - } - } - - return pui32Token + ui32TokenLength; -} - -const uint32_t* DecodeInstruction(const uint32_t* pui32Token, Instruction* psInst, Shader* psShader, ShaderPhase *psPhase) -{ - uint32_t ui32TokenLength = DecodeInstructionLength(*pui32Token); - const uint32_t bExtended = DecodeIsOpcodeExtended(*pui32Token); - const OPCODE_TYPE eOpcode = DecodeOpcodeType(*pui32Token); - uint32_t ui32OperandOffset = 1; - -#ifdef _DEBUG - psInst->id = instructionID++; -#endif 
- - psInst->eOpcode = eOpcode; - - psInst->bSaturate = DecodeInstructionSaturate(*pui32Token); - psInst->ui32PreciseMask = DecodeInstructionPreciseMask(*pui32Token); - - psInst->bAddressOffset = 0; - - psInst->ui32FirstSrc = 1; - - psInst->iCausedSplit = 0; - - if (bExtended) - { - do - { - const uint32_t ui32ExtOpcodeToken = pui32Token[ui32OperandOffset]; - const EXTENDED_OPCODE_TYPE eExtType = DecodeExtendedOpcodeType(ui32ExtOpcodeToken); - - if (eExtType == EXTENDED_OPCODE_SAMPLE_CONTROLS) - { - struct {int i4 : 4;} sU; - struct {int i4 : 4;} sV; - struct {int i4 : 4;} sW; - - psInst->bAddressOffset = 1; - - sU.i4 = DecodeImmediateAddressOffset( - IMMEDIATE_ADDRESS_OFFSET_U, ui32ExtOpcodeToken); - sV.i4 = DecodeImmediateAddressOffset( - IMMEDIATE_ADDRESS_OFFSET_V, ui32ExtOpcodeToken); - sW.i4 = DecodeImmediateAddressOffset( - IMMEDIATE_ADDRESS_OFFSET_W, ui32ExtOpcodeToken); - - psInst->iUAddrOffset = sU.i4; - psInst->iVAddrOffset = sV.i4; - psInst->iWAddrOffset = sW.i4; - } - else if (eExtType == EXTENDED_OPCODE_RESOURCE_RETURN_TYPE) - { - psInst->xType = DecodeExtendedResourceReturnType(0, ui32ExtOpcodeToken); - psInst->yType = DecodeExtendedResourceReturnType(1, ui32ExtOpcodeToken); - psInst->zType = DecodeExtendedResourceReturnType(2, ui32ExtOpcodeToken); - psInst->wType = DecodeExtendedResourceReturnType(3, ui32ExtOpcodeToken); - } - else if (eExtType == EXTENDED_OPCODE_RESOURCE_DIM) - { - psInst->eResDim = DecodeExtendedResourceDimension(ui32ExtOpcodeToken); - } - - ui32OperandOffset++; - } - while (DecodeIsOpcodeExtended(pui32Token[ui32OperandOffset - 1])); - } - - if (eOpcode < NUM_OPCODES && eOpcode >= 0) - { - psShader->aiOpcodeUsed[eOpcode] = 1; - } - - switch (eOpcode) - { - //no operands - case OPCODE_CUT: - case OPCODE_EMIT: - case OPCODE_EMITTHENCUT: - case OPCODE_RET: - case OPCODE_LOOP: - case OPCODE_ENDLOOP: - case OPCODE_BREAK: - case OPCODE_ELSE: - case OPCODE_ENDIF: - case OPCODE_CONTINUE: - case OPCODE_DEFAULT: - case OPCODE_ENDSWITCH: - 
case OPCODE_NOP: - case OPCODE_HS_CONTROL_POINT_PHASE: - case OPCODE_HS_FORK_PHASE: - case OPCODE_HS_JOIN_PHASE: - { - psInst->ui32NumOperands = 0; - psInst->ui32FirstSrc = 0; - break; - } - case OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT: - case OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT: - { - psInst->ui32NumOperands = 0; - psInst->ui32FirstSrc = 0; - break; - } - case OPCODE_SYNC: - { - psInst->ui32NumOperands = 0; - psInst->ui32FirstSrc = 0; - psInst->ui32SyncFlags = DecodeSyncFlags(*pui32Token); - break; - } - - //1 operand - case OPCODE_EMIT_STREAM: - case OPCODE_CUT_STREAM: - case OPCODE_EMITTHENCUT_STREAM: - case OPCODE_CASE: - case OPCODE_SWITCH: - case OPCODE_LABEL: - { - psInst->ui32NumOperands = 1; - psInst->ui32FirstSrc = 0; - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); - break; - } - - case OPCODE_INTERFACE_CALL: - { - psInst->ui32NumOperands = 1; - psInst->ui32FirstSrc = 0; - psInst->ui32FuncIndexWithinInterface = pui32Token[ui32OperandOffset]; - ui32OperandOffset++; - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); - - break; - } - - /* Floating point instruction decodes */ - - //Instructions with two operands go here - case OPCODE_MOV: - { - psInst->ui32NumOperands = 2; - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[1]); - break; - } - case OPCODE_LOG: - case OPCODE_RSQ: - case OPCODE_EXP: - case OPCODE_SQRT: - case OPCODE_ROUND_PI: - case OPCODE_ROUND_NI: - case OPCODE_ROUND_Z: - case OPCODE_ROUND_NE: - case OPCODE_FRC: - case OPCODE_FTOU: - case OPCODE_FTOI: - case OPCODE_UTOF: - case OPCODE_ITOF: - case OPCODE_INEG: - case OPCODE_IMM_ATOMIC_ALLOC: - case OPCODE_IMM_ATOMIC_CONSUME: - case OPCODE_DMOV: - case OPCODE_DTOF: - case OPCODE_FTOD: - case OPCODE_DRCP: - case OPCODE_COUNTBITS: - case OPCODE_FIRSTBIT_HI: - case 
OPCODE_FIRSTBIT_LO: - case OPCODE_FIRSTBIT_SHI: - case OPCODE_BFREV: - case OPCODE_F32TOF16: - case OPCODE_F16TOF32: - case OPCODE_RCP: - case OPCODE_DERIV_RTX: - case OPCODE_DERIV_RTY: - case OPCODE_DERIV_RTX_COARSE: - case OPCODE_DERIV_RTX_FINE: - case OPCODE_DERIV_RTY_COARSE: - case OPCODE_DERIV_RTY_FINE: - case OPCODE_NOT: - case OPCODE_BUFINFO: - { - psInst->ui32NumOperands = 2; - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[1]); - break; - } - - //Instructions with three operands go here - case OPCODE_SINCOS: - { - psInst->ui32FirstSrc = 2; - //Intentional fall-through - } - case OPCODE_IMIN: - case OPCODE_UMIN: - case OPCODE_UMAX: - case OPCODE_MIN: - case OPCODE_IMAX: - case OPCODE_MAX: - case OPCODE_MUL: - case OPCODE_DIV: - case OPCODE_ADD: - case OPCODE_DP2: - case OPCODE_DP3: - case OPCODE_DP4: - case OPCODE_NE: - case OPCODE_OR: - case OPCODE_XOR: - case OPCODE_LT: - case OPCODE_IEQ: - case OPCODE_IADD: - case OPCODE_AND: - case OPCODE_GE: - case OPCODE_IGE: - case OPCODE_EQ: - case OPCODE_USHR: - case OPCODE_ISHL: - case OPCODE_ISHR: - case OPCODE_LD: - case OPCODE_ILT: - case OPCODE_INE: - case OPCODE_UGE: - case OPCODE_ULT: - case OPCODE_ATOMIC_AND: - case OPCODE_ATOMIC_IADD: - case OPCODE_ATOMIC_OR: - case OPCODE_ATOMIC_XOR: - case OPCODE_ATOMIC_IMAX: - case OPCODE_ATOMIC_IMIN: - case OPCODE_ATOMIC_UMAX: - case OPCODE_ATOMIC_UMIN: - case OPCODE_DADD: - case OPCODE_DMAX: - case OPCODE_DMIN: - case OPCODE_DMUL: - case OPCODE_DEQ: - case OPCODE_DGE: - case OPCODE_DLT: - case OPCODE_DNE: - case OPCODE_DDIV: - { - psInst->ui32NumOperands = 3; - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[1]); - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[2]); 
- break; - } - //Instructions with four operands go here - case OPCODE_MAD: - case OPCODE_MOVC: - case OPCODE_IMAD: - case OPCODE_UDIV: - case OPCODE_LOD: - case OPCODE_SAMPLE: - case OPCODE_GATHER4: - case OPCODE_LD_MS: - case OPCODE_UBFE: - case OPCODE_IBFE: - case OPCODE_ATOMIC_CMP_STORE: - case OPCODE_IMM_ATOMIC_IADD: - case OPCODE_IMM_ATOMIC_AND: - case OPCODE_IMM_ATOMIC_OR: - case OPCODE_IMM_ATOMIC_XOR: - case OPCODE_IMM_ATOMIC_EXCH: - case OPCODE_IMM_ATOMIC_IMAX: - case OPCODE_IMM_ATOMIC_IMIN: - case OPCODE_IMM_ATOMIC_UMAX: - case OPCODE_IMM_ATOMIC_UMIN: - case OPCODE_DMOVC: - case OPCODE_DFMA: - case OPCODE_IMUL: - { - psInst->ui32NumOperands = 4; - - if (eOpcode == OPCODE_IMUL || eOpcode == OPCODE_UDIV) - { - psInst->ui32FirstSrc = 2; - } - - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[1]); - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[2]); - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[3]); - break; - } - case OPCODE_GATHER4_PO: - case OPCODE_SAMPLE_L: - case OPCODE_BFI: - case OPCODE_SWAPC: - case OPCODE_IMM_ATOMIC_CMP_EXCH: - { - psInst->ui32NumOperands = 5; - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[1]); - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[2]); - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[3]); - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[4]); - break; - } - case OPCODE_GATHER4_C: - case OPCODE_SAMPLE_C: - case OPCODE_SAMPLE_C_LZ: - case OPCODE_SAMPLE_B: - { - psInst->ui32NumOperands = 5; - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, 
&psInst->asOperands[0]); - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[1]); - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[2]); - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[3]); - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[4]); - - /* sample_b is not a shadow sampler, others need flagging */ - if (eOpcode != OPCODE_SAMPLE_B) - { - MarkTextureAsShadow(&psShader->sInfo, psPhase->psDecl, &psInst->asOperands[2]); - } - - break; - } - case OPCODE_GATHER4_PO_C: - case OPCODE_SAMPLE_D: - { - psInst->ui32NumOperands = 6; - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[1]); - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[2]); - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[3]); - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[4]); - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[5]); - - /* sample_d is not a shadow sampler, others need flagging */ - if (eOpcode != OPCODE_SAMPLE_D) - { - MarkTextureAsShadow(&psShader->sInfo, - psPhase->psDecl, - &psInst->asOperands[2]); - } - break; - } - case OPCODE_IF: - case OPCODE_BREAKC: - case OPCODE_CONTINUEC: - case OPCODE_RETC: - case OPCODE_DISCARD: - { - psInst->eBooleanTestType = DecodeInstrTestBool(*pui32Token); - psInst->ui32NumOperands = 1; - psInst->ui32FirstSrc = 0; // no destination registers - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); - break; - } - case OPCODE_CALLC: - { - psInst->eBooleanTestType = DecodeInstrTestBool(*pui32Token); - psInst->ui32NumOperands = 2; - ui32OperandOffset += DecodeOperand(pui32Token + 
ui32OperandOffset, &psInst->asOperands[0]); - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[1]); - break; - } - case OPCODE_CUSTOMDATA: - { - psInst->ui32NumOperands = 0; - ui32TokenLength = pui32Token[1]; - break; - } - case OPCODE_EVAL_CENTROID: - { - psInst->ui32NumOperands = 2; - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[1]); - break; - } - case OPCODE_EVAL_SAMPLE_INDEX: - case OPCODE_EVAL_SNAPPED: - case OPCODE_STORE_UAV_TYPED: - case OPCODE_LD_UAV_TYPED: - case OPCODE_LD_RAW: - case OPCODE_STORE_RAW: - { - psInst->ui32NumOperands = 3; - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[1]); - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[2]); - break; - } - case OPCODE_STORE_STRUCTURED: - case OPCODE_LD_STRUCTURED: - { - psInst->ui32NumOperands = 4; - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[1]); - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[2]); - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[3]); - break; - } - case OPCODE_RESINFO: - { - psInst->ui32NumOperands = 3; - - psInst->eResInfoReturnType = DecodeResInfoReturnType(pui32Token[0]); - - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[1]); - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[2]); - break; - } - case OPCODE_SAMPLE_INFO: - { - psInst->ui32NumOperands = 2; - 
- psInst->eResInfoReturnType = DecodeResInfoReturnType(pui32Token[0]); - - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); - ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[1]); - break; - } - case OPCODE_MSAD: - default: - { - ASSERT(0); - break; - } - } - - // For opcodes that sample textures, mark which samplers are used by each texture - { - uint32_t ui32TextureRegisterNumber = 0; - uint32_t ui32SamplerRegisterNumber = 0; - uint32_t bTextureSampleInstruction = 0; - switch (eOpcode) - { - case OPCODE_GATHER4: - // dest, coords, tex, sampler - ui32TextureRegisterNumber = 2; - ui32SamplerRegisterNumber = 3; - bTextureSampleInstruction = 1; - break; - case OPCODE_GATHER4_PO: - //dest, coords, offset, tex, sampler - ui32TextureRegisterNumber = 3; - ui32SamplerRegisterNumber = 4; - bTextureSampleInstruction = 1; - break; - case OPCODE_GATHER4_C: - //dest, coords, tex, sampler srcReferenceValue - ui32TextureRegisterNumber = 2; - ui32SamplerRegisterNumber = 3; - bTextureSampleInstruction = 1; - break; - case OPCODE_GATHER4_PO_C: - //dest, coords, offset, tex, sampler, srcReferenceValue - ui32TextureRegisterNumber = 3; - ui32SamplerRegisterNumber = 4; - bTextureSampleInstruction = 1; - break; - case OPCODE_SAMPLE: - case OPCODE_SAMPLE_L: - case OPCODE_SAMPLE_C: - case OPCODE_SAMPLE_C_LZ: - case OPCODE_SAMPLE_B: - case OPCODE_SAMPLE_D: - // dest, coords, tex, sampler [, reference] - ui32TextureRegisterNumber = 2; - ui32SamplerRegisterNumber = 3; - bTextureSampleInstruction = 1; - break; - default: - break; - } - - if (bTextureSampleInstruction) - { - MarkTextureSamplerPair(&psShader->sInfo, - psPhase->psDecl, - &psInst->asOperands[ui32TextureRegisterNumber], - &psInst->asOperands[ui32SamplerRegisterNumber], - psShader->textureSamplers); - } - } - - return pui32Token + ui32TokenLength; -} - -const uint32_t* DecodeShaderPhase(const uint32_t* pui32Tokens, - Shader* psShader, - const 
SHADER_PHASE_TYPE ePhaseType, - ShaderPhase *psPhase) -{ - const uint32_t* pui32CurrentToken = pui32Tokens; - const uint32_t ui32ShaderLength = psShader->ui32ShaderLength; - - psPhase->ePhase = ePhaseType; - //Using ui32ShaderLength as the declaration and instruction count - //will allocate more than enough memory. Avoids having to - //traverse the entire shader just to get the real counts. - - psPhase->psDecl.clear(); - psPhase->psDecl.reserve(ui32ShaderLength); - - while (1) //Keep going until we reach the first non-declaration token, or the end of the shader. - { - psPhase->psDecl.push_back(Declaration()); - const uint32_t* pui32Result = DecodeDeclaration(psShader, pui32CurrentToken, &psPhase->psDecl[psPhase->psDecl.size() - 1], psPhase); - - if (pui32Result) - { - pui32CurrentToken = pui32Result; - - if (pui32CurrentToken >= (psShader->pui32FirstToken + ui32ShaderLength)) - { - break; - } - } - else - { - psPhase->psDecl.pop_back(); // Remove the last one, it wasn't needed after all - break; - } - } - - -//Instructions - psPhase->psInst.clear(); - psPhase->psInst.reserve(ui32ShaderLength); - - while (pui32CurrentToken < (psShader->pui32FirstToken + ui32ShaderLength)) - { - psPhase->psInst.push_back(Instruction()); - const uint32_t* nextInstr = DecodeInstruction(pui32CurrentToken, &psPhase->psInst[psPhase->psInst.size() - 1], psShader, psPhase); - -#ifdef _DEBUG - if (nextInstr == pui32CurrentToken) - { - ASSERT(0); - break; - } -#endif - - if (psPhase->psInst[psPhase->psInst.size() - 1].eOpcode == OPCODE_HS_FORK_PHASE || psPhase->psInst[psPhase->psInst.size() - 1].eOpcode == OPCODE_HS_JOIN_PHASE) - { - psPhase->psInst.pop_back(); - return pui32CurrentToken; - } - pui32CurrentToken = nextInstr; - } - - return pui32CurrentToken; -} - -const void AllocateHullPhaseArrays(const uint32_t* pui32Tokens, - Shader* psShader) -{ - const uint32_t* pui32CurrentToken = pui32Tokens; - const uint32_t ui32ShaderLength = psShader->ui32ShaderLength; - uint32_t ui32PhaseCount = 2; 
// Always the main phase and the HS global declarations - uint32_t i; - - while (1) //Keep going until we reach the first non-declaration token, or the end of the shader. - { - uint32_t ui32TokenLength = DecodeInstructionLength(*pui32CurrentToken); - const OPCODE_TYPE eOpcode = DecodeOpcodeType(*pui32CurrentToken); - - if (eOpcode == OPCODE_CUSTOMDATA) - { - ui32TokenLength = pui32CurrentToken[1]; - } - - pui32CurrentToken = pui32CurrentToken + ui32TokenLength; - - switch (eOpcode) - { - case OPCODE_HS_CONTROL_POINT_PHASE: - case OPCODE_HS_JOIN_PHASE: - case OPCODE_HS_FORK_PHASE: - ui32PhaseCount++; - break; - default: - break; - } - - if (pui32CurrentToken >= (psShader->pui32FirstToken + ui32ShaderLength)) - { - break; - } - } - - psShader->asPhases.clear(); - psShader->asPhases.resize(ui32PhaseCount); - for (i = 0; i < ui32PhaseCount; i++) - psShader->asPhases[i].ui32InstanceCount = 1; -} - -const uint32_t* DecodeHullShader(const uint32_t* pui32Tokens, Shader* psShader) -{ - const uint32_t* pui32CurrentToken = pui32Tokens; - const uint32_t ui32ShaderLength = psShader->ui32ShaderLength; - ShaderPhase *psPhase; - - AllocateHullPhaseArrays(pui32Tokens, psShader); - - // Index 1 is HS_GLOBAL_DECL - psShader->asPhases[1].psInst.clear(); - psShader->asPhases[1].psDecl.clear(); - psShader->asPhases[1].ePhase = HS_GLOBAL_DECL_PHASE; - psShader->asPhases[1].ui32InstanceCount = 1; - - // The next phase to parse in. - psPhase = &psShader->asPhases[2]; - - //Keep going until we have done all phases or the end of the shader. 
- while (1) - { - Declaration newDecl; - const uint32_t* pui32Result = DecodeDeclaration(psShader, pui32CurrentToken, &newDecl, psPhase); - - if (pui32Result) - { - pui32CurrentToken = pui32Result; - - if (newDecl.eOpcode == OPCODE_HS_CONTROL_POINT_PHASE) - { - pui32CurrentToken = DecodeShaderPhase(pui32CurrentToken, psShader, HS_CTRL_POINT_PHASE, psPhase); - psPhase++; - } - else if (newDecl.eOpcode == OPCODE_HS_FORK_PHASE) - { - pui32CurrentToken = DecodeShaderPhase(pui32CurrentToken, psShader, HS_FORK_PHASE, psPhase++); - } - else if (newDecl.eOpcode == OPCODE_HS_JOIN_PHASE) - { - pui32CurrentToken = DecodeShaderPhase(pui32CurrentToken, psShader, HS_JOIN_PHASE, psPhase++); - } - else - { - psShader->asPhases[1].psDecl.push_back(newDecl); - } - - if (pui32CurrentToken >= (psShader->pui32FirstToken + ui32ShaderLength)) - { - break; - } - } - else - { - break; - } - } - - return pui32CurrentToken; -} - -void Decode(const uint32_t* pui32Tokens, Shader* psShader) -{ - const uint32_t* pui32CurrentToken = pui32Tokens; - const uint32_t ui32ShaderLength = pui32Tokens[1]; - - psShader->ui32MajorVersion = DecodeProgramMajorVersion(*pui32CurrentToken); - psShader->ui32MinorVersion = DecodeProgramMinorVersion(*pui32CurrentToken); - psShader->eShaderType = DecodeShaderType(*pui32CurrentToken); - - pui32CurrentToken++;//Move to shader length - psShader->ui32ShaderLength = ui32ShaderLength; - pui32CurrentToken++;//Move to after shader length (usually a declaration) - - psShader->pui32FirstToken = pui32Tokens; - - if (psShader->eShaderType == HULL_SHADER) - { - // DecodeHullShader will allocate psShader->asPhases array. 
- pui32CurrentToken = DecodeHullShader(pui32CurrentToken, psShader); - return; - } - else - { - psShader->asPhases.clear(); - psShader->asPhases.resize(1); - } - - // Phase 0 is always the main phase - psShader->asPhases[0].ui32InstanceCount = 1; - - DecodeShaderPhase(pui32CurrentToken, psShader, MAIN_PHASE, &psShader->asPhases[0]); -} - -Shader* DecodeDXBC(uint32_t* data, uint32_t decodeFlags) -{ - Shader* psShader; - DXBCContainerHeader* header = (DXBCContainerHeader*)data; - uint32_t i; - uint32_t chunkCount; - uint32_t* chunkOffsets; - ReflectionChunks refChunks; - uint32_t* shaderChunk = 0; - - if (header->fourcc != FOURCC_DXBC) - { - ASSERT(0 && "Invalid shader type (DX9 shaders no longer supported)!"); - } - - refChunks.pui32Inputs = NULL; - refChunks.pui32Interfaces = NULL; - refChunks.pui32Outputs = NULL; - refChunks.pui32Resources = NULL; - refChunks.pui32Inputs11 = NULL; - refChunks.pui32Outputs11 = NULL; - refChunks.pui32OutputsWithStreams = NULL; - refChunks.pui32PatchConstants = NULL; - refChunks.pui32PatchConstants11 = NULL; - - chunkOffsets = (uint32_t*)(header + 1); - - chunkCount = header->chunkCount; - - for (i = 0; i < chunkCount; ++i) - { - uint32_t offset = chunkOffsets[i]; - - DXBCChunkHeader* chunk = (DXBCChunkHeader*)((char*)data + offset); - - switch (chunk->fourcc) - { - case FOURCC_ISGN: - { - refChunks.pui32Inputs = (uint32_t*)(chunk + 1); - break; - } - case FOURCC_ISG1: - { - refChunks.pui32Inputs11 = (uint32_t*)(chunk + 1); - break; - } - case FOURCC_RDEF: - { - refChunks.pui32Resources = (uint32_t*)(chunk + 1); - break; - } - case FOURCC_IFCE: - { - refChunks.pui32Interfaces = (uint32_t*)(chunk + 1); - break; - } - case FOURCC_OSGN: - { - refChunks.pui32Outputs = (uint32_t*)(chunk + 1); - break; - } - case FOURCC_OSG1: - { - refChunks.pui32Outputs11 = (uint32_t*)(chunk + 1); - break; - } - case FOURCC_OSG5: - { - refChunks.pui32OutputsWithStreams = (uint32_t*)(chunk + 1); - break; - } - case FOURCC_SHDR: - case FOURCC_SHEX: - { - 
shaderChunk = (uint32_t*)(chunk + 1); - break; - } - case FOURCC_PSGN: - { - refChunks.pui32PatchConstants = (uint32_t*)(chunk + 1); - break; - } - case FOURCC_PSG1: - { - refChunks.pui32PatchConstants11 = (uint32_t*)(chunk + 1); - break; - } - case FOURCC_STAT: - case FOURCC_SFI0: - { - break; // Ignored - } - default: - { -// ASSERT(0); // Uncomment this to hunt for unknown chunks later on. - break; - } - } - } - - if (shaderChunk) - { - uint32_t ui32MajorVersion; - uint32_t ui32MinorVersion; - - psShader = new Shader(); - - ui32MajorVersion = DecodeProgramMajorVersion(*shaderChunk); - ui32MinorVersion = DecodeProgramMinorVersion(*shaderChunk); - - LoadShaderInfo(ui32MajorVersion, - ui32MinorVersion, - &refChunks, - &psShader->sInfo, decodeFlags); - - Decode(shaderChunk, psShader); - - return psShader; - } - - return 0; -} diff --git a/third_party/HLSLcc/src/internal_includes/ControlFlowGraph.h b/third_party/HLSLcc/src/internal_includes/ControlFlowGraph.h deleted file mode 100644 index b9263cf..0000000 --- a/third_party/HLSLcc/src/internal_includes/ControlFlowGraph.h +++ /dev/null @@ -1,151 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include - -#include - -struct Instruction; -class Operand; - -namespace HLSLcc -{ - using namespace std; - -namespace ControlFlow -{ - class BasicBlock; - - class ControlFlowGraph - { - friend class BasicBlock; - public: - ControlFlowGraph() - : m_BlockMap() - , m_BlockStorage() - {} - - typedef std::vector > BasicBlockStorage; - - const BasicBlock &Build(const Instruction* firstInstruction, const Instruction* endInstruction); - - // Only works for instructions that start the basic block - const BasicBlock *GetBasicBlockForInstruction(const Instruction *instruction) const; - - // non-const version for BasicBlock - BasicBlock *GetBasicBlockForInstruction(const Instruction *instruction); - - const BasicBlockStorage &AllBlocks() const { return m_BlockStorage; } - private: - - // Map for storing the created basic 
blocks. Map key is the pointer to the first instruction in the block - typedef std::map BasicBlockMap; - - BasicBlockMap m_BlockMap; - - // auto_ptr -type storage for multiple BasicBlocks. BlockMap above only has pointers into these - BasicBlockStorage m_BlockStorage; - }; - - - class BasicBlock - { - friend class ControlFlowGraph; - public: - // A set of register indices, one per each vec4 component per register - typedef std::set RegisterSet; - // The connections (either incoming or outgoing) from this block. The instruction is the same one as the key in ControlFlowGraph to that basic block - typedef std::set ConnectionSet; - - struct Definition - { - Definition(const Instruction* i = nullptr, const Operand* o = nullptr) - : m_Instruction(i) - , m_Operand(o) - {} - - Definition(const Definition& a) = default; - Definition(Definition&& a) = default; - ~Definition() = default; - - Definition& operator=(const Definition& a) = default; - Definition& operator=(Definition&& a) = default; - - bool operator==(const Definition& a) const - { - if (a.m_Instruction != m_Instruction) - return false; - return a.m_Operand == m_Operand; - } - - bool operator!=(const Definition& a) const - { - if (a.m_Instruction == m_Instruction) - return false; - return a.m_Operand != m_Operand; - } - - bool operator<(const Definition& a) const - { - if (m_Instruction != a.m_Instruction) - return m_Instruction < a.m_Instruction; - return m_Operand < a.m_Operand; - } - - const Instruction *m_Instruction; - const Operand *m_Operand; - }; - - typedef std::set ReachableDefinitionsPerVariable; // A set of possibly visible definitions for one component of one vec4 variable - typedef std::map ReachableVariables; // A VisibleDefinitionSet for each variable*component. 
- - const Instruction *First() const { return m_First; } - const Instruction *Last() const { return m_Last; } - - const RegisterSet &UEVar() const { return m_UEVar; } - const RegisterSet &VarKill() const { return m_VarKill; } - - const ConnectionSet &Preceding() const { return m_Preceding; } - const ConnectionSet &Succeeding() const { return m_Succeeding; } - - const ReachableVariables &DEDef() const { return m_DEDef; } - const ReachableVariables &Reachable() const { return m_Reachable; } - - // Helper function: Do union of 2 ReachableVariables, store result in a. - static void RVarUnion(ReachableVariables &a, const ReachableVariables &b); - - private: - - // Generate a basic block. Private constructor, can only be constructed from ControlFlowGraph::Build() - BasicBlock(const Instruction *psFirst, ControlFlowGraph &graph, const Instruction *psPrecedingBlockHead, const Instruction* psEnd); - - // Walk through the instructions and build UEVar and VarKill sets, create succeeding nodes if they don't exist already. - void Build(); - - bool RebuildReachable(); // Rebuild m_Reachable from preceding blocks and this one. Returns true if current value changed. - - - BasicBlock * AddChildBasicBlock(const Instruction *psFirst); - - private: - ControlFlowGraph &m_Graph; // The graph object containing this block - - const Instruction *m_First; // The first instruction in the basic block - const Instruction *m_Last; // The last instruction in the basic block. Either OPCODE_RET or a branch/jump/loop instruction - const Instruction *m_End; // past-the-end pointer - - RegisterSet m_UEVar; // Upwards-exposed variables (temps that need definition from upstream and are used in this basic block) - RegisterSet m_VarKill; // Set of variables that are defined in this block. 
- - ConnectionSet m_Preceding; // Set of blocks that immediately precede this block in the CFG - ConnectionSet m_Succeeding; // Set of blocks that follow this block in the CFG - - ReachableVariables m_DEDef; // Downward-exposed definitions from this basic block. Always only one item per set. - - ReachableVariables m_Reachable; // The set of variable definitions that are visible at the end of this block. - }; -} -} diff --git a/third_party/HLSLcc/src/internal_includes/ControlFlowGraphUtils.h b/third_party/HLSLcc/src/internal_includes/ControlFlowGraphUtils.h deleted file mode 100644 index 69ad807..0000000 --- a/third_party/HLSLcc/src/internal_includes/ControlFlowGraphUtils.h +++ /dev/null @@ -1,30 +0,0 @@ -#pragma once - -struct Instruction; - -namespace HLSLcc -{ -namespace ControlFlow -{ - class Utils - { - public: - // For a given flow-control instruction, find the corresponding jump location: - // If the input is OPCODE_IF, then find the next same-level ELSE or ENDIF +1 - // For ELSE, find same level ENDIF + 1 - // For BREAK/BREAKC, find next ENDLOOP or ENDSWITCH + 1 - // For SWITCH, find next same-level CASE/DEFAULT (skip multiple consecutive case/default labels) or ENDSWITCH + 1 - // For ENDLOOP, find previous same-level LOOP + 1 - // For CASE/DEFAULT, find next same-level CASE/DEFAULT or ENDSWITCH + 1, skip multiple consecutive case/default labels - // For CONTINUE/C the previous LOOP + 1 - // Note that LOOP/ENDSWITCH itself is nothing but a label but it still starts a new basic block. - // Note that CASE labels fall through. - // Always returns the beginning of the next block, so skip multiple CASE/DEFAULT labels etc. - // If sawEndSwitch != null, will bet set to true if the label skipping saw past ENDSWITCH - // If needConnectToParent != null, will be set to true if sawEndSwitch == true and there are one or more case labels directly before it. 
- static const Instruction * GetJumpPoint(const Instruction *psStart, bool *sawEndSwitch = 0, bool *needConnectToParent = 0); - - static const Instruction *GetNextNonLabelInstruction(const Instruction *psStart, bool *sawEndSwitch = 0); - }; -} -} diff --git a/third_party/HLSLcc/src/internal_includes/DataTypeAnalysis.h b/third_party/HLSLcc/src/internal_includes/DataTypeAnalysis.h deleted file mode 100644 index e01eb18..0000000 --- a/third_party/HLSLcc/src/internal_includes/DataTypeAnalysis.h +++ /dev/null @@ -1,15 +0,0 @@ -#pragma once - -#include "include/ShaderInfo.h" -#include - -class HLSLCrossCompilerContext; -struct Instruction; - -namespace HLSLcc -{ -namespace DataTypeAnalysis -{ - void SetDataTypes(HLSLCrossCompilerContext* psContext, std::vector &instructions, uint32_t ui32TempCount, std::vector &results); -} -} diff --git a/third_party/HLSLcc/src/internal_includes/Declaration.h b/third_party/HLSLcc/src/internal_includes/Declaration.h deleted file mode 100644 index 0586a22..0000000 --- a/third_party/HLSLcc/src/internal_includes/Declaration.h +++ /dev/null @@ -1,118 +0,0 @@ -#pragma once - -#include -#include -#include "internal_includes/tokens.h" -#include "internal_includes/Operand.h" - -typedef struct ICBVec4_TAG -{ - uint32_t a; - uint32_t b; - uint32_t c; - uint32_t d; -} ICBVec4; - -#define ACCESS_FLAG_READ 0x1 -#define ACCESS_FLAG_WRITE 0x2 -#define ACCESS_FLAG_ATOMIC 0x4 - -struct Declaration -{ - Declaration() : - eOpcode(OPCODE_INVALID), - ui32NumOperands(0), - ui32BufferStride(0), - ui32TableLength(0), - ui32IsShadowTex(0) - {} - - OPCODE_TYPE eOpcode; - - uint32_t ui32NumOperands; - - Operand asOperands[2]; - - std::vector asImmediateConstBuffer; - //The declaration can set one of these - //values depending on the opcode. 
- union - { - uint32_t ui32GlobalFlags; - uint32_t ui32NumTemps; - RESOURCE_DIMENSION eResourceDimension; - INTERPOLATION_MODE eInterpolation; - PRIMITIVE_TOPOLOGY eOutputPrimitiveTopology; - PRIMITIVE eInputPrimitive; - uint32_t ui32MaxOutputVertexCount; - TESSELLATOR_DOMAIN eTessDomain; - TESSELLATOR_PARTITIONING eTessPartitioning; - TESSELLATOR_OUTPUT_PRIMITIVE eTessOutPrim; - uint32_t aui32WorkGroupSize[3]; - uint32_t ui32HullPhaseInstanceCount; - float fMaxTessFactor; - uint32_t ui32IndexRange; - uint32_t ui32GSInstanceCount; - SB_SAMPLER_MODE eSamplerMode; // For sampler declarations, the sampler mode. - - struct Interface_TAG - { - uint32_t ui32InterfaceID; - uint32_t ui32NumFuncTables; - uint32_t ui32ArraySize; - } iface; - } value; - - uint32_t ui32BufferStride; - - struct UAV_TAG - { - UAV_TAG() : - ui32GloballyCoherentAccess(0), - bCounter(0), - Type(RETURN_TYPE_UNORM), - ui32NumComponents(0), - ui32AccessFlags(0) - { - } - - uint32_t ui32GloballyCoherentAccess; - uint8_t bCounter; - RESOURCE_RETURN_TYPE Type; - uint32_t ui32NumComponents; - uint32_t ui32AccessFlags; - } sUAV; - - struct TGSM_TAG - { - uint32_t ui32Stride; - uint32_t ui32Count; - - TGSM_TAG() : - ui32Stride(0), - ui32Count(0) - { - } - } sTGSM; - - struct IndexableTemp_TAG - { - uint32_t ui32RegIndex; - uint32_t ui32RegCount; - uint32_t ui32RegComponentSize; - - IndexableTemp_TAG() : - ui32RegIndex(0), - ui32RegCount(0), - ui32RegComponentSize(0) - { - } - } sIdxTemp; - - uint32_t ui32TableLength; - - uint32_t ui32IsShadowTex; - - // Set indexed by sampler register number. 
- std::set samplersUsed; -}; diff --git a/third_party/HLSLcc/src/internal_includes/HLSLCrossCompilerContext.h b/third_party/HLSLcc/src/internal_includes/HLSLCrossCompilerContext.h deleted file mode 100644 index fa2af76..0000000 --- a/third_party/HLSLcc/src/internal_includes/HLSLCrossCompilerContext.h +++ /dev/null @@ -1,81 +0,0 @@ -#pragma once - -#include -#include -#include -#include "bstrlib.h" - -class Shader; -class GLSLCrossDependencyData; -class ShaderPhase; -class Translator; -class Operand; -class HLSLccReflection; - -class HLSLCrossCompilerContext -{ -public: - HLSLCrossCompilerContext(HLSLccReflection &refl) : - glsl(nullptr), - extensions(nullptr), - beforeMain(nullptr), - currentGLSLString(nullptr), - currentPhase(0), - indent(0), - flags(0), - psShader(nullptr), - psDependencies(nullptr), - inputPrefix(nullptr), - outputPrefix(nullptr), - psTranslator(nullptr), - m_Reflection(refl) - {} - - bstring glsl; - bstring extensions; - bstring beforeMain; - - bstring* currentGLSLString;//either glsl or earlyMain of current phase - - uint32_t currentPhase; - - int indent; - unsigned int flags; - - // Helper functions for checking flags - // Returns true if VULKAN_BINDINGS flag is set - bool IsVulkan() const; - - // Helper functions for checking flags - // Returns true if HLSLCC_FLAG_NVN_TARGET flag is set - bool IsSwitch() const; - - Shader* psShader; - GLSLCrossDependencyData* psDependencies; - const char *inputPrefix; // Prefix for shader inputs - const char *outputPrefix; // Prefix for shader outputs - - void DoDataTypeAnalysis(ShaderPhase *psPhase); - void ReserveFramebufferFetchInputs(); - - void ClearDependencyData(); - - void AddIndentation(); - - // Currently active translator - Translator *psTranslator; - - HLSLccReflection &m_Reflection; // Callbacks for bindings and diagnostic info - - // Retrieve the name for which the input or output is declared as. Takes into account possible redirections. 
- std::string GetDeclaredInputName(const Operand* psOperand, int *piRebase, int iIgnoreRedirect, uint32_t *puiIgnoreSwizzle) const; - std::string GetDeclaredOutputName(const Operand* psOperand, int* stream, uint32_t *puiIgnoreSwizzle, int *piRebase, int iIgnoreRedirect) const; - - bool OutputNeedsDeclaring(const Operand* psOperand, const int count); - - bool RequireExtension(const std::string &extName); - bool EnableExtension(const std::string &extName); - -private: - std::set m_EnabledExtensions; -}; diff --git a/third_party/HLSLcc/src/internal_includes/HLSLccToolkit.h b/third_party/HLSLcc/src/internal_includes/HLSLccToolkit.h deleted file mode 100644 index 6d7604e..0000000 --- a/third_party/HLSLcc/src/internal_includes/HLSLccToolkit.h +++ /dev/null @@ -1,134 +0,0 @@ -#pragma once -#include "hlslcc.h" -#include "bstrlib.h" -#include -#include -#include - -#include "internal_includes/Instruction.h" -#include "internal_includes/Operand.h" - -class HLSLCrossCompilerContext; -struct ConstantBuffer; - -namespace HLSLcc -{ - uint32_t GetNumberBitsSet(uint32_t a); - - uint32_t SVTTypeToFlag(const SHADER_VARIABLE_TYPE eType); - - SHADER_VARIABLE_TYPE TypeFlagsToSVTType(const uint32_t typeflags); - - const char * GetConstructorForType(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eType, const int components, bool useGLSLPrecision = true); - - const char * GetConstructorForTypeGLSL(const HLSLCrossCompilerContext *context, const SHADER_VARIABLE_TYPE eType, const int components, bool useGLSLPrecision); - - const char * GetConstructorForTypeMetal(const SHADER_VARIABLE_TYPE eType, const int components); - - std::string GetMatrixTypeName(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eBaseType, const int columns, const int rows); - - void AddSwizzleUsingElementCount(bstring dest, uint32_t count); - - int WriteMaskToComponentCount(uint32_t writeMask); - - uint32_t BuildComponentMaskFromElementCount(int count); - - // Returns true if 
we can do direct assignment between types (mostly for mediump<->highp floats etc) - bool DoAssignmentDataTypesMatch(SHADER_VARIABLE_TYPE dest, SHADER_VARIABLE_TYPE src); - - // Convert resource return type to SVT_ flags - uint32_t ResourceReturnTypeToFlag(const RESOURCE_RETURN_TYPE eType); - - SHADER_VARIABLE_TYPE ResourceReturnTypeToSVTType(const RESOURCE_RETURN_TYPE eType, const REFLECT_RESOURCE_PRECISION ePrec); - - RESOURCE_RETURN_TYPE SVTTypeToResourceReturnType(SHADER_VARIABLE_TYPE type); - - REFLECT_RESOURCE_PRECISION SVTTypeToPrecision(SHADER_VARIABLE_TYPE type); - - uint32_t ElemCountToAutoExpandFlag(uint32_t elemCount); - - bool IsOperationCommutative(int /* OPCODE_TYPE */ eOpCode); - - bool AreTempOperandsIdentical(const Operand * psA, const Operand * psB); - - int GetNumTextureDimensions(int /* RESOURCE_DIMENSION */ eResDim); - - SHADER_VARIABLE_TYPE SelectHigherType(SHADER_VARIABLE_TYPE a, SHADER_VARIABLE_TYPE b); - - // Returns true if the instruction adds 1 to the destination temp register - bool IsAddOneInstruction(const Instruction *psInst); - - bool CanDoDirectCast(const HLSLCrossCompilerContext *context, SHADER_VARIABLE_TYPE src, SHADER_VARIABLE_TYPE dest); - - bool IsUnityFlexibleInstancingBuffer(const ConstantBuffer* psCBuf); - - // Helper function to print floats with full precision - void PrintFloat(bstring b, float f); - - bstring GetEarlyMain(HLSLCrossCompilerContext *psContext); - bstring GetPostShaderCode(HLSLCrossCompilerContext *psContext); - - // Flags for ForeachOperand - // Process suboperands -#define FEO_FLAG_SUBOPERAND 1 - // Process src operands -#define FEO_FLAG_SRC_OPERAND 2 - // Process destination operands -#define FEO_FLAG_DEST_OPERAND 4 - // Convenience: Process all operands, both src and dest, and all suboperands -#define FEO_FLAG_ALL (FEO_FLAG_SUBOPERAND | FEO_FLAG_SRC_OPERAND | FEO_FLAG_DEST_OPERAND) - - // For_each for all operands within a range of instructions. Flags above. 
- template void ForEachOperand(ItrType _begin, ItrType _end, int flags, F callback) - { - ItrType inst = _begin; - while (inst != _end) - { - uint32_t i, k; - - if ((flags & FEO_FLAG_DEST_OPERAND) || (flags & FEO_FLAG_SUBOPERAND)) - { - for (i = 0; i < inst->ui32FirstSrc; i++) - { - if (flags & FEO_FLAG_SUBOPERAND) - { - for (k = 0; k < MAX_SUB_OPERANDS; k++) - { - if (inst->asOperands[i].m_SubOperands[k].get()) - { - callback(inst, inst->asOperands[i].m_SubOperands[k].get(), FEO_FLAG_SUBOPERAND); - } - } - } - if (flags & FEO_FLAG_DEST_OPERAND) - { - callback(inst, &inst->asOperands[i], FEO_FLAG_DEST_OPERAND); - } - } - } - - if ((flags & FEO_FLAG_SRC_OPERAND) || (flags & FEO_FLAG_SUBOPERAND)) - { - for (i = inst->ui32FirstSrc; i < inst->ui32NumOperands; i++) - { - if (flags & FEO_FLAG_SUBOPERAND) - { - for (k = 0; k < MAX_SUB_OPERANDS; k++) - { - if (inst->asOperands[i].m_SubOperands[k].get()) - { - callback(inst, inst->asOperands[i].m_SubOperands[k].get(), FEO_FLAG_SUBOPERAND); - } - } - } - if (flags & FEO_FLAG_SRC_OPERAND) - { - callback(inst, &inst->asOperands[i], FEO_FLAG_SRC_OPERAND); - } - } - } - - inst++; - } - } -} diff --git a/third_party/HLSLcc/src/internal_includes/Instruction.h b/third_party/HLSLcc/src/internal_includes/Instruction.h deleted file mode 100644 index a2826aa..0000000 --- a/third_party/HLSLcc/src/internal_includes/Instruction.h +++ /dev/null @@ -1,184 +0,0 @@ -#pragma once - -#include "internal_includes/Operand.h" -#include "internal_includes/tokens.h" -#include "include/ShaderInfo.h" -#include - -#define ATOMIC_ADDRESS_BASIC 0 -#define ATOMIC_ADDRESS_ARRAY_DYNAMIC 1 -#define ATOMIC_ADDRESS_STRUCT_DYNAMIC 2 - -#define TEXSMP_FLAG_NONE 0x0 -#define TEXSMP_FLAG_LOD 0x1 //LOD comes from operand -#define TEXSMP_FLAG_DEPTHCOMPARE 0x2 -#define TEXSMP_FLAG_FIRSTLOD 0x4 //LOD is 0 -#define TEXSMP_FLAG_BIAS 0x8 -#define TEXSMP_FLAG_GRAD 0x10 -//Gather specific flags -#define TEXSMP_FLAG_GATHER 0x20 -#define TEXSMP_FLAG_PARAMOFFSET 0x40 //Offset 
comes from operand - -struct Instruction -{ - Instruction() : - eOpcode(OPCODE_NOP), - eBooleanTestType(INSTRUCTION_TEST_ZERO), - ui32NumOperands(0), - ui32FirstSrc(0), - m_Uses(), - m_SkipTranslation(false), - m_InductorRegister(0), - bSaturate(0), - ui32SyncFlags(0), - ui32PreciseMask(0), - ui32FuncIndexWithinInterface(0), - eResInfoReturnType(RESINFO_INSTRUCTION_RETURN_FLOAT), - bAddressOffset(0), - iUAddrOffset(0), - iVAddrOffset(0), - iWAddrOffset(0), - xType(RETURN_TYPE_UNUSED), - yType(RETURN_TYPE_UNUSED), - zType(RETURN_TYPE_UNUSED), - wType(RETURN_TYPE_UNUSED), - eResDim(RESOURCE_DIMENSION_UNKNOWN), - iCausedSplit(0), - id(0) - { - m_LoopInductors[0] = m_LoopInductors[1] = m_LoopInductors[2] = m_LoopInductors[3] = 0; - } - - // For creating unit tests only. Create an instruction with temps (unless reg is 0xffffffff in which case use OPERAND_TYPE_INPUT/OUTPUT) - Instruction(uint64_t _id, OPCODE_TYPE opcode, uint32_t reg1 = 0, uint32_t reg1Mask = 0, uint32_t reg2 = 0, uint32_t reg2Mask = 0, uint32_t reg3 = 0, uint32_t reg3Mask = 0, uint32_t reg4 = 0, uint32_t reg4Mask = 0) : - ui32SyncFlags(0), - bSaturate(0), - ui32PreciseMask(0), - ui32FuncIndexWithinInterface(0), - eResInfoReturnType(RESINFO_INSTRUCTION_RETURN_FLOAT), - bAddressOffset(0), - iUAddrOffset(0), - iVAddrOffset(0), - iWAddrOffset(0), - xType(RETURN_TYPE_UNUSED), - yType(RETURN_TYPE_UNUSED), - zType(RETURN_TYPE_UNUSED), - wType(RETURN_TYPE_UNUSED), - eResDim(RESOURCE_DIMENSION_UNKNOWN), - iCausedSplit(0) - { - id = _id; - eOpcode = opcode; - eBooleanTestType = INSTRUCTION_TEST_ZERO; - ui32FirstSrc = 0; - ui32NumOperands = 0; - m_LoopInductors[0] = m_LoopInductors[1] = m_LoopInductors[2] = m_LoopInductors[3] = 0; - m_SkipTranslation = false; - m_InductorRegister = 0; - - if (reg1Mask == 0) - return; - - ui32NumOperands++; - asOperands[0].eType = reg1 == 0xffffffff ? OPERAND_TYPE_OUTPUT : OPERAND_TYPE_TEMP; - asOperands[0].ui32RegisterNumber = reg1 == 0xffffffff ? 
0 : reg1; - asOperands[0].ui32CompMask = reg1Mask; - asOperands[0].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; - - if (reg2Mask == 0) - return; - - ui32FirstSrc = 1; - ui32NumOperands++; - - asOperands[1].eType = reg2 == 0xffffffff ? OPERAND_TYPE_INPUT : OPERAND_TYPE_TEMP; - asOperands[1].ui32RegisterNumber = reg2 == 0xffffffff ? 0 : reg2; - asOperands[1].ui32CompMask = reg2Mask; - asOperands[1].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; - - if (reg3Mask == 0) - return; - ui32NumOperands++; - - asOperands[2].eType = reg3 == 0xffffffff ? OPERAND_TYPE_INPUT : OPERAND_TYPE_TEMP; - asOperands[2].ui32RegisterNumber = reg3 == 0xffffffff ? 0 : reg3; - asOperands[2].ui32CompMask = reg3Mask; - asOperands[2].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; - - if (reg4Mask == 0) - return; - ui32NumOperands++; - - asOperands[3].eType = reg4 == 0xffffffff ? OPERAND_TYPE_INPUT : OPERAND_TYPE_TEMP; - asOperands[3].ui32RegisterNumber = reg4 == 0xffffffff ? 0 : reg4; - asOperands[3].ui32CompMask = reg4Mask; - asOperands[3].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; - } - - // Returns true if this instruction is a conditional branch - bool IsConditionalBranchInstruction() const - { - switch (eOpcode) - { - case OPCODE_IF: - case OPCODE_BREAKC: - case OPCODE_CONTINUEC: - case OPCODE_RETC: - return true; - default: - return false; - } - } - - bool IsPartialPrecisionSamplerInstruction(const ShaderInfo &info, OPERAND_MIN_PRECISION *pType) const; - - // Flags for ChangeOperandTempRegister -#define UD_CHANGE_SUBOPERANDS 1 -#define UD_CHANGE_MAIN_OPERAND 2 -#define UD_CHANGE_ALL 3 - - void ChangeOperandTempRegister(Operand *psOperand, uint32_t oldReg, uint32_t newReg, uint32_t compMask, uint32_t flags, uint32_t rebase); - - - OPCODE_TYPE eOpcode; - INSTRUCTION_TEST_BOOLEAN eBooleanTestType; - uint32_t ui32SyncFlags; - uint32_t ui32NumOperands; - uint32_t ui32FirstSrc; - Operand asOperands[6]; - uint32_t bSaturate; - uint32_t ui32PreciseMask; - uint32_t ui32FuncIndexWithinInterface; - 
RESINFO_RETURN_TYPE eResInfoReturnType; - - int bAddressOffset; - int8_t iUAddrOffset; - int8_t iVAddrOffset; - int8_t iWAddrOffset; - RESOURCE_RETURN_TYPE xType, yType, zType, wType; - RESOURCE_DIMENSION eResDim; - int8_t iCausedSplit; // Nonzero if has caused a temp split. Later used by sampler datatype tweaking - - struct Use - { - Use() : m_Inst(0), m_Op(0) {} - Use(const Use& a) = default; - Use(Use&& a) = default; - Use(Instruction* inst, Operand* op) : m_Inst(inst), m_Op(op) {} - ~Use() = default; - - Use& operator=(const Use& a) = default; - Use& operator=(Use&& a) = default; - - Instruction* m_Inst; // The instruction that references the result of this instruction - Operand* m_Op; // The operand within the instruction above. Note: can also be suboperand. - }; - - std::vector m_Uses; // Array of use sites for the result(s) of this instruction, if any of the results is a temp reg. - - Instruction* m_LoopInductors[4]; // If OPCODE_LOOP and is suitable for transforming into for-loop, contains pointers to for initializer, end condition, breakc, and increment. 
- bool m_SkipTranslation; // If true, don't emit this instruction (currently used by the for loop translation) - uint32_t m_InductorRegister; // If non-zero, the inductor variable can be declared in the for statement, and this register number has been allocated for it - - uint64_t id; -}; diff --git a/third_party/HLSLcc/src/internal_includes/LoopTransform.h b/third_party/HLSLcc/src/internal_includes/LoopTransform.h deleted file mode 100644 index dacec4b..0000000 --- a/third_party/HLSLcc/src/internal_includes/LoopTransform.h +++ /dev/null @@ -1,8 +0,0 @@ -#pragma once - -class ShaderPhase; -class HLSLCrossCompilerContext; -namespace HLSLcc -{ - void DoLoopTransform(HLSLCrossCompilerContext *psContext, ShaderPhase &phase); -} diff --git a/third_party/HLSLcc/src/internal_includes/Operand.h b/third_party/HLSLcc/src/internal_includes/Operand.h deleted file mode 100644 index 59bae49..0000000 --- a/third_party/HLSLcc/src/internal_includes/Operand.h +++ /dev/null @@ -1,150 +0,0 @@ -#pragma once - -#include "internal_includes/tokens.h" -#include -#include - -enum { MAX_SUB_OPERANDS = 3 }; -class Operand; -class HLSLCrossCompilerContext; -struct Instruction; - -#if _MSC_VER -// We want to disable the "array will be default-initialized" warning, as that's exactly what we want -#pragma warning(disable: 4351) -#endif - -class Operand -{ -public: - typedef std::shared_ptr SubOperandPtr; - - Operand() - : - iExtended(), - eType(), - eModifier(), - eMinPrecision(), - iIndexDims(), - iWriteMask(), - iGSInput(), - iPSInOut(), - iWriteMaskEnabled(), - iArrayElements(), - iNumComponents(), - eSelMode(), - ui32CompMask(), - ui32Swizzle(), - aui32Swizzle(), - aui32ArraySizes(), - ui32RegisterNumber(), - afImmediates(), - adImmediates(), - eSpecialName(), - specialName(), - eIndexRep(), - m_SubOperands(), - aeDataType(), - m_Rebase(0), - m_Size(0), - m_Defines(), - m_ForLoopInductorName(0) -#ifdef _DEBUG - , id(0) -#endif - {} - - // Retrieve the mask of all the components this operand 
accesses (either reads from or writes to). - // Note that destination writemask does affect the effective access mask. - uint32_t GetAccessMask() const; - - // Returns the index of the highest accessed component, based on component mask - int GetMaxComponent() const; - - bool IsSwizzleReplicated() const; - - // Get the number of elements returned by operand, taking additional component mask into account - //e.g. - //.z = 1 - //.x = 1 - //.yw = 2 - uint32_t GetNumSwizzleElements(uint32_t ui32CompMask = OPERAND_4_COMPONENT_MASK_ALL) const; - - // When this operand is used as an input declaration, how many components does it have? - int GetNumInputElements(const HLSLCrossCompilerContext *psContext) const; - - // Retrieve the operand data type. - SHADER_VARIABLE_TYPE GetDataType(HLSLCrossCompilerContext* psContext, SHADER_VARIABLE_TYPE ePreferredTypeForImmediates = SVT_INT) const; - - // Returns 0 if the register used by the operand is per-vertex, or 1 if per-patch - int GetRegisterSpace(const HLSLCrossCompilerContext *psContext) const; - // Same as above but with explicit shader type and phase - int GetRegisterSpace(SHADER_TYPE eShaderType, SHADER_PHASE_TYPE eShaderPhaseType) const; - - // Find the operand that contains the dynamic index for this operand (array in constant buffer). - // When isAoS is true, we'll try to find the original index var to avoid additional calculations. - // needsIndexCalcRevert output will tell if we need to divide the value to get the correct index. 
- Operand* GetDynamicIndexOperand(HLSLCrossCompilerContext *psContext, const ShaderVarType* psVar, bool isAoS, bool *needsIndexCalcRevert) const; - - // Maps REFLECT_RESOURCE_PRECISION into OPERAND_MIN_PRECISION as much as possible - static OPERAND_MIN_PRECISION ResourcePrecisionToOperandPrecision(REFLECT_RESOURCE_PRECISION ePrec); - - int iExtended; - OPERAND_TYPE eType; - OPERAND_MODIFIER eModifier; - OPERAND_MIN_PRECISION eMinPrecision; - int iIndexDims; - int iWriteMask; - int iGSInput; - int iPSInOut; - int iWriteMaskEnabled; - int iArrayElements; - int iNumComponents; - - OPERAND_4_COMPONENT_SELECTION_MODE eSelMode; - uint32_t ui32CompMask; - uint32_t ui32Swizzle; - uint32_t aui32Swizzle[4]; - - uint32_t aui32ArraySizes[3]; - uint32_t ui32RegisterNumber; - //If eType is OPERAND_TYPE_IMMEDIATE32 - float afImmediates[4]; - //If eType is OPERAND_TYPE_IMMEDIATE64 - double adImmediates[4]; - - SPECIAL_NAME eSpecialName; - std::string specialName; - - OPERAND_INDEX_REPRESENTATION eIndexRep[3]; - - SubOperandPtr m_SubOperands[MAX_SUB_OPERANDS]; - - //One type for each component. - SHADER_VARIABLE_TYPE aeDataType[4]; - - uint32_t m_Rebase; // Rebase value, for constant array accesses. - uint32_t m_Size; // Component count, only for constant array access. - - struct Define - { - Define() : m_Inst(0), m_Op(0) {} - Define(const Define& a) = default; - Define(Define&& a) = default; - Define(Instruction* inst, Operand* op) : m_Inst(inst), m_Op(op) {} - ~Define() = default; - - Define& operator=(const Define& other) = default; - Define& operator=(Define&& other) = default; - - Instruction* m_Inst; // Instruction that writes to the temp - Operand* m_Op; // The (destination) operand within that instruction. - }; - - std::vector m_Defines; // Array of instructions whose results this operand can use. 
(only if eType == OPERAND_TYPE_TEMP) - uint32_t m_ForLoopInductorName; // If non-zero, this (eType==OPERAND_TYPE_TEMP) is an inductor variable used in for loop, and it has a special number as given here (overrides ui32RegisterNumber) - -#ifdef _DEBUG - uint64_t id; -#endif -}; diff --git a/third_party/HLSLcc/src/internal_includes/Shader.h b/third_party/HLSLcc/src/internal_includes/Shader.h deleted file mode 100644 index 98a716b..0000000 --- a/third_party/HLSLcc/src/internal_includes/Shader.h +++ /dev/null @@ -1,255 +0,0 @@ -#pragma once - -#include -#include -#include - -#include "growing_array.h" -#include "internal_includes/tokens.h" -#include "internal_includes/reflect.h" -#include "include/ShaderInfo.h" -#include "internal_includes/Instruction.h" -#include "internal_includes/Declaration.h" -#include "internal_includes/ControlFlowGraph.h" -#include "bstrlib.h" - -struct ConstantArrayChunk -{ - ConstantArrayChunk() : m_Size(0), m_AccessMask(0), m_Rebase(0), m_ComponentCount(0) {} - ConstantArrayChunk(uint32_t sz, uint32_t mask, Operand *firstUse) - : m_Size(sz), m_AccessMask(mask), m_Rebase(0), m_ComponentCount(0) - { - m_UseSites.push_back(firstUse); - } - - uint32_t m_Size; - uint32_t m_AccessMask; - uint32_t m_Rebase; - uint32_t m_ComponentCount; - - std::vector m_UseSites; -}; -typedef std::multimap ChunkMap; - -struct ConstantArrayInfo -{ - ConstantArrayInfo() : m_OrigDeclaration(0), m_Chunks() {} - - Declaration *m_OrigDeclaration; // Pointer to the original declaration of the const array - ChunkMap m_Chunks; // map of , same start offset might have multiple entries for different access masks -}; - -class ShaderPhase -{ -public: - ShaderPhase() - : - ePhase(MAIN_PHASE), - ui32InstanceCount(0), - postShaderCode(), - hasPostShaderCode(0), - earlyMain(), - ui32OrigTemps(0), - ui32TotalTemps(0), - psTempDeclaration(NULL), - pui32SplitInfo(), - peTempTypes(), - acInputNeedsRedirect(), - acOutputNeedsRedirect(), - acPatchConstantsNeedsRedirect(), - m_CFG(), - 
m_CFGInitialized(false), - m_NextFreeTempRegister(1), - m_NextTexCoordTemp(0) - {} - - void ResolveUAVProperties(const ShaderInfo& sInfo); - - void UnvectorizeImmMoves(); // Transform MOV tX.xyz, (0, 1, 2) into MOV tX.x, 0; MOV tX.y, 1; MOV tX.z, 2 to make datatype analysis easier - - void PruneConstArrays(); // Walk through everything that accesses a const array to see if we could make it smaller - - void ExpandSWAPCs(); // Expand all SWAPC opcodes into a bunch of MOVCs. Must be done first! - - ConstantArrayInfo m_ConstantArrayInfo; - - std::vector psDecl; - std::vector psInst; - - SHADER_PHASE_TYPE ePhase; - uint32_t ui32InstanceCount; // In case of hull shaders, how many instances this phase needs to have. Defaults to 1. - bstring postShaderCode;//End of main or before emit() - int hasPostShaderCode; - - bstring earlyMain;//Code to be inserted at the start of phase - - uint32_t ui32OrigTemps; // The number of temporaries this phase originally declared - uint32_t ui32TotalTemps; // The number of temporaries this phase has now - Declaration *psTempDeclaration; // Shortcut to the OPCODE_DCL_TEMPS opcode - - // The split table is a table containing the index of the original register this register was split out from, or 0xffffffff - // Format: lowest 16 bits: original register. bits 16-23: rebase (eg value of 1 means .yzw was changed to .xyz): bits 24-31: component count - std::vector pui32SplitInfo; - std::vector peTempTypes; - - // These are needed in cases we have 2 vec2 texcoords combined into one vec4 and they are accessed together. - std::vector acInputNeedsRedirect; // If 0xff, requires re-routing all reads via a combined vec4. If 0xfe, the same but the vec4 has already been declared. - std::vector acOutputNeedsRedirect; // Same for outputs - std::vector acPatchConstantsNeedsRedirect; // Same for patch constants - - // Get the Control Flow Graph for this phase, build it if necessary. 
- HLSLcc::ControlFlow::ControlFlowGraph &GetCFG(); - - uint32_t m_NextFreeTempRegister; // A counter for creating new temporaries for for-loops. - uint32_t m_NextTexCoordTemp; // A counter for creating tex coord temps for driver issue workarounds - -private: - bool m_CFGInitialized; - HLSLcc::ControlFlow::ControlFlowGraph m_CFG; -}; - -class Shader -{ -public: - - Shader() - : - ui32MajorVersion(0), - ui32MinorVersion(0), - eShaderType(INVALID_SHADER), - eTargetLanguage(LANG_DEFAULT), - extensions(0), - fp64(0), - ui32ShaderLength(0), - aui32FuncTableToFuncPointer(), - aui32FuncBodyToFuncTable(), - funcTable(), - funcPointer(), - ui32NextClassFuncName(), - pui32FirstToken(NULL), - asPhases(), - sInfo(), - abScalarInput(), - abScalarOutput(), - aIndexedInput(), - aIndexedOutput(), - aIndexedInputParents(), - aeResourceDims(), - acInputDeclared(), - acOutputDeclared(), - aiOpcodeUsed(NUM_OPCODES, 0), - ui32CurrentVertexOutputStream(0), - textureSamplers(), - m_DummySamplerDeclared(false), - maxSemanticIndex(0) - { - } - - // Retrieve the number of components the temp register has. - uint32_t GetTempComponentCount(SHADER_VARIABLE_TYPE eType, uint32_t ui32Reg) const; - - //Hull shaders have multiple phases. - //Each phase has its own temps. - //Convert from per-phase temps to global temps. - void ConsolidateHullTempVars(); - - // Detect temp registers per data type that are actually used. - void PruneTempRegisters(); - - // Check if inputs and outputs are accessed across semantic boundaries - // as in, 2x texcoord vec2's are packed together as vec4 but still accessed together. - void AnalyzeIOOverlap(); - - // Compute maxSemanticIndex based on the results of AnalyzeIOOverlap - void SetMaxSemanticIndex(); - - // Change all references to vertex position to always be highp, having them be mediump causes problems on Metal and Vivante GPUs. 
- void ForcePositionToHighp(); - - void FindUnusedGlobals(uint32_t flags); // Finds the DCL_CONSTANT_BUFFER with name "$Globals" and searches through all usages for each member of it and mark if they're actually ever used. - - void ExpandSWAPCs(); - - uint32_t ui32MajorVersion; - uint32_t ui32MinorVersion; - SHADER_TYPE eShaderType; - - GLLang eTargetLanguage; - const struct GlExtensions *extensions; - - int fp64; - - //DWORDs in program code, including version and length tokens. - uint32_t ui32ShaderLength; - - - //Instruction* functions;//non-main subroutines - HLSLcc::growing_vector aui32FuncTableToFuncPointer; // dynamic alloc? - HLSLcc::growing_vector aui32FuncBodyToFuncTable; - - struct FuncTableEntry - { - HLSLcc::growing_vector aui32FuncBodies; - }; - HLSLcc::growing_vector funcTable; - - struct FuncPointerEntry - { - HLSLcc::growing_vector aui32FuncTables; - uint32_t ui32NumBodiesPerTable; - }; - - HLSLcc::growing_vector funcPointer; - - HLSLcc::growing_vector ui32NextClassFuncName; - - const uint32_t* pui32FirstToken;//Reference for calculating current position in token stream. - - std::vector asPhases; - - ShaderInfo sInfo; - - // There are 2 input/output register spaces in DX bytecode: one for per-patch data and one for per-vertex. 
- // Which one is used depends on the context: - // per-vertex space is used in vertex/pixel/geom shaders always - // hull shader control point phase uses per-vertex by default, other phases are per-patch by default (can access per-vertex with OPERAND_TYPE_I/O_CONTROL_POINT) - // domain shader is per-patch by default, can access per-vertex with OPERAND_TYPE_I/O_CONTROL_POINT - - // Below, the [2] is accessed with 0 == per-vertex, 1 == per-patch - // Note that these ints are component masks - HLSLcc::growing_vector abScalarInput[2]; - HLSLcc::growing_vector abScalarOutput[2]; - - HLSLcc::growing_vector aIndexedInput[2]; - HLSLcc::growing_vector aIndexedOutput[2]; - - HLSLcc::growing_vector aIndexedInputParents[2]; - - HLSLcc::growing_vector aeResourceDims; - - HLSLcc::growing_vector acInputDeclared[2]; - HLSLcc::growing_vector acOutputDeclared[2]; - - std::vector aiOpcodeUsed; // Initialized to NUM_OPCODES elements above. - - uint32_t ui32CurrentVertexOutputStream; - - TextureSamplerPairs textureSamplers; - - std::vector psIntTempSizes; // Array for whether this temp register needs declaration as int temp - std::vector psInt16TempSizes; // min16ints - std::vector psInt12TempSizes; // min12ints - std::vector psUIntTempSizes; // Same for uints - std::vector psUInt16TempSizes; // ... and for uint16's - std::vector psFloatTempSizes; // ...and for floats - std::vector psFloat16TempSizes; // ...and for min16floats - std::vector psFloat10TempSizes; // ...and for min10floats - std::vector psDoubleTempSizes; // ...and for doubles - std::vector psBoolTempSizes; // ... and for bools - - bool m_DummySamplerDeclared; // If true, the shader doesn't declare any samplers but uses texelFetch and we have added a dummy sampler for Vulkan for that. 
- uint32_t maxSemanticIndex; // Highest semantic index found by SignatureAnalysis - -private: - void DoIOOverlapOperand(ShaderPhase *psPhase, Operand *psOperand); -}; diff --git a/third_party/HLSLcc/src/internal_includes/Translator.h b/third_party/HLSLcc/src/internal_includes/Translator.h deleted file mode 100644 index ae5224a..0000000 --- a/third_party/HLSLcc/src/internal_includes/Translator.h +++ /dev/null @@ -1,32 +0,0 @@ -#pragma once -#include "HLSLCrossCompilerContext.h" -#include "Shader.h" - -struct Declaration; -// Base class for translator backend implenentations. -class Translator -{ -protected: - HLSLCrossCompilerContext *psContext; -public: - explicit Translator(HLSLCrossCompilerContext *ctx) : psContext(ctx) {} - virtual ~Translator() {} - - virtual bool Translate() = 0; - - virtual void TranslateDeclaration(const Declaration *psDecl) = 0; - - // Translate system value type to name, return true if succeeded and no further translation is necessary - virtual bool TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix = NULL, int *iIgnoreRedirect = NULL) = 0; - - // In GLSL, the input and output names cannot clash. - // Also, the output name of previous stage must match the input name of the next stage. - // So, do gymnastics depending on which shader we're running on and which other shaders exist in this program. 
- // - virtual void SetIOPrefixes() = 0; - - void SetExtensions(const struct GlExtensions *ext) - { - psContext->psShader->extensions = ext; - } -}; diff --git a/third_party/HLSLcc/src/internal_includes/UseDefineChains.h b/third_party/HLSLcc/src/internal_includes/UseDefineChains.h deleted file mode 100644 index 9c2b582..0000000 --- a/third_party/HLSLcc/src/internal_includes/UseDefineChains.h +++ /dev/null @@ -1,138 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include - -#include -#include - -struct DefineUseChainEntry; -struct UseDefineChainEntry; - -typedef std::set DefineSet; -typedef std::set UsageSet; - -struct Instruction; -class Operand; -class ShaderInfo; -namespace HLSLcc -{ -namespace ControlFlow -{ - class ControlFlowGraph; -} -} - - -// Def-Use chain per temp component -struct DefineUseChainEntry -{ - DefineUseChainEntry() - : psInst(0) - , psOp(0) - , usages() - , writeMask(0) - , index(0) - , isStandalone(0) - { - memset(psSiblings, 0, 4 * sizeof(DefineUseChainEntry *)); - } - - Instruction *psInst; // The declaration (write to this temp component) - Operand *psOp; // The operand within this instruction for the write target - UsageSet usages; // List of usages that are dependent on this write - uint32_t writeMask; // Access mask; which all components were written to in the same op - uint32_t index; // For which component was this definition created for? - uint32_t isStandalone; // A shortcut for analysis: if nonzero, all siblings of all usages for both this and all this siblings - struct DefineUseChainEntry *psSiblings[4]; // In case of vectorized op, contains pointer to this define's corresponding entries for the other components. 
- -#if _DEBUG - bool operator==(const DefineUseChainEntry &a) const - { - if (psInst != a.psInst) - return false; - if (psOp != a.psOp) - return false; - if (writeMask != a.writeMask) - return false; - if (index != a.index) - return false; - if (isStandalone != a.isStandalone) - return false; - - // Just check that each one has the same amount of usages - if (usages.size() != a.usages.size()) - return false; - - return true; - } - -#endif -}; - -typedef std::list DefineUseChain; - -struct UseDefineChainEntry -{ - UseDefineChainEntry() - : psInst(0) - , psOp(0) - , defines() - , accessMask(0) - , index(0) - { - memset(psSiblings, 0, 4 * sizeof(UseDefineChainEntry *)); - } - - Instruction *psInst; // The use (read from this temp component) - Operand *psOp; // The operand within this instruction for the read - DefineSet defines; // List of writes that are visible to this read - uint32_t accessMask; // Which all components were read together with this one - uint32_t index; // For which component was this usage created for? - struct UseDefineChainEntry *psSiblings[4]; // In case of vectorized op, contains pointer to this usage's corresponding entries for the other components. 
- -#if _DEBUG - bool operator==(const UseDefineChainEntry &a) const - { - if (psInst != a.psInst) - return false; - if (psOp != a.psOp) - return false; - if (accessMask != a.accessMask) - return false; - if (index != a.index) - return false; - - // Just check that each one has the same amount of usages - if (defines.size() != a.defines.size()) - return false; - - return true; - } - -#endif -}; - -typedef std::list UseDefineChain; - -typedef std::map UseDefineChains; -typedef std::map DefineUseChains; -typedef std::vector ActiveDefinitions; - -// Do flow control analysis on the instructions and build the define-use and use-define chains -void BuildUseDefineChains(std::vector &instructions, uint32_t ui32NumTemps, DefineUseChains &psDUChains, UseDefineChains &psUDChains, HLSLcc::ControlFlow::ControlFlowGraph &cfg); - -// Do temp splitting based on use-define chains -void UDSplitTemps(uint32_t *psNumTemps, DefineUseChains &psDUChains, UseDefineChains &psUDChains, std::vector &pui32SplitTable); - -// Based on the sampler precisions, downgrade the definitions if possible. -void UpdateSamplerPrecisions(const ShaderInfo &psContext, DefineUseChains &psDUChains, uint32_t ui32NumTemps); - -// Optimization pass for successive passes: Mark Operand->isStandalone for definitions that are "standalone": all usages (and all their sibligns) of this and all its siblings only see this definition. -void CalculateStandaloneDefinitions(DefineUseChains &psDUChains, uint32_t ui32NumTemps); - -// Write the uses and defines back to Instruction and Operand member lists. 
-void WriteBackUsesAndDefines(DefineUseChains &psDUChains); diff --git a/third_party/HLSLcc/src/internal_includes/debug.h b/third_party/HLSLcc/src/internal_includes/debug.h deleted file mode 100644 index bc201c0..0000000 --- a/third_party/HLSLcc/src/internal_includes/debug.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef DEBUG_H_ -#define DEBUG_H_ - -#ifdef _DEBUG -#include "assert.h" -#define ASSERT(expr) CustomAssert(expr) -static void CustomAssert(int expression) -{ - if (!expression) - { - assert(0); - } -} - -#else -#define UNUSED(EXPR_) \ - do { if (false) (void)(EXPR_); } while(0) -#define ASSERT(expr) UNUSED(expr) -#endif - -#endif diff --git a/third_party/HLSLcc/src/internal_includes/decode.h b/third_party/HLSLcc/src/internal_includes/decode.h deleted file mode 100644 index 331cca4..0000000 --- a/third_party/HLSLcc/src/internal_includes/decode.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef DECODE_H -#define DECODE_H - -#include "internal_includes/Shader.h" - -Shader* DecodeDXBC(uint32_t* data, uint32_t decodeFlags); - -void UpdateOperandReferences(Shader* psShader, SHADER_PHASE_TYPE eShaderPhaseType, Instruction* psInst); - -#endif diff --git a/third_party/HLSLcc/src/internal_includes/languages.h b/third_party/HLSLcc/src/internal_includes/languages.h deleted file mode 100644 index eefbeca..0000000 --- a/third_party/HLSLcc/src/internal_includes/languages.h +++ /dev/null @@ -1,328 +0,0 @@ -#ifndef LANGUAGES_H -#define LANGUAGES_H - -#include "hlslcc.h" -#include "HLSLCrossCompilerContext.h" -#include "Shader.h" - -static int InOutSupported(const GLLang eLang) -{ - if (eLang == LANG_ES_100 || eLang == LANG_120) - { - return 0; - } - return 1; -} - -static int WriteToFragData(const GLLang eLang) -{ - if (eLang == LANG_ES_100 || eLang == LANG_120) - { - return 1; - } - return 0; -} - -static int ShaderBitEncodingSupported(const GLLang eLang) -{ - if (eLang != LANG_ES_300 && - eLang != LANG_ES_310 && - eLang < LANG_330) - { - return 0; - } - return 1; -} - -static int 
HaveOverloadedTextureFuncs(const GLLang eLang) -{ - if (eLang == LANG_ES_100 || eLang == LANG_120) - { - return 0; - } - return 1; -} - -static bool IsMobileTarget(const HLSLCrossCompilerContext *psContext) -{ - if ((psContext->flags & HLSLCC_FLAG_MOBILE_TARGET) != 0) - return true; - - return false; -} - -//Only enable for ES. Vulkan and Switch. -//Not present in 120, ignored in other desktop languages. Specifically enabled on Vulkan. -static int HavePrecisionQualifiers(const HLSLCrossCompilerContext *psContext) -{ - if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0 || (psContext->flags & HLSLCC_FLAG_NVN_TARGET) != 0) - return 1; - - const GLLang eLang = psContext->psShader->eTargetLanguage; - if (eLang >= LANG_ES_100 && eLang <= LANG_ES_310) - { - return 1; - } - return 0; -} - -static int EmitLowp(const HLSLCrossCompilerContext *psContext) -{ - const GLLang eLang = psContext->psShader->eTargetLanguage; - return eLang == LANG_ES_100 ? 1 : 0; -} - -static int HaveCubemapArray(const GLLang eLang) -{ - if (eLang >= LANG_400 && eLang <= LANG_GL_LAST) - return 1; - return 0; -} - -static bool IsESLanguage(const GLLang eLang) -{ - return (eLang >= LANG_ES_FIRST && eLang <= LANG_ES_LAST); -} - -static bool IsDesktopGLLanguage(const GLLang eLang) -{ - return (eLang >= LANG_GL_FIRST && eLang <= LANG_GL_LAST); -} - -//Only on vertex inputs and pixel outputs. 
-static int HaveLimitedInOutLocationQualifier(const GLLang eLang, const struct GlExtensions *extensions) -{ - if (eLang >= LANG_330 || eLang == LANG_ES_300 || eLang == LANG_ES_310 || (extensions && ((struct GlExtensions*)extensions)->ARB_explicit_attrib_location)) - { - return 1; - } - return 0; -} - -static int HaveInOutLocationQualifier(const GLLang eLang) -{ - if (eLang >= LANG_410 || eLang == LANG_ES_310) - { - return 1; - } - return 0; -} - -//layout(binding = X) uniform {uniformA; uniformB;} -//layout(location = X) uniform uniform_name; -static int HaveUniformBindingsAndLocations(const GLLang eLang, const struct GlExtensions *extensions, unsigned int flags) -{ - if (flags & HLSLCC_FLAG_DISABLE_EXPLICIT_LOCATIONS) - return 0; - - if (eLang >= LANG_430 || eLang == LANG_ES_310 || - (extensions && ((struct GlExtensions*)extensions)->ARB_explicit_uniform_location && ((struct GlExtensions*)extensions)->ARB_shading_language_420pack)) - { - return 1; - } - return 0; -} - -static int DualSourceBlendSupported(const GLLang eLang) -{ - if (eLang >= LANG_330) - { - return 1; - } - return 0; -} - -static int SubroutinesSupported(const GLLang eLang) -{ - if (eLang >= LANG_400) - { - return 1; - } - return 0; -} - -//Before 430, flat/smooth/centroid/noperspective must match -//between fragment and its previous stage. -//HLSL bytecode only tells us the interpolation in pixel shader. 
-static int PixelInterpDependency(const GLLang eLang) -{ - if (eLang < LANG_430) - { - return 1; - } - return 0; -} - -static int HaveUnsignedTypes(const GLLang eLang) -{ - switch (eLang) - { - case LANG_ES_100: - case LANG_120: - return 0; - default: - break; - } - return 1; -} - -static int HaveBitEncodingOps(const GLLang eLang) -{ - switch (eLang) - { - case LANG_ES_100: - case LANG_120: - return 0; - default: - break; - } - return 1; -} - -static int HaveNativeBitwiseOps(const GLLang eLang) -{ - switch (eLang) - { - case LANG_ES_100: - case LANG_120: - return 0; - default: - break; - } - return 1; -} - -static int HaveDynamicIndexing(HLSLCrossCompilerContext *psContext, const Operand* psOperand = NULL) -{ - // WebGL only allows dynamic indexing with constant expressions, loop indices or a combination. - // The only exception is for uniform access in vertex shaders, which can be indexed using any expression. - - switch (psContext->psShader->eTargetLanguage) - { - case LANG_ES_100: - case LANG_120: - if (psOperand != NULL) - { - if (psOperand->m_ForLoopInductorName) - return 1; - - if (psContext->psShader->eShaderType == VERTEX_SHADER && psOperand->eType == OPERAND_TYPE_CONSTANT_BUFFER) - return 1; - } - - return 0; - default: - break; - } - return 1; -} - -static int HaveGather(const GLLang eLang) -{ - if (eLang >= LANG_400 || eLang == LANG_ES_310) - { - return 1; - } - return 0; -} - -static int HaveGatherNonConstOffset(const GLLang eLang) -{ - if (eLang >= LANG_420 || eLang == LANG_ES_310) - { - return 1; - } - return 0; -} - -static int HaveQueryLod(const GLLang eLang) -{ - if (eLang >= LANG_400) - { - return 1; - } - return 0; -} - -static int HaveQueryLevels(const GLLang eLang) -{ - if (eLang >= LANG_430) - { - return 1; - } - return 0; -} - -static int HaveFragmentCoordConventions(const GLLang eLang) -{ - if (eLang >= LANG_150) - { - return 1; - } - return 0; -} - -static int HaveGeometryShaderARB(const GLLang eLang) -{ - if (eLang >= LANG_150) - { - 
return 1; - } - return 0; -} - -static int HaveAtomicCounter(const GLLang eLang) -{ - if (eLang >= LANG_420 || eLang == LANG_ES_310) - { - return 1; - } - return 0; -} - -static int HaveAtomicMem(const GLLang eLang) -{ - if (eLang >= LANG_430 || eLang == LANG_ES_310) - { - return 1; - } - return 0; -} - -static int HaveImageAtomics(const GLLang eLang) -{ - if (eLang >= LANG_420) - { - return 1; - } - return 0; -} - -static int HaveCompute(const GLLang eLang) -{ - if (eLang >= LANG_430 || eLang == LANG_ES_310) - { - return 1; - } - return 0; -} - -static int HaveImageLoadStore(const GLLang eLang) -{ - if (eLang >= LANG_420 || eLang == LANG_ES_310) - { - return 1; - } - return 0; -} - -static int HavePreciseQualifier(const GLLang eLang) -{ - if (eLang >= LANG_400) // TODO: Add for ES when we're adding 3.2 lang - { - return 1; - } - return 0; -} - -#endif diff --git a/third_party/HLSLcc/src/internal_includes/reflect.h b/third_party/HLSLcc/src/internal_includes/reflect.h deleted file mode 100644 index ddc468c..0000000 --- a/third_party/HLSLcc/src/internal_includes/reflect.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef REFLECT_H -#define REFLECT_H - -#include "hlslcc.h" - -struct ShaderPhase_TAG; - -typedef struct -{ - uint32_t* pui32Inputs; - uint32_t* pui32Outputs; - uint32_t* pui32Resources; - uint32_t* pui32Interfaces; - uint32_t* pui32Inputs11; - uint32_t* pui32Outputs11; - uint32_t* pui32OutputsWithStreams; - uint32_t* pui32PatchConstants; - uint32_t* pui32PatchConstants11; -} ReflectionChunks; - -void LoadShaderInfo(const uint32_t ui32MajorVersion, - const uint32_t ui32MinorVersion, - const ReflectionChunks* psChunks, - ShaderInfo* psInfo, uint32_t decodeFlags); - -#endif diff --git a/third_party/HLSLcc/src/internal_includes/toGLSL.h b/third_party/HLSLcc/src/internal_includes/toGLSL.h deleted file mode 100644 index b4ae9a8..0000000 --- a/third_party/HLSLcc/src/internal_includes/toGLSL.h +++ /dev/null @@ -1,244 +0,0 @@ -#pragma once - -#include "hlslcc.h" -#include 
"internal_includes/Translator.h" - -class HLSLCrossCompilerContext; - -class ToGLSL : public Translator -{ -protected: - GLLang language; - bool m_NeedUnityInstancingArraySizeDecl; - bool m_NeedUnityPreTransformDecl; - -public: - explicit ToGLSL(HLSLCrossCompilerContext* ctx) : - Translator(ctx), - language(LANG_DEFAULT), - m_NeedUnityInstancingArraySizeDecl(false), - m_NeedUnityPreTransformDecl(false), - m_NumDeclaredWhileTrueLoops(0) - {} - // Sets the target language according to given input. if LANG_DEFAULT, does autodetect and returns the selected language - GLLang SetLanguage(GLLang suggestedLanguage); - - virtual bool Translate(); - virtual void TranslateDeclaration(const Declaration* psDecl); - virtual bool TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix = NULL, int *iIgnoreRedirect = NULL); - virtual void SetIOPrefixes(); - -private: - void TranslateOperand(bstring glsl, const Operand *psOp, uint32_t flags, uint32_t ui32ComponentMask = OPERAND_4_COMPONENT_MASK_ALL, bool forceNoConversion = false); - void TranslateOperand(const Operand *psOp, uint32_t flags, uint32_t ui32ComponentMask = OPERAND_4_COMPONENT_MASK_ALL, bool forceNoConversion = false); - void TranslateInstruction(Instruction* psInst, bool isEmbedded = false); - - void TranslateVariableNameWithMask(bstring glsl, const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase, bool forceNoConversion = false); - void TranslateVariableNameWithMask(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase, bool forceNoConversion = false); - - void TranslateOperandIndex(const Operand* psOperand, int index); - void TranslateOperandIndexMAD(const Operand* psOperand, int index, uint32_t multiply, uint32_t add); - - void AddOpAssignToDestWithMask(const Operand* 
psDest, - SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, uint32_t precise, int *pNeedsParenthesis, uint32_t ui32CompMask); - void AddAssignToDest(const Operand* psDest, - SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, uint32_t precise, int* pNeedsParenthesis); - void AddAssignPrologue(int numParenthesis, bool isEmbedded = false); - - - void AddBuiltinOutput(const Declaration* psDecl, int arrayElements, const char* builtinName); - void AddBuiltinInput(const Declaration* psDecl, const char* builtinName); - void HandleOutputRedirect(const Declaration *psDecl, const char *Precision); - void HandleInputRedirect(const Declaration *psDecl, const char *Precision); - - void AddUserOutput(const Declaration* psDecl); - void DeclareStructConstants(const uint32_t ui32BindingPoint, const ConstantBuffer* psCBuf, const Operand* psOperand, bstring glsl); - void DeclareConstBufferShaderVariable(const char* varName, const struct ShaderVarType* psType, const struct ConstantBuffer* psCBuf, int unsizedArray, bool addUniformPrefix, bool reportInReflection); - void PreDeclareStructType(const std::string &name, const struct ShaderVarType* psType); - void DeclareUBOConstants(const uint32_t ui32BindingPoint, const ConstantBuffer* psCBuf, bstring glsl); - - void ReportStruct(const std::string &name, const struct ShaderVarType* psType); - - typedef enum - { - CMP_EQ, - CMP_LT, - CMP_GE, - CMP_NE, - } ComparisonType; - - void AddComparison(Instruction* psInst, ComparisonType eType, - uint32_t typeFlag); - - void AddMOVBinaryOp(const Operand *pDest, Operand *pSrc, uint32_t precise, bool isEmbedded = false); - void AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand *src1, Operand *src2, uint32_t precise); - void CallBinaryOp(const char* name, Instruction* psInst, - int dest, int src0, int src1, SHADER_VARIABLE_TYPE eDataType, bool isEmbedded = false); - void CallTernaryOp(const char* op1, const char* op2, Instruction* psInst, - int dest, int src0, int 
src1, int src2, uint32_t dataType); - void CallHelper3(const char* name, Instruction* psInst, - int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask); - void CallHelper2(const char* name, Instruction* psInst, - int dest, int src0, int src1, int paramsShouldFollowWriteMask); - void CallHelper2Int(const char* name, Instruction* psInst, - int dest, int src0, int src1, int paramsShouldFollowWriteMask); - void CallHelper2UInt(const char* name, Instruction* psInst, - int dest, int src0, int src1, int paramsShouldFollowWriteMask); - void CallHelper1(const char* name, Instruction* psInst, - int dest, int src0, int paramsShouldFollowWriteMask); - void CallHelper1Int( - const char* name, - Instruction* psInst, - const int dest, - const int src0, - int paramsShouldFollowWriteMask); - void TranslateTexelFetch( - Instruction* psInst, - const ResourceBinding* psBinding, - bstring glsl); - void TranslateTexCoord( - const RESOURCE_DIMENSION eResDim, - Operand* psTexCoordOperand); - void GetResInfoData(Instruction* psInst, int index, int destElem); - void TranslateTextureSample(Instruction* psInst, - uint32_t ui32Flags); - void TranslateDynamicComponentSelection(const ShaderVarType* psVarType, - const Operand* psByteAddr, uint32_t offset, uint32_t mask); - void TranslateShaderStorageStore(Instruction* psInst); - void TranslateShaderStorageLoad(Instruction* psInst); - void TranslateAtomicMemOp(Instruction* psInst); - void TranslateConditional( - Instruction* psInst, - bstring glsl); - - void HandleSwitchTransformation(Instruction* psInst, bstring glsl); - - // Add an extra function to the m_FunctionDefinitions list, unless it's already there. 
- bool DeclareExtraFunction(const std::string &name, bstring body); - void UseExtraFunctionDependency(const std::string &name); - - void DeclareDynamicIndexWrapper(const struct ShaderVarType* psType); - void DeclareDynamicIndexWrapper(const char* psName, SHADER_VARIABLE_CLASS eClass, SHADER_VARIABLE_TYPE eType, uint32_t ui32Rows, uint32_t ui32Columns, uint32_t ui32Elements); - - bool RenderTargetDeclared(uint32_t input); - - std::string GetVulkanDummySamplerName(); - - // A map of extra helper functions we'll need. - FunctionDefinitions m_FunctionDefinitions; - std::vector m_FunctionDefinitionsOrder; - - std::vector m_AdditionalDefinitions; - - std::vector m_DefinedStructs; - - std::set m_DeclaredRenderTarget; - int m_NumDeclaredWhileTrueLoops; - - struct SwitchConversion - { - /* - IF (CONDITION1) BREAK; STATEMENT1; IF (CONDITION2) BREAK; STATEMENT2;... transforms to - if (CONDITION1) {} ELSE { STATEMENT1; IF (CONDITION2) {} ELSE {STATEMENT2; ...} } - thus, we need to count the "BREAK" statements we encountered in each IF on the same level inside a SWITCH. - */ - struct ConditionalInfo - { - int breakCount; // Count BREAK on the same level to emit enough closing braces afterwards - bool breakEncountered; // Just encountered a BREAK statment, potentially need to emit "ELSE" - bool endifEncountered; // We need to check for "ENDIF ELSE" sequence, and not emit "else" if we see it - - ConditionalInfo() : - ConditionalInfo(0, false) - {} - - explicit ConditionalInfo(int initialBreakCount) : - ConditionalInfo(initialBreakCount, false) - {} - - ConditionalInfo(int initialBreakCount, bool withEndif) : - ConditionalInfo(initialBreakCount, withEndif, false) - {} - - ConditionalInfo(int initialBreakCount, bool withEndif, bool withBreak) : - breakCount(initialBreakCount), - endifEncountered(withEndif), - breakEncountered(withBreak) - {} - }; - - bstring switchOperand; - // We defer emitting if (condition) for each CASE statement to concatenate possible CASE A: CASE B:... 
into one if (). - std::vector currentCaseOperands; - std::vector conditionalsInfo; - int isInLoop; // We don't count "BREAK" (end emit them) if we're in a loop. - bool isFirstCase; - - SwitchConversion() : - switchOperand(bfromcstr("")), - isInLoop(0), - isFirstCase(true) - {} - - SwitchConversion(const SwitchConversion& other) : - switchOperand(bstrcpy(other.switchOperand)), - conditionalsInfo(other.conditionalsInfo), - isInLoop(other.isInLoop), - isFirstCase(other.isFirstCase) - { - currentCaseOperands.reserve(other.currentCaseOperands.size()); - for (size_t i = 0; i < other.currentCaseOperands.size(); ++i) - currentCaseOperands.push_back(bstrcpy(other.currentCaseOperands[i])); - } - - SwitchConversion(SwitchConversion&& other) : - switchOperand(other.switchOperand), - currentCaseOperands(std::move(other.currentCaseOperands)), - conditionalsInfo(std::move(other.conditionalsInfo)), - isInLoop(other.isInLoop), - isFirstCase(other.isFirstCase) - { - other.switchOperand = nullptr; - } - - ~SwitchConversion() - { - bdestroy(switchOperand); - for (size_t i = 0; i < currentCaseOperands.size(); ++i) - bdestroy(currentCaseOperands[i]); - } - - SwitchConversion& operator=(const SwitchConversion& other) - { - if (this == &other) - return *this; - - switchOperand = bstrcpy(other.switchOperand); - conditionalsInfo = other.conditionalsInfo; - isInLoop = other.isInLoop; - isFirstCase = other.isFirstCase; - currentCaseOperands.reserve(other.currentCaseOperands.size()); - for (size_t i = 0; i < other.currentCaseOperands.size(); ++i) - currentCaseOperands.push_back(bstrcpy(other.currentCaseOperands[i])); - - return *this; - } - - SwitchConversion& operator=(SwitchConversion&& other) - { - if (this == &other) - return *this; - - switchOperand = other.switchOperand; - conditionalsInfo = std::move(other.conditionalsInfo); - isInLoop = other.isInLoop; - isFirstCase = other.isFirstCase; - currentCaseOperands = std::move(other.currentCaseOperands); - - other.switchOperand = nullptr; - - 
return *this; - } - }; - std::vector m_SwitchStack; -}; diff --git a/third_party/HLSLcc/src/internal_includes/toGLSLOperand.h b/third_party/HLSLcc/src/internal_includes/toGLSLOperand.h deleted file mode 100644 index deda652..0000000 --- a/third_party/HLSLcc/src/internal_includes/toGLSLOperand.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef TO_GLSL_OPERAND_H -#define TO_GLSL_OPERAND_H - -#include -#include "bstrlib.h" -#include "ShaderInfo.h" - -class HLSLCrossCompilerContext; - -//void TranslateOperand(HLSLCrossCompilerContext* psContext, const Operand* psOperand, uint32_t ui32TOFlag); -// Translate operand but add additional component mask -//void TranslateOperandWithMask(HLSLCrossCompilerContext* psContext, const Operand* psOperand, uint32_t ui32TOFlag, uint32_t ui32ComponentMask); - -void TranslateOperandSwizzle(HLSLCrossCompilerContext* psContext, const Operand* psOperand, int iRebase); -void TranslateOperandSwizzleWithMask(bstring glsl, HLSLCrossCompilerContext* psContext, const Operand* psOperand, uint32_t ui32ComponentMask, int iRebase); -void TranslateOperandSwizzleWithMask(HLSLCrossCompilerContext* psContext, const Operand* psOperand, uint32_t ui32ComponentMask, int iRebase); - -void ResourceName(bstring targetStr, HLSLCrossCompilerContext* psContext, ResourceGroup group, const uint32_t ui32RegisterNumber, const int bZCompare); -std::string ResourceName(HLSLCrossCompilerContext* psContext, ResourceGroup group, const uint32_t ui32RegisterNumber, const int bZCompare); - -std::string TextureSamplerName(ShaderInfo* psShaderInfo, const uint32_t ui32TextureRegisterNumber, const uint32_t ui32SamplerRegisterNumber, const int bZCompare); -void ConcatTextureSamplerName(bstring str, ShaderInfo* psShaderInfo, const uint32_t ui32TextureRegisterNumber, const uint32_t ui32SamplerRegisterNumber, const int bZCompare); - -std::string UniformBufferInstanceName(HLSLCrossCompilerContext* psContext, const std::string& name); - -#endif diff --git 
a/third_party/HLSLcc/src/internal_includes/toMetal.h b/third_party/HLSLcc/src/internal_includes/toMetal.h deleted file mode 100644 index 08d8eb1..0000000 --- a/third_party/HLSLcc/src/internal_includes/toMetal.h +++ /dev/null @@ -1,182 +0,0 @@ -#pragma once -#include "internal_includes/Translator.h" -#include -#include - -struct SamplerDesc -{ - std::string name; - uint32_t reg, slot; -}; -struct TextureSamplerDesc -{ - std::string name; - int textureBind, samplerBind; - HLSLCC_TEX_DIMENSION dim; - bool isMultisampled; - bool isDepthSampler; - bool uav; -}; - -class ToMetal : public Translator -{ -public: - explicit ToMetal(HLSLCrossCompilerContext *ctx) - : Translator(ctx) - , m_ShadowSamplerDeclared(false) - , m_NeedFBOutputRemapDecl(false) - , m_NeedFBInputRemapDecl(false) - {} - - virtual bool Translate(); - virtual void TranslateDeclaration(const Declaration *psDecl); - virtual bool TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix = NULL, int *iIgnoreRedirect = NULL); - std::string TranslateOperand(const Operand *psOp, uint32_t flags, uint32_t ui32ComponentMask = OPERAND_4_COMPONENT_MASK_ALL); - - virtual void SetIOPrefixes(); - -private: - void TranslateInstruction(Instruction* psInst); - - void DeclareBuiltinInput(const Declaration *psDecl); - void DeclareBuiltinOutput(const Declaration *psDecl); - void DeclareClipPlanes(const Declaration* decl, unsigned declCount); - void GenerateTexturesReflection(HLSLccReflection* refl); - - // Retrieve the name of the output struct for this shader - std::string GetOutputStructName() const; - std::string GetInputStructName() const; - std::string GetCBName(const std::string& cbName) const; - - void DeclareHullShaderPassthrough(); - void HandleInputRedirect(const Declaration *psDecl, const std::string &typeName); - void HandleOutputRedirect(const Declaration *psDecl, const std::string 
&typeName); - - void DeclareConstantBuffer(const ConstantBuffer *psCBuf, uint32_t ui32BindingPoint); - void DeclareStructType(const std::string &name, const std::vector &contents, bool withinCB = false, uint32_t cumulativeOffset = 0, bool stripUnused = false); - void DeclareStructType(const std::string &name, const std::vector &contents, bool withinCB = false, uint32_t cumulativeOffset = 0); - void DeclareStructVariable(const std::string &parentName, const ShaderVar &var, bool withinCB = false, uint32_t cumulativeOffset = 0, bool isUsed = true); - void DeclareStructVariable(const std::string &parentName, const ShaderVarType &var, bool withinCB = false, uint32_t cumulativeOffset = 0, bool isUsed = true); - void DeclareBufferVariable(const Declaration *psDecl, bool isRaw, bool isUAV); - - void DeclareResource(const Declaration *psDecl); - void TranslateResourceTexture(const Declaration* psDecl, uint32_t samplerCanDoShadowCmp, HLSLCC_TEX_DIMENSION texDim); - - void DeclareOutput(const Declaration *decl); - - void PrintStructDeclarations(StructDefinitions &defs, const char *name = ""); - - std::string ResourceName(ResourceGroup group, const uint32_t ui32RegisterNumber); - - // ToMetalOperand.cpp - std::string TranslateOperandSwizzle(const Operand* psOperand, uint32_t ui32ComponentMask, int iRebase, bool includeDot = true); - std::string TranslateOperandIndex(const Operand* psOperand, int index); - std::string TranslateVariableName(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase); - - // ToMetalInstruction.cpp - - void AddOpAssignToDestWithMask(const Operand* psDest, - SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, uint32_t precise, int& numParenthesis, uint32_t ui32CompMask); - void AddAssignToDest(const Operand* psDest, - SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, uint32_t precise, int& numParenthesis); - void AddAssignPrologue(int numParenthesis); - - typedef enum - { - 
CMP_EQ, - CMP_LT, - CMP_GE, - CMP_NE, - } ComparisonType; - - void AddComparison(Instruction* psInst, ComparisonType eType, - uint32_t typeFlag); - - bool CanForceToHalfOperand(const Operand *psOperand); - - void AddMOVBinaryOp(const Operand *pDest, Operand *pSrc, uint32_t precise); - void AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand *src1, Operand *src2, uint32_t precise); - void CallBinaryOp(const char* name, Instruction* psInst, - int dest, int src0, int src1, SHADER_VARIABLE_TYPE eDataType); - void CallTernaryOp(const char* op1, const char* op2, Instruction* psInst, - int dest, int src0, int src1, int src2, uint32_t dataType); - void CallHelper3(const char* name, Instruction* psInst, - int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask, uint32_t ui32Flags); - void CallHelper3(const char* name, Instruction* psInst, - int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask); - void CallHelper2(const char* name, Instruction* psInst, - int dest, int src0, int src1, int paramsShouldFollowWriteMask); - void CallHelper2Int(const char* name, Instruction* psInst, - int dest, int src0, int src1, int paramsShouldFollowWriteMask); - void CallHelper2UInt(const char* name, Instruction* psInst, - int dest, int src0, int src1, int paramsShouldFollowWriteMask); - void CallHelper1(const char* name, Instruction* psInst, - int dest, int src0, int paramsShouldFollowWriteMask); - void CallHelper1Int( - const char* name, - Instruction* psInst, - const int dest, - const int src0, - int paramsShouldFollowWriteMask); - void TranslateTexelFetch( - Instruction* psInst, - const ResourceBinding* psBinding, - bstring glsl); - void TranslateTexelFetchOffset( - Instruction* psInst, - const ResourceBinding* psBinding, - bstring glsl); - void TranslateTexCoord( - const RESOURCE_DIMENSION eResDim, - Operand* psTexCoordOperand); - void GetResInfoData(Instruction* psInst, int index, int destElem); - void TranslateTextureSample(Instruction* 
psInst, - uint32_t ui32Flags); - void TranslateDynamicComponentSelection(const ShaderVarType* psVarType, - const Operand* psByteAddr, uint32_t offset, uint32_t mask); - void TranslateShaderStorageStore(Instruction* psInst); - void TranslateShaderStorageLoad(Instruction* psInst); - void TranslateAtomicMemOp(Instruction* psInst); - void TranslateConditional( - Instruction* psInst, - bstring glsl); - - // The map is keyed by struct name. The special name "" (empty string) is reserved for entry point function parameters - StructDefinitions m_StructDefinitions; - - // A map of extra helper functions we'll need. - FunctionDefinitions m_FunctionDefinitions; - - BindingSlotAllocator m_TextureSlots, m_SamplerSlots; - BindingSlotAllocator m_BufferSlots; - - struct BufferReflection - { - uint32_t bind; - bool isUAV; - bool hasCounter; - }; - std::map m_BufferReflections; - - std::vector m_Samplers; - std::vector m_Textures; - - std::string m_ExtraGlobalDefinitions; - - // Flags for whether we need to add the declaration for the FB IO remaps - bool m_NeedFBInputRemapDecl; - bool m_NeedFBOutputRemapDecl; - - bool m_ShadowSamplerDeclared; - - void EnsureShadowSamplerDeclared(); - - // Add an extra function to the m_FunctionDefinitions list, unless it's already there. 
- void DeclareExtraFunction(const std::string &name, const std::string &body); - - // Move all lowp -> mediump - void ClampPartialPrecisions(); - - // Reseve UAV slots in advance to match the original HLSL bindings -> correct bindings in SetRandomWriteTarget() - void ReserveUAVBindingSlots(ShaderPhase *phase); -}; diff --git a/third_party/HLSLcc/src/internal_includes/toMetalDeclaration.h b/third_party/HLSLcc/src/internal_includes/toMetalDeclaration.h deleted file mode 100644 index 2052009..0000000 --- a/third_party/HLSLcc/src/internal_includes/toMetalDeclaration.h +++ /dev/null @@ -1,3 +0,0 @@ -#pragma once - -#include "internal_includes/Declaration.h" diff --git a/third_party/HLSLcc/src/internal_includes/tokens.h b/third_party/HLSLcc/src/internal_includes/tokens.h deleted file mode 100644 index 671ccbd..0000000 --- a/third_party/HLSLcc/src/internal_includes/tokens.h +++ /dev/null @@ -1,789 +0,0 @@ -#ifndef TOKENS_H -#define TOKENS_H - -#include "hlslcc.h" - -enum SHADER_PHASE_TYPE -{ - SHADER_PHASE_INVALID = -1, - MAIN_PHASE = 0, - HS_GLOBAL_DECL_PHASE = 1, - HS_CTRL_POINT_PHASE = 2, - HS_FORK_PHASE = 3, - HS_JOIN_PHASE = 4 -}; - -static SHADER_TYPE DecodeShaderType(uint32_t ui32Token) -{ - return (SHADER_TYPE)((ui32Token & 0xffff0000) >> 16); -} - -static uint32_t DecodeProgramMajorVersion(uint32_t ui32Token) -{ - return (ui32Token & 0x000000f0) >> 4; -} - -static uint32_t DecodeProgramMinorVersion(uint32_t ui32Token) -{ - return (ui32Token & 0x0000000f); -} - -static uint32_t DecodeInstructionLength(uint32_t ui32Token) -{ - return (ui32Token & 0x7f000000) >> 24; -} - -static uint32_t DecodeIsOpcodeExtended(uint32_t ui32Token) -{ - return (ui32Token & 0x80000000) >> 31; -} - -typedef enum EXTENDED_OPCODE_TYPE -{ - EXTENDED_OPCODE_EMPTY = 0, - EXTENDED_OPCODE_SAMPLE_CONTROLS = 1, - EXTENDED_OPCODE_RESOURCE_DIM = 2, - EXTENDED_OPCODE_RESOURCE_RETURN_TYPE = 3, -} EXTENDED_OPCODE_TYPE; - -static EXTENDED_OPCODE_TYPE DecodeExtendedOpcodeType(uint32_t ui32Token) -{ - 
return (EXTENDED_OPCODE_TYPE)(ui32Token & 0x0000003f); -} - -static RESOURCE_RETURN_TYPE DecodeResourceReturnType(uint32_t ui32Coord, uint32_t ui32Token) -{ - return (RESOURCE_RETURN_TYPE)((ui32Token >> (ui32Coord * 4)) & 0xF); -} - -static RESOURCE_RETURN_TYPE DecodeExtendedResourceReturnType(uint32_t ui32Coord, uint32_t ui32Token) -{ - return (RESOURCE_RETURN_TYPE)((ui32Token >> (ui32Coord * 4 + 6)) & 0xF); -} - -enum OPCODE_TYPE -{ - //For DX9 - OPCODE_POW = -6, - OPCODE_DP2ADD = -5, - OPCODE_LRP = -4, - OPCODE_ENDREP = -3, - OPCODE_REP = -2, - OPCODE_SPECIAL_DCL_IMMCONST = -1, - - OPCODE_ADD, - OPCODE_AND, - OPCODE_BREAK, - OPCODE_BREAKC, - OPCODE_CALL, - OPCODE_CALLC, - OPCODE_CASE, - OPCODE_CONTINUE, - OPCODE_CONTINUEC, - OPCODE_CUT, - OPCODE_DEFAULT, - OPCODE_DERIV_RTX, - OPCODE_DERIV_RTY, - OPCODE_DISCARD, - OPCODE_DIV, - OPCODE_DP2, - OPCODE_DP3, - OPCODE_DP4, - OPCODE_ELSE, - OPCODE_EMIT, - OPCODE_EMITTHENCUT, - OPCODE_ENDIF, - OPCODE_ENDLOOP, - OPCODE_ENDSWITCH, - OPCODE_EQ, - OPCODE_EXP, - OPCODE_FRC, - OPCODE_FTOI, - OPCODE_FTOU, - OPCODE_GE, - OPCODE_IADD, - OPCODE_IF, - OPCODE_IEQ, - OPCODE_IGE, - OPCODE_ILT, - OPCODE_IMAD, - OPCODE_IMAX, - OPCODE_IMIN, - OPCODE_IMUL, - OPCODE_INE, - OPCODE_INEG, - OPCODE_ISHL, - OPCODE_ISHR, - OPCODE_ITOF, - OPCODE_LABEL, - OPCODE_LD, - OPCODE_LD_MS, - OPCODE_LOG, - OPCODE_LOOP, - OPCODE_LT, - OPCODE_MAD, - OPCODE_MIN, - OPCODE_MAX, - OPCODE_CUSTOMDATA, - OPCODE_MOV, - OPCODE_MOVC, - OPCODE_MUL, - OPCODE_NE, - OPCODE_NOP, - OPCODE_NOT, - OPCODE_OR, - OPCODE_RESINFO, - OPCODE_RET, - OPCODE_RETC, - OPCODE_ROUND_NE, - OPCODE_ROUND_NI, - OPCODE_ROUND_PI, - OPCODE_ROUND_Z, - OPCODE_RSQ, - OPCODE_SAMPLE, - OPCODE_SAMPLE_C, - OPCODE_SAMPLE_C_LZ, - OPCODE_SAMPLE_L, - OPCODE_SAMPLE_D, - OPCODE_SAMPLE_B, - OPCODE_SQRT, - OPCODE_SWITCH, - OPCODE_SINCOS, - OPCODE_UDIV, - OPCODE_ULT, - OPCODE_UGE, - OPCODE_UMUL, - OPCODE_UMAD, - OPCODE_UMAX, - OPCODE_UMIN, - OPCODE_USHR, - OPCODE_UTOF, - OPCODE_XOR, - OPCODE_DCL_RESOURCE, // 
DCL* opcodes have - OPCODE_DCL_CONSTANT_BUFFER, // custom operand formats. - OPCODE_DCL_SAMPLER, - OPCODE_DCL_INDEX_RANGE, - OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY, - OPCODE_DCL_GS_INPUT_PRIMITIVE, - OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT, - OPCODE_DCL_INPUT, - OPCODE_DCL_INPUT_SGV, - OPCODE_DCL_INPUT_SIV, - OPCODE_DCL_INPUT_PS, - OPCODE_DCL_INPUT_PS_SGV, - OPCODE_DCL_INPUT_PS_SIV, - OPCODE_DCL_OUTPUT, - OPCODE_DCL_OUTPUT_SGV, - OPCODE_DCL_OUTPUT_SIV, - OPCODE_DCL_TEMPS, - OPCODE_DCL_INDEXABLE_TEMP, - OPCODE_DCL_GLOBAL_FLAGS, - -// ----------------------------------------------- - - OPCODE_RESERVED_10, - -// ---------- DX 10.1 op codes--------------------- - - OPCODE_LOD, - OPCODE_GATHER4, - OPCODE_SAMPLE_POS, - OPCODE_SAMPLE_INFO, - -// ----------------------------------------------- - - // This should be 10.1's version of NUM_OPCODES - OPCODE_RESERVED_10_1, - -// ---------- DX 11 op codes--------------------- - OPCODE_HS_DECLS, // token marks beginning of HS sub-shader - OPCODE_HS_CONTROL_POINT_PHASE, // token marks beginning of HS sub-shader - OPCODE_HS_FORK_PHASE, // token marks beginning of HS sub-shader - OPCODE_HS_JOIN_PHASE, // token marks beginning of HS sub-shader - - OPCODE_EMIT_STREAM, - OPCODE_CUT_STREAM, - OPCODE_EMITTHENCUT_STREAM, - OPCODE_INTERFACE_CALL, - - OPCODE_BUFINFO, - OPCODE_DERIV_RTX_COARSE, - OPCODE_DERIV_RTX_FINE, - OPCODE_DERIV_RTY_COARSE, - OPCODE_DERIV_RTY_FINE, - OPCODE_GATHER4_C, - OPCODE_GATHER4_PO, - OPCODE_GATHER4_PO_C, - OPCODE_RCP, - OPCODE_F32TOF16, - OPCODE_F16TOF32, - OPCODE_UADDC, - OPCODE_USUBB, - OPCODE_COUNTBITS, - OPCODE_FIRSTBIT_HI, - OPCODE_FIRSTBIT_LO, - OPCODE_FIRSTBIT_SHI, - OPCODE_UBFE, - OPCODE_IBFE, - OPCODE_BFI, - OPCODE_BFREV, - OPCODE_SWAPC, - - OPCODE_DCL_STREAM, - OPCODE_DCL_FUNCTION_BODY, - OPCODE_DCL_FUNCTION_TABLE, - OPCODE_DCL_INTERFACE, - - OPCODE_DCL_INPUT_CONTROL_POINT_COUNT, - OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT, - OPCODE_DCL_TESS_DOMAIN, - OPCODE_DCL_TESS_PARTITIONING, - 
OPCODE_DCL_TESS_OUTPUT_PRIMITIVE, - OPCODE_DCL_HS_MAX_TESSFACTOR, - OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT, - OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT, - - OPCODE_DCL_THREAD_GROUP, - OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED, - OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW, - OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED, - OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW, - OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED, - OPCODE_DCL_RESOURCE_RAW, - OPCODE_DCL_RESOURCE_STRUCTURED, - OPCODE_LD_UAV_TYPED, - OPCODE_STORE_UAV_TYPED, - OPCODE_LD_RAW, - OPCODE_STORE_RAW, - OPCODE_LD_STRUCTURED, - OPCODE_STORE_STRUCTURED, - OPCODE_ATOMIC_AND, - OPCODE_ATOMIC_OR, - OPCODE_ATOMIC_XOR, - OPCODE_ATOMIC_CMP_STORE, - OPCODE_ATOMIC_IADD, - OPCODE_ATOMIC_IMAX, - OPCODE_ATOMIC_IMIN, - OPCODE_ATOMIC_UMAX, - OPCODE_ATOMIC_UMIN, - OPCODE_IMM_ATOMIC_ALLOC, - OPCODE_IMM_ATOMIC_CONSUME, - OPCODE_IMM_ATOMIC_IADD, - OPCODE_IMM_ATOMIC_AND, - OPCODE_IMM_ATOMIC_OR, - OPCODE_IMM_ATOMIC_XOR, - OPCODE_IMM_ATOMIC_EXCH, - OPCODE_IMM_ATOMIC_CMP_EXCH, - OPCODE_IMM_ATOMIC_IMAX, - OPCODE_IMM_ATOMIC_IMIN, - OPCODE_IMM_ATOMIC_UMAX, - OPCODE_IMM_ATOMIC_UMIN, - OPCODE_SYNC, - - OPCODE_DADD, - OPCODE_DMAX, - OPCODE_DMIN, - OPCODE_DMUL, - OPCODE_DEQ, - OPCODE_DGE, - OPCODE_DLT, - OPCODE_DNE, - OPCODE_DMOV, - OPCODE_DMOVC, - OPCODE_DTOF, - OPCODE_FTOD, - - OPCODE_EVAL_SNAPPED, - OPCODE_EVAL_SAMPLE_INDEX, - OPCODE_EVAL_CENTROID, - - OPCODE_DCL_GS_INSTANCE_COUNT, - - OPCODE_ABORT, - OPCODE_DEBUG_BREAK, - -// ----------------------------------------------- - - // This marks the end of D3D11.0 opcodes - OPCODE_RESERVED_11, - - OPCODE_DDIV, - OPCODE_DFMA, - OPCODE_DRCP, - - OPCODE_MSAD, - - OPCODE_DTOI, - OPCODE_DTOU, - OPCODE_ITOD, - OPCODE_UTOD, - -// ----------------------------------------------- - - // This marks the end of D3D11.1 opcodes - OPCODE_RESERVED_11_1, - - NUM_OPCODES, - OPCODE_INVALID = NUM_OPCODES, -}; - -static OPCODE_TYPE DecodeOpcodeType(uint32_t ui32Token) -{ - return (OPCODE_TYPE)(ui32Token & 0x00007ff); -} 
- -typedef enum -{ - INDEX_0D, - INDEX_1D, - INDEX_2D, - INDEX_3D, -} OPERAND_INDEX_DIMENSION; - -static OPERAND_INDEX_DIMENSION DecodeOperandIndexDimension(uint32_t ui32Token) -{ - return (OPERAND_INDEX_DIMENSION)((ui32Token & 0x00300000) >> 20); -} - -typedef enum OPERAND_TYPE -{ - OPERAND_TYPE_SPECIAL_LOOPCOUNTER = -10, - OPERAND_TYPE_SPECIAL_IMMCONSTINT = -9, - OPERAND_TYPE_SPECIAL_TEXCOORD = -8, - OPERAND_TYPE_SPECIAL_POSITION = -7, - OPERAND_TYPE_SPECIAL_FOG = -6, - OPERAND_TYPE_SPECIAL_POINTSIZE = -5, - OPERAND_TYPE_SPECIAL_OUTOFFSETCOLOUR = -4, - OPERAND_TYPE_SPECIAL_OUTBASECOLOUR = -3, - OPERAND_TYPE_SPECIAL_ADDRESS = -2, - OPERAND_TYPE_SPECIAL_IMMCONST = -1, - OPERAND_TYPE_TEMP = 0, // Temporary Register File - OPERAND_TYPE_INPUT = 1, // General Input Register File - OPERAND_TYPE_OUTPUT = 2, // General Output Register File - OPERAND_TYPE_INDEXABLE_TEMP = 3, // Temporary Register File (indexable) - OPERAND_TYPE_IMMEDIATE32 = 4, // 32bit/component immediate value(s) - // If for example, operand token bits - // [01:00]==OPERAND_4_COMPONENT, - // this means that the operand type: - // OPERAND_TYPE_IMMEDIATE32 - // results in 4 additional 32bit - // DWORDS present for the operand. - OPERAND_TYPE_IMMEDIATE64 = 5, // 64bit/comp.imm.val(s)HI:LO - OPERAND_TYPE_SAMPLER = 6, // Reference to sampler state - OPERAND_TYPE_RESOURCE = 7, // Reference to memory resource (e.g. 
texture) - OPERAND_TYPE_CONSTANT_BUFFER = 8, // Reference to constant buffer - OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER = 9, // Reference to immediate constant buffer - OPERAND_TYPE_LABEL = 10, // Label - OPERAND_TYPE_INPUT_PRIMITIVEID = 11, // Input primitive ID - OPERAND_TYPE_OUTPUT_DEPTH = 12, // Output Depth - OPERAND_TYPE_NULL = 13, // Null register, used to discard results of operations - // Below Are operands new in DX 10.1 - OPERAND_TYPE_RASTERIZER = 14, // DX10.1 Rasterizer register, used to denote the depth/stencil and render target resources - OPERAND_TYPE_OUTPUT_COVERAGE_MASK = 15, // DX10.1 PS output MSAA coverage mask (scalar) - // Below Are operands new in DX 11 - OPERAND_TYPE_STREAM = 16, // Reference to GS stream output resource - OPERAND_TYPE_FUNCTION_BODY = 17, // Reference to a function definition - OPERAND_TYPE_FUNCTION_TABLE = 18, // Reference to a set of functions used by a class - OPERAND_TYPE_INTERFACE = 19, // Reference to an interface - OPERAND_TYPE_FUNCTION_INPUT = 20, // Reference to an input parameter to a function - OPERAND_TYPE_FUNCTION_OUTPUT = 21, // Reference to an output parameter to a function - OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID = 22, // HS Control Point phase input saying which output control point ID this is - OPERAND_TYPE_INPUT_FORK_INSTANCE_ID = 23, // HS Fork Phase input instance ID - OPERAND_TYPE_INPUT_JOIN_INSTANCE_ID = 24, // HS Join Phase input instance ID - OPERAND_TYPE_INPUT_CONTROL_POINT = 25, // HS Fork+Join, DS phase input control points (array of them) - OPERAND_TYPE_OUTPUT_CONTROL_POINT = 26, // HS Fork+Join phase output control points (array of them) - OPERAND_TYPE_INPUT_PATCH_CONSTANT = 27, // DS+HSJoin Input Patch Constants (array of them) - OPERAND_TYPE_INPUT_DOMAIN_POINT = 28, // DS Input Domain point - OPERAND_TYPE_THIS_POINTER = 29, // Reference to an interface this pointer - OPERAND_TYPE_UNORDERED_ACCESS_VIEW = 30, // Reference to UAV u# - OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY = 31, // Reference to 
Thread Group Shared Memory g# - OPERAND_TYPE_INPUT_THREAD_ID = 32, // Compute Shader Thread ID - OPERAND_TYPE_INPUT_THREAD_GROUP_ID = 33, // Compute Shader Thread Group ID - OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP = 34, // Compute Shader Thread ID In Thread Group - OPERAND_TYPE_INPUT_COVERAGE_MASK = 35, // Pixel shader coverage mask input - OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED = 36, // Compute Shader Thread ID In Group Flattened to a 1D value. - OPERAND_TYPE_INPUT_GS_INSTANCE_ID = 37, // Input GS instance ID - OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL = 38, // Output Depth, forced to be greater than or equal than current depth - OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL = 39, // Output Depth, forced to be less than or equal to current depth - OPERAND_TYPE_CYCLE_COUNTER = 40, // Cycle counter -} OPERAND_TYPE; - -static OPERAND_TYPE DecodeOperandType(uint32_t ui32Token) -{ - return (OPERAND_TYPE)((ui32Token & 0x000ff000) >> 12); -} - -static SPECIAL_NAME DecodeOperandSpecialName(uint32_t ui32Token) -{ - return (SPECIAL_NAME)(ui32Token & 0x0000ffff); -} - -typedef enum OPERAND_INDEX_REPRESENTATION -{ - OPERAND_INDEX_IMMEDIATE32 = 0, // Extra DWORD - OPERAND_INDEX_IMMEDIATE64 = 1, // 2 Extra DWORDs - // (HI32:LO32) - OPERAND_INDEX_RELATIVE = 2, // Extra operand - OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE = 3, // Extra DWORD followed by - // extra operand - OPERAND_INDEX_IMMEDIATE64_PLUS_RELATIVE = 4, // 2 Extra DWORDS - // (HI32:LO32) followed - // by extra operand -} OPERAND_INDEX_REPRESENTATION; - -static OPERAND_INDEX_REPRESENTATION DecodeOperandIndexRepresentation(uint32_t ui32Dimension, uint32_t ui32Token) -{ - return (OPERAND_INDEX_REPRESENTATION)((ui32Token & (0x3 << (22 + 3 * ((ui32Dimension) & 3)))) >> (22 + 3 * ((ui32Dimension) & 3))); -} - -typedef enum OPERAND_NUM_COMPONENTS -{ - OPERAND_0_COMPONENT = 0, - OPERAND_1_COMPONENT = 1, - OPERAND_4_COMPONENT = 2, - OPERAND_N_COMPONENT = 3 // unused for now -} OPERAND_NUM_COMPONENTS; - -static 
OPERAND_NUM_COMPONENTS DecodeOperandNumComponents(uint32_t ui32Token) -{ - return (OPERAND_NUM_COMPONENTS)(ui32Token & 0x00000003); -} - -typedef enum OPERAND_4_COMPONENT_SELECTION_MODE -{ - OPERAND_4_COMPONENT_MASK_MODE = 0, // mask 4 components - OPERAND_4_COMPONENT_SWIZZLE_MODE = 1, // swizzle 4 components - OPERAND_4_COMPONENT_SELECT_1_MODE = 2, // select 1 of 4 components -} OPERAND_4_COMPONENT_SELECTION_MODE; - -static OPERAND_4_COMPONENT_SELECTION_MODE DecodeOperand4CompSelMode(uint32_t ui32Token) -{ - return (OPERAND_4_COMPONENT_SELECTION_MODE)((ui32Token & 0x0000000c) >> 2); -} - -#define OPERAND_4_COMPONENT_MASK_X 0x00000001 -#define OPERAND_4_COMPONENT_MASK_Y 0x00000002 -#define OPERAND_4_COMPONENT_MASK_Z 0x00000004 -#define OPERAND_4_COMPONENT_MASK_W 0x00000008 -#define OPERAND_4_COMPONENT_MASK_R OPERAND_4_COMPONENT_MASK_X -#define OPERAND_4_COMPONENT_MASK_G OPERAND_4_COMPONENT_MASK_Y -#define OPERAND_4_COMPONENT_MASK_B OPERAND_4_COMPONENT_MASK_Z -#define OPERAND_4_COMPONENT_MASK_A OPERAND_4_COMPONENT_MASK_W -#define OPERAND_4_COMPONENT_MASK_ALL 0x0000000f - -static uint32_t DecodeOperand4CompMask(uint32_t ui32Token) -{ - return (uint32_t)((ui32Token & 0x000000f0) >> 4); -} - -static uint32_t DecodeOperand4CompSwizzle(uint32_t ui32Token) -{ - return (uint32_t)((ui32Token & 0x00000ff0) >> 4); -} - -static uint32_t DecodeOperand4CompSel1(uint32_t ui32Token) -{ - return (uint32_t)((ui32Token & 0x00000030) >> 4); -} - -#define OPERAND_4_COMPONENT_X 0 -#define OPERAND_4_COMPONENT_Y 1 -#define OPERAND_4_COMPONENT_Z 2 -#define OPERAND_4_COMPONENT_W 3 - -static const uint32_t NO_SWIZZLE = (((OPERAND_4_COMPONENT_X) | (OPERAND_4_COMPONENT_Y << 2) | (OPERAND_4_COMPONENT_Z << 4) | (OPERAND_4_COMPONENT_W << 6)) /*<<4*/); - -static const uint32_t XXXX_SWIZZLE = (((OPERAND_4_COMPONENT_X) | (OPERAND_4_COMPONENT_X << 2) | (OPERAND_4_COMPONENT_X << 4) | (OPERAND_4_COMPONENT_X << 6))); -static const uint32_t YYYY_SWIZZLE = (((OPERAND_4_COMPONENT_Y) | 
(OPERAND_4_COMPONENT_Y << 2) | (OPERAND_4_COMPONENT_Y << 4) | (OPERAND_4_COMPONENT_Y << 6))); -static const uint32_t ZZZZ_SWIZZLE = (((OPERAND_4_COMPONENT_Z) | (OPERAND_4_COMPONENT_Z << 2) | (OPERAND_4_COMPONENT_Z << 4) | (OPERAND_4_COMPONENT_Z << 6))); -static const uint32_t WWWW_SWIZZLE = (((OPERAND_4_COMPONENT_W) | (OPERAND_4_COMPONENT_W << 2) | (OPERAND_4_COMPONENT_W << 4) | (OPERAND_4_COMPONENT_W << 6))); - -static uint32_t DecodeOperand4CompSwizzleSource(uint32_t ui32Token, uint32_t comp) -{ - return (uint32_t)(((ui32Token) >> (4 + 2 * ((comp) & 3))) & 3); -} - -typedef enum RESOURCE_DIMENSION -{ - RESOURCE_DIMENSION_UNKNOWN = 0, - RESOURCE_DIMENSION_BUFFER = 1, - RESOURCE_DIMENSION_TEXTURE1D = 2, - RESOURCE_DIMENSION_TEXTURE2D = 3, - RESOURCE_DIMENSION_TEXTURE2DMS = 4, - RESOURCE_DIMENSION_TEXTURE3D = 5, - RESOURCE_DIMENSION_TEXTURECUBE = 6, - RESOURCE_DIMENSION_TEXTURE1DARRAY = 7, - RESOURCE_DIMENSION_TEXTURE2DARRAY = 8, - RESOURCE_DIMENSION_TEXTURE2DMSARRAY = 9, - RESOURCE_DIMENSION_TEXTURECUBEARRAY = 10, - RESOURCE_DIMENSION_RAW_BUFFER = 11, - RESOURCE_DIMENSION_STRUCTURED_BUFFER = 12, -} RESOURCE_DIMENSION; - -static RESOURCE_DIMENSION DecodeResourceDimension(uint32_t ui32Token) -{ - return (RESOURCE_DIMENSION)((ui32Token & 0x0000f800) >> 11); -} - -static RESOURCE_DIMENSION DecodeExtendedResourceDimension(uint32_t ui32Token) -{ - return (RESOURCE_DIMENSION)((ui32Token & 0x000007C0) >> 6); -} - -typedef enum INSTRUCTION_TEST_BOOLEAN -{ - INSTRUCTION_TEST_ZERO = 0, - INSTRUCTION_TEST_NONZERO = 1 -} INSTRUCTION_TEST_BOOLEAN; - -static INSTRUCTION_TEST_BOOLEAN DecodeInstrTestBool(uint32_t ui32Token) -{ - return (INSTRUCTION_TEST_BOOLEAN)((ui32Token & 0x00040000) >> 18); -} - -static uint32_t DecodeIsOperandExtended(uint32_t ui32Token) -{ - return (ui32Token & 0x80000000) >> 31; -} - -typedef enum EXTENDED_OPERAND_TYPE -{ - EXTENDED_OPERAND_EMPTY = 0, - EXTENDED_OPERAND_MODIFIER = 1, -} EXTENDED_OPERAND_TYPE; - -static EXTENDED_OPERAND_TYPE 
DecodeExtendedOperandType(uint32_t ui32Token) -{ - return (EXTENDED_OPERAND_TYPE)(ui32Token & 0x0000003f); -} - -typedef enum OPERAND_MODIFIER -{ - OPERAND_MODIFIER_NONE = 0, - OPERAND_MODIFIER_NEG = 1, - OPERAND_MODIFIER_ABS = 2, - OPERAND_MODIFIER_ABSNEG = 3, -} OPERAND_MODIFIER; - -static OPERAND_MODIFIER DecodeExtendedOperandModifier(uint32_t ui32Token) -{ - return (OPERAND_MODIFIER)((ui32Token & 0x00003fc0) >> 6); -} - -static const uint32_t GLOBAL_FLAG_REFACTORING_ALLOWED = (1 << 11); -static const uint32_t GLOBAL_FLAG_ENABLE_DOUBLE_PRECISION_FLOAT_OPS = (1 << 12); -static const uint32_t GLOBAL_FLAG_FORCE_EARLY_DEPTH_STENCIL = (1 << 13); -static const uint32_t GLOBAL_FLAG_ENABLE_RAW_AND_STRUCTURED_BUFFERS = (1 << 14); -static const uint32_t GLOBAL_FLAG_SKIP_OPTIMIZATION = (1 << 15); -static const uint32_t GLOBAL_FLAG_ENABLE_MINIMUM_PRECISION = (1 << 16); -static const uint32_t GLOBAL_FLAG_ENABLE_DOUBLE_EXTENSIONS = (1 << 17); -static const uint32_t GLOBAL_FLAG_ENABLE_SHADER_EXTENSIONS = (1 << 18); - -static uint32_t DecodeGlobalFlags(uint32_t ui32Token) -{ - return (uint32_t)(ui32Token & 0x00fff800); -} - -static INTERPOLATION_MODE DecodeInterpolationMode(uint32_t ui32Token) -{ - return (INTERPOLATION_MODE)((ui32Token & 0x00007800) >> 11); -} - -typedef enum PRIMITIVE_TOPOLOGY -{ - PRIMITIVE_TOPOLOGY_UNDEFINED = 0, - PRIMITIVE_TOPOLOGY_POINTLIST = 1, - PRIMITIVE_TOPOLOGY_LINELIST = 2, - PRIMITIVE_TOPOLOGY_LINESTRIP = 3, - PRIMITIVE_TOPOLOGY_TRIANGLELIST = 4, - PRIMITIVE_TOPOLOGY_TRIANGLESTRIP = 5, - // 6 is reserved for legacy triangle fans - // Adjacency values should be equal to (0x8 & non-adjacency): - PRIMITIVE_TOPOLOGY_LINELIST_ADJ = 10, - PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ = 11, - PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ = 12, - PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ = 13, -} PRIMITIVE_TOPOLOGY; - -static PRIMITIVE_TOPOLOGY DecodeGSOutputPrimitiveTopology(uint32_t ui32Token) -{ - return (PRIMITIVE_TOPOLOGY)((ui32Token & 0x0001f800) >> 11); -} - -typedef enum 
PRIMITIVE -{ - PRIMITIVE_UNDEFINED = 0, - PRIMITIVE_POINT = 1, - PRIMITIVE_LINE = 2, - PRIMITIVE_TRIANGLE = 3, - // Adjacency values should be equal to (0x4 & non-adjacency): - PRIMITIVE_LINE_ADJ = 6, - PRIMITIVE_TRIANGLE_ADJ = 7, - PRIMITIVE_1_CONTROL_POINT_PATCH = 8, - PRIMITIVE_2_CONTROL_POINT_PATCH = 9, - PRIMITIVE_3_CONTROL_POINT_PATCH = 10, - PRIMITIVE_4_CONTROL_POINT_PATCH = 11, - PRIMITIVE_5_CONTROL_POINT_PATCH = 12, - PRIMITIVE_6_CONTROL_POINT_PATCH = 13, - PRIMITIVE_7_CONTROL_POINT_PATCH = 14, - PRIMITIVE_8_CONTROL_POINT_PATCH = 15, - PRIMITIVE_9_CONTROL_POINT_PATCH = 16, - PRIMITIVE_10_CONTROL_POINT_PATCH = 17, - PRIMITIVE_11_CONTROL_POINT_PATCH = 18, - PRIMITIVE_12_CONTROL_POINT_PATCH = 19, - PRIMITIVE_13_CONTROL_POINT_PATCH = 20, - PRIMITIVE_14_CONTROL_POINT_PATCH = 21, - PRIMITIVE_15_CONTROL_POINT_PATCH = 22, - PRIMITIVE_16_CONTROL_POINT_PATCH = 23, - PRIMITIVE_17_CONTROL_POINT_PATCH = 24, - PRIMITIVE_18_CONTROL_POINT_PATCH = 25, - PRIMITIVE_19_CONTROL_POINT_PATCH = 26, - PRIMITIVE_20_CONTROL_POINT_PATCH = 27, - PRIMITIVE_21_CONTROL_POINT_PATCH = 28, - PRIMITIVE_22_CONTROL_POINT_PATCH = 29, - PRIMITIVE_23_CONTROL_POINT_PATCH = 30, - PRIMITIVE_24_CONTROL_POINT_PATCH = 31, - PRIMITIVE_25_CONTROL_POINT_PATCH = 32, - PRIMITIVE_26_CONTROL_POINT_PATCH = 33, - PRIMITIVE_27_CONTROL_POINT_PATCH = 34, - PRIMITIVE_28_CONTROL_POINT_PATCH = 35, - PRIMITIVE_29_CONTROL_POINT_PATCH = 36, - PRIMITIVE_30_CONTROL_POINT_PATCH = 37, - PRIMITIVE_31_CONTROL_POINT_PATCH = 38, - PRIMITIVE_32_CONTROL_POINT_PATCH = 39, -} PRIMITIVE; - -static PRIMITIVE DecodeGSInputPrimitive(uint32_t ui32Token) -{ - return (PRIMITIVE)((ui32Token & 0x0001f800) >> 11); -} - -static TESSELLATOR_PARTITIONING DecodeTessPartitioning(uint32_t ui32Token) -{ - return (TESSELLATOR_PARTITIONING)((ui32Token & 0x00003800) >> 11); -} - -static TESSELLATOR_DOMAIN DecodeTessDomain(uint32_t ui32Token) -{ - return (TESSELLATOR_DOMAIN)((ui32Token & 0x00001800) >> 11); -} - -static TESSELLATOR_OUTPUT_PRIMITIVE 
DecodeTessOutPrim(uint32_t ui32Token) -{ - return (TESSELLATOR_OUTPUT_PRIMITIVE)((ui32Token & 0x00003800) >> 11); -} - -static const uint32_t SYNC_THREADS_IN_GROUP = 0x00000800; -static const uint32_t SYNC_THREAD_GROUP_SHARED_MEMORY = 0x00001000; -static const uint32_t SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GROUP = 0x00002000; -static const uint32_t SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GLOBAL = 0x00004000; - -static uint32_t DecodeSyncFlags(uint32_t ui32Token) -{ - return ui32Token & 0x00007800; -} - -// The number of types that implement this interface -static uint32_t DecodeInterfaceTableLength(uint32_t ui32Token) -{ - return (uint32_t)((ui32Token & 0x0000ffff) >> 0); -} - -// The number of interfaces that are defined in this array. -static uint32_t DecodeInterfaceArrayLength(uint32_t ui32Token) -{ - return (uint32_t)((ui32Token & 0xffff0000) >> 16); -} - -typedef enum CUSTOMDATA_CLASS -{ - CUSTOMDATA_COMMENT = 0, - CUSTOMDATA_DEBUGINFO, - CUSTOMDATA_OPAQUE, - CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER, - CUSTOMDATA_SHADER_MESSAGE, -} CUSTOMDATA_CLASS; - -static CUSTOMDATA_CLASS DecodeCustomDataClass(uint32_t ui32Token) -{ - return (CUSTOMDATA_CLASS)((ui32Token & 0xfffff800) >> 11); -} - -static uint32_t DecodeInstructionSaturate(uint32_t ui32Token) -{ - return (ui32Token & 0x00002000) ? 1 : 0; -} - -static uint32_t DecodeInstructionPreciseMask(uint32_t ui32Token) // "precise" keyword -{ - return (uint32_t)((ui32Token & 0x00780000) >> 19); -} - -typedef enum OPERAND_MIN_PRECISION -{ - OPERAND_MIN_PRECISION_DEFAULT = 0, // Default precision - // for the shader model - OPERAND_MIN_PRECISION_FLOAT_16 = 1, // Min 16 bit/component float - OPERAND_MIN_PRECISION_FLOAT_2_8 = 2, // Min 10(2.8)bit/comp. float - OPERAND_MIN_PRECISION_SINT_16 = 4, // Min 16 bit/comp. signed integer - OPERAND_MIN_PRECISION_UINT_16 = 5, // Min 16 bit/comp. 
unsigned integer -} OPERAND_MIN_PRECISION; - -static uint32_t DecodeOperandMinPrecision(uint32_t ui32Token) -{ - return (ui32Token & 0x0001C000) >> 14; -} - -static uint32_t DecodeOutputControlPointCount(uint32_t ui32Token) -{ - return ((ui32Token & 0x0001f800) >> 11); -} - -typedef enum IMMEDIATE_ADDRESS_OFFSET_COORD -{ - IMMEDIATE_ADDRESS_OFFSET_U = 0, - IMMEDIATE_ADDRESS_OFFSET_V = 1, - IMMEDIATE_ADDRESS_OFFSET_W = 2, -} IMMEDIATE_ADDRESS_OFFSET_COORD; - - -#define IMMEDIATE_ADDRESS_OFFSET_SHIFT(Coord) (9+4*((Coord)&3)) -#define IMMEDIATE_ADDRESS_OFFSET_MASK(Coord) (0x0000000f<> (IMMEDIATE_ADDRESS_OFFSET_SHIFT(eCoord)))); -} - -// UAV access scope flags -static const uint32_t GLOBALLY_COHERENT_ACCESS = 0x00010000; -static uint32_t DecodeAccessCoherencyFlags(uint32_t ui32Token) -{ - return ui32Token & 0x00010000; -} - -typedef enum RESINFO_RETURN_TYPE -{ - RESINFO_INSTRUCTION_RETURN_FLOAT = 0, - RESINFO_INSTRUCTION_RETURN_RCPFLOAT = 1, - RESINFO_INSTRUCTION_RETURN_UINT = 2 -} RESINFO_RETURN_TYPE; - -static RESINFO_RETURN_TYPE DecodeResInfoReturnType(uint32_t ui32Token) -{ - return (RESINFO_RETURN_TYPE)((ui32Token & 0x00001800) >> 11); -} - -typedef enum SB_SAMPLER_MODE -{ - D3D10_SB_SAMPLER_MODE_DEFAULT = 0, - D3D10_SB_SAMPLER_MODE_COMPARISON = 1, - D3D10_SB_SAMPLER_MODE_MONO = 2, -} SB_SAMPLER_MODE; - -static SB_SAMPLER_MODE DecodeSamplerMode(uint32_t ui32Token) -{ - return (SB_SAMPLER_MODE)((ui32Token & 0x00001800) >> 11); -} - -#endif diff --git a/third_party/HLSLcc/src/reflect.cpp b/third_party/HLSLcc/src/reflect.cpp deleted file mode 100644 index 303dbfd..0000000 --- a/third_party/HLSLcc/src/reflect.cpp +++ /dev/null @@ -1,620 +0,0 @@ -#include "internal_includes/reflect.h" -#include "internal_includes/debug.h" -#include "internal_includes/decode.h" -#include "bstrlib.h" -#include -#include -#include - -static void FormatVariableName(std::string & Name) -{ - /* MSDN http://msdn.microsoft.com/en-us/library/windows/desktop/bb944006(v=vs.85).aspx - The uniform 
function parameters appear in the - constant table prepended with a dollar sign ($), - unlike the global variables. The dollar sign is - required to avoid name collisions between local - uniform inputs and global variables of the same name.*/ - - /* Leave $ThisPointer, $Element and $Globals as-is. - Otherwise remove $ character ($ is not a valid character for GLSL variable names). */ - if (Name[0] == '$') - { - if (strcmp(Name.c_str(), "$Element") != 0 && - strcmp(Name.c_str(), "$Globals") != 0 && - strcmp(Name.c_str(), "$ThisPointer") != 0) - { - Name[0] = '_'; - } - } -} - -static std::string ReadStringFromTokenStream(const uint32_t* tokens) -{ - char* charTokens = (char*)tokens; - return std::string(charTokens); -} - -static int MaskToRebaseOffset(const uint32_t mask) -{ - int res = 0; - uint32_t m = mask; - while ((m & 1) == 0) - { - res++; - m = m >> 1; - } - return res; -} - -static void ReadInputSignatures(const uint32_t* pui32Tokens, - ShaderInfo* psShaderInfo, - const int extended) -{ - uint32_t i; - - const uint32_t* pui32FirstSignatureToken = pui32Tokens; - const uint32_t ui32ElementCount = *pui32Tokens++; - /* const uint32_t ui32Key = * */ pui32Tokens++; - - psShaderInfo->psInputSignatures.clear(); - psShaderInfo->psInputSignatures.resize(ui32ElementCount); - - for (i = 0; i < ui32ElementCount; ++i) - { - uint32_t ui32ComponentMasks; - ShaderInfo::InOutSignature* psCurrentSignature = &psShaderInfo->psInputSignatures[i]; - uint32_t ui32SemanticNameOffset; - - psCurrentSignature->ui32Stream = 0; - psCurrentSignature->eMinPrec = MIN_PRECISION_DEFAULT; - - if (extended) - psCurrentSignature->ui32Stream = *pui32Tokens++; - - ui32SemanticNameOffset = *pui32Tokens++; - psCurrentSignature->ui32SemanticIndex = *pui32Tokens++; - psCurrentSignature->eSystemValueType = (SPECIAL_NAME)*pui32Tokens++; - psCurrentSignature->eComponentType = (INOUT_COMPONENT_TYPE)*pui32Tokens++; - psCurrentSignature->ui32Register = *pui32Tokens++; - - ui32ComponentMasks = 
*pui32Tokens++; - psCurrentSignature->ui32Mask = ui32ComponentMasks & 0x7F; - //Shows which components are read - psCurrentSignature->ui32ReadWriteMask = (ui32ComponentMasks & 0x7F00) >> 8; - psCurrentSignature->iRebase = MaskToRebaseOffset(psCurrentSignature->ui32Mask); - - if (extended) - psCurrentSignature->eMinPrec = (MIN_PRECISION)*pui32Tokens++; - - psCurrentSignature->semanticName = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstSignatureToken + ui32SemanticNameOffset)); - } -} - -static void ReadOutputSignatures(const uint32_t* pui32Tokens, - ShaderInfo* psShaderInfo, - const int minPrec, - const int streams) -{ - uint32_t i; - - const uint32_t* pui32FirstSignatureToken = pui32Tokens; - const uint32_t ui32ElementCount = *pui32Tokens++; - /*const uint32_t ui32Key = * */ pui32Tokens++; - - psShaderInfo->psOutputSignatures.clear(); - psShaderInfo->psOutputSignatures.resize(ui32ElementCount); - - for (i = 0; i < ui32ElementCount; ++i) - { - uint32_t ui32ComponentMasks; - ShaderInfo::InOutSignature* psCurrentSignature = &psShaderInfo->psOutputSignatures[i]; - uint32_t ui32SemanticNameOffset; - - psCurrentSignature->ui32Stream = 0; - psCurrentSignature->eMinPrec = MIN_PRECISION_DEFAULT; - - if (streams) - psCurrentSignature->ui32Stream = *pui32Tokens++; - - ui32SemanticNameOffset = *pui32Tokens++; - psCurrentSignature->ui32SemanticIndex = *pui32Tokens++; - psCurrentSignature->eSystemValueType = (SPECIAL_NAME)*pui32Tokens++; - psCurrentSignature->eComponentType = (INOUT_COMPONENT_TYPE)*pui32Tokens++; - psCurrentSignature->ui32Register = *pui32Tokens++; - - // Massage some special inputs/outputs to match the types of GLSL counterparts - if (psCurrentSignature->eSystemValueType == NAME_RENDER_TARGET_ARRAY_INDEX) - { - psCurrentSignature->eComponentType = INOUT_COMPONENT_SINT32; - } - - ui32ComponentMasks = *pui32Tokens++; - psCurrentSignature->ui32Mask = ui32ComponentMasks & 0x7F; - //Shows which components are NEVER written. 
- psCurrentSignature->ui32ReadWriteMask = (ui32ComponentMasks & 0x7F00) >> 8; - psCurrentSignature->iRebase = MaskToRebaseOffset(psCurrentSignature->ui32Mask); - - if (minPrec) - psCurrentSignature->eMinPrec = (MIN_PRECISION)*pui32Tokens++; - - psCurrentSignature->semanticName = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstSignatureToken + ui32SemanticNameOffset)); - } -} - -static void ReadPatchConstantSignatures(const uint32_t* pui32Tokens, - ShaderInfo* psShaderInfo, - const int minPrec, - const int streams) -{ - uint32_t i; - - const uint32_t* pui32FirstSignatureToken = pui32Tokens; - const uint32_t ui32ElementCount = *pui32Tokens++; - /*const uint32_t ui32Key = * */ pui32Tokens++; - - psShaderInfo->psPatchConstantSignatures.clear(); - psShaderInfo->psPatchConstantSignatures.resize(ui32ElementCount); - - for (i = 0; i < ui32ElementCount; ++i) - { - uint32_t ui32ComponentMasks; - ShaderInfo::InOutSignature* psCurrentSignature = &psShaderInfo->psPatchConstantSignatures[i]; - uint32_t ui32SemanticNameOffset; - - psCurrentSignature->ui32Stream = 0; - psCurrentSignature->eMinPrec = MIN_PRECISION_DEFAULT; - - if (streams) - psCurrentSignature->ui32Stream = *pui32Tokens++; - - ui32SemanticNameOffset = *pui32Tokens++; - psCurrentSignature->ui32SemanticIndex = *pui32Tokens++; - psCurrentSignature->eSystemValueType = (SPECIAL_NAME)*pui32Tokens++; - psCurrentSignature->eComponentType = (INOUT_COMPONENT_TYPE)*pui32Tokens++; - psCurrentSignature->ui32Register = *pui32Tokens++; - - // Massage some special inputs/outputs to match the types of GLSL counterparts - if (psCurrentSignature->eSystemValueType == NAME_RENDER_TARGET_ARRAY_INDEX) - { - psCurrentSignature->eComponentType = INOUT_COMPONENT_SINT32; - } - - ui32ComponentMasks = *pui32Tokens++; - psCurrentSignature->ui32Mask = ui32ComponentMasks & 0x7F; - //Shows which components are NEVER written. 
- psCurrentSignature->ui32ReadWriteMask = (ui32ComponentMasks & 0x7F00) >> 8; - psCurrentSignature->iRebase = MaskToRebaseOffset(psCurrentSignature->ui32Mask); - - if (minPrec) - psCurrentSignature->eMinPrec = (MIN_PRECISION)*pui32Tokens++; - - psCurrentSignature->semanticName = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstSignatureToken + ui32SemanticNameOffset)); - } -} - -static const uint32_t* ReadResourceBinding(ShaderInfo* psShaderInfo, const uint32_t* pui32FirstResourceToken, const uint32_t* pui32Tokens, ResourceBinding* psBinding, uint32_t decodeFlags) -{ - uint32_t ui32NameOffset = *pui32Tokens++; - - psBinding->name = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstResourceToken + ui32NameOffset)); - FormatVariableName(psBinding->name); - - psBinding->eType = (ResourceType) * pui32Tokens++; - psBinding->ui32ReturnType = (RESOURCE_RETURN_TYPE)*pui32Tokens++; - psBinding->eDimension = (REFLECT_RESOURCE_DIMENSION)*pui32Tokens++; - psBinding->ui32NumSamples = *pui32Tokens++; // fxc generates 2^32 - 1 for non MS images - psBinding->ui32BindPoint = *pui32Tokens++; - psBinding->ui32BindCount = *pui32Tokens++; - psBinding->ui32Flags = *pui32Tokens++; - if (((psShaderInfo->ui32MajorVersion >= 5) && (psShaderInfo->ui32MinorVersion >= 1)) || - (psShaderInfo->ui32MajorVersion > 5)) - { - psBinding->ui32Space = *pui32Tokens++; - psBinding->ui32RangeID = *pui32Tokens++; - } - - psBinding->ePrecision = REFLECT_RESOURCE_PRECISION_UNKNOWN; - - if (decodeFlags & HLSLCC_FLAG_SAMPLER_PRECISION_ENCODED_IN_NAME) - { - if (psBinding->name.rfind("_highp") == psBinding->name.length() - 6) - { - psBinding->ePrecision = REFLECT_RESOURCE_PRECISION_HIGHP; - psBinding->name.resize(psBinding->name.length() - 6); - } - else if (psBinding->name.rfind("_mediump") == psBinding->name.length() - 8) - { - psBinding->ePrecision = REFLECT_RESOURCE_PRECISION_MEDIUMP; - psBinding->name.resize(psBinding->name.length() - 8); - } - else if 
(psBinding->name.rfind("_lowp") == psBinding->name.length() - 5) - { - psBinding->ePrecision = REFLECT_RESOURCE_PRECISION_LOWP; - psBinding->name.resize(psBinding->name.length() - 5); - } - } - - return pui32Tokens; -} - -//Read D3D11_SHADER_TYPE_DESC -static void ReadShaderVariableType(const uint32_t ui32MajorVersion, - const uint32_t* pui32FirstConstBufToken, - const uint32_t* pui32tokens, ShaderVarType* varType) -{ - const uint16_t* pui16Tokens = (const uint16_t*)pui32tokens; - uint16_t ui32MemberCount; - uint32_t ui32MemberOffset; - const uint32_t* pui32MemberTokens; - uint32_t i; - - varType->Class = (SHADER_VARIABLE_CLASS)pui16Tokens[0]; - varType->Type = (SHADER_VARIABLE_TYPE)pui16Tokens[1]; - varType->Rows = pui16Tokens[2]; - varType->Columns = pui16Tokens[3]; - varType->Elements = pui16Tokens[4]; - - varType->MemberCount = ui32MemberCount = pui16Tokens[5]; - varType->Members.clear(); - - if (varType->ParentCount) - { - // Add empty brackets for array parents. Indices are filled in later in the printing codes. - if (varType->Parent->Elements > 1) - varType->fullName = varType->Parent->fullName + "[]." + varType->name; - else - varType->fullName = varType->Parent->fullName + "." 
+ varType->name; - } - - if (ui32MemberCount) - { - varType->Members.resize(ui32MemberCount); - - ui32MemberOffset = pui32tokens[3]; - - pui32MemberTokens = (const uint32_t*)((const char*)pui32FirstConstBufToken + ui32MemberOffset); - - for (i = 0; i < ui32MemberCount; ++i) - { - uint32_t ui32NameOffset = *pui32MemberTokens++; - uint32_t ui32MemberTypeOffset = *pui32MemberTokens++; - - varType->Members[i].Parent = varType; - varType->Members[i].ParentCount = varType->ParentCount + 1; - - varType->Members[i].Offset = *pui32MemberTokens++; - - varType->Members[i].name = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstConstBufToken + ui32NameOffset)); - - ReadShaderVariableType(ui32MajorVersion, pui32FirstConstBufToken, - (const uint32_t*)((const char*)pui32FirstConstBufToken + ui32MemberTypeOffset), &varType->Members[i]); - } - } -} - -static const uint32_t* ReadConstantBuffer(ShaderInfo* psShaderInfo, - const uint32_t* pui32FirstConstBufToken, const uint32_t* pui32Tokens, ConstantBuffer* psBuffer) -{ - uint32_t i; - uint32_t ui32NameOffset = *pui32Tokens++; - uint32_t ui32VarCount = *pui32Tokens++; - uint32_t ui32VarOffset = *pui32Tokens++; - const uint32_t* pui32VarToken = (const uint32_t*)((const char*)pui32FirstConstBufToken + ui32VarOffset); - - psBuffer->name = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstConstBufToken + ui32NameOffset)); - FormatVariableName(psBuffer->name); - - psBuffer->asVars.clear(); - psBuffer->asVars.resize(ui32VarCount); - - for (i = 0; i < ui32VarCount; ++i) - { - //D3D11_SHADER_VARIABLE_DESC - ShaderVar * const psVar = &psBuffer->asVars[i]; - - uint32_t ui32TypeOffset; - uint32_t ui32DefaultValueOffset; - - ui32NameOffset = *pui32VarToken++; - - psVar->name = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstConstBufToken + ui32NameOffset)); - FormatVariableName(psVar->name); - - psVar->ui32StartOffset = *pui32VarToken++; - psVar->ui32Size = *pui32VarToken++; - - //skip 
ui32Flags - pui32VarToken++; - - ui32TypeOffset = *pui32VarToken++; - - psVar->sType.name = psVar->name; - psVar->sType.fullName = psVar->name; - psVar->sType.Parent = 0; - psVar->sType.ParentCount = 0; - psVar->sType.Offset = 0; - psVar->sType.m_IsUsed = false; - - ReadShaderVariableType(psShaderInfo->ui32MajorVersion, pui32FirstConstBufToken, - (const uint32_t*)((const char*)pui32FirstConstBufToken + ui32TypeOffset), &psVar->sType); - - ui32DefaultValueOffset = *pui32VarToken++; - - - if (psShaderInfo->ui32MajorVersion >= 5) - { - /*uint32_t StartTexture = * */ pui32VarToken++; - /*uint32_t TextureSize = * */ pui32VarToken++; - /*uint32_t StartSampler = * */ pui32VarToken++; - /*uint32_t SamplerSize = * */ pui32VarToken++; - } - - psVar->haveDefaultValue = 0; - - if (ui32DefaultValueOffset) - { - uint32_t i = 0; - const uint32_t ui32NumDefaultValues = psVar->ui32Size / 4; - const uint32_t* pui32DefaultValToken = (const uint32_t*)((const char*)pui32FirstConstBufToken + ui32DefaultValueOffset); - - //Always a sequence of 4-bytes at the moment. - //bool const becomes 0 or 0xFFFFFFFF int, int & float are 4-bytes. 
- ASSERT(psVar->ui32Size % 4 == 0); - - psVar->haveDefaultValue = 1; - - psVar->pui32DefaultValues.clear(); - psVar->pui32DefaultValues.resize(psVar->ui32Size / 4); - - for (i = 0; i < ui32NumDefaultValues; ++i) - { - psVar->pui32DefaultValues[i] = pui32DefaultValToken[i]; - } - } - } - - - { - psBuffer->ui32TotalSizeInBytes = *pui32Tokens++; - - //skip ui32Flags - pui32Tokens++; - //skip ui32BufferType - pui32Tokens++; - } - - return pui32Tokens; -} - -static void ReadResources(const uint32_t* pui32Tokens,//in - ShaderInfo* psShaderInfo, //out - uint32_t decodeFlags) -{ - ResourceBinding* psResBindings; - ConstantBuffer* psConstantBuffers; - const uint32_t* pui32ConstantBuffers; - const uint32_t* pui32ResourceBindings; - const uint32_t* pui32FirstToken = pui32Tokens; - uint32_t i; - - const uint32_t ui32NumConstantBuffers = *pui32Tokens++; - const uint32_t ui32ConstantBufferOffset = *pui32Tokens++; - - uint32_t ui32NumResourceBindings = *pui32Tokens++; - uint32_t ui32ResourceBindingOffset = *pui32Tokens++; - /*uint32_t ui32ShaderModel = * */ pui32Tokens++; - /*uint32_t ui32CompileFlags = * */ pui32Tokens++;//D3DCompile flags? http://msdn.microsoft.com/en-us/library/gg615083(v=vs.85).aspx - - //Resources - pui32ResourceBindings = (const uint32_t*)((const char*)pui32FirstToken + ui32ResourceBindingOffset); - - psShaderInfo->psResourceBindings.clear(); - psShaderInfo->psResourceBindings.resize(ui32NumResourceBindings); - psResBindings = ui32NumResourceBindings == 0 ? 
NULL : &psShaderInfo->psResourceBindings[0]; - - for (i = 0; i < ui32NumResourceBindings; ++i) - { - pui32ResourceBindings = ReadResourceBinding(psShaderInfo, pui32FirstToken, pui32ResourceBindings, psResBindings + i, decodeFlags); - ASSERT(psResBindings[i].ui32BindPoint < MAX_RESOURCE_BINDINGS); - } - - //Constant buffers - pui32ConstantBuffers = (const uint32_t*)((const char*)pui32FirstToken + ui32ConstantBufferOffset); - - psShaderInfo->psConstantBuffers.clear(); - psShaderInfo->psConstantBuffers.resize(ui32NumConstantBuffers); - psConstantBuffers = ui32NumConstantBuffers == 0 ? NULL : &psShaderInfo->psConstantBuffers[0]; - - for (i = 0; i < ui32NumConstantBuffers; ++i) - { - pui32ConstantBuffers = ReadConstantBuffer(psShaderInfo, pui32FirstToken, pui32ConstantBuffers, psConstantBuffers + i); - } - - //Map resource bindings to constant buffers - if (psShaderInfo->psConstantBuffers.size()) - { - /* HLSL allows the following: - cbuffer A - {...} - cbuffer A - {...} - And both will be present in the assembly if used - - So we need to track which ones we matched already and throw an error if two buffers have the same name - */ - std::vector alreadyBound(ui32NumConstantBuffers, 0); - for (i = 0; i < ui32NumResourceBindings; ++i) - { - ResourceGroup eRGroup; - uint32_t cbufIndex = 0; - - eRGroup = ShaderInfo::ResourceTypeToResourceGroup(psResBindings[i].eType); - - //Find the constant buffer whose name matches the resource at the given resource binding point - for (cbufIndex = 0; cbufIndex < psShaderInfo->psConstantBuffers.size(); cbufIndex++) - { - if (psConstantBuffers[cbufIndex].name == psResBindings[i].name && alreadyBound[cbufIndex] == 0) - { - psShaderInfo->aui32ResourceMap[eRGroup][psResBindings[i].ui32BindPoint] = cbufIndex; - alreadyBound[cbufIndex] = 1; - break; - } - } - } - } -} - -static const uint16_t* ReadClassType(const uint32_t* pui32FirstInterfaceToken, const uint16_t* pui16Tokens, ClassType* psClassType) -{ - const uint32_t* pui32Tokens = (const 
uint32_t*)pui16Tokens; - uint32_t ui32NameOffset = *pui32Tokens; - pui16Tokens += 2; - - psClassType->ui16ID = *pui16Tokens++; - psClassType->ui16ConstBufStride = *pui16Tokens++; - psClassType->ui16Texture = *pui16Tokens++; - psClassType->ui16Sampler = *pui16Tokens++; - - psClassType->name = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstInterfaceToken + ui32NameOffset)); - - return pui16Tokens; -} - -static const uint16_t* ReadClassInstance(const uint32_t* pui32FirstInterfaceToken, const uint16_t* pui16Tokens, ClassInstance* psClassInstance) -{ - uint32_t ui32NameOffset = *pui16Tokens++ << 16; - ui32NameOffset |= *pui16Tokens++; - - psClassInstance->ui16ID = *pui16Tokens++; - psClassInstance->ui16ConstBuf = *pui16Tokens++; - psClassInstance->ui16ConstBufOffset = *pui16Tokens++; - psClassInstance->ui16Texture = *pui16Tokens++; - psClassInstance->ui16Sampler = *pui16Tokens++; - - psClassInstance->name = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstInterfaceToken + ui32NameOffset)); - - return pui16Tokens; -} - -static void ReadInterfaces(const uint32_t* pui32Tokens, - ShaderInfo* psShaderInfo) -{ - uint32_t i; - uint32_t ui32StartSlot; - const uint32_t* pui32FirstInterfaceToken = pui32Tokens; - const uint32_t ui32ClassInstanceCount = *pui32Tokens++; - const uint32_t ui32ClassTypeCount = *pui32Tokens++; - const uint32_t ui32InterfaceSlotRecordCount = *pui32Tokens++; - /*const uint32_t ui32InterfaceSlotCount = * */ pui32Tokens++; - const uint32_t ui32ClassInstanceOffset = *pui32Tokens++; - const uint32_t ui32ClassTypeOffset = *pui32Tokens++; - const uint32_t ui32InterfaceSlotOffset = *pui32Tokens++; - - const uint16_t* pui16ClassTypes = (const uint16_t*)((const char*)pui32FirstInterfaceToken + ui32ClassTypeOffset); - const uint16_t* pui16ClassInstances = (const uint16_t*)((const char*)pui32FirstInterfaceToken + ui32ClassInstanceOffset); - const uint32_t* pui32InterfaceSlots = (const uint32_t*)((const 
char*)pui32FirstInterfaceToken + ui32InterfaceSlotOffset); - - const uint32_t* pui32InterfaceSlotTokens = pui32InterfaceSlots; - - ClassType* psClassTypes; - ClassInstance* psClassInstances; - - psShaderInfo->psClassTypes.clear(); - psShaderInfo->psClassTypes.resize(ui32ClassTypeCount); - psClassTypes = &psShaderInfo->psClassTypes[0]; - - for (i = 0; i < ui32ClassTypeCount; ++i) - { - pui16ClassTypes = ReadClassType(pui32FirstInterfaceToken, pui16ClassTypes, psClassTypes + i); - psClassTypes[i].ui16ID = (uint16_t)i; - } - - psShaderInfo->psClassInstances.clear(); - psShaderInfo->psClassInstances.resize(ui32ClassInstanceCount); - psClassInstances = &psShaderInfo->psClassInstances[0]; - - for (i = 0; i < ui32ClassInstanceCount; ++i) - { - pui16ClassInstances = ReadClassInstance(pui32FirstInterfaceToken, pui16ClassInstances, psClassInstances + i); - } - - //Slots map function table to $ThisPointer cbuffer variable index - ui32StartSlot = 0; - for (i = 0; i < ui32InterfaceSlotRecordCount; ++i) - { - uint32_t k; - - const uint32_t ui32SlotSpan = *pui32InterfaceSlotTokens++; - const uint32_t ui32Count = *pui32InterfaceSlotTokens++; - const uint32_t ui32TypeIDOffset = *pui32InterfaceSlotTokens++; - const uint32_t ui32TableIDOffset = *pui32InterfaceSlotTokens++; - - const uint16_t* pui16TypeID = (const uint16_t*)((const char*)pui32FirstInterfaceToken + ui32TypeIDOffset); - const uint32_t* pui32TableID = (const uint32_t*)((const char*)pui32FirstInterfaceToken + ui32TableIDOffset); - - for (k = 0; k < ui32Count; ++k) - { - psShaderInfo->aui32TableIDToTypeID[*pui32TableID++] = *pui16TypeID++; - } - - ui32StartSlot += ui32SlotSpan; - } -} - -void LoadShaderInfo(const uint32_t ui32MajorVersion, - const uint32_t ui32MinorVersion, - const ReflectionChunks* psChunks, - ShaderInfo* psInfo, - uint32_t decodeFlags) -{ - const uint32_t* pui32Inputs = psChunks->pui32Inputs; - const uint32_t* pui32Inputs11 = psChunks->pui32Inputs11; - const uint32_t* pui32Resources = 
psChunks->pui32Resources; - const uint32_t* pui32Interfaces = psChunks->pui32Interfaces; - const uint32_t* pui32Outputs = psChunks->pui32Outputs; - const uint32_t* pui32Outputs11 = psChunks->pui32Outputs11; - const uint32_t* pui32OutputsWithStreams = psChunks->pui32OutputsWithStreams; - const uint32_t* pui32PatchConstants = psChunks->pui32PatchConstants; - const uint32_t* pui32PatchConstants11 = psChunks->pui32PatchConstants11; - - psInfo->eTessOutPrim = TESSELLATOR_OUTPUT_UNDEFINED; - psInfo->eTessPartitioning = TESSELLATOR_PARTITIONING_UNDEFINED; - psInfo->ui32TessInputControlPointCount = 0; - psInfo->ui32TessOutputControlPointCount = 0; - psInfo->eTessDomain = TESSELLATOR_DOMAIN_UNDEFINED; - psInfo->bEarlyFragmentTests = false; - - psInfo->ui32MajorVersion = ui32MajorVersion; - psInfo->ui32MinorVersion = ui32MinorVersion; - - - if (pui32Inputs) - ReadInputSignatures(pui32Inputs, psInfo, 0); - if (pui32Inputs11) - ReadInputSignatures(pui32Inputs11, psInfo, 1); - if (pui32Resources) - ReadResources(pui32Resources, psInfo, decodeFlags); - if (pui32Interfaces) - ReadInterfaces(pui32Interfaces, psInfo); - if (pui32Outputs) - ReadOutputSignatures(pui32Outputs, psInfo, 0, 0); - if (pui32Outputs11) - ReadOutputSignatures(pui32Outputs11, psInfo, 1, 1); - if (pui32OutputsWithStreams) - ReadOutputSignatures(pui32OutputsWithStreams, psInfo, 0, 1); - if (pui32PatchConstants) - ReadPatchConstantSignatures(pui32PatchConstants, psInfo, 0, 0); - if (pui32PatchConstants11) - ReadPatchConstantSignatures(pui32PatchConstants11, psInfo, 1, 1); - - { - uint32_t i; - for (i = 0; i < psInfo->psConstantBuffers.size(); ++i) - { - if (psInfo->psConstantBuffers[i].name == "$ThisPointer") - { - psInfo->psThisPointerConstBuffer = &psInfo->psConstantBuffers[i]; - } - } - } -} diff --git a/third_party/HLSLcc/src/toGLSL.cpp b/third_party/HLSLcc/src/toGLSL.cpp deleted file mode 100644 index c6c4e14..0000000 --- a/third_party/HLSLcc/src/toGLSL.cpp +++ /dev/null @@ -1,1190 +0,0 @@ -#include - 
-#include "internal_includes/tokens.h" -#include "internal_includes/decode.h" -#include "stdlib.h" -#include "stdio.h" -#include "bstrlib.h" -#include "internal_includes/toGLSL.h" -#include "internal_includes/toGLSLOperand.h" -#include "internal_includes/Declaration.h" -#include "internal_includes/languages.h" -#include "internal_includes/debug.h" -#include "internal_includes/HLSLccToolkit.h" -#include "internal_includes/UseDefineChains.h" -#include "internal_includes/DataTypeAnalysis.h" -#include "internal_includes/Shader.h" -#include "internal_includes/HLSLCrossCompilerContext.h" -#include "internal_includes/Instruction.h" -#include "internal_includes/LoopTransform.h" -#include "UnityInstancingFlexibleArraySize.h" -#include -#include - -// In GLSL, the input and output names cannot clash. -// Also, the output name of previous stage must match the input name of the next stage. -// So, do gymnastics depending on which shader we're running on and which other shaders exist in this program. 
-// -void ToGLSL::SetIOPrefixes() -{ - switch (psContext->psShader->eShaderType) - { - case VERTEX_SHADER: - psContext->inputPrefix = "in_"; - psContext->outputPrefix = "vs_"; - break; - - case HULL_SHADER: - // Input always coming from vertex shader - psContext->inputPrefix = "vs_"; - psContext->outputPrefix = "hs_"; - break; - - case DOMAIN_SHADER: - // There's no domain shader without hull shader - psContext->inputPrefix = "hs_"; - psContext->outputPrefix = "ds_"; - break; - - case GEOMETRY_SHADER: - // The input depends on whether there's a tessellation shader before us - if (psContext->psDependencies && (psContext->psDependencies->ui32ProgramStages & PS_FLAG_DOMAIN_SHADER)) - psContext->inputPrefix = "ds_"; - else - psContext->inputPrefix = "vs_"; - - psContext->outputPrefix = "gs_"; - break; - - case PIXEL_SHADER: - // The inputs can come from geom shader, domain shader or directly from vertex shader - if (psContext->psDependencies) - { - if (psContext->psDependencies->ui32ProgramStages & PS_FLAG_GEOMETRY_SHADER) - { - psContext->inputPrefix = "gs_"; - } - else if (psContext->psDependencies->ui32ProgramStages & PS_FLAG_DOMAIN_SHADER) - { - psContext->inputPrefix = "ds_"; - } - else - { - psContext->inputPrefix = "vs_"; - } - } - else - { - psContext->inputPrefix = "vs_"; - } - psContext->outputPrefix = ""; - break; - - - case COMPUTE_SHADER: - default: - // No prefixes - psContext->inputPrefix = ""; - psContext->outputPrefix = ""; - break; - } -} - -static void AddVersionDependentCode(HLSLCrossCompilerContext* psContext) -{ - bstring glsl = *psContext->currentGLSLString; - bstring extensions = psContext->extensions; - bool isES = (psContext->psShader->eTargetLanguage >= LANG_ES_100 && psContext->psShader->eTargetLanguage <= LANG_ES_310); - bool GL_ARB_shader_storage_buffer_object = false; - bool GL_ARB_shader_image_load_store = false; - - if (psContext->psShader->ui32MajorVersion > 3 && psContext->psShader->eTargetLanguage != LANG_ES_100 && 
psContext->psShader->eTargetLanguage != LANG_ES_300 && psContext->psShader->eTargetLanguage != LANG_ES_310 && !(psContext->psShader->eTargetLanguage >= LANG_330)) - { - psContext->EnableExtension("GL_ARB_shader_bit_encoding"); - } - - if (!HaveCompute(psContext->psShader->eTargetLanguage)) - { - if (psContext->psShader->eShaderType == COMPUTE_SHADER) - { - psContext->EnableExtension("GL_ARB_compute_shader"); - } - - if (psContext->psShader->aiOpcodeUsed[OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED] || - psContext->psShader->aiOpcodeUsed[OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW] || - psContext->psShader->aiOpcodeUsed[OPCODE_DCL_RESOURCE_STRUCTURED] || - psContext->psShader->aiOpcodeUsed[OPCODE_DCL_RESOURCE_RAW]) - { - GL_ARB_shader_storage_buffer_object = true; - } - } - - if (!HaveAtomicMem(psContext->psShader->eTargetLanguage) || - !HaveAtomicCounter(psContext->psShader->eTargetLanguage)) - { - if (psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_ALLOC] || - psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_CONSUME] || - psContext->psShader->aiOpcodeUsed[OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED]) - { - psContext->EnableExtension("GL_ARB_shader_atomic_counters"); - } - } - - if (psContext->psShader->aiOpcodeUsed[OPCODE_ATOMIC_CMP_STORE] || - psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_AND] || - psContext->psShader->aiOpcodeUsed[OPCODE_ATOMIC_AND] || - psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_IADD] || - psContext->psShader->aiOpcodeUsed[OPCODE_ATOMIC_IADD] || - psContext->psShader->aiOpcodeUsed[OPCODE_ATOMIC_OR] || - psContext->psShader->aiOpcodeUsed[OPCODE_ATOMIC_XOR] || - psContext->psShader->aiOpcodeUsed[OPCODE_ATOMIC_IMIN] || - psContext->psShader->aiOpcodeUsed[OPCODE_ATOMIC_UMIN] || - psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_IMAX] || - psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_IMIN] || - psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_UMAX] || - psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_UMIN] || - 
psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_OR] || - psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_XOR] || - psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_EXCH] || - psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_CMP_EXCH]) - { - if (!HaveAtomicMem(psContext->psShader->eTargetLanguage)) - GL_ARB_shader_storage_buffer_object = true; - - if (!HaveImageAtomics(psContext->psShader->eTargetLanguage)) - { - if (isES) - psContext->EnableExtension("GL_OES_shader_image_atomic"); - else - GL_ARB_shader_image_load_store = true; - } - } - - if (!HaveGather(psContext->psShader->eTargetLanguage)) - { - if (psContext->psShader->aiOpcodeUsed[OPCODE_GATHER4] || - psContext->psShader->aiOpcodeUsed[OPCODE_GATHER4_PO_C] || - psContext->psShader->aiOpcodeUsed[OPCODE_GATHER4_PO] || - psContext->psShader->aiOpcodeUsed[OPCODE_GATHER4_C]) - { - psContext->EnableExtension("GL_ARB_texture_gather"); - } - } - - if (IsESLanguage(psContext->psShader->eTargetLanguage)) - { - if (psContext->psShader->aiOpcodeUsed[OPCODE_DERIV_RTX_COARSE] || - psContext->psShader->aiOpcodeUsed[OPCODE_DERIV_RTX_FINE] || - psContext->psShader->aiOpcodeUsed[OPCODE_DERIV_RTX] || - psContext->psShader->aiOpcodeUsed[OPCODE_DERIV_RTY_COARSE] || - psContext->psShader->aiOpcodeUsed[OPCODE_DERIV_RTY_FINE] || - psContext->psShader->aiOpcodeUsed[OPCODE_DERIV_RTY]) - { - if (psContext->psShader->eTargetLanguage < LANG_ES_300) - { - psContext->EnableExtension("GL_OES_standard_derivatives"); - } - } - - if (psContext->psShader->eShaderType == PIXEL_SHADER && - (psContext->psShader->aiOpcodeUsed[OPCODE_SAMPLE_L] || - psContext->psShader->aiOpcodeUsed[OPCODE_SAMPLE_C_LZ] || - psContext->psShader->aiOpcodeUsed[OPCODE_SAMPLE_D])) - { - psContext->EnableExtension("GL_EXT_shader_texture_lod"); - - static const int tex_sampler_type_count = 4; - static const char* tex_sampler_dim_name[tex_sampler_type_count] = { - "1D", "2D", "3D", "Cube", - }; - - if (psContext->psShader->eTargetLanguage == LANG_ES_100) - { - 
bcatcstr(extensions, "#if !defined(GL_EXT_shader_texture_lod)\n"); - - for (int dim = 0; dim < tex_sampler_type_count; dim++) - { - bformata(extensions, "#define texture%sLodEXT texture%s\n", tex_sampler_dim_name[dim], tex_sampler_dim_name[dim]); - - if (dim == 1) // 2D - bformata(extensions, "#define texture%sProjLodEXT texture%sProj\n", tex_sampler_dim_name[dim], tex_sampler_dim_name[dim]); - } - bcatcstr(extensions, "#endif\n"); - } - } - } - - if (!HaveGatherNonConstOffset(psContext->psShader->eTargetLanguage)) - { - if (psContext->psShader->aiOpcodeUsed[OPCODE_GATHER4_PO_C] || - psContext->psShader->aiOpcodeUsed[OPCODE_GATHER4_PO]) - { - psContext->EnableExtension("GL_ARB_gpu_shader5"); - } - } - - if (!HaveQueryLod(psContext->psShader->eTargetLanguage)) - { - if (psContext->psShader->aiOpcodeUsed[OPCODE_LOD]) - { - psContext->EnableExtension("GL_ARB_texture_query_lod"); - } - } - - if (!HaveQueryLevels(psContext->psShader->eTargetLanguage)) - { - if (psContext->psShader->aiOpcodeUsed[OPCODE_RESINFO]) - { - psContext->EnableExtension("GL_ARB_texture_query_levels"); - psContext->EnableExtension("GL_ARB_shader_image_size"); - } - } - - if (psContext->psShader->aiOpcodeUsed[OPCODE_SAMPLE_INFO]) - { - psContext->EnableExtension("GL_ARB_shader_texture_image_samples"); - } - - if (!HaveImageLoadStore(psContext->psShader->eTargetLanguage)) - { - if (psContext->psShader->aiOpcodeUsed[OPCODE_STORE_UAV_TYPED] || - psContext->psShader->aiOpcodeUsed[OPCODE_STORE_RAW] || - psContext->psShader->aiOpcodeUsed[OPCODE_STORE_STRUCTURED]) - { - GL_ARB_shader_image_load_store = true; - psContext->EnableExtension("GL_ARB_shader_bit_encoding"); - } - else if (psContext->psShader->aiOpcodeUsed[OPCODE_LD_UAV_TYPED] || - psContext->psShader->aiOpcodeUsed[OPCODE_LD_RAW] || - psContext->psShader->aiOpcodeUsed[OPCODE_LD_STRUCTURED]) - { - GL_ARB_shader_image_load_store = true; - } - } - - if (!HaveGeometryShaderARB(psContext->psShader->eTargetLanguage)) - { - if 
(psContext->psShader->eShaderType == GEOMETRY_SHADER) - { - psContext->EnableExtension("GL_ARB_geometry_shader"); - } - } - - if (psContext->psShader->eTargetLanguage == LANG_ES_300 || psContext->psShader->eTargetLanguage == LANG_ES_310) - { - if (psContext->psShader->eShaderType == GEOMETRY_SHADER) - { - psContext->EnableExtension("GL_OES_geometry_shader"); - psContext->EnableExtension("GL_EXT_geometry_shader"); - } - } - - if (psContext->psShader->eTargetLanguage == LANG_ES_300 || psContext->psShader->eTargetLanguage == LANG_ES_310) - { - if (psContext->psShader->eShaderType == HULL_SHADER || psContext->psShader->eShaderType == DOMAIN_SHADER) - { - psContext->EnableExtension("GL_OES_tessellation_shader"); - psContext->EnableExtension("GL_EXT_tessellation_shader"); - } - } - - if (GL_ARB_shader_storage_buffer_object) - psContext->EnableExtension("GL_ARB_shader_storage_buffer_object"); - - if (GL_ARB_shader_image_load_store) - psContext->EnableExtension("GL_ARB_shader_image_load_store"); - - if (psContext->psShader->eShaderType == PIXEL_SHADER && psContext->psShader->eTargetLanguage >= LANG_120 && !HaveFragmentCoordConventions(psContext->psShader->eTargetLanguage)) - { - psContext->RequireExtension("GL_ARB_fragment_coord_conventions"); - } - - if (psContext->psShader->extensions->EXT_shader_framebuffer_fetch && psContext->psShader->eShaderType == PIXEL_SHADER && psContext->flags & HLSLCC_FLAG_SHADER_FRAMEBUFFER_FETCH) - { - psContext->EnableExtension("GL_EXT_shader_framebuffer_fetch"); - } - - //Handle fragment shader default precision - if (psContext->psShader->eShaderType == PIXEL_SHADER && - (psContext->psShader->eTargetLanguage == LANG_ES_100 || psContext->psShader->eTargetLanguage == LANG_ES_300 || psContext->psShader->eTargetLanguage == LANG_ES_310 || (psContext->flags & HLSLCC_FLAG_NVN_TARGET))) - { - if (psContext->psShader->eTargetLanguage == LANG_ES_100) - { - // gles 2.0 shaders can have mediump as default if the GPU doesn't have highp support - 
bcatcstr(glsl, - "#ifdef GL_FRAGMENT_PRECISION_HIGH\n" - " precision highp float;\n" - "#else\n" - " precision mediump float;\n" - "#endif\n"); - } - else - { - bcatcstr(glsl, "precision highp float;\n"); - } - - // Define default int precision to highp to avoid issues on platforms that actually implement mediump - bcatcstr(glsl, "precision highp int;\n"); - } - - if (psContext->psShader->eShaderType == PIXEL_SHADER && psContext->psShader->eTargetLanguage >= LANG_150) - { - if (psContext->flags & HLSLCC_FLAG_ORIGIN_UPPER_LEFT) - bcatcstr(glsl, "layout(origin_upper_left) in vec4 gl_FragCoord;\n"); - - if (psContext->flags & HLSLCC_FLAG_PIXEL_CENTER_INTEGER) - bcatcstr(glsl, "layout(pixel_center_integer) in vec4 gl_FragCoord;\n"); - } - - - /* - OpenGL 4.1 API spec: - To use any built-in input or output in the gl_PerVertex block in separable - program objects, shader code must redeclare that block prior to use. - */ - /* DISABLED FOR NOW */ -/* if(psContext->psShader->eShaderType == VERTEX_SHADER && psContext->psShader->eTargetLanguage >= LANG_410) - { - bcatcstr(glsl, "out gl_PerVertex {\n"); - bcatcstr(glsl, "vec4 gl_Position;\n"); - bcatcstr(glsl, "float gl_PointSize;\n"); - bcatcstr(glsl, "float gl_ClipDistance[];"); - bcatcstr(glsl, "};\n"); - }*/ -} - -GLLang ChooseLanguage(Shader* psShader) -{ - // Depends on the HLSL shader model extracted from bytecode. 
- switch (psShader->ui32MajorVersion) - { - case 5: - { - return LANG_430; - } - case 4: - { - return LANG_330; - } - default: - { - return LANG_120; - } - } -} - -const char* GetVersionString(GLLang language) -{ - switch (language) - { - case LANG_ES_100: - { - return "#version 100\n"; - break; - } - case LANG_ES_300: - { - return "#version 300 es\n"; - break; - } - case LANG_ES_310: - { - return "#version 310 es\n"; - break; - } - case LANG_120: - { - return "#version 120\n"; - break; - } - case LANG_130: - { - return "#version 130\n"; - break; - } - case LANG_140: - { - return "#version 140\n"; - break; - } - case LANG_150: - { - return "#version 150\n"; - break; - } - case LANG_330: - { - return "#version 330\n"; - break; - } - case LANG_400: - { - return "#version 400\n"; - break; - } - case LANG_410: - { - return "#version 410\n"; - break; - } - case LANG_420: - { - return "#version 420\n"; - break; - } - case LANG_430: - { - return "#version 430\n"; - break; - } - case LANG_440: - { - return "#version 440\n"; - break; - } - default: - { - return ""; - break; - } - } -} - -static const char * GetPhaseFuncName(SHADER_PHASE_TYPE eType) -{ - switch (eType) - { - default: - case MAIN_PHASE: return ""; - case HS_GLOBAL_DECL_PHASE: return "hs_global_decls"; - case HS_FORK_PHASE: return "fork_phase"; - case HS_CTRL_POINT_PHASE: return "control_point_phase"; - case HS_JOIN_PHASE: return "join_phase"; - } -} - -static void DoHullShaderPassthrough(HLSLCrossCompilerContext *psContext) -{ - uint32_t i; - bstring glsl = psContext->glsl; - - for (i = 0; i < psContext->psShader->sInfo.psInputSignatures.size(); i++) - { - ShaderInfo::InOutSignature *psSig = &psContext->psShader->sInfo.psInputSignatures[i]; - const char *Type; - uint32_t ui32NumComponents = HLSLcc::GetNumberBitsSet(psSig->ui32Mask); - switch (psSig->eComponentType) - { - default: - case INOUT_COMPONENT_FLOAT32: - Type = ui32NumComponents > 1 ? 
"vec" : "float"; - break; - case INOUT_COMPONENT_SINT32: - Type = ui32NumComponents > 1 ? "ivec" : "int"; - break; - case INOUT_COMPONENT_UINT32: - Type = ui32NumComponents > 1 ? "uvec" : "uint"; - break; - } - if ((psSig->eSystemValueType == NAME_POSITION || psSig->semanticName == "POS") && psSig->ui32SemanticIndex == 0) - continue; - - std::string inputName; - - { - std::ostringstream oss; - oss << psContext->inputPrefix << psSig->semanticName << psSig->ui32SemanticIndex; - inputName = oss.str(); - } - - if (psContext->psDependencies->IsHullShaderInputAlreadyDeclared(inputName)) - continue; - - psContext->psDependencies->RecordHullShaderInput(inputName); - - std::string outputName; - { - std::ostringstream oss; - oss << psContext->outputPrefix << psSig->semanticName << psSig->ui32SemanticIndex; - outputName = oss.str(); - } - - const char * prec = ""; - if (HavePrecisionQualifiers(psContext)) - { - if (psSig->eMinPrec != MIN_PRECISION_DEFAULT) - prec = "mediump "; - else - prec = "highp "; - } - - bool keepLocation = ((psContext->flags & HLSLCC_FLAG_KEEP_VARYING_LOCATIONS) != 0); - int inLoc = psContext->psDependencies->GetVaryingLocation(inputName, HULL_SHADER, true, keepLocation, psContext->psShader->maxSemanticIndex); - int outLoc = psContext->psDependencies->GetVaryingLocation(outputName, HULL_SHADER, false, keepLocation, psContext->psShader->maxSemanticIndex); - - psContext->AddIndentation(); - if (ui32NumComponents > 1) - bformata(glsl, "layout(location = %d) in %s%s%d %s%s%d[];\n", inLoc, prec, Type, ui32NumComponents, psContext->inputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex); - else - bformata(glsl, "layout(location = %d) in %s%s %s%s%d[];\n", inLoc, prec, Type, psContext->inputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex); - - psContext->AddIndentation(); - if (ui32NumComponents > 1) - bformata(glsl, "layout(location = %d) out %s%s%d %s%s%d[];\n", outLoc, prec, Type, ui32NumComponents, psContext->outputPrefix, 
psSig->semanticName.c_str(), psSig->ui32SemanticIndex); - else - bformata(glsl, "layout(location = %d) out %s%s %s%s%d[];\n", outLoc, prec, Type, psContext->outputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex); - } - - psContext->AddIndentation(); - bcatcstr(glsl, "void passthrough_ctrl_points()\n"); - psContext->AddIndentation(); - bcatcstr(glsl, "{\n"); - psContext->indent++; - - for (i = 0; i < psContext->psShader->sInfo.psInputSignatures.size(); i++) - { - const ShaderInfo::InOutSignature *psSig = &psContext->psShader->sInfo.psInputSignatures[i]; - - psContext->AddIndentation(); - - if ((psSig->eSystemValueType == NAME_POSITION || psSig->semanticName == "POS") && psSig->ui32SemanticIndex == 0) - bformata(glsl, "gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"); - else - bformata(glsl, "%s%s%d[gl_InvocationID] = %s%s%d[gl_InvocationID];\n", psContext->outputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex, psContext->inputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex); - } - - psContext->indent--; - psContext->AddIndentation(); - bcatcstr(glsl, "}\n"); -} - -GLLang ToGLSL::SetLanguage(GLLang suggestedLanguage) -{ - language = suggestedLanguage; - if (language == LANG_DEFAULT) - { - language = ChooseLanguage(psContext->psShader); - } - return language; -} - -// Go through all declarations and remove reserve UAV occupied binding points -void ResolveStructuredBufferBindingSlots(ShaderPhase *psPhase, HLSLCrossCompilerContext *psContext, GLSLCrossDependencyData *glslDependencyData) -{ - for (uint32_t p = 0; p < psPhase->psDecl.size(); ++p) - { - if (psPhase->psDecl[p].eOpcode == OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW || - psPhase->psDecl[p].eOpcode == OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED) - { - uint32_t uav = psPhase->psDecl[p].asOperands[0].ui32RegisterNumber; // uav binding point - - bstring BufNamebstr = bfromcstr(""); - ResourceName(BufNamebstr, psContext, RGROUP_UAV, 
psPhase->psDecl[p].asOperands[0].ui32RegisterNumber, 0); - - char *btmp = bstr2cstr(BufNamebstr, '\0'); - std::string BufName = btmp; - bcstrfree(btmp); - bdestroy(BufNamebstr); - - glslDependencyData->ReserveNamedBindPoint(BufName, uav, GLSLCrossDependencyData::BufferType_ReadWrite); - } - } -} - -bool ToGLSL::Translate() -{ - bstring glsl; - uint32_t i; - Shader* psShader = psContext->psShader; - uint32_t ui32Phase; - - psContext->psTranslator = this; - - if (language == LANG_DEFAULT) - SetLanguage(LANG_DEFAULT); - - SetIOPrefixes(); - psShader->ExpandSWAPCs(); - psShader->ForcePositionToHighp(); - psShader->AnalyzeIOOverlap(); - if ((psContext->flags & HLSLCC_FLAG_KEEP_VARYING_LOCATIONS) != 0) - psShader->SetMaxSemanticIndex(); - psShader->FindUnusedGlobals(psContext->flags); - - psContext->indent = 0; - - glsl = bfromcstralloc(1024 * 10, "\n"); - bstring extensions = bfromcstralloc(1024 * 10, GetVersionString(language)); - psContext->extensions = extensions; - - psContext->glsl = glsl; - for (i = 0; i < psShader->asPhases.size(); ++i) - { - psShader->asPhases[i].postShaderCode = bfromcstralloc(1024 * 5, ""); - psShader->asPhases[i].earlyMain = bfromcstralloc(1024 * 5, ""); - } - psContext->currentGLSLString = &glsl; - psShader->eTargetLanguage = language; - psContext->currentPhase = MAIN_PHASE; - - if (psShader->extensions) - { - if (psContext->flags & HLSLCC_FLAG_NVN_TARGET) - { - psContext->EnableExtension("GL_ARB_separate_shader_objects"); - psContext->EnableExtension("GL_NV_desktop_lowp_mediump"); // This flag allow FP16 operations (mediump in GLSL) - } - if (psShader->extensions->ARB_explicit_attrib_location) - psContext->RequireExtension("GL_ARB_explicit_attrib_location"); - if (psShader->extensions->ARB_explicit_uniform_location) - psContext->RequireExtension("GL_ARB_explicit_uniform_location"); - if (psShader->extensions->ARB_shading_language_420pack) - psContext->RequireExtension("GL_ARB_shading_language_420pack"); - } - - 
psContext->ClearDependencyData(); - - AddVersionDependentCode(psContext); - - if (psShader->eShaderType == VERTEX_SHADER && - HaveLimitedInOutLocationQualifier(language, psShader->extensions) && - psContext->flags & HLSLCC_FLAG_NVN_TARGET) - { - bcatcstr(glsl, "out gl_PerVertex { vec4 gl_Position; };\n"); - } - - if (!psContext->psDependencies->m_ExtBlendModes.empty() && psShader->eShaderType == PIXEL_SHADER) - { - psContext->EnableExtension("GL_KHR_blend_equation_advanced"); - bcatcstr(glsl, "#if GL_KHR_blend_equation_advanced\n"); - for (i = 0; i < psContext->psDependencies->m_ExtBlendModes.size(); i++) - { - bformata(glsl, "layout(%s) out;\n", psContext->psDependencies->m_ExtBlendModes[i].c_str()); - } - bcatcstr(glsl, "#endif\n"); - } - - if (psContext->psShader->eTargetLanguage != LANG_ES_100) - { - bool hasConstantBuffers = psContext->psShader->sInfo.psConstantBuffers.size() > 0; - if (hasConstantBuffers) - { - // This value will be replaced at runtime with 0 if we need to disable UBO. - bcatcstr(glsl, "#define HLSLCC_ENABLE_UNIFORM_BUFFERS 1\n"); - bcatcstr(glsl, "#if HLSLCC_ENABLE_UNIFORM_BUFFERS\n#define UNITY_UNIFORM\n#else\n#define UNITY_UNIFORM uniform\n#endif\n"); - } - bool hasTextures = false; - for (i = 0; i < psShader->asPhases[0].psDecl.size(); ++i) - { - if (psShader->asPhases[0].psDecl[i].eOpcode == OPCODE_DCL_RESOURCE) - { - hasTextures = true; - break; - } - } - if (hasTextures || hasConstantBuffers) - { - // This value will be replaced at runtime with 0 if we need to disable explicit uniform locations. 
- bcatcstr(glsl, "#define UNITY_SUPPORTS_UNIFORM_LOCATION 1\n"); - bcatcstr(glsl, "#if UNITY_SUPPORTS_UNIFORM_LOCATION\n#define UNITY_LOCATION(x) layout(location = x)\n#define UNITY_BINDING(x) layout(binding = x, std140)\n#else\n#define UNITY_LOCATION(x)\n#define UNITY_BINDING(x) layout(std140)\n#endif\n"); - } - } - - for (ui32Phase = 0; ui32Phase < psShader->asPhases.size(); ui32Phase++) - { - ShaderPhase &phase = psShader->asPhases[ui32Phase]; - phase.UnvectorizeImmMoves(); - psContext->DoDataTypeAnalysis(&phase); - phase.ResolveUAVProperties(psShader->sInfo); - ResolveStructuredBufferBindingSlots(&phase, psContext, psContext->psDependencies); - if (!psContext->IsVulkan() && !psContext->IsSwitch()) - { - phase.PruneConstArrays(); - psContext->ReserveFramebufferFetchInputs(); - } - } - - psShader->PruneTempRegisters(); - - for (ui32Phase = 0; ui32Phase < psShader->asPhases.size(); ui32Phase++) - { - // Loop transform can only be done after the temps have been pruned - ShaderPhase &phase = psShader->asPhases[ui32Phase]; - HLSLcc::DoLoopTransform(psContext, phase); - } - - //Special case. Can have multiple phases. 
- if (psShader->eShaderType == HULL_SHADER) - { - const SHADER_PHASE_TYPE ePhaseFuncCallOrder[3] = { HS_CTRL_POINT_PHASE, HS_FORK_PHASE, HS_JOIN_PHASE }; - uint32_t ui32PhaseCallIndex; - int perPatchSectionAdded = 0; - int hasControlPointPhase = 0; - - psShader->ConsolidateHullTempVars(); - - // Find out if we have a passthrough hull shader - for (ui32Phase = 2; ui32Phase < psShader->asPhases.size(); ui32Phase++) - { - if (psShader->asPhases[ui32Phase].ePhase == HS_CTRL_POINT_PHASE) - hasControlPointPhase = 1; - } - - // Phase 1 is always the global decls phase, no instructions - for (i = 0; i < psShader->asPhases[1].psDecl.size(); ++i) - { - TranslateDeclaration(&psShader->asPhases[1].psDecl[i]); - } - - if (hasControlPointPhase == 0) - { - DoHullShaderPassthrough(psContext); - } - - for (ui32Phase = 2; ui32Phase < psShader->asPhases.size(); ui32Phase++) - { - ShaderPhase *psPhase = &psShader->asPhases[ui32Phase]; - psContext->currentPhase = ui32Phase; - - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - bformata(glsl, "//%s declarations\n", GetPhaseFuncName(psPhase->ePhase)); - } - - for (i = 0; i < psPhase->psDecl.size(); ++i) - { - TranslateDeclaration(&psPhase->psDecl[i]); - } - - bformata(glsl, "void %s%d(int phaseInstanceID)\n{\n", GetPhaseFuncName(psPhase->ePhase), ui32Phase); - psContext->indent++; - - if (psPhase->psInst.size() > 0) - { - //The minus one here is remove the return statement at end of phases. - //We don't want to translate that, we'll just end the function body. 
- ASSERT(psPhase->psInst[psPhase->psInst.size() - 1].eOpcode == OPCODE_RET); - for (i = 0; i < psPhase->psInst.size() - 1; ++i) - { - TranslateInstruction(&psPhase->psInst[i]); - } - } - - - psContext->indent--; - bcatcstr(glsl, "}\n"); - } - - bcatcstr(glsl, "void main()\n{\n"); - - psContext->indent++; - - // There are cases when there are no control point phases and we have to do passthrough - if (hasControlPointPhase == 0) - { - // Passthrough control point phase, run the rest only once per patch - psContext->AddIndentation(); - bcatcstr(glsl, "passthrough_ctrl_points();\n"); - psContext->AddIndentation(); - bcatcstr(glsl, "barrier();\n"); - psContext->AddIndentation(); - bcatcstr(glsl, "if (gl_InvocationID == 0)\n"); - psContext->AddIndentation(); - bcatcstr(glsl, "{\n"); - psContext->indent++; - perPatchSectionAdded = 1; - } - - for (ui32PhaseCallIndex = 0; ui32PhaseCallIndex < 3; ui32PhaseCallIndex++) - { - for (ui32Phase = 2; ui32Phase < psShader->asPhases.size(); ui32Phase++) - { - uint32_t i; - ShaderPhase *psPhase = &psShader->asPhases[ui32Phase]; - if (psPhase->ePhase != ePhaseFuncCallOrder[ui32PhaseCallIndex]) - continue; - - if (psPhase->earlyMain->slen > 1) - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//--- Start Early Main ---\n"); - } - - bconcat(glsl, psPhase->earlyMain); - - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//--- End Early Main ---\n"); - } - } - - for (i = 0; i < psPhase->ui32InstanceCount; i++) - { - psContext->AddIndentation(); - bformata(glsl, "%s%d(%d);\n", GetPhaseFuncName(psShader->asPhases[ui32Phase].ePhase), ui32Phase, i); - } - - if (psPhase->hasPostShaderCode) - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//--- Post shader code ---\n"); - } - - bconcat(glsl, psPhase->postShaderCode); - - if 
(psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//--- End post shader code ---\n"); - } - } - - - if (psShader->asPhases[ui32Phase].ePhase == HS_CTRL_POINT_PHASE) - { - // We're done printing control point phase, run the rest only once per patch - psContext->AddIndentation(); - bcatcstr(glsl, "barrier();\n"); - psContext->AddIndentation(); - bcatcstr(glsl, "if (gl_InvocationID == 0)\n"); - psContext->AddIndentation(); - bcatcstr(glsl, "{\n"); - psContext->indent++; - perPatchSectionAdded = 1; - } - } - } - - if (perPatchSectionAdded != 0) - { - psContext->indent--; - psContext->AddIndentation(); - bcatcstr(glsl, "}\n"); - } - - psContext->indent--; - - bcatcstr(glsl, "}\n"); - - // Print out extra functions we generated, in reverse order for potential dependencies - std::for_each(m_FunctionDefinitions.rbegin(), m_FunctionDefinitions.rend(), [&extensions](const FunctionDefinitions::value_type &p) - { - bcatcstr(extensions, p.second.c_str()); - bcatcstr(extensions, "\n"); - }); - - // Concat extensions and glsl for the final shader code. - if (m_NeedUnityInstancingArraySizeDecl) - { - if (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) - { - bformata(extensions, "layout(constant_id = %d) const int %s = 2;\n", kArraySizeConstantID, UNITY_RUNTIME_INSTANCING_ARRAY_SIZE_MACRO); - } - else - { - bcatcstr(extensions, "#ifndef " UNITY_RUNTIME_INSTANCING_ARRAY_SIZE_MACRO "\n\t#define " UNITY_RUNTIME_INSTANCING_ARRAY_SIZE_MACRO " 2\n#endif\n"); - } - } - if (m_NeedUnityPreTransformDecl) - { - if (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) - { - bformata(extensions, "layout(constant_id = %d) const int %s = 0;\n", kPreTransformConstantID, UNITY_PRETRANSFORM_CONSTANT_NAME); - } - } - - bconcat(extensions, glsl); - bdestroy(glsl); - psContext->glsl = extensions; - glsl = NULL; - - if (psContext->psDependencies) - { - //Save partitioning and primitive type for use by domain shader. 
- psContext->psDependencies->eTessOutPrim = psShader->sInfo.eTessOutPrim; - - psContext->psDependencies->eTessPartitioning = psShader->sInfo.eTessPartitioning; - } - - return true; - } - - if (psShader->eShaderType == DOMAIN_SHADER && psContext->psDependencies) - { - //Load partitioning and primitive type from hull shader. - switch (psContext->psDependencies->eTessOutPrim) - { - case TESSELLATOR_OUTPUT_TRIANGLE_CCW: - { - bcatcstr(glsl, "layout(ccw) in;\n"); - break; - } - case TESSELLATOR_OUTPUT_TRIANGLE_CW: - { - bcatcstr(glsl, "layout(cw) in;\n"); - break; - } - case TESSELLATOR_OUTPUT_POINT: - { - bcatcstr(glsl, "layout(point_mode) in;\n"); - break; - } - default: - { - break; - } - } - - switch (psContext->psDependencies->eTessPartitioning) - { - case TESSELLATOR_PARTITIONING_FRACTIONAL_ODD: - { - bcatcstr(glsl, "layout(fractional_odd_spacing) in;\n"); - break; - } - case TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN: - { - bcatcstr(glsl, "layout(fractional_even_spacing) in;\n"); - break; - } - default: - { - break; - } - } - } - - bstring generatedFunctionsKeyword = bfromcstr("\n// Generated functions\n\n"); - bstring beforeMain = NULL; - bstring beforeMainKeyword = NULL; - - if (!HaveDynamicIndexing(psContext)) - { - beforeMain = bfromcstr(""); - beforeMainKeyword = bfromcstr("\n// Before Main\n\n"); - psContext->beforeMain = beforeMain; - } - - for (i = 0; i < psShader->asPhases[0].psDecl.size(); ++i) - { - TranslateDeclaration(&psShader->asPhases[0].psDecl[i]); - } - - // Search and replace string, for injecting generated functions that need to be after default precision declarations - bconcat(glsl, generatedFunctionsKeyword); - - // Search and replace string, for injecting stuff from translation that need to be after normal declarations and before main - if (!HaveDynamicIndexing(psContext)) - { - bconcat(glsl, beforeMainKeyword); - } - - bcatcstr(glsl, "void main()\n{\n"); - - psContext->indent++; - - if (psContext->psShader->asPhases[0].earlyMain->slen > 1) - 
{ - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//--- Start Early Main ---\n"); - } - - bconcat(glsl, psContext->psShader->asPhases[0].earlyMain); - - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//--- End Early Main ---\n"); - } - } - - for (i = 0; i < psShader->asPhases[0].psInst.size(); ++i) - { - TranslateInstruction(&psShader->asPhases[0].psInst[i]); - } - - psContext->indent--; - - bcatcstr(glsl, "}\n"); - - // Print out extra definitions and functions we generated in generation order to satisfy dependencies - { - bstring generatedFunctionsAndDefinitions = bfromcstr(""); - - for (size_t i = 0; i < m_AdditionalDefinitions.size(); ++i) - { - bcatcstr(generatedFunctionsAndDefinitions, m_AdditionalDefinitions[i].c_str()); - bcatcstr(generatedFunctionsAndDefinitions, "\n"); - } - - for (std::vector::const_iterator funcNameIter = m_FunctionDefinitionsOrder.begin(); funcNameIter != m_FunctionDefinitionsOrder.end(); ++funcNameIter) - { - const FunctionDefinitions::const_iterator definition = m_FunctionDefinitions.find(*funcNameIter); - ASSERT(definition != m_FunctionDefinitions.end()); - bcatcstr(generatedFunctionsAndDefinitions, definition->second.c_str()); - bcatcstr(generatedFunctionsAndDefinitions, "\n"); - } - bfindreplace(glsl, generatedFunctionsKeyword, generatedFunctionsAndDefinitions, 0); - bdestroy(generatedFunctionsAndDefinitions); - bdestroy(generatedFunctionsKeyword); - } - - // Concat extensions and glsl for the final shader code. 
- if (m_NeedUnityInstancingArraySizeDecl) - { - if (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) - { - bformata(extensions, "layout(constant_id = %d) const int %s = 2;\n", kArraySizeConstantID, UNITY_RUNTIME_INSTANCING_ARRAY_SIZE_MACRO); - } - else - { - bcatcstr(extensions, "#ifndef " UNITY_RUNTIME_INSTANCING_ARRAY_SIZE_MACRO "\n\t#define " UNITY_RUNTIME_INSTANCING_ARRAY_SIZE_MACRO " 2\n#endif\n"); - } - } - if (m_NeedUnityPreTransformDecl) - { - if (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) - { - bformata(extensions, "layout(constant_id = %d) const int %s = 0;\n", kPreTransformConstantID, UNITY_PRETRANSFORM_CONSTANT_NAME); - } - } - bconcat(extensions, glsl); - bdestroy(glsl); - - if (!HaveDynamicIndexing(psContext)) - { - bstring empty = bfromcstr(""); - - if (beforeMain->slen > 1) - bfindreplace(extensions, beforeMainKeyword, beforeMain, 0); - else - bfindreplace(extensions, beforeMainKeyword, empty, 0); - - psContext->beforeMain = NULL; - - bdestroy(empty); - bdestroy(beforeMain); - bdestroy(beforeMainKeyword); - } - - psContext->glsl = extensions; - glsl = NULL; - - return true; -} - -bool ToGLSL::DeclareExtraFunction(const std::string &name, bstring body) -{ - if (m_FunctionDefinitions.find(name) != m_FunctionDefinitions.end()) - return true; - m_FunctionDefinitions.insert(std::make_pair(name, (const char *)body->data)); - m_FunctionDefinitionsOrder.push_back(name); - return false; -} - -static void PrintComponentWrapper1(bstring code, const char *func, const char *type2, const char *type3, const char *type4) -{ - bformata(code, "%s %s(%s a) { a.x = %s(a.x); a.y = %s(a.y); return a; }\n", type2, func, type2, func, func); - bformata(code, "%s %s(%s a) { a.x = %s(a.x); a.y = %s(a.y); a.z = %s(a.z); return a; }\n", type3, func, type3, func, func, func); - bformata(code, "%s %s(%s a) { a.x = %s(a.x); a.y = %s(a.y); a.z = %s(a.z); a.w = %s(a.w); return a; }\n", type4, func, type4, func, func, func, func); -} - -static void PrintComponentWrapper2(bstring 
code, const char *func, const char *type2, const char *type3, const char *type4) -{ - bformata(code, "%s %s(%s a, %s b) { a.x = %s(a.x, b.x); a.y = %s(a.y, b.y); return a; }\n", type2, func, type2, type2, func, func); - bformata(code, "%s %s(%s a, %s b) { a.x = %s(a.x, b.x); a.y = %s(a.y, b.y); a.z = %s(a.z, b.z); return a; }\n", type3, func, type3, type3, func, func, func); - bformata(code, "%s %s(%s a, %s b) { a.x = %s(a.x, b.x); a.y = %s(a.y, b.y); a.z = %s(a.z, b.z); a.w = %s(a.w, b.w); return a; }\n", type4, func, type4, type4, func, func, func, func); -} - -static void PrintTrunc(bstring code, const char *type) -{ - bformata(code, "%s trunc(%s x) { return sign(x)*floor(abs(x)); }\n", type, type); -} - -void ToGLSL::UseExtraFunctionDependency(const std::string &name) -{ - if (m_FunctionDefinitions.find(name) != m_FunctionDefinitions.end()) - return; - - bstring code = bfromcstr(""); - bool match = true; - - if (name == "trunc") - { - PrintTrunc(code, "float"); - PrintTrunc(code, "vec2"); - PrintTrunc(code, "vec3"); - PrintTrunc(code, "vec4"); - } - else if (name == "roundEven") - { - bformata(code, "float roundEven(float x) { float y = floor(x + 0.5); return (y - x == 0.5) ? 
floor(0.5*y) * 2.0 : y; }\n"); - PrintComponentWrapper1(code, "roundEven", "vec2", "vec3", "vec4"); - } - else if (name == "op_modi") - { - bformata(code, "const int BITWISE_BIT_COUNT = 32;\nint op_modi(int x, int y) { return x - y * (x / y); }\n"); - PrintComponentWrapper2(code, "op_modi", "ivec2", "ivec3", "ivec4"); - } - else if (name == "op_and") - { - UseExtraFunctionDependency("op_modi"); - - bformata(code, "int op_and(int a, int b) { int result = 0; int n = 1; for (int i = 0; i < BITWISE_BIT_COUNT; i++) { if ((op_modi(a, 2) != 0) && (op_modi(b, 2) != 0)) { result += n; } a = a / 2; b = b / 2; n = n * 2; if (!(a > 0 && b > 0)) { break; } } return result; }\n"); - PrintComponentWrapper2(code, "op_and", "ivec2", "ivec3", "ivec4"); - } - else if (name == "op_or") - { - UseExtraFunctionDependency("op_modi"); - - bformata(code, "int op_or(int a, int b) { int result = 0; int n = 1; for (int i = 0; i < BITWISE_BIT_COUNT; i++) { if ((op_modi(a, 2) != 0) || (op_modi(b, 2) != 0)) { result += n; } a = a / 2; b = b / 2; n = n * 2; if (!(a > 0 || b > 0)) { break; } } return result; }\n"); - PrintComponentWrapper2(code, "op_or", "ivec2", "ivec3", "ivec4"); - } - else if (name == "op_xor") - { - UseExtraFunctionDependency("op_and"); - - bformata(code, "int op_xor(int a, int b) { return (a + b - 2 * op_and(a, b)); }\n"); - PrintComponentWrapper2(code, "op_xor", "ivec2", "ivec3", "ivec4"); - } - else if (name == "op_shr") - { - bformata(code, "int op_shr(int a, int b) { return int(floor(float(a) / pow(2.0, float(b)))); }\n"); - PrintComponentWrapper2(code, "op_shr", "ivec2", "ivec3", "ivec4"); - } - else if (name == "op_shl") - { - bformata(code, "int op_shl(int a, int b) { return int(floor(float(a) * pow(2.0, float(b)))); }\n"); - PrintComponentWrapper2(code, "op_shl", "ivec2", "ivec3", "ivec4"); - } - else if (name == "op_not") - { - bformata(code, "int op_not(int value) { return -value - 1; }\n"); - PrintComponentWrapper1(code, "op_not", "ivec2", "ivec3", "ivec4"); - } - 
else if (name == "int_bitfieldInsert") - { - // Can't use the name 'bitfieldInsert' because Adreno fails with "can't redefine/overload built-in functions!" - bcatcstr(code, - "int int_bitfieldInsert(int base, int insert, int offset, int bits) {\n" - " uint mask = ~(uint(0xffffffff) << uint(bits)) << uint(offset);\n" - " return int((uint(base) & ~mask) | ((uint(insert) << uint(offset)) & mask));\n" - "}\n"); - } - else - { - match = false; - } - - if (match) - DeclareExtraFunction(name, code); - - bdestroy(code); -} diff --git a/third_party/HLSLcc/src/toGLSLDeclaration.cpp b/third_party/HLSLcc/src/toGLSLDeclaration.cpp deleted file mode 100644 index efafefa..0000000 --- a/third_party/HLSLcc/src/toGLSLDeclaration.cpp +++ /dev/null @@ -1,3933 +0,0 @@ -#include "hlslcc.h" -#include "internal_includes/Declaration.h" -#include "internal_includes/toGLSLOperand.h" -#include "internal_includes/toGLSL.h" -#include "internal_includes/languages.h" -#include "internal_includes/HLSLccToolkit.h" -#include "internal_includes/Shader.h" -#include "internal_includes/HLSLCrossCompilerContext.h" -#include "bstrlib.h" -#include "internal_includes/debug.h" -#include -#include -#include -#include -#include -#include "internal_includes/toGLSL.h" -#include "UnityInstancingFlexibleArraySize.h" - -using namespace HLSLcc; - -#ifndef fpcheck -#ifdef _MSC_VER -#define fpcheck(x) (_isnan(x) || !_finite(x)) -#else -#define fpcheck(x) (std::isnan(x) || std::isinf(x)) -#endif -#endif // #ifndef fpcheck - -static bool UseReflection(HLSLCrossCompilerContext* psContext) -{ - return !psContext->IsSwitch() && psContext->psShader->eShaderType != COMPUTE_SHADER; -} - -static SHADER_VARIABLE_TYPE TypeToReport(SHADER_VARIABLE_TYPE type) -{ - switch (type) - { - case SVT_BOOL: - case SVT_INT: - case SVT_UINT: - case SVT_UINT8: - case SVT_FORCED_INT: - case SVT_INT_AMBIGUOUS: - case SVT_INT16: - case SVT_INT12: - case SVT_UINT16: - return SVT_UINT; - - case SVT_FLOAT: - case SVT_FLOAT10: - case SVT_FLOAT16: - 
return SVT_FLOAT; - - default: - return type; - } -} - -static void GenerateUnsupportedFormatWarning(HLSLccReflection& refl, const char* name) -{ - std::ostringstream oss; - oss << "The resource '" << name << "' uses an unsupported type/format"; - refl.OnDiagnostics(oss.str(), -1, false); -} - -static void GenerateUnsupportedReadWriteFormatWarning(HLSLccReflection& refl, const char* name) -{ - std::ostringstream oss; - oss << "The resource '" << name << "' uses an unsupported type/format for read/write access"; - refl.OnDiagnostics(oss.str(), -1, false); -} - -void ToGLSL::DeclareConstBufferShaderVariable(const char* varName, const struct ShaderVarType* psType, const struct ConstantBuffer* psCBuf, int unsizedArray, bool addUniformPrefix, bool reportInReflection) -{ - bstring glsl = *psContext->currentGLSLString; - - if (reportInReflection && !psContext->IsVulkan() && psType->Class != SVC_STRUCT && UseReflection(psContext)) - { - const bool isMatrix = psType->Class == SVC_MATRIX_COLUMNS || psType->Class == SVC_MATRIX_ROWS; - const SHADER_VARIABLE_TYPE type = TypeToReport(psType->Type); - psContext->m_Reflection.OnConstant(varName, 0, type, psType->Rows, psType->Columns, isMatrix, psType->Elements, true); - } - - if (psType->Class == SVC_STRUCT) - { - bformata(glsl, "\t%s%s_Type %s", addUniformPrefix ? "UNITY_UNIFORM " : "", varName, varName); - if (psType->Elements > 1) - { - if (HLSLcc::IsUnityFlexibleInstancingBuffer(psCBuf)) - { - bformata(glsl, "[" UNITY_RUNTIME_INSTANCING_ARRAY_SIZE_MACRO "]"); - m_NeedUnityInstancingArraySizeDecl = true; - } - else - bformata(glsl, "[%d]", psType->Elements); - } - } - else if (psType->Class == SVC_MATRIX_COLUMNS || psType->Class == SVC_MATRIX_ROWS) - { - if (psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) - { - // Translate matrices into vec4 arrays - bformata(glsl, "\t%s%s " HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING "%s", addUniformPrefix ? 
"UNITY_UNIFORM " : "", HLSLcc::GetConstructorForType(psContext, psType->Type, 4), psType->Rows, psType->Columns, varName); - uint32_t elemCount = (psType->Class == SVC_MATRIX_COLUMNS ? psType->Columns : psType->Rows); - if (psType->Elements > 1) - { - elemCount *= psType->Elements; - } - bformata(glsl, "[%d]", elemCount); - } - else - { - bformata(glsl, "\t%s%s %s", addUniformPrefix ? "UNITY_UNIFORM " : "", HLSLcc::GetMatrixTypeName(psContext, psType->Type, psType->Columns, psType->Rows).c_str(), varName); - if (psType->Elements > 1) - { - bformata(glsl, "[%d]", psType->Elements); - } - } - } - else if (psType->Class == SVC_VECTOR && psType->Columns > 1) - { - bformata(glsl, "\t%s%s %s", addUniformPrefix ? "UNITY_UNIFORM " : "", HLSLcc::GetConstructorForType(psContext, psType->Type, psType->Columns), varName); - - if (psType->Elements > 1) - { - bformata(glsl, "[%d]", psType->Elements); - } - } - else if ((psType->Class == SVC_SCALAR) || - (psType->Class == SVC_VECTOR && psType->Columns == 1)) - { - if (psType->Type == SVT_BOOL) - { - //Use int instead of bool. - //Allows implicit conversions to integer and - //bool consumes 4-bytes in HLSL and GLSL anyway. - ((ShaderVarType *)psType)->Type = SVT_INT; - } - - bformata(glsl, "\t%s%s %s", addUniformPrefix ? "UNITY_UNIFORM " : "", HLSLcc::GetConstructorForType(psContext, psType->Type, 1), varName); - - if (psType->Elements > 1) - { - bformata(glsl, "[%d]", psType->Elements); - } - } - if (unsizedArray) - bformata(glsl, "[]"); - bformata(glsl, ";\n"); -} - -//In GLSL embedded structure definitions are not supported. 
-void ToGLSL::PreDeclareStructType(const std::string &name, const struct ShaderVarType* psType) -{ - bstring glsl = *psContext->currentGLSLString; - uint32_t i; - - for (i = 0; i < psType->MemberCount; ++i) - { - if (psType->Members[i].Class == SVC_STRUCT) - { - PreDeclareStructType(psType->Members[i].name, &psType->Members[i]); - } - } - - if (psType->Class == SVC_STRUCT) - { - //Not supported at the moment - ASSERT(name != "$Element"); - - for (size_t i = 0; i < m_DefinedStructs.size(); ++i) - { - if (m_DefinedStructs[i] == name) - return; - } - - m_DefinedStructs.push_back(name); - - bformata(glsl, "struct %s_Type {\n", name.c_str()); - - for (i = 0; i < psType->MemberCount; ++i) - { - ASSERT(psType->Members.size() != 0); - - DeclareConstBufferShaderVariable(psType->Members[i].name.c_str(), &psType->Members[i], NULL, 0, false, false); - } - - bformata(glsl, "};\n"); - } -} - -static const char* GetInterpolationString(INTERPOLATION_MODE eMode, GLLang lang) -{ - switch (eMode) - { - case INTERPOLATION_CONSTANT: - { - return "flat "; - } - case INTERPOLATION_LINEAR: - { - return ""; - } - case INTERPOLATION_LINEAR_CENTROID: - { - return "centroid "; - } - case INTERPOLATION_LINEAR_NOPERSPECTIVE: - { - return lang <= LANG_ES_310 ? "" : "noperspective "; - break; - } - case INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID: - { - return lang <= LANG_ES_310 ? "centroid " : "noperspective centroid "; - } - case INTERPOLATION_LINEAR_SAMPLE: - { - return "sample "; - } - case INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE: - { - return lang <= LANG_ES_310 ? 
"" : "noperspective sample "; - } - default: - { - return ""; - } - } -} - -static void DeclareInput( - HLSLCrossCompilerContext* psContext, - const Declaration* psDecl, - const char* Interpolation, const char* StorageQualifier, const char* Precision, int iNumComponents, OPERAND_INDEX_DIMENSION eIndexDim, const char* InputName, const uint32_t ui32CompMask) -{ - Shader* psShader = psContext->psShader; - bstring glsl = *psContext->currentGLSLString; - int regSpace = psDecl->asOperands[0].GetRegisterSpace(psContext); - uint32_t ui32Reg = psDecl->asOperands[0].ui32RegisterNumber; - const ShaderInfo::InOutSignature *psSig = NULL; - - // This falls within the specified index ranges. The default is 0 if no input range is specified - - if (regSpace == 0) - psContext->psShader->sInfo.GetInputSignatureFromRegister(ui32Reg, ui32CompMask, &psSig); - else - psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Reg, ui32CompMask, &psSig); - - ASSERT(psSig != NULL); - - // No need to declare input pos 0 on HS control point phases, it's always position - // Also no point in declaring the builtins - if (psShader->eShaderType == HULL_SHADER && psShader->asPhases[psContext->currentPhase].ePhase == HS_CTRL_POINT_PHASE) - { - if (regSpace == 0) - { - if ((psSig->semanticName == "POS" || psSig->semanticName == "SV_Position") && psSig->ui32SemanticIndex == 0) - return; - } - } - - if ((ui32CompMask & ~psShader->acInputDeclared[regSpace][ui32Reg]) != 0) - { - const char* vecType = "vec"; - const char* scalarType = "float"; - - switch (psSig->eComponentType) - { - case INOUT_COMPONENT_UINT32: - { - vecType = "uvec"; - scalarType = "uint"; - break; - } - case INOUT_COMPONENT_SINT32: - { - vecType = "ivec"; - scalarType = "int"; - break; - } - case INOUT_COMPONENT_FLOAT32: - { - break; - } - default: - { - ASSERT(0); - break; - } - } - - if (psContext->psDependencies) - { - if (psShader->eShaderType == PIXEL_SHADER) - { - 
psContext->psDependencies->SetInterpolationMode(ui32Reg, psDecl->value.eInterpolation); - } - } - - std::string locationQualifier = ""; - - bool keepLocation = ((psContext->flags & HLSLCC_FLAG_KEEP_VARYING_LOCATIONS) != 0); - - if (HaveInOutLocationQualifier(psContext->psShader->eTargetLanguage) || - ((psContext->flags & HLSLCC_FLAG_NVN_TARGET) && HaveLimitedInOutLocationQualifier(psContext->psShader->eTargetLanguage, psContext->psShader->extensions))) - { - bool addLocation = false; - - // Add locations to vertex shader inputs unless disabled in flags - if (psShader->eShaderType == VERTEX_SHADER && !(psContext->flags & HLSLCC_FLAG_DISABLE_EXPLICIT_LOCATIONS)) - addLocation = true; - - // Add intra-shader locations if supported - if (psShader->eShaderType != VERTEX_SHADER) - addLocation = true; - - if (addLocation) - { - std::ostringstream oss; - oss << "layout(location = " << psContext->psDependencies->GetVaryingLocation(std::string(InputName), psShader->eShaderType, true, keepLocation, psShader->maxSemanticIndex) << ") "; - locationQualifier = oss.str(); - } - } - - psShader->acInputDeclared[regSpace][ui32Reg] = (char)psSig->ui32Mask; - - // Do the reflection report on vertex shader inputs - if (psShader->eShaderType == VERTEX_SHADER) - { - psContext->m_Reflection.OnInputBinding(std::string(InputName), psContext->psDependencies->GetVaryingLocation(std::string(InputName), VERTEX_SHADER, true, keepLocation, psShader->maxSemanticIndex)); - } - - switch (eIndexDim) - { - case INDEX_2D: - { - if (iNumComponents == 1) - { - const uint32_t regNum = psDecl->asOperands[0].ui32RegisterNumber; - const uint32_t arraySize = psDecl->asOperands[0].aui32ArraySizes[0]; - - psContext->psShader->abScalarInput[regSpace][regNum] |= (int)ui32CompMask; - - if (psShader->eShaderType == HULL_SHADER || psDecl->asOperands[0].eType == OPERAND_TYPE_INPUT_CONTROL_POINT) - bformata(glsl, "%s%s%s %s %s %s [];\n", locationQualifier.c_str(), Interpolation, StorageQualifier, Precision, scalarType, 
InputName); - else - bformata(glsl, "%s%s%s %s %s %s [%d];\n", locationQualifier.c_str(), Interpolation, StorageQualifier, Precision, scalarType, InputName, arraySize); - } - else - { - if (psShader->eShaderType == HULL_SHADER || psDecl->asOperands[0].eType == OPERAND_TYPE_INPUT_CONTROL_POINT) - bformata(glsl, "%s%s%s %s %s%d %s [];\n", locationQualifier.c_str(), Interpolation, StorageQualifier, Precision, vecType, iNumComponents, InputName); - else - bformata(glsl, "%s%s%s %s %s%d %s [%d];\n", locationQualifier.c_str(), Interpolation, StorageQualifier, Precision, vecType, iNumComponents, InputName, - psDecl->asOperands[0].aui32ArraySizes[0]); - } - break; - } - default: - { - if (iNumComponents == 1) - { - psContext->psShader->abScalarInput[regSpace][ui32Reg] |= (int)ui32CompMask; - - bformata(glsl, "%s%s%s %s %s %s;\n", locationQualifier.c_str(), Interpolation, StorageQualifier, Precision, scalarType, InputName); - } - else - { - if (psShader->aIndexedInput[regSpace][ui32Reg] > 0) - { - bformata(glsl, "%s%s%s %s %s%d %s", locationQualifier.c_str(), Interpolation, StorageQualifier, Precision, vecType, iNumComponents, InputName); - if (psShader->eShaderType == HULL_SHADER) - bcatcstr(glsl, "[];\n"); - else - bcatcstr(glsl, ";\n"); - } - else - { - if (psShader->eShaderType == HULL_SHADER) - bformata(glsl, "%s%s%s %s %s%d %s[];\n", locationQualifier.c_str(), Interpolation, StorageQualifier, Precision, vecType, iNumComponents, InputName); - else - bformata(glsl, "%s%s%s %s %s%d %s;\n", locationQualifier.c_str(), Interpolation, StorageQualifier, Precision, vecType, iNumComponents, InputName); - } - } - break; - } - } - } -} - -bool ToGLSL::RenderTargetDeclared(uint32_t input) -{ - if (m_DeclaredRenderTarget.find(input) != m_DeclaredRenderTarget.end()) - return true; - - m_DeclaredRenderTarget.insert(input); - return false; -} - -void ToGLSL::AddBuiltinInput(const Declaration* psDecl, const char* builtinName) -{ - Shader* psShader = psContext->psShader; - const 
Operand* psOperand = &psDecl->asOperands[0]; - const int regSpace = psOperand->GetRegisterSpace(psContext); - ASSERT(regSpace == 0); - - // we need to at least mark if they are scalars or not (as we might need to use vector ctor) - if (psOperand->GetNumInputElements(psContext) == 1) - psShader->abScalarInput[regSpace][psOperand->ui32RegisterNumber] |= (int)psOperand->ui32CompMask; -} - -void ToGLSL::AddBuiltinOutput(const Declaration* psDecl, int arrayElements, const char* builtinName) -{ - bstring glsl = *psContext->currentGLSLString; - Shader* psShader = psContext->psShader; - const SPECIAL_NAME eSpecialName = psDecl->asOperands[0].eSpecialName; - - if (eSpecialName != NAME_CLIP_DISTANCE && eSpecialName != NAME_CULL_DISTANCE) - return; - - psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode = 1; - - if (psContext->OutputNeedsDeclaring(&psDecl->asOperands[0], arrayElements ? arrayElements : 1)) - { - const ShaderInfo::InOutSignature* psSignature = NULL; - - psShader->sInfo.GetOutputSignatureFromRegister( - psDecl->asOperands[0].ui32RegisterNumber, - psDecl->asOperands[0].ui32CompMask, - 0, - &psSignature); - psContext->currentGLSLString = &psContext->psShader->asPhases[psContext->currentPhase].postShaderCode; - glsl = *psContext->currentGLSLString; - psContext->indent++; - if (arrayElements) - { - } - else if ((eSpecialName == NAME_CLIP_DISTANCE || eSpecialName == NAME_CULL_DISTANCE) && psContext->psShader->eShaderType != HULL_SHADER) - { - // Case 828454 : For some reason DX compiler seems to inject clip/cull distance declaration to the hull shader sometimes - // even though it's not used at all, and overlaps some completely unrelated patch constant declarations. We'll just ignore this now. - // Revisit this if this actually pops up elsewhere. 
- - // cull/clip distance are pretty similar (the only real difference is extension name (and functionality, but we dont care here)) - int max = psDecl->asOperands[0].GetMaxComponent(); - - if (IsESLanguage(psShader->eTargetLanguage)) - psContext->RequireExtension("GL_EXT_clip_cull_distance"); - else if (eSpecialName == NAME_CULL_DISTANCE) - psContext->RequireExtension("GL_ARB_cull_distance"); - const char* glName = eSpecialName == NAME_CLIP_DISTANCE ? "Clip" : "Cull"; - - int applySwizzle = psDecl->asOperands[0].GetNumSwizzleElements() > 1 ? 1 : 0; - const char* swizzle[] = {".x", ".y", ".z", ".w"}; - - ASSERT(psSignature != NULL); - const int index = psSignature->ui32SemanticIndex; - - //Clip/Cull distance can be spread across 1 or 2 outputs (each no more than a vec4). - //Some examples: - //float4 clip[2] : SV_ClipDistance; //8 clip distances - //float3 clip[2] : SV_ClipDistance; //6 clip distances - //float4 clip : SV_ClipDistance; //4 clip distances - //float clip : SV_ClipDistance; //1 clip distance. - - //In GLSL the clip/cull distance built-in is an array of up to 8 floats. - //So vector to array conversion needs to be done here. - int multiplier = 1; - if (index == 1) - { - const ShaderInfo::InOutSignature* psFirstClipSignature; - if (psShader->sInfo.GetOutputSignatureFromSystemValue(eSpecialName, 1, &psFirstClipSignature)) - { - if (psFirstClipSignature->ui32Mask & (1 << 3)) multiplier = 4; - else if (psFirstClipSignature->ui32Mask & (1 << 2)) multiplier = 3; - else if (psFirstClipSignature->ui32Mask & (1 << 1)) multiplier = 2; - } - } - - // Add a specially crafted comment so runtime knows to enable clip planes. 
- // We may end up doing 2 of these, so at runtime OR the results - uint32_t clipmask = psDecl->asOperands[0].GetAccessMask(); - if (index != 0) - clipmask <<= multiplier; - bformata(psContext->glsl, "// HLSLcc_%sDistances_%x\n", glName, clipmask); - - psContext->psShader->asPhases[psContext->currentPhase].acOutputNeedsRedirect[psSignature->ui32Register] = 0xff; - bformata(psContext->glsl, "vec4 phase%d_gl%sDistance%d;\n", psContext->currentPhase, glName, index); - - for (int i = 0; i < max; ++i) - { - psContext->AddIndentation(); - bformata(glsl, "%s[%d] = (", builtinName, i + multiplier * index); - TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NONE); - if (applySwizzle) bformata(glsl, ")%s;\n", swizzle[i]); - else bformata(glsl, ");\n"); - } - } - psContext->indent--; - psContext->currentGLSLString = &psContext->glsl; - } -} - -void ToGLSL::HandleOutputRedirect(const Declaration *psDecl, const char *Precision) -{ - const Operand *psOperand = &psDecl->asOperands[0]; - Shader *psShader = psContext->psShader; - bstring glsl = *psContext->currentGLSLString; - int needsRedirect = 0; - const ShaderInfo::InOutSignature *psSig = NULL; - - int regSpace = psOperand->GetRegisterSpace(psContext); - if (regSpace == 0 && psShader->asPhases[psContext->currentPhase].acOutputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xff) - { - needsRedirect = 1; - } - else if (regSpace == 1 && psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xff) - { - needsRedirect = 1; - } - - if (needsRedirect == 1) - { - // TODO What if this is indexed? 
- int comp = 0; - uint32_t origMask = psOperand->ui32CompMask; - - ASSERT(psContext->psShader->aIndexedOutput[regSpace][psOperand->ui32RegisterNumber] == 0); - - psContext->AddIndentation(); - bformata(glsl, "%s vec4 phase%d_Output%d_%d;\n", Precision, psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); - - while (comp < 4) - { - int numComps = 0; - int hasCast = 0; - uint32_t mask, i; - psSig = NULL; - if (regSpace == 0) - psContext->psShader->sInfo.GetOutputSignatureFromRegister(psOperand->ui32RegisterNumber, 1 << comp, psContext->psShader->ui32CurrentVertexOutputStream, &psSig, true); - else - psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, 1 << comp, &psSig, true); - - // The register isn't necessarily packed full. Continue with the next component. - if (psSig == NULL) - { - comp++; - continue; - } - - numComps = GetNumberBitsSet(psSig->ui32Mask); - mask = psSig->ui32Mask; - - ((Operand *)psOperand)->ui32CompMask = 1 << comp; - bstring str = GetPostShaderCode(psContext); - TranslateOperand(str, psOperand, TO_FLAG_NAME_ONLY); - bcatcstr(str, " = "); - - if (psSig->eComponentType == INOUT_COMPONENT_SINT32) - { - bformata(str, HaveBitEncodingOps(psContext->psShader->eTargetLanguage) ? "floatBitsToInt(" : "int("); - hasCast = 1; - } - else if (psSig->eComponentType == INOUT_COMPONENT_UINT32) - { - bformata(str, HaveBitEncodingOps(psContext->psShader->eTargetLanguage) ? 
"floatBitsToUint(" : "int("); - hasCast = 1; - } - bformata(str, "phase%d_Output%d_%d.", psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); - // Print out mask - for (i = 0; i < 4; i++) - { - if ((mask & (1 << i)) == 0) - continue; - - bformata(str, "%c", "xyzw"[i]); - } - - if (hasCast) - bcatcstr(str, ")"); - comp += numComps; - bcatcstr(str, ";\n"); - } - - ((Operand *)psOperand)->ui32CompMask = origMask; - if (regSpace == 0) - psShader->asPhases[psContext->currentPhase].acOutputNeedsRedirect[psOperand->ui32RegisterNumber] = 0xfe; - else - psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] = 0xfe; - } -} - -void ToGLSL::AddUserOutput(const Declaration* psDecl) -{ - bstring glsl = *psContext->currentGLSLString; - Shader* psShader = psContext->psShader; - - if (psContext->OutputNeedsDeclaring(&psDecl->asOperands[0], 1)) - { - const Operand* psOperand = &psDecl->asOperands[0]; - const char* Precision = ""; - int iNumComponents; - bstring type = NULL; - int regSpace = psDecl->asOperands[0].GetRegisterSpace(psContext); - uint32_t ui32Reg = psDecl->asOperands[0].ui32RegisterNumber; - - const ShaderInfo::InOutSignature* psSignature = NULL; - - if (regSpace == 0) - psShader->sInfo.GetOutputSignatureFromRegister( - ui32Reg, - psDecl->asOperands[0].ui32CompMask, - psShader->ui32CurrentVertexOutputStream, - &psSignature); - else - psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Reg, psDecl->asOperands[0].ui32CompMask, &psSignature); - - if (psSignature->semanticName == "POS" && psOperand->ui32RegisterNumber == 0 && psContext->psShader->eShaderType == VERTEX_SHADER) - return; - - iNumComponents = GetNumberBitsSet(psSignature->ui32Mask); - if (iNumComponents == 1) - psContext->psShader->abScalarOutput[regSpace][ui32Reg] |= (int)psDecl->asOperands[0].ui32CompMask; - - switch (psSignature->eComponentType) - { - case INOUT_COMPONENT_UINT32: - { - if (iNumComponents > 1) - type = bformat("uvec%d", 
iNumComponents); - else - type = bformat("uint"); - break; - } - case INOUT_COMPONENT_SINT32: - { - if (iNumComponents > 1) - type = bformat("ivec%d", iNumComponents); - else - type = bformat("int"); - break; - } - case INOUT_COMPONENT_FLOAT32: - { - if (iNumComponents > 1) - type = bformat("vec%d", iNumComponents); - else - type = bformat("float"); - break; - } - default: - ASSERT(0); - break; - } - - if (HavePrecisionQualifiers(psContext)) - { - switch (psOperand->eMinPrecision) - { - case OPERAND_MIN_PRECISION_DEFAULT: - { - Precision = "highp "; - break; - } - case OPERAND_MIN_PRECISION_FLOAT_16: - { - Precision = "mediump "; - break; - } - case OPERAND_MIN_PRECISION_FLOAT_2_8: - { - Precision = EmitLowp(psContext) ? "lowp " : "mediump "; - break; - } - case OPERAND_MIN_PRECISION_SINT_16: - { - Precision = "mediump "; - //type = "ivec"; - break; - } - case OPERAND_MIN_PRECISION_UINT_16: - { - Precision = "mediump "; - //type = "uvec"; - break; - } - } - } - - switch (psShader->eShaderType) - { - case PIXEL_SHADER: - { - switch (psDecl->asOperands[0].eType) - { - case OPERAND_TYPE_OUTPUT_COVERAGE_MASK: - { - break; - } - case OPERAND_TYPE_OUTPUT_DEPTH: - { - if (psShader->eTargetLanguage == LANG_ES_100 && !psContext->EnableExtension("GL_EXT_frag_depth")) - { - bcatcstr(psContext->extensions, "#define gl_FragDepth gl_FragDepthEXT\n"); - } - break; - } - case OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL: - { - psContext->EnableExtension("GL_ARB_conservative_depth"); - bcatcstr(glsl, "#ifdef GL_ARB_conservative_depth\n"); - bcatcstr(glsl, "layout (depth_greater) out float gl_FragDepth;\n"); - bcatcstr(glsl, "#endif\n"); - break; - } - case OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL: - { - psContext->EnableExtension("GL_ARB_conservative_depth"); - bcatcstr(glsl, "#ifdef GL_ARB_conservative_depth\n"); - bcatcstr(glsl, "layout (depth_less) out float gl_FragDepth;\n"); - bcatcstr(glsl, "#endif\n"); - break; - } - default: - { - uint32_t renderTarget = 
psDecl->asOperands[0].ui32RegisterNumber; - - char OutputName[512]; - bstring oname; - oname = bformat("%s%s%d", psContext->outputPrefix, psSignature->semanticName.c_str(), renderTarget); - strncpy(OutputName, (char *)oname->data, 512); - bdestroy(oname); - - if (psShader->eTargetLanguage == LANG_ES_100 && renderTarget > 0) - psContext->EnableExtension("GL_EXT_draw_buffers"); - - bool haveFramebufferFetch = (psShader->extensions->EXT_shader_framebuffer_fetch && - psShader->eShaderType == PIXEL_SHADER && - psContext->flags & HLSLCC_FLAG_SHADER_FRAMEBUFFER_FETCH); - - if (WriteToFragData(psContext->psShader->eTargetLanguage)) - { - bformata(glsl, "#define %s gl_FragData[%d]\n", OutputName, renderTarget); - } - else - { - if (!RenderTargetDeclared(renderTarget)) - { - bstring layoutQualifier = bformat(""); - - if (HaveInOutLocationQualifier(psContext->psShader->eTargetLanguage) || - HaveLimitedInOutLocationQualifier(psContext->psShader->eTargetLanguage, psContext->psShader->extensions)) - { - uint32_t index = 0; - - if ((psContext->flags & HLSLCC_FLAG_DUAL_SOURCE_BLENDING) && DualSourceBlendSupported(psContext->psShader->eTargetLanguage)) - { - if (renderTarget > 0) - { - renderTarget = 0; - index = 1; - } - bdestroy(layoutQualifier); - layoutQualifier = bformat("layout(location = %d, index = %d) ", renderTarget, index); - } - else - { - bdestroy(layoutQualifier); - layoutQualifier = bformat("layout(location = %d) ", renderTarget); - } - } - - auto lq = bstr2cstr(layoutQualifier, '\0'); - - if (haveFramebufferFetch) - { - bcatcstr(glsl, "#ifdef GL_EXT_shader_framebuffer_fetch\n"); - bformata(glsl, "%sinout %s%s %s;\n", lq, Precision, type->data, OutputName); - bcatcstr(glsl, "#else\n"); - bformata(glsl, "%sout %s%s %s;\n", lq, Precision, type->data, OutputName); - bcatcstr(glsl, "#endif\n"); - } - else - bformata(glsl, "%sout %s%s %s;\n", lq, Precision, type->data, OutputName); - - bcstrfree(lq); - bdestroy(layoutQualifier); - } - } - break; - } - } - break; - } - 
case VERTEX_SHADER: - case GEOMETRY_SHADER: - case DOMAIN_SHADER: - case HULL_SHADER: - { - const char* Interpolation = ""; - char OutputName[512]; - bstring oname; - oname = bformat("%s%s%s%d", psContext->outputPrefix, regSpace == 0 ? "" : "patch", psSignature->semanticName.c_str(), psSignature->ui32SemanticIndex); - strncpy(OutputName, (char *)oname->data, 512); - bdestroy(oname); - - if (psShader->eShaderType == VERTEX_SHADER || psShader->eShaderType == GEOMETRY_SHADER) - { - if (psSignature->eComponentType == INOUT_COMPONENT_UINT32 || - psSignature->eComponentType == INOUT_COMPONENT_SINT32) // GLSL spec requires that integer vertex outputs always have "flat" interpolation - { - Interpolation = GetInterpolationString(INTERPOLATION_CONSTANT, psContext->psShader->eTargetLanguage); - } - else if (psContext->psDependencies) // For floats we get the interpolation that was resolved from the fragment shader input - { - Interpolation = GetInterpolationString(psContext->psDependencies->GetInterpolationMode(psDecl->asOperands[0].ui32RegisterNumber), psContext->psShader->eTargetLanguage); - } - } - - if (HaveInOutLocationQualifier(psContext->psShader->eTargetLanguage)) - { - bool keepLocation = ((psContext->flags & HLSLCC_FLAG_KEEP_VARYING_LOCATIONS) != 0); - bformata(glsl, "layout(location = %d) ", psContext->psDependencies->GetVaryingLocation(std::string(OutputName), psShader->eShaderType, false, keepLocation, psShader->maxSemanticIndex)); - } - - if (InOutSupported(psContext->psShader->eTargetLanguage)) - { - if (psContext->psShader->eShaderType == HULL_SHADER) - { - // In Hull shaders outputs are either per-vertex (and need []) or per-patch (need 'out patch') - if (regSpace == 0) - bformata(glsl, "%sout %s%s %s[];\n", Interpolation, Precision, type->data, OutputName); - else - bformata(glsl, "patch %sout %s%s %s;\n", Interpolation, Precision, type->data, OutputName); - } - else - bformata(glsl, "%sout %s%s %s;\n", Interpolation, Precision, type->data, OutputName); - } 
- else - { - bformata(glsl, "%svarying %s%s %s;\n", Interpolation, Precision, type->data, OutputName); - } - - break; - } - default: - ASSERT(0); - break; - } - HandleOutputRedirect(psDecl, Precision); - bdestroy(type); - } -} - -void ToGLSL::ReportStruct(const std::string &name, const struct ShaderVarType* psType) -{ - if (psContext->IsVulkan() || psContext->IsSwitch() || psType->Class != SVC_STRUCT) - return; - - for (uint32_t i = 0; i < psType->MemberCount; ++i) - { - if (psType->Members[i].Class == SVC_STRUCT) - ReportStruct(psType->Members[i].name, &psType->Members[i]); - } - - for (uint32_t i = 0; i < psType->MemberCount; ++i) - { - const bool isMatrix = psType->Members[i].Class == SVC_MATRIX_COLUMNS || psType->Members[i].Class == SVC_MATRIX_ROWS; - const SHADER_VARIABLE_TYPE type = TypeToReport(psType->Members[i].Type); - psContext->m_Reflection.OnConstant(psType->Members[i].fullName.c_str(), 0, type, psType->Members[i].Rows, psType->Members[i].Columns, isMatrix, psType->Members[i].Elements, true); - } - - psContext->m_Reflection.OnConstant(psType->fullName.c_str(), 0, SVT_VOID, psType->Rows, psType->Columns, false, psType->Elements, true); -} - -void ToGLSL::DeclareUBOConstants(const uint32_t ui32BindingPoint, const ConstantBuffer* psCBuf, bstring glsl) -{ - uint32_t i; - - bool skipUnused = false; - - if ((psContext->flags & HLSLCC_FLAG_REMOVE_UNUSED_GLOBALS) && psCBuf->name == "$Globals") - skipUnused = true; - - - std::string cbName = psCBuf->name; - if (cbName == "$Globals") - { - // Need to tweak Globals struct name to prevent clashes between shader stages - char prefix = 'A'; - switch (psContext->psShader->eShaderType) - { - default: - ASSERT(0); - break; - case COMPUTE_SHADER: - prefix = 'C'; - break; - case VERTEX_SHADER: - prefix = 'V'; - break; - case PIXEL_SHADER: - prefix = 'P'; - break; - case GEOMETRY_SHADER: - prefix = 'G'; - break; - case HULL_SHADER: - prefix = 'H'; - break; - case DOMAIN_SHADER: - prefix = 'D'; - break; - } - - cbName[0] = 
prefix; - } - - for (i = 0; i < psCBuf->asVars.size(); ++i) - { - if (skipUnused && !psCBuf->asVars[i].sType.m_IsUsed) - continue; - - PreDeclareStructType(psCBuf->asVars[i].name, &psCBuf->asVars[i].sType); - } - - if (psContext->flags & HLSLCC_FLAG_WRAP_UBO) - bformata(glsl, "#if HLSLCC_ENABLE_UNIFORM_BUFFERS\n"); - - uint32_t slot = 0xffffffff; - bool isKnown = true; - - /* [layout (location = X)] uniform vec4 HLSLConstantBufferName[numConsts]; */ - if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) - { - GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(cbName, false, 1); - bformata(glsl, "layout(set = %d, binding = %d, std140) ", binding.set, binding.binding); - } - else - { - if (HaveUniformBindingsAndLocations(psContext->psShader->eTargetLanguage, psContext->psShader->extensions, psContext->flags) || (psContext->flags & HLSLCC_FLAG_FORCE_EXPLICIT_LOCATIONS)) - { - GLSLCrossDependencyData::GLSLBufferBindPointInfo bindPointInfo = psContext->psDependencies->GetGLSLResourceBinding(cbName, GLSLCrossDependencyData::BufferType_UBO); - isKnown = bindPointInfo.known; - slot = bindPointInfo.slot; - bformata(glsl, "UNITY_BINDING(%d) ", slot); - } - else - bcatcstr(glsl, "layout(std140) "); - - if (slot != 0xffffffff && !isKnown && UseReflection(psContext)) - { - psContext->m_Reflection.OnConstantBuffer(cbName, psCBuf->ui32TotalSizeInBytes, psCBuf->GetMemberCount(skipUnused)); - for (i = 0; i < psCBuf->asVars.size(); ++i) - { - if (skipUnused && !psCBuf->asVars[i].sType.m_IsUsed) - continue; - - ReportStruct(psCBuf->asVars[i].name, &psCBuf->asVars[i].sType); - } - } - } - - const bool reportInReflection = slot != 0xffffffff && !isKnown && UseReflection(psContext); - - bformata(glsl, "uniform %s {\n", cbName.c_str()); - - if (psContext->flags & HLSLCC_FLAG_WRAP_UBO) - bformata(glsl, "#endif\n"); - - for (i = 0; i < psCBuf->asVars.size(); ++i) - { - if (skipUnused && !psCBuf->asVars[i].sType.m_IsUsed) - 
continue; - - DeclareConstBufferShaderVariable(psCBuf->asVars[i].name.c_str(), - &psCBuf->asVars[i].sType, psCBuf, 0, psContext->flags & HLSLCC_FLAG_WRAP_UBO ? true : false, reportInReflection); - } - - if (psContext->flags & HLSLCC_FLAG_WRAP_UBO) - bformata(glsl, "#if HLSLCC_ENABLE_UNIFORM_BUFFERS\n"); - - if (psContext->flags & HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT_WITH_INSTANCE_NAME) - { - std::string instanceName = UniformBufferInstanceName(psContext, psCBuf->name); - bformata(glsl, "} %s;\n", instanceName.c_str()); - } - else - bcatcstr(glsl, "};\n"); - - if (psContext->flags & HLSLCC_FLAG_WRAP_UBO) - bformata(glsl, "#endif\n"); - - if (reportInReflection) - psContext->m_Reflection.OnConstantBufferBinding(cbName, slot); -} - -bool DeclareRWStructuredBufferTemplateTypeAsInteger(HLSLCrossCompilerContext* psContext, const Operand* psOperand) -{ - // with cases like: RWStructuredBuffer myBuffer; /*...*/ AtomicMin(myBuffer[0].x , myInt); - // if we translate RWStructuredBuffer template type to uint, incorrect version of the function might be called ( AtomicMin(uint..) instead of AtomicMin(int..) 
) - // we try to avoid this case by using integer type in those cases - if (psContext && psOperand) - { - const bool isVulkan = (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0; - if (!isVulkan) - { - if (psContext->psShader && HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) - { - uint32_t ui32BindingPoint = psOperand->ui32RegisterNumber; - const ResourceBinding* psBinding = NULL; - psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, ui32BindingPoint, &psBinding); - if (psBinding) - { - const ConstantBuffer* psBuffer = NULL; - psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_UAV, psBinding->ui32BindPoint, &psBuffer); - if (psBuffer && psBuffer->asVars.size() == 1 && psBuffer->asVars[0].sType.Type == SVT_INT /*&& psContext->IsSwitch()*/) - return true; - } - } - } - } - return false; -} - -static void DeclareBufferVariable(HLSLCrossCompilerContext* psContext, uint32_t ui32BindingPoint, - const Operand* psOperand, const uint32_t ui32GloballyCoherentAccess, - const uint32_t isRaw, const uint32_t isUAV, const uint32_t hasEmbeddedCounter, const uint32_t stride, bstring glsl) -{ - const bool isVulkan = (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0; - bstring BufNamebstr = bfromcstr(""); - // Use original HLSL bindings for UAVs only. For non-UAV buffers we have resolved new binding points from the same register space. - - ResourceName(BufNamebstr, psContext, isUAV ? 
RGROUP_UAV : RGROUP_TEXTURE, psOperand->ui32RegisterNumber, 0); - - char *btmp = bstr2cstr(BufNamebstr, '\0'); - std::string BufName = btmp; - bcstrfree(btmp); - bdestroy(BufNamebstr); - - // Declare the struct type for structured buffers - if (!isRaw) - { - const char* typeStr = "uint"; - if (isUAV && DeclareRWStructuredBufferTemplateTypeAsInteger(psContext, psOperand)) - typeStr = "int"; - bformata(glsl, " struct %s_type {\n\t%s[%d] value;\n};\n\n", BufName.c_str(), typeStr, stride / 4); - } - - uint32_t slot = 0xffffffff; - bool isKnown = true; - if (isVulkan) - { - GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(BufName); - bformata(glsl, "layout(set = %d, binding = %d, std430) ", binding.set, binding.binding); - } - else - { - GLSLCrossDependencyData::GLSLBufferBindPointInfo bindPointInfo = psContext->psDependencies->GetGLSLResourceBinding(BufName, isUAV ? GLSLCrossDependencyData::BufferType_ReadWrite : GLSLCrossDependencyData::BufferType_SSBO); - slot = bindPointInfo.slot; - isKnown = bindPointInfo.known; - bformata(glsl, "layout(std430, binding = %d) ", slot); - } - - if (ui32GloballyCoherentAccess & GLOBALLY_COHERENT_ACCESS) - bcatcstr(glsl, "coherent "); - - if (!isUAV) - bcatcstr(glsl, "readonly "); - - // For Nintendo Switch, adds a "decoration" to get around not being able to detect readonly modifier on the SSBO via the platform shader reflection API. - bformata(glsl, "buffer %s%s {\n\t", psContext->IsSwitch() && !isUAV ? 
"hlslcc_readonly" : "", BufName.c_str()); - - if (hasEmbeddedCounter) - bformata(glsl, "coherent uint %s_counter;\n\t", BufName.c_str()); - - if (isRaw) - { - if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) - bcatcstr(glsl, "uint"); - else - bcatcstr(glsl, "int"); - } - else - bformata(glsl, "%s_type", BufName.c_str()); - - bformata(glsl, " %s_buf[];\n};\n", BufName.c_str()); - - if (!isKnown && slot != 0xffffffff && UseReflection(psContext)) - psContext->m_Reflection.OnBufferBinding(BufName, slot, isUAV); -} - -void ToGLSL::DeclareStructConstants(const uint32_t ui32BindingPoint, - const ConstantBuffer* psCBuf, const Operand* psOperand, - bstring glsl) -{ - uint32_t i; - int useGlobalsStruct = 1; - bool skipUnused = false; - - if ((psContext->flags & HLSLCC_FLAG_DISABLE_GLOBALS_STRUCT) && psCBuf->name[0] == '$') - useGlobalsStruct = 0; - - if ((psContext->flags & HLSLCC_FLAG_REMOVE_UNUSED_GLOBALS) && psCBuf->name == "$Globals") - skipUnused = true; - - if ((psContext->flags & HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT) == 0) - useGlobalsStruct = 0; - - - for (i = 0; i < psCBuf->asVars.size(); ++i) - { - if (skipUnused && !psCBuf->asVars[i].sType.m_IsUsed) - continue; - - PreDeclareStructType(psCBuf->asVars[i].name, &psCBuf->asVars[i].sType); - } - - /* [layout (location = X)] uniform vec4 HLSLConstantBufferName[numConsts]; */ - if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) - { - ASSERT(0); // Catch this to see what's going on - std::string bname = "wut"; - GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(bname); - bformata(glsl, "layout(set = %d, binding = %d) ", binding.set, binding.binding); - } - else - { - if (HaveUniformBindingsAndLocations(psContext->psShader->eTargetLanguage, psContext->psShader->extensions, psContext->flags)) - bformata(glsl, "layout(location = %d) ", ui32BindingPoint); - } - if (useGlobalsStruct) - { - bcatcstr(glsl, "uniform struct "); - 
TranslateOperand(psOperand, TO_FLAG_DECLARATION_NAME); - - bcatcstr(glsl, "_Type {\n"); - } - else - { - if (psCBuf->name == "$Globals") - { - // GLSL needs to report $Globals in reflection so that SRP batcher can properly determine if the shader is compatible with it or not. - if (UseReflection(psContext) && !psContext->IsVulkan()) - { - size_t memberCount = 0; - for (i = 0; i < psCBuf->asVars.size(); ++i) - { - if (!psCBuf->asVars[i].sType.m_IsUsed) - continue; - - memberCount += psCBuf->asVars[i].sType.GetMemberCount(); - } - - psContext->m_Reflection.OnConstantBuffer(psCBuf->name, 0, memberCount); - } - } - } - - for (i = 0; i < psCBuf->asVars.size(); ++i) - { - if (skipUnused && !psCBuf->asVars[i].sType.m_IsUsed) - continue; - - if (!useGlobalsStruct) - bcatcstr(glsl, "uniform "); - - DeclareConstBufferShaderVariable(psCBuf->asVars[i].name.c_str(), &psCBuf->asVars[i].sType, psCBuf, 0, false, true); - } - - if (useGlobalsStruct) - { - bcatcstr(glsl, "} "); - - TranslateOperand(psOperand, TO_FLAG_DECLARATION_NAME); - - bcatcstr(glsl, ";\n"); - } -} - -static const char* GetVulkanTextureType(HLSLCrossCompilerContext* psContext, - const RESOURCE_DIMENSION eDimension, - const uint32_t ui32RegisterNumber) -{ - const ResourceBinding* psBinding = 0; - RESOURCE_RETURN_TYPE eType = RETURN_TYPE_UNORM; - int found; - found = psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, ui32RegisterNumber, &psBinding); - if (found) - { - eType = (RESOURCE_RETURN_TYPE)psBinding->ui32ReturnType; - } - switch (eDimension) - { - case RESOURCE_DIMENSION_BUFFER: - { - switch (eType) - { - case RETURN_TYPE_SINT: - return "itextureBuffer"; - case RETURN_TYPE_UINT: - return "utextureBuffer"; - default: - return "textureBuffer"; - } - break; - } - - case RESOURCE_DIMENSION_TEXTURE1D: - { - switch (eType) - { - case RETURN_TYPE_SINT: - return "itexture1D"; - case RETURN_TYPE_UINT: - return "utexture1D"; - default: - return "texture1D"; - } - break; - } - - case 
RESOURCE_DIMENSION_TEXTURE2D: - { - switch (eType) - { - case RETURN_TYPE_SINT: - return "itexture2D"; - case RETURN_TYPE_UINT: - return "utexture2D"; - default: - return "texture2D"; - } - break; - } - - case RESOURCE_DIMENSION_TEXTURE2DMS: - { - switch (eType) - { - case RETURN_TYPE_SINT: - return "itexture2DMS"; - case RETURN_TYPE_UINT: - return "utexture2DMS"; - default: - return "texture2DMS"; - } - break; - } - - case RESOURCE_DIMENSION_TEXTURE3D: - { - switch (eType) - { - case RETURN_TYPE_SINT: - return "itexture3D"; - case RETURN_TYPE_UINT: - return "utexture3D"; - default: - return "texture3D"; - } - break; - } - - case RESOURCE_DIMENSION_TEXTURECUBE: - { - switch (eType) - { - case RETURN_TYPE_SINT: - return "itextureCube"; - case RETURN_TYPE_UINT: - return "utextureCube"; - default: - return "textureCube"; - } - break; - } - - case RESOURCE_DIMENSION_TEXTURE1DARRAY: - { - switch (eType) - { - case RETURN_TYPE_SINT: - return "itexture1DArray"; - case RETURN_TYPE_UINT: - return "utexture1DArray"; - default: - return "texture1DArray"; - } - break; - } - - case RESOURCE_DIMENSION_TEXTURE2DARRAY: - { - switch (eType) - { - case RETURN_TYPE_SINT: - return "itexture2DArray"; - case RETURN_TYPE_UINT: - return "utexture2DArray"; - default: - return "texture2DArray"; - } - break; - } - - case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: - { - switch (eType) - { - case RETURN_TYPE_SINT: - return "itexture2DMSArray"; - case RETURN_TYPE_UINT: - return "utexture2DMSArray"; - default: - return "texture2DMSArray"; - } - break; - } - - case RESOURCE_DIMENSION_TEXTURECUBEARRAY: - { - switch (eType) - { - case RETURN_TYPE_SINT: - return "itextureCubeArray"; - case RETURN_TYPE_UINT: - return "utextureCubeArray"; - default: - return "textureCubeArray"; - } - break; - } - default: - ASSERT(0); - break; - } - - return "texture2D"; -} - -static HLSLCC_TEX_DIMENSION GetTextureDimension(HLSLCrossCompilerContext* psContext, - const RESOURCE_DIMENSION eDimension, - const uint32_t 
ui32RegisterNumber) -{ - const ResourceBinding* psBinding = 0; - RESOURCE_RETURN_TYPE eType = RETURN_TYPE_UNORM; - int found; - found = psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, ui32RegisterNumber, &psBinding); - if (found) - { - eType = (RESOURCE_RETURN_TYPE)psBinding->ui32ReturnType; - } - - switch (eDimension) - { - case RESOURCE_DIMENSION_BUFFER: - case RESOURCE_DIMENSION_TEXTURE1D: - return eType == RETURN_TYPE_SINT || eType == RETURN_TYPE_UINT ? TD_INT : TD_FLOAT; - - case RESOURCE_DIMENSION_TEXTURE2D: - case RESOURCE_DIMENSION_TEXTURE2DMS: - case RESOURCE_DIMENSION_TEXTURE1DARRAY: - return TD_2D; - - case RESOURCE_DIMENSION_TEXTURE3D: - return TD_3D; - - case RESOURCE_DIMENSION_TEXTURECUBE: - return TD_CUBE; - - case RESOURCE_DIMENSION_TEXTURE2DARRAY: - case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: - return TD_2DARRAY; - - case RESOURCE_DIMENSION_TEXTURECUBEARRAY: - return TD_CUBEARRAY; - default: - ASSERT(0); - break; - } - - return TD_2D; -} - -// Not static because this is used in toGLSLInstruction.cpp when sampling Vulkan textures -const char* GetSamplerType(HLSLCrossCompilerContext* psContext, - const RESOURCE_DIMENSION eDimension, - const uint32_t ui32RegisterNumber) -{ - const ResourceBinding* psBinding = 0; - RESOURCE_RETURN_TYPE eType = RETURN_TYPE_UNORM; - int found; - found = psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, ui32RegisterNumber, &psBinding); - if (found) - { - eType = (RESOURCE_RETURN_TYPE)psBinding->ui32ReturnType; - } - switch (eDimension) - { - case RESOURCE_DIMENSION_BUFFER: - { - if (IsESLanguage(psContext->psShader->eTargetLanguage)) - psContext->RequireExtension("GL_EXT_texture_buffer"); - switch (eType) - { - case RETURN_TYPE_SINT: - return "isamplerBuffer"; - case RETURN_TYPE_UINT: - return "usamplerBuffer"; - default: - return "samplerBuffer"; - } - break; - } - - case RESOURCE_DIMENSION_TEXTURE1D: - { - switch (eType) - { - case RETURN_TYPE_SINT: - return "isampler1D"; - case 
RETURN_TYPE_UINT: - return "usampler1D"; - default: - return "sampler1D"; - } - break; - } - - case RESOURCE_DIMENSION_TEXTURE2D: - { - switch (eType) - { - case RETURN_TYPE_SINT: - return "isampler2D"; - case RETURN_TYPE_UINT: - return "usampler2D"; - default: - return "sampler2D"; - } - break; - } - - case RESOURCE_DIMENSION_TEXTURE2DMS: - { - switch (eType) - { - case RETURN_TYPE_SINT: - return "isampler2DMS"; - case RETURN_TYPE_UINT: - return "usampler2DMS"; - default: - return "sampler2DMS"; - } - break; - } - - case RESOURCE_DIMENSION_TEXTURE3D: - { - switch (eType) - { - case RETURN_TYPE_SINT: - return "isampler3D"; - case RETURN_TYPE_UINT: - return "usampler3D"; - default: - return "sampler3D"; - } - break; - } - - case RESOURCE_DIMENSION_TEXTURECUBE: - { - switch (eType) - { - case RETURN_TYPE_SINT: - return "isamplerCube"; - case RETURN_TYPE_UINT: - return "usamplerCube"; - default: - return "samplerCube"; - } - break; - } - - case RESOURCE_DIMENSION_TEXTURE1DARRAY: - { - switch (eType) - { - case RETURN_TYPE_SINT: - return "isampler1DArray"; - case RETURN_TYPE_UINT: - return "usampler1DArray"; - default: - return "sampler1DArray"; - } - break; - } - - case RESOURCE_DIMENSION_TEXTURE2DARRAY: - { - switch (eType) - { - case RETURN_TYPE_SINT: - return "isampler2DArray"; - case RETURN_TYPE_UINT: - return "usampler2DArray"; - default: - return "sampler2DArray"; - } - break; - } - - case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: - { - if (IsESLanguage(psContext->psShader->eTargetLanguage)) - psContext->RequireExtension("GL_OES_texture_storage_multisample_2d_array"); - switch (eType) - { - case RETURN_TYPE_SINT: - return "isampler2DMSArray"; - case RETURN_TYPE_UINT: - return "usampler2DMSArray"; - default: - return "sampler2DMSArray"; - } - break; - } - - case RESOURCE_DIMENSION_TEXTURECUBEARRAY: - { - switch (eType) - { - case RETURN_TYPE_SINT: - return "isamplerCubeArray"; - case RETURN_TYPE_UINT: - return "usamplerCubeArray"; - default: - return 
"samplerCubeArray"; - } - break; - } - default: - ASSERT(0); - break; - } - - return "sampler2D"; -} - -static const char *GetSamplerPrecision(const HLSLCrossCompilerContext *psContext, REFLECT_RESOURCE_PRECISION ePrec) -{ - if (!HavePrecisionQualifiers(psContext)) - return " "; - - switch (ePrec) - { - default: - case REFLECT_RESOURCE_PRECISION_UNKNOWN: - case REFLECT_RESOURCE_PRECISION_LOWP: - return EmitLowp(psContext) ? "lowp " : "mediump "; - case REFLECT_RESOURCE_PRECISION_HIGHP: - return "highp "; - case REFLECT_RESOURCE_PRECISION_MEDIUMP: - return "mediump "; - } -} - -static void TranslateVulkanResource(HLSLCrossCompilerContext* psContext, const Declaration* psDecl) -{ - bstring glsl = *psContext->currentGLSLString; - Shader* psShader = psContext->psShader; - - const ResourceBinding *psBinding = NULL; - psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber, &psBinding); - ASSERT(psBinding != NULL); - - const char *samplerPrecision = GetSamplerPrecision(psContext, psBinding ? 
psBinding->ePrecision : REFLECT_RESOURCE_PRECISION_UNKNOWN); - std::string tname = ResourceName(psContext, RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber, 0); - - const char* samplerTypeName = GetVulkanTextureType(psContext, - psDecl->value.eResourceDimension, - psDecl->asOperands[0].ui32RegisterNumber); - - GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(tname); - bformata(glsl, "layout(set = %d, binding = %d) ", binding.set, binding.binding); - bcatcstr(glsl, "uniform "); - bcatcstr(glsl, samplerPrecision); - bcatcstr(glsl, samplerTypeName); - bcatcstr(glsl, " "); - bcatcstr(glsl, tname.c_str()); - bcatcstr(glsl, ";\n"); -} - -static void TranslateResourceTexture(HLSLCrossCompilerContext* psContext, const Declaration* psDecl, uint32_t samplerCanDoShadowCmp) -{ - bstring glsl = *psContext->currentGLSLString; - Shader* psShader = psContext->psShader; - const char *samplerPrecision = NULL; - std::set::iterator i; - - const char* samplerTypeName = GetSamplerType(psContext, - psDecl->value.eResourceDimension, - psDecl->asOperands[0].ui32RegisterNumber); - - if (psDecl->value.eResourceDimension == RESOURCE_DIMENSION_TEXTURECUBEARRAY - && !HaveCubemapArray(psContext->psShader->eTargetLanguage)) - { - // Need to enable extension (either OES or ARB), but we only need to add it once - if (IsESLanguage(psContext->psShader->eTargetLanguage)) - { - psContext->EnableExtension("GL_OES_texture_cube_map_array"); - psContext->EnableExtension("GL_EXT_texture_cube_map_array"); - } - else - psContext->RequireExtension("GL_ARB_texture_cube_map_array"); - } - - if (psContext->psShader->eTargetLanguage == LANG_ES_100 && samplerCanDoShadowCmp && psDecl->ui32IsShadowTex) - { - psContext->EnableExtension("GL_EXT_shadow_samplers"); - } - - const ResourceBinding *psBinding = NULL; - psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber, &psBinding); - ASSERT(psBinding != 
NULL); - - samplerPrecision = GetSamplerPrecision(psContext, psBinding ? psBinding->ePrecision : REFLECT_RESOURCE_PRECISION_UNKNOWN); - - if (psContext->flags & HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS) - { - if (samplerCanDoShadowCmp && psDecl->ui32IsShadowTex) - { - for (i = psDecl->samplersUsed.begin(); i != psDecl->samplersUsed.end(); i++) - { - std::string tname = TextureSamplerName(&psShader->sInfo, psDecl->asOperands[0].ui32RegisterNumber, *i, 1); - bcatcstr(glsl, "uniform "); - bcatcstr(glsl, samplerPrecision); - bcatcstr(glsl, samplerTypeName); - bcatcstr(glsl, "Shadow "); - bcatcstr(glsl, tname.c_str()); - bcatcstr(glsl, ";\n"); - } - } - for (i = psDecl->samplersUsed.begin(); i != psDecl->samplersUsed.end(); i++) - { - std::string tname = TextureSamplerName(&psShader->sInfo, psDecl->asOperands[0].ui32RegisterNumber, *i, 0); - bcatcstr(glsl, "uniform "); - bcatcstr(glsl, samplerPrecision); - bcatcstr(glsl, samplerTypeName); - bcatcstr(glsl, " "); - bcatcstr(glsl, tname.c_str()); - bcatcstr(glsl, ";\n"); - } - } - - std::string tname = ResourceName(psContext, RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber, 0); - - bcatcstr(glsl, "uniform "); - bcatcstr(glsl, samplerPrecision); - bcatcstr(glsl, samplerTypeName); - bcatcstr(glsl, " "); - bcatcstr(glsl, tname.c_str()); - bcatcstr(glsl, ";\n"); - - if (samplerCanDoShadowCmp && psDecl->ui32IsShadowTex) - { - //Create shadow and non-shadow sampler. - //HLSL does not have separate types for depth compare, just different functions. 
- std::string tname = ResourceName(psContext, RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber, 1); - - if (HaveUniformBindingsAndLocations(psContext->psShader->eTargetLanguage, psContext->psShader->extensions, psContext->flags) || - ((psContext->flags & HLSLCC_FLAG_FORCE_EXPLICIT_LOCATIONS) && ((psContext->flags & HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS) != HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS))) - { - GLSLCrossDependencyData::GLSLBufferBindPointInfo slotInfo = psContext->psDependencies->GetGLSLResourceBinding(tname, GLSLCrossDependencyData::BufferType_Texture); - bformata(glsl, "UNITY_LOCATION(%d) ", slotInfo.slot); - } - bcatcstr(glsl, "uniform "); - bcatcstr(glsl, samplerPrecision); - bcatcstr(glsl, samplerTypeName); - bcatcstr(glsl, "Shadow "); - bcatcstr(glsl, tname.c_str()); - bcatcstr(glsl, ";\n"); - } -} - -void ToGLSL::HandleInputRedirect(const Declaration *psDecl, const char *Precision) -{ - Operand *psOperand = (Operand *)&psDecl->asOperands[0]; - Shader *psShader = psContext->psShader; - bstring glsl = *psContext->currentGLSLString; - int needsRedirect = 0; - const ShaderInfo::InOutSignature *psSig = NULL; - - int regSpace = psOperand->GetRegisterSpace(psContext); - if (regSpace == 0) - { - if (psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xff) - needsRedirect = 1; - } - else if (psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xff) - { - psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->ui32CompMask, &psSig); - needsRedirect = 1; - } - - if (needsRedirect == 1) - { - // TODO What if this is indexed? 
- int needsLooping = 0; - int i = 0; - uint32_t origArraySize = 0; - uint32_t origMask = psOperand->ui32CompMask; - - ASSERT(psContext->psShader->aIndexedInput[regSpace][psOperand->ui32RegisterNumber] == 0); - - psContext->AddIndentation(); - // Does the input have multiple array components (such as geometry shader input, or domain shader control point input) - if ((psShader->eShaderType == DOMAIN_SHADER && regSpace == 0) || (psShader->eShaderType == GEOMETRY_SHADER)) - { - // The count is actually stored in psOperand->aui32ArraySizes[0] - origArraySize = psOperand->aui32ArraySizes[0]; - bformata(glsl, "%s vec4 phase%d_Input%d_%d[%d];\n", Precision, psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber, origArraySize); - needsLooping = 1; - i = origArraySize - 1; - } - else - bformata(glsl, "%s vec4 phase%d_Input%d_%d;\n", Precision, psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); - - psContext->indent++; - - // Do a conditional loop. In normal cases needsLooping == 0 so this is only run once. - do - { - int comp = 0; - bstring str = GetEarlyMain(psContext); - if (needsLooping) - bformata(str, "phase%d_Input%d_%d[%d] = vec4(", psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber, i); - else - bformata(str, "phase%d_Input%d_%d = vec4(", psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); - - while (comp < 4) - { - int numComps = 0; - int hasCast = 0; - int hasSig = 0; - if (regSpace == 0) - hasSig = psContext->psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, 1 << comp, &psSig, true); - else - hasSig = psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, 1 << comp, &psSig, true); - - if (hasSig) - { - numComps = GetNumberBitsSet(psSig->ui32Mask); - if (psSig->eComponentType == INOUT_COMPONENT_SINT32) - { - bformata(str, HaveBitEncodingOps(psContext->psShader->eTargetLanguage) ? 
"intBitsToFloat(" : "float("); - hasCast = 1; - } - else if (psSig->eComponentType == INOUT_COMPONENT_UINT32) - { - bformata(str, HaveBitEncodingOps(psContext->psShader->eTargetLanguage) ? "uintBitsToFloat(" : "float("); - hasCast = 1; - } - - // Override the array size of the operand so TranslateOperand call below prints the correct index - if (needsLooping) - psOperand->aui32ArraySizes[0] = i; - - // And the component mask - psOperand->ui32CompMask = 1 << comp; - - TranslateOperand(str, psOperand, TO_FLAG_NAME_ONLY); - - // Restore the original array size value and mask - psOperand->ui32CompMask = origMask; - if (needsLooping) - psOperand->aui32ArraySizes[0] = origArraySize; - - if (hasCast) - bcatcstr(str, ")"); - comp += numComps; - } - else // no signature found -> fill with zero - { - bcatcstr(str, "0"); - comp++; - } - - if (comp < 4) - bcatcstr(str, ", "); - } - bcatcstr(str, ");\n"); - } - while ((--i) >= 0); - - psContext->indent--; - - if (regSpace == 0) - psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] = 0xfe; - else - psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] = 0xfe; - } -} - -void ToGLSL::TranslateDeclaration(const Declaration* psDecl) -{ - bstring glsl = *psContext->currentGLSLString; - Shader* psShader = psContext->psShader; - - switch (psDecl->eOpcode) - { - case OPCODE_DCL_INPUT_SGV: - case OPCODE_DCL_INPUT_PS_SGV: - { - const SPECIAL_NAME eSpecialName = psDecl->asOperands[0].eSpecialName; - switch (eSpecialName) - { - case NAME_POSITION: - { - AddBuiltinInput(psDecl, "gl_Position"); - break; - } - case NAME_RENDER_TARGET_ARRAY_INDEX: - { - AddBuiltinInput(psDecl, "gl_Layer"); - if (psShader->eShaderType == VERTEX_SHADER) - { - psContext->RequireExtension("GL_AMD_vertex_shader_layer"); - } - - break; - } - case NAME_CLIP_DISTANCE: - { - AddBuiltinInput(psDecl, "gl_ClipDistance"); - break; - } - case NAME_CULL_DISTANCE: - { - 
AddBuiltinInput(psDecl, "gl_CullDistance"); - break; - } - case NAME_VIEWPORT_ARRAY_INDEX: - { - AddBuiltinInput(psDecl, "gl_ViewportIndex"); - break; - } - case NAME_INSTANCE_ID: - { - AddBuiltinInput(psDecl, "gl_InstanceID"); - break; - } - case NAME_IS_FRONT_FACE: - { - /* - Cast to int used because - if(gl_FrontFacing != 0) failed to compiled on Intel HD 4000. - Suggests no implicit conversion for bool<->int. - */ - - if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) - AddBuiltinInput(psDecl, "(gl_FrontFacing ? 0xffffffffu : uint(0))"); // Old ES3.0 Adrenos treat 0u as const int - else - AddBuiltinInput(psDecl, "(gl_FrontFacing ? 1 : 0)"); - break; - } - case NAME_SAMPLE_INDEX: - { - // Using gl_SampleID requires either GL_OES_sample_variables or #version 320 es - if (IsESLanguage(psContext->psShader->eTargetLanguage)) - psContext->RequireExtension("GL_OES_sample_variables"); - AddBuiltinInput(psDecl, "gl_SampleID"); - break; - } - case NAME_VERTEX_ID: - { - AddBuiltinInput(psDecl, "gl_VertexID"); - break; - } - case NAME_PRIMITIVE_ID: - { - if (psShader->eShaderType == GEOMETRY_SHADER) - AddBuiltinInput(psDecl, "gl_PrimitiveIDIn"); // LOL opengl. 
- else - AddBuiltinInput(psDecl, "gl_PrimitiveID"); - break; - } - default: - { - bformata(glsl, "in vec4 %s;\n", psDecl->asOperands[0].specialName.c_str()); - } - } - break; - } - - case OPCODE_DCL_OUTPUT_SIV: - { - switch (psDecl->asOperands[0].eSpecialName) - { - case NAME_POSITION: - { - AddBuiltinOutput(psDecl, 0, "gl_Position"); - break; - } - case NAME_RENDER_TARGET_ARRAY_INDEX: - { - AddBuiltinOutput(psDecl, 0, "gl_Layer"); - if (psShader->eShaderType == VERTEX_SHADER || psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER) - { - if (psContext->IsVulkan()) - { - psContext->RequireExtension("GL_ARB_shader_viewport_layer_array"); - } - else if (psContext->IsSwitch()) - { - psContext->RequireExtension("GL_NV_viewport_array2"); - } - else if (psShader->eShaderType == VERTEX_SHADER) // case 1261150 - { - psContext->RequireExtension("GL_AMD_vertex_shader_layer"); - } - } - - break; - } - case NAME_CLIP_DISTANCE: - { - AddBuiltinOutput(psDecl, 0, "gl_ClipDistance"); - break; - } - case NAME_CULL_DISTANCE: - { - AddBuiltinOutput(psDecl, 0, "gl_CullDistance"); - break; - } - case NAME_VIEWPORT_ARRAY_INDEX: - { - AddBuiltinOutput(psDecl, 0, "gl_ViewportIndex"); - break; - } - case NAME_VERTEX_ID: - { - ASSERT(0); //VertexID is not an output - break; - } - case NAME_PRIMITIVE_ID: - { - AddBuiltinOutput(psDecl, 0, "gl_PrimitiveID"); - break; - } - case NAME_INSTANCE_ID: - { - ASSERT(0); //InstanceID is not an output - break; - } - case NAME_IS_FRONT_FACE: - { - ASSERT(0); //FrontFacing is not an output - break; - } - case NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: - { - if (psContext->psShader->aIndexedOutput[1][psDecl->asOperands[0].ui32RegisterNumber]) - { - AddBuiltinOutput(psDecl, 4, "gl_TessLevelOuter"); - } - else - { - AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[0]"); - } - break; - } - case NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: - { - AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[1]"); - break; - } - case 
NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: - { - AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[2]"); - break; - } - case NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: - { - AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[3]"); - break; - } - case NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: - { - if (psContext->psShader->aIndexedOutput[1][psDecl->asOperands[0].ui32RegisterNumber]) - { - AddBuiltinOutput(psDecl, 3, "gl_TessLevelOuter"); - } - else - { - AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[0]"); - } - break; - } - case NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: - { - AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[1]"); - break; - } - case NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: - { - AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[2]"); - break; - } - case NAME_FINAL_LINE_DENSITY_TESSFACTOR: - { - if (psContext->psShader->aIndexedOutput[1][psDecl->asOperands[0].ui32RegisterNumber]) - { - AddBuiltinOutput(psDecl, 2, "gl_TessLevelOuter"); - } - else - { - AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[0]"); - } - break; - } - case NAME_FINAL_LINE_DETAIL_TESSFACTOR: - { - AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[1]"); - break; - } - case NAME_FINAL_TRI_INSIDE_TESSFACTOR: - case NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR: - { - if (psContext->psShader->aIndexedOutput[1][psDecl->asOperands[0].ui32RegisterNumber]) - { - AddBuiltinOutput(psDecl, 2, "gl_TessLevelInner"); - } - else - { - AddBuiltinOutput(psDecl, 0, "gl_TessLevelInner[0]"); - } - break; - } - case NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR: - { - AddBuiltinOutput(psDecl, 0, "gl_TessLevelInner[1]"); - break; - } - default: - { - // Sometimes DX compiler seems to declare patch constant outputs like this. Anyway, nothing for us to do. 
-// bformata(glsl, "out vec4 %s;\n", psDecl->asOperands[0].specialName.c_str()); - -/* bcatcstr(glsl, "#define "); - TranslateOperand(psContext, &psDecl->asOperands[0], TO_FLAG_NONE); - bformata(glsl, " %s\n", psDecl->asOperands[0].pszSpecialName); - break;*/ - } - } - break; - } - case OPCODE_DCL_INPUT: - { - const Operand* psOperand = &psDecl->asOperands[0]; - - int iNumComponents = psOperand->GetNumInputElements(psContext); - const char* StorageQualifier = "attribute"; - std::string inputName; - const char* Precision = ""; - - if ((psOperand->eType == OPERAND_TYPE_INPUT_DOMAIN_POINT) || - (psOperand->eType == OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID) || - (psOperand->eType == OPERAND_TYPE_INPUT_COVERAGE_MASK) || - (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_ID) || - (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_GROUP_ID) || - (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP) || - (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED) || - (psOperand->eType == OPERAND_TYPE_INPUT_FORK_INSTANCE_ID)) - { - break; - } - - // No need to declare patch constants read again by the hull shader. - if ((psOperand->eType == OPERAND_TYPE_INPUT_PATCH_CONSTANT) && psContext->psShader->eShaderType == HULL_SHADER) - { - break; - } - - // Also skip position input to hull and domain shader - if ((psOperand->eType == OPERAND_TYPE_INPUT_CONTROL_POINT) && - (psContext->psShader->eShaderType == HULL_SHADER || psContext->psShader->eShaderType == DOMAIN_SHADER)) - { - const ShaderInfo::InOutSignature *psIn = NULL; - psContext->psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psIn); - ASSERT(psIn != NULL); - - if ((psIn->semanticName == "SV_POSITION" || psIn->semanticName == "SV_Position" - || psIn->semanticName == "POS" || psIn->semanticName == "POSITION") && psIn->ui32SemanticIndex == 0) - break; - } - - //Already declared as part of an array. 
- if (psShader->aIndexedInput[psOperand->GetRegisterSpace(psContext)][psDecl->asOperands[0].ui32RegisterNumber] == -1) - { - break; - } - - inputName = psContext->GetDeclaredInputName(psOperand, NULL, 1, NULL); - - // In the case of the Hull Shader, due to the different phases, we might have already delcared this input - // so check to see if that is the case, and if not record it - if (psContext->psShader->eShaderType == HULL_SHADER) - { - if (psContext->psDependencies->IsHullShaderInputAlreadyDeclared(inputName)) - { - return; - } - - psContext->psDependencies->RecordHullShaderInput(inputName); - } - - if (InOutSupported(psContext->psShader->eTargetLanguage)) - { - if (psOperand->eType == OPERAND_TYPE_INPUT_PATCH_CONSTANT && psContext->psShader->eShaderType == DOMAIN_SHADER) - StorageQualifier = "patch in"; - else - StorageQualifier = "in"; - } - - if (HavePrecisionQualifiers(psContext)) - { - switch (psOperand->eMinPrecision) - { - case OPERAND_MIN_PRECISION_DEFAULT: - { - Precision = "highp"; - break; - } - case OPERAND_MIN_PRECISION_FLOAT_16: - { - Precision = "mediump"; - break; - } - case OPERAND_MIN_PRECISION_FLOAT_2_8: - { - Precision = EmitLowp(psContext) ? 
"lowp " : "mediump "; - break; - } - case OPERAND_MIN_PRECISION_SINT_16: - { - Precision = "mediump"; - break; - } - case OPERAND_MIN_PRECISION_UINT_16: - { - Precision = "mediump"; - break; - } - } - } - - const char * Interpolation = ""; - - if (psShader->eShaderType == GEOMETRY_SHADER || psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER) - { - const ShaderInfo::InOutSignature* psSignature = NULL; - - psShader->sInfo.GetInputSignatureFromRegister(psDecl->asOperands[0].ui32RegisterNumber, - psDecl->asOperands[0].ui32CompMask, - &psSignature, true); - - if ((psSignature != NULL) && (psSignature->eComponentType == INOUT_COMPONENT_UINT32 || - psSignature->eComponentType == INOUT_COMPONENT_SINT32)) // GLSL spec requires that integer inputs always have "flat" interpolation - { - Interpolation = GetInterpolationString(INTERPOLATION_CONSTANT, psContext->psShader->eTargetLanguage); - } - else if (psContext->psDependencies) // For floats we get the interpolation that was resolved from the fragment shader input - { - Interpolation = GetInterpolationString(psContext->psDependencies->GetInterpolationMode(psDecl->asOperands[0].ui32RegisterNumber), psContext->psShader->eTargetLanguage); - } - } - - DeclareInput(psContext, psDecl, - Interpolation, StorageQualifier, Precision, iNumComponents, (OPERAND_INDEX_DIMENSION)psOperand->iIndexDims, inputName.c_str(), psOperand->ui32CompMask); - - HandleInputRedirect(psDecl, Precision); - break; - } - case OPCODE_DCL_INPUT_PS_SIV: - { - switch (psDecl->asOperands[0].eSpecialName) - { - case NAME_POSITION: - { - AddBuiltinInput(psDecl, "gl_FragCoord"); - bcatcstr(GetEarlyMain(psContext), "vec4 hlslcc_FragCoord = vec4(gl_FragCoord.xyz, 1.0/gl_FragCoord.w);\n"); - break; - } - case NAME_RENDER_TARGET_ARRAY_INDEX: - { - AddBuiltinInput(psDecl, "gl_Layer"); - break; - } - default: - ASSERT(0); - break; - } - break; - } - case OPCODE_DCL_INPUT_SIV: - { - if (psShader->eShaderType == PIXEL_SHADER && 
psContext->psDependencies) - { - psContext->psDependencies->SetInterpolationMode(psDecl->asOperands[0].ui32RegisterNumber, psDecl->value.eInterpolation); - } - break; - } - case OPCODE_DCL_INPUT_PS: - { - const Operand* psOperand = &psDecl->asOperands[0]; - int iNumComponents = psOperand->GetNumInputElements(psContext); - const char* StorageQualifier = "varying"; - const char* Precision = ""; - std::string inputName; - const char* Interpolation = ""; - int hasNoPerspective = psContext->psShader->eTargetLanguage <= LANG_ES_310 ? 0 : 1; - inputName = psContext->GetDeclaredInputName(psOperand, NULL, 1, NULL); - - if (InOutSupported(psContext->psShader->eTargetLanguage)) - { - StorageQualifier = "in"; - } - const ShaderInfo::InOutSignature* psSignature = NULL; - - psShader->sInfo.GetInputSignatureFromRegister(psDecl->asOperands[0].ui32RegisterNumber, - psDecl->asOperands[0].ui32CompMask, - &psSignature); - - if (psSignature->eComponentType == INOUT_COMPONENT_UINT32 || - psSignature->eComponentType == INOUT_COMPONENT_SINT32) // GLSL spec requires that integer inputs always have "flat" interpolation - { - Interpolation = GetInterpolationString(INTERPOLATION_CONSTANT, psContext->psShader->eTargetLanguage); - } - else - { - switch (psDecl->value.eInterpolation) - { - case INTERPOLATION_CONSTANT: - { - Interpolation = "flat "; - break; - } - case INTERPOLATION_LINEAR: - { - break; - } - case INTERPOLATION_LINEAR_CENTROID: - { - Interpolation = "centroid "; - break; - } - case INTERPOLATION_LINEAR_NOPERSPECTIVE: - { - Interpolation = hasNoPerspective ? "noperspective " : ""; - break; - } - case INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID: - { - Interpolation = hasNoPerspective ? "noperspective centroid " : "centroid"; - break; - } - case INTERPOLATION_LINEAR_SAMPLE: - { - Interpolation = hasNoPerspective ? "sample " : ""; - break; - } - case INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE: - { - Interpolation = hasNoPerspective ? 
"noperspective sample " : ""; - break; - } - default: - ASSERT(0); - break; - } - } - - if (HavePrecisionQualifiers(psContext)) - { - switch (psOperand->eMinPrecision) - { - case OPERAND_MIN_PRECISION_DEFAULT: - { - Precision = "highp"; - break; - } - case OPERAND_MIN_PRECISION_FLOAT_16: - { - Precision = "mediump"; - break; - } - case OPERAND_MIN_PRECISION_FLOAT_2_8: - { - Precision = EmitLowp(psContext) ? "lowp " : "mediump "; - break; - } - case OPERAND_MIN_PRECISION_SINT_16: - { - Precision = "mediump"; - break; - } - case OPERAND_MIN_PRECISION_UINT_16: - { - Precision = "mediump"; - break; - } - } - } - - bool haveFramebufferFetch = (psShader->extensions->EXT_shader_framebuffer_fetch && - psShader->eShaderType == PIXEL_SHADER && - psContext->flags & HLSLCC_FLAG_SHADER_FRAMEBUFFER_FETCH); - - // If this is a SV_Target input and framebuffer fetch is enabled, do special input declaration unless output is declared later - if (haveFramebufferFetch && psOperand->iPSInOut && inputName.size() == 13 && !strncmp(inputName.c_str(), "vs_SV_Target", 12)) - { - bstring type = NULL; - - switch (psSignature->eComponentType) - { - case INOUT_COMPONENT_UINT32: - { - if (iNumComponents > 1) - type = bformat("uvec%d", iNumComponents); - else - type = bformat("uint"); - break; - } - case INOUT_COMPONENT_SINT32: - { - if (iNumComponents > 1) - type = bformat("ivec%d", iNumComponents); - else - type = bformat("int"); - break; - } - case INOUT_COMPONENT_FLOAT32: - { - if (iNumComponents > 1) - type = bformat("vec%d", iNumComponents); - else - type = bformat("float"); - break; - } - default: - ASSERT(0); - break; - } - - uint32_t renderTarget = psSignature->ui32SemanticIndex; - - char OutputName[512]; - bstring oname; - oname = bformat("%s%s%d", psContext->outputPrefix, psSignature->semanticName.c_str(), renderTarget); - strncpy(OutputName, (char *)oname->data, 512); - bdestroy(oname); - - if (WriteToFragData(psContext->psShader->eTargetLanguage)) - { - bcatcstr(glsl, "#ifdef 
GL_EXT_shader_framebuffer_fetch\n"); - bformata(glsl, "#define vs_%s gl_LastFragData[%d]\n", OutputName, renderTarget); - bcatcstr(glsl, "#else\n"); - bformata(glsl, "#define vs_%s gl_FragData[%d]\n", OutputName, renderTarget); - bcatcstr(glsl, "#endif\n"); - } - else - { - if (!RenderTargetDeclared(renderTarget)) - { - bstring layoutQualifier = bformat(""); - - if (HaveInOutLocationQualifier(psContext->psShader->eTargetLanguage) || - HaveLimitedInOutLocationQualifier(psContext->psShader->eTargetLanguage, psContext->psShader->extensions)) - { - uint32_t index = 0; - - if ((psContext->flags & HLSLCC_FLAG_DUAL_SOURCE_BLENDING) && DualSourceBlendSupported(psContext->psShader->eTargetLanguage)) - { - if (renderTarget > 0) - { - renderTarget = 0; - index = 1; - } - bdestroy(layoutQualifier); - layoutQualifier = bformat("layout(location = %d, index = %d) ", renderTarget, index); - } - else - { - bdestroy(layoutQualifier); - layoutQualifier = bformat("layout(location = %d) ", renderTarget); - } - } - - auto lq = bstr2cstr(layoutQualifier, '\0'); - - bcatcstr(glsl, "#ifdef GL_EXT_shader_framebuffer_fetch\n"); - bformata(glsl, "%sinout %s %s %s;\n", lq, Precision, type->data, OutputName); - bcatcstr(glsl, "#else\n"); - bformata(glsl, "%sout %s %s %s;\n", lq, Precision, type->data, OutputName); - bcatcstr(glsl, "#endif\n"); - - bcstrfree(lq); - bdestroy(layoutQualifier); - } - } - break; - } - - DeclareInput(psContext, psDecl, - Interpolation, StorageQualifier, Precision, iNumComponents, INDEX_1D, inputName.c_str(), psOperand->ui32CompMask); - - HandleInputRedirect(psDecl, Precision); - - break; - } - case OPCODE_DCL_TEMPS: - { - uint32_t i = 0; - const uint32_t ui32NumTemps = psDecl->value.ui32NumTemps; - bool usePrecision = (HavePrecisionQualifiers(psContext) != 0); - // Default values for temp variables allow avoiding Switch shader compiler incorrect warnings - // related to potential use of uninitialized variables (false-positives from compiler). 
- bool useDefaultInit = psContext->IsSwitch(); - - for (i = 0; i < ui32NumTemps; i++) - { - if (useDefaultInit) - { - if (psShader->psFloatTempSizes[i] != 0) - { - const char* constructor = HLSLcc::GetConstructorForType(psContext, SVT_FLOAT, psShader->psFloatTempSizes[i], usePrecision); - const char* constructorNoPrecision = HLSLcc::GetConstructorForType(psContext, SVT_FLOAT, psShader->psFloatTempSizes[i], false); - bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "%d = %s(0);\n", constructor, i, constructorNoPrecision); - } - if (psShader->psFloat16TempSizes[i] != 0) - { - const char* constructor = HLSLcc::GetConstructorForType(psContext, SVT_FLOAT16, psShader->psFloat16TempSizes[i], usePrecision); - const char* constructorNoPrecision = HLSLcc::GetConstructorForType(psContext, SVT_FLOAT16, psShader->psFloat16TempSizes[i], false); - bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "16_%d = %s(0);\n", constructor, i, constructorNoPrecision); - } - if (psShader->psFloat10TempSizes[i] != 0) - { - const char* constructor = HLSLcc::GetConstructorForType(psContext, SVT_FLOAT10, psShader->psFloat10TempSizes[i], usePrecision); - const char* constructorNoPrecision = HLSLcc::GetConstructorForType(psContext, SVT_FLOAT10, psShader->psFloat10TempSizes[i], false); - bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "10_%d = %s(0);\n", constructor, i, constructorNoPrecision); - } - if (psShader->psIntTempSizes[i] != 0) - { - const char* constructor = HLSLcc::GetConstructorForType(psContext, SVT_INT, psShader->psIntTempSizes[i], usePrecision); - const char* constructorNoPrecision = HLSLcc::GetConstructorForType(psContext, SVT_INT, psShader->psIntTempSizes[i], false); - bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "i%d = %s(0);\n", constructor, i, constructorNoPrecision); - } - if (psShader->psInt16TempSizes[i] != 0) - { - const char* constructor = HLSLcc::GetConstructorForType(psContext, SVT_INT16, psShader->psInt16TempSizes[i], usePrecision); - const char* constructorNoPrecision = 
HLSLcc::GetConstructorForType(psContext, SVT_INT16, psShader->psInt16TempSizes[i], false); - bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "i16_%d = %s(0);\n", constructor, i, constructorNoPrecision); - } - if (psShader->psInt12TempSizes[i] != 0) - { - const char* constructor = HLSLcc::GetConstructorForType(psContext, SVT_INT12, psShader->psInt12TempSizes[i], usePrecision); - const char* constructorNoPrecision = HLSLcc::GetConstructorForType(psContext, SVT_INT12, psShader->psInt12TempSizes[i], false); - bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "i12_%d = %s(0);\n", constructor, i, constructorNoPrecision); - } - if (psShader->psUIntTempSizes[i] != 0) - { - const char* constructor = HLSLcc::GetConstructorForType(psContext, SVT_UINT, psShader->psUIntTempSizes[i], usePrecision); - const char* constructorNoPrecision = HLSLcc::GetConstructorForType(psContext, SVT_UINT, psShader->psUIntTempSizes[i], false); - bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "u%d = %s(0);\n", constructor, i, constructorNoPrecision); - } - if (psShader->psUInt16TempSizes[i] != 0) - { - const char* constructor = HLSLcc::GetConstructorForType(psContext, SVT_UINT16, psShader->psUInt16TempSizes[i], usePrecision); - const char* constructorNoPrecision = HLSLcc::GetConstructorForType(psContext, SVT_UINT16, psShader->psUInt16TempSizes[i], false); - bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "u16_%d = %s(0);\n", constructor, i, constructorNoPrecision); - } - if (psShader->fp64 && (psShader->psDoubleTempSizes[i] != 0)) - { - const char* constructor = HLSLcc::GetConstructorForType(psContext, SVT_DOUBLE, psShader->psDoubleTempSizes[i], usePrecision); - const char* constructorNoPrecision = HLSLcc::GetConstructorForType(psContext, SVT_DOUBLE, psShader->psDoubleTempSizes[i], false); - bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "d%d = %s(0);\n", constructor, i, constructorNoPrecision); - } - if (psShader->psBoolTempSizes[i] != 0) - { - const char* constructor = HLSLcc::GetConstructorForType(psContext, SVT_BOOL, 
psShader->psBoolTempSizes[i], usePrecision); - const char* constructorNoPrecision = HLSLcc::GetConstructorForType(psContext, SVT_BOOL, psShader->psBoolTempSizes[i], false); - bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "b%d = %s(0);\n", constructor, i, constructorNoPrecision); - } - } - else - { - if (psShader->psFloatTempSizes[i] != 0) - bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_FLOAT, psShader->psFloatTempSizes[i], usePrecision), i); - if (psShader->psFloat16TempSizes[i] != 0) - bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "16_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_FLOAT16, psShader->psFloat16TempSizes[i], usePrecision), i); - if (psShader->psFloat10TempSizes[i] != 0) - bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "10_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_FLOAT10, psShader->psFloat10TempSizes[i], usePrecision), i); - if (psShader->psIntTempSizes[i] != 0) - bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "i%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_INT, psShader->psIntTempSizes[i], usePrecision), i); - if (psShader->psInt16TempSizes[i] != 0) - bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "i16_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_INT16, psShader->psInt16TempSizes[i], usePrecision), i); - if (psShader->psInt12TempSizes[i] != 0) - bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "i12_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_INT12, psShader->psInt12TempSizes[i], usePrecision), i); - if (psShader->psUIntTempSizes[i] != 0) - bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "u%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_UINT, psShader->psUIntTempSizes[i], usePrecision), i); - if (psShader->psUInt16TempSizes[i] != 0) - bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "u16_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_UINT16, psShader->psUInt16TempSizes[i], usePrecision), i); - if (psShader->fp64 && (psShader->psDoubleTempSizes[i] != 0)) - bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "d%d;\n", 
HLSLcc::GetConstructorForType(psContext, SVT_DOUBLE, psShader->psDoubleTempSizes[i], usePrecision), i); - if (psShader->psBoolTempSizes[i] != 0) - bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "b%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_BOOL, psShader->psBoolTempSizes[i], usePrecision), i); - } - } - break; - } - case OPCODE_SPECIAL_DCL_IMMCONST: - { - ASSERT(0 && "DX9 shaders no longer supported!"); - break; - } - case OPCODE_DCL_CONSTANT_BUFFER: - { - const Operand* psOperand = &psDecl->asOperands[0]; - const uint32_t ui32BindingPoint = psOperand->aui32ArraySizes[0]; - - const ConstantBuffer* psCBuf = NULL; - psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, ui32BindingPoint, &psCBuf); - - // We don't have a original resource name, maybe generate one??? - if (!psCBuf) - { - char name[24]; - sprintf(name, "ConstantBuffer%d", ui32BindingPoint); - - GLSLCrossDependencyData::GLSLBufferBindPointInfo bindPointInfo = psContext->IsVulkan() ? - GLSLCrossDependencyData::GLSLBufferBindPointInfo{ ui32BindingPoint, true } : psContext->psDependencies->GetGLSLResourceBinding(name, GLSLCrossDependencyData::BufferType_Constant); - - bool isKnown = bindPointInfo.known; - uint32_t actualBindingPoint = bindPointInfo.slot; - - if (HaveUniformBindingsAndLocations(psContext->psShader->eTargetLanguage, psContext->psShader->extensions, psContext->flags) || (psContext->flags & HLSLCC_FLAG_FORCE_EXPLICIT_LOCATIONS)) - { - if (!psContext->IsVulkan() && !isKnown && UseReflection(psContext)) - psContext->m_Reflection.OnConstantBufferBinding(name, actualBindingPoint); - bformata(glsl, "UNITY_LOCATION(%d) ", actualBindingPoint); - } - - bformata(glsl, "layout(std140) uniform %s {\n\tvec4 data[%d];\n} cb%d;\n", name, psOperand->aui32ArraySizes[1], ui32BindingPoint); - break; - } - - if (psCBuf->name.substr(0, 20) == "hlslcc_SubpassInput_" && psCBuf->name.length() >= 23 && !psCBuf->asVars.empty()) - { - // Special case for vulkan subpass input. 
- - // The multisample versions have multiple members in the cbuffer, but we must only declare once. - // We still need to loop through all the variables and adjust names - - // Pick up the type and index - char ty = psCBuf->name[20]; - int idx = psCBuf->name[22] - '0'; - bool isMS = false; - GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding((std::string &)psCBuf->name, false, 2); - - bool declared = false; - for (std::vector::const_iterator itr = psCBuf->asVars.begin(); itr != psCBuf->asVars.end(); itr++) - { - ShaderVar &sv = (ShaderVar &)*itr; - if (sv.name.substr(0, 15) == "hlslcc_fbinput_") - { - if (!declared) - { - switch (ty) - { - case 'f': - bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform highp subpassInput %s;\n", idx, binding.set, binding.binding, sv.name.c_str()); - break; - case 'h': - bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform mediump subpassInput %s;\n", idx, binding.set, binding.binding, sv.name.c_str()); - break; - case 'i': - bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform isubpassInput %s;\n", idx, binding.set, binding.binding, sv.name.c_str()); - break; - case 'u': - bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform usubpassInput %s;\n", idx, binding.set, binding.binding, sv.name.c_str()); - break; - case 'F': - bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform highp subpassInputMS %s;\n", idx, binding.set, binding.binding, sv.name.substr(0, 16).c_str()); - isMS = true; - break; - case 'H': - bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform mediump subpassInputMS %s;\n", idx, binding.set, binding.binding, sv.name.substr(0, 16).c_str()); - isMS = true; - break; - case 'I': - bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform isubpassInputMS 
%s;\n", idx, binding.set, binding.binding, sv.name.substr(0, 16).c_str()); - isMS = true; - break; - case 'U': - bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform usubpassInputMS %s;\n", idx, binding.set, binding.binding, sv.name.substr(0, 16).c_str()); - isMS = true; - break; - default: - break; - } - declared = true; - } - else - { - if (ty == 'F' || ty == 'I' || ty == 'U') - isMS = true; - } - // Munge the name so it'll get the correct function call in GLSL directly - sv.name.insert(0, "subpassLoad("); - if (isMS) - sv.name.append(","); - else - sv.name.append(")"); - // Also update the type name - sv.sType.name = sv.name; - sv.sType.fullName = sv.name; - } - } - - // Break out so this doesn't get declared. - break; - } - - if (psCBuf->name == "OVR_multiview") - { - // Special case for piggy-backing multiview info out - // This is not really a cbuffer, but if we see this being accessed, we know we need viewID - - // Extract numViews - uint32_t numViews = 0; - for (std::vector::const_iterator itr = psCBuf->asVars.begin(); itr != psCBuf->asVars.end(); itr++) - { - if (strncmp(itr->name.c_str(), "numViews_", 9) == 0) - { - // I really don't think we'll ever have more than 9 multiviews - numViews = itr->name[9] - '0'; - break; - } - } - if (numViews > 0 && numViews < 10) - { - // multiview2 is required because we have built-in shaders that do eye-dependent work other than just position - psContext->RequireExtension("GL_OVR_multiview2"); - - if (psShader->eShaderType == VERTEX_SHADER) - bformata(glsl, "layout(num_views = %d) in;\n", numViews); - - break; // Break out so we don't actually declare this cbuffer - } - } - - if (IsPreTransformConstantBufferName(psCBuf->name.c_str())) - { - m_NeedUnityPreTransformDecl = true; - break; // Break out so we don't actually declare this cbuffer - } - - if (psContext->flags & HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT) - { - if (psContext->flags & HLSLCC_FLAG_GLOBAL_CONSTS_NEVER_IN_UBO && psCBuf->name[0] 
== '$') - { - DeclareStructConstants(ui32BindingPoint, psCBuf, psOperand, glsl); - } - else - { - DeclareUBOConstants(ui32BindingPoint, psCBuf, glsl); - } - } - else - { - DeclareStructConstants(ui32BindingPoint, psCBuf, psOperand, glsl); - } - break; - } - case OPCODE_DCL_RESOURCE: - { - psShader->aeResourceDims[psDecl->asOperands[0].ui32RegisterNumber] = psDecl->value.eResourceDimension; - - // Vulkan doesn't use combined textures+samplers, so do own handling in a separate func - if (psContext->IsVulkan()) - { - TranslateVulkanResource(psContext, psDecl); - break; - } - - if (HaveUniformBindingsAndLocations(psContext->psShader->eTargetLanguage, psContext->psShader->extensions, psContext->flags) || - ((psContext->flags & HLSLCC_FLAG_FORCE_EXPLICIT_LOCATIONS) && ((psContext->flags & HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS) != HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS))) - { - std::string tname = ResourceName(psContext, RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber, 0); - GLSLCrossDependencyData::GLSLBufferBindPointInfo slotInfo = psContext->psDependencies->GetGLSLResourceBinding(tname, GLSLCrossDependencyData::BufferType_Texture); - - bformata(glsl, "UNITY_LOCATION(%d) ", slotInfo.slot); - if (!slotInfo.known && UseReflection(psContext)) - { - const RESOURCE_DIMENSION dim = psDecl->value.eResourceDimension; - if (dim == RESOURCE_DIMENSION_BUFFER) - psContext->m_Reflection.OnBufferBinding(tname, slotInfo.slot, false); - else - { - bool isMSAATex = (dim == RESOURCE_DIMENSION_TEXTURE2DMS) || (dim == RESOURCE_DIMENSION_TEXTURE2DMSARRAY); - psContext->m_Reflection.OnTextureBinding(tname, slotInfo.slot, slotInfo.slot, isMSAATex, GetTextureDimension(psContext, dim, psDecl->asOperands[0].ui32RegisterNumber), false); - } - } - } - - switch (psDecl->value.eResourceDimension) - { - case RESOURCE_DIMENSION_BUFFER: - { - bcatcstr(glsl, "uniform "); - if (IsESLanguage(psContext->psShader->eTargetLanguage)) - bcatcstr(glsl, "highp "); - bformata(glsl, "%s ", 
GetSamplerType(psContext, - RESOURCE_DIMENSION_BUFFER, - psDecl->asOperands[0].ui32RegisterNumber)); - TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NONE); - bcatcstr(glsl, ";\n"); - break; - } - - case RESOURCE_DIMENSION_TEXTURE1D: - case RESOURCE_DIMENSION_TEXTURE2D: - case RESOURCE_DIMENSION_TEXTURECUBE: - case RESOURCE_DIMENSION_TEXTURE1DARRAY: - case RESOURCE_DIMENSION_TEXTURE2DARRAY: - case RESOURCE_DIMENSION_TEXTURECUBEARRAY: - { - TranslateResourceTexture(psContext, psDecl, 1); - break; - } - - case RESOURCE_DIMENSION_TEXTURE2DMS: - case RESOURCE_DIMENSION_TEXTURE3D: - case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: - { - TranslateResourceTexture(psContext, psDecl, 0); - break; - } - - default: - ASSERT(0); - break; - } - break; - } - case OPCODE_DCL_OUTPUT: - { - bool needsDeclare = true; - if (psShader->eShaderType == HULL_SHADER && psShader->asPhases[psContext->currentPhase].ePhase == HS_CTRL_POINT_PHASE && psDecl->asOperands[0].ui32RegisterNumber == 0) - { - // Need extra check from signature: - const ShaderInfo::InOutSignature *sig = NULL; - psShader->sInfo.GetOutputSignatureFromRegister(0, psDecl->asOperands->GetAccessMask(), 0, &sig, true); - if (!sig || sig->semanticName == "POSITION" || sig->semanticName == "POS" || sig->semanticName == "SV_Position") - { - needsDeclare = false; - AddBuiltinOutput(psDecl, 0, "gl_out[gl_InvocationID].gl_Position"); - } - } - - if (needsDeclare) - { - AddUserOutput(psDecl); - } - break; - } - case OPCODE_DCL_GLOBAL_FLAGS: - { - uint32_t ui32Flags = psDecl->value.ui32GlobalFlags; - - if (ui32Flags & GLOBAL_FLAG_FORCE_EARLY_DEPTH_STENCIL && psContext->psShader->eShaderType == PIXEL_SHADER) - { - bcatcstr(glsl, "layout(early_fragment_tests) in;\n"); - psShader->sInfo.bEarlyFragmentTests = true; - } - if ((ui32Flags & GLOBAL_FLAG_REFACTORING_ALLOWED) && HavePreciseQualifier(psContext->psShader->eTargetLanguage)) - { - static const char * const types[] = - { - "vec4", "ivec4", "bvec4", "uvec4" - }; - - for (int i = 0; i < 
sizeof(types) / sizeof(types[0]); ++i) - { - char const * t = types[i]; - bformata(glsl, "precise %s u_xlat_precise_%s;\n", t, t); - } - } - if (ui32Flags & GLOBAL_FLAG_ENABLE_DOUBLE_PRECISION_FLOAT_OPS) - { - psContext->EnableExtension("GL_ARB_gpu_shader_fp64"); - psShader->fp64 = 1; - } - break; - } - - case OPCODE_DCL_THREAD_GROUP: - { - bformata(glsl, "layout(local_size_x = %d, local_size_y = %d, local_size_z = %d) in;\n", - psDecl->value.aui32WorkGroupSize[0], - psDecl->value.aui32WorkGroupSize[1], - psDecl->value.aui32WorkGroupSize[2]); - break; - } - case OPCODE_DCL_TESS_OUTPUT_PRIMITIVE: - { - if (psContext->psShader->eShaderType == HULL_SHADER) - { - psContext->psShader->sInfo.eTessOutPrim = psDecl->value.eTessOutPrim; - // Invert triangle winding order to match glsl better, except on vulkan - if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) == 0) - { - if (psContext->psShader->sInfo.eTessOutPrim == TESSELLATOR_OUTPUT_TRIANGLE_CW) - psContext->psShader->sInfo.eTessOutPrim = TESSELLATOR_OUTPUT_TRIANGLE_CCW; - else if (psContext->psShader->sInfo.eTessOutPrim == TESSELLATOR_OUTPUT_TRIANGLE_CCW) - psContext->psShader->sInfo.eTessOutPrim = TESSELLATOR_OUTPUT_TRIANGLE_CW; - } - } - break; - } - case OPCODE_DCL_TESS_DOMAIN: - { - if (psContext->psShader->eShaderType == DOMAIN_SHADER) - { - switch (psDecl->value.eTessDomain) - { - case TESSELLATOR_DOMAIN_ISOLINE: - { - bcatcstr(glsl, "layout(isolines) in;\n"); - break; - } - case TESSELLATOR_DOMAIN_TRI: - { - bcatcstr(glsl, "layout(triangles) in;\n"); - break; - } - case TESSELLATOR_DOMAIN_QUAD: - { - bcatcstr(glsl, "layout(quads) in;\n"); - break; - } - default: - { - break; - } - } - } - break; - } - case OPCODE_DCL_TESS_PARTITIONING: - { - if (psContext->psShader->eShaderType == HULL_SHADER) - { - psContext->psShader->sInfo.eTessPartitioning = psDecl->value.eTessPartitioning; - } - break; - } - case OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY: - { - switch (psDecl->value.eOutputPrimitiveTopology) - { - case 
PRIMITIVE_TOPOLOGY_POINTLIST: - { - bcatcstr(glsl, "layout(points) out;\n"); - break; - } - case PRIMITIVE_TOPOLOGY_LINELIST_ADJ: - case PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ: - case PRIMITIVE_TOPOLOGY_LINELIST: - case PRIMITIVE_TOPOLOGY_LINESTRIP: - { - bcatcstr(glsl, "layout(line_strip) out;\n"); - break; - } - - case PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ: - case PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ: - case PRIMITIVE_TOPOLOGY_TRIANGLESTRIP: - case PRIMITIVE_TOPOLOGY_TRIANGLELIST: - { - bcatcstr(glsl, "layout(triangle_strip) out;\n"); - break; - } - default: - { - break; - } - } - break; - } - case OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT: - { - bformata(glsl, "layout(max_vertices = %d) out;\n", psDecl->value.ui32MaxOutputVertexCount); - break; - } - case OPCODE_DCL_GS_INPUT_PRIMITIVE: - { - switch (psDecl->value.eInputPrimitive) - { - case PRIMITIVE_POINT: - { - bcatcstr(glsl, "layout(points) in;\n"); - break; - } - case PRIMITIVE_LINE: - { - bcatcstr(glsl, "layout(lines) in;\n"); - break; - } - case PRIMITIVE_LINE_ADJ: - { - bcatcstr(glsl, "layout(lines_adjacency) in;\n"); - break; - } - case PRIMITIVE_TRIANGLE: - { - bcatcstr(glsl, "layout(triangles) in;\n"); - break; - } - case PRIMITIVE_TRIANGLE_ADJ: - { - bcatcstr(glsl, "layout(triangles_adjacency) in;\n"); - break; - } - default: - { - break; - } - } - break; - } - case OPCODE_DCL_INTERFACE: - { - const uint32_t interfaceID = psDecl->value.iface.ui32InterfaceID; - const uint32_t numUniforms = psDecl->value.iface.ui32ArraySize; - const uint32_t ui32NumBodiesPerTable = psContext->psShader->funcPointer[interfaceID].ui32NumBodiesPerTable; - ShaderVar* psVar; - uint32_t varFound; - - const char* uniformName; - - varFound = psContext->psShader->sInfo.GetInterfaceVarFromOffset(interfaceID, &psVar); - ASSERT(varFound); - uniformName = &psVar->name[0]; - - bformata(glsl, "subroutine uniform SubroutineType %s[%d*%d];\n", uniformName, numUniforms, ui32NumBodiesPerTable); - break; - } - case OPCODE_DCL_FUNCTION_BODY: - { - 
//bformata(glsl, "void Func%d();//%d\n", psDecl->asOperands[0].ui32RegisterNumber, psDecl->asOperands[0].eType); - break; - } - case OPCODE_DCL_FUNCTION_TABLE: - { - break; - } - case OPCODE_CUSTOMDATA: - { - // On Vulkan we just spew the data in uints as-is - if (psContext->IsVulkan()) - { - bstring glsl = *psContext->currentGLSLString; - bformata(glsl, "const uvec4 ImmCB_%d[] = uvec4[%d] (\n", psContext->currentPhase, psDecl->asImmediateConstBuffer.size()); - bool isFirst = true; - std::for_each(psDecl->asImmediateConstBuffer.begin(), psDecl->asImmediateConstBuffer.end(), [&](const ICBVec4 &data) - { - if (!isFirst) - { - bcatcstr(glsl, ",\n"); - } - isFirst = false; - bformata(glsl, "\tuvec4(0x%X, 0x%X, 0x%X, 0x%X)", data.a, data.b, data.c, data.d); - }); - bcatcstr(glsl, ");\n"); - } - else if (psContext->IsSwitch()) - { - bstring glsl = *psContext->currentGLSLString; - bformata(glsl, "const vec4 ImmCB_%d[] = vec4[%d] (\n", psContext->currentPhase, psDecl->asImmediateConstBuffer.size()); - bool isFirst = true; - std::for_each(psDecl->asImmediateConstBuffer.begin(), psDecl->asImmediateConstBuffer.end(), [&](const ICBVec4 &data) - { - if (!isFirst) - { - bcatcstr(glsl, ",\n"); - } - isFirst = false; - bformata(glsl, "vec4(uintBitsToFloat(uint(0x%Xu)), uintBitsToFloat(uint(0x%Xu)), uintBitsToFloat(uint(0x%Xu)), uintBitsToFloat(uint(0x%Xu)))", data.a, data.b, data.c, data.d); - }); - bcatcstr(glsl, ");\n"); - } - else - { - // TODO: This is only ever accessed as a float currently. Do trickery if we ever see ints accessed from an array. - // Walk through all the chunks we've seen in this phase. - ShaderPhase &sp = psShader->asPhases[psContext->currentPhase]; - std::for_each(sp.m_ConstantArrayInfo.m_Chunks.begin(), sp.m_ConstantArrayInfo.m_Chunks.end(), [this](const std::pair &chunk) - { - bstring glsl = *psContext->currentGLSLString; - uint32_t componentCount = chunk.second.m_ComponentCount; - // Just do the declaration here and contents to earlyMain. 
- if (componentCount == 1) - bformata(glsl, "float ImmCB_%d_%d_%d[%d];\n", psContext->currentPhase, chunk.first, chunk.second.m_Rebase, chunk.second.m_Size); - else - bformata(glsl, "vec%d ImmCB_%d_%d_%d[%d];\n", componentCount, psContext->currentPhase, chunk.first, chunk.second.m_Rebase, chunk.second.m_Size); - - if (!HaveDynamicIndexing(psContext)) - { - bstring name = bfromcstr(""); - bformata(name, "ImmCB_%d_%d_%d", psContext->currentPhase, chunk.first, chunk.second.m_Rebase); - SHADER_VARIABLE_CLASS eClass = componentCount > 1 ? SVC_VECTOR : SVC_SCALAR; - - DeclareDynamicIndexWrapper((const char *)name->data, eClass, SVT_FLOAT, 1, componentCount, chunk.second.m_Size); - bdestroy(name); - } - - bstring tgt = psContext->psShader->asPhases[psContext->currentPhase].earlyMain; - Declaration *psDecl = psContext->psShader->asPhases[psContext->currentPhase].m_ConstantArrayInfo.m_OrigDeclaration; - if (componentCount == 1) - { - for (uint32_t i = 0; i < chunk.second.m_Size; i++) - { - float val[4] = { - *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].a, - *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].b, - *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].c, - *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].d - }; - bformata(tgt, "\tImmCB_%d_%d_%d[%d] = ", psContext->currentPhase, chunk.first, chunk.second.m_Rebase, i); - if (fpcheck(val[chunk.second.m_Rebase]) && HaveBitEncodingOps(psContext->psShader->eTargetLanguage)) - bformata(tgt, "uintBitsToFloat(uint(0x%Xu))", *(uint32_t *)&val[chunk.second.m_Rebase]); - else - HLSLcc::PrintFloat(tgt, val[chunk.second.m_Rebase]); - bcatcstr(tgt, ";\n"); - } - } - else - { - for (uint32_t i = 0; i < chunk.second.m_Size; i++) - { - float val[4] = { - *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].a, - *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].b, - *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].c, - *(float*)&psDecl->asImmediateConstBuffer[i + 
chunk.first].d - }; - bformata(tgt, "\tImmCB_%d_%d_%d[%d] = vec%d(", psContext->currentPhase, chunk.first, chunk.second.m_Rebase, i, componentCount); - for (uint32_t k = 0; k < componentCount; k++) - { - if (k != 0) - bcatcstr(tgt, ", "); - if (fpcheck(val[k]) && HaveBitEncodingOps(psContext->psShader->eTargetLanguage)) - bformata(tgt, "uintBitsToFloat(uint(0x%Xu))", *(uint32_t *)&val[k + chunk.second.m_Rebase]); - else - HLSLcc::PrintFloat(tgt, val[k + chunk.second.m_Rebase]); - } - bcatcstr(tgt, ");\n"); - } - } - }); - } - - - break; - } - case OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT: - case OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT: - break; // Nothing to do - - case OPCODE_DCL_INDEXABLE_TEMP: - { - const uint32_t ui32RegIndex = psDecl->sIdxTemp.ui32RegIndex; - const uint32_t ui32RegCount = psDecl->sIdxTemp.ui32RegCount; - const uint32_t ui32RegComponentSize = psDecl->sIdxTemp.ui32RegComponentSize; - bformata(glsl, "vec%d TempArray%d[%d];\n", ui32RegComponentSize, ui32RegIndex, ui32RegCount); - break; - } - case OPCODE_DCL_INDEX_RANGE: - { - switch (psDecl->asOperands[0].eType) - { - case OPERAND_TYPE_OUTPUT: - case OPERAND_TYPE_INPUT: - { - const ShaderInfo::InOutSignature* psSignature = NULL; - const char* type = "vec"; - const char* Precision = ""; - uint32_t startReg = 0; - uint32_t i; - bstring *oldString; - int regSpace = psDecl->asOperands[0].GetRegisterSpace(psContext); - int isInput = psDecl->asOperands[0].eType == OPERAND_TYPE_INPUT ? 
1 : 0; - - if (regSpace == 0) - { - if (isInput) - psShader->sInfo.GetInputSignatureFromRegister( - psDecl->asOperands[0].ui32RegisterNumber, - psDecl->asOperands[0].ui32CompMask, - &psSignature); - else - psShader->sInfo.GetOutputSignatureFromRegister( - psDecl->asOperands[0].ui32RegisterNumber, - psDecl->asOperands[0].ui32CompMask, - psShader->ui32CurrentVertexOutputStream, - &psSignature); - } - else - psShader->sInfo.GetPatchConstantSignatureFromRegister(psDecl->asOperands[0].ui32RegisterNumber, psDecl->asOperands[0].ui32CompMask, &psSignature); - - ASSERT(psSignature != NULL); - - switch (psSignature->eComponentType) - { - case INOUT_COMPONENT_UINT32: - { - type = "uvec"; - break; - } - case INOUT_COMPONENT_SINT32: - { - type = "ivec"; - break; - } - case INOUT_COMPONENT_FLOAT32: - { - break; - } - default: - ASSERT(0); - break; - } - - if (HavePrecisionQualifiers(psContext)) - { - switch (psSignature->eMinPrec) // TODO What if the inputs in the indexed range are of different precisions? - { - default: - { - Precision = "highp "; - break; - } - case MIN_PRECISION_ANY_16: - case MIN_PRECISION_FLOAT_16: - case MIN_PRECISION_SINT_16: - case MIN_PRECISION_UINT_16: - { - Precision = "mediump "; - break; - } - case MIN_PRECISION_FLOAT_2_8: - { - Precision = EmitLowp(psContext) ? "lowp " : "mediump "; - break; - } - } - } - - startReg = psDecl->asOperands[0].ui32RegisterNumber; - bformata(glsl, "%s%s4 phase%d_%sput%d_%d[%d];\n", Precision, type, psContext->currentPhase, isInput ? "In" : "Out", regSpace, startReg, psDecl->value.ui32IndexRange); - oldString = psContext->currentGLSLString; - glsl = isInput ? 
psContext->psShader->asPhases[psContext->currentPhase].earlyMain : psContext->psShader->asPhases[psContext->currentPhase].postShaderCode; - psContext->currentGLSLString = &glsl; - if (isInput == 0) - psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode = 1; - for (i = 0; i < psDecl->value.ui32IndexRange; i++) - { - int dummy = 0; - std::string realName; - uint32_t destMask = psDecl->asOperands[0].ui32CompMask; - uint32_t rebase = 0; - const ShaderInfo::InOutSignature *psSig = NULL; - uint32_t regSpace = psDecl->asOperands[0].GetRegisterSpace(psContext); - - if (regSpace == 0) - if (isInput) - psContext->psShader->sInfo.GetInputSignatureFromRegister(startReg + i, destMask, &psSig); - else - psContext->psShader->sInfo.GetOutputSignatureFromRegister(startReg + i, destMask, 0, &psSig); - else - psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(startReg + i, destMask, &psSig); - - ASSERT(psSig != NULL); - - if ((psSig->ui32Mask & destMask) == 0) - continue; // Skip dummy writes (vec2 texcoords get filled to vec4 with zeroes etc) - - while ((psSig->ui32Mask & (1 << rebase)) == 0) - rebase++; - - ((Declaration *)psDecl)->asOperands[0].ui32RegisterNumber = startReg + i; - - if (isInput) - { - realName = psContext->GetDeclaredInputName(&psDecl->asOperands[0], &dummy, 1, NULL); - - psContext->AddIndentation(); - - bformata(glsl, "phase%d_Input%d_%d[%d]", psContext->currentPhase, regSpace, startReg, i); - - if (destMask != OPERAND_4_COMPONENT_MASK_ALL) - { - int k; - const char *swizzle = "xyzw"; - bcatcstr(glsl, "."); - for (k = 0; k < 4; k++) - { - if ((destMask & (1 << k)) && (psSig->ui32Mask & (1 << k))) - { - bformata(glsl, "%c", swizzle[k]); - } - } - } - bcatcstr(glsl, " = "); - bcatcstr(glsl, realName.c_str()); - if (destMask != OPERAND_4_COMPONENT_MASK_ALL && destMask != psSig->ui32Mask) - { - int k; - const char *swizzle = "xyzw"; - bcatcstr(glsl, "."); - for (k = 0; k < 4; k++) - { - if ((destMask & (1 << k)) && (psSig->ui32Mask & 
(1 << k))) - { - bformata(glsl, "%c", swizzle[k - rebase]); - } - } - } - } - else - { - realName = psContext->GetDeclaredOutputName(&psDecl->asOperands[0], &dummy, NULL, NULL, 1); - - psContext->AddIndentation(); - bcatcstr(glsl, realName.c_str()); - if (destMask != OPERAND_4_COMPONENT_MASK_ALL && destMask != psSig->ui32Mask) - { - int k; - const char *swizzle = "xyzw"; - bcatcstr(glsl, "."); - for (k = 0; k < 4; k++) - { - if ((destMask & (1 << k)) && (psSig->ui32Mask & (1 << k))) - { - bformata(glsl, "%c", swizzle[k - rebase]); - } - } - } - - bformata(glsl, " = phase%d_Output%d_%d[%d]", psContext->currentPhase, regSpace, startReg, i); - - if (destMask != OPERAND_4_COMPONENT_MASK_ALL) - { - int k; - const char *swizzle = "xyzw"; - bcatcstr(glsl, "."); - for (k = 0; k < 4; k++) - { - if ((destMask & (1 << k)) && (psSig->ui32Mask & (1 << k))) - { - bformata(glsl, "%c", swizzle[k]); - } - } - } - } - - bcatcstr(glsl, ";\n"); - } - - ((Declaration *)psDecl)->asOperands[0].ui32RegisterNumber = startReg; - psContext->currentGLSLString = oldString; - glsl = *psContext->currentGLSLString; - - for (i = 0; i < psDecl->value.ui32IndexRange; i++) - { - if (regSpace == 0) - { - if (isInput) - psShader->sInfo.GetInputSignatureFromRegister( - psDecl->asOperands[0].ui32RegisterNumber + i, - psDecl->asOperands[0].ui32CompMask, - &psSignature); - else - psShader->sInfo.GetOutputSignatureFromRegister( - psDecl->asOperands[0].ui32RegisterNumber + i, - psDecl->asOperands[0].ui32CompMask, - psShader->ui32CurrentVertexOutputStream, - &psSignature); - } - else - psShader->sInfo.GetPatchConstantSignatureFromRegister(psDecl->asOperands[0].ui32RegisterNumber + i, psDecl->asOperands[0].ui32CompMask, &psSignature); - - ASSERT(psSignature != NULL); - - ((ShaderInfo::InOutSignature *)psSignature)->isIndexed.insert(psContext->currentPhase); - ((ShaderInfo::InOutSignature *)psSignature)->indexStart[psContext->currentPhase] = startReg; - ((ShaderInfo::InOutSignature 
*)psSignature)->index[psContext->currentPhase] = i; - } - - - break; - } - default: - // TODO Input index ranges. - ASSERT(0); - } - break; - } - case OPCODE_HS_DECLS: - { - break; - } - case OPCODE_DCL_INPUT_CONTROL_POINT_COUNT: - { - break; - } - case OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT: - { - if (psContext->psShader->eShaderType == HULL_SHADER) - { - bformata(glsl, "layout(vertices=%d) out;\n", psDecl->value.ui32MaxOutputVertexCount); - } - break; - } - case OPCODE_HS_FORK_PHASE: - { - break; - } - case OPCODE_HS_JOIN_PHASE: - { - break; - } - case OPCODE_DCL_SAMPLER: - { - if (psContext->IsVulkan()) - { - ResourceBinding *pRes = NULL; - psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_SAMPLER, psDecl->asOperands[0].ui32RegisterNumber, (const ResourceBinding **)&pRes); - ASSERT(pRes != NULL); - std::string name = ResourceName(psContext, RGROUP_SAMPLER, psDecl->asOperands[0].ui32RegisterNumber, 0); - const char *samplerPrecision = GetSamplerPrecision(psContext, pRes->ePrecision); - - GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(name); - const char *samplerType = psDecl->value.eSamplerMode == D3D10_SB_SAMPLER_MODE_COMPARISON ? "samplerShadow" : "sampler"; - bformata(glsl, "layout(set = %d, binding = %d) uniform %s %s %s;\n", binding.set, binding.binding, samplerPrecision, samplerType, name.c_str()); - // Store the sampler mode to ShaderInfo, it's needed when we use the sampler - pRes->m_SamplerMode = psDecl->value.eSamplerMode; - } - break; - } - case OPCODE_DCL_HS_MAX_TESSFACTOR: - { - //For GLSL the max tessellation factor is fixed to the value of gl_MaxTessGenLevel. - break; - } - case OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED: - { - // non-float images need either 'i' or 'u' prefix. 
- char imageTypePrefix[2] = { 0, 0 }; - uint32_t bindpoint = psDecl->asOperands[0].ui32RegisterNumber; - const bool isVulkan = (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0; - - if (psDecl->sUAV.ui32GloballyCoherentAccess & GLOBALLY_COHERENT_ACCESS) - { - bcatcstr(glsl, "coherent "); - } - - // Use 4 component format as a fallback if no instruction defines it - const uint32_t numComponents = psDecl->sUAV.ui32NumComponents > 0 ? psDecl->sUAV.ui32NumComponents : 4; - REFLECT_RESOURCE_PRECISION precision = REFLECT_RESOURCE_PRECISION_UNKNOWN; - - if (!(psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_READ) && - !(psContext->flags & HLSLCC_FLAG_GLES31_IMAGE_QUALIFIERS) && !isVulkan) - { //Special case on desktop glsl: writeonly image does not need format qualifier - bformata(glsl, "writeonly layout(binding=%d) ", bindpoint); - } - else - { - if (!(psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_READ)) - bcatcstr(glsl, "writeonly "); - else if (!(psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_WRITE)) - bcatcstr(glsl, "readonly "); - - if ((psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_WRITE) && IsESLanguage(psShader->eTargetLanguage)) - { - // Need to require the extension - psContext->RequireExtension("GL_EXT_texture_buffer"); - } - - if (psContext->IsSwitch() && !(psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_ATOMIC)) - { - // Switch supports the GL_EXT_shader_image_load_formatted extension but it does require being enabled. - // Allows imageLoad() to do formatted reads and match the ld_uav_typed_indexable instruction. - // GL_EXT_shader_image_load_formatted doesn't provide support for imageAtomic*() functions. These still require format layout qualifier - psContext->RequireExtension("GL_EXT_shader_image_load_formatted"); - bformata(glsl, "layout(binding=%d) ", bindpoint); - switch (psDecl->sUAV.Type) - { - case RETURN_TYPE_FLOAT: - case RETURN_TYPE_UINT: - case RETURN_TYPE_SINT: - bcatcstr(glsl, "highp "); //TODO: half case? 
- break; - case RETURN_TYPE_UNORM: - case RETURN_TYPE_SNORM: - bcatcstr(glsl, "lowp "); - break; - default: - ASSERT(0); - } - } - else - { - if (isVulkan) - { - std::string name = ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0); - GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(name); - bformata(glsl, "layout(set = %d, binding = %d, ", binding.set, binding.binding); - } - else - bformata(glsl, "layout(binding=%d, ", bindpoint); - - const ResourceBinding* psBinding = 0; - if (psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, &psBinding)) - precision = psBinding->ePrecision; - - if (psDecl->sUAV.Type == RETURN_TYPE_FLOAT && numComponents == 3 && precision == REFLECT_RESOURCE_PRECISION_LOWP) - { - if (IsESLanguage(psContext->psShader->eTargetLanguage)) - GenerateUnsupportedFormatWarning(psContext->m_Reflection, ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0).c_str()); - bcatcstr(glsl, "r11f_g11f_b10f) mediump "); - } - else if (psDecl->sUAV.Type == RETURN_TYPE_UNORM && numComponents == 4 && precision == REFLECT_RESOURCE_PRECISION_LOWP) - { - if (IsESLanguage(psContext->psShader->eTargetLanguage)) - GenerateUnsupportedFormatWarning(psContext->m_Reflection, ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0).c_str()); - bcatcstr(glsl, "rgb10_a2) mediump "); - } - else if (psDecl->sUAV.Type == RETURN_TYPE_UINT && numComponents == 4 && precision == REFLECT_RESOURCE_PRECISION_LOWP) - { - if (IsESLanguage(psContext->psShader->eTargetLanguage)) - GenerateUnsupportedFormatWarning(psContext->m_Reflection, ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0).c_str()); - bcatcstr(glsl, "rgb10_a2ui) mediump "); - } - else - { - if (numComponents >= 1) - bcatcstr(glsl, "r"); - if (numComponents >= 2) - bcatcstr(glsl, "g"); - if 
(numComponents >= 3) - bcatcstr(glsl, "ba"); - - switch (psDecl->sUAV.Type) - { - case RETURN_TYPE_FLOAT: - { - switch (precision) - { - case REFLECT_RESOURCE_PRECISION_LOWP: - case REFLECT_RESOURCE_PRECISION_MEDIUMP: - if (IsESLanguage(psContext->psShader->eTargetLanguage) && numComponents != 4) - GenerateUnsupportedFormatWarning(psContext->m_Reflection, ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0).c_str()); - bcatcstr(glsl, "16f) mediump "); break; - default: - if (IsESLanguage(psContext->psShader->eTargetLanguage) && numComponents != 4 && numComponents != 1) - GenerateUnsupportedFormatWarning(psContext->m_Reflection, ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0).c_str()); - bcatcstr(glsl, "32f) highp "); break; - } - } break; - case RETURN_TYPE_UNORM: - case RETURN_TYPE_SNORM: - { - if (IsESLanguage(psContext->psShader->eTargetLanguage) && numComponents != 4) - GenerateUnsupportedFormatWarning(psContext->m_Reflection, ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0).c_str()); - bformata(glsl, "8%s) lowp ", psDecl->sUAV.Type == RETURN_TYPE_SNORM ? "_snorm" : ""); - } break; - case RETURN_TYPE_UINT: - case RETURN_TYPE_SINT: - { - const char* fmt = psDecl->sUAV.Type == RETURN_TYPE_UINT ? 
"ui" : "i"; - switch (precision) - { - case REFLECT_RESOURCE_PRECISION_LOWP: - if (IsESLanguage(psContext->psShader->eTargetLanguage) && numComponents != 4) - GenerateUnsupportedFormatWarning(psContext->m_Reflection, ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0).c_str()); - bformata(glsl, "8%s) lowp ", fmt); break; - case REFLECT_RESOURCE_PRECISION_MEDIUMP: - if (IsESLanguage(psContext->psShader->eTargetLanguage) && numComponents != 4) - GenerateUnsupportedFormatWarning(psContext->m_Reflection, ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0).c_str()); - bformata(glsl, "16%s) mediump ", fmt); break; - default: - if (IsESLanguage(psContext->psShader->eTargetLanguage) && numComponents != 4 && numComponents != 1) - GenerateUnsupportedFormatWarning(psContext->m_Reflection, ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0).c_str()); - bformata(glsl, "32%s) highp ", fmt); break; - } - } break; - default: - ASSERT(0); - } - } - } - } - - if (psDecl->sUAV.Type == RETURN_TYPE_UINT) - imageTypePrefix[0] = 'u'; - else if (psDecl->sUAV.Type == RETURN_TYPE_SINT) - imageTypePrefix[0] = 'i'; - - // GLSL requires images to be always explicitly defined as uniforms - switch (psDecl->value.eResourceDimension) - { - case RESOURCE_DIMENSION_BUFFER: - { - if (IsESLanguage(psShader->eTargetLanguage) || psContext->IsVulkan()) - { - psContext->RequireExtension("GL_EXT_texture_buffer"); - if (numComponents != 1 || precision == REFLECT_RESOURCE_PRECISION_LOWP || precision == REFLECT_RESOURCE_PRECISION_MEDIUMP) - GenerateUnsupportedFormatWarning(psContext->m_Reflection, ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0).c_str()); - } - - bformata(glsl, "uniform %simageBuffer ", imageTypePrefix); - break; - } - case RESOURCE_DIMENSION_TEXTURE1D: - { - bformata(glsl, "uniform %simage1D ", imageTypePrefix); - break; - } - case RESOURCE_DIMENSION_TEXTURE2D: - { - 
bformata(glsl, "uniform %simage2D ", imageTypePrefix); - break; - } - case RESOURCE_DIMENSION_TEXTURE2DMS: - { - bformata(glsl, "uniform %simage2DMS ", imageTypePrefix); - break; - } - case RESOURCE_DIMENSION_TEXTURE3D: - { - bformata(glsl, "uniform %simage3D ", imageTypePrefix); - break; - } - case RESOURCE_DIMENSION_TEXTURECUBE: - { - bformata(glsl, "uniform %simageCube ", imageTypePrefix); - break; - } - case RESOURCE_DIMENSION_TEXTURE1DARRAY: - { - bformata(glsl, "uniform %simage1DArray ", imageTypePrefix); - break; - } - case RESOURCE_DIMENSION_TEXTURE2DARRAY: - { - bformata(glsl, "uniform %simage2DArray ", imageTypePrefix); - break; - } - case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: - { - bformata(glsl, "uniform %simage3DArray ", imageTypePrefix); - break; - } - case RESOURCE_DIMENSION_TEXTURECUBEARRAY: - { - bformata(glsl, "uniform %simageCubeArray ", imageTypePrefix); - break; - } - default: - ASSERT(0); - break; - } - TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NONE); - bcatcstr(glsl, ";\n"); - - unsigned int accessFlags = 0; - if (psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_READ) - accessFlags |= HLSLccReflection::ReadAccess; - if (psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_WRITE) - accessFlags |= HLSLccReflection::WriteAccess; - - if (IsESLanguage(psContext->psShader->eTargetLanguage) && accessFlags == (HLSLccReflection::ReadAccess | HLSLccReflection::WriteAccess)) - { - if (numComponents != 1 || precision == REFLECT_RESOURCE_PRECISION_LOWP || precision == REFLECT_RESOURCE_PRECISION_MEDIUMP) - GenerateUnsupportedReadWriteFormatWarning(psContext->m_Reflection, ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0).c_str()); - } - - psContext->m_Reflection.OnStorageImage(bindpoint, accessFlags); - - break; - } - case OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED: - { - const bool isVulkan = (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0; - const bool avoidAtomicCounter = (psContext->flags & 
HLSLCC_FLAG_AVOID_SHADER_ATOMIC_COUNTERS) != 0; - if (psDecl->sUAV.bCounter) - { - if (isVulkan) - { - std::string uavname = ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0); - GLSLCrossDependencyData::VulkanResourceBinding uavBinding = psContext->psDependencies->GetVulkanResourceBinding(uavname, true); - GLSLCrossDependencyData::VulkanResourceBinding counterBinding = { uavBinding.set, uavBinding.binding + 1 }; - bformata(glsl, "layout(set = %d, binding = %d) buffer %s_counterBuf { highp uint %s_counter; };\n", counterBinding.set, counterBinding.binding, uavname.c_str(), uavname.c_str()); - - DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], - psDecl->sUAV.ui32GloballyCoherentAccess, 0, 1, 0, psDecl->ui32BufferStride, glsl); - } - else if (avoidAtomicCounter) // no support for atomic counter. We must use atomic functions in SSBO instead. - { - DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], - psDecl->sUAV.ui32GloballyCoherentAccess, 0, 1, 1, psDecl->ui32BufferStride, glsl); - } - else - { - std::string name = ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0); - name += "_counter"; - bcatcstr(glsl, "layout (binding = 0) uniform "); - - if (HavePrecisionQualifiers(psContext)) - bcatcstr(glsl, "highp "); - bformata(glsl, "atomic_uint %s;\n", name.c_str()); - - DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], - psDecl->sUAV.ui32GloballyCoherentAccess, 0, 1, 0, psDecl->ui32BufferStride, glsl); - } - } - else - { - DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], - psDecl->sUAV.ui32GloballyCoherentAccess, 0, 1, 0, psDecl->ui32BufferStride, glsl); - } - - break; - } - case OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW: - { - const bool isVulkan = (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0; - if 
(psDecl->sUAV.bCounter) - { - if (isVulkan) - { - std::string uavname = ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0); - GLSLCrossDependencyData::VulkanResourceBinding uavBinding = psContext->psDependencies->GetVulkanResourceBinding(uavname, true); - GLSLCrossDependencyData::VulkanResourceBinding counterBinding = { uavBinding.set, uavBinding.binding + 1 }; - bformata(glsl, "layout(set = %d, binding = %d) buffer %s_counterBuf { highp uint %s_counter; };\n", counterBinding.set, counterBinding.binding, uavname.c_str(), uavname.c_str()); - } - else - { - std::string name = ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0); - name += "_counter"; - bcatcstr(glsl, "layout (binding = 0) uniform "); - - if (HavePrecisionQualifiers(psContext)) - bcatcstr(glsl, "highp "); - bformata(glsl, "atomic_uint %s;\n", name.c_str()); - } - } - - DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], - psDecl->sUAV.ui32GloballyCoherentAccess, 1, 1, 0, psDecl->ui32BufferStride, glsl); - - break; - } - case OPCODE_DCL_RESOURCE_STRUCTURED: - { - DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], - psDecl->sUAV.ui32GloballyCoherentAccess, 0, 0, 0, psDecl->ui32BufferStride, glsl); - break; - } - case OPCODE_DCL_RESOURCE_RAW: - { - DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], - psDecl->sUAV.ui32GloballyCoherentAccess, 1, 0, 0, psDecl->ui32BufferStride, glsl); - break; - } - case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED: - { - ShaderVarType* psVarType = &psShader->sInfo.sGroupSharedVarType[psDecl->asOperands[0].ui32RegisterNumber]; - - bcatcstr(glsl, "shared struct {\n"); - bformata(glsl, "\tuint value[%d];\n", psDecl->sTGSM.ui32Stride / 4); - bcatcstr(glsl, "} "); - TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NONE); - bformata(glsl, "[%d];\n", - psDecl->sTGSM.ui32Count); 
- psVarType->name = "value"; - - psVarType->Columns = psDecl->sTGSM.ui32Stride / 4; - psVarType->Elements = psDecl->sTGSM.ui32Count; - break; - } - case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW: - { - ShaderVarType* psVarType = &psShader->sInfo.sGroupSharedVarType[psDecl->asOperands[0].ui32RegisterNumber]; - - bcatcstr(glsl, "shared uint "); - TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NONE); - bformata(glsl, "[%d];\n", psDecl->sTGSM.ui32Count / psDecl->sTGSM.ui32Stride); - - psVarType->name = "$Element"; - - psVarType->Columns = 1; - psVarType->Elements = psDecl->sTGSM.ui32Count / psDecl->sTGSM.ui32Stride; - break; - } - case OPCODE_DCL_STREAM: - { - ASSERT(psDecl->asOperands[0].eType == OPERAND_TYPE_STREAM); - - - if (psShader->eTargetLanguage >= LANG_400 && (psShader->ui32CurrentVertexOutputStream != psDecl->asOperands[0].ui32RegisterNumber)) - { - // Only emit stream declaration for desktop GL >= 4.0, and only if we're declaring something else than the default 0 - bformata(glsl, "layout(stream = %d) out;\n", psShader->ui32CurrentVertexOutputStream); - } - psShader->ui32CurrentVertexOutputStream = psDecl->asOperands[0].ui32RegisterNumber; - - break; - } - case OPCODE_DCL_GS_INSTANCE_COUNT: - { - bformata(glsl, "layout(invocations = %d) in;\n", psDecl->value.ui32GSInstanceCount); - break; - } - default: - { - ASSERT(0); - break; - } - } -} - -bool ToGLSL::TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix, int *iIgnoreRedirect) -{ - ASSERT(sig != NULL); - if (psContext->psShader->eShaderType == HULL_SHADER && sig->semanticName == "SV_TessFactor") - { - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - ASSERT(sig->ui32SemanticIndex <= 3); - std::ostringstream oss; - oss << "gl_TessLevelOuter[" << sig->ui32SemanticIndex << "]"; - result = oss.str(); - return true; - } - - if (psContext->psShader->eShaderType == 
HULL_SHADER && sig->semanticName == "SV_InsideTessFactor") - { - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - ASSERT(sig->ui32SemanticIndex <= 1); - std::ostringstream oss; - oss << "gl_TessLevelInner[" << sig->ui32SemanticIndex << "]"; - result = oss.str(); - return true; - } - - switch (sig->eSystemValueType) - { - case NAME_POSITION: - if (psContext->psShader->eShaderType == PIXEL_SHADER) - result = "hlslcc_FragCoord"; - else - result = "gl_Position"; - return true; - case NAME_RENDER_TARGET_ARRAY_INDEX: - result = "gl_Layer"; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - return true; - case NAME_CLIP_DISTANCE: - case NAME_CULL_DISTANCE: - { - const char* glName = sig->eSystemValueType == NAME_CLIP_DISTANCE ? "Clip" : "Cull"; - // This is always routed through temp - std::ostringstream oss; - oss << "phase" << psContext->currentPhase << "_gl" << glName << "Distance" << sig->ui32SemanticIndex; - result = oss.str(); - return true; - } - case NAME_VIEWPORT_ARRAY_INDEX: - result = "gl_ViewportIndex"; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - return true; - case NAME_VERTEX_ID: - if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) - result = "gl_VertexIndex"; - else - result = "gl_VertexID"; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - return true; - case NAME_INSTANCE_ID: - if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) - result = "gl_InstanceIndex"; - else - result = "gl_InstanceID"; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - return true; - case NAME_IS_FRONT_FACE: - if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) - result = "(gl_FrontFacing ? 0xffffffffu : uint(0))"; // Old ES3.0 Adrenos treat 0u as const int - else - result = "(gl_FrontFacing ? 
1 : 0)"; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - return true; - case NAME_PRIMITIVE_ID: - if (isInput && psContext->psShader->eShaderType == GEOMETRY_SHADER) - result = "gl_PrimitiveIDIn"; // LOL opengl - else - result = "gl_PrimitiveID"; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - return true; - case NAME_SAMPLE_INDEX: - result = "gl_SampleID"; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - return true; - case NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_LINE_DENSITY_TESSFACTOR: - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - if (isIndexed) - { - result = "gl_TessLevelOuter"; - return true; - } - else - { - result = "gl_TessLevelOuter[0]"; - return true; - } - case NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_LINE_DETAIL_TESSFACTOR: - result = "gl_TessLevelOuter[1]"; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - return true; - case NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: - case NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: - result = "gl_TessLevelOuter[2]"; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - return true; - case NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: - result = "gl_TessLevelOuter[3]"; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - return true; - - case NAME_FINAL_TRI_INSIDE_TESSFACTOR: - case NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR: - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - if (isIndexed) - { - result = "gl_TessLevelInner"; - return true; - } - else - { - result = "gl_TessLevelInner[0]"; - return true; - } - case NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR: - result = "gl_TessLevelInner[3]"; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - return true; - default: - break; - } - - if (psContext->psShader->asPhases[psContext->currentPhase].ePhase == HS_CTRL_POINT_PHASE) - { - if ((sig->semanticName == "POS" || sig->semanticName == "POSITION" || sig->semanticName == 
"SV_POSITION" || sig->semanticName == "SV_Position") - && sig->ui32SemanticIndex == 0) - { - result = "gl_out[gl_InvocationID].gl_Position"; - return true; - } - std::ostringstream oss; - if (isInput) - oss << psContext->inputPrefix << sig->semanticName << sig->ui32SemanticIndex; - else - oss << psContext->outputPrefix << sig->semanticName << sig->ui32SemanticIndex << "[gl_InvocationID]"; - result = oss.str(); - return true; - } - - if ((psOperand->eType == OPERAND_TYPE_OUTPUT || psOperand->eType == OPERAND_TYPE_INPUT) - && HLSLcc::WriteMaskToComponentCount(sig->ui32Mask) == 1 && pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - - // TODO: Add other builtins here. - if (sig->eSystemValueType == NAME_POSITION || (sig->semanticName == "POS" && sig->ui32SemanticIndex == 0 && psContext->psShader->eShaderType == VERTEX_SHADER)) - { - result = "gl_Position"; - return true; - } - - if (sig->semanticName == "PSIZE") - { - result = "gl_PointSize"; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - return true; - } - - return false; -} diff --git a/third_party/HLSLcc/src/toGLSLInstruction.cpp b/third_party/HLSLcc/src/toGLSLInstruction.cpp deleted file mode 100644 index c0732ff..0000000 --- a/third_party/HLSLcc/src/toGLSLInstruction.cpp +++ /dev/null @@ -1,4801 +0,0 @@ -#include "internal_includes/toGLSLOperand.h" -#include "internal_includes/HLSLccToolkit.h" -#include "internal_includes/languages.h" -#include "internal_includes/HLSLCrossCompilerContext.h" -#include "bstrlib.h" -#include "stdio.h" -#include -#include -#include "internal_includes/debug.h" -#include "internal_includes/Shader.h" -#include "internal_includes/Instruction.h" -#include "internal_includes/toGLSL.h" -#include - -using namespace HLSLcc; - -// In toGLSLDeclaration.cpp -const char* GetSamplerType(HLSLCrossCompilerContext* psContext, - const RESOURCE_DIMENSION eDimension, - const uint32_t ui32RegisterNumber); -bool DeclareRWStructuredBufferTemplateTypeAsInteger(HLSLCrossCompilerContext* psContext, 
const Operand* psOperand); - -// This function prints out the destination name, possible destination writemask, assignment operator -// and any possible conversions needed based on the eSrcType+ui32SrcElementCount (type and size of data expected to be coming in) -// As an output, pNeedsParenthesis will be filled with the amount of closing parenthesis needed -// and pSrcCount will be filled with the number of components expected -// ui32CompMask can be used to only write to 1 or more components (used by MOVC) -void ToGLSL::AddOpAssignToDestWithMask(const Operand* psDest, - SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, uint32_t precise, int *pNeedsParenthesis, uint32_t ui32CompMask) -{ - uint32_t ui32DestElementCount = psDest->GetNumSwizzleElements(ui32CompMask); - bstring glsl = *psContext->currentGLSLString; - SHADER_VARIABLE_TYPE eDestDataType = psDest->GetDataType(psContext); - ASSERT(pNeedsParenthesis != NULL); - - *pNeedsParenthesis = 0; - - TranslateOperand(psDest, TO_FLAG_DESTINATION, ui32CompMask); - - bcatcstr(glsl, " = "); - - if (precise && HavePreciseQualifier(psContext->psShader->eTargetLanguage)) - { - char const *t, *s; - switch (eDestDataType) - { - case SVT_BOOL: t = "bvec4"; break; - case SVT_INT: t = "ivec4"; break; - case SVT_FLOAT: t = "vec4"; break; - case SVT_UINT: t = "uvec4"; break; - default: ASSERT(0); t = NULL; break; - } - switch (ui32DestElementCount) - { - case 1: s = ".x"; break; - case 2: s = ".xy"; break; - case 3: s = ".xyz"; break; - case 4: s = ".xyzw"; break; - default: ASSERT(0); s = NULL; break; - } - if (t && s) - { - bformata(glsl, "(u_xlat_precise_%s%s = (", t, s); - (*pNeedsParenthesis) += 2; - } - } - - // Simple path: types match. - if (DoAssignmentDataTypesMatch(eDestDataType, eSrcType)) - { - // Cover cases where the HLSL language expects the rest of the components to be default-filled - // eg. 
MOV r0, c0.x => Temp[0] = vec4(c0.x); - if (ui32DestElementCount > ui32SrcElementCount) - { - bformata(glsl, "%s(", GetConstructorForTypeGLSL(psContext, eDestDataType, ui32DestElementCount, false)); - (*pNeedsParenthesis)++; - } - - return; - } - - switch (eDestDataType) - { - case SVT_INT: - case SVT_INT12: - case SVT_INT16: - // Bitcasts from lower precisions are ambiguous - ASSERT(eSrcType != SVT_FLOAT10 && eSrcType != SVT_FLOAT16); - if (eSrcType == SVT_FLOAT && psContext->psShader->ui32MajorVersion > 3 && HaveBitEncodingOps(psContext->psShader->eTargetLanguage)) - { - bcatcstr(glsl, "floatBitsToInt("); - // Cover cases where the HLSL language expects the rest of the components to be default-filled - if (ui32DestElementCount > ui32SrcElementCount) - { - bformata(glsl, "%s(", GetConstructorForTypeGLSL(psContext, eSrcType, ui32DestElementCount, false)); - (*pNeedsParenthesis)++; - } - } - else - bformata(glsl, "%s(", GetConstructorForTypeGLSL(psContext, eDestDataType, ui32DestElementCount, false)); - - (*pNeedsParenthesis)++; - break; - case SVT_UINT: - case SVT_UINT16: - ASSERT(eSrcType != SVT_FLOAT10 && eSrcType != SVT_FLOAT16); - if (eSrcType == SVT_FLOAT && psContext->psShader->ui32MajorVersion > 3 && HaveBitEncodingOps(psContext->psShader->eTargetLanguage)) - { - bcatcstr(glsl, "floatBitsToUint("); - // Cover cases where the HLSL language expects the rest of the components to be default-filled - if (ui32DestElementCount > ui32SrcElementCount) - { - bformata(glsl, "%s(", GetConstructorForTypeGLSL(psContext, eSrcType, ui32DestElementCount, false)); - (*pNeedsParenthesis)++; - } - } - else - bformata(glsl, " %s(", GetConstructorForTypeGLSL(psContext, eDestDataType, ui32DestElementCount, false)); - - (*pNeedsParenthesis)++; - break; - - case SVT_FLOAT: - case SVT_FLOAT10: - case SVT_FLOAT16: - ASSERT(eSrcType != SVT_INT12 || (eSrcType != SVT_INT16 && eSrcType != SVT_UINT16)); - if (psContext->psShader->ui32MajorVersion > 3 && 
HaveBitEncodingOps(psContext->psShader->eTargetLanguage)) - { - if (eSrcType == SVT_INT) - bcatcstr(glsl, "intBitsToFloat("); - else - bcatcstr(glsl, "uintBitsToFloat("); - // Cover cases where the HLSL language expects the rest of the components to be default-filled - if (ui32DestElementCount > ui32SrcElementCount) - { - bformata(glsl, "%s(", GetConstructorForTypeGLSL(psContext, eSrcType, ui32DestElementCount, false)); - (*pNeedsParenthesis)++; - } - } - else - bformata(glsl, "%s(", GetConstructorForTypeGLSL(psContext, eDestDataType, ui32DestElementCount, false)); - - (*pNeedsParenthesis)++; - break; - case SVT_BOOL: - bformata(glsl, " %s(", GetConstructorForTypeGLSL(psContext, eDestDataType, ui32DestElementCount, false)); - (*pNeedsParenthesis)++; - break; - default: - ASSERT(0); - break; - } -} - -void ToGLSL::AddAssignToDest(const Operand* psDest, - SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, uint32_t precise, int* pNeedsParenthesis) -{ - AddOpAssignToDestWithMask(psDest, eSrcType, ui32SrcElementCount, precise, pNeedsParenthesis, OPERAND_4_COMPONENT_MASK_ALL); -} - -void ToGLSL::AddAssignPrologue(int numParenthesis, bool isEmbedded /* = false*/) -{ - bstring glsl = *psContext->currentGLSLString; - while (numParenthesis != 0) - { - bcatcstr(glsl, ")"); - numParenthesis--; - } - if (!isEmbedded) - bcatcstr(glsl, ";\n"); -} - -void ToGLSL::AddComparison(Instruction* psInst, ComparisonType eType, - uint32_t typeFlag) -{ - // Multiple cases to consider here: - // For shader model <=3: all comparisons are floats - // otherwise: - // OPCODE_LT, _GT, _NE etc: inputs are floats, outputs UINT 0xffffffff or 0. 
typeflag: TO_FLAG_NONE - // OPCODE_ILT, _IGT etc: comparisons are signed ints, outputs UINT 0xffffffff or 0 typeflag TO_FLAG_INTEGER - // _ULT, UGT etc: inputs unsigned ints, outputs UINTs typeflag TO_FLAG_UNSIGNED_INTEGER - // - // Additional complexity: if dest swizzle element count is 1, we can use normal comparison operators, otherwise glsl intrinsics. - - - bstring glsl = *psContext->currentGLSLString; - const uint32_t destElemCount = psInst->asOperands[0].GetNumSwizzleElements(); - const uint32_t s0ElemCount = psInst->asOperands[1].GetNumSwizzleElements(); - const uint32_t s1ElemCount = psInst->asOperands[2].GetNumSwizzleElements(); - int isBoolDest = psInst->asOperands[0].GetDataType(psContext) == SVT_BOOL; - - int floatResult = 0; - - ASSERT(s0ElemCount == s1ElemCount || s1ElemCount == 1 || s0ElemCount == 1); - if (s0ElemCount != s1ElemCount) - { - // Set the proper auto-expand flag is either argument is scalar - typeFlag |= (TO_AUTO_EXPAND_TO_VEC2 << (std::max(s0ElemCount, s1ElemCount) - 2)); - } - - if (psContext->psShader->ui32MajorVersion < 4) - { - floatResult = 1; - } - - if (destElemCount > 1) - { - const char* glslOpcode[] = { - "equal", - "lessThan", - "greaterThanEqual", - "notEqual", - }; - - int needsParenthesis = 0; - psContext->AddIndentation(); - if (isBoolDest) - { - TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION | TO_FLAG_BOOL); - bcatcstr(glsl, " = "); - } - else - { - AddAssignToDest(&psInst->asOperands[0], floatResult ? SVT_FLOAT : SVT_UINT, destElemCount, psInst->ui32PreciseMask, &needsParenthesis); - - bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, floatResult ? 
SVT_FLOAT : SVT_UINT, destElemCount, false)); - bcatcstr(glsl, "("); - } - bformata(glsl, "%s(", glslOpcode[eType]); - TranslateOperand(&psInst->asOperands[1], typeFlag); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[2], typeFlag); - bcatcstr(glsl, ")"); - TranslateOperandSwizzle(psContext, &psInst->asOperands[0], 0); - if (!isBoolDest) - { - bcatcstr(glsl, ")"); - if (!floatResult) - { - if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) - bcatcstr(glsl, " * 0xFFFFFFFFu"); - else - bcatcstr(glsl, " * -1"); // GLSL ES 2 spec: high precision ints are guaranteed to have a range of at least (-2^16, 2^16) - } - } - - AddAssignPrologue(needsParenthesis); - } - else - { - const char* glslOpcode[] = { - "==", - "<", - ">=", - "!=", - }; - - //Scalar compare - - const bool workaroundAdrenoBugs = psContext->psShader->eTargetLanguage == LANG_ES_300; - - if (workaroundAdrenoBugs) - { - // Workarounds for bug cases 777617, 735299, 776827 - bcatcstr(glsl, "#ifdef UNITY_ADRENO_ES3\n"); - - int needsParenthesis = 0; - psContext->AddIndentation(); - if (isBoolDest) - { - TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION | TO_FLAG_BOOL); - bcatcstr(glsl, " = !!("); - needsParenthesis += 1; - TranslateOperand(&psInst->asOperands[1], typeFlag); - bformata(glsl, "%s", glslOpcode[eType]); - TranslateOperand(&psInst->asOperands[2], typeFlag); - AddAssignPrologue(needsParenthesis); - } - else - { - bcatcstr(glsl, "{ bool cond = "); - TranslateOperand(&psInst->asOperands[1], typeFlag); - bformata(glsl, "%s", glslOpcode[eType]); - TranslateOperand(&psInst->asOperands[2], typeFlag); - bcatcstr(glsl, "; "); - AddAssignToDest(&psInst->asOperands[0], floatResult ? SVT_FLOAT : SVT_UINT, destElemCount, psInst->ui32PreciseMask, &needsParenthesis); - bcatcstr(glsl, "!!cond ? "); - if (floatResult) - bcatcstr(glsl, "1.0 : 0.0"); - else - { - // Old ES3.0 Adrenos treat 0u as const int. 
- // GLSL ES 2 spec: high precision ints are guaranteed to have a range of at least (-2^16, 2^16) - bcatcstr(glsl, HaveUnsignedTypes(psContext->psShader->eTargetLanguage) ? "0xFFFFFFFFu : uint(0)" : "-1 : 0"); - } - AddAssignPrologue(needsParenthesis, true); - bcatcstr(glsl, "; }\n"); - } - - bcatcstr(glsl, "#else\n"); - } - - int needsParenthesis = 0; - psContext->AddIndentation(); - if (isBoolDest) - { - TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION | TO_FLAG_BOOL); - bcatcstr(glsl, " = "); - } - else - { - AddAssignToDest(&psInst->asOperands[0], floatResult ? SVT_FLOAT : SVT_UINT, destElemCount, psInst->ui32PreciseMask, &needsParenthesis); - bcatcstr(glsl, "("); - } - TranslateOperand(&psInst->asOperands[1], typeFlag); - bformata(glsl, "%s", glslOpcode[eType]); - TranslateOperand(&psInst->asOperands[2], typeFlag); - if (!isBoolDest) - { - if (floatResult) - bcatcstr(glsl, ") ? 1.0 : 0.0"); - else - { - // Old ES3.0 Adrenos treat 0u as const int. - // GLSL ES 2 spec: high precision ints are guaranteed to have a range of at least (-2^16, 2^16) - bcatcstr(glsl, HaveUnsignedTypes(psContext->psShader->eTargetLanguage) ? ") ? 0xFFFFFFFFu : uint(0)" : ") ? 
-1 : 0"); - } - } - AddAssignPrologue(needsParenthesis); - - if (workaroundAdrenoBugs) - bcatcstr(glsl, "#endif\n"); - } -} - -void ToGLSL::AddMOVBinaryOp(const Operand *pDest, Operand *pSrc, uint32_t precise, bool isEmbedded /* = false*/) -{ - int numParenthesis = 0; - int srcSwizzleCount = pSrc->GetNumSwizzleElements(); - uint32_t writeMask = pDest->GetAccessMask(); - - const SHADER_VARIABLE_TYPE eSrcType = pSrc->GetDataType(psContext, pDest->GetDataType(psContext)); - uint32_t flags = SVTTypeToFlag(eSrcType); - - AddAssignToDest(pDest, eSrcType, srcSwizzleCount, precise, &numParenthesis); - TranslateOperand(pSrc, flags, writeMask); - - AddAssignPrologue(numParenthesis, isEmbedded); -} - -void ToGLSL::AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand *src1, Operand *src2, uint32_t precise) -{ - bstring glsl = *psContext->currentGLSLString; - uint32_t destElemCount = pDest->GetNumSwizzleElements(); - uint32_t s0ElemCount = src0->GetNumSwizzleElements(); - uint32_t s1ElemCount = src1->GetNumSwizzleElements(); - uint32_t s2ElemCount = src2->GetNumSwizzleElements(); - uint32_t destWriteMask = pDest->GetAccessMask(); - uint32_t destElem; - - const SHADER_VARIABLE_TYPE eDestType = pDest->GetDataType(psContext); - /* - for each component in dest[.mask] - if the corresponding component in src0 (POS-swizzle) - has any bit set - { - copy this component (POS-swizzle) from src1 into dest - } - else - { - copy this component (POS-swizzle) from src2 into dest - } - endfor - */ - - /* Single-component conditional variable (src0) */ - if (s0ElemCount == 1 || src0->IsSwizzleReplicated()) - { - int numParenthesis = 0; - SHADER_VARIABLE_TYPE s0Type = src0->GetDataType(psContext); - psContext->AddIndentation(); - AddAssignToDest(pDest, eDestType, destElemCount, precise, &numParenthesis); - bcatcstr(glsl, "("); - if (s0Type == SVT_UINT || s0Type == SVT_UINT16) - TranslateOperand(src0, TO_AUTO_BITCAST_TO_UINT, OPERAND_4_COMPONENT_MASK_X); - else if (s0Type == 
SVT_BOOL) - TranslateOperand(src0, TO_FLAG_BOOL, OPERAND_4_COMPONENT_MASK_X); - else - TranslateOperand(src0, TO_AUTO_BITCAST_TO_INT, OPERAND_4_COMPONENT_MASK_X); - - if (psContext->psShader->ui32MajorVersion < 4) - { - //cmp opcode uses >= 0 - bcatcstr(glsl, " >= 0) ? "); - } - else - { - if (s0Type == SVT_UINT || s0Type == SVT_UINT16) - bcatcstr(glsl, HaveUnsignedTypes(psContext->psShader->eTargetLanguage) ? " != uint(0)) ? " : " != 0) ? "); // Old ES3.0 Adrenos treat 0u as const int. - else if (s0Type == SVT_BOOL) - bcatcstr(glsl, ") ? "); - else - bcatcstr(glsl, " != 0) ? "); - } - - if (s1ElemCount == 1 && destElemCount > 1) - TranslateOperand(src1, SVTTypeToFlag(eDestType) | ElemCountToAutoExpandFlag(destElemCount)); - else - TranslateOperand(src1, SVTTypeToFlag(eDestType), destWriteMask); - - bcatcstr(glsl, " : "); - if (s2ElemCount == 1 && destElemCount > 1) - TranslateOperand(src2, SVTTypeToFlag(eDestType) | ElemCountToAutoExpandFlag(destElemCount)); - else - TranslateOperand(src2, SVTTypeToFlag(eDestType), destWriteMask); - - AddAssignPrologue(numParenthesis); - } - else - { - // NOTE: mix() cannot be used to implement MOVC, because it propagates - // NaN from both endpoints. - int srcElem = -1; - SHADER_VARIABLE_TYPE dstType = pDest->GetDataType(psContext); - SHADER_VARIABLE_TYPE s0Type = src0->GetDataType(psContext); - - // Use an extra temp if dest is also one of the sources. Without this some swizzle combinations - // might alter the source before all components are handled. 
- const std::string tempName = "hlslcc_movcTemp"; - bool dstIsSrc1 = (pDest->eType == src1->eType) - && (dstType == src1->GetDataType(psContext)) - && (pDest->ui32RegisterNumber == src1->ui32RegisterNumber); - bool dstIsSrc2 = (pDest->eType == src2->eType) - && (dstType == src2->GetDataType(psContext)) - && (pDest->ui32RegisterNumber == src2->ui32RegisterNumber); - - if (dstIsSrc1 || dstIsSrc2) - { - psContext->AddIndentation(); - bcatcstr(glsl, "{\n"); - ++psContext->indent; - psContext->AddIndentation(); - int numComponents = (pDest->eType == OPERAND_TYPE_TEMP) ? - psContext->psShader->GetTempComponentCount(eDestType, pDest->ui32RegisterNumber) : - pDest->iNumComponents; - - const char* constructorStr = HLSLcc::GetConstructorForType(psContext, eDestType, numComponents, false); - bformata(glsl, "%s %s = ", constructorStr, tempName.c_str()); - TranslateOperand(pDest, TO_FLAG_NAME_ONLY); - bformata(glsl, ";\n"); - - // Override OPERAND_TYPE_TEMP name temporarily - const_cast(pDest)->specialName.assign(tempName); - } - - for (destElem = 0; destElem < 4; ++destElem) - { - int numParenthesis = 0; - srcElem++; - if (pDest->eSelMode == OPERAND_4_COMPONENT_MASK_MODE && pDest->ui32CompMask != 0 && !(pDest->ui32CompMask & (1 << destElem))) - continue; - - psContext->AddIndentation(); - AddOpAssignToDestWithMask(pDest, eDestType, 1, precise, &numParenthesis, 1 << destElem); - bcatcstr(glsl, "("); - if (s0Type == SVT_BOOL) - { - TranslateOperand(src0, TO_FLAG_BOOL, 1 << srcElem); - bcatcstr(glsl, ") ? "); - } - else - { - TranslateOperand(src0, TO_AUTO_BITCAST_TO_INT, 1 << srcElem); - - if (psContext->psShader->ui32MajorVersion < 4) - { - //cmp opcode uses >= 0 - bcatcstr(glsl, " >= 0) ? "); - } - else - { - bcatcstr(glsl, " != 0) ? 
"); - } - } - - TranslateOperand(src1, SVTTypeToFlag(eDestType), 1 << srcElem); - bcatcstr(glsl, " : "); - TranslateOperand(src2, SVTTypeToFlag(eDestType), 1 << srcElem); - AddAssignPrologue(numParenthesis); - } - - if (dstIsSrc1 || dstIsSrc2) - { - const_cast(pDest)->specialName.clear(); - - psContext->AddIndentation(); - TranslateOperand(glsl, pDest, TO_FLAG_NAME_ONLY); - bformata(glsl, " = %s;\n", tempName.c_str()); - - --psContext->indent; - psContext->AddIndentation(); - bcatcstr(glsl, "}\n"); - } - } -} - -void ToGLSL::CallBinaryOp(const char* name, Instruction* psInst, - int dest, int src0, int src1, SHADER_VARIABLE_TYPE eDataType, bool isEmbedded /* = false*/) -{ - uint32_t ui32Flags = SVTTypeToFlag(eDataType); - bstring glsl = *psContext->currentGLSLString; - uint32_t destMask = psInst->asOperands[dest].GetAccessMask(); - uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); - uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); - uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); - int needsParenthesis = 0; - - if (!HaveNativeBitwiseOps(psContext->psShader->eTargetLanguage)) - { - const char *binaryOpWrap = NULL; - - if (!strcmp("%", name)) - binaryOpWrap = "op_modi"; - else if (!strcmp("&", name)) - binaryOpWrap = "op_and"; - else if (!strcmp("|", name)) - binaryOpWrap = "op_or"; - else if (!strcmp("^", name)) - binaryOpWrap = "op_xor"; - else if (!strcmp(">>", name)) - binaryOpWrap = "op_shr"; - else if (!strcmp("<<", name)) - binaryOpWrap = "op_shl"; - // op_not handled separately at OPCODE_NOT - - if (binaryOpWrap) - { - UseExtraFunctionDependency(binaryOpWrap); - CallHelper2Int(binaryOpWrap, psInst, 0, 1, 2, 1); - return; - } - } - - if (src1SwizCount != src0SwizCount) - { - uint32_t maxElems = std::max(src1SwizCount, src0SwizCount); - ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); - } - - if (!isEmbedded) - psContext->AddIndentation(); - - 
AddAssignToDest(&psInst->asOperands[dest], eDataType, dstSwizCount, psInst->ui32PreciseMask, &needsParenthesis); - - // Adreno 3xx fails on binary ops that operate on vectors - bool opComponentWiseOnAdreno = (!strcmp("&", name) || !strcmp("|", name) || !strcmp("^", name) || !strcmp(">>", name) || !strcmp("<<", name)); - if (psContext->psShader->eTargetLanguage == LANG_ES_300 && opComponentWiseOnAdreno) - { - uint32_t i; - int firstPrinted = 0; - bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, eDataType, dstSwizCount, false)); - bcatcstr(glsl, "("); - for (i = 0; i < 4; i++) - { - if (!(destMask & (1 << i))) - continue; - - if (firstPrinted != 0) - bcatcstr(glsl, ", "); - else - firstPrinted = 1; - - // Remove the auto expand flags - ui32Flags &= ~(TO_AUTO_EXPAND_TO_VEC2 | TO_AUTO_EXPAND_TO_VEC3 | TO_AUTO_EXPAND_TO_VEC4); - - TranslateOperand(&psInst->asOperands[src0], ui32Flags, 1 << i); - bformata(glsl, " %s ", name); - TranslateOperand(&psInst->asOperands[src1], ui32Flags, 1 << i); - } - bcatcstr(glsl, ")"); - } - else - { - TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); - bformata(glsl, " %s ", name); - TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); - } - - AddAssignPrologue(needsParenthesis, isEmbedded); -} - -void ToGLSL::CallTernaryOp(const char* op1, const char* op2, Instruction* psInst, - int dest, int src0, int src1, int src2, uint32_t dataType) -{ - bstring glsl = *psContext->currentGLSLString; - uint32_t destMask = psInst->asOperands[dest].GetAccessMask(); - uint32_t src2SwizCount = psInst->asOperands[src2].GetNumSwizzleElements(destMask); - uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); - uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); - uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); - - uint32_t ui32Flags = dataType; - int numParenthesis = 0; - - if (src1SwizCount != src0SwizCount || src2SwizCount != src0SwizCount) 
- { - uint32_t maxElems = std::max(src2SwizCount, std::max(src1SwizCount, src0SwizCount)); - ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); - } - - psContext->AddIndentation(); - - AddAssignToDest(&psInst->asOperands[dest], TypeFlagsToSVTType(dataType), dstSwizCount, psInst->ui32PreciseMask, &numParenthesis); - - TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); - bformata(glsl, " %s ", op1); - TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); - bformata(glsl, " %s ", op2); - TranslateOperand(&psInst->asOperands[src2], ui32Flags, destMask); - AddAssignPrologue(numParenthesis); -} - -void ToGLSL::CallHelper3(const char* name, Instruction* psInst, - int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask) -{ - uint32_t ui32Flags = TO_AUTO_BITCAST_TO_FLOAT; - bstring glsl = *psContext->currentGLSLString; - uint32_t destMask = paramsShouldFollowWriteMask ? psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; - uint32_t src2SwizCount = psInst->asOperands[src2].GetNumSwizzleElements(destMask); - uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); - uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); - uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); - int numParenthesis = 0; - - if ((src1SwizCount != src0SwizCount || src2SwizCount != src0SwizCount) && paramsShouldFollowWriteMask) - { - uint32_t maxElems = std::max(src2SwizCount, std::max(src1SwizCount, src0SwizCount)); - ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); - } - - psContext->AddIndentation(); - - AddAssignToDest(&psInst->asOperands[dest], SVT_FLOAT, dstSwizCount, psInst->ui32PreciseMask, &numParenthesis); - - bformata(glsl, "%s(", name); - numParenthesis++; - TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); - bcatcstr(glsl, ", "); 
- TranslateOperand(&psInst->asOperands[src2], ui32Flags, destMask); - AddAssignPrologue(numParenthesis); -} - -void ToGLSL::CallHelper2(const char* name, Instruction* psInst, - int dest, int src0, int src1, int paramsShouldFollowWriteMask) -{ - uint32_t ui32Flags = TO_AUTO_BITCAST_TO_FLOAT; - bstring glsl = *psContext->currentGLSLString; - uint32_t destMask = paramsShouldFollowWriteMask ? psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; - uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); - uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); - uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); - - int isDotProduct = (strncmp(name, "dot", 3) == 0) ? 1 : 0; - int numParenthesis = 0; - - if ((src1SwizCount != src0SwizCount) && paramsShouldFollowWriteMask) - { - uint32_t maxElems = std::max(src1SwizCount, src0SwizCount); - ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); - } - - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[dest], SVT_FLOAT, isDotProduct ? 1 : dstSwizCount, psInst->ui32PreciseMask, &numParenthesis); - - bformata(glsl, "%s(", name); - numParenthesis++; - - TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); - - AddAssignPrologue(numParenthesis); -} - -void ToGLSL::CallHelper2Int(const char* name, Instruction* psInst, - int dest, int src0, int src1, int paramsShouldFollowWriteMask) -{ - uint32_t ui32Flags = TO_AUTO_BITCAST_TO_INT; - bstring glsl = *psContext->currentGLSLString; - uint32_t destMask = paramsShouldFollowWriteMask ? 
psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; - uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); - uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); - uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); - int numParenthesis = 0; - - if ((src1SwizCount != src0SwizCount) && paramsShouldFollowWriteMask) - { - uint32_t maxElems = std::max(src1SwizCount, src0SwizCount); - ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); - } - - psContext->AddIndentation(); - - AddAssignToDest(&psInst->asOperands[dest], SVT_INT, dstSwizCount, psInst->ui32PreciseMask, &numParenthesis); - - bformata(glsl, "%s(", name); - numParenthesis++; - TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); - AddAssignPrologue(numParenthesis); -} - -void ToGLSL::CallHelper2UInt(const char* name, Instruction* psInst, - int dest, int src0, int src1, int paramsShouldFollowWriteMask) -{ - uint32_t ui32Flags = TO_AUTO_BITCAST_TO_UINT; - bstring glsl = *psContext->currentGLSLString; - uint32_t destMask = paramsShouldFollowWriteMask ? 
psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; - uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); - uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); - uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); - int numParenthesis = 0; - - if ((src1SwizCount != src0SwizCount) && paramsShouldFollowWriteMask) - { - uint32_t maxElems = std::max(src1SwizCount, src0SwizCount); - ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); - } - - psContext->AddIndentation(); - - AddAssignToDest(&psInst->asOperands[dest], SVT_UINT, dstSwizCount, psInst->ui32PreciseMask, &numParenthesis); - - bformata(glsl, "%s(", name); - numParenthesis++; - TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); - AddAssignPrologue(numParenthesis); -} - -void ToGLSL::CallHelper1(const char* name, Instruction* psInst, - int dest, int src0, int paramsShouldFollowWriteMask) -{ - uint32_t ui32Flags = TO_AUTO_BITCAST_TO_FLOAT; - bstring glsl = *psContext->currentGLSLString; - uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); - uint32_t destMask = paramsShouldFollowWriteMask ? psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; - int numParenthesis = 0; - - psContext->AddIndentation(); - - AddAssignToDest(&psInst->asOperands[dest], SVT_FLOAT, dstSwizCount, psInst->ui32PreciseMask, &numParenthesis); - - bformata(glsl, "%s(", name); - numParenthesis++; - TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); - AddAssignPrologue(numParenthesis); -} - -//Result is an int. 
-void ToGLSL::CallHelper1Int( - const char* name, - Instruction* psInst, - const int dest, - const int src0, - int paramsShouldFollowWriteMask) -{ - uint32_t ui32Flags = TO_AUTO_BITCAST_TO_INT; - bstring glsl = *psContext->currentGLSLString; - uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); - uint32_t destMask = paramsShouldFollowWriteMask ? psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; - int numParenthesis = 0; - - psContext->AddIndentation(); - - AddAssignToDest(&psInst->asOperands[dest], SVT_INT, dstSwizCount, psInst->ui32PreciseMask, &numParenthesis); - - bformata(glsl, "%s(", name); - numParenthesis++; - TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); - AddAssignPrologue(numParenthesis); -} - -// Texel fetches etc need a dummy sampler (because glslang wants one, for Reasons(tm)). -// Any non-shadow sampler will do, so try to get one from sampler registers. If the current shader doesn't have any, declare a dummy one. 
-std::string ToGLSL::GetVulkanDummySamplerName() -{ - std::string dummySmpName = "hlslcc_dummyPointClamp"; - if (!psContext->IsVulkan()) - return ""; - - const ResourceBinding *pSmpInfo = NULL; - int smpIdx = 0; - - while (psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_SAMPLER, smpIdx, &pSmpInfo) != 0) - { - if (pSmpInfo->m_SamplerMode != D3D10_SB_SAMPLER_MODE_COMPARISON) - return ResourceName(psContext, RGROUP_SAMPLER, smpIdx, 0); - - smpIdx++; - } - - if (!psContext->psShader->m_DummySamplerDeclared) - { - GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(dummySmpName); - bstring code = bfromcstr(""); - bformata(code, "layout(set = %d, binding = %d) uniform mediump sampler %s;", binding.set, binding.binding, dummySmpName.c_str()); - DeclareExtraFunction(dummySmpName, code); - bdestroy(code); - psContext->psShader->m_DummySamplerDeclared = true; - } - return dummySmpName; -} - -void ToGLSL::TranslateTexelFetch( - Instruction* psInst, - const ResourceBinding* psBinding, - bstring glsl) -{ - int numParenthesis = 0; - - std::string vulkanSamplerName = GetVulkanDummySamplerName(); - - std::string texName = ResourceName(psContext, RGROUP_TEXTURE, psInst->asOperands[2].ui32RegisterNumber, 0); - const bool hasOffset = (psInst->bAddressOffset != 0); - - // On Vulkan wrap the tex name with the sampler constructor - if (psContext->IsVulkan()) - { - const RESOURCE_DIMENSION eResDim = psContext->psShader->aeResourceDims[psInst->asOperands[2].ui32RegisterNumber]; - std::string smpType = GetSamplerType(psContext, eResDim, psInst->asOperands[2].ui32RegisterNumber); - std::ostringstream oss; - oss << smpType; - oss << "(" << texName << ", " << vulkanSamplerName << ")"; - texName = oss.str(); - } - - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], TypeFlagsToSVTType(ResourceReturnTypeToFlag(psBinding->ui32ReturnType)), 4, psInst->ui32PreciseMask, &numParenthesis); - - if (hasOffset) - 
bcatcstr(glsl, "texelFetchOffset("); - else - bcatcstr(glsl, "texelFetch("); - - switch (psBinding->eDimension) - { - case REFLECT_RESOURCE_DIMENSION_TEXTURE1D: - case REFLECT_RESOURCE_DIMENSION_BUFFER: - { - bcatcstr(glsl, texName.c_str()); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_X); - // Buffers don't have LOD or offset - if (psBinding->eDimension != REFLECT_RESOURCE_DIMENSION_BUFFER) - { - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_A); - if (hasOffset) - bformata(glsl, ", %d", psInst->iUAddrOffset); - } - bcatcstr(glsl, ")"); - break; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY: - case REFLECT_RESOURCE_DIMENSION_TEXTURE3D: - { - bcatcstr(glsl, texName.c_str()); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC3, 7 /* .xyz */); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_A); - if (hasOffset && psBinding->eDimension == REFLECT_RESOURCE_DIMENSION_TEXTURE3D) - bformata(glsl, ", ivec3(%d, %d, %d)", psInst->iUAddrOffset, psInst->iVAddrOffset, psInst->iWAddrOffset); - if (hasOffset && psBinding->eDimension == REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY) - bformata(glsl, ", ivec3(%d, %d)", psInst->iUAddrOffset, psInst->iVAddrOffset); - bcatcstr(glsl, ")"); - break; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURE2D: - case REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY: - { - bcatcstr(glsl, texName.c_str()); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_A); - if (hasOffset && psBinding->eDimension == REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY) - bformata(glsl, ", %d", psInst->iUAddrOffset); - if (hasOffset && psBinding->eDimension == 
REFLECT_RESOURCE_DIMENSION_TEXTURE2D) - bformata(glsl, ", ivec3(%d, %d)", psInst->iUAddrOffset, psInst->iVAddrOffset); - bcatcstr(glsl, ")"); - break; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS: - { - ASSERT(psInst->eOpcode == OPCODE_LD_MS); - bcatcstr(glsl, texName.c_str()); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[3], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_X); - bcatcstr(glsl, ")"); - break; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY: - { - ASSERT(psInst->eOpcode == OPCODE_LD_MS); - bcatcstr(glsl, texName.c_str()); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC3, 7 /* .xyz */); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[3], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_X); - bcatcstr(glsl, ")"); - break; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURECUBE: - case REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY: - case REFLECT_RESOURCE_DIMENSION_BUFFEREX: - default: - { - // Not possible in either HLSL or GLSL - ASSERT(0); - break; - } - } - - TranslateOperandSwizzleWithMask(psContext, &psInst->asOperands[2], psInst->asOperands[0].GetAccessMask(), 0); - AddAssignPrologue(numParenthesis); -} - -//Makes sure the texture coordinate swizzle is appropriate for the texture type. -//i.e. vecX for X-dimension texture. -//Currently supports floating point coord only, so not used for texelFetch. -void ToGLSL::TranslateTexCoord( - const RESOURCE_DIMENSION eResDim, - Operand* psTexCoordOperand) -{ - uint32_t flags = TO_AUTO_BITCAST_TO_FLOAT; - uint32_t opMask = OPERAND_4_COMPONENT_MASK_ALL; - - switch (eResDim) - { - case RESOURCE_DIMENSION_TEXTURE1D: - { - //Vec1 texcoord. Mask out the other components. 
- opMask = OPERAND_4_COMPONENT_MASK_X; - break; - } - case RESOURCE_DIMENSION_TEXTURE2D: - case RESOURCE_DIMENSION_TEXTURE1DARRAY: - { - //Vec2 texcoord. Mask out the other components. - opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y; - flags |= TO_AUTO_EXPAND_TO_VEC2; - break; - } - case RESOURCE_DIMENSION_TEXTURECUBE: - case RESOURCE_DIMENSION_TEXTURE3D: - case RESOURCE_DIMENSION_TEXTURE2DARRAY: - { - //Vec3 texcoord. Mask out the other components. - opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z; - flags |= TO_AUTO_EXPAND_TO_VEC3; - break; - } - case RESOURCE_DIMENSION_TEXTURECUBEARRAY: - { - flags |= TO_AUTO_EXPAND_TO_VEC4; - break; - } - default: - { - ASSERT(0); - break; - } - } - - //FIXME detect when integer coords are needed. - TranslateOperand(psTexCoordOperand, flags, opMask); -} - -void ToGLSL::GetResInfoData(Instruction* psInst, int index, int destElem) -{ - bstring glsl = *psContext->currentGLSLString; - int numParenthesis = 0; - const RESINFO_RETURN_TYPE eResInfoReturnType = psInst->eResInfoReturnType; - bool isUAV = (psInst->asOperands[2].eType == OPERAND_TYPE_UNORDERED_ACCESS_VIEW); - bool isMS = psInst->eResDim == RESOURCE_DIMENSION_TEXTURE2DMS || psInst->eResDim == RESOURCE_DIMENSION_TEXTURE2DMSARRAY; - - std::string texName = ResourceName(psContext, isUAV ? 
RGROUP_UAV : RGROUP_TEXTURE, psInst->asOperands[2].ui32RegisterNumber, 0); - - // On Vulkan wrap the tex name with the sampler constructor - if (psContext->IsVulkan() && !isUAV) - { - std::string vulkanSamplerName = GetVulkanDummySamplerName(); - - const RESOURCE_DIMENSION eResDim = psContext->psShader->aeResourceDims[psInst->asOperands[2].ui32RegisterNumber]; - std::string smpType = GetSamplerType(psContext, eResDim, psInst->asOperands[2].ui32RegisterNumber); - std::ostringstream oss; - oss << smpType; - oss << "(" << texName << ", " << vulkanSamplerName << ")"; - texName = oss.str(); - } - - psContext->AddIndentation(); - AddOpAssignToDestWithMask(&psInst->asOperands[0], eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT ? SVT_UINT : SVT_FLOAT, 1, psInst->ui32PreciseMask, &numParenthesis, 1 << destElem); - - //[width, height, depth or array size, total-mip-count] - if (index < 3) - { - int dim = GetNumTextureDimensions(psInst->eResDim); - bcatcstr(glsl, "("); - if (dim < (index + 1)) - { - bcatcstr(glsl, eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT ? (HaveUnsignedTypes(psContext->psShader->eTargetLanguage) ? "uint(0)" : "0") : "0.0"); // Old ES3.0 Adrenos treat 0u as const int. 
- } - else - { - if (eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT) - { - if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) - bformata(glsl, "uvec%d(", dim); - else - bformata(glsl, "ivec%d(", dim); - } - else if (eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_RCPFLOAT) - bformata(glsl, "vec%d(1.0) / vec%d(", dim, dim); - else - bformata(glsl, "vec%d(", dim); - - if (isUAV) - bcatcstr(glsl, "imageSize("); - else - bcatcstr(glsl, "textureSize("); - - bcatcstr(glsl, texName.c_str()); - - if (!isUAV && !isMS) - { - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER); - } - bcatcstr(glsl, "))"); - - switch (index) - { - case 0: - bcatcstr(glsl, ".x"); - break; - case 1: - bcatcstr(glsl, ".y"); - break; - case 2: - bcatcstr(glsl, ".z"); - break; - } - } - - bcatcstr(glsl, ")"); - } - else - { - ASSERT(!isUAV); - if (eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT) - { - if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) - bcatcstr(glsl, "uint("); - else - bcatcstr(glsl, "int("); - } - else - bcatcstr(glsl, "float("); - bcatcstr(glsl, "textureQueryLevels("); - bcatcstr(glsl, texName.c_str()); - bcatcstr(glsl, "))"); - } - AddAssignPrologue(numParenthesis); -} - -void ToGLSL::TranslateTextureSample(Instruction* psInst, - uint32_t ui32Flags) -{ - bstring glsl = *psContext->currentGLSLString; - int numParenthesis = 0; - int hasParamOffset = (ui32Flags & TEXSMP_FLAG_PARAMOFFSET) ? 1 : 0; - - Operand* psDest = &psInst->asOperands[0]; - Operand* psDestAddr = &psInst->asOperands[1]; - Operand* psSrcOff = (ui32Flags & TEXSMP_FLAG_PARAMOFFSET) ? &psInst->asOperands[2] : 0; - Operand* psSrcTex = &psInst->asOperands[2 + hasParamOffset]; - Operand* psSrcSamp = &psInst->asOperands[3 + hasParamOffset]; - Operand* psSrcRef = (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) ? &psInst->asOperands[4 + hasParamOffset] : 0; - Operand* psSrcLOD = (ui32Flags & TEXSMP_FLAG_LOD) ? 
&psInst->asOperands[4] : 0; - Operand* psSrcDx = (ui32Flags & TEXSMP_FLAG_GRAD) ? &psInst->asOperands[4] : 0; - Operand* psSrcDy = (ui32Flags & TEXSMP_FLAG_GRAD) ? &psInst->asOperands[5] : 0; - Operand* psSrcBias = (ui32Flags & TEXSMP_FLAG_BIAS) ? &psInst->asOperands[4] : 0; - - const char* funcName = "texture"; - const char* offset = ""; - const char* depthCmpCoordType = ""; - const char* gradSwizzle = ""; - const char* ext = ""; - - uint32_t ui32NumOffsets = 0; - - const RESOURCE_DIMENSION eResDim = psContext->psShader->aeResourceDims[psSrcTex->ui32RegisterNumber]; - const int iHaveOverloadedTexFuncs = HaveOverloadedTextureFuncs(psContext->psShader->eTargetLanguage); - const int useCombinedTextureSamplers = (psContext->flags & HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS) ? 1 : 0; - - if (psInst->bAddressOffset) - { - offset = "Offset"; - } - if (psContext->IsSwitch() && psInst->eOpcode == OPCODE_GATHER4_PO) - { - // it seems that other GLSLCore compilers accept textureGather(sampler2D sampler, vec2 texCoord, ivec2 texelOffset, int component) with the "texelOffset" parameter, - // however this is not in the GLSL spec, and Switch's GLSLc compiler requires to use the textureGatherOffset version of the function - offset = "Offset"; - } - - switch (eResDim) - { - case RESOURCE_DIMENSION_TEXTURE1D: - { - depthCmpCoordType = "vec2"; - gradSwizzle = ".x"; - ui32NumOffsets = 1; - if (!iHaveOverloadedTexFuncs) - { - funcName = "texture1D"; - if (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) - { - funcName = "shadow1D"; - } - } - break; - } - case RESOURCE_DIMENSION_TEXTURE2D: - { - depthCmpCoordType = "vec3"; - gradSwizzle = ".xy"; - ui32NumOffsets = 2; - if (!iHaveOverloadedTexFuncs) - { - funcName = "texture2D"; - if (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) - { - funcName = "shadow2D"; - } - } - break; - } - case RESOURCE_DIMENSION_TEXTURECUBE: - { - depthCmpCoordType = "vec4"; - gradSwizzle = ".xyz"; - ui32NumOffsets = 3; - if (!iHaveOverloadedTexFuncs) - { - funcName = "textureCube"; - 
} - break; - } - case RESOURCE_DIMENSION_TEXTURE3D: - { - depthCmpCoordType = "vec4"; - gradSwizzle = ".xyz"; - ui32NumOffsets = 3; - if (!iHaveOverloadedTexFuncs) - { - funcName = "texture3D"; - } - break; - } - case RESOURCE_DIMENSION_TEXTURE1DARRAY: - { - depthCmpCoordType = "vec3"; - gradSwizzle = ".x"; - ui32NumOffsets = 1; - break; - } - case RESOURCE_DIMENSION_TEXTURE2DARRAY: - { - depthCmpCoordType = "vec4"; - gradSwizzle = ".xy"; - ui32NumOffsets = 2; - break; - } - case RESOURCE_DIMENSION_TEXTURECUBEARRAY: - { - gradSwizzle = ".xyz"; - ui32NumOffsets = 3; - break; - } - default: - { - ASSERT(0); - break; - } - } - - if (ui32Flags & TEXSMP_FLAG_GATHER) - funcName = "textureGather"; - - uint32_t uniqueNameCounter = 0; - - // In GLSL, for every texture sampling func overload, except for cubemap arrays, the - // depth compare reference value is given as the last component of the texture coord vector. - // Cubemap array sampling as well as all the gather funcs have a separate parameter for it. - // HLSL always provides the reference as a separate param. 
- // - // Here we create a temp texcoord var with the reference value embedded - if ((ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) && - (eResDim != RESOURCE_DIMENSION_TEXTURECUBEARRAY && !(ui32Flags & TEXSMP_FLAG_GATHER))) - { - uniqueNameCounter = psContext->psShader->asPhases[psContext->currentPhase].m_NextTexCoordTemp++; - psContext->AddIndentation(); - // Create a temp variable for the coordinate as Adrenos hate nonstandard swizzles in the texcoords - bformata(glsl, "%s txVec%d = ", depthCmpCoordType, uniqueNameCounter); - bformata(glsl, "%s(", depthCmpCoordType); - TranslateTexCoord(eResDim, psDestAddr); - bcatcstr(glsl, ","); - // Last component is the reference - TranslateOperand(psSrcRef, TO_AUTO_BITCAST_TO_FLOAT); - bcatcstr(glsl, ");\n"); - } - - SHADER_VARIABLE_TYPE dataType = psContext->psShader->sInfo.GetTextureDataType(psSrcTex->ui32RegisterNumber); - psContext->AddIndentation(); - AddAssignToDest(psDest, dataType, psSrcTex->GetNumSwizzleElements(), psInst->ui32PreciseMask, &numParenthesis); - - // GLSL doesn't have textureLod() for 2d shadow samplers, we'll have to use grad instead. In that case assume LOD 0. - bool needsLodWorkaround = (eResDim == RESOURCE_DIMENSION_TEXTURE2DARRAY) && (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE); - const bool needsLodWorkaroundES2 = (psContext->psShader->eTargetLanguage == LANG_ES_100 && psContext->psShader->eShaderType == PIXEL_SHADER && (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE)); - - // Workaround for switch for OPCODE_SAMPLE_C_LZ, in particular sampler2dArrayShadow.SampleCmpLevelZero(). - // textureGrad() with shadow samplers is not implemented in HW on switch so the behavior is emulated using shuffles and 4 texture fetches. - // The code generated is very heavy. - // Workaround: use standard texture fetch, shadows are currently not mipmapped, so that should work for now. 
- if (needsLodWorkaround && psContext->IsSwitch() && ui32Flags == (TEXSMP_FLAG_DEPTHCOMPARE | TEXSMP_FLAG_FIRSTLOD)) - { - needsLodWorkaround = false; - ui32Flags &= ~TEXSMP_FLAG_FIRSTLOD; - } - - if (needsLodWorkaround) - { - bformata(glsl, "%sGrad%s(", funcName, offset); - } - else - { - if (psContext->psShader->eTargetLanguage == LANG_ES_100 && - psContext->psShader->eShaderType == PIXEL_SHADER && - ui32Flags & (TEXSMP_FLAG_LOD | TEXSMP_FLAG_FIRSTLOD | TEXSMP_FLAG_GRAD)) - ext = "EXT"; - - if (ui32Flags & (TEXSMP_FLAG_LOD | TEXSMP_FLAG_FIRSTLOD) && !needsLodWorkaroundES2) - bformata(glsl, "%sLod%s%s(", funcName, ext, offset); - else if (ui32Flags & TEXSMP_FLAG_GRAD) - bformata(glsl, "%sGrad%s%s(", funcName, ext, offset); - else - bformata(glsl, "%s%s%s(", funcName, ext, offset); - } - - if (psContext->IsVulkan()) - { - // Build the sampler name here - std::string samplerType = GetSamplerType(psContext, eResDim, psSrcTex->ui32RegisterNumber); - const ResourceBinding *pSmpRes = NULL; - psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_SAMPLER, psSrcSamp->ui32RegisterNumber, &pSmpRes); - - if (pSmpRes->m_SamplerMode == D3D10_SB_SAMPLER_MODE_COMPARISON) - samplerType.append("Shadow"); - std::string texName = ResourceName(psContext, RGROUP_TEXTURE, psSrcTex->ui32RegisterNumber, 0); - std::string smpName = ResourceName(psContext, RGROUP_SAMPLER, psSrcSamp->ui32RegisterNumber, 0); - bformata(glsl, "%s(%s, %s)", samplerType.c_str(), texName.c_str(), smpName.c_str()); - } - else - { - // Sampler name - if (!useCombinedTextureSamplers) - ResourceName(glsl, psContext, RGROUP_TEXTURE, psSrcTex->ui32RegisterNumber, ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE); - else - bcatcstr(glsl, TextureSamplerName(&psContext->psShader->sInfo, psSrcTex->ui32RegisterNumber, psSrcSamp->ui32RegisterNumber, ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE).c_str()); - } - bcatcstr(glsl, ", "); - - // Texture coordinates, either from previously constructed temp - // or straight from the psDestAddr 
operand - if ((ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) && - (eResDim != RESOURCE_DIMENSION_TEXTURECUBEARRAY && !(ui32Flags & TEXSMP_FLAG_GATHER))) - bformata(glsl, "txVec%d", uniqueNameCounter); - else - TranslateTexCoord(eResDim, psDestAddr); - - // If depth compare reference was not embedded to texcoord - // then insert it here as a separate param - if ((ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) && - (eResDim == RESOURCE_DIMENSION_TEXTURECUBEARRAY || (ui32Flags & TEXSMP_FLAG_GATHER))) - { - bcatcstr(glsl, ", "); - TranslateOperand(psSrcRef, TO_AUTO_BITCAST_TO_FLOAT); - } - - // Add LOD/grad parameters based on the flags - if (needsLodWorkaround) - { - bcatcstr(glsl, ", vec2(0.0, 0.0), vec2(0.0, 0.0)"); - } - else if (ui32Flags & TEXSMP_FLAG_LOD) - { - if (!needsLodWorkaroundES2) - { - bcatcstr(glsl, ", "); - TranslateOperand(psSrcLOD, TO_AUTO_BITCAST_TO_FLOAT); - if (psContext->psShader->ui32MajorVersion < 4) - { - bcatcstr(glsl, ".w"); - } - } - } - else if (ui32Flags & TEXSMP_FLAG_FIRSTLOD) - { - if (!needsLodWorkaroundES2) - bcatcstr(glsl, ", 0.0"); - } - else if (ui32Flags & TEXSMP_FLAG_GRAD) - { - bcatcstr(glsl, ", vec4("); - TranslateOperand(psSrcDx, TO_AUTO_BITCAST_TO_FLOAT); - bcatcstr(glsl, ")"); - bcatcstr(glsl, gradSwizzle); - bcatcstr(glsl, ", vec4("); - TranslateOperand(psSrcDy, TO_AUTO_BITCAST_TO_FLOAT); - bcatcstr(glsl, ")"); - bcatcstr(glsl, gradSwizzle); - } - - // Add offset param - if (psInst->bAddressOffset) - { - if (ui32NumOffsets == 1) - { - bformata(glsl, ", %d", - psInst->iUAddrOffset); - } - else if (ui32NumOffsets == 2) - { - bformata(glsl, ", ivec2(%d, %d)", - psInst->iUAddrOffset, - psInst->iVAddrOffset); - } - else if (ui32NumOffsets == 3) - { - bformata(glsl, ", ivec3(%d, %d, %d)", - psInst->iUAddrOffset, - psInst->iVAddrOffset, - psInst->iWAddrOffset); - } - } - // HLSL gather has a variant with separate offset operand - else if (ui32Flags & TEXSMP_FLAG_PARAMOFFSET) - { - uint32_t mask = OPERAND_4_COMPONENT_MASK_X; - if (ui32NumOffsets > 
1) - mask |= OPERAND_4_COMPONENT_MASK_Y; - if (ui32NumOffsets > 2) - mask |= OPERAND_4_COMPONENT_MASK_Z; - - bcatcstr(glsl, ","); - TranslateOperand(psSrcOff, TO_FLAG_INTEGER, mask); - } - - // Add bias if present - if (ui32Flags & TEXSMP_FLAG_BIAS) - { - bcatcstr(glsl, ", "); - TranslateOperand(psSrcBias, TO_AUTO_BITCAST_TO_FLOAT); - } - - // Add texture gather component selection if needed - if ((ui32Flags & TEXSMP_FLAG_GATHER) && psSrcSamp->GetNumSwizzleElements() > 0) - { - ASSERT(psSrcSamp->GetNumSwizzleElements() == 1); - if (psSrcSamp->aui32Swizzle[0] != OPERAND_4_COMPONENT_X) - { - if (!(ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE)) - { - bformata(glsl, ", %d", psSrcSamp->aui32Swizzle[0]); - } - else - { - // Component selection not supported with depth compare gather - } - } - } - - bcatcstr(glsl, ")"); - - if (!(ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) || (ui32Flags & TEXSMP_FLAG_GATHER)) - { - // iWriteMaskEnabled is forced off during DecodeOperand because swizzle on sampler uniforms - // does not make sense. But need to re-enable to correctly swizzle this particular instruction. - psSrcTex->iWriteMaskEnabled = 1; - TranslateOperandSwizzleWithMask(psContext, psSrcTex, psDest->GetAccessMask(), 0); - } - AddAssignPrologue(numParenthesis); -} - -const char* swizzleString[] = { ".x", ".y", ".z", ".w" }; - -// Handle cases where vector components are accessed with dynamic index ([] notation). -// A bit ugly hack because compiled HLSL uses byte offsets to access data in structs => we are converting -// the offset back to vector component index in runtime => calculating stuff back and forth. -// TODO: Would be better to eliminate the offset calculation ops and use indexes straight on. Could be tricky though... 
-void ToGLSL::TranslateDynamicComponentSelection(const ShaderVarType* psVarType, const Operand* psByteAddr, uint32_t offset, uint32_t mask) -{ - bstring glsl = *psContext->currentGLSLString; - ASSERT(psVarType->Class == SVC_VECTOR); - - bcatcstr(glsl, "["); // Access vector component with [] notation - if (offset > 0) - bcatcstr(glsl, "("); - - if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) - { - // The var containing byte address to the requested element - TranslateOperand(psByteAddr, TO_FLAG_UNSIGNED_INTEGER, mask); - - if (offset > 0)// If the vector is part of a struct, there is an extra offset in our byte address - bformata(glsl, " - %du)", offset); // Subtract that first - - bcatcstr(glsl, " >> 0x2u"); // Convert byte offset to index: div by four - bcatcstr(glsl, "]"); - } - else - { - // The var containing byte address to the requested element - TranslateOperand(psByteAddr, TO_FLAG_INTEGER, mask); - - if (offset > 0)// If the vector is part of a struct, there is an extra offset in our byte address - bformata(glsl, " - %d)", offset); // Subtract that first - - bcatcstr(glsl, " >> 0x2"); // Convert byte offset to index: div by four - bcatcstr(glsl, "]"); - } -} - -void ToGLSL::TranslateShaderStorageStore(Instruction* psInst) -{ - bstring glsl = *psContext->currentGLSLString; - int component; - int srcComponent = 0; - - Operand* psDest = 0; - Operand* psDestAddr = 0; - Operand* psDestByteOff = 0; - Operand* psSrc = 0; - - switch (psInst->eOpcode) - { - case OPCODE_STORE_STRUCTURED: - psDest = &psInst->asOperands[0]; - psDestAddr = &psInst->asOperands[1]; - psDestByteOff = &psInst->asOperands[2]; - psSrc = &psInst->asOperands[3]; - break; - case OPCODE_STORE_RAW: - psDest = &psInst->asOperands[0]; - psDestByteOff = &psInst->asOperands[1]; - psSrc = &psInst->asOperands[2]; - break; - default: - ASSERT(0); - break; - } - - uint32_t dstOffFlag = TO_FLAG_UNSIGNED_INTEGER; - SHADER_VARIABLE_TYPE dstOffType = psDestByteOff->GetDataType(psContext); - if 
(!HaveUnsignedTypes(psContext->psShader->eTargetLanguage) || dstOffType == SVT_INT || dstOffType == SVT_INT16 || dstOffType == SVT_INT12) - dstOffFlag = TO_FLAG_INTEGER; - - for (component = 0; component < 4; component++) - { - ASSERT(psInst->asOperands[0].eSelMode == OPERAND_4_COMPONENT_MASK_MODE); - if (psInst->asOperands[0].ui32CompMask & (1 << component)) - { - psContext->AddIndentation(); - - TranslateOperand(psDest, TO_FLAG_DESTINATION | TO_FLAG_NAME_ONLY); - - if (psDest->eType != OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) - bcatcstr(glsl, "_buf"); - - if (psDestAddr) - { - bcatcstr(glsl, "["); - TranslateOperand(psDestAddr, TO_FLAG_INTEGER | TO_FLAG_UNSIGNED_INTEGER); - bcatcstr(glsl, "].value"); - } - - bcatcstr(glsl, "[("); - TranslateOperand(psDestByteOff, dstOffFlag); - bcatcstr(glsl, " >> 2"); - if (dstOffFlag == TO_FLAG_UNSIGNED_INTEGER) - bcatcstr(glsl, "u"); - bcatcstr(glsl, ")"); - - if (component != 0) - { - bformata(glsl, " + %d", component); - if (dstOffFlag == TO_FLAG_UNSIGNED_INTEGER) - bcatcstr(glsl, "u"); - } - - bcatcstr(glsl, "]"); - - uint32_t srcFlag = TO_FLAG_UNSIGNED_INTEGER; - if (DeclareRWStructuredBufferTemplateTypeAsInteger(psContext, psDest) && - psDest->eType != OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) // group shared is uint - srcFlag = TO_FLAG_INTEGER; - - bcatcstr(glsl, " = "); - if (psSrc->GetNumSwizzleElements() > 1) - TranslateOperand(psSrc, srcFlag, 1 << (srcComponent++)); - else - TranslateOperand(psSrc, srcFlag, OPERAND_4_COMPONENT_MASK_X); - - bcatcstr(glsl, ";\n"); - } - } -} - -void ToGLSL::TranslateShaderStorageLoad(Instruction* psInst) -{ - bstring glsl = *psContext->currentGLSLString; - int component; - Operand* psDest = 0; - Operand* psSrcAddr = 0; - Operand* psSrcByteOff = 0; - Operand* psSrc = 0; - - switch (psInst->eOpcode) - { - case OPCODE_LD_STRUCTURED: - psDest = &psInst->asOperands[0]; - psSrcAddr = &psInst->asOperands[1]; - psSrcByteOff = &psInst->asOperands[2]; - psSrc = &psInst->asOperands[3]; - break; - 
case OPCODE_LD_RAW: - psDest = &psInst->asOperands[0]; - psSrcByteOff = &psInst->asOperands[1]; - psSrc = &psInst->asOperands[2]; - break; - default: - ASSERT(0); - break; - } - - uint32_t destCount = psDest->GetNumSwizzleElements(); - uint32_t destMask = psDest->GetAccessMask(); - - int numParenthesis = 0; - int firstItemAdded = 0; - SHADER_VARIABLE_TYPE destDataType = psDest->GetDataType(psContext); - uint32_t srcOffFlag = TO_FLAG_UNSIGNED_INTEGER; - SHADER_VARIABLE_TYPE srcOffType = psSrcByteOff->GetDataType(psContext); - if (!HaveUnsignedTypes(psContext->psShader->eTargetLanguage) || srcOffType == SVT_INT || srcOffType == SVT_INT16 || srcOffType == SVT_INT12) - srcOffFlag = TO_FLAG_INTEGER; - - psContext->AddIndentation(); - AddAssignToDest(psDest, destDataType, destCount, psInst->ui32PreciseMask, &numParenthesis); //TODO check this out? - if (destCount > 1 || destDataType == SVT_FLOAT16) - { - bformata(glsl, "%s(", GetConstructorForTypeGLSL(psContext, destDataType, destCount, false)); - numParenthesis++; - } - for (component = 0; component < 4; component++) - { - int addedBitcast = 0; - if (!(destMask & (1 << component))) - continue; - - if (firstItemAdded) - bcatcstr(glsl, ", "); - else - firstItemAdded = 1; - - // always uint array atm - if (destDataType == SVT_FLOAT) - { - if (HaveBitEncodingOps(psContext->psShader->eTargetLanguage)) - bcatcstr(glsl, "uintBitsToFloat("); - else - bcatcstr(glsl, "float("); - addedBitcast = 1; - } - else if (destDataType == SVT_INT || destDataType == SVT_INT16 || destDataType == SVT_INT12) - { - bcatcstr(glsl, "int("); - addedBitcast = 1; - } - - TranslateOperand(psSrc, TO_FLAG_NAME_ONLY); - - if (psSrc->eType != OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) - bcatcstr(glsl, "_buf"); - - if (psSrcAddr) - { - bcatcstr(glsl, "["); - TranslateOperand(psSrcAddr, TO_FLAG_UNSIGNED_INTEGER | TO_FLAG_INTEGER); - bcatcstr(glsl, "].value"); - } - bcatcstr(glsl, "[("); - TranslateOperand(psSrcByteOff, srcOffFlag); - bcatcstr(glsl, " >> 2"); 
- if (srcOffFlag == TO_FLAG_UNSIGNED_INTEGER) - bcatcstr(glsl, "u"); - - bformata(glsl, ") + %d", psSrc->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE ? psSrc->aui32Swizzle[component] : component); - if (srcOffFlag == TO_FLAG_UNSIGNED_INTEGER) - bcatcstr(glsl, "u"); - - bcatcstr(glsl, "]"); - - if (addedBitcast) - bcatcstr(glsl, ")"); - } - AddAssignPrologue(numParenthesis); -} - -void ToGLSL::TranslateAtomicMemOp(Instruction* psInst) -{ - bstring glsl = *psContext->currentGLSLString; - int numParenthesis = 0; - uint32_t ui32DstDataTypeFlag = TO_FLAG_DESTINATION | TO_FLAG_NAME_ONLY; - uint32_t ui32DataTypeFlag = TO_FLAG_INTEGER; - const char* func = ""; - Operand* dest = 0; - Operand* previousValue = 0; - Operand* destAddr = 0; - Operand* src = 0; - Operand* compare = 0; - int texDim = 0; - bool isUint = true; - - switch (psInst->eOpcode) - { - case OPCODE_IMM_ATOMIC_IADD: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_IADD\n"); - } - func = "Add"; - previousValue = &psInst->asOperands[0]; - dest = &psInst->asOperands[1]; - destAddr = &psInst->asOperands[2]; - src = &psInst->asOperands[3]; - break; - } - case OPCODE_ATOMIC_IADD: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_IADD\n"); - } - func = "Add"; - dest = &psInst->asOperands[0]; - destAddr = &psInst->asOperands[1]; - src = &psInst->asOperands[2]; - break; - } - case OPCODE_IMM_ATOMIC_AND: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_AND\n"); - } - func = "And"; - previousValue = &psInst->asOperands[0]; - dest = &psInst->asOperands[1]; - destAddr = &psInst->asOperands[2]; - src = &psInst->asOperands[3]; - break; - } - case OPCODE_ATOMIC_AND: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - 
psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_AND\n"); - } - func = "And"; - dest = &psInst->asOperands[0]; - destAddr = &psInst->asOperands[1]; - src = &psInst->asOperands[2]; - break; - } - case OPCODE_IMM_ATOMIC_OR: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_OR\n"); - } - func = "Or"; - previousValue = &psInst->asOperands[0]; - dest = &psInst->asOperands[1]; - destAddr = &psInst->asOperands[2]; - src = &psInst->asOperands[3]; - break; - } - case OPCODE_ATOMIC_OR: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_OR\n"); - } - func = "Or"; - dest = &psInst->asOperands[0]; - destAddr = &psInst->asOperands[1]; - src = &psInst->asOperands[2]; - break; - } - case OPCODE_IMM_ATOMIC_XOR: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_XOR\n"); - } - func = "Xor"; - previousValue = &psInst->asOperands[0]; - dest = &psInst->asOperands[1]; - destAddr = &psInst->asOperands[2]; - src = &psInst->asOperands[3]; - break; - } - case OPCODE_ATOMIC_XOR: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_XOR\n"); - } - func = "Xor"; - dest = &psInst->asOperands[0]; - destAddr = &psInst->asOperands[1]; - src = &psInst->asOperands[2]; - break; - } - - case OPCODE_IMM_ATOMIC_EXCH: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_EXCH\n"); - } - func = "Exchange"; - previousValue = &psInst->asOperands[0]; - dest = &psInst->asOperands[1]; - destAddr = &psInst->asOperands[2]; - src = &psInst->asOperands[3]; - break; - } - case OPCODE_IMM_ATOMIC_CMP_EXCH: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - 
psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_CMP_EXC\n"); - } - func = "CompSwap"; - previousValue = &psInst->asOperands[0]; - dest = &psInst->asOperands[1]; - destAddr = &psInst->asOperands[2]; - compare = &psInst->asOperands[3]; - src = &psInst->asOperands[4]; - break; - } - case OPCODE_ATOMIC_CMP_STORE: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_CMP_STORE\n"); - } - func = "CompSwap"; - previousValue = 0; - dest = &psInst->asOperands[0]; - destAddr = &psInst->asOperands[1]; - compare = &psInst->asOperands[2]; - src = &psInst->asOperands[3]; - break; - } - case OPCODE_IMM_ATOMIC_UMIN: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_UMIN\n"); - } - func = "Min"; - previousValue = &psInst->asOperands[0]; - dest = &psInst->asOperands[1]; - destAddr = &psInst->asOperands[2]; - src = &psInst->asOperands[3]; - break; - } - case OPCODE_ATOMIC_UMIN: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_UMIN\n"); - } - func = "Min"; - dest = &psInst->asOperands[0]; - destAddr = &psInst->asOperands[1]; - src = &psInst->asOperands[2]; - break; - } - case OPCODE_IMM_ATOMIC_IMIN: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_IMIN\n"); - } - func = "Min"; - previousValue = &psInst->asOperands[0]; - dest = &psInst->asOperands[1]; - destAddr = &psInst->asOperands[2]; - src = &psInst->asOperands[3]; - break; - } - case OPCODE_ATOMIC_IMIN: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_IMIN\n"); - } - func = "Min"; - dest = &psInst->asOperands[0]; - destAddr = &psInst->asOperands[1]; - src = &psInst->asOperands[2]; - break; - } - case 
OPCODE_IMM_ATOMIC_UMAX: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_UMAX\n"); - } - func = "Max"; - previousValue = &psInst->asOperands[0]; - dest = &psInst->asOperands[1]; - destAddr = &psInst->asOperands[2]; - src = &psInst->asOperands[3]; - break; - } - case OPCODE_ATOMIC_UMAX: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_UMAX\n"); - } - func = "Max"; - dest = &psInst->asOperands[0]; - destAddr = &psInst->asOperands[1]; - src = &psInst->asOperands[2]; - break; - } - case OPCODE_IMM_ATOMIC_IMAX: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_IMAX\n"); - } - func = "Max"; - previousValue = &psInst->asOperands[0]; - dest = &psInst->asOperands[1]; - destAddr = &psInst->asOperands[2]; - src = &psInst->asOperands[3]; - break; - } - case OPCODE_ATOMIC_IMAX: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_IMAX\n"); - } - func = "Max"; - dest = &psInst->asOperands[0]; - destAddr = &psInst->asOperands[1]; - src = &psInst->asOperands[2]; - break; - } - default: - ASSERT(0); - break; - } - - psContext->AddIndentation(); - - if (dest->eType != OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) - { - const ResourceBinding* psBinding = 0; - psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, dest->ui32RegisterNumber, &psBinding); - - if (psBinding->eType == RTYPE_UAV_RWTYPED) - { - isUint = (psBinding->ui32ReturnType == RETURN_TYPE_UINT); - - // Find out if it's texture and of what dimension - switch (psBinding->eDimension) - { - case REFLECT_RESOURCE_DIMENSION_TEXTURE1D: - case REFLECT_RESOURCE_DIMENSION_BUFFER: - texDim = 1; - break; - case REFLECT_RESOURCE_DIMENSION_TEXTURECUBE: - case 
REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY: - case REFLECT_RESOURCE_DIMENSION_TEXTURE2D: - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS: - texDim = 2; - break; - case REFLECT_RESOURCE_DIMENSION_TEXTURE3D: - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY: - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY: - case REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY: - texDim = 3; - break; - default: - ASSERT(0); - break; - } - } - else if (psBinding->eType == RTYPE_UAV_RWSTRUCTURED) - { - if (DeclareRWStructuredBufferTemplateTypeAsInteger(psContext, dest)) - { - isUint = false; - ui32DstDataTypeFlag |= TO_FLAG_INTEGER; - } - } - } - - if (isUint && HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) - ui32DataTypeFlag = TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_BITCAST_TO_UINT; - else - ui32DataTypeFlag = TO_FLAG_INTEGER | TO_AUTO_BITCAST_TO_INT; - - if (previousValue) - AddAssignToDest(previousValue, isUint ? SVT_UINT : SVT_INT, 1, psInst->ui32PreciseMask, &numParenthesis); - - if (texDim > 0) - bcatcstr(glsl, "imageAtomic"); - else - bcatcstr(glsl, "atomic"); - - bcatcstr(glsl, func); - bcatcstr(glsl, "("); - - TranslateOperand(dest, ui32DstDataTypeFlag); - - if (texDim > 0) - { - bcatcstr(glsl, ", "); - unsigned int compMask = OPERAND_4_COMPONENT_MASK_X; - if (texDim >= 2) - compMask |= OPERAND_4_COMPONENT_MASK_Y; - if (texDim == 3) - compMask |= OPERAND_4_COMPONENT_MASK_Z; - - TranslateOperand(destAddr, TO_FLAG_INTEGER, compMask); - } - else - { - if (dest->eType != OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) - bcatcstr(glsl, "_buf"); - - uint32_t destAddrFlag = TO_FLAG_UNSIGNED_INTEGER; - SHADER_VARIABLE_TYPE destAddrType = destAddr->GetDataType(psContext); - if (!HaveUnsignedTypes(psContext->psShader->eTargetLanguage) || destAddrType == SVT_INT || destAddrType == SVT_INT16 || destAddrType == SVT_INT12) - destAddrFlag = TO_FLAG_INTEGER; - - bcatcstr(glsl, "["); - TranslateOperand(destAddr, destAddrFlag, OPERAND_4_COMPONENT_MASK_X); - - // Structured buf if we have both x & y 
swizzles. Raw buf has only x -> no .value[] - if (destAddr->GetNumSwizzleElements(OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y) == 2) - { - bcatcstr(glsl, "]"); - - bcatcstr(glsl, ".value["); - TranslateOperand(destAddr, destAddrFlag, OPERAND_4_COMPONENT_MASK_Y); - } - - bcatcstr(glsl, " >> 2");//bytes to floats - if (destAddrFlag == TO_FLAG_UNSIGNED_INTEGER) - bcatcstr(glsl, "u"); - - bcatcstr(glsl, "]"); - } - - bcatcstr(glsl, ", "); - - if (compare) - { - TranslateOperand(compare, ui32DataTypeFlag); - bcatcstr(glsl, ", "); - } - - TranslateOperand(src, ui32DataTypeFlag); - bcatcstr(glsl, ")"); - if (previousValue) - { - AddAssignPrologue(numParenthesis); - } - else - bcatcstr(glsl, ";\n"); -} - -void ToGLSL::TranslateConditional( - Instruction* psInst, - bstring glsl) -{ - const char* statement = ""; - if (psInst->eOpcode == OPCODE_BREAKC) - { - statement = "break"; - } - else if (psInst->eOpcode == OPCODE_CONTINUEC) - { - statement = "continue"; - } - else if (psInst->eOpcode == OPCODE_RETC) // FIXME! Need to spew out shader epilogue - { - statement = "return"; - } - - SHADER_VARIABLE_TYPE argType = psInst->asOperands[0].GetDataType(psContext); - if (argType == SVT_BOOL) - { - bcatcstr(glsl, "if("); - if (psInst->eBooleanTestType != INSTRUCTION_TEST_NONZERO) - bcatcstr(glsl, "!"); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_BOOL); - if (psInst->eOpcode != OPCODE_IF) - { - bformata(glsl, "){%s;}\n", statement); - } - else - { - bcatcstr(glsl, "){\n"); - } - } - else - { - uint32_t oFlag = TO_FLAG_UNSIGNED_INTEGER; - bool isInt = false; - if (!HaveUnsignedTypes(psContext->psShader->eTargetLanguage) || argType == SVT_INT || argType == SVT_INT16 || argType == SVT_INT12) - { - isInt = true; - oFlag = TO_FLAG_INTEGER; - } - - bcatcstr(glsl, "if("); - TranslateOperand(&psInst->asOperands[0], oFlag); - - if (psInst->eBooleanTestType == INSTRUCTION_TEST_ZERO) - bcatcstr(glsl, " == "); - else - bcatcstr(glsl, " != "); - - bcatcstr(glsl, isInt ? 
"0)" : "uint(0))"); // Old ES3.0 Adrenos treat 0u as const int. - - if (psInst->eOpcode != OPCODE_IF) - { - bformata(glsl, " {%s;}\n", statement); - } - else - { - bcatcstr(glsl, " {\n"); - } - } -} - -void ToGLSL::HandleSwitchTransformation(Instruction* psInst, bstring glsl) -{ - SwitchConversion& current = m_SwitchStack.back(); - if (psInst->eOpcode != OPCODE_CASE && current.currentCaseOperands.size() > 0) - { - --psContext->indent; - psContext->AddIndentation(); - bcatcstr(glsl, current.isFirstCase ? "if(" : "} else if("); - current.isFirstCase = false; - for (size_t i = 0; i < current.currentCaseOperands.size(); ++i) - { - if (i > 0) - bcatcstr(glsl, " || "); - - bformata(glsl, "%s == %s", current.switchOperand->data, current.currentCaseOperands[i]->data); - bdestroy(current.currentCaseOperands[i]); - } - bcatcstr(glsl, ") {\n"); - ++psContext->indent; - current.currentCaseOperands.clear(); - } - - if (current.conditionalsInfo.size() > 0) - { - SwitchConversion::ConditionalInfo& conditional = current.conditionalsInfo.back(); - - if (conditional.breakEncountered) - { - // We first check for BREAK ENDIF sequence. - // If we see ELSE or CASE afterwards, we don't emit our own ELSE. 
- if (psInst->eOpcode == OPCODE_ENDIF && !conditional.endifEncountered) - conditional.endifEncountered = true; - else - { - conditional.endifEncountered = false; - conditional.breakEncountered = false; - if (psInst->eOpcode == OPCODE_ELSE) - { - if (conditional.breakCount > 0) - --conditional.breakCount; - } - else if (psInst->eOpcode != OPCODE_CASE) - { - psContext->AddIndentation(); - bcatcstr(glsl, "else {\n"); - ++psContext->indent; - } - } - } - - if (psInst->eOpcode == OPCODE_CASE || psInst->eOpcode == OPCODE_ENDSWITCH || (psInst->eOpcode == OPCODE_ENDIF && !conditional.endifEncountered)) - { - for (int i = 0; i < conditional.breakCount; ++i) - { - --psContext->indent; - psContext->AddIndentation(); - bcatcstr(glsl, "}\n"); - } - current.conditionalsInfo.pop_back(); - } - } -} - -void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = false */) -{ - bstring glsl = *psContext->currentGLSLString; - int numParenthesis = 0; - const bool isVulkan = ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0); - const bool avoidAtomicCounter = ((psContext->flags & HLSLCC_FLAG_AVOID_SHADER_ATOMIC_COUNTERS) != 0); - - if (!isEmbedded) - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - // Uncomment to print instruction IDs - //psContext->AddIndentation(); - //bformata(glsl, "//Instruction %d\n", psInst->id); - #if 0 - if (psInst->id == 73) - { - ASSERT(1); //Set breakpoint here to debug an instruction from its ID. - } - #endif - } - if (psInst->m_SkipTranslation) - return; - } - - if (!m_SwitchStack.empty()) - HandleSwitchTransformation(psInst, glsl); - - switch (psInst->eOpcode) - { - case OPCODE_FTOI: - case OPCODE_FTOU: - { - uint32_t dstCount = psInst->asOperands[0].GetNumSwizzleElements(); - uint32_t srcCount = psInst->asOperands[1].GetNumSwizzleElements(); - SHADER_VARIABLE_TYPE castType = psInst->eOpcode == OPCODE_FTOU ? 
SVT_UINT : SVT_INT; - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - if (psInst->eOpcode == OPCODE_FTOU) - bcatcstr(glsl, "//FTOU\n"); - else - bcatcstr(glsl, "//FTOI\n"); - } - switch (psInst->asOperands[0].eMinPrecision) - { - case OPERAND_MIN_PRECISION_DEFAULT: - break; - case OPERAND_MIN_PRECISION_SINT_16: - castType = SVT_INT16; - ASSERT(psInst->eOpcode == OPCODE_FTOI); - break; - case OPERAND_MIN_PRECISION_UINT_16: - castType = SVT_UINT16; - ASSERT(psInst->eOpcode == OPCODE_FTOU); - break; - default: - ASSERT(0); // We'd be doing bitcasts into low/mediump ints, not good. - } - psContext->AddIndentation(); - - AddAssignToDest(&psInst->asOperands[0], castType, srcCount, psInst->ui32PreciseMask, &numParenthesis); - bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, castType, dstCount, false)); - bcatcstr(glsl, "("); // 1 - TranslateOperand(&psInst->asOperands[1], TO_AUTO_BITCAST_TO_FLOAT, psInst->asOperands[0].GetAccessMask()); - bcatcstr(glsl, ")"); // 1 - AddAssignPrologue(numParenthesis); - break; - } - - case OPCODE_MOV: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - if (!isEmbedded) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//MOV\n"); - } - } - if (!isEmbedded) - psContext->AddIndentation(); - - // UNITY SPECIFIC: you can check case 1158280 - // This looks like a hack because it is! There is a bug that is quite hard to reproduce. 
- // When doing data analysis we assume that immediates are ints and hope it will be promoted later - // which is kinda fine unless there is an unfortunate combination happening: - // We operate on 4-component registers - we need different components to be treated as float/int - // but we should not use float operations (as this will mark register as float) - // instead "float" components should be used for MOV and friends to other registers - // and they, in turn, should be used for float ops - // In pseudocode it can look like this: - // var2.xy = var1.xy; var1.xy = var2.xy; // not marked as float explicitly - // bool foo = var1.z | <...> // marked as int - // Now we have immediate that will be treated as int but NOT promoted because we think we have all ints - // var1.w = 1 // var1 is marked int - // What is important is that this temporary is marked as int by us but DX compiler treats it - // as "normal" float (and rightfully so) [or rather - we speak about cases where it does treat it as float] - // It is also important that we speak about temps (otherwise we have explicit data type to use, so promotion works) - // - // At this point we have mov immediate to int temp (which should really be float temp) - { - Operand *pDst = &psInst->asOperands[0], *pSrc = &psInst->asOperands[1]; - if (pDst->GetDataType(psContext) == SVT_INT // dst marked as int - && pDst->eType == OPERAND_TYPE_TEMP // dst is temp - && pSrc->eType == OPERAND_TYPE_IMMEDIATE32 // src is immediate - && psContext->psShader->psIntTempSizes[pDst->ui32RegisterNumber] == 0 // no temp register allocated - ) - { - pDst->aeDataType[0] = pDst->aeDataType[1] = pDst->aeDataType[2] = pDst->aeDataType[3] = SVT_FLOAT; - } - } - - AddMOVBinaryOp(&psInst->asOperands[0], &psInst->asOperands[1], psInst->ui32PreciseMask, isEmbedded); - break; - } - case OPCODE_ITOF://signed to float - case OPCODE_UTOF://unsigned to float - { - SHADER_VARIABLE_TYPE castType = SVT_FLOAT; - uint32_t dstCount = 
psInst->asOperands[0].GetNumSwizzleElements(); - uint32_t srcCount = psInst->asOperands[1].GetNumSwizzleElements(); - - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - if (psInst->eOpcode == OPCODE_ITOF) - bcatcstr(glsl, "//ITOF\n"); - else - bcatcstr(glsl, "//UTOF\n"); - } - - switch (psInst->asOperands[0].eMinPrecision) - { - case OPERAND_MIN_PRECISION_DEFAULT: - break; - case OPERAND_MIN_PRECISION_FLOAT_2_8: - castType = SVT_FLOAT10; - break; - case OPERAND_MIN_PRECISION_FLOAT_16: - castType = SVT_FLOAT16; - break; - default: - ASSERT(0); // We'd be doing bitcasts into low/mediump ints, not good. - } - - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], castType, srcCount, psInst->ui32PreciseMask, &numParenthesis); - bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, castType, dstCount, false)); - bcatcstr(glsl, "("); // 1 - TranslateOperand(&psInst->asOperands[1], psInst->eOpcode == OPCODE_UTOF ? TO_AUTO_BITCAST_TO_UINT : TO_AUTO_BITCAST_TO_INT, psInst->asOperands[0].GetAccessMask()); - bcatcstr(glsl, ")"); // 1 - AddAssignPrologue(numParenthesis); - break; - } - case OPCODE_MAD: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//MAD\n"); - } - CallTernaryOp("*", "+", psInst, 0, 1, 2, 3, TO_FLAG_NONE); - break; - } - case OPCODE_IMAD: - { - uint32_t ui32Flags = TO_FLAG_INTEGER; - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//IMAD\n"); - } - if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) - { - ui32Flags = TO_FLAG_UNSIGNED_INTEGER; - } - - CallTernaryOp("*", "+", psInst, 0, 1, 2, 3, ui32Flags); - break; - } - case OPCODE_DADD: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//DADD\n"); - } - CallBinaryOp("+", psInst, 0, 1, 2, SVT_DOUBLE); - break; - 
} - case OPCODE_IADD: - { - SHADER_VARIABLE_TYPE eType = SVT_INT; - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - if (!isEmbedded) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//IADD\n"); - } - } - //Is this a signed or unsigned add? - if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) - { - eType = SVT_UINT; - } - CallBinaryOp("+", psInst, 0, 1, 2, eType, isEmbedded); - break; - } - case OPCODE_ADD: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//ADD\n"); - } - CallBinaryOp("+", psInst, 0, 1, 2, SVT_FLOAT); - break; - } - case OPCODE_OR: - { - /*Todo: vector version */ - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//OR\n"); - } - uint32_t dstSwizCount = psInst->asOperands[0].GetNumSwizzleElements(); - uint32_t destMask = psInst->asOperands[0].GetAccessMask(); - if (psInst->asOperands[0].GetDataType(psContext) == SVT_BOOL) - { - if (dstSwizCount == 1) - { - uint32_t destMask = psInst->asOperands[0].GetAccessMask(); - - int needsParenthesis = 0; - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_BOOL, psInst->asOperands[0].GetNumSwizzleElements(), psInst->ui32PreciseMask, &needsParenthesis); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_BOOL, destMask); - bcatcstr(glsl, " || "); - TranslateOperand(&psInst->asOperands[2], TO_FLAG_BOOL, destMask); - AddAssignPrologue(needsParenthesis); - } - else - { - Operand* pDest = &psInst->asOperands[0]; - const SHADER_VARIABLE_TYPE eDestType = pDest->GetDataType(psContext); - const std::string tempName = "hlslcc_orTemp"; - - psContext->AddIndentation(); - bcatcstr(glsl, "{\n"); - ++psContext->indent; - psContext->AddIndentation(); - - int numComponents = (pDest->eType == OPERAND_TYPE_TEMP) ? 
- psContext->psShader->GetTempComponentCount(eDestType, pDest->ui32RegisterNumber) : - pDest->iNumComponents; - const char* constructorStr = HLSLcc::GetConstructorForType(psContext, eDestType, numComponents, false); - bformata(glsl, "%s %s = ", constructorStr, tempName.c_str()); - TranslateOperand(pDest, TO_FLAG_NAME_ONLY); - bformata(glsl, ";\n"); - - const_cast(pDest)->specialName.assign(tempName); - - int srcElem = -1; - for (uint32_t destElem = 0; destElem < 4; ++destElem) - { - int numParenthesis = 0; - srcElem++; - if (pDest->eSelMode == OPERAND_4_COMPONENT_MASK_MODE && pDest->ui32CompMask != 0 && !(pDest->ui32CompMask & (1 << destElem))) - continue; - - psContext->AddIndentation(); - AddOpAssignToDestWithMask(pDest, eDestType, 1, psInst->ui32PreciseMask, &numParenthesis, 1 << destElem); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_BOOL, 1 << srcElem); - bcatcstr(glsl, " || "); - TranslateOperand(&psInst->asOperands[2], SVTTypeToFlag(eDestType), 1 << srcElem); - AddAssignPrologue(numParenthesis); - } - - const_cast(pDest)->specialName.clear(); - - psContext->AddIndentation(); - TranslateOperand(glsl, pDest, TO_FLAG_NAME_ONLY); - bformata(glsl, " = %s;\n", tempName.c_str()); - - --psContext->indent; - psContext->AddIndentation(); - bcatcstr(glsl, "}\n"); - } - } - else - CallBinaryOp("|", psInst, 0, 1, 2, SVT_UINT); - break; - } - case OPCODE_AND: - { - SHADER_VARIABLE_TYPE eA = psInst->asOperands[1].GetDataType(psContext); - SHADER_VARIABLE_TYPE eB = psInst->asOperands[2].GetDataType(psContext); - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//AND\n"); - } - uint32_t destMask = psInst->asOperands[0].GetAccessMask(); - const uint32_t dstSwizCount = psInst->asOperands[0].GetNumSwizzleElements(); - SHADER_VARIABLE_TYPE eDataType = psInst->asOperands[0].GetDataType(psContext); - uint32_t ui32Flags = SVTTypeToFlag(eDataType); - if (psInst->asOperands[0].GetDataType(psContext) == 
SVT_BOOL) - { - if (dstSwizCount == 1) - { - int needsParenthesis = 0; - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_BOOL, psInst->asOperands[0].GetNumSwizzleElements(), psInst->ui32PreciseMask, &needsParenthesis); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_BOOL, destMask); - bcatcstr(glsl, " && "); - TranslateOperand(&psInst->asOperands[2], TO_FLAG_BOOL, destMask); - AddAssignPrologue(needsParenthesis); - } - else - { - // Do component-wise and, glsl doesn't support && on bvecs - for (uint32_t k = 0; k < 4; k++) - { - if ((destMask & (1 << k)) == 0) - continue; - - int needsParenthesis = 0; - psContext->AddIndentation(); - // Override dest mask temporarily - psInst->asOperands[0].ui32CompMask = (1 << k); - ASSERT(psInst->asOperands[0].eSelMode == OPERAND_4_COMPONENT_MASK_MODE); - AddAssignToDest(&psInst->asOperands[0], SVT_BOOL, 1, psInst->ui32PreciseMask, &needsParenthesis); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_BOOL, 1 << k); - bcatcstr(glsl, " && "); - TranslateOperand(&psInst->asOperands[2], TO_FLAG_BOOL, 1 << k); - AddAssignPrologue(needsParenthesis); - } - // Restore old mask - psInst->asOperands[0].ui32CompMask = destMask; - } - } - else if ((eA == SVT_BOOL || eB == SVT_BOOL) && !(eA == SVT_BOOL && eB == SVT_BOOL)) - { - int boolOp = eA == SVT_BOOL ? 1 : 2; - int otherOp = eA == SVT_BOOL ? 2 : 1; - int needsParenthesis = 0; - uint32_t i; - psContext->AddIndentation(); - - if (dstSwizCount == 1) - { - AddAssignToDest(&psInst->asOperands[0], eDataType, dstSwizCount, psInst->ui32PreciseMask, &needsParenthesis); - TranslateOperand(&psInst->asOperands[boolOp], TO_FLAG_BOOL, destMask); - bcatcstr(glsl, " ? 
"); - TranslateOperand(&psInst->asOperands[otherOp], ui32Flags, destMask); - bcatcstr(glsl, " : "); - - bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, eDataType, dstSwizCount, false)); - bcatcstr(glsl, "("); - switch (eDataType) - { - case SVT_FLOAT: - case SVT_FLOAT10: - case SVT_FLOAT16: - case SVT_DOUBLE: - bcatcstr(glsl, "0.0"); - break; - default: - bcatcstr(glsl, "0"); - } - bcatcstr(glsl, ")"); - } - else if (eDataType == SVT_FLOAT) - { - // We cannot use mix(), because it propagates NaN from both endpoints, which - // is not correct if the AND was used to implement a branch that guards against NaN. - // Instead, do either a single ?: select if the bool is a scalar, or component-wise - // ?: selects if the bool is a vector. - if (psInst->asOperands[boolOp].IsSwizzleReplicated()) - { - // Bool is effectively a scalar, we can just do a single ?: - - // The swizzle is either xxxx, yyyy, zzzz, or wwww. In each case, - // the max component will give us the 1-based index. - int boolChannel = psInst->asOperands[boolOp].GetMaxComponent(); - - AddAssignToDest(&psInst->asOperands[0], eDataType, dstSwizCount, psInst->ui32PreciseMask, &needsParenthesis); - TranslateOperand(&psInst->asOperands[boolOp], TO_FLAG_BOOL, 1 << (boolChannel - 1)); - bcatcstr(glsl, " ? 
"); - TranslateOperand(&psInst->asOperands[otherOp], ui32Flags, destMask); - bcatcstr(glsl, " : "); - bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, eDataType, dstSwizCount, false)); - bcatcstr(glsl, "("); - for (i = 0; i < dstSwizCount; i++) - { - if (i > 0) - bcatcstr(glsl, ", "); - bcatcstr(glsl, "0.0"); - } - bcatcstr(glsl, ")"); - } - else - { - bool needsIndent = false; - - // Do component-wise select - for (uint32_t k = 0; k < 4; k++) - { - if ((destMask & (1 << k)) == 0) - continue; - - int needsParenthesis = 0; - if (needsIndent) - psContext->AddIndentation(); - - // Override dest mask temporarily - psInst->asOperands[0].ui32CompMask = (1 << k); - ASSERT(psInst->asOperands[0].eSelMode == OPERAND_4_COMPONENT_MASK_MODE); - AddAssignToDest(&psInst->asOperands[0], eDataType, 1, psInst->ui32PreciseMask, &needsParenthesis); - TranslateOperand(&psInst->asOperands[boolOp], TO_FLAG_BOOL, 1 << k); - bcatcstr(glsl, " ? "); - TranslateOperand(&psInst->asOperands[otherOp], ui32Flags, 1 << k); - bcatcstr(glsl, " : 0.0"); - AddAssignPrologue(needsParenthesis); - - needsIndent = true; - } - - // Restore old mask - psInst->asOperands[0].ui32CompMask = destMask; - } - } - else - { - AddAssignToDest(&psInst->asOperands[0], SVT_UINT, dstSwizCount, psInst->ui32PreciseMask, &needsParenthesis); - const bool haveNativeBitwiseOps = HaveNativeBitwiseOps(psContext->psShader->eTargetLanguage); - if (!haveNativeBitwiseOps) - { - UseExtraFunctionDependency("op_and"); - bcatcstr(glsl, "op_and"); - } - bcatcstr(glsl, "("); - bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, SVT_UINT, dstSwizCount, false)); - bcatcstr(glsl, "("); - TranslateOperand(&psInst->asOperands[boolOp], TO_FLAG_BOOL, destMask); - if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) - bcatcstr(glsl, ") * 0xFFFFFFFFu"); - else - bcatcstr(glsl, ") * -1"); // GLSL ES 2 spec: high precision ints are guaranteed to have a range of at least (-2^16, 2^16) - - if (haveNativeBitwiseOps) - bcatcstr(glsl, ") & 
"); - else - bcatcstr(glsl, ", "); - - TranslateOperand(&psInst->asOperands[otherOp], TO_FLAG_UNSIGNED_INTEGER, destMask); - if (!haveNativeBitwiseOps) - bcatcstr(glsl, ")"); - } - - AddAssignPrologue(needsParenthesis); - } - else - { - CallBinaryOp("&", psInst, 0, 1, 2, SVT_UINT); - } - - break; - } - case OPCODE_GE: - { - /* - dest = vec4(greaterThanEqual(vec4(srcA), vec4(srcB)); - Caveat: The result is a boolean but HLSL asm returns 0xFFFFFFFF/0x0 instead. - */ - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//GE\n"); - } - AddComparison(psInst, CMP_GE, TO_FLAG_NONE); - break; - } - case OPCODE_MUL: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//MUL\n"); - } - CallBinaryOp("*", psInst, 0, 1, 2, SVT_FLOAT); - break; - } - case OPCODE_IMUL: - { - SHADER_VARIABLE_TYPE eType = SVT_INT; - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//IMUL\n"); - } - if (psInst->asOperands[1].GetDataType(psContext) == SVT_UINT) - { - eType = SVT_UINT; - } - - ASSERT(psInst->asOperands[0].eType == OPERAND_TYPE_NULL); - - CallBinaryOp("*", psInst, 1, 2, 3, eType); - break; - } - case OPCODE_UDIV: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//UDIV\n"); - } - //destQuotient, destRemainder, src0, src1 - - // There are cases where destQuotient is the same variable as src0 or src1. If that happens, - // we need to compute "%" before the "/" in order to avoid src0 or src1 being overriden first. 
- if ((psInst->asOperands[0].eType != psInst->asOperands[2].eType || psInst->asOperands[0].ui32RegisterNumber != psInst->asOperands[2].ui32RegisterNumber) - && (psInst->asOperands[0].eType != psInst->asOperands[3].eType || psInst->asOperands[0].ui32RegisterNumber != psInst->asOperands[3].ui32RegisterNumber)) - { - CallBinaryOp("/", psInst, 0, 2, 3, SVT_UINT); - CallBinaryOp("%", psInst, 1, 2, 3, SVT_UINT); - } - else - { - CallBinaryOp("%", psInst, 1, 2, 3, SVT_UINT); - CallBinaryOp("/", psInst, 0, 2, 3, SVT_UINT); - } - break; - } - case OPCODE_DIV: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//DIV\n"); - } - CallBinaryOp("/", psInst, 0, 1, 2, SVT_FLOAT); - break; - } - case OPCODE_SINCOS: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//SINCOS\n"); - } - // Need careful ordering if src == dest[0], as then the cos() will be reading from wrong value - if (psInst->asOperands[0].eType == psInst->asOperands[2].eType && - psInst->asOperands[0].ui32RegisterNumber == psInst->asOperands[2].ui32RegisterNumber) - { - // sin() result overwrites source, do cos() first. - // The case where both write the src shouldn't really happen anyway. 
- if (psInst->asOperands[1].eType != OPERAND_TYPE_NULL) - { - CallHelper1("cos", psInst, 1, 2, 1); - } - - if (psInst->asOperands[0].eType != OPERAND_TYPE_NULL) - { - CallHelper1( - "sin", psInst, 0, 2, 1); - } - } - else - { - if (psInst->asOperands[0].eType != OPERAND_TYPE_NULL) - { - CallHelper1("sin", psInst, 0, 2, 1); - } - - if (psInst->asOperands[1].eType != OPERAND_TYPE_NULL) - { - CallHelper1("cos", psInst, 1, 2, 1); - } - } - break; - } - - case OPCODE_DP2: - { - int numParenthesis = 0; - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//DP2\n"); - } - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, 1, psInst->ui32PreciseMask, &numParenthesis); - bcatcstr(glsl, "dot("); - TranslateOperand(&psInst->asOperands[1], TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[2], TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); - bcatcstr(glsl, ")"); - AddAssignPrologue(numParenthesis); - break; - } - case OPCODE_DP3: - { - int numParenthesis = 0; - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//DP3\n"); - } - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, 1, psInst->ui32PreciseMask, &numParenthesis); - bcatcstr(glsl, "dot("); - TranslateOperand(&psInst->asOperands[1], TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_EXPAND_TO_VEC3, 7 /* .xyz */); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[2], TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_EXPAND_TO_VEC3, 7 /* .xyz */); - bcatcstr(glsl, ")"); - AddAssignPrologue(numParenthesis); - break; - } - case OPCODE_DP4: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//DP4\n"); - } - CallHelper2("dot", psInst, 0, 1, 2, 0); - break; - } - case OPCODE_INE: - { 
- if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//INE\n"); - } - AddComparison(psInst, CMP_NE, TO_FLAG_INTEGER); - break; - } - case OPCODE_NE: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//NE\n"); - } - AddComparison(psInst, CMP_NE, TO_FLAG_NONE); - break; - } - case OPCODE_IGE: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//IGE\n"); - } - AddComparison(psInst, CMP_GE, TO_FLAG_INTEGER); - break; - } - case OPCODE_ILT: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//ILT\n"); - } - AddComparison(psInst, CMP_LT, TO_FLAG_INTEGER); - break; - } - case OPCODE_LT: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//LT\n"); - } - AddComparison(psInst, CMP_LT, TO_FLAG_NONE); - break; - } - case OPCODE_IEQ: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//IEQ\n"); - } - AddComparison(psInst, CMP_EQ, TO_FLAG_INTEGER); - break; - } - case OPCODE_ULT: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//ULT\n"); - } - AddComparison(psInst, CMP_LT, TO_FLAG_UNSIGNED_INTEGER); - break; - } - case OPCODE_UGE: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//UGE\n"); - } - AddComparison(psInst, CMP_GE, TO_FLAG_UNSIGNED_INTEGER); - break; - } - case OPCODE_MOVC: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//MOVC\n"); - } - AddMOVCBinaryOp(&psInst->asOperands[0], &psInst->asOperands[1], 
&psInst->asOperands[2], &psInst->asOperands[3], psInst->ui32PreciseMask); - break; - } - case OPCODE_SWAPC: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//SWAPC\n"); - } - // TODO needs temps!! - AddMOVCBinaryOp(&psInst->asOperands[0], &psInst->asOperands[2], &psInst->asOperands[4], &psInst->asOperands[3], psInst->ui32PreciseMask); - AddMOVCBinaryOp(&psInst->asOperands[1], &psInst->asOperands[2], &psInst->asOperands[3], &psInst->asOperands[4], psInst->ui32PreciseMask); - break; - } - - case OPCODE_LOG: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//LOG\n"); - } - CallHelper1("log2", psInst, 0, 1, 1); - break; - } - case OPCODE_RSQ: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//RSQ\n"); - } - CallHelper1("inversesqrt", psInst, 0, 1, 1); - break; - } - case OPCODE_EXP: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//EXP\n"); - } - CallHelper1("exp2", psInst, 0, 1, 1); - break; - } - case OPCODE_SQRT: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//SQRT\n"); - } - CallHelper1("sqrt", psInst, 0, 1, 1); - break; - } - case OPCODE_ROUND_PI: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//ROUND_PI\n"); - } - CallHelper1("ceil", psInst, 0, 1, 1); - break; - } - case OPCODE_ROUND_NI: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//ROUND_NI\n"); - } - CallHelper1("floor", psInst, 0, 1, 1); - break; - } - case OPCODE_ROUND_Z: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - 
psContext->AddIndentation(); - bcatcstr(glsl, "//ROUND_Z\n"); - } - if (psContext->psShader->eTargetLanguage == LANG_ES_100) - UseExtraFunctionDependency("trunc"); - - CallHelper1("trunc", psInst, 0, 1, 1); - break; - } - case OPCODE_ROUND_NE: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//ROUND_NE\n"); - } - if (psContext->psShader->eTargetLanguage == LANG_ES_100) - UseExtraFunctionDependency("roundEven"); - - CallHelper1("roundEven", psInst, 0, 1, 1); - break; - } - case OPCODE_FRC: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//FRC\n"); - } - CallHelper1("fract", psInst, 0, 1, 1); - break; - } - case OPCODE_IMAX: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//IMAX\n"); - } - if (psContext->psShader->eTargetLanguage == LANG_ES_100) - CallHelper2("max", psInst, 0, 1, 2, 1); - else - CallHelper2Int("max", psInst, 0, 1, 2, 1); - break; - } - case OPCODE_UMAX: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//UMAX\n"); - } - if (psContext->psShader->eTargetLanguage == LANG_ES_100) - CallHelper2("max", psInst, 0, 1, 2, 1); - else - CallHelper2UInt("max", psInst, 0, 1, 2, 1); - break; - } - case OPCODE_MAX: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//MAX\n"); - } - CallHelper2("max", psInst, 0, 1, 2, 1); - break; - } - case OPCODE_IMIN: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//IMIN\n"); - } - if (psContext->psShader->eTargetLanguage == LANG_ES_100) - CallHelper2("min", psInst, 0, 1, 2, 1); - else - CallHelper2Int("min", psInst, 0, 1, 2, 1); - break; - } - case OPCODE_UMIN: - { - if 
(psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//UMIN\n"); - } - if (psContext->psShader->eTargetLanguage == LANG_ES_100) - CallHelper2("min", psInst, 0, 1, 2, 1); - else - CallHelper2UInt("min", psInst, 0, 1, 2, 1); - break; - } - case OPCODE_MIN: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//MIN\n"); - } - CallHelper2("min", psInst, 0, 1, 2, 1); - break; - } - case OPCODE_GATHER4: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//GATHER4\n"); - } - TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER); - break; - } - case OPCODE_GATHER4_PO_C: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//GATHER4_PO_C\n"); - } - TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER | TEXSMP_FLAG_PARAMOFFSET | TEXSMP_FLAG_DEPTHCOMPARE); - break; - } - case OPCODE_GATHER4_PO: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//GATHER4_PO\n"); - } - TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER | TEXSMP_FLAG_PARAMOFFSET); - break; - } - case OPCODE_GATHER4_C: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//GATHER4_C\n"); - } - TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER | TEXSMP_FLAG_DEPTHCOMPARE); - break; - } - case OPCODE_SAMPLE: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//SAMPLE\n"); - } - TranslateTextureSample(psInst, TEXSMP_FLAG_NONE); - break; - } - case OPCODE_SAMPLE_L: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//SAMPLE_L\n"); - } - 
TranslateTextureSample(psInst, TEXSMP_FLAG_LOD); - break; - } - case OPCODE_SAMPLE_C: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//SAMPLE_C\n"); - } - TranslateTextureSample(psInst, TEXSMP_FLAG_DEPTHCOMPARE); - break; - } - case OPCODE_SAMPLE_C_LZ: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//SAMPLE_C_LZ\n"); - } - TranslateTextureSample(psInst, TEXSMP_FLAG_DEPTHCOMPARE | TEXSMP_FLAG_FIRSTLOD); - break; - } - case OPCODE_SAMPLE_D: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//SAMPLE_D\n"); - } - TranslateTextureSample(psInst, TEXSMP_FLAG_GRAD); - break; - } - case OPCODE_SAMPLE_B: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//SAMPLE_B\n"); - } - TranslateTextureSample(psInst, TEXSMP_FLAG_BIAS); - break; - } - case OPCODE_RET: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//RET\n"); - } - if (psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode) - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//--- Post shader code ---\n"); - } - - bconcat(glsl, psContext->psShader->asPhases[psContext->currentPhase].postShaderCode); - - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//--- End post shader code ---\n"); - } - } - psContext->AddIndentation(); - bcatcstr(glsl, "return;\n"); - break; - } - case OPCODE_INTERFACE_CALL: - { - const char* name; - ShaderVar* psVar; - uint32_t varFound; - - uint32_t funcPointer; - uint32_t funcBodyIndex; - uint32_t ui32NumBodiesPerTable; - - if (psContext->flags & 
HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//INTERFACE_CALL\n"); - } - - ASSERT(psInst->asOperands[0].eIndexRep[0] == OPERAND_INDEX_IMMEDIATE32); - - funcPointer = psInst->asOperands[0].aui32ArraySizes[0]; - funcBodyIndex = psInst->ui32FuncIndexWithinInterface; - - ui32NumBodiesPerTable = psContext->psShader->funcPointer[funcPointer].ui32NumBodiesPerTable; - - varFound = psContext->psShader->sInfo.GetInterfaceVarFromOffset(funcPointer, &psVar); - - ASSERT(varFound); - - name = &psVar->name[0]; - - psContext->AddIndentation(); - bcatcstr(glsl, name); - TranslateOperandIndexMAD(&psInst->asOperands[0], 1, ui32NumBodiesPerTable, funcBodyIndex); - //bformata(glsl, "[%d]", funcBodyIndex); - bcatcstr(glsl, "();\n"); - break; - } - case OPCODE_LABEL: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//LABEL\n"); - } - --psContext->indent; - psContext->AddIndentation(); - bcatcstr(glsl, "}\n"); //Closing brace ends the previous function. 
- psContext->AddIndentation(); - - bcatcstr(glsl, "subroutine(SubroutineType)\n"); - bcatcstr(glsl, "void "); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); - bcatcstr(glsl, "(){\n"); - ++psContext->indent; - break; - } - case OPCODE_COUNTBITS: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//COUNTBITS\n"); - } - psContext->AddIndentation(); - - // in glsl bitCount decl is genIType bitCount(genIType), so it is important that input/output types agree - // enter assembly: when writing swizzle encoding we use 0 to say "source from x" - // now, say, we generate code o.xy = bitcount(i.xy) - // output gets component mask 1,1,0,0 (note that we use bit 1<).<..> will still collapse everything into - // bitCount(i.<..>) [well, tweaking swizzle, sure] - // what does that mean is that we can safely take output component count to determine "proper" type - // note that hlsl compiler already checked that things can work out, so it should be fine doing this magic - const Operand* dst = &psInst->asOperands[0]; - const int dstCompCount = dst->eSelMode == OPERAND_4_COMPONENT_MASK_MODE ? 
dst->ui32CompMask : OPERAND_4_COMPONENT_MASK_ALL; - - TranslateOperand(dst, TO_FLAG_INTEGER | TO_FLAG_DESTINATION); - bcatcstr(glsl, " = bitCount("); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, dstCompCount); - bcatcstr(glsl, ");\n"); - break; - } - case OPCODE_FIRSTBIT_HI: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//FIRSTBIT_HI\n"); - } - psContext->AddIndentation(); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_UNSIGNED_INTEGER | TO_FLAG_DESTINATION); - bcatcstr(glsl, " = findMSB("); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER); - bcatcstr(glsl, ");\n"); - break; - } - case OPCODE_FIRSTBIT_LO: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//FIRSTBIT_LO\n"); - } - psContext->AddIndentation(); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_UNSIGNED_INTEGER | TO_FLAG_DESTINATION); - bcatcstr(glsl, " = findLSB("); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER); - bcatcstr(glsl, ");\n"); - break; - } - case OPCODE_FIRSTBIT_SHI: //signed high - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//FIRSTBIT_SHI\n"); - } - psContext->AddIndentation(); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER | TO_FLAG_DESTINATION); - bcatcstr(glsl, " = findMSB("); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER); - bcatcstr(glsl, ");\n"); - break; - } - case OPCODE_BFREV: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//BFREV\n"); - } - psContext->AddIndentation(); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER | TO_FLAG_DESTINATION); - bcatcstr(glsl, " = bitfieldReverse("); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER); - bcatcstr(glsl, ");\n"); - break; - } 
- case OPCODE_BFI: - { - uint32_t destMask = psInst->asOperands[0].GetAccessMask(); - uint32_t numelements_width = psInst->asOperands[1].GetNumSwizzleElements(); - uint32_t numelements_offset = psInst->asOperands[2].GetNumSwizzleElements(); - uint32_t numelements_dest = psInst->asOperands[0].GetNumSwizzleElements(); - uint32_t numoverall_elements = std::min(std::min(numelements_width, numelements_offset), numelements_dest); - uint32_t i, j, k; - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//BFI\n"); - } - if (psContext->psShader->eTargetLanguage == LANG_ES_300) - UseExtraFunctionDependency("int_bitfieldInsert"); - - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_INT, numoverall_elements, psInst->ui32PreciseMask, &numParenthesis); - - if (numoverall_elements == 1) - bformata(glsl, "int("); - else - bformata(glsl, "ivec%d(", numoverall_elements); - - k = 0; - for (i = 0; i < 4; ++i) - { - if ((destMask & (1 << i)) == 0) - continue; - - k++; - if (psContext->psShader->eTargetLanguage == LANG_ES_300) - bcatcstr(glsl, "int_bitfieldInsert("); - else - bcatcstr(glsl, "bitfieldInsert("); - - for (j = 4; j >= 1; --j) - { - TranslateOperand(&psInst->asOperands[j], TO_FLAG_INTEGER, 1 << i); - if (j != 1) - bcatcstr(glsl, ","); - } - - bcatcstr(glsl, ") "); - if (k != numoverall_elements) - bcatcstr(glsl, ", "); - } - bcatcstr(glsl, ")"); - AddAssignPrologue(numParenthesis); - break; - } - case OPCODE_CUT: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//CUT\n"); - } - psContext->AddIndentation(); - bcatcstr(glsl, "EndPrimitive();\n"); - break; - } - case OPCODE_EMIT: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//EMIT\n"); - } - if (psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode) - { - if 
(psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//--- Post shader code ---\n"); - } - - bconcat(glsl, psContext->psShader->asPhases[psContext->currentPhase].postShaderCode); - - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//--- End post shader code ---\n"); - } - } - - psContext->AddIndentation(); - bcatcstr(glsl, "EmitVertex();\n"); - break; - } - case OPCODE_EMITTHENCUT: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//EMITTHENCUT\n"); - } - psContext->AddIndentation(); - bcatcstr(glsl, "EmitVertex();\n"); - psContext->AddIndentation(); - bcatcstr(glsl, "EndPrimitive();\n"); - break; - } - - case OPCODE_CUT_STREAM: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//CUT_STREAM\n"); - } - psContext->AddIndentation(); - ASSERT(psInst->asOperands[0].eType == OPERAND_TYPE_STREAM); - if (psContext->psShader->eTargetLanguage < LANG_400 || psInst->asOperands[0].ui32RegisterNumber == 0) - { - // ES geom shaders only support one stream. 
- bcatcstr(glsl, "EndPrimitive();\n"); - } - else - { - bcatcstr(glsl, "EndStreamPrimitive("); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); - bcatcstr(glsl, ");\n"); - } - - break; - } - case OPCODE_EMIT_STREAM: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//EMIT_STREAM\n"); - } - if (psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode) - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//--- Post shader code ---\n"); - } - - bconcat(glsl, psContext->psShader->asPhases[psContext->currentPhase].postShaderCode); - - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//--- End post shader code ---\n"); - } - } - - psContext->AddIndentation(); - - ASSERT(psInst->asOperands[0].eType == OPERAND_TYPE_STREAM); - if (psContext->psShader->eTargetLanguage < LANG_400 || psInst->asOperands[0].ui32RegisterNumber == 0) - { - // ES geom shaders only support one stream. - bcatcstr(glsl, "EmitVertex();\n"); - } - else - { - bcatcstr(glsl, "EmitStreamVertex("); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); - bcatcstr(glsl, ");\n"); - } - break; - } - case OPCODE_EMITTHENCUT_STREAM: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//EMITTHENCUT\n"); - } - ASSERT(psInst->asOperands[0].eType == OPERAND_TYPE_STREAM); - if (psContext->psShader->eTargetLanguage < LANG_400 || psInst->asOperands[0].ui32RegisterNumber == 0) - { - // ES geom shaders only support one stream. 
- bcatcstr(glsl, "EmitVertex();\n"); - psContext->AddIndentation(); - bcatcstr(glsl, "EndPrimitive();\n"); - } - else - { - bcatcstr(glsl, "EmitStreamVertex("); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); - bcatcstr(glsl, ");\n"); - psContext->AddIndentation(); - bcatcstr(glsl, "EndStreamPrimitive("); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); - bcatcstr(glsl, ");\n"); - } - break; - } - case OPCODE_REP: - { - if (!m_SwitchStack.empty()) - ++m_SwitchStack.back().isInLoop; - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//REP\n"); - } - //Need to handle nesting. - //Max of 4 for rep - 'Flow Control Limitations' http://msdn.microsoft.com/en-us/library/windows/desktop/bb219848(v=vs.85).aspx - - psContext->AddIndentation(); - bcatcstr(glsl, "RepCounter = "); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_X); - bcatcstr(glsl, ";\n"); - - psContext->AddIndentation(); - bcatcstr(glsl, "while(RepCounter!=0){\n"); - ++psContext->indent; - break; - } - case OPCODE_ENDREP: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//ENDREP\n"); - } - psContext->AddIndentation(); - bcatcstr(glsl, "RepCounter--;\n"); - - --psContext->indent; - - psContext->AddIndentation(); - bcatcstr(glsl, "}\n"); - if (!m_SwitchStack.empty()) - --m_SwitchStack.back().isInLoop; - break; - } - case OPCODE_LOOP: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//LOOP\n"); - } - if (!m_SwitchStack.empty()) - ++m_SwitchStack.back().isInLoop; - psContext->AddIndentation(); - - if (psInst->ui32NumOperands == 2) - { - //DX9 version - ASSERT(psInst->asOperands[0].eType == OPERAND_TYPE_SPECIAL_LOOPCOUNTER); - bcatcstr(glsl, "for("); - bcatcstr(glsl, "LoopCounter = "); - TranslateOperand(&psInst->asOperands[1], 
TO_FLAG_NONE); - bcatcstr(glsl, ".y, ZeroBasedCounter = 0;"); - bcatcstr(glsl, "ZeroBasedCounter < "); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE); - bcatcstr(glsl, ".x;"); - - bcatcstr(glsl, "LoopCounter += "); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE); - bcatcstr(glsl, ".z, ZeroBasedCounter++){\n"); - ++psContext->indent; - } - else if (psInst->m_LoopInductors[1] != 0) - { - // Can emit as for - uint32_t typeFlags = TO_FLAG_INTEGER; - bcatcstr(glsl, "for("); - if (psInst->m_LoopInductors[0] != 0) - { - if (psInst->m_InductorRegister != 0) - { - // Do declaration here as well - switch (psInst->m_LoopInductors[0]->asOperands[0].GetDataType(psContext)) - { - case SVT_INT: - bcatcstr(glsl, "int "); - break; - case SVT_UINT: - bcatcstr(glsl, "uint "); - typeFlags = TO_FLAG_UNSIGNED_INTEGER; - break; - default: - ASSERT(0); - break; - } - } - TranslateInstruction(psInst->m_LoopInductors[0], true); - } - bcatcstr(glsl, " ; "); - bool negateCondition = psInst->m_LoopInductors[1]->eBooleanTestType - != psInst->m_LoopInductors[2]->eBooleanTestType; - bool negateOrder = false; - - // Yet Another NVidia OSX shader compiler bug workaround (really nvidia, get your s#!t together): - // For reasons unfathomable to us, this breaks SSAO effect on OSX (case 756028) - // Broken: for(int ti_loop_1 = int(int(0xFFFFFFFCu)) ; 4 >= ti_loop_1 ; ti_loop_1++) - // Works: for (int ti_loop_1 = int(int(0xFFFFFFFCu)); ti_loop_1 <= 4; ti_loop_1++) - // - // So, check if the first argument is an immediate value, and if so, switch the order or the operands - // (and adjust condition) - if (psInst->m_LoopInductors[1]->asOperands[1].eType == OPERAND_TYPE_IMMEDIATE32) - negateOrder = true; - - const char *cmpOp = ""; - switch (psInst->m_LoopInductors[1]->eOpcode) - { - case OPCODE_IGE: - if (negateOrder) - cmpOp = negateCondition ? ">" : "<="; - else - cmpOp = negateCondition ? "<" : ">="; - break; - case OPCODE_ILT: - if (negateOrder) - cmpOp = negateCondition ? 
"<=" : ">"; - else - cmpOp = negateCondition ? ">=" : "<"; - break; - case OPCODE_IEQ: - // No need to change the comparison if negateOrder is true - cmpOp = negateCondition ? "!=" : "=="; - if (psInst->m_LoopInductors[1]->asOperands[0].GetDataType(psContext) == SVT_UINT) - typeFlags = TO_FLAG_UNSIGNED_INTEGER; - break; - case OPCODE_INE: - // No need to change the comparison if negateOrder is true - cmpOp = negateCondition ? "==" : "!="; - if (psInst->m_LoopInductors[1]->asOperands[0].GetDataType(psContext) == SVT_UINT) - typeFlags = TO_FLAG_UNSIGNED_INTEGER; - break; - case OPCODE_UGE: - if (negateOrder) - cmpOp = negateCondition ? ">" : "<="; - else - cmpOp = negateCondition ? "<" : ">="; - typeFlags = TO_FLAG_UNSIGNED_INTEGER; - break; - case OPCODE_ULT: - if (negateOrder) - cmpOp = negateCondition ? "<=" : ">"; - else - cmpOp = negateCondition ? ">=" : "<"; - typeFlags = TO_FLAG_UNSIGNED_INTEGER; - break; - - default: - ASSERT(0); - } - TranslateOperand(&psInst->m_LoopInductors[1]->asOperands[negateOrder ? 2 : 1], typeFlags); - bcatcstr(glsl, cmpOp); - TranslateOperand(&psInst->m_LoopInductors[1]->asOperands[negateOrder ? 
1 : 2], typeFlags); - - bcatcstr(glsl, " ; "); - // One more shortcut: translate IADD tX, tX, 1 to tX++ - if (HLSLcc::IsAddOneInstruction(psInst->m_LoopInductors[3])) - { - TranslateOperand(&psInst->m_LoopInductors[3]->asOperands[0], TO_FLAG_DESTINATION); - bcatcstr(glsl, "++"); - } - else - TranslateInstruction(psInst->m_LoopInductors[3], true); - - bcatcstr(glsl, ")\n"); - psContext->AddIndentation(); - bcatcstr(glsl, "{\n"); - ++psContext->indent; - } - else - { - if (psContext->psShader->eTargetLanguage == LANG_ES_100) - { - bstring name; - name = bformat(HLSLCC_TEMP_PREFIX "i_while_true_%d", m_NumDeclaredWhileTrueLoops++); - - // Workaround limitation with WebGL 1.0 GLSL, as we're expecting something to break the loop in any case - // Fragment shaders on some devices don't like too large integer constants (Adreno 3xx, for example) - int hardcoded_iteration_limit = (psContext->psShader->eShaderType == PIXEL_SHADER) ? 0x7FFF : 0x7FFFFFFF; - - bformata(glsl, "for(int %s = 0 ; %s < 0x%X ; %s++){\n", name->data, name->data, hardcoded_iteration_limit, name->data); - bdestroy(name); - } - else - { - bcatcstr(glsl, "while(true){\n"); - } - ++psContext->indent; - } - break; - } - case OPCODE_ENDLOOP: - { - --psContext->indent; - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//ENDLOOP\n"); - } - psContext->AddIndentation(); - bcatcstr(glsl, "}\n"); - if (!m_SwitchStack.empty()) - --m_SwitchStack.back().isInLoop; - break; - } - case OPCODE_BREAK: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//BREAK\n"); - } - if (m_SwitchStack.empty() || m_SwitchStack.back().isInLoop != 0) - { - psContext->AddIndentation(); - bcatcstr(glsl, "break;\n"); - } - else - { - std::vector& conditionalsInfo = m_SwitchStack.back().conditionalsInfo; - if (conditionalsInfo.size() > 0) - { - conditionalsInfo.back().breakEncountered = true; - 
++conditionalsInfo.back().breakCount; - } - } - break; - } - case OPCODE_BREAKC: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//BREAKC\n"); - } - psContext->AddIndentation(); - - if (m_SwitchStack.empty() || m_SwitchStack.back().isInLoop != 0) - { - TranslateConditional(psInst, glsl); - } - else - { - // This way we won't emit a "break" when we're transforming a "switch" into if/else for ES2 - OPCODE_TYPE opcode = psInst->eOpcode; - psInst->eOpcode = OPCODE_IF; - TranslateConditional(psInst, glsl); - psInst->eOpcode = opcode; - std::vector& conditionalsInfo = m_SwitchStack.back().conditionalsInfo; - conditionalsInfo.push_back(SwitchConversion::ConditionalInfo(1, true, true)); - } - break; - } - case OPCODE_CONTINUEC: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//CONTINUEC\n"); - } - psContext->AddIndentation(); - - TranslateConditional(psInst, glsl); - break; - } - case OPCODE_IF: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//IF\n"); - } - psContext->AddIndentation(); - - TranslateConditional(psInst, glsl); - ++psContext->indent; - - if (!m_SwitchStack.empty() && m_SwitchStack.back().isInLoop == 0) - { - std::vector& conditionalsInfo = m_SwitchStack.back().conditionalsInfo; - conditionalsInfo.push_back(SwitchConversion::ConditionalInfo(0)); - } - - break; - } - case OPCODE_RETC: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//RETC\n"); - } - psContext->AddIndentation(); - - TranslateConditional(psInst, glsl); - break; - } - case OPCODE_ELSE: - { - --psContext->indent; - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//ELSE\n"); - } - psContext->AddIndentation(); - bcatcstr(glsl, 
"} else {\n"); - psContext->indent++; - - if (!m_SwitchStack.empty() && m_SwitchStack.back().isInLoop == 0) - { - std::vector& conditionalsInfo = m_SwitchStack.back().conditionalsInfo; - conditionalsInfo.push_back(SwitchConversion::ConditionalInfo(0)); - } - break; - } - case OPCODE_ENDSWITCH: - { - const bool endsSwitch = m_SwitchStack.empty(); - --psContext->indent; - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//ENDSWITCH\n"); - } - if (endsSwitch) - --psContext->indent; - psContext->AddIndentation(); - bcatcstr(glsl, "}\n"); - if (!endsSwitch) - m_SwitchStack.pop_back(); - break; - } - case OPCODE_ENDIF: - { - --psContext->indent; - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//ENDIF\n"); - } - psContext->AddIndentation(); - bcatcstr(glsl, "}\n"); - - if (!m_SwitchStack.empty() && m_SwitchStack.back().isInLoop == 0) - { - std::vector& conditionalsInfo = m_SwitchStack.back().conditionalsInfo; - conditionalsInfo.pop_back(); - } - break; - } - case OPCODE_CONTINUE: - { - psContext->AddIndentation(); - bcatcstr(glsl, "continue;\n"); - break; - } - case OPCODE_DEFAULT: - { - --psContext->indent; - psContext->AddIndentation(); - if (m_SwitchStack.empty()) - bcatcstr(glsl, "default:\n"); - else - bcatcstr(glsl, "} else {\n"); - ++psContext->indent; - break; - } - case OPCODE_NOP: - { - break; - } - case OPCODE_SYNC: - { - const uint32_t ui32SyncFlags = psInst->ui32SyncFlags; - - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//SYNC\n"); - } - - if (ui32SyncFlags & SYNC_THREAD_GROUP_SHARED_MEMORY) - { - psContext->AddIndentation(); - bcatcstr(glsl, "memoryBarrierShared();\n"); - } - if (ui32SyncFlags & (SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GROUP | SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GLOBAL)) - { - psContext->AddIndentation(); - bcatcstr(glsl, 
"memoryBarrier();\n"); - } - if (ui32SyncFlags & SYNC_THREADS_IN_GROUP) - { - psContext->AddIndentation(); - bcatcstr(glsl, "barrier();\n"); - } - break; - } - case OPCODE_SWITCH: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//SWITCH\n"); - } - if (psContext->psShader->eTargetLanguage != LANG_ES_100) - { - psContext->AddIndentation(); - bcatcstr(glsl, "switch("); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER); - bcatcstr(glsl, "){\n"); - - psContext->indent += 2; - } - else - { - // GLSL ES2 doesn't support switch, need to convert to if/else if/else - SwitchConversion conversion; - TranslateOperand(conversion.switchOperand, &psInst->asOperands[0], TO_FLAG_INTEGER); - m_SwitchStack.push_back(conversion); - ++psContext->indent; - } - break; - } - case OPCODE_CASE: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//case\n"); - } - if (m_SwitchStack.empty()) - { - --psContext->indent; - psContext->AddIndentation(); - - bcatcstr(glsl, "case "); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER); - bcatcstr(glsl, ":\n"); - - ++psContext->indent; - } - else - { - bstring operand = bfromcstr(""); - TranslateOperand(operand, &psInst->asOperands[0], TO_FLAG_INTEGER); - m_SwitchStack.back().currentCaseOperands.push_back(operand); - } - break; - } - case OPCODE_EQ: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//EQ\n"); - } - AddComparison(psInst, CMP_EQ, TO_FLAG_NONE); - break; - } - case OPCODE_USHR: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//USHR\n"); - } - CallBinaryOp(">>", psInst, 0, 1, 2, SVT_UINT); - break; - } - case OPCODE_ISHL: - { - SHADER_VARIABLE_TYPE eType = SVT_INT; - - if (psContext->flags & 
HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//ISHL\n"); - } - - if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) - { - eType = SVT_UINT; - } - - CallBinaryOp("<<", psInst, 0, 1, 2, eType); - break; - } - case OPCODE_ISHR: - { - SHADER_VARIABLE_TYPE eType = SVT_INT; - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//ISHR\n"); - } - - if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) - { - eType = SVT_UINT; - } - - CallBinaryOp(">>", psInst, 0, 1, 2, eType); - break; - } - case OPCODE_LD: - case OPCODE_LD_MS: - { - const ResourceBinding* psBinding = 0; - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - if (psInst->eOpcode == OPCODE_LD) - bcatcstr(glsl, "//LD\n"); - else - bcatcstr(glsl, "//LD_MS\n"); - } - - psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, psInst->asOperands[2].ui32RegisterNumber, &psBinding); - - TranslateTexelFetch(psInst, psBinding, glsl); - break; - } - case OPCODE_DISCARD: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//DISCARD\n"); - } - psContext->AddIndentation(); - if (psContext->psShader->ui32MajorVersion <= 3) - { - bcatcstr(glsl, "if(any(lessThan(("); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_NONE); - - if (psContext->psShader->ui32MajorVersion == 1) - { - /* SM1.X only kills based on the rgb channels */ - bcatcstr(glsl, ").xyz, vec3(0)))){discard;}\n"); - } - else - { - bcatcstr(glsl, "), vec4(0)))){discard;}\n"); - } - } - else if (psInst->eBooleanTestType == INSTRUCTION_TEST_ZERO) - { - const bool isBool = psInst->asOperands[0].GetDataType(psContext, SVT_INT) == SVT_BOOL; - const bool forceNoBoolUpscale = psContext->psShader->eTargetLanguage >= LANG_ES_FIRST && psContext->psShader->eTargetLanguage <= LANG_ES_LAST; - const bool 
useDirectTest = isBool && forceNoBoolUpscale; - bcatcstr(glsl, "if("); - bcatcstr(glsl, useDirectTest ? "!" : "("); - TranslateOperand(&psInst->asOperands[0], useDirectTest ? TO_FLAG_BOOL : TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_ALL, forceNoBoolUpscale); - if (!useDirectTest) - bcatcstr(glsl, ")==0"); - bcatcstr(glsl, "){discard;}\n"); - } - else - { - ASSERT(psInst->eBooleanTestType == INSTRUCTION_TEST_NONZERO); - const bool isBool = psInst->asOperands[0].GetDataType(psContext, SVT_INT) == SVT_BOOL; - const bool forceNoBoolUpscale = psContext->psShader->eTargetLanguage >= LANG_ES_FIRST && psContext->psShader->eTargetLanguage <= LANG_ES_LAST; - const bool useDirectTest = isBool && forceNoBoolUpscale; - bcatcstr(glsl, "if("); - if (!useDirectTest) - bcatcstr(glsl, "("); - TranslateOperand(&psInst->asOperands[0], useDirectTest ? TO_FLAG_BOOL : TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_ALL, forceNoBoolUpscale); - if (!useDirectTest) - bcatcstr(glsl, ")!=0"); - bcatcstr(glsl, "){discard;}\n"); - } - break; - } - case OPCODE_LOD: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//LOD\n"); - } - //LOD computes the following vector (ClampedLOD, NonClampedLOD, 0, 0) - - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, 4, psInst->ui32PreciseMask, &numParenthesis); - - //If the core language does not have query-lod feature, - //then the extension is used. The name of the function - //changed between extension and core. 
- if (HaveQueryLod(psContext->psShader->eTargetLanguage)) - { - bcatcstr(glsl, "textureQueryLod("); - } - else - { - bcatcstr(glsl, "textureQueryLOD("); - } - - TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); - bcatcstr(glsl, ","); - TranslateTexCoord( - psContext->psShader->aeResourceDims[psInst->asOperands[2].ui32RegisterNumber], - &psInst->asOperands[1]); - bcatcstr(glsl, ")"); - - //The swizzle on srcResource allows the returned values to be swizzled arbitrarily before they are written to the destination. - - // iWriteMaskEnabled is forced off during DecodeOperand because swizzle on sampler uniforms - // does not make sense. But need to re-enable to correctly swizzle this particular instruction. - psInst->asOperands[2].iWriteMaskEnabled = 1; - TranslateOperandSwizzleWithMask(psContext, &psInst->asOperands[2], psInst->asOperands[0].GetAccessMask(), 0); - AddAssignPrologue(numParenthesis); - break; - } - case OPCODE_EVAL_CENTROID: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//EVAL_CENTROID\n"); - } - psContext->AddIndentation(); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); - bcatcstr(glsl, " = interpolateAtCentroid("); - //interpolateAtCentroid accepts in-qualified variables. - //As long as bytecode only writes vX registers in declarations - //we should be able to use the declared name directly. - TranslateOperand(&psInst->asOperands[1], TO_FLAG_DECLARATION_NAME); - bcatcstr(glsl, ");\n"); - break; - } - case OPCODE_EVAL_SAMPLE_INDEX: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//EVAL_SAMPLE_INDEX\n"); - } - psContext->AddIndentation(); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); - bcatcstr(glsl, " = interpolateAtSample("); - //interpolateAtSample accepts in-qualified variables. 
- //As long as bytecode only writes vX registers in declarations - //we should be able to use the declared name directly. - TranslateOperand(&psInst->asOperands[1], TO_FLAG_DECLARATION_NAME); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[2], TO_FLAG_INTEGER); - bcatcstr(glsl, ");\n"); - break; - } - case OPCODE_EVAL_SNAPPED: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//EVAL_SNAPPED\n"); - } - psContext->AddIndentation(); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); - bcatcstr(glsl, " = interpolateAtOffset("); - //interpolateAtOffset accepts in-qualified variables. - //As long as bytecode only writes vX registers in declarations - //we should be able to use the declared name directly. - TranslateOperand(&psInst->asOperands[1], TO_FLAG_DECLARATION_NAME); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[2], TO_FLAG_INTEGER); - bcatcstr(glsl, ".xy);\n"); - break; - } - case OPCODE_LD_STRUCTURED: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//LD_STRUCTURED\n"); - } - TranslateShaderStorageLoad(psInst); - break; - } - case OPCODE_LD_UAV_TYPED: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//LD_UAV_TYPED\n"); - } - Operand* psDest = &psInst->asOperands[0]; - Operand* psSrc = &psInst->asOperands[2]; - Operand* psSrcAddr = &psInst->asOperands[1]; - - int srcCount = psSrc->GetNumSwizzleElements(); - int numParenthesis = 0; - uint32_t compMask = 0; - - switch (psInst->eResDim) - { - case RESOURCE_DIMENSION_TEXTURE3D: - case RESOURCE_DIMENSION_TEXTURE2DARRAY: - case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: - case RESOURCE_DIMENSION_TEXTURECUBEARRAY: - compMask |= (1 << 2); - case RESOURCE_DIMENSION_TEXTURECUBE: - case RESOURCE_DIMENSION_TEXTURE1DARRAY: - case RESOURCE_DIMENSION_TEXTURE2D: - 
case RESOURCE_DIMENSION_TEXTURE2DMS: - compMask |= (1 << 1); - case RESOURCE_DIMENSION_TEXTURE1D: - case RESOURCE_DIMENSION_BUFFER: - compMask |= 1; - break; - default: - ASSERT(0); - break; - } - - SHADER_VARIABLE_TYPE srcDataType = SVT_FLOAT; - const ResourceBinding* psBinding = 0; - psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, psSrc->ui32RegisterNumber, &psBinding); - switch (psBinding->ui32ReturnType) - { - case RETURN_TYPE_FLOAT: - srcDataType = SVT_FLOAT; - break; - case RETURN_TYPE_SINT: - srcDataType = SVT_INT; - break; - case RETURN_TYPE_UINT: - srcDataType = SVT_UINT; - break; - case RETURN_TYPE_SNORM: - case RETURN_TYPE_UNORM: - srcDataType = SVT_FLOAT; - break; - default: - ASSERT(0); - // Suppress uninitialised variable warning - srcDataType = SVT_VOID; - break; - } - - psContext->AddIndentation(); - AddAssignToDest(psDest, srcDataType, srcCount, psInst->ui32PreciseMask, &numParenthesis); - bcatcstr(glsl, "imageLoad("); - TranslateOperand(psSrc, TO_FLAG_NAME_ONLY); - bcatcstr(glsl, ", "); - TranslateOperand(psSrcAddr, TO_FLAG_INTEGER, compMask); - bcatcstr(glsl, ")"); - TranslateOperandSwizzleWithMask(psContext, psSrc, psDest->ui32CompMask, 0); - AddAssignPrologue(numParenthesis); - break; - } - case OPCODE_STORE_RAW: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//STORE_RAW\n"); - } - TranslateShaderStorageStore(psInst); - break; - } - case OPCODE_STORE_STRUCTURED: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//STORE_STRUCTURED\n"); - } - TranslateShaderStorageStore(psInst); - break; - } - - case OPCODE_STORE_UAV_TYPED: - { - const ResourceBinding* psRes; - int foundResource; - uint32_t flags = TO_FLAG_INTEGER; - uint32_t opMask = OPERAND_4_COMPONENT_MASK_ALL; - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - 
bcatcstr(glsl, "//STORE_UAV_TYPED\n"); - } - psContext->AddIndentation(); - - foundResource = psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, - psInst->asOperands[0].ui32RegisterNumber, - &psRes); - - ASSERT(foundResource); - - bcatcstr(glsl, "imageStore("); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_NAME_ONLY); - bcatcstr(glsl, ", "); - - switch (psRes->eDimension) - { - case REFLECT_RESOURCE_DIMENSION_TEXTURE1D: - case REFLECT_RESOURCE_DIMENSION_BUFFER: - opMask = OPERAND_4_COMPONENT_MASK_X; - break; - case REFLECT_RESOURCE_DIMENSION_TEXTURE2D: - case REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY: - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS: - opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y; - flags |= TO_AUTO_EXPAND_TO_VEC2; - break; - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY: - case REFLECT_RESOURCE_DIMENSION_TEXTURE3D: - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY: - case REFLECT_RESOURCE_DIMENSION_TEXTURECUBE: - opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z; - flags |= TO_AUTO_EXPAND_TO_VEC3; - break; - case REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY: - flags |= TO_AUTO_EXPAND_TO_VEC4; - break; - default: - ASSERT(0); - break; - } - - TranslateOperand(&psInst->asOperands[1], flags, opMask); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[2], ResourceReturnTypeToFlag(psRes->ui32ReturnType)); - bformata(glsl, ");\n"); - - break; - } - case OPCODE_LD_RAW: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//LD_RAW\n"); - } - - TranslateShaderStorageLoad(psInst); - break; - } - - case OPCODE_ATOMIC_AND: - case OPCODE_ATOMIC_OR: - case OPCODE_ATOMIC_XOR: - case OPCODE_ATOMIC_CMP_STORE: - case OPCODE_ATOMIC_IADD: - case OPCODE_ATOMIC_IMAX: - case OPCODE_ATOMIC_IMIN: - case OPCODE_ATOMIC_UMAX: - case OPCODE_ATOMIC_UMIN: - case OPCODE_IMM_ATOMIC_IADD: - case 
OPCODE_IMM_ATOMIC_AND: - case OPCODE_IMM_ATOMIC_OR: - case OPCODE_IMM_ATOMIC_XOR: - case OPCODE_IMM_ATOMIC_EXCH: - case OPCODE_IMM_ATOMIC_CMP_EXCH: - case OPCODE_IMM_ATOMIC_IMAX: - case OPCODE_IMM_ATOMIC_IMIN: - case OPCODE_IMM_ATOMIC_UMAX: - case OPCODE_IMM_ATOMIC_UMIN: - { - TranslateAtomicMemOp(psInst); - break; - } - case OPCODE_UBFE: - case OPCODE_IBFE: - { - int numParenthesis = 0; - int i; - uint32_t writeMask = psInst->asOperands[0].GetAccessMask(); - SHADER_VARIABLE_TYPE dataType = psInst->eOpcode == OPCODE_UBFE ? SVT_UINT : SVT_INT; - uint32_t flags = psInst->eOpcode == OPCODE_UBFE ? TO_AUTO_BITCAST_TO_UINT : TO_AUTO_BITCAST_TO_INT; - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - if (psInst->eOpcode == OPCODE_UBFE) - bcatcstr(glsl, "//OPCODE_UBFE\n"); - else - bcatcstr(glsl, "//OPCODE_IBFE\n"); - } - // Need to open this up, GLSL bitfieldextract uses same offset and width for all components - for (i = 0; i < 4; i++) - { - if ((writeMask & (1 << i)) == 0) - continue; - psContext->AddIndentation(); - psInst->asOperands[0].ui32CompMask = (1 << i); - psInst->asOperands[0].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; - AddAssignToDest(&psInst->asOperands[0], dataType, 1, psInst->ui32PreciseMask, &numParenthesis); - - bcatcstr(glsl, "bitfieldExtract("); - TranslateOperand(&psInst->asOperands[3], flags, (1 << i)); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[2], TO_AUTO_BITCAST_TO_INT, (1 << i)); - bcatcstr(glsl, ", "); - TranslateOperand(&psInst->asOperands[1], TO_AUTO_BITCAST_TO_INT, (1 << i)); - bcatcstr(glsl, ")"); - AddAssignPrologue(numParenthesis); - } - break; - } - case OPCODE_RCP: - { - const uint32_t destElemCount = psInst->asOperands[0].GetNumSwizzleElements(); - const uint32_t srcElemCount = psInst->asOperands[1].GetNumSwizzleElements(); - int numParenthesis = 0; - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - 
bcatcstr(glsl, "//RCP\n"); - } - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, srcElemCount, psInst->ui32PreciseMask, &numParenthesis); - bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, SVT_FLOAT, destElemCount, false)); - bcatcstr(glsl, "(1.0) / "); - bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, SVT_FLOAT, destElemCount, false)); - bcatcstr(glsl, "("); - numParenthesis++; - TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE, psInst->asOperands[0].GetAccessMask()); - AddAssignPrologue(numParenthesis); - break; - } - case OPCODE_F32TOF16: - { - uint32_t writeMask = psInst->asOperands[0].GetAccessMask(); - - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//F32TOF16\n"); - } - - for (int i = 0; i < 4; i++) - { - if ((writeMask & (1 << i)) == 0) - continue; - psContext->AddIndentation(); - psInst->asOperands[0].ui32CompMask = (1 << i); - psInst->asOperands[0].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; - AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, psInst->ui32PreciseMask, &numParenthesis); - - bcatcstr(glsl, "packHalf2x16(vec2("); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE, (1 << i)); - bcatcstr(glsl, ", 0.0))"); - AddAssignPrologue(numParenthesis); - } - break; - } - case OPCODE_F16TOF32: - { - uint32_t writeMask = psInst->asOperands[0].GetAccessMask(); - - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//F16TOF32\n"); - } - - for (int i = 0; i < 4; i++) - { - if ((writeMask & (1 << i)) == 0) - continue; - psContext->AddIndentation(); - psInst->asOperands[0].ui32CompMask = (1 << i); - psInst->asOperands[0].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; - AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, 1, psInst->ui32PreciseMask, &numParenthesis); - - bcatcstr(glsl, "unpackHalf2x16("); - TranslateOperand(&psInst->asOperands[1], TO_AUTO_BITCAST_TO_UINT, (1 << 
i)); - bcatcstr(glsl, ").x"); - AddAssignPrologue(numParenthesis); - } - break; - } - case OPCODE_INEG: - { - int numParenthesis = 0; - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//INEG\n"); - } - //dest = 0 - src0 - psContext->AddIndentation(); - - AddAssignToDest(&psInst->asOperands[0], SVT_INT, psInst->asOperands[1].GetNumSwizzleElements(), psInst->ui32PreciseMask, &numParenthesis); - - bcatcstr(glsl, "0 - "); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, psInst->asOperands[0].GetAccessMask()); - AddAssignPrologue(numParenthesis); - break; - } - case OPCODE_DERIV_RTX_COARSE: - case OPCODE_DERIV_RTX_FINE: - case OPCODE_DERIV_RTX: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//DERIV_RTX\n"); - } - CallHelper1("dFdx", psInst, 0, 1, 1); - break; - } - case OPCODE_DERIV_RTY_COARSE: - case OPCODE_DERIV_RTY_FINE: - case OPCODE_DERIV_RTY: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//DERIV_RTY\n"); - } - CallHelper1("dFdy", psInst, 0, 1, 1); - break; - } - case OPCODE_LRP: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//LRP\n"); - } - CallHelper3("mix", psInst, 0, 2, 3, 1, 1); - break; - } - case OPCODE_DP2ADD: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//DP2ADD\n"); - } - psContext->AddIndentation(); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); - bcatcstr(glsl, " = dot(vec2("); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE); - bcatcstr(glsl, "), vec2("); - TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); - bcatcstr(glsl, ")) + "); - TranslateOperand(&psInst->asOperands[3], TO_FLAG_NONE); - bcatcstr(glsl, ";\n"); - break; - } - 
case OPCODE_POW: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//POW\n"); - } - psContext->AddIndentation(); - TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); - bcatcstr(glsl, " = pow(abs("); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE); - bcatcstr(glsl, "), "); - TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); - bcatcstr(glsl, ");\n"); - break; - } - - case OPCODE_IMM_ATOMIC_ALLOC: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_ALLOC\n"); - } - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, psInst->ui32PreciseMask, &numParenthesis); - if (isVulkan || avoidAtomicCounter) - bcatcstr(glsl, "atomicAdd("); - else - bcatcstr(glsl, "atomicCounterIncrement("); - ResourceName(glsl, psContext, RGROUP_UAV, psInst->asOperands[1].ui32RegisterNumber, 0); - bformata(glsl, "_counter"); - if (isVulkan || avoidAtomicCounter) - bcatcstr(glsl, ", 1u)"); - else - bcatcstr(glsl, ")"); - AddAssignPrologue(numParenthesis); - break; - } - case OPCODE_IMM_ATOMIC_CONSUME: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_CONSUME\n"); - } - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, psInst->ui32PreciseMask, &numParenthesis); - if (isVulkan || avoidAtomicCounter) - bcatcstr(glsl, "(atomicAdd("); - else - bcatcstr(glsl, "atomicCounterDecrement("); - ResourceName(glsl, psContext, RGROUP_UAV, psInst->asOperands[1].ui32RegisterNumber, 0); - bformata(glsl, "_counter"); - if (isVulkan || avoidAtomicCounter) - bcatcstr(glsl, ", 0xffffffffu) + 0xffffffffu)"); - else - bcatcstr(glsl, ")"); - AddAssignPrologue(numParenthesis); - break; - } - - case OPCODE_NOT: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - 
psContext->AddIndentation(); - bcatcstr(glsl, "//NOT\n"); - } - // Adreno 3xx fails on ~a with "Internal compiler error: unexpected operator", use op_not instead - if (!HaveNativeBitwiseOps(psContext->psShader->eTargetLanguage) || psContext->psShader->eTargetLanguage == LANG_ES_300) - { - UseExtraFunctionDependency("op_not"); - - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_INT, psInst->asOperands[1].GetNumSwizzleElements(), psInst->ui32PreciseMask, &numParenthesis); - bcatcstr(glsl, "op_not("); - numParenthesis++; - TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, psInst->asOperands[0].GetAccessMask()); - AddAssignPrologue(numParenthesis); - } - else - { - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_INT, psInst->asOperands[1].GetNumSwizzleElements(), psInst->ui32PreciseMask, &numParenthesis); - - bcatcstr(glsl, "~("); - numParenthesis++; - TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, psInst->asOperands[0].GetAccessMask()); - AddAssignPrologue(numParenthesis); - } - break; - } - case OPCODE_XOR: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//XOR\n"); - } - CallBinaryOp("^", psInst, 0, 1, 2, SVT_UINT); - break; - } - case OPCODE_RESINFO: - { - uint32_t destElem; - uint32_t mask = psInst->asOperands[0].GetAccessMask(); - - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//RESINFO\n"); - } - - for (destElem = 0; destElem < 4; ++destElem) - { - if (1 << destElem & mask) - GetResInfoData(psInst, psInst->asOperands[2].aui32Swizzle[destElem], destElem); - } - - break; - } - case OPCODE_BUFINFO: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//BUFINFO\n"); - } - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_INT, 1, 
psInst->ui32PreciseMask, &numParenthesis); - TranslateOperand(&psInst->asOperands[1], TO_FLAG_NAME_ONLY); - bcatcstr(glsl, "_buf.length()"); - AddAssignPrologue(numParenthesis); - break; - } - case OPCODE_SAMPLE_INFO: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//SAMPLE_INFO\n"); - } - const RESINFO_RETURN_TYPE eResInfoReturnType = psInst->eResInfoReturnType; - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_FLOAT ? SVT_FLOAT : SVT_UINT, 1, psInst->ui32PreciseMask, &numParenthesis); - bcatcstr(glsl, "textureSamples("); - std::string texName = ResourceName(psContext, RGROUP_TEXTURE, psInst->asOperands[1].ui32RegisterNumber, 0); - if (psContext->IsVulkan()) - { - std::string vulkanSamplerName = GetVulkanDummySamplerName(); - - const RESOURCE_DIMENSION eResDim = psContext->psShader->aeResourceDims[psInst->asOperands[2].ui32RegisterNumber]; - std::string smpType = GetSamplerType(psContext, eResDim, psInst->asOperands[2].ui32RegisterNumber); - std::ostringstream oss; - oss << smpType; - oss << "(" << texName << ", " << vulkanSamplerName << ")"; - texName = oss.str(); - } - bcatcstr(glsl, texName.c_str()); - bcatcstr(glsl, ")"); - AddAssignPrologue(numParenthesis); - break; - } - case OPCODE_DMAX: - case OPCODE_DMIN: - case OPCODE_DMUL: - case OPCODE_DEQ: - case OPCODE_DGE: - case OPCODE_DLT: - case OPCODE_DNE: - case OPCODE_DMOV: - case OPCODE_DMOVC: - case OPCODE_DTOF: - case OPCODE_FTOD: - case OPCODE_DDIV: - case OPCODE_DFMA: - case OPCODE_DRCP: - case OPCODE_MSAD: - case OPCODE_DTOI: - case OPCODE_DTOU: - case OPCODE_ITOD: - case OPCODE_UTOD: - default: - { - ASSERT(0); - break; - } - } - - if (psInst->bSaturate) //Saturate is only for floating point data (float opcodes or MOV) - { - int dstCount = psInst->asOperands[0].GetNumSwizzleElements(); - - const bool workaroundAdrenoBugs = 
psContext->psShader->eTargetLanguage == LANG_ES_300; - - if (workaroundAdrenoBugs) - bcatcstr(glsl, "#ifdef UNITY_ADRENO_ES3\n"); - - for (int i = workaroundAdrenoBugs ? 0 : 1; i < 2; ++i) - { - const bool generateWorkaround = (i == 0); - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, dstCount, psInst->ui32PreciseMask, &numParenthesis); - bcatcstr(glsl, generateWorkaround ? "min(max(" : "clamp("); - TranslateOperand(&psInst->asOperands[0], TO_AUTO_BITCAST_TO_FLOAT); - bcatcstr(glsl, generateWorkaround ? ", 0.0), 1.0)" : ", 0.0, 1.0)"); - AddAssignPrologue(numParenthesis); - - if (generateWorkaround) - bcatcstr(glsl, "#else\n"); - } - - if (workaroundAdrenoBugs) - bcatcstr(glsl, "#endif\n"); - } -} diff --git a/third_party/HLSLcc/src/toGLSLOperand.cpp b/third_party/HLSLcc/src/toGLSLOperand.cpp deleted file mode 100644 index 55b8db5..0000000 --- a/third_party/HLSLcc/src/toGLSLOperand.cpp +++ /dev/null @@ -1,1892 +0,0 @@ -#include "internal_includes/toGLSLOperand.h" -#include "internal_includes/HLSLccToolkit.h" -#include "internal_includes/HLSLCrossCompilerContext.h" -#include "internal_includes/languages.h" -#include "bstrlib.h" -#include "hlslcc.h" -#include "internal_includes/debug.h" -#include "internal_includes/Shader.h" -#include "internal_includes/toGLSL.h" -#include "internal_includes/languages.h" -#include - -#include - -#include -#include - -using namespace HLSLcc; - -#ifndef fpcheck -#ifdef _MSC_VER -#define fpcheck(x) (_isnan(x) || !_finite(x)) -#else -#define fpcheck(x) (std::isnan(x) || std::isinf(x)) -#endif -#endif // #ifndef fpcheck - -// In case we need to fake dynamic indexing -static const char *squareBrackets[2][2] = { { "DynamicIndex(", ")" }, { "[", "]" } }; - -// Returns nonzero if types are just different precisions of the same underlying type -static bool AreTypesCompatible(SHADER_VARIABLE_TYPE a, uint32_t ui32TOFlag) -{ - SHADER_VARIABLE_TYPE b = TypeFlagsToSVTType(ui32TOFlag); - - if (a == b) - return 
true; - - // Special case for array indices: both uint and int are fine - if ((ui32TOFlag & TO_FLAG_INTEGER) && (ui32TOFlag & TO_FLAG_UNSIGNED_INTEGER) && - (a == SVT_INT || a == SVT_INT16 || a == SVT_UINT || a == SVT_UINT16)) - return true; - - if ((a == SVT_FLOAT || a == SVT_FLOAT16 || a == SVT_FLOAT10) && - (b == SVT_FLOAT || b == SVT_FLOAT16 || b == SVT_FLOAT10)) - return true; - - if ((a == SVT_INT || a == SVT_INT16 || a == SVT_INT12) && - (b == SVT_INT || b == SVT_INT16 || a == SVT_INT12)) - return true; - - if ((a == SVT_UINT || a == SVT_UINT16) && - (b == SVT_UINT || b == SVT_UINT16)) - return true; - - return false; -} - -void TranslateOperandSwizzle(HLSLCrossCompilerContext* psContext, const Operand* psOperand, int iRebase) -{ - TranslateOperandSwizzleWithMask(psContext, psOperand, OPERAND_4_COMPONENT_MASK_ALL, iRebase); -} - -void TranslateOperandSwizzleWithMask(HLSLCrossCompilerContext* psContext, const Operand* psOperand, uint32_t ui32ComponentMask, int iRebase) -{ - TranslateOperandSwizzleWithMask(*psContext->currentGLSLString, psContext, psOperand, ui32ComponentMask, iRebase); -} - -void TranslateOperandSwizzleWithMask(bstring glsl, HLSLCrossCompilerContext* psContext, const Operand* psOperand, uint32_t ui32ComponentMask, int iRebase) -{ - uint32_t accessMask = ui32ComponentMask & psOperand->GetAccessMask(); - if (psOperand->eType == OPERAND_TYPE_INPUT) - { - int regSpace = psOperand->GetRegisterSpace(psContext); - // Skip swizzle for scalar inputs, but only if we haven't redirected them - if (regSpace == 0) - { - if ((psContext->psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] == 0) && - (psContext->psShader->abScalarInput[regSpace][psOperand->ui32RegisterNumber] & accessMask)) - { - return; - } - } - else - { - if ((psContext->psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0) && - 
(psContext->psShader->abScalarInput[regSpace][psOperand->ui32RegisterNumber] & accessMask)) - { - return; - } - } - } - if (psOperand->eType == OPERAND_TYPE_OUTPUT) - { - int regSpace = psOperand->GetRegisterSpace(psContext); - // Skip swizzle for scalar outputs, but only if we haven't redirected them - if (regSpace == 0) - { - if ((psContext->psShader->asPhases[psContext->currentPhase].acOutputNeedsRedirect[psOperand->ui32RegisterNumber] == 0) && - (psContext->psShader->abScalarOutput[regSpace][psOperand->ui32RegisterNumber] & accessMask)) - { - return; - } - } - else - { - if ((psContext->psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0) && - (psContext->psShader->abScalarOutput[regSpace][psOperand->ui32RegisterNumber] & accessMask)) - { - return; - } - } - } - - if (psOperand->eType == OPERAND_TYPE_CONSTANT_BUFFER) - { - /*ConstantBuffer* psCBuf = NULL; - ShaderVar* psVar = NULL; - int32_t index = -1; - GetConstantBufferFromBindingPoint(psOperand->aui32ArraySizes[0], &psContext->psShader->sInfo, &psCBuf); - - //Access the Nth vec4 (N=psOperand->aui32ArraySizes[1]) - //then apply the sizzle. 
- - GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], psOperand->aui32Swizzle, psCBuf, &psVar, &index); - - bformata(glsl, ".%s", psVar->Name); - if(index != -1) - { - bformata(glsl, "[%d]", index); - }*/ - - //return; - } - - if (psOperand->iWriteMaskEnabled && - psOperand->iNumComponents != 1) - { - //Component Mask - if (psOperand->eSelMode == OPERAND_4_COMPONENT_MASK_MODE) - { - uint32_t mask; - if (psOperand->ui32CompMask != 0) - mask = psOperand->ui32CompMask & ui32ComponentMask; - else - mask = ui32ComponentMask; - - if (mask != 0 && mask != OPERAND_4_COMPONENT_MASK_ALL) - { - bcatcstr(glsl, "."); - if (mask & OPERAND_4_COMPONENT_MASK_X) - { - ASSERT(iRebase == 0); - bcatcstr(glsl, "x"); - } - if (mask & OPERAND_4_COMPONENT_MASK_Y) - { - ASSERT(iRebase <= 1); - bformata(glsl, "%c", "xy"[1 - iRebase]); - } - if (mask & OPERAND_4_COMPONENT_MASK_Z) - { - ASSERT(iRebase <= 2); - bformata(glsl, "%c", "xyz"[2 - iRebase]); - } - if (mask & OPERAND_4_COMPONENT_MASK_W) - { - ASSERT(iRebase <= 3); - bformata(glsl, "%c", "xyzw"[3 - iRebase]); - } - } - } - else - //Component Swizzle - if (psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) - { - if (ui32ComponentMask != OPERAND_4_COMPONENT_MASK_ALL || - !(psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_X && - psOperand->aui32Swizzle[1] == OPERAND_4_COMPONENT_Y && - psOperand->aui32Swizzle[2] == OPERAND_4_COMPONENT_Z && - psOperand->aui32Swizzle[3] == OPERAND_4_COMPONENT_W - ) - ) - { - uint32_t i; - - bcatcstr(glsl, "."); - - for (i = 0; i < 4; ++i) - { - if (!(ui32ComponentMask & (OPERAND_4_COMPONENT_MASK_X << i))) - continue; - - if (psOperand->aui32Swizzle[i] == OPERAND_4_COMPONENT_X) - { - ASSERT(iRebase == 0); - bcatcstr(glsl, "x"); - } - else if (psOperand->aui32Swizzle[i] == OPERAND_4_COMPONENT_Y) - { - ASSERT(iRebase <= 1); - bformata(glsl, "%c", "xy"[1 - iRebase]); - } - else if (psOperand->aui32Swizzle[i] == OPERAND_4_COMPONENT_Z) - { - ASSERT(iRebase <= 2); - bformata(glsl, "%c", "xyz"[2 - 
iRebase]); - } - else if (psOperand->aui32Swizzle[i] == OPERAND_4_COMPONENT_W) - { - ASSERT(iRebase <= 3); - bformata(glsl, "%c", "xyzw"[3 - iRebase]); - } - } - } - } - else if (psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE) // ui32ComponentMask is ignored in this case - { - bcatcstr(glsl, "."); - - if (psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_X) - { - ASSERT(iRebase == 0); - bcatcstr(glsl, "x"); - } - else if (psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_Y) - { - ASSERT(iRebase <= 1); - bformata(glsl, "%c", "xy"[1 - iRebase]); - } - else if (psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_Z) - { - ASSERT(iRebase <= 2); - bformata(glsl, "%c", "xyz"[2 - iRebase]); - } - else if (psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_W) - { - ASSERT(iRebase <= 3); - bformata(glsl, "%c", "xyzw"[3 - iRebase]); - } - } - - //Component Select 1 - } -} - -void ToGLSL::TranslateOperandIndex(const Operand* psOperand, int index) -{ - int i = index; - - bstring glsl = *psContext->currentGLSLString; - - ASSERT(index < psOperand->iIndexDims); - - switch (psOperand->eIndexRep[i]) - { - case OPERAND_INDEX_IMMEDIATE32: - { - bformata(glsl, "[%d]", psOperand->aui32ArraySizes[i]); - break; - } - case OPERAND_INDEX_RELATIVE: - { - bcatcstr(glsl, "["); - TranslateOperand(psOperand->m_SubOperands[i].get(), TO_FLAG_INTEGER); - bcatcstr(glsl, "]"); - break; - } - case OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: - { - bcatcstr(glsl, "["); //Indexes must be integral. - TranslateOperand(psOperand->m_SubOperands[i].get(), TO_FLAG_INTEGER); - bformata(glsl, " + %d]", psOperand->aui32ArraySizes[i]); - break; - } - default: - { - break; - } - } -} - -void ToGLSL::TranslateOperandIndexMAD(const Operand* psOperand, int index, uint32_t multiply, uint32_t add) -{ - int i = index; - int isGeoShader = psContext->psShader->eShaderType == GEOMETRY_SHADER ? 
1 : 0; - - bstring glsl = *psContext->currentGLSLString; - - ASSERT(index < psOperand->iIndexDims); - - switch (psOperand->eIndexRep[i]) - { - case OPERAND_INDEX_IMMEDIATE32: - { - if (i > 0 || isGeoShader) - { - bformata(glsl, "[%d*%d+%d]", psOperand->aui32ArraySizes[i], multiply, add); - } - else - { - bformata(glsl, "%d*%d+%d", psOperand->aui32ArraySizes[i], multiply, add); - } - break; - } - case OPERAND_INDEX_RELATIVE: - { - bcatcstr(glsl, "[int("); //Indexes must be integral. - TranslateOperand(psOperand->m_SubOperands[i].get(), TO_FLAG_NONE); - bformata(glsl, ")*%d+%d]", multiply, add); - break; - } - case OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: - { - bcatcstr(glsl, "[(int("); //Indexes must be integral. - TranslateOperand(psOperand->m_SubOperands[i].get(), TO_FLAG_NONE); - bformata(glsl, ") + %d)*%d+%d]", psOperand->aui32ArraySizes[i], multiply, add); - break; - } - default: - { - break; - } - } -} - -static std::string GetBitcastOp(HLSLCrossCompilerContext *psContext, SHADER_VARIABLE_TYPE from, SHADER_VARIABLE_TYPE to, uint32_t numComponents, bool &needsBitcastOp) -{ - if (psContext->psShader->eTargetLanguage == LANG_METAL) - { - needsBitcastOp = false; - std::ostringstream oss; - oss << "as_type<"; - oss << GetConstructorForTypeMetal(to, numComponents); - oss << ">"; - return oss.str(); - } - else - { - needsBitcastOp = true; - if ((to == SVT_FLOAT || to == SVT_FLOAT16 || to == SVT_FLOAT10) && from == SVT_INT) - return "intBitsToFloat"; - else if ((to == SVT_FLOAT || to == SVT_FLOAT16 || to == SVT_FLOAT10) && from == SVT_UINT) - return "uintBitsToFloat"; - else if (to == SVT_INT && (from == SVT_FLOAT || from == SVT_FLOAT16 || from == SVT_FLOAT10)) - return "floatBitsToInt"; - else if (to == SVT_UINT && (from == SVT_FLOAT || from == SVT_FLOAT16 || from == SVT_FLOAT10)) - return "floatBitsToUint"; - } - - ASSERT(0); - return "ERROR missing components in GetBitcastOp()"; -} - -// Helper function to print out a single 32-bit immediate value in desired format 
-static void printImmediate32(HLSLCrossCompilerContext *psContext, bstring glsl, uint32_t value, SHADER_VARIABLE_TYPE eType) -{ - int needsParenthesis = 0; - - // Print floats as bit patterns. - if ((eType == SVT_FLOAT || eType == SVT_FLOAT16 || eType == SVT_FLOAT10) && psContext->psShader->ui32MajorVersion > 3 && HaveBitEncodingOps(psContext->psShader->eTargetLanguage) && fpcheck(*((float *)(&value)))) - { - if (psContext->psShader->eTargetLanguage == LANG_METAL) - bcatcstr(glsl, "as_type("); - else - bcatcstr(glsl, "intBitsToFloat("); - eType = SVT_INT; - needsParenthesis = 1; - } - - switch (eType) - { - default: - ASSERT(0); - case SVT_INT: - case SVT_INT16: - case SVT_INT12: - if (value > 0x3ffffffe) - { - if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) - bformata(glsl, "int(0x%Xu)", value); - else - bformata(glsl, "%d", value); - } - else - bformata(glsl, "%d", value); - break; - case SVT_UINT: - case SVT_UINT16: - // Adreno bug workaround (happens only on pre-lollipop Nexus 4's): '0u' is treated as int. 
- if (value == 0 && psContext->psShader->eTargetLanguage == LANG_ES_300) - bcatcstr(glsl, "uint(0u)"); - else - bformata(glsl, "%uu", value); - break; - case SVT_FLOAT: - case SVT_FLOAT10: - case SVT_FLOAT16: - HLSLcc::PrintFloat(glsl, *((float *)(&value))); - break; - case SVT_BOOL: - if (value == 0) - bcatcstr(glsl, "false"); - else - bcatcstr(glsl, "true"); - } - if (needsParenthesis) - bcatcstr(glsl, ")"); -} - -void ToGLSL::TranslateVariableNameWithMask(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase, bool forceNoConversion) -{ - TranslateVariableNameWithMask(*psContext->currentGLSLString, psOperand, ui32TOFlag, pui32IgnoreSwizzle, ui32CompMask, piRebase, forceNoConversion); -} - -void ToGLSL::DeclareDynamicIndexWrapper(const struct ShaderVarType* psType) -{ - DeclareDynamicIndexWrapper(psType->name.c_str(), psType->Class, psType->Type, psType->Rows, psType->Columns, psType->Elements); -} - -void ToGLSL::DeclareDynamicIndexWrapper(const char* psName, SHADER_VARIABLE_CLASS eClass, SHADER_VARIABLE_TYPE eType, uint32_t ui32Rows, uint32_t ui32Columns, uint32_t ui32Elements) -{ - bstring glsl = psContext->beforeMain; - - const char* suffix = "DynamicIndex"; - const uint32_t maxElemCount = 256; - uint32_t elemCount = ui32Elements; - - if (m_FunctionDefinitions.find(psName) != m_FunctionDefinitions.end()) - return; - - // Add a simple define that one can search and replace on devices that support dynamic indexing the usual way - if (m_FunctionDefinitions.find(suffix) == m_FunctionDefinitions.end()) - { - m_FunctionDefinitions.insert(std::make_pair(suffix, "#define UNITY_DYNAMIC_INDEX_ES2 0\n")); - m_FunctionDefinitionsOrder.push_back(suffix); - } - - bcatcstr(glsl, "\n"); - - char name[256]; - if ((eClass == SVC_MATRIX_COLUMNS || eClass == SVC_MATRIX_ROWS) && psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) - sprintf(name, HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING "%s", ui32Rows, ui32Columns, psName); - 
else - memcpy(name, psName, strlen(psName) + 1); - - if (eClass == SVC_STRUCT) - { - bformata(glsl, "%s_Type %s%s", psName, psName, suffix); - } - else if (eClass == SVC_MATRIX_COLUMNS || eClass == SVC_MATRIX_ROWS) - { - if (psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) - { - // Translate matrices into vec4 arrays - bformata(glsl, "%s " HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING "%s%s", HLSLcc::GetConstructorForType(psContext, eType, 4), ui32Rows, ui32Columns, psName, suffix); - elemCount = (eClass == SVC_MATRIX_COLUMNS ? ui32Columns : ui32Rows); - if (ui32Elements > 1) - { - elemCount *= ui32Elements; - } - } - else - { - bformata(glsl, "%s %s%s", HLSLcc::GetMatrixTypeName(psContext, eType, ui32Columns, ui32Rows).c_str(), psName, suffix); - } - } - else if (eClass == SVC_VECTOR && ui32Columns > 1) - { - bformata(glsl, "%s %s%s", HLSLcc::GetConstructorForType(psContext, eType, ui32Columns), psName, suffix); - } - else if ((eClass == SVC_SCALAR) || (eClass == SVC_VECTOR && ui32Columns == 1)) - { - bformata(glsl, "%s %s%s", HLSLcc::GetConstructorForType(psContext, eType, 1), psName, suffix); - } - bformata(glsl, "(int i){\n"); - bcatcstr(glsl, "#if UNITY_DYNAMIC_INDEX_ES2\n"); - bformata(glsl, " return %s[i];\n", name); - bcatcstr(glsl, "#else\n"); - bformata(glsl, "#define d_ar %s\n", name); - bformata(glsl, " if (i <= 0) return d_ar[0];"); - - // Let's draw a line somewhere with this workaround - for (int i = 1; i < std::min(elemCount, maxElemCount); i++) - { - bformata(glsl, " else if (i == %d) return d_ar[%d];", i, i); - } - bformata(glsl, "\n return d_ar[0];\n"); - bformata(glsl, "#undef d_ar\n"); - bcatcstr(glsl, "#endif\n"); - bformata(glsl, "}\n\n"); - m_FunctionDefinitions.insert(std::make_pair(psName, "")); - m_FunctionDefinitionsOrder.push_back(psName); -} - -void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase, bool forceNoConversion) -{ - 
int numParenthesis = 0; - int hasCtor = 0; - int needsBoolUpscale = 0; // If nonzero, bools need * 0xffffffff in them - SHADER_VARIABLE_TYPE requestedType = TypeFlagsToSVTType(ui32TOFlag); - SHADER_VARIABLE_TYPE eType = psOperand->GetDataType(psContext, requestedType); - int numComponents = psOperand->GetNumSwizzleElements(ui32CompMask); - int requestedComponents = 0; - int scalarWithSwizzle = 0; - - *pui32IgnoreSwizzle = 0; - - if (psOperand->eType == OPERAND_TYPE_TEMP) - { - // Check for scalar - if (psContext->psShader->GetTempComponentCount(eType, psOperand->ui32RegisterNumber) == 1 && psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) - { - scalarWithSwizzle = 1; // Going to need a constructor - } - } - - if (psOperand->eType == OPERAND_TYPE_INPUT) - { - // Check for scalar - // You would think checking would be easy but there is a caveat: - // checking abScalarInput might report as scalar, while in reality that was redirected and now is vector so swizzle must be preserved - // as an example consider we have input: - // float2 x; float y; - // and later on we do - // tex2D(xxx, fixed2(x.x, y)); - // in that case we will generate redirect but which ui32RegisterNumber will be used for it is not strictly "specified" - // so we may end up with treating it as scalar (even though it is vector now) - const int redirectInput = psContext->psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber]; - const bool wasRedirected = redirectInput == 0xFF || redirectInput == 0xFE; - - const int scalarInput = psContext->psShader->abScalarInput[psOperand->GetRegisterSpace(psContext)][psOperand->ui32RegisterNumber]; - if (!wasRedirected && (scalarInput & psOperand->GetAccessMask()) && (psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE)) - { - scalarWithSwizzle = 1; - *pui32IgnoreSwizzle = 1; - } - } - - if (psOperand->eType == OPERAND_TYPE_CONSTANT_BUFFER && psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE && 
psOperand->IsSwizzleReplicated()) - { - // Needs scalar check as well - const ConstantBuffer* psCBuf = NULL; - const ShaderVarType* psVarType = NULL; - int32_t rebase = 0; - bool isArray; - psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, psOperand->aui32ArraySizes[0], &psCBuf); - ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], psOperand->aui32Swizzle, psCBuf, &psVarType, &isArray, NULL, &rebase, psContext->flags); - if (psVarType->Columns == 1) - { - scalarWithSwizzle = 1; // Needs a constructor - *pui32IgnoreSwizzle = 1; - } - } - - if (piRebase) - *piRebase = 0; - - if (ui32TOFlag & TO_AUTO_EXPAND_TO_VEC2) - requestedComponents = 2; - else if (ui32TOFlag & TO_AUTO_EXPAND_TO_VEC3) - requestedComponents = 3; - else if (ui32TOFlag & TO_AUTO_EXPAND_TO_VEC4) - requestedComponents = 4; - - requestedComponents = std::max(requestedComponents, numComponents); - - bool needsBitcastOp = false; - - if (!(ui32TOFlag & (TO_FLAG_DESTINATION | TO_FLAG_NAME_ONLY | TO_FLAG_DECLARATION_NAME))) - { - if (psOperand->eType == OPERAND_TYPE_IMMEDIATE32 || psOperand->eType == OPERAND_TYPE_IMMEDIATE64) - { - // Mark the operand type to match whatever we're asking for in the flags. 
- ((Operand *)psOperand)->aeDataType[0] = requestedType; - ((Operand *)psOperand)->aeDataType[1] = requestedType; - ((Operand *)psOperand)->aeDataType[2] = requestedType; - ((Operand *)psOperand)->aeDataType[3] = requestedType; - } - - if (AreTypesCompatible(eType, ui32TOFlag) == 0) - { - if (CanDoDirectCast(psContext, eType, requestedType) || !HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) - { - hasCtor = 1; - if (eType == SVT_BOOL && !forceNoConversion) - { - needsBoolUpscale = 1; - // make sure to wrap the whole thing in parens so the upscale - // multiply only applies to the bool - bcatcstr(glsl, "("); - numParenthesis++; - } - - // case 1154828: In case of OPERAND_TYPE_INPUT_PRIMITIVEID we end up here with requestedComponents == 0, GetConstructorForType below would return empty string and we miss the cast to uint - if (requestedComponents < 1) - requestedComponents = 1; - - bformata(glsl, "%s(", GetConstructorForType(psContext, requestedType, requestedComponents, false)); - numParenthesis++; - } - else - { - // Direct cast not possible, need to do bitcast. - if (IsESLanguage(psContext->psShader->eTargetLanguage) && (requestedType == SVT_UINT)) - { - // without explicit cast Adreno may treat the return type of floatBitsToUint as signed int (case 1256567) - bformata(glsl, "%s(", GetConstructorForType(psContext, requestedType, requestedComponents, false)); - numParenthesis++; - } - bformata(glsl, "%s(", GetBitcastOp(psContext, eType, requestedType, requestedComponents, /*out*/ needsBitcastOp).c_str()); - numParenthesis++; - } - } - - // Add ctor if needed (upscaling). 
Type conversion is already handled above, so here we must - // use the original type to not make type conflicts in bitcasts - if (((numComponents < requestedComponents) || (scalarWithSwizzle != 0)) && (hasCtor == 0)) - { -// ASSERT(numComponents == 1); - bformata(glsl, "%s(", GetConstructorForType(psContext, eType, requestedComponents, false)); - numParenthesis++; - hasCtor = 1; - } - } - - - switch (psOperand->eType) - { - case OPERAND_TYPE_IMMEDIATE32: - { - if (psOperand->iNumComponents == 1) - { - printImmediate32(psContext, glsl, *((unsigned int*)(&psOperand->afImmediates[0])), requestedType); - } - else - { - int i; - int firstItemAdded = 0; - if (hasCtor == 0) - { - bformata(glsl, "%s(", GetConstructorForType(psContext, requestedType, requestedComponents, false)); - numParenthesis++; - hasCtor = 1; - } - for (i = 0; i < 4; i++) - { - uint32_t uval; - if (!(ui32CompMask & (1 << i))) - continue; - - if (firstItemAdded) - bcatcstr(glsl, ", "); - uval = *((uint32_t*)(&psOperand->afImmediates[i >= psOperand->iNumComponents ? 
psOperand->iNumComponents - 1 : i])); - printImmediate32(psContext, glsl, uval, requestedType); - firstItemAdded = 1; - } - bcatcstr(glsl, ")"); - *pui32IgnoreSwizzle = 1; - numParenthesis--; - } - break; - } - case OPERAND_TYPE_IMMEDIATE64: - { - if (psOperand->iNumComponents == 1) - { - bformata(glsl, "%.17g", - psOperand->adImmediates[0]); - } - else - { - bformata(glsl, "dvec4(%.17g, %.17g, %.17g, %.17g)", - psOperand->adImmediates[0], - psOperand->adImmediates[1], - psOperand->adImmediates[2], - psOperand->adImmediates[3]); - if (psOperand->iNumComponents != 4) - { - AddSwizzleUsingElementCount(glsl, psOperand->iNumComponents); - } - } - break; - } - case OPERAND_TYPE_INPUT: - { - int regSpace = psOperand->GetRegisterSpace(psContext); - switch (psOperand->iIndexDims) - { - case INDEX_2D: - { - const ShaderInfo::InOutSignature *psSig = NULL; - psContext->psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->ui32CompMask, &psSig); - - if ((psSig->eSystemValueType == NAME_POSITION && psSig->ui32SemanticIndex == 0) || - (psSig->semanticName == "POS" && psSig->ui32SemanticIndex == 0) || - (psSig->semanticName == "SV_POSITION" && psSig->ui32SemanticIndex == 0) || - (psSig->semanticName == "POSITION" && psSig->ui32SemanticIndex == 0)) - { - bcatcstr(glsl, "gl_in"); - TranslateOperandIndex(psOperand, 0);//Vertex index - bcatcstr(glsl, ".gl_Position"); - } - else - { - std::string name = psContext->GetDeclaredInputName(psOperand, piRebase, 0, pui32IgnoreSwizzle); - - bformata(glsl, "%s", name.c_str()); - TranslateOperandIndex(psOperand, 0);//Vertex index - } - break; - } - default: - { - if (psOperand->eIndexRep[0] == OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE) - { - bformata(glsl, "phase%d_Input%d_%d[", psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); - TranslateOperand(psOperand->m_SubOperands[0].get(), TO_FLAG_INTEGER); - bcatcstr(glsl, "]"); - } - else - { - if 
(psContext->psShader->aIndexedInput[regSpace][psOperand->ui32RegisterNumber] != 0) - { - const uint32_t parentIndex = psContext->psShader->aIndexedInputParents[regSpace][psOperand->ui32RegisterNumber]; - bformata(glsl, "phase%d_Input%d_%d[%d]", psContext->currentPhase, regSpace, parentIndex, - psOperand->ui32RegisterNumber - parentIndex); - } - else - { - std::string name = psContext->GetDeclaredInputName(psOperand, piRebase, 0, pui32IgnoreSwizzle); - - // Rewrite the variable name if we're using framebuffer fetch - if (psContext->psShader->extensions->EXT_shader_framebuffer_fetch && - psContext->psShader->eShaderType == PIXEL_SHADER && - psContext->flags & HLSLCC_FLAG_SHADER_FRAMEBUFFER_FETCH) - { - // With ES2, leave separate variable names for input - if (!WriteToFragData(psContext->psShader->eTargetLanguage) && - name.size() == 13 && !strncmp(name.c_str(), "vs_SV_Target", 12)) - bcatcstr(glsl, name.substr(3).c_str()); - else - bcatcstr(glsl, name.c_str()); - } - else - { - bcatcstr(glsl, name.c_str()); - } - } - } - break; - } - } - break; - } - case OPERAND_TYPE_OUTPUT: - { - /*if(psContext->psShader->eShaderType == HULL_SHADER && psOperand->eIndexRep[0] == OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE) - { - int stream = 0; - const char* name = GetDeclaredOutputName(psContext, HULL_SHADER, psOperand, &stream); - bcatcstr(glsl, name); - } - else*/ - { - int stream = 0; - std::string name = psContext->GetDeclaredOutputName(psOperand, &stream, pui32IgnoreSwizzle, piRebase, 0); - - // If we are writing out to built in type then we need to redirect tot he built in arrays - // this is safe to do as HLSL enforces 1:1 mapping, so output maps to gl_InvocationID by default - if (name == "gl_Position" && psContext->psShader->eShaderType == HULL_SHADER) - { - bcatcstr(glsl, "gl_out[gl_InvocationID]."); - } - - bcatcstr(glsl, name.c_str()); - - if (psOperand->m_SubOperands[0].get()) - { - bcatcstr(glsl, "["); - TranslateOperand(psOperand->m_SubOperands[0].get(), 
TO_AUTO_BITCAST_TO_INT); - bcatcstr(glsl, "]"); - } - } - break; - } - case OPERAND_TYPE_OUTPUT_DEPTH: - if (psContext->psShader->eTargetLanguage == LANG_ES_100 && !psContext->EnableExtension("GL_EXT_frag_depth")) - { - bcatcstr(psContext->extensions, "#define gl_FragDepth gl_FragDepthEXT\n"); - } - // fall through - case OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL: - case OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL: - { - bcatcstr(glsl, "gl_FragDepth"); - break; - } - case OPERAND_TYPE_TEMP: - { - SHADER_VARIABLE_TYPE eTempType = psOperand->GetDataType(psContext); - - if (psOperand->eSpecialName == NAME_UNDEFINED && psOperand->specialName.length()) - { - bcatcstr(glsl, psOperand->specialName.c_str()); - break; - } - - bcatcstr(glsl, HLSLCC_TEMP_PREFIX); - ASSERT(psOperand->ui32RegisterNumber < 0x10000); // Sanity check after temp splitting. - switch (eTempType) - { - case SVT_FLOAT: - ASSERT(psContext->psShader->psFloatTempSizes[psOperand->ui32RegisterNumber] != 0); - if (psContext->psShader->psFloatTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - break; - case SVT_FLOAT16: - ASSERT(psContext->psShader->psFloat16TempSizes[psOperand->ui32RegisterNumber] != 0); - bcatcstr(glsl, "16_"); - if (psContext->psShader->psFloat16TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - break; - case SVT_FLOAT10: - ASSERT(psContext->psShader->psFloat10TempSizes[psOperand->ui32RegisterNumber] != 0); - bcatcstr(glsl, "10_"); - if (psContext->psShader->psFloat10TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - break; - case SVT_INT: - ASSERT(psContext->psShader->psIntTempSizes[psOperand->ui32RegisterNumber] != 0); - bcatcstr(glsl, "i"); - if (psContext->psShader->psIntTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - break; - case SVT_INT16: - 
ASSERT(psContext->psShader->psInt16TempSizes[psOperand->ui32RegisterNumber] != 0); - bcatcstr(glsl, "i16_"); - if (psContext->psShader->psInt16TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - break; - case SVT_INT12: - ASSERT(psContext->psShader->psInt12TempSizes[psOperand->ui32RegisterNumber] != 0); - bcatcstr(glsl, "i12_"); - if (psContext->psShader->psInt12TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - break; - case SVT_UINT: - ASSERT(psContext->psShader->psUIntTempSizes[psOperand->ui32RegisterNumber] != 0); - bcatcstr(glsl, "u"); - if (psContext->psShader->psUIntTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - break; - case SVT_UINT16: - ASSERT(psContext->psShader->psUInt16TempSizes[psOperand->ui32RegisterNumber] != 0); - bcatcstr(glsl, "u16_"); - if (psContext->psShader->psUInt16TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - break; - case SVT_DOUBLE: - ASSERT(psContext->psShader->psDoubleTempSizes[psOperand->ui32RegisterNumber] != 0); - bcatcstr(glsl, "d"); - if (psContext->psShader->psDoubleTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - break; - case SVT_BOOL: - ASSERT(psContext->psShader->psBoolTempSizes[psOperand->ui32RegisterNumber] != 0); - bcatcstr(glsl, "b"); - if (psContext->psShader->psBoolTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - break; - default: - ASSERT(0 && "Should never get here!"); - } - // m_ForLoopInductorName overrides the register number, if available - if (psOperand->m_ForLoopInductorName != 0) - { - bformata(glsl, "_loop_%d", psOperand->m_ForLoopInductorName); - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - } - else - bformata(glsl, "%d", psOperand->ui32RegisterNumber); - break; - } - case OPERAND_TYPE_SPECIAL_IMMCONSTINT: - { - 
bformata(glsl, "IntImmConst%d", psOperand->ui32RegisterNumber); - break; - } - case OPERAND_TYPE_SPECIAL_IMMCONST: - { - ASSERT(0 && "DX9 shaders no longer supported!"); - break; - } - case OPERAND_TYPE_SPECIAL_OUTBASECOLOUR: - { - bcatcstr(glsl, "BaseColour"); - break; - } - case OPERAND_TYPE_SPECIAL_OUTOFFSETCOLOUR: - { - bcatcstr(glsl, "OffsetColour"); - break; - } - case OPERAND_TYPE_SPECIAL_POSITION: - { - bcatcstr(glsl, "gl_Position"); - break; - } - case OPERAND_TYPE_SPECIAL_FOG: - { - bcatcstr(glsl, "Fog"); - break; - } - case OPERAND_TYPE_SPECIAL_POINTSIZE: - { - bcatcstr(glsl, "gl_PointSize"); - break; - } - case OPERAND_TYPE_SPECIAL_ADDRESS: - { - bcatcstr(glsl, "Address"); - break; - } - case OPERAND_TYPE_SPECIAL_LOOPCOUNTER: - { - bcatcstr(glsl, "LoopCounter"); - pui32IgnoreSwizzle[0] = 1; - break; - } - case OPERAND_TYPE_SPECIAL_TEXCOORD: - { - bformata(glsl, "TexCoord%d", psOperand->ui32RegisterNumber); - break; - } - case OPERAND_TYPE_CONSTANT_BUFFER: - { - const char* StageName = "VS"; - const ConstantBuffer* psCBuf = NULL; - const ShaderVarType* psVarType = NULL; - int32_t index = -1; - std::vector arrayIndices; - bool isArray = false; - bool isSubpassMS = false; - psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, psOperand->aui32ArraySizes[0], &psCBuf); - - switch (psContext->psShader->eShaderType) - { - case PIXEL_SHADER: - { - StageName = "PS"; - break; - } - case HULL_SHADER: - { - StageName = "HS"; - break; - } - case DOMAIN_SHADER: - { - StageName = "DS"; - break; - } - case GEOMETRY_SHADER: - { - StageName = "GS"; - break; - } - case COMPUTE_SHADER: - { - StageName = "CS"; - break; - } - default: - { - break; - } - } - - if (psCBuf && psCBuf->name == "OVR_multiview") - { - pui32IgnoreSwizzle[0] = 1; - bformata(glsl, "gl_ViewID_OVR"); - break; - } - - - if (ui32TOFlag & TO_FLAG_DECLARATION_NAME) - { - pui32IgnoreSwizzle[0] = 1; - } - - // FIXME: With ES 3.0 the buffer name is often not prepended to variable names - 
if (((psContext->flags & HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT) != HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT) && - ((psContext->flags & HLSLCC_FLAG_DISABLE_GLOBALS_STRUCT) != HLSLCC_FLAG_DISABLE_GLOBALS_STRUCT)) - { - if (psCBuf) - { - //$Globals. - if (psCBuf->name[0] == '$') - { - bformata(glsl, "Globals%s", StageName); - } - else - { - bformata(glsl, "%s%s", psCBuf->name.c_str(), StageName); - } - if ((ui32TOFlag & TO_FLAG_DECLARATION_NAME) != TO_FLAG_DECLARATION_NAME) - { - bcatcstr(glsl, "."); - } - } - else - { - //bformata(glsl, "cb%d", psOperand->aui32ArraySizes[0]); - } - } - - if ((ui32TOFlag & TO_FLAG_DECLARATION_NAME) != TO_FLAG_DECLARATION_NAME) - { - //Work out the variable name. Don't apply swizzle to that variable yet. - int32_t rebase = 0; - - ASSERT(psCBuf != NULL); - - uint32_t componentsNeeded = 1; - uint32_t minSwiz = 3; - uint32_t maxSwiz = 0; - if (psOperand->eSelMode != OPERAND_4_COMPONENT_SELECT_1_MODE) - { - int i; - for (i = 0; i < 4; i++) - { - if ((ui32CompMask & (1 << i)) == 0) - continue; - minSwiz = std::min(minSwiz, psOperand->aui32Swizzle[i]); - maxSwiz = std::max(maxSwiz, psOperand->aui32Swizzle[i]); - } - componentsNeeded = maxSwiz - minSwiz + 1; - } - else - { - minSwiz = maxSwiz = 1; - } - - // When we have a component mask that doesn't have .x set (this basically only happens when we manually open operands into components) - // We have to pull down the swizzle array to match the first bit that's actually set - uint32_t tmpSwizzle[4] = { 0 }; - int firstBitSet = 0; - if (ui32CompMask == 0) - ui32CompMask = 0xf; - while ((ui32CompMask & (1 << firstBitSet)) == 0) - firstBitSet++; - std::copy(&psOperand->aui32Swizzle[firstBitSet], &psOperand->aui32Swizzle[4], &tmpSwizzle[0]); - - ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], tmpSwizzle, psCBuf, &psVarType, &isArray, &arrayIndices, &rebase, psContext->flags); - - // Get a possible dynamic array index - bstring dynamicIndex = bfromcstr(""); - bool needsIndexCalcRevert = 
false; - bool isAoS = ((!isArray && arrayIndices.size() > 0) || (isArray && arrayIndices.size() > 1)); - - Operand *psDynIndexOp = psOperand->GetDynamicIndexOperand(psContext, psVarType, isAoS, &needsIndexCalcRevert); - - if (psDynIndexOp != NULL) - { - SHADER_VARIABLE_TYPE eType = psDynIndexOp->GetDataType(psContext); - uint32_t opFlags = TO_FLAG_INTEGER; - - if (eType != SVT_INT && eType != SVT_UINT) - opFlags = TO_AUTO_BITCAST_TO_INT; - - TranslateOperand(dynamicIndex, psDynIndexOp, opFlags, 0x1); // We only care about the first component - } - - char *tmp = bstr2cstr(dynamicIndex, '\0'); - std::string dynamicIndexStr = tmp; - bcstrfree(tmp); - bdestroy(dynamicIndex); - - if (psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE || ((componentsNeeded + minSwiz) <= psVarType->Columns)) - { - // Simple case: just access one component - std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(psVarType, arrayIndices, dynamicIndexStr, needsIndexCalcRevert, psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES); - - if ((psContext->flags & HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT_WITH_INSTANCE_NAME) && psCBuf) - { - std::string instanceName = UniformBufferInstanceName(psContext, psCBuf->name); - bformata(glsl, "%s.", instanceName.c_str()); - } - - // Special hack for MSAA subpass inputs: the index is actually the sample index, so do special handling later. 
- if (strncmp(fullName.c_str(), "subpassLoad", 11) == 0 && fullName[fullName.length() - 1] == ',') - isSubpassMS = true; - - if (((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) != 0) && ((psVarType->Class == SVC_MATRIX_ROWS) || (psVarType->Class == SVC_MATRIX_COLUMNS))) - { - // We'll need to add the prefix only to the last section of the name - size_t commaPos = fullName.find_last_of('.'); - char prefix[256]; - sprintf(prefix, HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING, psVarType->Rows, psVarType->Columns); - if (commaPos == std::string::npos) - fullName.insert(0, prefix); - else - fullName.insert(commaPos + 1, prefix); - - bformata(glsl, "%s", fullName.c_str()); - } - else - bformata(glsl, "%s", fullName.c_str()); - } - else - { - // Non-simple case: build vec4 and apply mask - - std::string instanceNamePrefix; - if ((psContext->flags & HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT_WITH_INSTANCE_NAME) && psCBuf) - { - std::string instanceName = UniformBufferInstanceName(psContext, psCBuf->name); - instanceNamePrefix = instanceName + "."; - } - - uint32_t i; - std::vector tmpArrayIndices; - bool tmpIsArray; - int32_t tmpRebase; - int firstItemAdded = 0; - - bformata(glsl, "%s(", GetConstructorForType(psContext, psVarType->Type, GetNumberBitsSet(ui32CompMask), false)); - for (i = 0; i < 4; i++) - { - const ShaderVarType *tmpVarType = NULL; - if ((ui32CompMask & (1 << i)) == 0) - continue; - tmpRebase = 0; - if (firstItemAdded != 0) - bcatcstr(glsl, ", "); - else - firstItemAdded = 1; - - memset(tmpSwizzle, 0, sizeof(uint32_t) * 4); - std::copy(&psOperand->aui32Swizzle[i], &psOperand->aui32Swizzle[4], &tmpSwizzle[0]); - - ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], tmpSwizzle, psCBuf, &tmpVarType, &tmpIsArray, &tmpArrayIndices, &tmpRebase, psContext->flags); - std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(tmpVarType, tmpArrayIndices, dynamicIndexStr, needsIndexCalcRevert, psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES); - - // Special 
hack for MSAA subpass inputs: the index is actually the sample index, so do special handling later. - if (strncmp(fullName.c_str(), "subpassLoad", 11) == 0 && fullName[fullName.length() - 1] == ',') - isSubpassMS = true; - - if (tmpVarType->Class == SVC_SCALAR) - { - bformata(glsl, "%s%s", instanceNamePrefix.c_str(), fullName.c_str()); - } - else - { - uint32_t swizzle; - tmpRebase /= 4; // 0 => 0, 4 => 1, 8 => 2, 12 /= 3 - swizzle = psOperand->aui32Swizzle[i] - tmpRebase; - - bformata(glsl, "%s%s", instanceNamePrefix.c_str(), fullName.c_str()); - bformata(glsl, ".%c", "xyzw"[swizzle]); - } - } - bcatcstr(glsl, ")"); - // Clear rebase, we've already done it. - rebase = 0; - // Also swizzle. - *pui32IgnoreSwizzle = 1; - } - - if (isArray) - { - index = arrayIndices.back(); - - // Dynamic index is atm supported only at the root array level. Add here only if there is no such parent. - bool hasDynamicIndex = !dynamicIndexStr.empty() && (arrayIndices.size() <= 1); - bool hasImmediateIndex = (index != -1) && !(hasDynamicIndex && index == 0); - - if (hasDynamicIndex || hasImmediateIndex) - { - std::ostringstream fullIndexOss; - if (hasDynamicIndex && hasImmediateIndex) - fullIndexOss << "(" << dynamicIndexStr << " + " << index << ")"; - else if (hasDynamicIndex) - fullIndexOss << dynamicIndexStr; - else // hasImmediateStr - fullIndexOss << index; - - int squareBracketType = hasDynamicIndex ? 
HaveDynamicIndexing(psContext, psOperand) : 1; - - if (!squareBracketType) - DeclareDynamicIndexWrapper(psVarType); - - if (((psVarType->Class == SVC_MATRIX_COLUMNS) || (psVarType->Class == SVC_MATRIX_ROWS)) && (psVarType->Elements > 1) && ((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) == 0)) - { - // Special handling for old matrix arrays - bformata(glsl, "%s%s / 4%s", squareBrackets[squareBracketType][0], fullIndexOss.str().c_str(), squareBrackets[squareBracketType][1]); - bformata(glsl, "%s%s %% 4%s", squareBrackets[squareBracketType][0], fullIndexOss.str().c_str(), squareBrackets[squareBracketType][1]); - } - else // This path is atm the default - { - if (isSubpassMS) - bformata(glsl, "%s%s%s", " ", fullIndexOss.str().c_str(), ")"); - else - bformata(glsl, "%s%s%s", squareBrackets[squareBracketType][0], fullIndexOss.str().c_str(), squareBrackets[squareBracketType][1]); - } - } - } - - if (psVarType && psVarType->Class == SVC_VECTOR && !*pui32IgnoreSwizzle) - { - switch (rebase) - { - case 4: - { - if (psVarType->Columns == 2) - { - //.x(GLSL) is .y(HLSL). .y(GLSL) is .z(HLSL) - bcatcstr(glsl, ".xxyx"); - } - else if (psVarType->Columns == 3) - { - //.x(GLSL) is .y(HLSL). .y(GLSL) is .z(HLSL) .z(GLSL) is .w(HLSL) - bcatcstr(glsl, ".xxyz"); - } - break; - } - case 8: - { - if (psVarType->Columns == 2) - { - //.x(GLSL) is .z(HLSL). 
.y(GLSL) is .w(HLSL) - bcatcstr(glsl, ".xxxy"); - } - break; - } - case 0: - default: - { - //No rebase, but extend to vec4 if needed - uint32_t maxComp = psOperand->GetMaxComponent(); - if (psVarType->Columns == 2 && maxComp > 2) - { - bcatcstr(glsl, ".xyxx"); - } - else if (psVarType->Columns == 3 && maxComp > 3) - { - bcatcstr(glsl, ".xyzx"); - } - break; - } - } - } - - if (psVarType && psVarType->Class == SVC_SCALAR) - { - *pui32IgnoreSwizzle = 1; - } - } - break; - } - case OPERAND_TYPE_RESOURCE: - { - ResourceName(glsl, psContext, RGROUP_TEXTURE, psOperand->ui32RegisterNumber, 0); - *pui32IgnoreSwizzle = 1; - break; - } - case OPERAND_TYPE_SAMPLER: - { - bformata(glsl, "Sampler%d", psOperand->ui32RegisterNumber); - *pui32IgnoreSwizzle = 1; - break; - } - case OPERAND_TYPE_FUNCTION_BODY: - { - const uint32_t ui32FuncBody = psOperand->ui32RegisterNumber; - const uint32_t ui32FuncTable = psContext->psShader->aui32FuncBodyToFuncTable[ui32FuncBody]; - //const uint32_t ui32FuncPointer = psContext->psShader->aui32FuncTableToFuncPointer[ui32FuncTable]; - const uint32_t ui32ClassType = psContext->psShader->sInfo.aui32TableIDToTypeID[ui32FuncTable]; - const char* ClassTypeName = &psContext->psShader->sInfo.psClassTypes[ui32ClassType].name[0]; - const uint32_t ui32UniqueClassFuncIndex = psContext->psShader->ui32NextClassFuncName[ui32ClassType]++; - - bformata(glsl, "%s_Func%d", ClassTypeName, ui32UniqueClassFuncIndex); - break; - } - case OPERAND_TYPE_INPUT_FORK_INSTANCE_ID: - case OPERAND_TYPE_INPUT_JOIN_INSTANCE_ID: - { - bcatcstr(glsl, "phaseInstanceID"); // Not a real builtin, but passed as a function parameter. 
- *pui32IgnoreSwizzle = 1; - break; - } - case OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER: - { - if (psContext->IsVulkan() || psContext->IsSwitch()) - { - bformata(glsl, "ImmCB_%d", psContext->currentPhase); - TranslateOperandIndex(psOperand, 0); - } - else - { - int squareBracketType = HaveDynamicIndexing(psContext, psOperand); - - bformata(glsl, "ImmCB_%d_%d_%d", psContext->currentPhase, psOperand->ui32RegisterNumber, psOperand->m_Rebase); - if (psOperand->m_SubOperands[0].get()) - { - bformata(glsl, "%s", squareBrackets[squareBracketType][0]); //Indexes must be integral. Offset is already taken care of above. - TranslateOperand(psOperand->m_SubOperands[0].get(), TO_FLAG_INTEGER); - bformata(glsl, "%s", squareBrackets[squareBracketType][1]); - } - if (psOperand->m_Size == 1) - *pui32IgnoreSwizzle = 1; - } - break; - } - case OPERAND_TYPE_INPUT_DOMAIN_POINT: - { - bcatcstr(glsl, "gl_TessCoord"); - break; - } - case OPERAND_TYPE_INPUT_CONTROL_POINT: - { - const ShaderInfo::InOutSignature *psSig = NULL; - psContext->psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->ui32CompMask, &psSig); - - if ((psSig->eSystemValueType == NAME_POSITION && psSig->ui32SemanticIndex == 0) || - (psSig->semanticName == "POS" && psSig->ui32SemanticIndex == 0) || - (psSig->semanticName == "SV_POSITION" && psSig->ui32SemanticIndex == 0)) - { - bcatcstr(glsl, "gl_in"); - TranslateOperandIndex(psOperand, 0);//Vertex index - bcatcstr(glsl, ".gl_Position"); - } - else - { - std::string name = psContext->GetDeclaredInputName(psOperand, piRebase, 0, pui32IgnoreSwizzle); - - bformata(glsl, "%s", name.c_str()); - TranslateOperandIndex(psOperand, 0);//Vertex index - - // Check for scalar - if ((psContext->psShader->abScalarInput[psOperand->GetRegisterSpace(psContext)][psOperand->ui32RegisterNumber] & psOperand->GetAccessMask()) != 0) - *pui32IgnoreSwizzle = 1; - } - break; - } - case OPERAND_TYPE_NULL: - { - // Null register, used to discard results of operations - 
if (psContext->psShader->eTargetLanguage == LANG_ES_100) - { - // On ES2 we can pass this as an argument to a function, e.g. fake integer operations that we do. See case 1124159. - bcatcstr(glsl, "null"); - bool alreadyDeclared = false; - std::string toDeclare = "vec4 null;"; - for (size_t i = 0; i < m_AdditionalDefinitions.size(); ++i) - { - if (toDeclare == m_AdditionalDefinitions[i]) - { - alreadyDeclared = true; - break; - } - } - - if (!alreadyDeclared) - m_AdditionalDefinitions.push_back(toDeclare); - } - else - bcatcstr(glsl, "//null"); - break; - } - case OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID: - { - bcatcstr(glsl, "gl_InvocationID"); - *pui32IgnoreSwizzle = 1; - break; - } - case OPERAND_TYPE_OUTPUT_COVERAGE_MASK: - { - bcatcstr(glsl, "gl_SampleMask[0]"); - *pui32IgnoreSwizzle = 1; - break; - } - case OPERAND_TYPE_INPUT_COVERAGE_MASK: - { - bcatcstr(glsl, "gl_SampleMaskIn[0]"); - //Skip swizzle on scalar types. - *pui32IgnoreSwizzle = 1; - break; - } - case OPERAND_TYPE_INPUT_THREAD_ID://SV_DispatchThreadID - { - bcatcstr(glsl, "gl_GlobalInvocationID"); - break; - } - case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP://SV_GroupThreadID - { - bcatcstr(glsl, "gl_LocalInvocationID"); - break; - } - case OPERAND_TYPE_INPUT_THREAD_GROUP_ID://SV_GroupID - { - bcatcstr(glsl, "gl_WorkGroupID"); - break; - } - case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED://SV_GroupIndex - { - if (requestedComponents > 1 && !hasCtor) - { - bcatcstr(glsl, GetConstructorForType(psContext, eType, requestedComponents, false)); - bcatcstr(glsl, "("); - numParenthesis++; - hasCtor = 1; - } - - for (uint32_t i = 0; i < requestedComponents; i++) - { - bcatcstr(glsl, "gl_LocalInvocationIndex"); - if (i < requestedComponents - 1) - bcatcstr(glsl, ", "); - } - *pui32IgnoreSwizzle = 1; // No swizzle meaningful for scalar. 
- break; - } - case OPERAND_TYPE_UNORDERED_ACCESS_VIEW: - { - ResourceName(glsl, psContext, RGROUP_UAV, psOperand->ui32RegisterNumber, 0); - break; - } - case OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY: - { - bformata(glsl, "TGSM%d", psOperand->ui32RegisterNumber); - *pui32IgnoreSwizzle = 1; - break; - } - case OPERAND_TYPE_INPUT_PRIMITIVEID: - { - if (psContext->psShader->eShaderType == GEOMETRY_SHADER) - bcatcstr(glsl, "gl_PrimitiveIDIn"); // LOL OpenGL - else - bcatcstr(glsl, "gl_PrimitiveID"); - - break; - } - case OPERAND_TYPE_INDEXABLE_TEMP: - { - bformata(glsl, "TempArray%d", psOperand->aui32ArraySizes[0]); - bcatcstr(glsl, "["); - if (psOperand->aui32ArraySizes[1] != 0 || !psOperand->m_SubOperands[1].get()) - bformata(glsl, "%d", psOperand->aui32ArraySizes[1]); - - if (psOperand->m_SubOperands[1].get()) - { - if (psOperand->aui32ArraySizes[1] != 0) - bcatcstr(glsl, "+"); - TranslateOperand(psOperand->m_SubOperands[1].get(), TO_FLAG_INTEGER); - } - bcatcstr(glsl, "]"); - break; - } - case OPERAND_TYPE_STREAM: - { - bformata(glsl, "%d", psOperand->ui32RegisterNumber); - break; - } - case OPERAND_TYPE_INPUT_GS_INSTANCE_ID: - { - // In HLSL the instance id is uint, so cast here. 
- bcatcstr(glsl, "uint(gl_InvocationID)"); - break; - } - case OPERAND_TYPE_THIS_POINTER: - { - /* - The "this" register is a register that provides up to 4 pieces of information: - X: Which CB holds the instance data - Y: Base element offset of the instance data within the instance CB - Z: Base sampler index - W: Base Texture index - - Can be different for each function call - */ - break; - } - case OPERAND_TYPE_INPUT_PATCH_CONSTANT: - { - const ShaderInfo::InOutSignature* psIn; - psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psIn); - *piRebase = psIn->iRebase; - switch (psIn->eSystemValueType) - { - case NAME_POSITION: - bcatcstr(glsl, "gl_Position"); - break; - case NAME_RENDER_TARGET_ARRAY_INDEX: - bcatcstr(glsl, "gl_Layer"); - *pui32IgnoreSwizzle = 1; - break; - case NAME_CLIP_DISTANCE: - bcatcstr(glsl, "gl_ClipDistance"); - *pui32IgnoreSwizzle = 1; - break; - case NAME_CULL_DISTANCE: - bcatcstr(glsl, "gl_CullDistance"); - *pui32IgnoreSwizzle = 1; - break; - case NAME_VIEWPORT_ARRAY_INDEX: - bcatcstr(glsl, "gl_ViewportIndex"); - *pui32IgnoreSwizzle = 1; - break; - case NAME_VERTEX_ID: - if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) - bcatcstr(glsl, "gl_VertexIndex"); - else - bcatcstr(glsl, "gl_VertexID"); - *pui32IgnoreSwizzle = 1; - break; - case NAME_INSTANCE_ID: - if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) - bcatcstr(glsl, "gl_InstanceIndex"); - else - bcatcstr(glsl, "gl_InstanceID"); - *pui32IgnoreSwizzle = 1; - break; - case NAME_IS_FRONT_FACE: - if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) - bcatcstr(glsl, "(gl_FrontFacing ? 0xffffffffu : uint(0))"); // Old ES3.0 Adrenos treat 0u as const int - else - bcatcstr(glsl, "(gl_FrontFacing ? 
1 : 0)"); - *pui32IgnoreSwizzle = 1; - break; - case NAME_PRIMITIVE_ID: - bcatcstr(glsl, "gl_PrimitiveID"); - *pui32IgnoreSwizzle = 1; - break; - case NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_LINE_DENSITY_TESSFACTOR: - if (psContext->psShader->aIndexedOutput[1][psOperand->ui32RegisterNumber]) - bcatcstr(glsl, "gl_TessLevelOuter"); - else - bcatcstr(glsl, "gl_TessLevelOuter[0]"); - *pui32IgnoreSwizzle = 1; - break; - case NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_LINE_DETAIL_TESSFACTOR: - bcatcstr(glsl, "gl_TessLevelOuter[1]"); - *pui32IgnoreSwizzle = 1; - break; - case NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: - case NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: - bcatcstr(glsl, "gl_TessLevelOuter[2]"); - *pui32IgnoreSwizzle = 1; - break; - case NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: - bcatcstr(glsl, "gl_TessLevelOuter[3]"); - *pui32IgnoreSwizzle = 1; - break; - - case NAME_FINAL_TRI_INSIDE_TESSFACTOR: - case NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR: - if (psContext->psShader->aIndexedOutput[1][psOperand->ui32RegisterNumber]) - bcatcstr(glsl, "gl_TessLevelInner"); - else - bcatcstr(glsl, "gl_TessLevelInner[0]"); - *pui32IgnoreSwizzle = 1; - break; - case NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR: - bcatcstr(glsl, "gl_TessLevelInner[1]"); - *pui32IgnoreSwizzle = 1; - break; - default: - bformata(glsl, "%spatch%s%d", psContext->psShader->eShaderType == HULL_SHADER ? 
psContext->outputPrefix : psContext->inputPrefix, psIn->semanticName.c_str(), psIn->ui32SemanticIndex); - // Disable swizzles if this is a scalar - if (psContext->psShader->eShaderType == HULL_SHADER) - { - if ((psContext->psShader->abScalarOutput[1][psOperand->ui32RegisterNumber] & psOperand->GetAccessMask()) != 0) - *pui32IgnoreSwizzle = 1; - } - else - { - if ((psContext->psShader->abScalarInput[1][psOperand->ui32RegisterNumber] & psOperand->GetAccessMask()) != 0) - *pui32IgnoreSwizzle = 1; - } - - break; - } - - - break; - } - default: - { - ASSERT(0); - break; - } - } - - if (hasCtor && (*pui32IgnoreSwizzle == 0)) - { - TranslateOperandSwizzleWithMask(glsl, psContext, psOperand, ui32CompMask, piRebase ? *piRebase : 0); - *pui32IgnoreSwizzle = 1; - } - - if (needsBitcastOp && (*pui32IgnoreSwizzle == 0)) - { - // some glsl compilers (Switch's GLSLc) emit warnings "u_xlat.w uninitialized" if generated code looks like: "floatBitsToUint(u_xlat).xz". Instead, generate: "floatBitsToUint(u_xlat.xz)" - TranslateOperandSwizzleWithMask(glsl, psContext, psOperand, ui32CompMask, piRebase ? 
*piRebase : 0); - *pui32IgnoreSwizzle = 1; - } - - if (needsBoolUpscale) - { - if (requestedType == SVT_UINT || requestedType == SVT_UINT16 || requestedType == SVT_UINT8) - bcatcstr(glsl, ") * 0xffffffffu"); - else - { - if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) - bcatcstr(glsl, ") * int(0xffffffffu)"); - else - bcatcstr(glsl, ") * -1"); // GLSL ES 2 spec: high precision ints are guaranteed to have a range of (-2^16, 2^16) - } - - numParenthesis--; - bcatcstr(glsl, ")"); - numParenthesis--; - } - - while (numParenthesis != 0) - { - bcatcstr(glsl, ")"); - numParenthesis--; - } -} - -void ToGLSL::TranslateOperand(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t ui32ComponentMask, bool forceNoConversion) -{ - TranslateOperand(*psContext->currentGLSLString, psOperand, ui32TOFlag, ui32ComponentMask, forceNoConversion); -} - -void ToGLSL::TranslateOperand(bstring glsl, const Operand* psOperand, uint32_t ui32TOFlag, uint32_t ui32ComponentMask, bool forceNoConversion) -{ - uint32_t ui32IgnoreSwizzle = 0; - int iRebase = 0; - - // in single-component mode there is no need to use mask - if (psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE) - ui32ComponentMask = OPERAND_4_COMPONENT_MASK_ALL; - - if (psContext->psShader->ui32MajorVersion <= 3) - { - ui32TOFlag &= ~(TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_BITCAST_TO_INT | TO_AUTO_BITCAST_TO_UINT); - } - - if (!HaveUnsignedTypes(psContext->psShader->eTargetLanguage) && (ui32TOFlag & TO_FLAG_UNSIGNED_INTEGER)) - { - ui32TOFlag &= ~TO_FLAG_UNSIGNED_INTEGER; - ui32TOFlag |= TO_FLAG_INTEGER; - } - - if (ui32TOFlag & TO_FLAG_NAME_ONLY) - { - TranslateVariableNameWithMask(glsl, psOperand, ui32TOFlag, &ui32IgnoreSwizzle, OPERAND_4_COMPONENT_MASK_ALL, &iRebase, forceNoConversion); - return; - } - - switch (psOperand->eModifier) - { - case OPERAND_MODIFIER_NONE: - { - break; - } - case OPERAND_MODIFIER_NEG: - { - bcatcstr(glsl, "(-"); - break; - } - case OPERAND_MODIFIER_ABS: - { - bcatcstr(glsl, "abs("); - 
break; - } - case OPERAND_MODIFIER_ABSNEG: - { - bcatcstr(glsl, "-abs("); - break; - } - } - - TranslateVariableNameWithMask(glsl, psOperand, ui32TOFlag, &ui32IgnoreSwizzle, ui32ComponentMask, &iRebase, forceNoConversion); - - if (psContext->psShader->eShaderType == HULL_SHADER && psOperand->eType == OPERAND_TYPE_OUTPUT && - psOperand->ui32RegisterNumber != 0 && psOperand->iArrayElements != 0 && psOperand->eIndexRep[0] != OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE - && psContext->psShader->asPhases[psContext->currentPhase].ePhase == HS_CTRL_POINT_PHASE) - { - bcatcstr(glsl, "[gl_InvocationID]"); - } - - if (!ui32IgnoreSwizzle) - { - TranslateOperandSwizzleWithMask(glsl, psContext, psOperand, ui32ComponentMask, iRebase); - } - - switch (psOperand->eModifier) - { - case OPERAND_MODIFIER_NONE: - { - break; - } - case OPERAND_MODIFIER_NEG: - { - bcatcstr(glsl, ")"); - break; - } - case OPERAND_MODIFIER_ABS: - { - bcatcstr(glsl, ")"); - break; - } - case OPERAND_MODIFIER_ABSNEG: - { - bcatcstr(glsl, ")"); - break; - } - } -} - -std::string ResourceName(HLSLCrossCompilerContext* psContext, ResourceGroup group, const uint32_t ui32RegisterNumber, const int bZCompare) -{ - std::ostringstream oss; - const ResourceBinding* psBinding = 0; - int found; - - found = psContext->psShader->sInfo.GetResourceFromBindingPoint(group, ui32RegisterNumber, &psBinding); - - if (bZCompare) - { - oss << "hlslcc_zcmp"; - } - - if (found) - { - int i = 0; - std::string name = psBinding->name; - uint32_t ui32ArrayOffset = ui32RegisterNumber - psBinding->ui32BindPoint; - - while (i < name.length()) - { - //array syntax [X] becomes _0_ - //Otherwise declarations could end up as: - //uniform sampler2D SomeTextures[0]; - //uniform sampler2D SomeTextures[1]; - if (name[i] == '[' || name[i] == ']') - name[i] = '_'; - - ++i; - } - - if (ui32ArrayOffset) - { - oss << name << ui32ArrayOffset; - } - else - { - oss << name; - } - if (psContext->IsVulkan() && group == RGROUP_UAV) - oss << "_origX" << 
ui32RegisterNumber << "X"; - } - else - { - oss << "UnknownResource" << ui32RegisterNumber; - } - std::string res = oss.str(); - // Prefix sampler names with 'sampler' unless it already starts with it - if (group == RGROUP_SAMPLER) - { - if (strncmp(res.c_str(), "sampler", 7) != 0) - res.insert(0, "sampler"); - } - - return res; -} - -void ResourceName(bstring targetStr, HLSLCrossCompilerContext* psContext, ResourceGroup group, const uint32_t ui32RegisterNumber, const int bZCompare) -{ - bstring glsl = (targetStr == NULL) ? *psContext->currentGLSLString : targetStr; - std::string res = ResourceName(psContext, group, ui32RegisterNumber, bZCompare); - bcatcstr(glsl, res.c_str()); -} - -std::string TextureSamplerName(ShaderInfo* psShaderInfo, const uint32_t ui32TextureRegisterNumber, const uint32_t ui32SamplerRegisterNumber, const int bZCompare) -{ - std::ostringstream oss; - const ResourceBinding* psTextureBinding = 0; - const ResourceBinding* psSamplerBinding = 0; - int foundTexture, foundSampler; - uint32_t i = 0; - uint32_t ui32ArrayOffset; - - foundTexture = psShaderInfo->GetResourceFromBindingPoint(RGROUP_TEXTURE, ui32TextureRegisterNumber, &psTextureBinding); - foundSampler = psShaderInfo->GetResourceFromBindingPoint(RGROUP_SAMPLER, ui32SamplerRegisterNumber, &psSamplerBinding); - - if (!foundTexture || !foundSampler) - { - oss << "UnknownResource" << ui32TextureRegisterNumber << "_" << ui32SamplerRegisterNumber; - return oss.str(); - } - - ui32ArrayOffset = ui32TextureRegisterNumber - psTextureBinding->ui32BindPoint; - - std::string texName = psTextureBinding->name; - - while (i < texName.length()) - { - //array syntax [X] becomes _0_ - //Otherwise declarations could end up as: - //uniform sampler2D SomeTextures[0]; - //uniform sampler2D SomeTextures[1]; - if (texName[i] == '[' || texName[i] == ']') - { - texName[i] = '_'; - } - - ++i; - } - - - if (bZCompare) - { - oss << "hlslcc_zcmp"; - } - - - if (ui32ArrayOffset) - { - oss << texName << ui32ArrayOffset << 
"TEX_with_SMP" << psSamplerBinding->name; - } - else - { - oss << texName << "TEX_with_SMP" << psSamplerBinding->name; - } - - return oss.str(); -} - -void ConcatTextureSamplerName(bstring str, ShaderInfo* psShaderInfo, const uint32_t ui32TextureRegisterNumber, const uint32_t ui32SamplerRegisterNumber, const int bZCompare) -{ - std::string texturesamplername = TextureSamplerName(psShaderInfo, ui32TextureRegisterNumber, ui32SamplerRegisterNumber, bZCompare); - bcatcstr(str, texturesamplername.c_str()); -} - -// Take an uniform buffer name and generate an instance name. -std::string UniformBufferInstanceName(HLSLCrossCompilerContext* psContext, const std::string& name) -{ - if (name == "$Globals") - { - char prefix = 'A'; - // Need to tweak Globals struct name to prevent clashes between shader stages - switch (psContext->psShader->eShaderType) - { - default: - ASSERT(0); - break; - case COMPUTE_SHADER: - prefix = 'C'; - break; - case VERTEX_SHADER: - prefix = 'V'; - break; - case PIXEL_SHADER: - prefix = 'P'; - break; - case GEOMETRY_SHADER: - prefix = 'G'; - break; - case HULL_SHADER: - prefix = 'H'; - break; - case DOMAIN_SHADER: - prefix = 'D'; - break; - } - - return std::string("_") + prefix + name.substr(1); - } - else - return std::string("_") + name; -} diff --git a/third_party/HLSLcc/src/toMetal.cpp b/third_party/HLSLcc/src/toMetal.cpp deleted file mode 100644 index d66f55e..0000000 --- a/third_party/HLSLcc/src/toMetal.cpp +++ /dev/null @@ -1,988 +0,0 @@ -#include "internal_includes/toMetal.h" -#include "internal_includes/HLSLCrossCompilerContext.h" -#include "internal_includes/Shader.h" -#include "internal_includes/debug.h" - -#include "internal_includes/Declaration.h" -#include "internal_includes/toGLSL.h" -#include "internal_includes/LoopTransform.h" -#include "internal_includes/HLSLccToolkit.h" -#include - -static void PrintStructDeclaration(HLSLCrossCompilerContext *psContext, bstring glsl, std::string &sname, StructDefinitions &defs) -{ - 
StructDefinition &d = defs[sname]; - if (d.m_IsPrinted) - return; - d.m_IsPrinted = true; - - - std::for_each(d.m_Dependencies.begin(), d.m_Dependencies.end(), [&psContext, &glsl, &defs](std::string &depName) - { - PrintStructDeclaration(psContext, glsl, depName, defs); - }); - - bformata(glsl, "struct %s\n{\n", sname.c_str()); - psContext->indent++; - std::for_each(d.m_Members.begin(), d.m_Members.end(), [&psContext, &glsl](const MemberDefinitions::value_type &mem) - { - psContext->AddIndentation(); - bcatcstr(glsl, mem.second.c_str()); - bcatcstr(glsl, ";\n"); - }); - - psContext->indent--; - bcatcstr(glsl, "};\n\n"); -} - -void ToMetal::PrintStructDeclarations(StructDefinitions &defs, const char *name) -{ - bstring glsl = *psContext->currentGLSLString; - StructDefinition &args = defs[name]; - std::for_each(args.m_Dependencies.begin(), args.m_Dependencies.end(), [this, glsl, &defs](std::string &sname) - { - PrintStructDeclaration(psContext, glsl, sname, defs); - }); -} - -static const char * GetPhaseFuncName(SHADER_PHASE_TYPE eType) -{ - switch (eType) - { - default: - case MAIN_PHASE: return ""; - case HS_GLOBAL_DECL_PHASE: return "hs_global_decls"; - case HS_FORK_PHASE: return "fork_phase"; - case HS_CTRL_POINT_PHASE: return "control_point_phase"; - case HS_JOIN_PHASE: return "join_phase"; - } -} - -static void DoHullShaderPassthrough(HLSLCrossCompilerContext *psContext) -{ - uint32_t i; - bstring glsl = *psContext->currentGLSLString; - - for (i = 0; i < psContext->psShader->sInfo.psInputSignatures.size(); i++) - { - const ShaderInfo::InOutSignature *psSig = &psContext->psShader->sInfo.psInputSignatures[i]; - - psContext->AddIndentation(); - if ((psSig->eSystemValueType == NAME_POSITION || psSig->semanticName == "POS") && psSig->ui32SemanticIndex == 0) - bformata(glsl, "%s%s = %scp[controlPointID].%s;\n", psContext->outputPrefix, "mtl_Position", psContext->inputPrefix, "mtl_Position"); - else - bformata(glsl, "%s%s%d = %scp[controlPointID].%s%d;\n", 
psContext->outputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex, psContext->inputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex); - } -} - -bool ToMetal::Translate() -{ - bstring glsl; - uint32_t i; - Shader* psShader = psContext->psShader; - uint32_t ui32Phase; - - psContext->psTranslator = this; - - SetIOPrefixes(); - psShader->ExpandSWAPCs(); - psShader->ForcePositionToHighp(); - psShader->AnalyzeIOOverlap(); - if ((psContext->flags & HLSLCC_FLAG_KEEP_VARYING_LOCATIONS) != 0) - psShader->SetMaxSemanticIndex(); - psShader->FindUnusedGlobals(psContext->flags); - - psContext->indent = 0; - - glsl = bfromcstralloc(1024 * 10, ""); - bstring bodyglsl = bfromcstralloc(1024 * 10, ""); - - psContext->glsl = glsl; - for (i = 0; i < psShader->asPhases.size(); ++i) - { - psShader->asPhases[i].postShaderCode = bfromcstralloc(1024 * 5, ""); - psShader->asPhases[i].earlyMain = bfromcstralloc(1024 * 5, ""); - } - - psContext->currentGLSLString = &glsl; - psShader->eTargetLanguage = LANG_METAL; - psShader->extensions = NULL; - psContext->currentPhase = MAIN_PHASE; - - psContext->ClearDependencyData(); - - const SHADER_PHASE_TYPE ePhaseFuncCallOrder[3] = { HS_CTRL_POINT_PHASE, HS_FORK_PHASE, HS_JOIN_PHASE }; - uint32_t ui32PhaseCallIndex; - int hasControlPointPhase = 0; - - const int maxThreadsPerThreadGroup = 32; - int numPatchesInThreadGroup = 0; - bool hasControlPoint = false; - bool hasPatchConstant = false; - std::string tessVertexFunctionArguments; - - if ((psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER) && (psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) != 0) - { - if (psContext->psDependencies) - { - m_StructDefinitions[""].m_Members = psContext->psDependencies->m_SharedFunctionMembers; - m_TextureSlots = psContext->psDependencies->m_SharedTextureSlots; - m_SamplerSlots = psContext->psDependencies->m_SharedSamplerSlots; - m_BufferSlots = psContext->psDependencies->m_SharedBufferSlots; - hasControlPoint = 
psContext->psDependencies->hasControlPoint; - hasPatchConstant = psContext->psDependencies->hasPatchConstant; - } - } - - ClampPartialPrecisions(); - - for (ui32Phase = 0; ui32Phase < psShader->asPhases.size(); ui32Phase++) - { - ShaderPhase &phase = psShader->asPhases[ui32Phase]; - phase.UnvectorizeImmMoves(); - psContext->DoDataTypeAnalysis(&phase); - phase.ResolveUAVProperties(psShader->sInfo); - ReserveUAVBindingSlots(&phase); // TODO: unify slot allocation code between gl/metal/vulkan - HLSLcc::DoLoopTransform(psContext, phase); - } - - psShader->PruneTempRegisters(); - - //Special case. Can have multiple phases. - if (psShader->eShaderType == HULL_SHADER) - { - psShader->ConsolidateHullTempVars(); - - // Find out if we have a passthrough hull shader - for (ui32Phase = 2; ui32Phase < psShader->asPhases.size(); ui32Phase++) - { - if (psShader->asPhases[ui32Phase].ePhase == HS_CTRL_POINT_PHASE) - hasControlPointPhase = 1; - } - } - - // Hull and Domain shaders get merged into vertex shader output - if (!(psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER)) - { - if (psContext->flags & HLSLCC_FLAG_DISABLE_FASTMATH) - bcatcstr(glsl, "#define UNITY_DISABLE_FASTMATH\n"); - bcatcstr(glsl, "#include \n#include \nusing namespace metal;\n"); - bcatcstr(glsl, "\n#if !(__HAVE_FMA__)\n#define fma(a,b,c) ((a) * (b) + (c))\n#endif\n\n"); - } - - if (psShader->eShaderType == HULL_SHADER) - { - psContext->indent++; - - // Phase 1 is always the global decls phase, no instructions - for (i = 0; i < psShader->asPhases[1].psDecl.size(); ++i) - { - TranslateDeclaration(&psShader->asPhases[1].psDecl[i]); - } - - if (hasControlPointPhase == 0) - { - DeclareHullShaderPassthrough(); - } - - for (ui32PhaseCallIndex = 0; ui32PhaseCallIndex < 3; ui32PhaseCallIndex++) - { - for (ui32Phase = 2; ui32Phase < psShader->asPhases.size(); ui32Phase++) - { - ShaderPhase *psPhase = &psShader->asPhases[ui32Phase]; - if (psPhase->ePhase != 
ePhaseFuncCallOrder[ui32PhaseCallIndex]) - continue; - psContext->currentPhase = ui32Phase; - - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - // bformata(glsl, "//%s declarations\n", GetPhaseFuncName(psPhase->ePhase)); - } - for (i = 0; i < psPhase->psDecl.size(); ++i) - { - TranslateDeclaration(&psPhase->psDecl[i]); - } - } - } - - psContext->indent--; - - numPatchesInThreadGroup = maxThreadsPerThreadGroup / std::max(psShader->sInfo.ui32TessInputControlPointCount, psShader->sInfo.ui32TessOutputControlPointCount); - } - else - { - psContext->indent++; - - for (i = 0; i < psShader->asPhases[0].psDecl.size(); ++i) - TranslateDeclaration(&psShader->asPhases[0].psDecl[i]); - - psContext->indent--; - - // Output default implementations for framebuffer index remap if needed - if (m_NeedFBOutputRemapDecl) - bcatcstr(glsl, "#ifndef XLT_REMAP_O\n\t#define XLT_REMAP_O {0, 1, 2, 3, 4, 5, 6, 7}\n#endif\nconstexpr constant uint xlt_remap_o[] = XLT_REMAP_O;\n"); - if (m_NeedFBInputRemapDecl) - bcatcstr(glsl, "#ifndef XLT_REMAP_I\n\t#define XLT_REMAP_I {0, 1, 2, 3, 4, 5, 6, 7}\n#endif\nconstexpr constant uint xlt_remap_i[] = XLT_REMAP_I;\n"); - - DeclareClipPlanes(&psShader->asPhases[0].psDecl[0], psShader->asPhases[0].psDecl.size()); - GenerateTexturesReflection(&psContext->m_Reflection); - } - - if (psShader->eShaderType == HULL_SHADER) - { - psContext->currentPhase = MAIN_PHASE; - - if (m_StructDefinitions["Mtl_ControlPoint"].m_Members.size() > 0) - { - hasControlPoint = true; - - m_StructDefinitions["Mtl_ControlPoint"].m_Dependencies.push_back("Mtl_ControlPoint"); - m_StructDefinitions["Mtl_ControlPointIn"].m_Dependencies.push_back("Mtl_ControlPointIn"); - PrintStructDeclarations(m_StructDefinitions, "Mtl_ControlPoint"); - PrintStructDeclarations(m_StructDefinitions, "Mtl_ControlPointIn"); - } - - if (m_StructDefinitions["Mtl_PatchConstant"].m_Members.size() > 0) - { - hasPatchConstant = true; - - 
m_StructDefinitions["Mtl_PatchConstant"].m_Dependencies.push_back("Mtl_PatchConstant"); - m_StructDefinitions["Mtl_PatchConstantIn"].m_Dependencies.push_back("Mtl_PatchConstantIn"); - PrintStructDeclarations(m_StructDefinitions, "Mtl_PatchConstant"); - PrintStructDeclarations(m_StructDefinitions, "Mtl_PatchConstantIn"); - } - - m_StructDefinitions["Mtl_KernelPatchInfo"].m_Members.push_back(std::make_pair("numPatches", "uint numPatches")); - m_StructDefinitions["Mtl_KernelPatchInfo"].m_Members.push_back(std::make_pair("numControlPointsPerPatch", "ushort numControlPointsPerPatch")); - - if (m_StructDefinitions["Mtl_KernelPatchInfo"].m_Members.size() > 0) - { - m_StructDefinitions["Mtl_KernelPatchInfo"].m_Dependencies.push_back("Mtl_KernelPatchInfo"); - PrintStructDeclarations(m_StructDefinitions, "Mtl_KernelPatchInfo"); - } - - if (m_StructDefinitions[GetInputStructName()].m_Members.size() > 0) - { - m_StructDefinitions[GetInputStructName()].m_Dependencies.push_back(GetInputStructName()); - if (psContext->psDependencies) - psContext->psDependencies->m_SharedDependencies.push_back(GetInputStructName()); - - // Hack, we're reusing Mtl_VertexOut as an hull shader input array, so no need to declare original contents - m_StructDefinitions[GetInputStructName()].m_Members.clear(); - - bstring vertexOut = bfromcstr(""); - bformata(vertexOut, "Mtl_VertexOut cp[%d]", psShader->sInfo.ui32TessOutputControlPointCount); - m_StructDefinitions[GetInputStructName()].m_Members.push_back(std::make_pair("cp", (const char *)vertexOut->data)); - bdestroy(vertexOut); - } - - if (psContext->psDependencies) - { - for (auto i = psContext->psDependencies->m_SharedFunctionMembers.begin(), in = psContext->psDependencies->m_SharedFunctionMembers.end(); i != in;) - { - tessVertexFunctionArguments += i->first.c_str(); - ++i; - - // we want to avoid trailing comma - if (i != in) - tessVertexFunctionArguments += ", "; - } - } - } - - if (psShader->eShaderType == DOMAIN_SHADER) - { - // For preserving 
data layout, reuse Mtl_ControlPoint/Mtl_PatchConstant from hull shader - if (hasControlPoint) - m_StructDefinitions[GetInputStructName()].m_Members.push_back(std::make_pair("cp", "patch_control_point cp")); - if (hasPatchConstant) - m_StructDefinitions[GetInputStructName()].m_Members.push_back(std::make_pair("patch", "Mtl_PatchConstantIn patch")); - } - - if ((psShader->eShaderType == VERTEX_SHADER || psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER) && (psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) != 0) - { - if (psContext->psDependencies) - { - psContext->psDependencies->m_SharedFunctionMembers = m_StructDefinitions[""].m_Members; - psContext->psDependencies->m_SharedTextureSlots = m_TextureSlots; - psContext->psDependencies->m_SharedTextureSlots.SaveTotalShaderStageAllocationsCount(); - psContext->psDependencies->m_SharedSamplerSlots = m_SamplerSlots; - psContext->psDependencies->m_SharedSamplerSlots.SaveTotalShaderStageAllocationsCount(); - psContext->psDependencies->m_SharedBufferSlots = m_BufferSlots; - psContext->psDependencies->m_SharedBufferSlots.SaveTotalShaderStageAllocationsCount(); - } - } - - if (m_StructDefinitions[GetInputStructName()].m_Members.size() > 0) - { - if (psShader->eShaderType == HULL_SHADER) - { - if (psContext->psDependencies) - { - // if we go for fully procedural geometry we might end up without Mtl_VertexIn - for (std::vector::const_iterator itr = psContext->psDependencies->m_SharedDependencies.begin(); itr != psContext->psDependencies->m_SharedDependencies.end(); itr++) - { - if (*itr == "Mtl_VertexIn") - { - m_StructDefinitions[""].m_Members.push_back(std::make_pair("vertexInput", "Mtl_VertexIn vertexInput [[ stage_in ]]")); - if (tessVertexFunctionArguments.length()) - tessVertexFunctionArguments += ", "; - tessVertexFunctionArguments += "vertexInput"; - break; - } - } - } - - m_StructDefinitions[""].m_Members.push_back(std::make_pair("tID", "uint2 tID [[ thread_position_in_grid ]]")); - 
m_StructDefinitions[""].m_Members.push_back(std::make_pair("groupID", "ushort2 groupID [[ threadgroup_position_in_grid ]]")); - - bstring buffer = bfromcstr(""); - uint32_t slot = 0; - - if (hasControlPoint) - { - slot = m_BufferSlots.GetBindingSlot(0xffff - 1, BindingSlotAllocator::ConstantBuffer); - bformata(buffer, "device Mtl_ControlPoint *controlPoints [[ buffer(%d) ]]", slot); - m_StructDefinitions[""].m_Members.push_back(std::make_pair("controlPoints", (const char *)buffer->data)); - btrunc(buffer, 0); - } - - if (hasPatchConstant) - { - slot = m_BufferSlots.GetBindingSlot(0xffff - 2, BindingSlotAllocator::ConstantBuffer); - bformata(buffer, "device Mtl_PatchConstant *patchConstants [[ buffer(%d) ]]", slot); - m_StructDefinitions[""].m_Members.push_back(std::make_pair("patchConstants", (const char *)buffer->data)); - btrunc(buffer, 0); - } - - slot = m_BufferSlots.GetBindingSlot(0xffff - 3, BindingSlotAllocator::ConstantBuffer); - bformata(buffer, "device %s *tessFactors [[ buffer(%d) ]]", psShader->sInfo.eTessDomain == TESSELLATOR_DOMAIN_QUAD ? 
"MTLQuadTessellationFactorsHalf" : "MTLTriangleTessellationFactorsHalf", slot); - m_StructDefinitions[""].m_Members.push_back(std::make_pair("tessFactors", (const char *)buffer->data)); - btrunc(buffer, 0); - - slot = m_BufferSlots.GetBindingSlot(0xffff - 4, BindingSlotAllocator::ConstantBuffer); - bformata(buffer, "constant Mtl_KernelPatchInfo &patchInfo [[ buffer(%d) ]]", slot); - m_StructDefinitions[""].m_Members.push_back(std::make_pair("patchInfo", (const char *)buffer->data)); - btrunc(buffer, 0); - - bdestroy(buffer); - } - else if (psShader->eShaderType == VERTEX_SHADER && (psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) != 0) - { - m_StructDefinitions[""].m_Members.push_back(std::make_pair("input", GetInputStructName() + " input")); - } - else - { - m_StructDefinitions[""].m_Members.push_back(std::make_pair("input", GetInputStructName() + " input [[ stage_in ]]")); - } - - m_StructDefinitions[""].m_Dependencies.push_back(GetInputStructName()); - if (psContext->psDependencies) - psContext->psDependencies->m_SharedDependencies.push_back(GetInputStructName()); - } - - if ((psShader->eShaderType == VERTEX_SHADER || psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER) && (psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) != 0) - { - // m_StructDefinitions is inherited between tessellation shader stages but some builtins need exceptions - std::for_each(m_StructDefinitions[""].m_Members.begin(), m_StructDefinitions[""].m_Members.end(), [&psShader](MemberDefinitions::value_type &mem) - { - if (mem.first == "mtl_InstanceID") - { - if (psShader->eShaderType == VERTEX_SHADER) - mem.second.assign("uint mtl_InstanceID"); - else if (psShader->eShaderType == HULL_SHADER) - mem.second.assign("// mtl_InstanceID passed through groupID"); - } - else if (mem.first == "mtl_BaseInstance") - { - if (psShader->eShaderType == VERTEX_SHADER) - mem.second.assign("uint mtl_BaseInstance"); - else if (psShader->eShaderType == HULL_SHADER) - 
mem.second.assign("// mtl_BaseInstance ignored"); - } - else if (mem.first == "mtl_VertexID") - { - if (psShader->eShaderType == VERTEX_SHADER) - mem.second.assign("uint mtl_VertexID"); - else if (psShader->eShaderType == HULL_SHADER) - mem.second.assign("// mtl_VertexID generated in compute kernel"); - else if (psShader->eShaderType == DOMAIN_SHADER) - mem.second.assign("// mtl_VertexID unused"); - } - else if (mem.first == "mtl_BaseVertex") - { - if (psShader->eShaderType == VERTEX_SHADER) - mem.second.assign("uint mtl_BaseVertex"); - else if (psShader->eShaderType == HULL_SHADER) - mem.second.assign("// mtl_BaseVertex generated in compute kernel"); - else if (psShader->eShaderType == DOMAIN_SHADER) - mem.second.assign("// mtl_BaseVertex unused"); - } - }); - } - - if (psShader->eShaderType != COMPUTE_SHADER) - { - if (m_StructDefinitions[GetOutputStructName()].m_Members.size() > 0) - { - m_StructDefinitions[""].m_Dependencies.push_back(GetOutputStructName()); - if (psContext->psDependencies) - psContext->psDependencies->m_SharedDependencies.push_back(GetOutputStructName()); - } - } - - PrintStructDeclarations(m_StructDefinitions); - - psContext->currentGLSLString = &bodyglsl; - - bool popPragmaDiagnostic = false; - if (psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER) - { - popPragmaDiagnostic = true; - - bcatcstr(bodyglsl, "#pragma clang diagnostic push\n"); - bcatcstr(bodyglsl, "#pragma clang diagnostic ignored \"-Wunused-parameter\"\n"); - } - - switch (psShader->eShaderType) - { - case VERTEX_SHADER: - if ((psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) == 0) - bcatcstr(bodyglsl, "vertex Mtl_VertexOut xlatMtlMain(\n"); - else - bcatcstr(bodyglsl, "static Mtl_VertexOut vertexFunction(\n"); - break; - case PIXEL_SHADER: - if (psShader->sInfo.bEarlyFragmentTests) - bcatcstr(bodyglsl, "[[early_fragment_tests]]\n"); - if (m_StructDefinitions[GetOutputStructName()].m_Members.size() > 0) - bcatcstr(bodyglsl, "fragment 
Mtl_FragmentOut xlatMtlMain(\n"); - else - bcatcstr(bodyglsl, "fragment void xlatMtlMain(\n"); - break; - case COMPUTE_SHADER: - bcatcstr(bodyglsl, "kernel void computeMain(\n"); - break; - case HULL_SHADER: - bcatcstr(bodyglsl, "kernel void patchKernel(\n"); - break; - case DOMAIN_SHADER: - { - const char *patchType = psShader->sInfo.eTessDomain == TESSELLATOR_DOMAIN_QUAD ? "quad" : "triangle"; - uint32_t patchCount = psShader->sInfo.ui32TessOutputControlPointCount; - bformata(bodyglsl, "[[patch(%s, %d)]] vertex Mtl_VertexOutPostTess xlatMtlMain(\n", patchType, patchCount); - break; - } - default: - // Not supported - ASSERT(0); - return false; - } - - psContext->indent++; - for (auto itr = m_StructDefinitions[""].m_Members.begin();;) - { - if (itr == m_StructDefinitions[""].m_Members.end()) - break; - - psContext->AddIndentation(); - bcatcstr(bodyglsl, itr->second.c_str()); - - itr++; - if (itr != m_StructDefinitions[""].m_Members.end()) - bcatcstr(bodyglsl, ",\n"); - } - - // Figure and declare counters and their binds (we also postponed buffer reflection until now) - for (auto it = m_BufferReflections.begin(); it != m_BufferReflections.end(); ++it) - { - uint32_t bind = it->second.bind; - if (it->second.hasCounter) - { - const uint32_t counterBind = m_BufferSlots.PeekFirstFreeSlot(); - m_BufferSlots.ReserveBindingSlot(counterBind, BindingSlotAllocator::UAV); - - bformata(bodyglsl, ",\n\t\tdevice atomic_uint* %s_counter [[ buffer(%d) ]]", it->first.c_str(), counterBind); - - // Offset with 1 so we can capture counters that are bound to slot 0 (if, say, user decides to start buffers at register 1 or higher) - bind |= ((counterBind + 1) << 16); - } - psContext->m_Reflection.OnBufferBinding(it->first, bind, it->second.isUAV); - } - - bcatcstr(bodyglsl, ")\n{\n"); - - if (popPragmaDiagnostic) - bcatcstr(bodyglsl, "#pragma clang diagnostic pop\n"); - - if (psShader->eShaderType != COMPUTE_SHADER) - { - if (psShader->eShaderType == VERTEX_SHADER) - { - // Fix HLSL 
compatibility with DrawProceduralIndirect, SV_InstanceID always starts at 0 but with Metal, a base instance was not subtracted for equal behavior - // Base semantics available everywhere starting with iOS9 (except hardware limitation exists with the original Apple A7/A8 GPUs, causing UNITY_SUPPORT_INDIRECT_BUFFERS=0) - std::for_each(m_StructDefinitions[""].m_Members.begin(), m_StructDefinitions[""].m_Members.end(), [&](MemberDefinitions::value_type &mem) - { - if (mem.first == "mtl_InstanceID") - { - bcatcstr(bodyglsl, "#if !UNITY_SUPPORT_INDIRECT_BUFFERS\n"); - psContext->AddIndentation(); - bcatcstr(bodyglsl, "mtl_BaseInstance = 0;\n"); - bcatcstr(bodyglsl, "#endif\n"); - psContext->AddIndentation(); - bcatcstr(bodyglsl, "mtl_InstanceID = mtl_InstanceID - mtl_BaseInstance;\n"); - } - else if (mem.first == "mtl_VertexID") - { - bcatcstr(bodyglsl, "#if !UNITY_SUPPORT_INDIRECT_BUFFERS\n"); - psContext->AddIndentation(); - bcatcstr(bodyglsl, "mtl_BaseVertex = 0;\n"); - bcatcstr(bodyglsl, "#endif\n"); - psContext->AddIndentation(); - bcatcstr(bodyglsl, "mtl_VertexID = mtl_VertexID - mtl_BaseVertex;\n"); - } - }); - } - - if (m_StructDefinitions[GetOutputStructName().c_str()].m_Members.size() > 0) - { - psContext->AddIndentation(); - bcatcstr(bodyglsl, GetOutputStructName().c_str()); - bcatcstr(bodyglsl, " output;\n"); - } - } - - if (psShader->eShaderType == HULL_SHADER) - { - if (hasPatchConstant) - { - psContext->AddIndentation(); - bcatcstr(bodyglsl, "Mtl_PatchConstant patch;\n"); - } - - psContext->AddIndentation(); - bformata(bodyglsl, "const uint numPatchesInThreadGroup = %d;\n", numPatchesInThreadGroup); // Hardcoded because of threadgroup array below - psContext->AddIndentation(); - bcatcstr(bodyglsl, "const uint patchID = (tID.x / patchInfo.numControlPointsPerPatch);\n"); - psContext->AddIndentation(); - bcatcstr(bodyglsl, "const bool patchValid = (patchID < patchInfo.numPatches);\n"); - - psContext->AddIndentation(); - bcatcstr(bodyglsl, "const uint 
mtl_BaseInstance = 0;\n"); - psContext->AddIndentation(); - bcatcstr(bodyglsl, "const uint mtl_InstanceID = groupID.y - mtl_BaseInstance;\n"); - psContext->AddIndentation(); - bcatcstr(bodyglsl, "const uint internalPatchID = mtl_InstanceID * patchInfo.numPatches + patchID;\n"); - psContext->AddIndentation(); - bcatcstr(bodyglsl, "const uint patchIDInThreadGroup = (patchID % numPatchesInThreadGroup);\n"); - - psContext->AddIndentation(); - bcatcstr(bodyglsl, "const uint controlPointID = (tID.x % patchInfo.numControlPointsPerPatch);\n"); - psContext->AddIndentation(); - bcatcstr(bodyglsl, "const uint mtl_BaseVertex = 0;\n"); - psContext->AddIndentation(); - bcatcstr(bodyglsl, "const uint mtl_VertexID = ((mtl_InstanceID * (patchInfo.numControlPointsPerPatch * patchInfo.numPatches)) + tID.x) - mtl_BaseVertex;\n"); - - psContext->AddIndentation(); - bformata(bodyglsl, "threadgroup %s inputGroup[numPatchesInThreadGroup];\n", GetInputStructName().c_str()); - psContext->AddIndentation(); - bformata(bodyglsl, "threadgroup %s &input = inputGroup[patchIDInThreadGroup];\n", GetInputStructName().c_str()); - - psContext->AddIndentation(); - std::string tessFactorBufferType = psShader->sInfo.eTessDomain == TESSELLATOR_DOMAIN_QUAD ? 
"MTLQuadTessellationFactorsHalf" : "MTLTriangleTessellationFactorsHalf"; - bformata(bodyglsl, "%s tessFactor;\n", tessFactorBufferType.c_str()); - } - - // There are cases when there are no control point phases and we have to do passthrough - if (psShader->eShaderType == HULL_SHADER && hasControlPointPhase == 0) - { - psContext->AddIndentation(); - bcatcstr(bodyglsl, "if (patchValid) {\n"); - psContext->indent++; - - // Passthrough control point phase, run the rest only once per patch - psContext->AddIndentation(); - bformata(bodyglsl, "input.cp[controlPointID] = vertexFunction(%s);\n", tessVertexFunctionArguments.c_str()); - - DoHullShaderPassthrough(psContext); - - psContext->indent--; - psContext->AddIndentation(); - bcatcstr(bodyglsl, "}\n"); - - psContext->AddIndentation(); - bcatcstr(bodyglsl, "threadgroup_barrier(mem_flags::mem_threadgroup);\n"); - - psContext->AddIndentation(); - bcatcstr(bodyglsl, "if (!patchValid) {\n"); - psContext->indent++; - psContext->AddIndentation(); - bcatcstr(bodyglsl, "return;\n"); - psContext->indent--; - psContext->AddIndentation(); - bcatcstr(bodyglsl, "}\n"); - } - - if (psShader->eShaderType == HULL_SHADER) - { - for (ui32PhaseCallIndex = 0; ui32PhaseCallIndex < 3; ui32PhaseCallIndex++) - { - for (ui32Phase = 2; ui32Phase < psShader->asPhases.size(); ui32Phase++) - { - uint32_t i; - ShaderPhase *psPhase = &psShader->asPhases[ui32Phase]; - if (psPhase->ePhase != ePhaseFuncCallOrder[ui32PhaseCallIndex]) - continue; - psContext->currentPhase = ui32Phase; - - if (psPhase->earlyMain->slen > 1) - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(bodyglsl, "//--- Start Early Main ---\n"); - } - - bconcat(bodyglsl, psPhase->earlyMain); - - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(bodyglsl, "//--- End Early Main ---\n"); - } - } - - psContext->AddIndentation(); - bformata(bodyglsl, "// %s%d\n", 
GetPhaseFuncName(psShader->asPhases[ui32Phase].ePhase), ui32Phase); - if (psPhase->ui32InstanceCount > 1) - { - psContext->AddIndentation(); - bformata(bodyglsl, "for (int phaseInstanceID = 0; phaseInstanceID < %d; phaseInstanceID++) {\n", psPhase->ui32InstanceCount); - psContext->indent++; - } - else - { - if (psContext->currentPhase == HS_CTRL_POINT_PHASE && hasControlPointPhase == 1) - { - psContext->AddIndentation(); - bcatcstr(bodyglsl, "if (patchValid) {\n"); - psContext->indent++; - - psContext->AddIndentation(); - bformata(bodyglsl, "input.cp[controlPointID] = vertexFunction(%s);\n", tessVertexFunctionArguments.c_str()); - } - else - { - psContext->AddIndentation(); - bcatcstr(bodyglsl, "{\n"); - psContext->indent++; - } - } - - if (psPhase->psInst.size() > 0) - { - //The minus one here is remove the return statement at end of phases. - //We don't want to translate that, we'll just end the function body. - ASSERT(psPhase->psInst[psPhase->psInst.size() - 1].eOpcode == OPCODE_RET); - for (i = 0; i < psPhase->psInst.size() - 1; ++i) - { - TranslateInstruction(&psPhase->psInst[i]); - } - } - - psContext->indent--; - psContext->AddIndentation(); - bformata(bodyglsl, "}\n"); - - if (psPhase->hasPostShaderCode) - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(bodyglsl, "//--- Post shader code ---\n"); - } - - bconcat(bodyglsl, psPhase->postShaderCode); - - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(bodyglsl, "//--- End post shader code ---\n"); - } - } - - if (psShader->asPhases[ui32Phase].ePhase == HS_CTRL_POINT_PHASE) - { - // We're done printing control point phase, run the rest only once per patch - psContext->AddIndentation(); - bcatcstr(bodyglsl, "threadgroup_barrier(mem_flags::mem_threadgroup);\n"); - - psContext->AddIndentation(); - bcatcstr(bodyglsl, "if (!patchValid) {\n"); - psContext->indent++; - 
psContext->AddIndentation(); - bcatcstr(bodyglsl, "return;\n"); - psContext->indent--; - psContext->AddIndentation(); - bcatcstr(bodyglsl, "}\n"); - } - } - } - - if (hasControlPoint) - { - psContext->AddIndentation(); - bcatcstr(bodyglsl, "controlPoints[mtl_VertexID] = output;\n"); - } - - psContext->AddIndentation(); - bcatcstr(bodyglsl, "tessFactors[internalPatchID] = tessFactor;\n"); - - if (hasPatchConstant) - { - psContext->AddIndentation(); - bcatcstr(bodyglsl, "patchConstants[internalPatchID] = patch;\n"); - } - - if (psContext->psDependencies) - { - //Save partitioning and primitive type for use by domain shader. - psContext->psDependencies->eTessOutPrim = psShader->sInfo.eTessOutPrim; - psContext->psDependencies->eTessPartitioning = psShader->sInfo.eTessPartitioning; - psContext->psDependencies->numPatchesInThreadGroup = numPatchesInThreadGroup; - psContext->psDependencies->hasControlPoint = hasControlPoint; - psContext->psDependencies->hasPatchConstant = hasPatchConstant; - } - } - else - { - if (psContext->psShader->asPhases[0].earlyMain->slen > 1) - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(bodyglsl, "//--- Start Early Main ---\n"); - } - - bconcat(bodyglsl, psContext->psShader->asPhases[0].earlyMain); - - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(bodyglsl, "//--- End Early Main ---\n"); - } - } - - for (i = 0; i < psShader->asPhases[0].psInst.size(); ++i) - { - TranslateInstruction(&psShader->asPhases[0].psInst[i]); - } - } - - psContext->indent--; - - bcatcstr(bodyglsl, "}\n"); - - psContext->currentGLSLString = &glsl; - - if (psShader->eShaderType == HULL_SHADER && psContext->psDependencies) - { - psContext->m_Reflection.OnTessellationKernelInfo(psContext->psDependencies->m_SharedBufferSlots.SaveTotalShaderStageAllocationsCount()); - } - - if (psShader->eShaderType == DOMAIN_SHADER && 
psContext->psDependencies) - { - int mtlTessellationPartitionMode = -1; - int mtlWinding = -1; - - switch (psContext->psDependencies->eTessPartitioning) - { - case TESSELLATOR_PARTITIONING_INTEGER: - mtlTessellationPartitionMode = 1; // MTLTessellationPartitionModeInteger - break; - case TESSELLATOR_PARTITIONING_POW2: - mtlTessellationPartitionMode = 0; // MTLTessellationPartitionModePow2 - break; - case TESSELLATOR_PARTITIONING_FRACTIONAL_ODD: - mtlTessellationPartitionMode = 2; // MTLTessellationPartitionModeFractionalOdd - break; - case TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN: - mtlTessellationPartitionMode = 3; // MTLTessellationPartitionModeFractionalEven - break; - case TESSELLATOR_PARTITIONING_UNDEFINED: - default: - ASSERT(0); - break; - } - - switch (psContext->psDependencies->eTessOutPrim) - { - case TESSELLATOR_OUTPUT_TRIANGLE_CW: - mtlWinding = 0; // MTLWindingClockwise - break; - case TESSELLATOR_OUTPUT_TRIANGLE_CCW: - mtlWinding = 1; // MTLWindingCounterClockwise - break; - case TESSELLATOR_OUTPUT_POINT: - psContext->m_Reflection.OnDiagnostics("Metal Tessellation: outputtopology(\"point\") not supported.", 0, true); - break; - case TESSELLATOR_OUTPUT_LINE: - psContext->m_Reflection.OnDiagnostics("Metal Tessellation: outputtopology(\"line\") not supported.", 0, true); - break; - case TESSELLATOR_OUTPUT_UNDEFINED: - default: - ASSERT(0); - break; - } - - psContext->m_Reflection.OnTessellationInfo(mtlTessellationPartitionMode, mtlWinding, (uint32_t)psContext->psDependencies->fMaxTessFactor, psContext->psDependencies->numPatchesInThreadGroup); - } - - bcatcstr(glsl, m_ExtraGlobalDefinitions.c_str()); - - // Print out extra functions we generated - std::for_each(m_FunctionDefinitions.begin(), m_FunctionDefinitions.end(), [&glsl](const FunctionDefinitions::value_type &p) - { - bcatcstr(glsl, p.second.c_str()); - bcatcstr(glsl, "\n"); - }); - - // And then the actual function body - bconcat(glsl, bodyglsl); - bdestroy(bodyglsl); - - return true; -} - -void 
ToMetal::DeclareExtraFunction(const std::string &name, const std::string &body) -{ - if (m_FunctionDefinitions.find(name) != m_FunctionDefinitions.end()) - return; - m_FunctionDefinitions.insert(std::make_pair(name, body)); -} - -std::string ToMetal::GetOutputStructName() const -{ - switch (psContext->psShader->eShaderType) - { - case VERTEX_SHADER: - return "Mtl_VertexOut"; - case PIXEL_SHADER: - return "Mtl_FragmentOut"; - case HULL_SHADER: - if (psContext->psShader->asPhases[psContext->currentPhase].ePhase == HS_FORK_PHASE || - psContext->psShader->asPhases[psContext->currentPhase].ePhase == HS_JOIN_PHASE) - return "Mtl_PatchConstant"; - return "Mtl_ControlPoint"; - case DOMAIN_SHADER: - return "Mtl_VertexOutPostTess"; - default: - ASSERT(0); - return ""; - } -} - -std::string ToMetal::GetInputStructName() const -{ - switch (psContext->psShader->eShaderType) - { - case VERTEX_SHADER: - return "Mtl_VertexIn"; - case PIXEL_SHADER: - return "Mtl_FragmentIn"; - case COMPUTE_SHADER: - return "Mtl_KernelIn"; - case HULL_SHADER: - return "Mtl_HullIn"; - case DOMAIN_SHADER: - return "Mtl_VertexInPostTess"; - default: - ASSERT(0); - return ""; - } -} - -std::string ToMetal::GetCBName(const std::string& cbName) const -{ - std::string output = cbName; - if (cbName[0] == '$') - { - // "$Globals" should have different names in different shaders so that CbKey can discretely identify a CB. 
- switch (psContext->psShader->eShaderType) - { - case VERTEX_SHADER: - case HULL_SHADER: - case DOMAIN_SHADER: - output[0] = 'V'; - break; - case PIXEL_SHADER: - output[0] = 'F'; - break; - case COMPUTE_SHADER: - output = cbName.substr(1); - break; - default: - ASSERT(0); - break; - } - } - return output; -} - -void ToMetal::SetIOPrefixes() -{ - switch (psContext->psShader->eShaderType) - { - case VERTEX_SHADER: - case HULL_SHADER: - case DOMAIN_SHADER: - psContext->inputPrefix = "input."; - psContext->outputPrefix = "output."; - break; - - case PIXEL_SHADER: - psContext->inputPrefix = "input."; - psContext->outputPrefix = "output."; - break; - - case COMPUTE_SHADER: - psContext->inputPrefix = ""; - psContext->outputPrefix = ""; - break; - default: - ASSERT(0); - break; - } -} - -void ToMetal::ClampPartialPrecisions() -{ - HLSLcc::ForEachOperand(psContext->psShader->asPhases[0].psInst.begin(), psContext->psShader->asPhases[0].psInst.end(), FEO_FLAG_ALL, - [](std::vector::iterator &i, Operand *o, uint32_t flags) - { - if (o->eMinPrecision == OPERAND_MIN_PRECISION_FLOAT_2_8) - o->eMinPrecision = OPERAND_MIN_PRECISION_FLOAT_16; - }); -} - -void ToMetal::ReserveUAVBindingSlots(ShaderPhase *phase) -{ - for (uint32_t p = 0; p < phase->psDecl.size(); ++p) - { - uint32_t regNo = phase->psDecl[p].asOperands[0].ui32RegisterNumber; - - if (phase->psDecl[p].eOpcode == OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW || - phase->psDecl[p].eOpcode == OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED) - { - m_BufferSlots.ReserveBindingSlot(regNo, BindingSlotAllocator::RWBuffer); - } - else if (phase->psDecl[p].eOpcode == OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED) - { - // Typed buffers are atm faked using structured buffers -> bind in buffer space - if (phase->psDecl[p].value.eResourceDimension == RESOURCE_DIMENSION_BUFFER) - m_BufferSlots.ReserveBindingSlot(regNo, BindingSlotAllocator::RWBuffer); - else - m_TextureSlots.ReserveBindingSlot(regNo, BindingSlotAllocator::UAV); - } - } -} diff --git 
a/third_party/HLSLcc/src/toMetalDeclaration.cpp b/third_party/HLSLcc/src/toMetalDeclaration.cpp deleted file mode 100644 index 73a0cd0..0000000 --- a/third_party/HLSLcc/src/toMetalDeclaration.cpp +++ /dev/null @@ -1,2454 +0,0 @@ -#include "internal_includes/toMetal.h" -#include "internal_includes/debug.h" -#include "internal_includes/HLSLccToolkit.h" -#include "internal_includes/Declaration.h" -#include "internal_includes/HLSLCrossCompilerContext.h" -#include "internal_includes/languages.h" -#include -#include -#include - -using namespace HLSLcc; - -#ifndef fpcheck -#ifdef _MSC_VER -#define fpcheck(x) (_isnan(x) || !_finite(x)) -#else -#define fpcheck(x) (std::isnan(x) || std::isinf(x)) -#endif -#endif // #ifndef fpcheck - - -bool ToMetal::TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix, int *iIgnoreRedirect) -{ - if (sig) - { - if (psContext->psShader->eShaderType == HULL_SHADER && sig->semanticName == "SV_TessFactor") - { - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - ASSERT(sig->ui32SemanticIndex <= 3); - std::ostringstream oss; - oss << "tessFactor.edgeTessellationFactor[" << sig->ui32SemanticIndex << "]"; - result = oss.str(); - if (outSkipPrefix != NULL) *outSkipPrefix = true; - if (iIgnoreRedirect != NULL) *iIgnoreRedirect = 1; - return true; - } - - if (psContext->psShader->eShaderType == HULL_SHADER && sig->semanticName == "SV_InsideTessFactor") - { - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - ASSERT(sig->ui32SemanticIndex <= 1); - std::ostringstream oss; - oss << "tessFactor.insideTessellationFactor"; - if (psContext->psShader->sInfo.eTessDomain != TESSELLATOR_DOMAIN_TRI) - oss << "[" << sig->ui32SemanticIndex << "]"; - result = oss.str(); - if (outSkipPrefix != NULL) *outSkipPrefix = true; - if (iIgnoreRedirect != NULL) *iIgnoreRedirect = 1; - return true; - } - - if (sig->semanticName == 
"SV_InstanceID") - { - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - } - - if (((sig->eSystemValueType == NAME_POSITION || sig->semanticName == "POS") && sig->ui32SemanticIndex == 0) && - ((psContext->psShader->eShaderType == VERTEX_SHADER && (psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) == 0))) - { - result = "mtl_Position"; - return true; - } - - switch (sig->eSystemValueType) - { - case NAME_POSITION: - if (psContext->psShader->eShaderType == PIXEL_SHADER) - result = "hlslcc_FragCoord"; - else - result = "mtl_Position"; - if (outSkipPrefix != NULL) *outSkipPrefix = true; - return true; - case NAME_RENDER_TARGET_ARRAY_INDEX: - result = "mtl_Layer"; - if (outSkipPrefix != NULL) *outSkipPrefix = true; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - return true; - case NAME_CLIP_DISTANCE: - { - // this is temp variable, declaration and redirecting to actual output is handled in DeclareClipPlanes - char tmpName[128]; sprintf(tmpName, "phase%d_ClipDistance%d", psContext->currentPhase, sig->ui32SemanticIndex); - result = tmpName; - if (outSkipPrefix != NULL) *outSkipPrefix = true; - if (iIgnoreRedirect != NULL) *iIgnoreRedirect = 1; - return true; - } - case NAME_VIEWPORT_ARRAY_INDEX: - result = "mtl_ViewPortIndex"; - if (outSkipPrefix != NULL) *outSkipPrefix = true; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - return true; - case NAME_VERTEX_ID: - result = "mtl_VertexID"; - if (outSkipPrefix != NULL) *outSkipPrefix = true; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - return true; - case NAME_INSTANCE_ID: - result = "mtl_InstanceID"; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - if (outSkipPrefix != NULL) *outSkipPrefix = true; - return true; - case NAME_IS_FRONT_FACE: - result = "(mtl_FrontFace ? 
0xffffffffu : uint(0))"; - if (outSkipPrefix != NULL) *outSkipPrefix = true; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - return true; - case NAME_SAMPLE_INDEX: - result = "mtl_SampleID"; - if (outSkipPrefix != NULL) *outSkipPrefix = true; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - return true; - - default: - break; - } - - if (psContext->psShader->asPhases[psContext->currentPhase].ePhase == HS_CTRL_POINT_PHASE || - psContext->psShader->asPhases[psContext->currentPhase].ePhase == HS_FORK_PHASE) - { - std::ostringstream oss; - oss << sig->semanticName << sig->ui32SemanticIndex; - result = oss.str(); - return true; - } - } - - switch (psOperand->eType) - { - case OPERAND_TYPE_INPUT_COVERAGE_MASK: - case OPERAND_TYPE_OUTPUT_COVERAGE_MASK: - result = "mtl_CoverageMask"; - if (outSkipPrefix != NULL) *outSkipPrefix = true; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - return true; - case OPERAND_TYPE_INPUT_THREAD_ID: - result = "mtl_ThreadID"; - if (outSkipPrefix != NULL) *outSkipPrefix = true; - return true; - case OPERAND_TYPE_INPUT_THREAD_GROUP_ID: - result = "mtl_ThreadGroupID"; - if (outSkipPrefix != NULL) *outSkipPrefix = true; - return true; - case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP: - result = "mtl_ThreadIDInGroup"; - if (outSkipPrefix != NULL) *outSkipPrefix = true; - return true; - case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED: - result = "mtl_ThreadIndexInThreadGroup"; - if (outSkipPrefix != NULL) *outSkipPrefix = true; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - return true; - case OPERAND_TYPE_INPUT_DOMAIN_POINT: - result = "mtl_TessCoord"; - if (outSkipPrefix != NULL) *outSkipPrefix = true; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 1; - return true; - case OPERAND_TYPE_OUTPUT_DEPTH: - case OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL: - case OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL: - result = "mtl_Depth"; - if (outSkipPrefix != NULL) *outSkipPrefix = true; - if (pui32IgnoreSwizzle) - *pui32IgnoreSwizzle = 
1; - return true; - case OPERAND_TYPE_OUTPUT: - case OPERAND_TYPE_INPUT: - { - std::ostringstream oss; - ASSERT(sig != nullptr); - oss << sig->semanticName << sig->ui32SemanticIndex; - result = oss.str(); - if (HLSLcc::WriteMaskToComponentCount(sig->ui32Mask) == 1 && pui32IgnoreSwizzle != NULL) - *pui32IgnoreSwizzle = 1; - return true; - } - case OPERAND_TYPE_INPUT_PATCH_CONSTANT: - { - std::ostringstream oss; - ASSERT(sig != nullptr); - oss << sig->semanticName << sig->ui32SemanticIndex; - result = oss.str(); - if (outSkipPrefix != NULL) *outSkipPrefix = true; - return true; - } - case OPERAND_TYPE_INPUT_CONTROL_POINT: - { - std::ostringstream oss; - ASSERT(sig != nullptr); - oss << sig->semanticName << sig->ui32SemanticIndex; - result = oss.str(); - if (outSkipPrefix != NULL) *outSkipPrefix = true; - return true; - break; - } - default: - ASSERT(0); - break; - } - - - return false; -} - -void ToMetal::DeclareBuiltinInput(const Declaration *psDecl) -{ - const SPECIAL_NAME eSpecialName = psDecl->asOperands[0].eSpecialName; - - Shader* psShader = psContext->psShader; - const Operand* psOperand = &psDecl->asOperands[0]; - const int regSpace = psOperand->GetRegisterSpace(psContext); - ASSERT(regSpace == 0); - - // we need to at least mark if they are scalars or not (as we might need to use vector ctor) - if (psOperand->GetNumInputElements(psContext) == 1) - psShader->abScalarInput[regSpace][psOperand->ui32RegisterNumber] |= (int)psOperand->ui32CompMask; - - switch (eSpecialName) - { - case NAME_POSITION: - ASSERT(psContext->psShader->eShaderType == PIXEL_SHADER); - m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_FragCoord", "float4 mtl_FragCoord [[ position ]]")); - bcatcstr(GetEarlyMain(psContext), "float4 hlslcc_FragCoord = float4(mtl_FragCoord.xyz, 1.0/mtl_FragCoord.w);\n"); - break; - case NAME_RENDER_TARGET_ARRAY_INDEX: - // Only supported on a Mac - m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_Layer", "uint mtl_Layer [[ 
render_target_array_index ]]")); - break; - case NAME_CLIP_DISTANCE: - ASSERT(0); // Should never be an input - break; - case NAME_VIEWPORT_ARRAY_INDEX: - // Only supported on a Mac - m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_ViewPortIndex", "uint mtl_ViewPortIndex [[ viewport_array_index ]]")); - break; - case NAME_INSTANCE_ID: - m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_InstanceID", "uint mtl_InstanceID [[ instance_id ]]")); - m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_BaseInstance", "uint mtl_BaseInstance [[ base_instance ]]")); // Requires Metal runtime 1.1+ - break; - case NAME_IS_FRONT_FACE: - m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_FrontFace", "bool mtl_FrontFace [[ front_facing ]]")); - break; - case NAME_SAMPLE_INDEX: - m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_SampleID", "uint mtl_SampleID [[ sample_id ]]")); - break; - case NAME_VERTEX_ID: - m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_VertexID", "uint mtl_VertexID [[ vertex_id ]]")); - m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_BaseVertex", "uint mtl_BaseVertex [[ base_vertex ]]")); // Requires Metal runtime 1.1+ - break; - case NAME_PRIMITIVE_ID: - // Not on Metal - ASSERT(0); - break; - default: - m_StructDefinitions[""].m_Members.push_back(std::make_pair(psDecl->asOperands[0].specialName, std::string("float4 ").append(psDecl->asOperands[0].specialName))); - ASSERT(0); // Catch this to see what's happening - break; - } -} - -void ToMetal::DeclareClipPlanes(const Declaration* decl, unsigned declCount) -{ - unsigned planeCount = 0; - for (unsigned i = 0, n = declCount; i < n; ++i) - { - const Operand* operand = &decl[i].asOperands[0]; - if (operand->eSpecialName == NAME_CLIP_DISTANCE) - planeCount += operand->GetMaxComponent(); - } - if (planeCount == 0) return; - - std::ostringstream oss; oss << "float mtl_ClipDistance [[ clip_distance ]]"; - if 
(planeCount > 1) oss << "[" << planeCount << "]"; - m_StructDefinitions[GetOutputStructName()].m_Members.push_back(std::make_pair(std::string("mtl_ClipDistance"), oss.str())); - - Shader* shader = psContext->psShader; - - unsigned compCount = 1; - const ShaderInfo::InOutSignature* psFirstClipSignature; - if (shader->sInfo.GetOutputSignatureFromSystemValue(NAME_CLIP_DISTANCE, 0, &psFirstClipSignature)) - { - if (psFirstClipSignature->ui32Mask & (1 << 3)) compCount = 4; - else if (psFirstClipSignature->ui32Mask & (1 << 2)) compCount = 3; - else if (psFirstClipSignature->ui32Mask & (1 << 1)) compCount = 2; - } - - for (unsigned i = 0, n = declCount; i < n; ++i) - { - const Operand* operand = &decl[i].asOperands[0]; - if (operand->eSpecialName != NAME_CLIP_DISTANCE) continue; - - const ShaderInfo::InOutSignature* signature = 0; - shader->sInfo.GetOutputSignatureFromRegister(operand->ui32RegisterNumber, operand->ui32CompMask, 0, &signature); - const int semanticIndex = signature->ui32SemanticIndex; - - bformata(GetEarlyMain(psContext), "float4 phase%d_ClipDistance%d;\n", psContext->currentPhase, signature->ui32SemanticIndex); - - const char* swizzleStr[] = { "x", "y", "z", "w" }; - if (planeCount > 1) - { - for (int i = 0; i < compCount; ++i) - { - bformata(GetPostShaderCode(psContext), "%s.mtl_ClipDistance[%d] = phase%d_ClipDistance%d.%s;\n", "output", semanticIndex * compCount + i, psContext->currentPhase, semanticIndex, swizzleStr[i]); - } - } - else - { - bformata(GetPostShaderCode(psContext), "%s.mtl_ClipDistance = phase%d_ClipDistance%d.x;\n", "output", psContext->currentPhase, semanticIndex); - } - } -} - -void ToMetal::GenerateTexturesReflection(HLSLccReflection* refl) -{ - for (unsigned i = 0, n = m_Textures.size(); i < n; ++i) - { - // Match CheckSamplerAndTextureNameMatch behavior - const std::string samplerName1 = m_Textures[i].name, samplerName2 = "sampler" + m_Textures[i].name, samplerName3 = "sampler_" + m_Textures[i].name; - for (unsigned j = 0, m = 
m_Samplers.size(); j < m; ++j) - { - if (m_Samplers[j].name == samplerName1 || m_Samplers[j].name == samplerName2 || m_Samplers[j].name == samplerName3) - { - m_Textures[i].samplerBind = m_Samplers[j].slot; - break; - } - } - } - - for (unsigned i = 0, n = m_Textures.size(); i < n; ++i) - refl->OnTextureBinding(m_Textures[i].name, m_Textures[i].textureBind, m_Textures[i].samplerBind, m_Textures[i].isMultisampled, m_Textures[i].dim, m_Textures[i].uav); -} - -void ToMetal::DeclareBuiltinOutput(const Declaration *psDecl) -{ - std::string out = GetOutputStructName(); - - switch (psDecl->asOperands[0].eSpecialName) - { - case NAME_POSITION: - m_StructDefinitions[out].m_Members.push_back(std::make_pair("mtl_Position", "float4 mtl_Position [[ position ]]")); - break; - case NAME_RENDER_TARGET_ARRAY_INDEX: - m_StructDefinitions[out].m_Members.push_back(std::make_pair("mtl_Layer", "uint mtl_Layer [[ render_target_array_index ]]")); - break; - case NAME_CLIP_DISTANCE: - // it will be done separately in DeclareClipPlanes - break; - case NAME_VIEWPORT_ARRAY_INDEX: - // Only supported on a Mac - m_StructDefinitions[out].m_Members.push_back(std::make_pair("mtl_ViewPortIndex", "uint mtl_ViewPortIndex [[ viewport_array_index ]]")); - break; - case NAME_VERTEX_ID: - ASSERT(0); //VertexID is not an output - break; - case NAME_PRIMITIVE_ID: - // Not on Metal - ASSERT(0); - break; - case NAME_INSTANCE_ID: - ASSERT(0); //InstanceID is not an output - break; - case NAME_IS_FRONT_FACE: - ASSERT(0); //FrontFacing is not an output - break; - - //For the quadrilateral domain, there are 6 factors (4 sides, 2 inner). 
- case NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: - case NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: - case NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR: - case NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR: - - //For the triangular domain, there are 4 factors (3 sides, 1 inner) - case NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_TRI_INSIDE_TESSFACTOR: - - //For the isoline domain, there are 2 factors (detail and density). - case NAME_FINAL_LINE_DETAIL_TESSFACTOR: - case NAME_FINAL_LINE_DENSITY_TESSFACTOR: - { - // Handled separately - break; - } - default: - // This might be SV_Position (because d3dcompiler is weird). Get signature and check - const ShaderInfo::InOutSignature *sig = NULL; - psContext->psShader->sInfo.GetOutputSignatureFromRegister(psDecl->asOperands[0].ui32RegisterNumber, psDecl->asOperands[0].GetAccessMask(), 0, &sig); - ASSERT(sig != NULL); - if (sig->eSystemValueType == NAME_POSITION && sig->ui32SemanticIndex == 0) - { - m_StructDefinitions[out].m_Members.push_back(std::make_pair("mtl_Position", "float4 mtl_Position [[ position ]]")); - break; - } - - ASSERT(0); // Wut - break; - } - - psContext->m_Reflection.OnBuiltinOutput(psDecl->asOperands[0].eSpecialName); -} - -static std::string BuildOperandTypeString(OPERAND_MIN_PRECISION ePrec, INOUT_COMPONENT_TYPE eType, int numComponents) -{ - SHADER_VARIABLE_TYPE t = SVT_FLOAT; - switch (eType) - { - case INOUT_COMPONENT_FLOAT32: - t = SVT_FLOAT; - break; - case INOUT_COMPONENT_UINT32: - t = SVT_UINT; - break; - case INOUT_COMPONENT_SINT32: - t = SVT_INT; - break; - default: - ASSERT(0); - break; - } - // Can be overridden by precision - switch (ePrec) - { - case OPERAND_MIN_PRECISION_DEFAULT: - break; - - case OPERAND_MIN_PRECISION_FLOAT_16: - ASSERT(eType == INOUT_COMPONENT_FLOAT32); - t = SVT_FLOAT16; - break; - - case 
OPERAND_MIN_PRECISION_FLOAT_2_8: - ASSERT(eType == INOUT_COMPONENT_FLOAT32); - t = SVT_FLOAT10; - break; - - case OPERAND_MIN_PRECISION_SINT_16: - ASSERT(eType == INOUT_COMPONENT_SINT32); - t = SVT_INT16; - break; - case OPERAND_MIN_PRECISION_UINT_16: - ASSERT(eType == INOUT_COMPONENT_UINT32); - t = SVT_UINT16; - break; - } - return HLSLcc::GetConstructorForTypeMetal(t, numComponents); -} - -void ToMetal::DeclareHullShaderPassthrough() -{ - uint32_t i; - - for (i = 0; i < psContext->psShader->sInfo.psInputSignatures.size(); i++) - { - ShaderInfo::InOutSignature *psSig = &psContext->psShader->sInfo.psInputSignatures[i]; - - std::string name; - { - std::ostringstream oss; - oss << psSig->semanticName << psSig->ui32SemanticIndex; - name = oss.str(); - } - - if ((psSig->eSystemValueType == NAME_POSITION || psSig->semanticName == "POS") && psSig->ui32SemanticIndex == 0) - name = "mtl_Position"; - - uint32_t ui32NumComponents = HLSLcc::GetNumberBitsSet(psSig->ui32Mask); - std::string typeName = BuildOperandTypeString(OPERAND_MIN_PRECISION_DEFAULT, psSig->eComponentType, ui32NumComponents); - - std::ostringstream oss; - oss << typeName << " " << name; - oss << " [[ user(" << name << ") ]]"; - - std::string declString; - declString = oss.str(); - - m_StructDefinitions[GetInputStructName()].m_Members.push_back(std::make_pair(name, declString)); - - std::string out = GetOutputStructName(); - m_StructDefinitions[out].m_Members.push_back(std::make_pair(name, declString)); - - // For preserving data layout, declare output struct as domain shader input, too - oss.str(""); - out += "In"; - - oss << typeName << " " << name; - // VERTEX_SHADER hardcoded on purpose - bool keepLocation = ((psContext->flags & HLSLCC_FLAG_KEEP_VARYING_LOCATIONS) != 0); - uint32_t loc = psContext->psDependencies->GetVaryingLocation(name, VERTEX_SHADER, true, keepLocation, psContext->psShader->maxSemanticIndex); - oss << " [[ " << "attribute(" << loc << ")" << " ]] "; - - 
psContext->m_Reflection.OnInputBinding(name, loc); - m_StructDefinitions[out].m_Members.push_back(std::make_pair(name, oss.str())); - } -} - -void ToMetal::HandleOutputRedirect(const Declaration *psDecl, const std::string &typeName) -{ - const Operand *psOperand = &psDecl->asOperands[0]; - Shader *psShader = psContext->psShader; - int needsRedirect = 0; - const ShaderInfo::InOutSignature *psSig = NULL; - - int regSpace = psOperand->GetRegisterSpace(psContext); - if (regSpace == 0 && psShader->asPhases[psContext->currentPhase].acOutputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xff) - { - needsRedirect = 1; - } - else if (regSpace == 1 && psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xff) - { - needsRedirect = 1; - } - - if (needsRedirect == 1) - { - // TODO What if this is indexed? - int comp = 0; - uint32_t origMask = psOperand->ui32CompMask; - - ASSERT(psContext->psShader->aIndexedOutput[regSpace][psOperand->ui32RegisterNumber] == 0); - - bformata(GetEarlyMain(psContext), "%s phase%d_Output%d_%d;\n", typeName.c_str(), psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); - - while (comp < 4) - { - int numComps = 0; - int hasCast = 0; - uint32_t mask, i; - psSig = NULL; - if (regSpace == 0) - psContext->psShader->sInfo.GetOutputSignatureFromRegister(psOperand->ui32RegisterNumber, 1 << comp, psContext->psShader->ui32CurrentVertexOutputStream, &psSig, true); - else - psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, 1 << comp, &psSig, true); - - // The register isn't necessarily packed full. Continue with the next component. 
- if (psSig == NULL) - { - comp++; - continue; - } - - numComps = HLSLcc::GetNumberBitsSet(psSig->ui32Mask); - mask = psSig->ui32Mask; - - ((Operand *)psOperand)->ui32CompMask = 1 << comp; - bstring str = GetPostShaderCode(psContext); - bcatcstr(str, TranslateOperand(psOperand, TO_FLAG_NAME_ONLY).c_str()); - bcatcstr(str, " = "); - - if (psSig->eComponentType == INOUT_COMPONENT_SINT32) - { - bformata(str, "as_type("); - hasCast = 1; - } - else if (psSig->eComponentType == INOUT_COMPONENT_UINT32) - { - bformata(str, "as_type("); - hasCast = 1; - } - bformata(str, "phase%d_Output%d_%d.", psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); - // Print out mask - for (i = 0; i < 4; i++) - { - if ((mask & (1 << i)) == 0) - continue; - - bformata(str, "%c", "xyzw"[i]); - } - - if (hasCast) - bcatcstr(str, ")"); - comp += numComps; - bcatcstr(str, ";\n"); - } - - ((Operand *)psOperand)->ui32CompMask = origMask; - if (regSpace == 0) - psShader->asPhases[psContext->currentPhase].acOutputNeedsRedirect[psOperand->ui32RegisterNumber] = 0xfe; - else - psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] = 0xfe; - } -} - -void ToMetal::HandleInputRedirect(const Declaration *psDecl, const std::string &typeName) -{ - Operand *psOperand = (Operand *)&psDecl->asOperands[0]; - Shader *psShader = psContext->psShader; - int needsRedirect = 0; - const ShaderInfo::InOutSignature *psSig = NULL; - - int regSpace = psOperand->GetRegisterSpace(psContext); - if (regSpace == 0) - { - if (psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xff) - needsRedirect = 1; - } - else if (psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xff) - { - psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->ui32CompMask, &psSig); - needsRedirect = 1; - } - - if (needsRedirect == 1) - { - 
// TODO What if this is indexed? - int needsLooping = 0; - int i = 0; - uint32_t origArraySize = 0; - uint32_t origMask = psOperand->ui32CompMask; - - ASSERT(psContext->psShader->aIndexedInput[regSpace][psOperand->ui32RegisterNumber] == 0); - - ++psContext->indent; - - // Does the input have multiple array components (such as geometry shader input, or domain shader control point input) - if ((psShader->eShaderType == DOMAIN_SHADER && regSpace == 0) || (psShader->eShaderType == GEOMETRY_SHADER)) - { - // The count is actually stored in psOperand->aui32ArraySizes[0] - origArraySize = psOperand->aui32ArraySizes[0]; - // bformata(glsl, "%s vec4 phase%d_Input%d_%d[%d];\n", Precision, psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber, origArraySize); - bformata(GetEarlyMain(psContext), "%s phase%d_Input%d_%d[%d];\n", typeName.c_str(), psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber, origArraySize); - needsLooping = 1; - i = origArraySize - 1; - } - else - // bformata(glsl, "%s vec4 phase%d_Input%d_%d;\n", Precision, psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); - bformata(GetEarlyMain(psContext), "%s phase%d_Input%d_%d;\n", typeName.c_str(), psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); - - // Do a conditional loop. In normal cases needsLooping == 0 so this is only run once. 
- do - { - int comp = 0; - bstring str = GetEarlyMain(psContext); - if (needsLooping) - bformata(str, "phase%d_Input%d_%d[%d] = %s(", psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber, i, typeName.c_str()); - else - bformata(str, "phase%d_Input%d_%d = %s(", psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber, typeName.c_str()); - - while (comp < 4) - { - int numComps = 0; - int hasCast = 0; - int hasSig = 0; - if (regSpace == 0) - hasSig = psContext->psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, 1 << comp, &psSig, true); - else - hasSig = psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, 1 << comp, &psSig, true); - - if (hasSig) - { - numComps = HLSLcc::GetNumberBitsSet(psSig->ui32Mask); - if (psSig->eComponentType != INOUT_COMPONENT_FLOAT32) - { - if (numComps > 1) - bformata(str, "as_type(", numComps); - else - bformata(str, "as_type("); - hasCast = 1; - } - - // Override the array size of the operand so TranslateOperand call below prints the correct index - if (needsLooping) - psOperand->aui32ArraySizes[0] = i; - - // And the component mask - psOperand->ui32CompMask = 1 << comp; - - bformata(str, TranslateOperand(psOperand, TO_FLAG_NAME_ONLY).c_str()); - - // Restore the original array size value and mask - psOperand->ui32CompMask = origMask; - if (needsLooping) - psOperand->aui32ArraySizes[0] = origArraySize; - - if (hasCast) - bcatcstr(str, ")"); - comp += numComps; - } - else // no signature found -> fill with zero - { - bcatcstr(str, "0"); - comp++; - } - - if (comp < 4) - bcatcstr(str, ", "); - } - bcatcstr(str, ");\n"); - } - while ((--i) >= 0); - - --psContext->indent; - - if (regSpace == 0) - psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] = 0xfe; - else - psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] = 0xfe; - } -} - -static std::string 
TranslateResourceDeclaration(HLSLCrossCompilerContext* psContext, - const Declaration *psDecl, const std::string& textureName, - bool isDepthSampler, bool isUAV) -{ - std::ostringstream oss; - const ResourceBinding* psBinding = 0; - const RESOURCE_DIMENSION eDimension = psDecl->value.eResourceDimension; - const uint32_t ui32RegisterNumber = psDecl->asOperands[0].ui32RegisterNumber; - REFLECT_RESOURCE_PRECISION ePrec = REFLECT_RESOURCE_PRECISION_UNKNOWN; - RESOURCE_RETURN_TYPE eType = RETURN_TYPE_UNORM; - std::string access = "sample"; - - if (isUAV) - { - if ((psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_WRITE) != 0) - { - access = "write"; - if ((psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_READ) != 0) - { - access = "read_write"; - } - } - else - { - access = "read"; - eType = psDecl->sUAV.Type; - } - int found; - found = psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, ui32RegisterNumber, &psBinding); - if (found) - { - ePrec = psBinding->ePrecision; - eType = (RESOURCE_RETURN_TYPE)psBinding->ui32ReturnType; - // Figured out by reverse engineering bitcode. 
flags b00xx means float1, b01xx = float2, b10xx = float3 and b11xx = float4 - } - } - else - { - int found; - found = psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, ui32RegisterNumber, &psBinding); - if (found) - { - eType = (RESOURCE_RETURN_TYPE)psBinding->ui32ReturnType; - ePrec = psBinding->ePrecision; - - // TODO: it might make sense to propagate float earlier (as hlslcc might declare other variables depending on sampler prec) - // metal supports ONLY float32 depth textures - if (isDepthSampler) - { - switch (eDimension) - { - case RESOURCE_DIMENSION_TEXTURE2D: case RESOURCE_DIMENSION_TEXTURE2DMS: case RESOURCE_DIMENSION_TEXTURECUBE: - case RESOURCE_DIMENSION_TEXTURE2DARRAY: case RESOURCE_DIMENSION_TEXTURECUBEARRAY: - ePrec = REFLECT_RESOURCE_PRECISION_HIGHP, eType = RETURN_TYPE_FLOAT; break; - default: - break; - } - } - } - switch (eDimension) - { - case RESOURCE_DIMENSION_BUFFER: - case RESOURCE_DIMENSION_TEXTURE2DMS: - case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: - access = "read"; - default: - break; - } - } - - SHADER_VARIABLE_TYPE svtType = HLSLcc::ResourceReturnTypeToSVTType(eType, ePrec); - std::string typeName = HLSLcc::GetConstructorForTypeMetal(svtType, 1); - - if ((textureName == "_CameraDepthTexture" || textureName == "_LastCameraDepthTexture") && svtType != SVT_FLOAT) - { - std::string msg = textureName + " should be float on Metal (use sampler2D or sampler2D_float). Incorrect type " - "can cause Metal validation failures or undefined results on some devices."; - psContext->m_Reflection.OnDiagnostics(msg, 0, false); - } - - switch (eDimension) - { - case RESOURCE_DIMENSION_BUFFER: - { - oss << "texture1d<" << typeName << ", access::" << access << " >"; - return oss.str(); - break; - } - - case RESOURCE_DIMENSION_TEXTURE1D: - { - oss << "texture1d<" << typeName << ", access::" << access << " >"; - return oss.str(); - break; - } - - case RESOURCE_DIMENSION_TEXTURE2D: - { - oss << (isDepthSampler ? 
"depth2d<" : "texture2d<") << typeName << ", access::" << access << " >"; - return oss.str(); - break; - } - - case RESOURCE_DIMENSION_TEXTURE2DMS: - { - oss << (isDepthSampler ? "depth2d_ms<" : "texture2d_ms<") << typeName << ", access::" << access << " >"; - return oss.str(); - break; - } - - case RESOURCE_DIMENSION_TEXTURE3D: - { - oss << "texture3d<" << typeName << ", access::" << access << " >"; - return oss.str(); - break; - } - - case RESOURCE_DIMENSION_TEXTURECUBE: - { - oss << (isDepthSampler ? "depthcube<" : "texturecube<") << typeName << ", access::" << access << " >"; - return oss.str(); - break; - } - - case RESOURCE_DIMENSION_TEXTURE1DARRAY: - { - oss << "texture1d_array<" << typeName << ", access::" << access << " >"; - return oss.str(); - break; - } - - case RESOURCE_DIMENSION_TEXTURE2DARRAY: - { - oss << (isDepthSampler ? "depth2d_array<" : "texture2d_array<") << typeName << ", access::" << access << " >"; - return oss.str(); - break; - } - - case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: - { - // Not really supported in Metal but let's print it here anyway - oss << "texture2d_ms_array<" << typeName << ", access::" << access << " >"; - return oss.str(); - break; - } - - case RESOURCE_DIMENSION_TEXTURECUBEARRAY: - { - oss << (isDepthSampler ? 
"depthcube_array<" : "texturecube_array<") << typeName << ", access::" << access << " >"; - return oss.str(); - break; - } - default: - ASSERT(0); - oss << "texture2d<" << typeName << ", access::" << access << " >"; - return oss.str(); - } -} - -static std::string GetInterpolationString(INTERPOLATION_MODE eMode) -{ - switch (eMode) - { - case INTERPOLATION_CONSTANT: - return " [[ flat ]]"; - - case INTERPOLATION_LINEAR: - return ""; - - case INTERPOLATION_LINEAR_CENTROID: - return " [[ centroid_perspective ]]"; - - case INTERPOLATION_LINEAR_NOPERSPECTIVE: - return " [[ center_no_perspective ]]"; - - case INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID: - return " [[ centroid_no_perspective ]]"; - - case INTERPOLATION_LINEAR_SAMPLE: - return " [[ sample_perspective ]]"; - - case INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE: - return " [[ sample_no_perspective ]]"; - default: - ASSERT(0); - return ""; - } -} - -void ToMetal::DeclareStructVariable(const std::string &parentName, const ShaderVar &var, bool withinCB, uint32_t cumulativeOffset, bool isUsed) -{ - DeclareStructVariable(parentName, var.sType, withinCB, cumulativeOffset + var.ui32StartOffset, isUsed); -} - -void ToMetal::DeclareStructVariable(const std::string &parentName, const ShaderVarType &var, bool withinCB, uint32_t cumulativeOffset, bool isUsed) -{ - // CB arrays need to be defined as 4 component vectors to match DX11 data layout - bool arrayWithinCB = (withinCB && (var.Elements > 1) && (psContext->psShader->eShaderType == COMPUTE_SHADER)); - bool doDeclare = true; - - if (isUsed == false && ((psContext->flags & HLSLCC_FLAG_REMOVE_UNUSED_GLOBALS)) == 0) - isUsed = true; - - if (var.Class == SVC_STRUCT) - { - if (m_StructDefinitions.find(var.name + "_Type") == m_StructDefinitions.end()) - DeclareStructType(var.name + "_Type", var.Members, withinCB, cumulativeOffset + var.Offset); - - // Report Array-of-Struct CB top-level struct var after all members are reported. 
- if (var.Parent == NULL && var.Elements > 1 && withinCB) - { - // var.Type being SVT_VOID indicates it is a struct in this case. - psContext->m_Reflection.OnConstant(var.fullName, var.Offset + cumulativeOffset, var.Type, var.Rows, var.Columns, false, var.Elements, true); - } - - std::ostringstream oss; - oss << var.name << "_Type " << var.name; - if (var.Elements > 1) - { - oss << "[" << var.Elements << "]"; - } - m_StructDefinitions[parentName].m_Members.push_back(std::make_pair(var.name, oss.str())); - m_StructDefinitions[parentName].m_Dependencies.push_back(var.name + "_Type"); - return; - } - else if (var.Class == SVC_MATRIX_COLUMNS || var.Class == SVC_MATRIX_ROWS) - { - std::ostringstream oss; - if (psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) - { - // Translate matrices into vec4 arrays - char prefix[256]; - sprintf(prefix, HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING, var.Rows, var.Columns); - oss << HLSLcc::GetConstructorForType(psContext, var.Type, 4) << " " << prefix << var.name; - - uint32_t elemCount = (var.Class == SVC_MATRIX_COLUMNS ? var.Columns : var.Rows); - if (var.Elements > 1) - { - elemCount *= var.Elements; - } - oss << "[" << elemCount << "]"; - - if (withinCB) - { - // On compute shaders we need to reflect the vec array as it is to support all possible matrix sizes correctly. - // On non-compute we can fake that we still have a matrix, as CB upload code will fill the data correctly on 4x4 matrices. - // That way we avoid the issues with mismatching types for builtins etc. 
- if (psContext->psShader->eShaderType == COMPUTE_SHADER) - doDeclare = psContext->m_Reflection.OnConstant(var.fullName, var.Offset + cumulativeOffset, var.Type, 1, 4, false, elemCount, isUsed); - else - doDeclare = psContext->m_Reflection.OnConstant(var.fullName, var.Offset + cumulativeOffset, var.Type, var.Rows, var.Columns, true, var.Elements, isUsed); - } - } - else - { - oss << HLSLcc::GetMatrixTypeName(psContext, var.Type, var.Columns, var.Rows); - oss << " " << var.name; - if (var.Elements > 1) - { - oss << "[" << var.Elements << "]"; - } - - // TODO Verify whether the offset is from the beginning of the CB or from the beginning of the struct - if (withinCB) - doDeclare = psContext->m_Reflection.OnConstant(var.fullName, var.Offset + cumulativeOffset, var.Type, var.Rows, var.Columns, true, var.Elements, isUsed); - } - - if (doDeclare) - m_StructDefinitions[parentName].m_Members.push_back(std::make_pair(var.name, oss.str())); - } - else if (var.Class == SVC_VECTOR && var.Columns > 1) - { - std::ostringstream oss; - oss << HLSLcc::GetConstructorForTypeMetal(var.Type, arrayWithinCB ? 4 : var.Columns); - oss << " " << var.name; - if (var.Elements > 1) - { - oss << "[" << var.Elements << "]"; - } - - if (withinCB) - doDeclare = psContext->m_Reflection.OnConstant(var.fullName, var.Offset + cumulativeOffset, var.Type, 1, var.Columns, false, var.Elements, isUsed); - - if (doDeclare) - m_StructDefinitions[parentName].m_Members.push_back(std::make_pair(var.name, oss.str())); - } - else if ((var.Class == SVC_SCALAR) || - (var.Class == SVC_VECTOR && var.Columns == 1)) - { - if (var.Type == SVT_BOOL) - { - //Use int instead of bool. - //Allows implicit conversions to integer and - //bool consumes 4-bytes in HLSL and GLSL anyway. - ((ShaderVarType &)var).Type = SVT_INT; - } - - std::ostringstream oss; - oss << HLSLcc::GetConstructorForTypeMetal(var.Type, arrayWithinCB ? 
4 : 1); - oss << " " << var.name; - if (var.Elements > 1) - { - oss << "[" << var.Elements << "]"; - } - - if (withinCB) - doDeclare = psContext->m_Reflection.OnConstant(var.fullName, var.Offset + cumulativeOffset, var.Type, 1, 1, false, var.Elements, isUsed); - - if (doDeclare) - m_StructDefinitions[parentName].m_Members.push_back(std::make_pair(var.name, oss.str())); - } - else - { - ASSERT(0); - } -} - -void ToMetal::DeclareStructType(const std::string &name, const std::vector &contents, bool withinCB, uint32_t cumulativeOffset, bool stripUnused /* = false */) -{ - for (std::vector::const_iterator itr = contents.begin(); itr != contents.end(); itr++) - { - if (stripUnused && !itr->sType.m_IsUsed) - continue; - - DeclareStructVariable(name, *itr, withinCB, cumulativeOffset, itr->sType.m_IsUsed); - } -} - -void ToMetal::DeclareStructType(const std::string &name, const std::vector &contents, bool withinCB, uint32_t cumulativeOffset) -{ - for (std::vector::const_iterator itr = contents.begin(); itr != contents.end(); itr++) - { - DeclareStructVariable(name, *itr, withinCB, cumulativeOffset); - } -} - -void ToMetal::DeclareConstantBuffer(const ConstantBuffer *psCBuf, uint32_t ui32BindingPoint) -{ - const bool isGlobals = (psCBuf->name == "$Globals"); - const bool stripUnused = isGlobals && (psContext->flags & HLSLCC_FLAG_REMOVE_UNUSED_GLOBALS); - std::string cbname = GetCBName(psCBuf->name); - - // Note: if we're stripping unused members, both ui32TotalSizeInBytes and individual offsets into reflection will be completely off. - // However, the reflection layer re-calculates both to match Metal alignment rules anyway, so we're good. 
- if (!psContext->m_Reflection.OnConstantBuffer(cbname, psCBuf->ui32TotalSizeInBytes, psCBuf->GetMemberCount(stripUnused))) - return; - - if (psContext->psDependencies->IsMemberDeclared(cbname)) - return; - - DeclareStructType(cbname + "_Type", psCBuf->asVars, true, 0, stripUnused); - - std::ostringstream oss; - uint32_t slot = m_BufferSlots.GetBindingSlot(ui32BindingPoint, BindingSlotAllocator::ConstantBuffer); - - if (HLSLcc::IsUnityFlexibleInstancingBuffer(psCBuf)) - oss << "const constant " << psCBuf->asVars[0].name << "_Type* "; - else - oss << "constant " << cbname << "_Type& "; - oss << cbname << " [[ buffer(" << slot << ") ]]"; - - m_StructDefinitions[""].m_Members.push_back(std::make_pair(cbname, oss.str())); - m_StructDefinitions[""].m_Dependencies.push_back(cbname + "_Type"); - psContext->m_Reflection.OnConstantBufferBinding(cbname, slot); -} - -void ToMetal::DeclareBufferVariable(const Declaration *psDecl, bool isRaw, bool isUAV) -{ - uint32_t regNo = psDecl->asOperands[0].ui32RegisterNumber; - std::string BufName, BufType, BufConst; - - BufName = ""; - BufType = ""; - BufConst = ""; - - BufName = ResourceName(isUAV ? RGROUP_UAV : RGROUP_TEXTURE, regNo); - - if (!isRaw) // declare struct containing uint array when needed - { - std::ostringstream typeoss; - BufType = BufName + "_Type"; - typeoss << "uint value["; - typeoss << psDecl->ui32BufferStride / 4 << "]"; - m_StructDefinitions[BufType].m_Members.push_back(std::make_pair("value", typeoss.str())); - m_StructDefinitions[""].m_Dependencies.push_back(BufType); - } - - if (!psContext->psDependencies->IsMemberDeclared(BufName)) - { - std::ostringstream oss; - - if (!isUAV || ((psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_WRITE) == 0)) - { - BufConst = "const "; - oss << BufConst; - } - - if (isRaw) - oss << "device uint *" << BufName; - else - oss << "device " << BufType << " *" << BufName; - - uint32_t loc = m_BufferSlots.GetBindingSlot(regNo, isUAV ? 
BindingSlotAllocator::RWBuffer : BindingSlotAllocator::Texture); - oss << " [[ buffer(" << loc << ") ]]"; - - m_StructDefinitions[""].m_Members.push_back(std::make_pair(BufName, oss.str())); - - // We don't do REAL reflection here, we need to collect all data and figure out if we're dealing with counters. - // And if so - we need to patch counter binding info, add counters to empty slots, etc - const BufferReflection br = { loc, isUAV, psDecl->sUAV.bCounter != 0 }; - m_BufferReflections.insert(std::make_pair(BufName, br)); - } -} - -static int ParseInlineSamplerWrapMode(const std::string& samplerName, const std::string& wrapName) -{ - int res = 0; - const bool hasWrap = (samplerName.find(wrapName) != std::string::npos); - if (!hasWrap) - return res; - - const bool hasU = (samplerName.find(wrapName + 'u') != std::string::npos); - const bool hasV = (samplerName.find(wrapName + 'v') != std::string::npos); - const bool hasW = (samplerName.find(wrapName + 'w') != std::string::npos); - - if (hasWrap) res |= 1; - if (hasU) res |= 2; - if (hasV) res |= 4; - if (hasW) res |= 8; - return res; -} - -static bool EmitInlineSampler(HLSLCrossCompilerContext* psContext, const std::string& name) -{ - // See if it's a sampler that goes with the texture, or an "inline" sampler - // where sampler states are hardcoded in the shader directly. - // - // The logic for "inline" samplers below must match what is recognized - // by other shader platforms in Unity (ParseInlineSamplerName function - // in the shader compiler). 
- - std::string samplerName(name); std::transform(samplerName.begin(), samplerName.end(), samplerName.begin(), ::tolower); - - // filter modes - const bool hasPoint = (samplerName.find("point") != std::string::npos); - const bool hasTrilinear = (samplerName.find("trilinear") != std::string::npos); - const bool hasLinear = (samplerName.find("linear") != std::string::npos); - const bool hasAnyFilter = hasPoint || hasTrilinear || hasLinear; - - // wrap modes - const int bitsClamp = ParseInlineSamplerWrapMode(samplerName, "clamp"); - const int bitsRepeat = ParseInlineSamplerWrapMode(samplerName, "repeat"); - const int bitsMirror = ParseInlineSamplerWrapMode(samplerName, "mirror"); - const int bitsMirrorOnce = ParseInlineSamplerWrapMode(samplerName, "mirroronce"); - - const bool hasAnyWrap = bitsClamp != 0 || bitsRepeat != 0 || bitsMirror != 0 || bitsMirrorOnce != 0; - - // depth comparison - const bool hasCompare = (samplerName.find("compare") != std::string::npos); - - // name must contain a filter mode and a wrap mode at least - if (!hasAnyFilter || !hasAnyWrap) - { - return false; - } - - // Starting with macOS 11/iOS 14, the metal compiler will warn about unused inline samplers, that might - // happen on mobile due to _mtl_xl_shadow_sampler workaround that's required for pre-GPUFamily3. 
- if (hasCompare && IsMobileTarget(psContext)) - return true; - - bstring str = GetEarlyMain(psContext); - bformata(str, "constexpr sampler %s(", name.c_str()); - - if (hasCompare) - bformata(str, "compare_func::greater_equal,"); - - if (hasTrilinear) - bformata(str, "filter::linear,mip_filter::linear,"); - else if (hasLinear) - bformata(str, "filter::linear,mip_filter::nearest,"); - else - bformata(str, "filter::nearest,"); - - const char* kTexWrapClamp = "clamp_to_edge"; - const char* kTexWrapRepeat = "repeat"; - const char* kTexWrapMirror = "mirrored_repeat"; - const char* kTexWrapMirrorOnce = "mirrored_repeat"; // currently Metal shading language does not have syntax for inline sampler state that would do "mirror clamp to edge" - const char* wrapU = kTexWrapRepeat; - const char* wrapV = kTexWrapRepeat; - const char* wrapW = kTexWrapRepeat; - - if (bitsClamp == 1) wrapU = wrapV = wrapW = kTexWrapClamp; - else if (bitsRepeat == 1) wrapU = wrapV = wrapW = kTexWrapRepeat; - else if (bitsMirrorOnce == 1) wrapU = wrapV = wrapW = kTexWrapMirrorOnce; - else if (bitsMirror == 1) wrapU = wrapV = wrapW = kTexWrapMirror; - - if ((bitsClamp & 2) != 0) wrapU = kTexWrapClamp; - if ((bitsClamp & 4) != 0) wrapV = kTexWrapClamp; - if ((bitsClamp & 8) != 0) wrapW = kTexWrapClamp; - - if ((bitsRepeat & 2) != 0) wrapU = kTexWrapRepeat; - if ((bitsRepeat & 4) != 0) wrapV = kTexWrapRepeat; - if ((bitsRepeat & 8) != 0) wrapW = kTexWrapRepeat; - - if ((bitsMirrorOnce & 2) != 0) wrapU = kTexWrapMirrorOnce; - if ((bitsMirrorOnce & 4) != 0) wrapV = kTexWrapMirrorOnce; - if ((bitsMirrorOnce & 8) != 0) wrapW = kTexWrapMirrorOnce; - - if ((bitsMirror & 2) != 0) wrapU = kTexWrapMirror; - if ((bitsMirror & 4) != 0) wrapV = kTexWrapMirror; - if ((bitsMirror & 8) != 0) wrapW = kTexWrapMirror; - - if (wrapU == wrapV && wrapU == wrapW) - bformata(str, "address::%s", wrapU); - else - bformata(str, "s_address::%s,t_address::%s,r_address::%s", wrapU, wrapV, wrapW); - - bformata(str, ");\n"); - - 
return true; -} - -void ToMetal::TranslateDeclaration(const Declaration* psDecl) -{ - bstring glsl = *psContext->currentGLSLString; - Shader* psShader = psContext->psShader; - - switch (psDecl->eOpcode) - { - case OPCODE_DCL_INPUT_SGV: - case OPCODE_DCL_INPUT_PS_SGV: - DeclareBuiltinInput(psDecl); - break; - case OPCODE_DCL_OUTPUT_SIV: - DeclareBuiltinOutput(psDecl); - break; - case OPCODE_DCL_INPUT: - case OPCODE_DCL_INPUT_PS_SIV: - case OPCODE_DCL_INPUT_SIV: - case OPCODE_DCL_INPUT_PS: - { - const Operand* psOperand = &psDecl->asOperands[0]; - - if ((psOperand->eType == OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID) || - (psOperand->eType == OPERAND_TYPE_INPUT_FORK_INSTANCE_ID)) - { - break; - } - - // No need to declare patch constants read again by the hull shader. - if ((psOperand->eType == OPERAND_TYPE_INPUT_PATCH_CONSTANT) && psContext->psShader->eShaderType == HULL_SHADER) - { - break; - } - // ...or control points - if ((psOperand->eType == OPERAND_TYPE_INPUT_CONTROL_POINT) && psContext->psShader->eShaderType == HULL_SHADER) - { - break; - } - - //Already declared as part of an array. - if (psDecl->eOpcode == OPCODE_DCL_INPUT && psShader->aIndexedInput[psOperand->GetRegisterSpace(psContext)][psDecl->asOperands[0].ui32RegisterNumber] == -1) - { - break; - } - - uint32_t ui32Reg = psDecl->asOperands[0].ui32RegisterNumber; - uint32_t ui32CompMask = psDecl->asOperands[0].ui32CompMask; - - std::string name = psContext->GetDeclaredInputName(psOperand, nullptr, 1, nullptr); - - // NB: unlike GL we keep arrays of 2-component vectors as is (without collapsing into float4) - // if(psShader->aIndexedInput[0][psDecl->asOperands[0].ui32RegisterNumber] == -1) - // break; - - // Already declared? 
- if ((ui32CompMask != 0) && ((ui32CompMask & ~psShader->acInputDeclared[0][ui32Reg]) == 0)) - { - ASSERT(0); // Catch this - break; - } - - if (psOperand->eType == OPERAND_TYPE_INPUT_COVERAGE_MASK) - { - std::ostringstream oss; - oss << "uint " << name << " [[ sample_mask ]]"; - m_StructDefinitions[""].m_Members.push_back(std::make_pair(name, oss.str())); - break; - } - - if (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_ID) - { - std::ostringstream oss; - oss << "uint3 " << name << " [[ thread_position_in_grid ]]"; - m_StructDefinitions[""].m_Members.push_back(std::make_pair(name, oss.str())); - break; - } - - if (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_GROUP_ID) - { - std::ostringstream oss; - oss << "uint3 " << name << " [[ threadgroup_position_in_grid ]]"; - m_StructDefinitions[""].m_Members.push_back(std::make_pair(name, oss.str())); - break; - } - - if (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP) - { - std::ostringstream oss; - oss << "uint3 " << name << " [[ thread_position_in_threadgroup ]]"; - m_StructDefinitions[""].m_Members.push_back(std::make_pair(name, oss.str())); - break; - } - if (psOperand->eSpecialName == NAME_RENDER_TARGET_ARRAY_INDEX) - { - std::ostringstream oss; - oss << "uint " << name << " [[ render_target_array_index ]]"; - m_StructDefinitions[""].m_Members.push_back(std::make_pair(name, oss.str())); - break; - } - if (psOperand->eType == OPERAND_TYPE_INPUT_DOMAIN_POINT) - { - std::ostringstream oss; - std::string patchPositionType = psShader->sInfo.eTessDomain == TESSELLATOR_DOMAIN_QUAD ? 
"float2 " : "float3 "; - oss << patchPositionType << name << " [[ position_in_patch ]]"; - m_StructDefinitions[""].m_Members.push_back(std::make_pair(name, oss.str())); - break; - } - if (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED) - { - std::ostringstream oss; - oss << "uint " << name << " [[ thread_index_in_threadgroup ]]"; - m_StructDefinitions[""].m_Members.push_back(std::make_pair(name, oss.str())); - break; - } - if (psOperand->eSpecialName == NAME_VIEWPORT_ARRAY_INDEX) - { - std::ostringstream oss; - oss << "uint " << name << " [[ viewport_array_index ]]"; - m_StructDefinitions[""].m_Members.push_back(std::make_pair(name, oss.str())); - break; - } - - if (psDecl->eOpcode == OPCODE_DCL_INPUT_PS_SIV && psOperand->eSpecialName == NAME_POSITION) - { - m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_FragCoord", "float4 mtl_FragCoord [[ position ]]")); - bcatcstr(GetEarlyMain(psContext), "float4 hlslcc_FragCoord = float4(mtl_FragCoord.xyz, 1.0/mtl_FragCoord.w);\n"); - break; - } - - if (psContext->psDependencies) - { - if (psShader->eShaderType == PIXEL_SHADER) - { - psContext->psDependencies->SetInterpolationMode(ui32Reg, psDecl->value.eInterpolation); - } - } - - int regSpace = psDecl->asOperands[0].GetRegisterSpace(psContext); - - const ShaderInfo::InOutSignature *psSig = NULL; - - // This falls within the specified index ranges. The default is 0 if no input range is specified - if (regSpace == 0) - psContext->psShader->sInfo.GetInputSignatureFromRegister(ui32Reg, ui32CompMask, &psSig); - else - psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Reg, ui32CompMask, &psSig); - - if (!psSig) - break; - - // fragment shader cannot reference builtins generated by vertex program (with obvious exception of position) - // TODO: some visible error? handle more builtins? 
- if (psContext->psShader->eShaderType == PIXEL_SHADER && !strncmp(psSig->semanticName.c_str(), "PSIZE", 5)) - break; - - int iNumComponents = psOperand->GetNumInputElements(psContext); - psShader->acInputDeclared[0][ui32Reg] = (char)psSig->ui32Mask; - - std::string typeName = BuildOperandTypeString(psOperand->eMinPrecision, psSig->eComponentType, iNumComponents); - - std::string semantic; - if (psContext->psShader->eShaderType == VERTEX_SHADER || psContext->psShader->eShaderType == HULL_SHADER || psContext->psShader->eShaderType == DOMAIN_SHADER) - { - std::ostringstream oss; - // VERTEX_SHADER hardcoded on purpose - bool keepLocation = ((psContext->flags & HLSLCC_FLAG_KEEP_VARYING_LOCATIONS) != 0); - uint32_t loc = psContext->psDependencies->GetVaryingLocation(name, VERTEX_SHADER, true, keepLocation, psShader->maxSemanticIndex); - oss << "attribute(" << loc << ")"; - semantic = oss.str(); - psContext->m_Reflection.OnInputBinding(name, loc); - } - else - { - std::ostringstream oss; - - // UNITY_FRAMEBUFFER_FETCH_AVAILABLE - // special case mapping for inout color, see HLSLSupport.cginc - if (psOperand->iPSInOut && name.size() == 10 && !strncmp(name.c_str(), "SV_Target", 9)) - { - // Metal allows color(X) declared in input/output structs - oss << "color(xlt_remap_i[" << psSig->ui32SemanticIndex << "])"; - m_NeedFBInputRemapDecl = true; - } - else - { - oss << "user(" << name << ")"; - } - semantic = oss.str(); - } - - std::string interpolation = ""; - if (psDecl->eOpcode == OPCODE_DCL_INPUT_PS) - { - interpolation = GetInterpolationString(psDecl->value.eInterpolation); - } - - std::string declString; - if ((OPERAND_INDEX_DIMENSION)psOperand->iIndexDims == INDEX_2D && psOperand->eType != OPERAND_TYPE_INPUT_CONTROL_POINT && psContext->psShader->eShaderType != HULL_SHADER) - { - std::ostringstream oss; - oss << typeName << " " << name << " [ " << psOperand->aui32ArraySizes[0] << " ] "; - - if (psContext->psShader->eShaderType != HULL_SHADER) - oss << " [[ " << 
semantic << " ]] " << interpolation; - declString = oss.str(); - } - else - { - std::ostringstream oss; - oss << typeName << " " << name; - if (psContext->psShader->eShaderType != HULL_SHADER) - oss << " [[ " << semantic << " ]] " << interpolation; - declString = oss.str(); - } - - if (psOperand->eType == OPERAND_TYPE_INPUT_PATCH_CONSTANT && psContext->psShader->eShaderType == DOMAIN_SHADER) - { - m_StructDefinitions["Mtl_PatchConstant"].m_Members.push_back(std::make_pair(name, declString)); - } - else if (psOperand->eType == OPERAND_TYPE_INPUT_CONTROL_POINT && psContext->psShader->eShaderType == DOMAIN_SHADER) - { - m_StructDefinitions["Mtl_ControlPoint"].m_Members.push_back(std::make_pair(name, declString)); - } - else if (psContext->psShader->eShaderType == HULL_SHADER) - { - m_StructDefinitions[GetInputStructName()].m_Members.push_back(std::make_pair(name, declString)); - } - else - { - m_StructDefinitions[GetInputStructName()].m_Members.push_back(std::make_pair(name, declString)); - } - - HandleInputRedirect(psDecl, BuildOperandTypeString(psOperand->eMinPrecision, INOUT_COMPONENT_FLOAT32, 4)); - break; - } - case OPCODE_DCL_TEMPS: - { - uint32_t i = 0; - const uint32_t ui32NumTemps = psDecl->value.ui32NumTemps; - for (i = 0; i < ui32NumTemps; i++) - { - if (psShader->psFloatTempSizes[i] != 0) - bformata(GetEarlyMain(psContext), "%s " HLSLCC_TEMP_PREFIX "%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_FLOAT, psShader->psFloatTempSizes[i]), i); - if (psShader->psFloat16TempSizes[i] != 0) - bformata(GetEarlyMain(psContext), "%s " HLSLCC_TEMP_PREFIX "16_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_FLOAT16, psShader->psFloat16TempSizes[i]), i); - if (psShader->psFloat10TempSizes[i] != 0) - bformata(GetEarlyMain(psContext), "%s " HLSLCC_TEMP_PREFIX "10_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_FLOAT10, psShader->psFloat10TempSizes[i]), i); - if (psShader->psIntTempSizes[i] != 0) - bformata(GetEarlyMain(psContext), "%s " HLSLCC_TEMP_PREFIX 
"i%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_INT, psShader->psIntTempSizes[i]), i); - if (psShader->psInt16TempSizes[i] != 0) - bformata(GetEarlyMain(psContext), "%s " HLSLCC_TEMP_PREFIX "i16_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_INT16, psShader->psInt16TempSizes[i]), i); - if (psShader->psInt12TempSizes[i] != 0) - bformata(GetEarlyMain(psContext), "%s " HLSLCC_TEMP_PREFIX "i12_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_INT12, psShader->psInt12TempSizes[i]), i); - if (psShader->psUIntTempSizes[i] != 0) - bformata(GetEarlyMain(psContext), "%s " HLSLCC_TEMP_PREFIX "u%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_UINT, psShader->psUIntTempSizes[i]), i); - if (psShader->psUInt16TempSizes[i] != 0) - bformata(GetEarlyMain(psContext), "%s " HLSLCC_TEMP_PREFIX "u16_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_UINT16, psShader->psUInt16TempSizes[i]), i); - if (psShader->fp64 && (psShader->psDoubleTempSizes[i] != 0)) - bformata(GetEarlyMain(psContext), "%s " HLSLCC_TEMP_PREFIX "d%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_DOUBLE, psShader->psDoubleTempSizes[i]), i); - if (psShader->psBoolTempSizes[i] != 0) - bformata(GetEarlyMain(psContext), "%s " HLSLCC_TEMP_PREFIX "b%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_BOOL, psShader->psBoolTempSizes[i]), i); - } - break; - } - case OPCODE_SPECIAL_DCL_IMMCONST: - { - ASSERT(0 && "DX9 shaders no longer supported!"); - break; - } - case OPCODE_DCL_CONSTANT_BUFFER: - { - const ConstantBuffer* psCBuf = NULL; - psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, psDecl->asOperands[0].aui32ArraySizes[0], &psCBuf); - ASSERT(psCBuf != NULL); - - if (psCBuf->name.substr(0, 20) == "hlslcc_SubpassInput_" && psCBuf->name.length() >= 23 && !psCBuf->asVars.empty()) - { - // Special case for framebuffer fetch. 
- char ty = psCBuf->name[20]; - int idx = psCBuf->name[22] - '0'; - - const ShaderVar &sv = psCBuf->asVars[0]; - if (sv.name.substr(0, 15) == "hlslcc_fbinput_") - { - // Pick up the type and index - std::ostringstream oss; - m_NeedFBInputRemapDecl = true; - switch (ty) - { - case 'f': - case 'F': - oss << "float4 " << sv.name << " [[ color(xlt_remap_i[" << idx << "]) ]]"; - m_StructDefinitions[""].m_Members.push_back(std::make_pair(sv.name, oss.str())); - break; - case 'h': - case 'H': - oss << "half4 " << sv.name << " [[ color(xlt_remap_i[" << idx << "]) ]]"; - m_StructDefinitions[""].m_Members.push_back(std::make_pair(sv.name, oss.str())); - break; - case 'i': - case 'I': - oss << "int4 " << sv.name << " [[ color(xlt_remap_i[" << idx << "]) ]]"; - m_StructDefinitions[""].m_Members.push_back(std::make_pair(sv.name, oss.str())); - break; - case 'u': - case 'U': - oss << "uint4 " << sv.name << " [[ color(xlt_remap_i[" << idx << "]) ]]"; - m_StructDefinitions[""].m_Members.push_back(std::make_pair(sv.name, oss.str())); - break; - default: - break; - } - } - // Break out so this doesn't get declared. 
- break; - } - - DeclareConstantBuffer(psCBuf, psDecl->asOperands[0].aui32ArraySizes[0]); - break; - } - case OPCODE_DCL_RESOURCE: - { - DeclareResource(psDecl); - break; - } - case OPCODE_DCL_OUTPUT: - { - DeclareOutput(psDecl); - break; - } - - case OPCODE_DCL_GLOBAL_FLAGS: - { - uint32_t ui32Flags = psDecl->value.ui32GlobalFlags; - - if (ui32Flags & GLOBAL_FLAG_FORCE_EARLY_DEPTH_STENCIL && psContext->psShader->eShaderType == PIXEL_SHADER) - { - psShader->sInfo.bEarlyFragmentTests = true; - } - if (!(ui32Flags & GLOBAL_FLAG_REFACTORING_ALLOWED)) - { - //TODO add precise - //HLSL precise - http://msdn.microsoft.com/en-us/library/windows/desktop/hh447204(v=vs.85).aspx - } - if (ui32Flags & GLOBAL_FLAG_ENABLE_DOUBLE_PRECISION_FLOAT_OPS) - { - // Not supported on Metal -// psShader->fp64 = 1; - } - break; - } - case OPCODE_DCL_THREAD_GROUP: - { - // Send this info to reflecion: Metal gives this at runtime as a param - psContext->m_Reflection.OnThreadGroupSize(psDecl->value.aui32WorkGroupSize[0], - psDecl->value.aui32WorkGroupSize[1], - psDecl->value.aui32WorkGroupSize[2]); - break; - } - case OPCODE_DCL_TESS_OUTPUT_PRIMITIVE: - { - if (psContext->psShader->eShaderType == HULL_SHADER) - { - psContext->psShader->sInfo.eTessOutPrim = psDecl->value.eTessOutPrim; - if (psContext->psShader->sInfo.eTessOutPrim == TESSELLATOR_OUTPUT_TRIANGLE_CW) - psContext->psShader->sInfo.eTessOutPrim = TESSELLATOR_OUTPUT_TRIANGLE_CCW; - else if (psContext->psShader->sInfo.eTessOutPrim == TESSELLATOR_OUTPUT_TRIANGLE_CCW) - psContext->psShader->sInfo.eTessOutPrim = TESSELLATOR_OUTPUT_TRIANGLE_CW; - } - break; - } - case OPCODE_DCL_TESS_DOMAIN: - { - psContext->psShader->sInfo.eTessDomain = psDecl->value.eTessDomain; - - if (psContext->psShader->sInfo.eTessDomain == TESSELLATOR_DOMAIN_ISOLINE) - psContext->m_Reflection.OnDiagnostics("Metal Tessellation: domain(\"isoline\") not supported.", 0, true); - break; - } - case OPCODE_DCL_TESS_PARTITIONING: - { - 
psContext->psShader->sInfo.eTessPartitioning = psDecl->value.eTessPartitioning; - break; - } - case OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY: - { - // Not supported - break; - } - case OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT: - { - // Not supported - break; - } - case OPCODE_DCL_GS_INPUT_PRIMITIVE: - { - // Not supported - break; - } - case OPCODE_DCL_INTERFACE: - { - // Are interfaces ever even used? - ASSERT(0); - break; - } - case OPCODE_DCL_FUNCTION_BODY: - { - ASSERT(0); - break; - } - case OPCODE_DCL_FUNCTION_TABLE: - { - ASSERT(0); - break; - } - case OPCODE_CUSTOMDATA: - { - // TODO: This is only ever accessed as a float currently. Do trickery if we ever see ints accessed from an array. - // Walk through all the chunks we've seen in this phase. - - bstring glsl = *psContext->currentGLSLString; - bformata(glsl, "constant float4 ImmCB_%d[%d] =\n{\n", psContext->currentPhase, psDecl->asImmediateConstBuffer.size()); - bool isFirst = true; - std::for_each(psDecl->asImmediateConstBuffer.begin(), psDecl->asImmediateConstBuffer.end(), [&](const ICBVec4 &data) - { - if (!isFirst) - { - bcatcstr(glsl, ",\n"); - } - isFirst = false; - - float val[4] = { - *(float*)&data.a, - *(float*)&data.b, - *(float*)&data.c, - *(float*)&data.d - }; - - bformata(glsl, "\tfloat4("); - for (uint32_t k = 0; k < 4; k++) - { - if (k != 0) - bcatcstr(glsl, ", "); - if (fpcheck(val[k])) - bformata(glsl, "as_type(0x%Xu)", *(uint32_t *)&val[k]); - else - HLSLcc::PrintFloat(glsl, val[k]); - } - bcatcstr(glsl, ")"); - }); - bcatcstr(glsl, "\n};\n"); - break; - } - case OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT: - case OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT: - break; // Nothing to do - - case OPCODE_DCL_INDEXABLE_TEMP: - { - const uint32_t ui32RegIndex = psDecl->sIdxTemp.ui32RegIndex; - const uint32_t ui32RegCount = psDecl->sIdxTemp.ui32RegCount; - const uint32_t ui32RegComponentSize = psDecl->sIdxTemp.ui32RegComponentSize; - bformata(GetEarlyMain(psContext), "float%d TempArray%d[%d];\n", 
ui32RegComponentSize, ui32RegIndex, ui32RegCount); - break; - } - case OPCODE_DCL_INDEX_RANGE: - { - switch (psDecl->asOperands[0].eType) - { - case OPERAND_TYPE_OUTPUT: - case OPERAND_TYPE_INPUT: - { - const ShaderInfo::InOutSignature* psSignature = NULL; - const char* type = "float"; - uint32_t startReg = 0; - uint32_t i; - bstring *oldString; - int regSpace = psDecl->asOperands[0].GetRegisterSpace(psContext); - int isInput = psDecl->asOperands[0].eType == OPERAND_TYPE_INPUT ? 1 : 0; - - if (regSpace == 0) - { - if (isInput) - psShader->sInfo.GetInputSignatureFromRegister( - psDecl->asOperands[0].ui32RegisterNumber, - psDecl->asOperands[0].ui32CompMask, - &psSignature); - else - psShader->sInfo.GetOutputSignatureFromRegister( - psDecl->asOperands[0].ui32RegisterNumber, - psDecl->asOperands[0].ui32CompMask, - psShader->ui32CurrentVertexOutputStream, - &psSignature); - } - else - psShader->sInfo.GetPatchConstantSignatureFromRegister(psDecl->asOperands[0].ui32RegisterNumber, psDecl->asOperands[0].ui32CompMask, &psSignature); - - ASSERT(psSignature != NULL); - - switch (psSignature->eComponentType) - { - case INOUT_COMPONENT_UINT32: - { - type = "uint"; - break; - } - case INOUT_COMPONENT_SINT32: - { - type = "int"; - break; - } - case INOUT_COMPONENT_FLOAT32: - { - break; - } - default: - ASSERT(0); - break; - } - - switch (psSignature->eMinPrec) // TODO What if the inputs in the indexed range are of different precisions? - { - default: - break; - case MIN_PRECISION_ANY_16: - ASSERT(0); // Wut? 
- break; - case MIN_PRECISION_FLOAT_16: - case MIN_PRECISION_FLOAT_2_8: - type = "half"; - break; - case MIN_PRECISION_SINT_16: - type = "short"; - break; - case MIN_PRECISION_UINT_16: - type = "ushort"; - break; - } - - startReg = psDecl->asOperands[0].ui32RegisterNumber; - oldString = psContext->currentGLSLString; - psContext->currentGLSLString = &psContext->psShader->asPhases[psContext->currentPhase].earlyMain; - psContext->AddIndentation(); - bformata(psContext->psShader->asPhases[psContext->currentPhase].earlyMain, "%s4 phase%d_%sput%d_%d[%d];\n", type, psContext->currentPhase, isInput ? "In" : "Out", regSpace, startReg, psDecl->value.ui32IndexRange); - glsl = isInput ? psContext->psShader->asPhases[psContext->currentPhase].earlyMain : psContext->psShader->asPhases[psContext->currentPhase].postShaderCode; - psContext->currentGLSLString = &glsl; - if (isInput == 0) - psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode = 1; - for (i = 0; i < psDecl->value.ui32IndexRange; i++) - { - int dummy = 0; - std::string realName; - uint32_t destMask = psDecl->asOperands[0].ui32CompMask; - uint32_t rebase = 0; - const ShaderInfo::InOutSignature *psSig = NULL; - uint32_t regSpace = psDecl->asOperands[0].GetRegisterSpace(psContext); - - if (regSpace == 0) - if (isInput) - psContext->psShader->sInfo.GetInputSignatureFromRegister(startReg + i, destMask, &psSig); - else - psContext->psShader->sInfo.GetOutputSignatureFromRegister(startReg + i, destMask, 0, &psSig); - else - psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(startReg + i, destMask, &psSig); - - ASSERT(psSig != NULL); - - if ((psSig->ui32Mask & destMask) == 0) - continue; // Skip dummy writes (vec2 texcoords get filled to vec4 with zeroes etc) - - while ((psSig->ui32Mask & (1 << rebase)) == 0) - rebase++; - - ((Declaration *)psDecl)->asOperands[0].ui32RegisterNumber = startReg + i; - - if (isInput) - { - realName = psContext->GetDeclaredInputName(&psDecl->asOperands[0], &dummy, 
1, NULL); - - psContext->AddIndentation(); - bformata(glsl, "phase%d_Input%d_%d[%d]", psContext->currentPhase, regSpace, startReg, i); - - if (destMask != OPERAND_4_COMPONENT_MASK_ALL) - { - int k; - const char *swizzle = "xyzw"; - bcatcstr(glsl, "."); - for (k = 0; k < 4; k++) - { - if ((destMask & (1 << k)) && (psSig->ui32Mask & (1 << k))) - { - bformata(glsl, "%c", swizzle[k]); - } - } - } - - // for some reason input struct is missed here from GetDeclaredInputName result, so add it manually - bformata(glsl, " = input.%s", realName.c_str()); - if (destMask != OPERAND_4_COMPONENT_MASK_ALL && destMask != psSig->ui32Mask) - { - int k; - const char *swizzle = "xyzw"; - bcatcstr(glsl, "."); - for (k = 0; k < 4; k++) - { - if ((destMask & (1 << k)) && (psSig->ui32Mask & (1 << k))) - { - bformata(glsl, "%c", swizzle[k - rebase]); - } - } - } - } - else - { - realName = psContext->GetDeclaredOutputName(&psDecl->asOperands[0], &dummy, NULL, NULL, 0); - - psContext->AddIndentation(); - bcatcstr(glsl, realName.c_str()); - if (destMask != OPERAND_4_COMPONENT_MASK_ALL && destMask != psSig->ui32Mask) - { - int k; - const char *swizzle = "xyzw"; - bcatcstr(glsl, "."); - for (k = 0; k < 4; k++) - { - if ((destMask & (1 << k)) && (psSig->ui32Mask & (1 << k))) - { - bformata(glsl, "%c", swizzle[k - rebase]); - } - } - } - - bformata(glsl, " = phase%d_Output%d_%d[%d]", psContext->currentPhase, regSpace, startReg, i); - - if (destMask != OPERAND_4_COMPONENT_MASK_ALL) - { - int k; - const char *swizzle = "xyzw"; - bcatcstr(glsl, "."); - for (k = 0; k < 4; k++) - { - if ((destMask & (1 << k)) && (psSig->ui32Mask & (1 << k))) - { - bformata(glsl, "%c", swizzle[k]); - } - } - } - } - - bcatcstr(glsl, ";\n"); - } - - ((Declaration *)psDecl)->asOperands[0].ui32RegisterNumber = startReg; - psContext->currentGLSLString = oldString; - glsl = *psContext->currentGLSLString; - - for (i = 0; i < psDecl->value.ui32IndexRange; i++) - { - if (regSpace == 0) - { - if (isInput) - 
psShader->sInfo.GetInputSignatureFromRegister( - psDecl->asOperands[0].ui32RegisterNumber + i, - psDecl->asOperands[0].ui32CompMask, - &psSignature); - else - psShader->sInfo.GetOutputSignatureFromRegister( - psDecl->asOperands[0].ui32RegisterNumber + i, - psDecl->asOperands[0].ui32CompMask, - psShader->ui32CurrentVertexOutputStream, - &psSignature); - } - else - psShader->sInfo.GetPatchConstantSignatureFromRegister(psDecl->asOperands[0].ui32RegisterNumber + i, psDecl->asOperands[0].ui32CompMask, &psSignature); - - ASSERT(psSignature != NULL); - - ((ShaderInfo::InOutSignature *)psSignature)->isIndexed.insert(psContext->currentPhase); - ((ShaderInfo::InOutSignature *)psSignature)->indexStart[psContext->currentPhase] = startReg; - ((ShaderInfo::InOutSignature *)psSignature)->index[psContext->currentPhase] = i; - } - - - break; - } - default: - // TODO Input index ranges. - ASSERT(0); - } - break; - } - - case OPCODE_HS_DECLS: - { - // Not supported - break; - } - case OPCODE_DCL_INPUT_CONTROL_POINT_COUNT: - { - if (psContext->psShader->eShaderType == HULL_SHADER) - psShader->sInfo.ui32TessInputControlPointCount = psDecl->value.ui32MaxOutputVertexCount; - else if (psContext->psShader->eShaderType == DOMAIN_SHADER) - psShader->sInfo.ui32TessOutputControlPointCount = psDecl->value.ui32MaxOutputVertexCount; - break; - } - case OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT: - { - if (psContext->psShader->eShaderType == HULL_SHADER) - psShader->sInfo.ui32TessOutputControlPointCount = psDecl->value.ui32MaxOutputVertexCount; - break; - } - case OPCODE_HS_FORK_PHASE: - { - // Not supported - break; - } - case OPCODE_HS_JOIN_PHASE: - { - // Not supported - break; - } - case OPCODE_DCL_SAMPLER: - { - std::string name = TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NAME_ONLY); - - if (!EmitInlineSampler(psContext, name)) - { - // for some reason we have some samplers start with "sampler" and some not - const bool startsWithSampler = name.find("sampler") == 0; - - std::ostringstream 
samplerOss; - samplerOss << (startsWithSampler ? "" : "sampler") << name; - std::string samplerName = samplerOss.str(); - - if (!psContext->psDependencies->IsMemberDeclared(samplerName)) - { - const uint32_t slot = m_SamplerSlots.GetBindingSlot(psDecl->asOperands[0].ui32RegisterNumber, BindingSlotAllocator::Texture); - std::ostringstream oss; - oss << "sampler " << samplerName << " [[ sampler (" << slot << ") ]]"; - - m_StructDefinitions[""].m_Members.push_back(std::make_pair(samplerName, oss.str())); - - SamplerDesc desc = { name, psDecl->asOperands[0].ui32RegisterNumber, slot }; - m_Samplers.push_back(desc); - } - } - - break; - } - case OPCODE_DCL_HS_MAX_TESSFACTOR: - { - if (psContext->psShader->eShaderType == HULL_SHADER && psContext->psDependencies) - psContext->psDependencies->fMaxTessFactor = psDecl->value.fMaxTessFactor; - break; - } - case OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED: - { - // A hack to support single component 32bit RWBuffers: Declare as raw buffer. - // TODO: Use textures for RWBuffers when the scripting API has actual format selection etc - // way to flag the created ComputeBuffer as typed. Even then might want to leave this - // hack path for 32bit (u)int typed buffers to continue support atomic ops on those formats. 
- if (psDecl->value.eResourceDimension == RESOURCE_DIMENSION_BUFFER) - { - DeclareBufferVariable(psDecl, true, true); - break; - } - std::string texName = ResourceName(RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber); - std::string samplerTypeName = TranslateResourceDeclaration(psContext, psDecl, texName, false, true); - if (!psContext->psDependencies->IsMemberDeclared(texName)) - { - uint32_t slot = m_TextureSlots.GetBindingSlot(psDecl->asOperands[0].ui32RegisterNumber, BindingSlotAllocator::UAV); - - std::ostringstream oss; - oss << samplerTypeName << " " << texName << " [[ texture(" << slot << ") ]] "; - - m_StructDefinitions[""].m_Members.push_back(std::make_pair(texName, oss.str())); - - HLSLCC_TEX_DIMENSION texDim = TD_INT; - switch (psDecl->value.eResourceDimension) - { - default: break; - case RESOURCE_DIMENSION_TEXTURE2D: - case RESOURCE_DIMENSION_TEXTURE2DMS: - texDim = TD_2D; - break; - case RESOURCE_DIMENSION_TEXTURE2DARRAY: - case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: - texDim = TD_2DARRAY; - break; - case RESOURCE_DIMENSION_TEXTURE3D: - texDim = TD_3D; - break; - case RESOURCE_DIMENSION_TEXTURECUBE: - texDim = TD_CUBE; - break; - case RESOURCE_DIMENSION_TEXTURECUBEARRAY: - texDim = TD_CUBEARRAY; - break; - } - TextureSamplerDesc desc = {texName, (int)slot, -1, texDim, false, false, true}; - m_Textures.push_back(desc); - } - break; - } - - case OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED: - { - DeclareBufferVariable(psDecl, false, true); - break; - } - case OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW: - { - DeclareBufferVariable(psDecl, true, true); - break; - } - case OPCODE_DCL_RESOURCE_STRUCTURED: - { - DeclareBufferVariable(psDecl, false, false); - break; - } - case OPCODE_DCL_RESOURCE_RAW: - { - DeclareBufferVariable(psDecl, true, false); - break; - } - case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED: - { - ShaderVarType* psVarType = &psShader->sInfo.sGroupSharedVarType[psDecl->asOperands[0].ui32RegisterNumber]; - std::ostringstream oss; - oss 
<< "uint value[" << psDecl->sTGSM.ui32Stride / 4 << "]"; - m_StructDefinitions[TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NAME_ONLY) + "_Type"].m_Members.push_back(std::make_pair("value", oss.str())); - m_StructDefinitions[""].m_Dependencies.push_back(TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NAME_ONLY) + "_Type"); - oss.str(""); - oss << "threadgroup " << TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NAME_ONLY) - << "_Type " << TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NAME_ONLY) - << "[" << psDecl->sTGSM.ui32Count << "]"; - - bformata(GetEarlyMain(psContext), "%s;\n", oss.str().c_str()); - psVarType->name = "$Element"; - - psVarType->Columns = psDecl->sTGSM.ui32Stride / 4; - psVarType->Elements = psDecl->sTGSM.ui32Count; - break; - } - case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW: - { - ShaderVarType* psVarType = &psShader->sInfo.sGroupSharedVarType[psDecl->asOperands[0].ui32RegisterNumber]; - - std::ostringstream oss; - oss << "threadgroup uint " << TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NAME_ONLY) - << "[" << (psDecl->sTGSM.ui32Count / psDecl->sTGSM.ui32Stride) << "]"; - - bformata(GetEarlyMain(psContext), "%s;\n", oss.str().c_str()); - psVarType->name = "$Element"; - - psVarType->Columns = 1; - psVarType->Elements = psDecl->sTGSM.ui32Count / psDecl->sTGSM.ui32Stride; - break; - } - - case OPCODE_DCL_STREAM: - { - // Not supported on Metal - break; - } - case OPCODE_DCL_GS_INSTANCE_COUNT: - { - // Not supported on Metal - break; - } - - default: - ASSERT(0); - break; - } -} - -std::string ToMetal::ResourceName(ResourceGroup group, const uint32_t ui32RegisterNumber) -{ - const ResourceBinding* psBinding = 0; - std::ostringstream oss; - int found; - - found = psContext->psShader->sInfo.GetResourceFromBindingPoint(group, ui32RegisterNumber, &psBinding); - - if (found) - { - size_t i = 0; - std::string name = psBinding->name; - uint32_t ui32ArrayOffset = ui32RegisterNumber - psBinding->ui32BindPoint; - - while (i < name.length()) 
- { - //array syntax [X] becomes _0_ - //Otherwise declarations could end up as: - //uniform sampler2D SomeTextures[0]; - //uniform sampler2D SomeTextures[1]; - if (name[i] == '[' || name[i] == ']') - name[i] = '_'; - - ++i; - } - - if (ui32ArrayOffset) - { - oss << name << ui32ArrayOffset; - return oss.str(); - } - else - { - return name; - } - } - else - { - oss << "UnknownResource" << ui32RegisterNumber; - return oss.str(); - } -} - -void ToMetal::TranslateResourceTexture(const Declaration* psDecl, uint32_t samplerCanDoShadowCmp, HLSLCC_TEX_DIMENSION texDim) -{ - std::string texName = ResourceName(RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber); - const bool isDepthSampler = (samplerCanDoShadowCmp && psDecl->ui32IsShadowTex); - std::string samplerTypeName = TranslateResourceDeclaration(psContext, psDecl, texName, isDepthSampler, false); - - bool isMS = false; - switch (psDecl->value.eResourceDimension) - { - default: - break; - case RESOURCE_DIMENSION_TEXTURE2DMS: - case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: - isMS = true; - break; - } - - if (!psContext->psDependencies->IsMemberDeclared(texName)) - { - uint32_t slot = m_TextureSlots.GetBindingSlot(psDecl->asOperands[0].ui32RegisterNumber, BindingSlotAllocator::Texture); - - std::ostringstream oss; - oss << samplerTypeName << " " << texName << " [[ texture(" << slot << ") ]] "; - - m_StructDefinitions[""].m_Members.push_back(std::make_pair(texName, oss.str())); - - TextureSamplerDesc desc = {texName, (int)slot, -1, texDim, isMS, isDepthSampler, false}; - m_Textures.push_back(desc); - - if (isDepthSampler) - EnsureShadowSamplerDeclared(); - } -} - -void ToMetal::DeclareResource(const Declaration *psDecl) -{ - switch (psDecl->value.eResourceDimension) - { - case RESOURCE_DIMENSION_BUFFER: - { - // Fake single comp 32bit texel buffers by using raw buffer - DeclareBufferVariable(psDecl, true, false); - break; - - // TODO: re-enable this code for buffer textures when sripting API has proper support for it 
-#if 0 - if (!psContext->psDependencies->IsMemberDeclared(texName)) - { - uint32_t slot = m_TextureSlots.GetBindingSlot(psDecl->asOperands[0].ui32RegisterNumber, BindingSlotAllocator::Texture); - std::string texName = TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NAME_ONLY); - std::ostringstream oss; - oss << "device " << TranslateResourceDeclaration(psContext, psDecl, texName, false, false); - - oss << texName << " [[ texture(" << slot << ") ]]"; - - m_StructDefinitions[""].m_Members.push_back(std::make_pair(texName, oss.str())); - psContext->m_Reflection.OnTextureBinding(texName, slot, TD_2D, false); //TODO: correct HLSLCC_TEX_DIMENSION? - } - break; -#endif - } - default: - ASSERT(0); - break; - - case RESOURCE_DIMENSION_TEXTURE1D: - { - TranslateResourceTexture(psDecl, 1, TD_2D); //TODO: correct HLSLCC_TEX_DIMENSION? - break; - } - case RESOURCE_DIMENSION_TEXTURE2D: - { - TranslateResourceTexture(psDecl, 1, TD_2D); - break; - } - case RESOURCE_DIMENSION_TEXTURE2DMS: - { - TranslateResourceTexture(psDecl, 0, TD_2D); - break; - } - case RESOURCE_DIMENSION_TEXTURE3D: - { - TranslateResourceTexture(psDecl, 0, TD_3D); - break; - } - case RESOURCE_DIMENSION_TEXTURECUBE: - { - TranslateResourceTexture(psDecl, 1, TD_CUBE); - break; - } - case RESOURCE_DIMENSION_TEXTURE1DARRAY: - { - TranslateResourceTexture(psDecl, 1, TD_2DARRAY); //TODO: correct HLSLCC_TEX_DIMENSION? 
- break; - } - case RESOURCE_DIMENSION_TEXTURE2DARRAY: - { - TranslateResourceTexture(psDecl, 1, TD_2DARRAY); - break; - } - case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: - { - TranslateResourceTexture(psDecl, 0, TD_2DARRAY); - break; - } - case RESOURCE_DIMENSION_TEXTURECUBEARRAY: - { - TranslateResourceTexture(psDecl, 1, TD_CUBEARRAY); - break; - } - } - psContext->psShader->aeResourceDims[psDecl->asOperands[0].ui32RegisterNumber] = psDecl->value.eResourceDimension; -} - -void ToMetal::DeclareOutput(const Declaration *psDecl) -{ - Shader* psShader = psContext->psShader; - - if (!psContext->OutputNeedsDeclaring(&psDecl->asOperands[0], 1)) - return; - - const Operand* psOperand = &psDecl->asOperands[0]; - int iNumComponents; - int regSpace = psDecl->asOperands[0].GetRegisterSpace(psContext); - uint32_t ui32Reg = psDecl->asOperands[0].ui32RegisterNumber; - - const ShaderInfo::InOutSignature* psSignature = NULL; - SHADER_VARIABLE_TYPE cType = SVT_VOID; - - if (psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH || - psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL || - psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL) - { - iNumComponents = 1; - cType = SVT_FLOAT; - } - else - { - if (regSpace == 0) - psShader->sInfo.GetOutputSignatureFromRegister( - ui32Reg, - psDecl->asOperands[0].ui32CompMask, - psShader->ui32CurrentVertexOutputStream, - &psSignature); - else - psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Reg, psDecl->asOperands[0].ui32CompMask, &psSignature); - - iNumComponents = HLSLcc::GetNumberBitsSet(psSignature->ui32Mask); - - switch (psSignature->eComponentType) - { - case INOUT_COMPONENT_UINT32: - { - cType = SVT_UINT; - break; - } - case INOUT_COMPONENT_SINT32: - { - cType = SVT_INT; - break; - } - case INOUT_COMPONENT_FLOAT32: - { - cType = SVT_FLOAT; - break; - } - default: - ASSERT(0); - break; - } - // Don't set this for oDepth (or variants), because depth output register is in separate space from other outputs (regno 0, but 
others may overlap with that) - if (iNumComponents == 1) - psContext->psShader->abScalarOutput[regSpace][ui32Reg] |= (int)psDecl->asOperands[0].ui32CompMask; - - switch (psOperand->eMinPrecision) - { - case OPERAND_MIN_PRECISION_DEFAULT: - break; - case OPERAND_MIN_PRECISION_FLOAT_16: - cType = SVT_FLOAT16; - break; - case OPERAND_MIN_PRECISION_FLOAT_2_8: - cType = SVT_FLOAT10; - break; - case OPERAND_MIN_PRECISION_SINT_16: - cType = SVT_INT16; - break; - case OPERAND_MIN_PRECISION_UINT_16: - cType = SVT_UINT16; - break; - } - } - - std::string type = HLSLcc::GetConstructorForTypeMetal(cType, iNumComponents); - std::string name = psContext->GetDeclaredOutputName(&psDecl->asOperands[0], nullptr, nullptr, nullptr, 1); - - switch (psShader->eShaderType) - { - case PIXEL_SHADER: - { - switch (psDecl->asOperands[0].eType) - { - case OPERAND_TYPE_OUTPUT_COVERAGE_MASK: - { - std::ostringstream oss; - oss << type << " " << name << " [[ sample_mask ]]"; - m_StructDefinitions[GetOutputStructName()].m_Members.push_back(std::make_pair(name, oss.str())); - break; - } - case OPERAND_TYPE_OUTPUT_DEPTH: - { - std::ostringstream oss; - oss << type << " " << name << " [[ depth(any) ]]"; - m_StructDefinitions[GetOutputStructName()].m_Members.push_back(std::make_pair(name, oss.str())); - break; - } - case OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL: - { - std::ostringstream oss; - oss << type << " " << name << " [[ depth(greater) ]]"; - m_StructDefinitions[GetOutputStructName()].m_Members.push_back(std::make_pair(name, oss.str())); - break; - } - case OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL: - { - std::ostringstream oss; - oss << type << " " << name << " [[ depth(less) ]]"; - m_StructDefinitions[GetOutputStructName()].m_Members.push_back(std::make_pair(name, oss.str())); - break; - } - default: - { - std::ostringstream oss; - oss << type << " " << name << " [[ color(xlt_remap_o[" << psSignature->ui32SemanticIndex << "]) ]]"; - m_NeedFBOutputRemapDecl = true; - 
m_StructDefinitions[GetOutputStructName()].m_Members.push_back(std::make_pair(name, oss.str())); - psContext->m_Reflection.OnFragmentOutputDeclaration(iNumComponents, psSignature->ui32SemanticIndex); - } - } - break; - } - case VERTEX_SHADER: - case DOMAIN_SHADER: - case HULL_SHADER: - { - std::string out = GetOutputStructName(); - bool isTessKernel = (psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) != 0 && (psContext->psShader->eShaderType == HULL_SHADER || psContext->psShader->eShaderType == VERTEX_SHADER); - - std::ostringstream oss; - oss << type << " " << name; - if (!isTessKernel && (psSignature->eSystemValueType == NAME_POSITION || psSignature->semanticName == "POS") && psOperand->ui32RegisterNumber == 0) - oss << " [[ position ]]"; - else if (!isTessKernel && psSignature->eSystemValueType == NAME_UNDEFINED && psSignature->semanticName == "PSIZE" && psSignature->ui32SemanticIndex == 0) - oss << " [[ point_size ]]"; - else - oss << " [[ user(" << name << ") ]]"; - m_StructDefinitions[out].m_Members.push_back(std::make_pair(name, oss.str())); - - if (psContext->psShader->eShaderType == VERTEX_SHADER) - psContext->m_Reflection.OnVertexProgramOutput(name, psSignature->semanticName, psSignature->ui32SemanticIndex); - - // For preserving data layout, declare output struct as domain shader input, too - if (psContext->psShader->eShaderType == HULL_SHADER) - { - out += "In"; - - std::ostringstream oss; - oss << type << " " << name; - - // VERTEX_SHADER hardcoded on purpose - bool keepLocation = ((psContext->flags & HLSLCC_FLAG_KEEP_VARYING_LOCATIONS) != 0); - uint32_t loc = psContext->psDependencies->GetVaryingLocation(name, VERTEX_SHADER, true, keepLocation, psContext->psShader->maxSemanticIndex); - oss << " [[ " << "attribute(" << loc << ")" << " ]] "; - - psContext->m_Reflection.OnInputBinding(name, loc); - m_StructDefinitions[out].m_Members.push_back(std::make_pair(name, oss.str())); - } - break; - } - case GEOMETRY_SHADER: - default: - ASSERT(0); - break; - } 
- HandleOutputRedirect(psDecl, HLSLcc::GetConstructorForTypeMetal(cType, 4)); -} - -void ToMetal::EnsureShadowSamplerDeclared() -{ - // on macos we will set comparison func from the app side - if (m_ShadowSamplerDeclared || !IsMobileTarget(psContext)) - return; - - if ((psContext->flags & HLSLCC_FLAG_METAL_SHADOW_SAMPLER_LINEAR) != 0 || (psContext->psShader->eShaderType == COMPUTE_SHADER)) - m_ExtraGlobalDefinitions += "constexpr sampler _mtl_xl_shadow_sampler(address::clamp_to_edge, filter::linear, compare_func::greater_equal);\n"; - else - m_ExtraGlobalDefinitions += "constexpr sampler _mtl_xl_shadow_sampler(address::clamp_to_edge, filter::nearest, compare_func::greater_equal);\n"; - m_ShadowSamplerDeclared = true; -} diff --git a/third_party/HLSLcc/src/toMetalInstruction.cpp b/third_party/HLSLcc/src/toMetalInstruction.cpp deleted file mode 100644 index a4c0645..0000000 --- a/third_party/HLSLcc/src/toMetalInstruction.cpp +++ /dev/null @@ -1,4096 +0,0 @@ -#include "internal_includes/toMetal.h" -#include "internal_includes/HLSLccToolkit.h" -#include "internal_includes/languages.h" -#include "internal_includes/HLSLCrossCompilerContext.h" -#include "bstrlib.h" -#include "stdio.h" -#include -#include -#include -#include "internal_includes/debug.h" -#include "internal_includes/Shader.h" -#include "internal_includes/Instruction.h" -#include "hlslcc.h" - -using namespace HLSLcc; - -bstring operator<<(bstring a, const std::string &b) -{ - bcatcstr(a, b.c_str()); - return a; -} - -static void AddOpAssignToDest(bstring glsl, SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, SHADER_VARIABLE_TYPE eDestType, uint32_t ui32DestElementCount, uint32_t precise, int& numParenthesis, bool allowReinterpretCast = true) -{ - numParenthesis = 0; - - // Find out from type the precisions and types without precision - RESOURCE_RETURN_TYPE srcBareType = SVTTypeToResourceReturnType(eSrcType); - RESOURCE_RETURN_TYPE dstBareType = SVTTypeToResourceReturnType(eDestType); - 
REFLECT_RESOURCE_PRECISION srcPrec = SVTTypeToPrecision(eSrcType); - REFLECT_RESOURCE_PRECISION dstPrec = SVTTypeToPrecision(eDestType); - - // Add assigment - bcatcstr(glsl, " = "); - - /* TODO: implement precise for metal - if (precise) - { - bcatcstr(glsl, "u_xlat_precise("); - numParenthesis++; - }*/ - - // Special reinterpret cast between float<->uint/int if size matches - // TODO: Handle bools? - if (srcBareType != dstBareType && (srcBareType == RETURN_TYPE_FLOAT || dstBareType == RETURN_TYPE_FLOAT) && srcPrec == dstPrec && allowReinterpretCast) - { - bformata(glsl, "as_type<%s>(", GetConstructorForTypeMetal(eDestType, ui32DestElementCount)); - numParenthesis++; - if (ui32DestElementCount > ui32SrcElementCount) - { - bformata(glsl, "%s(", GetConstructorForTypeMetal(eSrcType, ui32DestElementCount)); - numParenthesis++; - } - return; - } - - // Do cast in case of type missmatch or dimension - if (eSrcType != eDestType || ui32DestElementCount > ui32SrcElementCount) - { - bformata(glsl, "%s(", GetConstructorForTypeMetal(eDestType, ui32DestElementCount)); - numParenthesis++; - return; - } -} - -// This function prints out the destination name, possible destination writemask, assignment operator -// and any possible conversions needed based on the eSrcType+ui32SrcElementCount (type and size of data expected to be coming in) -// As an output, pNeedsParenthesis will be filled with the amount of closing parenthesis needed -// and pSrcCount will be filled with the number of components expected -// ui32CompMask can be used to only write to 1 or more components (used by MOVC) -void ToMetal::AddOpAssignToDestWithMask(const Operand* psDest, - SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, uint32_t precise, int& numParenthesis, uint32_t ui32CompMask) -{ - uint32_t ui32DestElementCount = psDest->GetNumSwizzleElements(ui32CompMask); - bstring glsl = *psContext->currentGLSLString; - SHADER_VARIABLE_TYPE eDestType = psDest->GetDataType(psContext); - glsl << 
TranslateOperand(psDest, TO_FLAG_DESTINATION, ui32CompMask); - AddOpAssignToDest(glsl, eSrcType, ui32SrcElementCount, eDestType, ui32DestElementCount, precise, numParenthesis, psContext->psShader->ui32MajorVersion > 3); -} - -void ToMetal::AddAssignToDest(const Operand* psDest, - SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, uint32_t precise, int& numParenthesis) -{ - AddOpAssignToDestWithMask(psDest, eSrcType, ui32SrcElementCount, precise, numParenthesis, OPERAND_4_COMPONENT_MASK_ALL); -} - -void ToMetal::AddAssignPrologue(int numParenthesis) -{ - bstring glsl = *psContext->currentGLSLString; - while (numParenthesis != 0) - { - bcatcstr(glsl, ")"); - numParenthesis--; - } - bcatcstr(glsl, ";\n"); -} - -void ToMetal::AddComparison(Instruction* psInst, ComparisonType eType, - uint32_t typeFlag) -{ - // Multiple cases to consider here: - // OPCODE_LT, _GT, _NE etc: inputs are floats, outputs UINT 0xffffffff or 0. typeflag: TO_FLAG_NONE - // OPCODE_ILT, _IGT etc: comparisons are signed ints, outputs UINT 0xffffffff or 0 typeflag TO_FLAG_INTEGER - // _ULT, UGT etc: inputs unsigned ints, outputs UINTs typeflag TO_FLAG_UNSIGNED_INTEGER - // - - - bstring glsl = *psContext->currentGLSLString; - const uint32_t destElemCount = psInst->asOperands[0].GetNumSwizzleElements(); - const uint32_t s0ElemCount = psInst->asOperands[1].GetNumSwizzleElements(); - const uint32_t s1ElemCount = psInst->asOperands[2].GetNumSwizzleElements(); - int isBoolDest = psInst->asOperands[0].GetDataType(psContext) == SVT_BOOL; - const uint32_t destMask = psInst->asOperands[0].GetAccessMask(); - - int needsParenthesis = 0; - if (typeFlag == TO_FLAG_NONE - && CanForceToHalfOperand(&psInst->asOperands[1]) - && CanForceToHalfOperand(&psInst->asOperands[2])) - typeFlag = TO_FLAG_FORCE_HALF; - ASSERT(s0ElemCount == s1ElemCount || s1ElemCount == 1 || s0ElemCount == 1); - if ((s0ElemCount != s1ElemCount) && (destElemCount > 1)) - { - // Set the proper auto-expand flag is either argument is 
scalar - typeFlag |= (TO_AUTO_EXPAND_TO_VEC2 << (std::min(std::max(s0ElemCount, s1ElemCount), destElemCount) - 2)); - } - if (destElemCount > 1) - { - const char* glslOpcode[] = { - "==", - "<", - ">=", - "!=", - }; - psContext->AddIndentation(); - if (isBoolDest) - { - glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION | TO_FLAG_BOOL); - bcatcstr(glsl, " = "); - } - else - { - AddAssignToDest(&psInst->asOperands[0], SVT_UINT, destElemCount, psInst->ui32PreciseMask, needsParenthesis); - - bcatcstr(glsl, GetConstructorForTypeMetal(SVT_UINT, destElemCount)); - bcatcstr(glsl, "("); - } - bcatcstr(glsl, "("); - glsl << TranslateOperand(&psInst->asOperands[1], typeFlag, destMask); - bformata(glsl, "%s", glslOpcode[eType]); - glsl << TranslateOperand(&psInst->asOperands[2], typeFlag, destMask); - bcatcstr(glsl, ")"); - if (!isBoolDest) - { - bcatcstr(glsl, ")"); - bcatcstr(glsl, " * 0xFFFFFFFFu"); - } - - AddAssignPrologue(needsParenthesis); - } - else - { - const char* glslOpcode[] = { - "==", - "<", - ">=", - "!=", - }; - - //Scalar compare - - psContext->AddIndentation(); - if (isBoolDest) - { - glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION | TO_FLAG_BOOL); - bcatcstr(glsl, " = "); - } - else - { - AddAssignToDest(&psInst->asOperands[0], SVT_UINT, destElemCount, psInst->ui32PreciseMask, needsParenthesis); - bcatcstr(glsl, "("); - } - glsl << TranslateOperand(&psInst->asOperands[1], typeFlag, destMask); - bformata(glsl, "%s", glslOpcode[eType]); - glsl << TranslateOperand(&psInst->asOperands[2], typeFlag, destMask); - if (!isBoolDest) - { - bcatcstr(glsl, ") ? 
0xFFFFFFFFu : uint(0)"); - } - AddAssignPrologue(needsParenthesis); - } -} - -bool ToMetal::CanForceToHalfOperand(const Operand *psOperand) -{ - if (psOperand->GetDataType(psContext) == SVT_FLOAT16) - return true; - - if (psOperand->eType == OPERAND_TYPE_IMMEDIATE32 || psOperand->eType == OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER) - { - for (int i = 0; i < psOperand->iNumComponents; i++) - { - float val = fabs(psOperand->afImmediates[i]); - // Do not allow forcing immediate value to half if value is beyond half min/max boundaries - if (val != 0 && (val > 65504 || val < 6.10352e-5)) - return false; - } - return true; - } - - return false; -} - -void ToMetal::AddMOVBinaryOp(const Operand *pDest, Operand *pSrc, uint32_t precise) -{ - bstring glsl = *psContext->currentGLSLString; - int numParenthesis = 0; - int srcSwizzleCount = pSrc->GetNumSwizzleElements(); - uint32_t writeMask = pDest->GetAccessMask(); - - const SHADER_VARIABLE_TYPE eSrcType = pSrc->GetDataType(psContext, pDest->GetDataType(psContext)); - uint32_t flags = SVTTypeToFlag(eSrcType); - - AddAssignToDest(pDest, eSrcType, srcSwizzleCount, precise, numParenthesis); - glsl << TranslateOperand(pSrc, flags, writeMask); - - AddAssignPrologue(numParenthesis); -} - -void ToMetal::AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand *src1, Operand *src2, uint32_t precise) -{ - bstring glsl = *psContext->currentGLSLString; - uint32_t destElemCount = pDest->GetNumSwizzleElements(); - uint32_t s0ElemCount = src0->GetNumSwizzleElements(); - uint32_t s1ElemCount = src1->GetNumSwizzleElements(); - uint32_t s2ElemCount = src2->GetNumSwizzleElements(); - uint32_t destWriteMask = pDest->GetAccessMask(); - uint32_t destElem; - - const SHADER_VARIABLE_TYPE eDestType = pDest->GetDataType(psContext); - /* - for each component in dest[.mask] - if the corresponding component in src0 (POS-swizzle) - has any bit set - { - copy this component (POS-swizzle) from src1 into dest - } - else - { - copy this component 
(POS-swizzle) from src2 into dest - } - endfor - */ - - /* Single-component conditional variable (src0) */ - if (s0ElemCount == 1 || src0->IsSwizzleReplicated()) - { - int numParenthesis = 0; - SHADER_VARIABLE_TYPE s0Type = src0->GetDataType(psContext); - psContext->AddIndentation(); - AddAssignToDest(pDest, eDestType, destElemCount, precise, numParenthesis); - bcatcstr(glsl, "("); - if (s0Type == SVT_UINT || s0Type == SVT_UINT16) - glsl << TranslateOperand(src0, TO_AUTO_BITCAST_TO_UINT, OPERAND_4_COMPONENT_MASK_X); - else if (s0Type == SVT_BOOL) - glsl << TranslateOperand(src0, TO_FLAG_BOOL, OPERAND_4_COMPONENT_MASK_X); - else - glsl << TranslateOperand(src0, TO_AUTO_BITCAST_TO_INT, OPERAND_4_COMPONENT_MASK_X); - - if (psContext->psShader->ui32MajorVersion < 4) - { - //cmp opcode uses >= 0 - bcatcstr(glsl, " >= 0) ? "); - } - else - { - if (s0Type == SVT_UINT || s0Type == SVT_UINT16) - bcatcstr(glsl, " != uint(0)) ? "); - else if (s0Type == SVT_BOOL) - bcatcstr(glsl, ") ? "); - else - bcatcstr(glsl, " != 0) ? "); - } - - if (s1ElemCount == 1 && destElemCount > 1) - glsl << TranslateOperand(src1, SVTTypeToFlag(eDestType) | ElemCountToAutoExpandFlag(destElemCount)); - else - glsl << TranslateOperand(src1, SVTTypeToFlag(eDestType), destWriteMask); - - bcatcstr(glsl, " : "); - if (s2ElemCount == 1 && destElemCount > 1) - glsl << TranslateOperand(src2, SVTTypeToFlag(eDestType) | ElemCountToAutoExpandFlag(destElemCount)); - else - glsl << TranslateOperand(src2, SVTTypeToFlag(eDestType), destWriteMask); - - AddAssignPrologue(numParenthesis); - } - else - { - int srcElem = -1; - SHADER_VARIABLE_TYPE dstType = pDest->GetDataType(psContext); - SHADER_VARIABLE_TYPE s0Type = src0->GetDataType(psContext); - - // Use an extra temp if dest is also one of the sources. Without this some swizzle combinations - // might alter the source before all components are handled. 
- const std::string tempName = "hlslcc_movcTemp"; - bool dstIsSrc1 = (pDest->eType == src1->eType) - && (dstType == src1->GetDataType(psContext)) - && (pDest->ui32RegisterNumber == src1->ui32RegisterNumber); - bool dstIsSrc2 = (pDest->eType == src2->eType) - && (dstType == src2->GetDataType(psContext)) - && (pDest->ui32RegisterNumber == src2->ui32RegisterNumber); - - if (dstIsSrc1 || dstIsSrc2) - { - psContext->AddIndentation(); - bcatcstr(glsl, "{\n"); - ++psContext->indent; - psContext->AddIndentation(); - int numComponents = (pDest->eType == OPERAND_TYPE_TEMP) ? - psContext->psShader->GetTempComponentCount(eDestType, pDest->ui32RegisterNumber) : - pDest->iNumComponents; - bformata(glsl, "%s %s = %s;\n", HLSLcc::GetConstructorForType(psContext, eDestType, numComponents), tempName.c_str(), TranslateOperand(pDest, TO_FLAG_NAME_ONLY).c_str()); - - // Override OPERAND_TYPE_TEMP name temporarily - const_cast(pDest)->specialName.assign(tempName); - } - - for (destElem = 0; destElem < 4; ++destElem) - { - int numParenthesis = 0; - srcElem++; - if (pDest->eSelMode == OPERAND_4_COMPONENT_MASK_MODE && pDest->ui32CompMask != 0 && !(pDest->ui32CompMask & (1 << destElem))) - continue; - - psContext->AddIndentation(); - AddOpAssignToDestWithMask(pDest, eDestType, 1, precise, numParenthesis, 1 << destElem); - bcatcstr(glsl, "("); - if (s0Type == SVT_BOOL) - { - glsl << TranslateOperand(src0, TO_FLAG_BOOL, 1 << srcElem); - bcatcstr(glsl, ") ? "); - } - else - { - glsl << TranslateOperand(src0, TO_AUTO_BITCAST_TO_INT, 1 << srcElem); - - if (psContext->psShader->ui32MajorVersion < 4) - { - //cmp opcode uses >= 0 - bcatcstr(glsl, " >= 0) ? "); - } - else - { - bcatcstr(glsl, " != 0) ? 
"); - } - } - - glsl << TranslateOperand(src1, SVTTypeToFlag(eDestType), 1 << srcElem); - bcatcstr(glsl, " : "); - glsl << TranslateOperand(src2, SVTTypeToFlag(eDestType), 1 << srcElem); - AddAssignPrologue(numParenthesis); - } - - if (dstIsSrc1 || dstIsSrc2) - { - const_cast(pDest)->specialName.clear(); - - psContext->AddIndentation(); - glsl << TranslateOperand(pDest, TO_FLAG_NAME_ONLY); - bformata(glsl, " = %s;\n", tempName.c_str()); - - --psContext->indent; - psContext->AddIndentation(); - bcatcstr(glsl, "}\n"); - } - } -} - -void ToMetal::CallBinaryOp(const char* name, Instruction* psInst, - int dest, int src0, int src1, SHADER_VARIABLE_TYPE eDataType) -{ - uint32_t ui32Flags = SVTTypeToFlag(eDataType); - bstring glsl = *psContext->currentGLSLString; - uint32_t destMask = psInst->asOperands[dest].GetAccessMask(); - uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); - uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); - uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); - int needsParenthesis = 0; - - if (eDataType == SVT_FLOAT - && CanForceToHalfOperand(&psInst->asOperands[dest]) - && CanForceToHalfOperand(&psInst->asOperands[src0]) - && CanForceToHalfOperand(&psInst->asOperands[src1])) - { - ui32Flags = TO_FLAG_FORCE_HALF; - eDataType = SVT_FLOAT16; - } - - uint32_t maxElems = std::max(src1SwizCount, src0SwizCount); - if (src1SwizCount != src0SwizCount) - { - ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); - } - - psContext->AddIndentation(); - - AddAssignToDest(&psInst->asOperands[dest], eDataType, dstSwizCount, psInst->ui32PreciseMask, needsParenthesis); - -/* bool s0NeedsUpscaling = false, s1NeedsUpscaling = false; - SHADER_VARIABLE_TYPE s0Type = psInst->asOperands[src0].GetDataType(psContext); - SHADER_VARIABLE_TYPE s1Type = psInst->asOperands[src1].GetDataType(psContext); - - if((s0Type == SVT_FLOAT10 || s0Type == SVT_FLOAT16) && (s1Type != s) - */ - glsl << 
TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); - bformata(glsl, " %s ", name); - glsl << TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); - - AddAssignPrologue(needsParenthesis); -} - -void ToMetal::CallTernaryOp(const char* op1, const char* op2, Instruction* psInst, - int dest, int src0, int src1, int src2, uint32_t dataType) -{ - bstring glsl = *psContext->currentGLSLString; - uint32_t destMask = psInst->asOperands[dest].GetAccessMask(); - uint32_t src2SwizCount = psInst->asOperands[src2].GetNumSwizzleElements(destMask); - uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); - uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); - uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); - - uint32_t ui32Flags = dataType; - int numParenthesis = 0; - - if (dataType == TO_FLAG_NONE - && CanForceToHalfOperand(&psInst->asOperands[dest]) - && CanForceToHalfOperand(&psInst->asOperands[src0]) - && CanForceToHalfOperand(&psInst->asOperands[src1]) - && CanForceToHalfOperand(&psInst->asOperands[src2])) - ui32Flags = dataType = TO_FLAG_FORCE_HALF; - - if (src1SwizCount != src0SwizCount || src2SwizCount != src0SwizCount) - { - uint32_t maxElems = std::max(src2SwizCount, std::max(src1SwizCount, src0SwizCount)); - ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); - } - - psContext->AddIndentation(); - - AddAssignToDest(&psInst->asOperands[dest], TypeFlagsToSVTType(dataType), dstSwizCount, psInst->ui32PreciseMask, numParenthesis); - - glsl << TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); - bformata(glsl, " %s ", op1); - glsl << TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); - bformata(glsl, " %s ", op2); - glsl << TranslateOperand(&psInst->asOperands[src2], ui32Flags, destMask); - AddAssignPrologue(numParenthesis); -} - -void ToMetal::CallHelper3(const char* name, Instruction* psInst, - int dest, int src0, int src1, int 
src2, int paramsShouldFollowWriteMask, uint32_t ui32Flags) -{ - bstring glsl = *psContext->currentGLSLString; - uint32_t destMask = paramsShouldFollowWriteMask ? psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; - uint32_t src2SwizCount = psInst->asOperands[src2].GetNumSwizzleElements(destMask); - uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); - uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); - uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); - int numParenthesis = 0; - - if (CanForceToHalfOperand(&psInst->asOperands[dest]) - && CanForceToHalfOperand(&psInst->asOperands[src0]) - && CanForceToHalfOperand(&psInst->asOperands[src1]) - && CanForceToHalfOperand(&psInst->asOperands[src2])) - ui32Flags = TO_FLAG_FORCE_HALF | TO_AUTO_BITCAST_TO_FLOAT; - - if ((src1SwizCount != src0SwizCount || src2SwizCount != src0SwizCount) && paramsShouldFollowWriteMask) - { - uint32_t maxElems = std::max(src2SwizCount, std::max(src1SwizCount, src0SwizCount)); - ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); - } - - psContext->AddIndentation(); - - AddAssignToDest(&psInst->asOperands[dest], ui32Flags & TO_FLAG_FORCE_HALF ? 
SVT_FLOAT16 : SVT_FLOAT, dstSwizCount, psInst->ui32PreciseMask, numParenthesis); - - bformata(glsl, "%s(", name); - numParenthesis++; - glsl << TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[src2], ui32Flags, destMask); - AddAssignPrologue(numParenthesis); -} - -void ToMetal::CallHelper3(const char* name, Instruction* psInst, - int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask) -{ - CallHelper3(name, psInst, dest, src0, src1, src2, paramsShouldFollowWriteMask, TO_AUTO_BITCAST_TO_FLOAT); -} - -void ToMetal::CallHelper2(const char* name, Instruction* psInst, - int dest, int src0, int src1, int paramsShouldFollowWriteMask) -{ - uint32_t ui32Flags = TO_AUTO_BITCAST_TO_FLOAT; - bstring glsl = *psContext->currentGLSLString; - uint32_t destMask = paramsShouldFollowWriteMask ? psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; - uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); - uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); - uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); - - int isDotProduct = (strncmp(name, "dot", 3) == 0) ? 1 : 0; - int numParenthesis = 0; - - if (CanForceToHalfOperand(&psInst->asOperands[dest]) - && CanForceToHalfOperand(&psInst->asOperands[src0]) - && CanForceToHalfOperand(&psInst->asOperands[src1])) - ui32Flags = TO_FLAG_FORCE_HALF | TO_AUTO_BITCAST_TO_FLOAT; - - - if ((src1SwizCount != src0SwizCount) && paramsShouldFollowWriteMask) - { - uint32_t maxElems = std::max(src1SwizCount, src0SwizCount); - ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); - } - - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[dest], ui32Flags & TO_FLAG_FORCE_HALF ? SVT_FLOAT16 : SVT_FLOAT, isDotProduct ? 
1 : dstSwizCount, psInst->ui32PreciseMask, numParenthesis); - - bformata(glsl, "%s(", name); - numParenthesis++; - - glsl << TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); - - AddAssignPrologue(numParenthesis); -} - -void ToMetal::CallHelper2Int(const char* name, Instruction* psInst, - int dest, int src0, int src1, int paramsShouldFollowWriteMask) -{ - uint32_t ui32Flags = TO_AUTO_BITCAST_TO_INT; - bstring glsl = *psContext->currentGLSLString; - uint32_t destMask = paramsShouldFollowWriteMask ? psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; - uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); - uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); - uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); - int numParenthesis = 0; - - if ((src1SwizCount != src0SwizCount) && paramsShouldFollowWriteMask) - { - uint32_t maxElems = std::max(src1SwizCount, src0SwizCount); - ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); - } - - psContext->AddIndentation(); - - AddAssignToDest(&psInst->asOperands[dest], SVT_INT, dstSwizCount, psInst->ui32PreciseMask, numParenthesis); - - bformata(glsl, "%s(", name); - numParenthesis++; - glsl << TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); - AddAssignPrologue(numParenthesis); -} - -void ToMetal::CallHelper2UInt(const char* name, Instruction* psInst, - int dest, int src0, int src1, int paramsShouldFollowWriteMask) -{ - uint32_t ui32Flags = TO_AUTO_BITCAST_TO_UINT; - bstring glsl = *psContext->currentGLSLString; - uint32_t destMask = paramsShouldFollowWriteMask ? 
psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; - uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); - uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); - uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); - int numParenthesis = 0; - - if ((src1SwizCount != src0SwizCount) && paramsShouldFollowWriteMask) - { - uint32_t maxElems = std::max(src1SwizCount, src0SwizCount); - ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); - } - - psContext->AddIndentation(); - - AddAssignToDest(&psInst->asOperands[dest], SVT_UINT, dstSwizCount, psInst->ui32PreciseMask, numParenthesis); - - bformata(glsl, "%s(", name); - numParenthesis++; - glsl << TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); - AddAssignPrologue(numParenthesis); -} - -void ToMetal::CallHelper1(const char* name, Instruction* psInst, - int dest, int src0, int paramsShouldFollowWriteMask) -{ - uint32_t ui32Flags = TO_AUTO_BITCAST_TO_FLOAT; - bstring glsl = *psContext->currentGLSLString; - uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); - uint32_t destMask = paramsShouldFollowWriteMask ? psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; - int numParenthesis = 0; - - psContext->AddIndentation(); - if (CanForceToHalfOperand(&psInst->asOperands[dest]) - && CanForceToHalfOperand(&psInst->asOperands[src0])) - ui32Flags = TO_FLAG_FORCE_HALF | TO_AUTO_BITCAST_TO_FLOAT; - - AddAssignToDest(&psInst->asOperands[dest], ui32Flags & TO_FLAG_FORCE_HALF ? SVT_FLOAT16 : SVT_FLOAT, dstSwizCount, psInst->ui32PreciseMask, numParenthesis); - - bformata(glsl, "%s(", name); - numParenthesis++; - glsl << TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); - AddAssignPrologue(numParenthesis); -} - -//Result is an int. 
-void ToMetal::CallHelper1Int( - const char* name, - Instruction* psInst, - const int dest, - const int src0, - int paramsShouldFollowWriteMask) -{ - uint32_t ui32Flags = TO_AUTO_BITCAST_TO_INT; - bstring glsl = *psContext->currentGLSLString; - uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); - uint32_t destMask = paramsShouldFollowWriteMask ? psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; - int numParenthesis = 0; - - psContext->AddIndentation(); - - AddAssignToDest(&psInst->asOperands[dest], SVT_INT, dstSwizCount, psInst->ui32PreciseMask, numParenthesis); - - bformata(glsl, "%s(", name); - numParenthesis++; - glsl << TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); - AddAssignPrologue(numParenthesis); -} - -void ToMetal::TranslateTexelFetch( - Instruction* psInst, - const ResourceBinding* psBinding, - bstring glsl) -{ - int numParenthesis = 0; - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], psContext->psShader->sInfo.GetTextureDataType(psInst->asOperands[2].ui32RegisterNumber), 4, psInst->ui32PreciseMask, numParenthesis); - glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); - bcatcstr(glsl, ".read("); - - switch (psBinding->eDimension) - { - case REFLECT_RESOURCE_DIMENSION_BUFFER: - { - psContext->m_Reflection.OnDiagnostics("Buffer resources not supported in Metal (in texel fetch)", 0, true); - return; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURE1D: - { - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_X); - break; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY: - { - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_X); - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_W); - break; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURE2D: - { - glsl << 
TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_W); // Lod level - break; - } - - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY: - { - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_Z); // Array index - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_W); // Lod level - break; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURE3D: - { - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_EXPAND_TO_VEC3, 7 /* .xyz */); - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_W); // Lod level - break; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS: - { - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[3], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_X); // Sample index - break; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY: - { - psContext->m_Reflection.OnDiagnostics("Multisampled texture arrays not supported in Metal (in texel fetch)", 0, true); - return; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURECUBE: - case REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY: - case REFLECT_RESOURCE_DIMENSION_BUFFEREX: - default: - { - // Shouldn't happen. 
Cubemap reads are not supported in HLSL - ASSERT(0); - break; - } - } - bcatcstr(glsl, ")"); - - glsl << TranslateOperandSwizzle(&psInst->asOperands[2], psInst->asOperands[0].GetAccessMask(), 0); - AddAssignPrologue(numParenthesis); -} - -void ToMetal::TranslateTexelFetchOffset( - Instruction* psInst, - const ResourceBinding* psBinding, - bstring glsl) -{ - int numParenthesis = 0; - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], psContext->psShader->sInfo.GetTextureDataType(psInst->asOperands[2].ui32RegisterNumber), 4, psInst->ui32PreciseMask, numParenthesis); - - glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); - bcatcstr(glsl, ".read("); - - switch (psBinding->eDimension) - { - case REFLECT_RESOURCE_DIMENSION_BUFFER: - { - psContext->m_Reflection.OnDiagnostics("Buffer resources not supported in Metal (in texel fetch)", 0, true); - return; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY: - { - psContext->m_Reflection.OnDiagnostics("Multisampled texture arrays not supported in Metal (in texel fetch)", 0, true); - return; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURE1D: - { - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_X); - bformata(glsl, " + %d", psInst->iUAddrOffset); - break; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY: - { - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_X); - bformata(glsl, " + %d, ", psInst->iUAddrOffset); - - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_Y); - break; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURE2D: - { - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); - bformata(glsl, "+ ivec2(%d, %d), ", psInst->iUAddrOffset, psInst->iVAddrOffset); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_W); // Lod 
level - break; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY: - { - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); - bformata(glsl, "+ ivec2(%d, %d), ", psInst->iUAddrOffset, psInst->iVAddrOffset); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_Z); // Array index - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_W); // Lod level - break; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURE3D: - { - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_EXPAND_TO_VEC3, 7 /* .xyz */); - bformata(glsl, "+ ivec3(%d, %d, %d), ", psInst->iUAddrOffset, psInst->iVAddrOffset, psInst->iWAddrOffset); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_W); // Lod level - break; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS: - { - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); - bformata(glsl, "+ ivec2(%d, %d), ", psInst->iUAddrOffset, psInst->iVAddrOffset); - glsl << TranslateOperand(&psInst->asOperands[3], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_X); // Sample index - break; - } - case REFLECT_RESOURCE_DIMENSION_TEXTURECUBE: - case REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY: - case REFLECT_RESOURCE_DIMENSION_BUFFEREX: - default: - { - // Shouldn't happen. Cubemap reads are not supported in HLSL - ASSERT(0); - break; - } - } - bcatcstr(glsl, ")"); - - glsl << TranslateOperandSwizzle(&psInst->asOperands[2], psInst->asOperands[0].GetAccessMask(), 0); - AddAssignPrologue(numParenthesis); -} - -//Makes sure the texture coordinate swizzle is appropriate for the texture type. -//i.e. vecX for X-dimension texture. -//Currently supports floating point coord only, so not used for texelFetch. 
-void ToMetal::TranslateTexCoord( - const RESOURCE_DIMENSION eResDim, - Operand* psTexCoordOperand) -{ - uint32_t flags = TO_AUTO_BITCAST_TO_FLOAT; - uint32_t opMask = OPERAND_4_COMPONENT_MASK_ALL; - bool isArray = false; - - switch (eResDim) - { - case RESOURCE_DIMENSION_TEXTURE1D: - { - //Vec1 texcoord. Mask out the other components. - opMask = OPERAND_4_COMPONENT_MASK_X; - break; - } - case RESOURCE_DIMENSION_TEXTURE1DARRAY: - { - // x for coord, y for array element - opMask = OPERAND_4_COMPONENT_MASK_X; - bstring glsl = *psContext->currentGLSLString; - glsl << TranslateOperand(psTexCoordOperand, flags, opMask); - - bcatcstr(glsl, ", round("); - - opMask = OPERAND_4_COMPONENT_MASK_Y; - flags = TO_AUTO_BITCAST_TO_FLOAT; - isArray = true; - break; - } - case RESOURCE_DIMENSION_TEXTURE2D: - { - //Vec2 texcoord. Mask out the other components. - opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y; - flags |= TO_AUTO_EXPAND_TO_VEC2; - break; - } - case RESOURCE_DIMENSION_TEXTURECUBE: - case RESOURCE_DIMENSION_TEXTURE3D: - { - //Vec3 texcoord. Mask out the other components. 
- opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z; - flags |= TO_AUTO_EXPAND_TO_VEC3; - break; - } - case RESOURCE_DIMENSION_TEXTURE2DARRAY: - { - // xy for coord, z for array element - opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y; - flags |= TO_AUTO_EXPAND_TO_VEC2; - - bstring glsl = *psContext->currentGLSLString; - glsl << TranslateOperand(psTexCoordOperand, flags, opMask); - - bcatcstr(glsl, ", round("); - - opMask = OPERAND_4_COMPONENT_MASK_Z; - flags = TO_AUTO_BITCAST_TO_FLOAT; - isArray = true; - break; - } - case RESOURCE_DIMENSION_TEXTURECUBEARRAY: - { - // xyz for coord, w for array element - opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z; - flags |= TO_AUTO_EXPAND_TO_VEC3; - - bstring glsl = *psContext->currentGLSLString; - glsl << TranslateOperand(psTexCoordOperand, flags, opMask); - - bcatcstr(glsl, ", round("); - - opMask = OPERAND_4_COMPONENT_MASK_W; - flags = TO_AUTO_BITCAST_TO_FLOAT; - isArray = true; - break; - } - default: - { - ASSERT(0); - break; - } - } - - //FIXME detect when integer coords are needed. - bstring glsl = *psContext->currentGLSLString; - glsl << TranslateOperand(psTexCoordOperand, flags, opMask); - - if (isArray) - bcatcstr(glsl, ")"); -} - -void ToMetal::GetResInfoData(Instruction* psInst, int index, int destElem) -{ - bstring glsl = *psContext->currentGLSLString; - int numParenthesis = 0; - const RESINFO_RETURN_TYPE eResInfoReturnType = psInst->eResInfoReturnType; - - psContext->AddIndentation(); - AddOpAssignToDestWithMask(&psInst->asOperands[0], eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT ? 
SVT_UINT : SVT_FLOAT, 1, psInst->ui32PreciseMask, numParenthesis, 1 << destElem); - - const char *metalGetters[] = { ".get_width(", ".get_height(", ".get_depth(", ".get_num_mip_levels()" }; - int dim = GetNumTextureDimensions(psInst->eResDim); - if (dim < (index + 1) && index != 3) - { - bcatcstr(glsl, eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT ? "uint(0)" : "0.0"); - } - else - { - if (eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_FLOAT) - { - bcatcstr(glsl, "float("); - numParenthesis++; - } - else if (eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_RCPFLOAT) - { - bcatcstr(glsl, "1.0f / float("); - numParenthesis++; - } - glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_NAME_ONLY); - if ((index == 1 && psInst->eResDim == RESOURCE_DIMENSION_TEXTURE1DARRAY) || - (index == 2 && (psInst->eResDim == RESOURCE_DIMENSION_TEXTURE2DARRAY || - psInst->eResDim == RESOURCE_DIMENSION_TEXTURE2DMSARRAY))) - { - bcatcstr(glsl, ".get_array_size()"); - } - else - { - bcatcstr(glsl, metalGetters[index]); - - if (index < 3) - { - if (psInst->eResDim != RESOURCE_DIMENSION_TEXTURE2DMS && - psInst->eResDim != RESOURCE_DIMENSION_TEXTURE2DMSARRAY) - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER); //mip level - - bcatcstr(glsl, ")"); - } - } - } - AddAssignPrologue(numParenthesis); -} - -void ToMetal::TranslateTextureSample(Instruction* psInst, - uint32_t ui32Flags) -{ - bstring glsl = *psContext->currentGLSLString; - int numParenthesis = 0; - int hasParamOffset = (ui32Flags & TEXSMP_FLAG_PARAMOFFSET) ? 1 : 0; - - Operand* psDest = &psInst->asOperands[0]; - Operand* psDestAddr = &psInst->asOperands[1]; - Operand* psSrcOff = (ui32Flags & TEXSMP_FLAG_PARAMOFFSET) ? &psInst->asOperands[2] : 0; - Operand* psSrcTex = &psInst->asOperands[2 + hasParamOffset]; - Operand* psSrcSamp = &psInst->asOperands[3 + hasParamOffset]; - Operand* psSrcRef = (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) ? 
&psInst->asOperands[4 + hasParamOffset] : 0; - Operand* psSrcLOD = (ui32Flags & TEXSMP_FLAG_LOD) ? &psInst->asOperands[4] : 0; - Operand* psSrcDx = (ui32Flags & TEXSMP_FLAG_GRAD) ? &psInst->asOperands[4] : 0; - Operand* psSrcDy = (ui32Flags & TEXSMP_FLAG_GRAD) ? &psInst->asOperands[5] : 0; - Operand* psSrcBias = (ui32Flags & TEXSMP_FLAG_BIAS) ? &psInst->asOperands[4] : 0; - - const char *funcName = ""; - const char* gradSwizzle = ""; - const char *gradientName = ""; - - uint32_t ui32NumOffsets = 0; - - const RESOURCE_DIMENSION eResDim = psContext->psShader->aeResourceDims[psSrcTex->ui32RegisterNumber]; - - if (ui32Flags & TEXSMP_FLAG_GATHER) - { - if (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) - funcName = "gather_compare"; - else - funcName = "gather"; - } - else - { - if (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) - funcName = "sample_compare"; - else - funcName = "sample"; - } - - switch (eResDim) - { - case RESOURCE_DIMENSION_TEXTURE1D: - { - gradSwizzle = ".x"; - ui32NumOffsets = 1; - break; - } - case RESOURCE_DIMENSION_TEXTURE2D: - { - gradSwizzle = ".xy"; - gradientName = "gradient2d"; - ui32NumOffsets = 2; - break; - } - case RESOURCE_DIMENSION_TEXTURECUBE: - { - gradSwizzle = ".xyz"; - ui32NumOffsets = 3; - gradientName = "gradientcube"; - break; - } - case RESOURCE_DIMENSION_TEXTURE3D: - { - gradSwizzle = ".xyz"; - ui32NumOffsets = 3; - gradientName = "gradient3d"; - break; - } - case RESOURCE_DIMENSION_TEXTURE1DARRAY: - { - gradSwizzle = ".x"; - ui32NumOffsets = 1; - break; - } - case RESOURCE_DIMENSION_TEXTURE2DARRAY: - { - gradSwizzle = ".xy"; - ui32NumOffsets = 2; - gradientName = "gradient2d"; - break; - } - case RESOURCE_DIMENSION_TEXTURECUBEARRAY: - { - gradSwizzle = ".xyz"; - ui32NumOffsets = 3; - gradientName = "gradientcube"; - break; - } - default: - { - ASSERT(0); - break; - } - } - - - SHADER_VARIABLE_TYPE dataType = psContext->psShader->sInfo.GetTextureDataType(psSrcTex->ui32RegisterNumber); - psContext->AddIndentation(); - AddAssignToDest(psDest, 
dataType, psSrcTex->GetNumSwizzleElements(), psInst->ui32PreciseMask, numParenthesis); - - std::string texName = TranslateOperand(psSrcTex, TO_FLAG_NAME_ONLY); - - // TextureName.FuncName( - glsl << texName; - bformata(glsl, ".%s(", funcName); - - bool isDepthSampler = false; - for (unsigned j = 0, m = m_Textures.size(); j < m; ++j) - { - if (m_Textures[j].name == texName) - { - isDepthSampler = m_Textures[j].isDepthSampler; - break; - } - } - - // Sampler name - // on ios pre-GPUFamily3 we MUST have constexpr in shader for a sampler with compare func - // for now we use fixed shadow sampler in all cases of depth compare (ATM all depth compares are interpreted as shadow usage) - if (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE && IsMobileTarget(psContext)) - { - bcatcstr(glsl, "_mtl_xl_shadow_sampler"); - } - else - { - std::string sampName = TranslateOperand(psSrcSamp, TO_FLAG_NAME_ONLY); - - // insert the "sampler" prefix if the sampler name is equal to the texture name (default sampler) - if (texName == sampName) - sampName.insert(0, "sampler"); - glsl << sampName; - } - - bcatcstr(glsl, ", "); - - // Texture coordinates - TranslateTexCoord(eResDim, psDestAddr); - - // Depth compare reference value - if (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) - { - bcatcstr(glsl, ", saturate("); // TODO: why the saturate here? 
- glsl << TranslateOperand(psSrcRef, TO_AUTO_BITCAST_TO_FLOAT); - bcatcstr(glsl, ")"); - } - - // lod_options (LOD/grad/bias) based on the flags - if (ui32Flags & TEXSMP_FLAG_LOD) - { - bcatcstr(glsl, ", level("); - glsl << TranslateOperand(psSrcLOD, TO_AUTO_BITCAST_TO_FLOAT); - if (psContext->psShader->ui32MajorVersion < 4) - { - bcatcstr(glsl, ".w"); - } - bcatcstr(glsl, ")"); - } - else if (ui32Flags & TEXSMP_FLAG_FIRSTLOD) - { - bcatcstr(glsl, ", level(0.0)"); - } - else if (ui32Flags & TEXSMP_FLAG_GRAD) - { - glsl << std::string(", ") << std::string(gradientName) << std::string("(float4("); - glsl << TranslateOperand(psSrcDx, TO_AUTO_BITCAST_TO_FLOAT); - bcatcstr(glsl, ")"); - bcatcstr(glsl, gradSwizzle); - bcatcstr(glsl, ", float4("); - glsl << TranslateOperand(psSrcDy, TO_AUTO_BITCAST_TO_FLOAT); - bcatcstr(glsl, ")"); - bcatcstr(glsl, gradSwizzle); - bcatcstr(glsl, ")"); - } - else if (ui32Flags & (TEXSMP_FLAG_BIAS)) - { - glsl << std::string(", bias(") << TranslateOperand(psSrcBias, TO_AUTO_BITCAST_TO_FLOAT) << std::string(")"); - } - - bool hadOffset = false; - - // Add offset param - if (psInst->bAddressOffset) - { - hadOffset = true; - if (ui32NumOffsets == 1) - { - bformata(glsl, ", %d", - psInst->iUAddrOffset); - } - else if (ui32NumOffsets == 2) - { - bformata(glsl, ", int2(%d, %d)", - psInst->iUAddrOffset, - psInst->iVAddrOffset); - } - else if (ui32NumOffsets == 3) - { - bformata(glsl, ", int3(%d, %d, %d)", - psInst->iUAddrOffset, - psInst->iVAddrOffset, - psInst->iWAddrOffset); - } - } - // HLSL gather has a variant with separate offset operand - else if (ui32Flags & TEXSMP_FLAG_PARAMOFFSET) - { - hadOffset = true; - uint32_t mask = OPERAND_4_COMPONENT_MASK_X; - if (ui32NumOffsets > 1) - mask |= OPERAND_4_COMPONENT_MASK_Y; - if (ui32NumOffsets > 2) - mask |= OPERAND_4_COMPONENT_MASK_Z; - - bcatcstr(glsl, ","); - glsl << TranslateOperand(psSrcOff, TO_FLAG_INTEGER, mask); - } - - // Add texture gather component selection if needed - if ((ui32Flags & 
TEXSMP_FLAG_GATHER) && psSrcSamp->GetNumSwizzleElements() > 0) - { - ASSERT(psSrcSamp->GetNumSwizzleElements() == 1); - if (psSrcSamp->aui32Swizzle[0] != OPERAND_4_COMPONENT_X) - { - if (!(ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE)) - { - // Need to add offset param to match func overload - if (!hadOffset) - { - if (ui32NumOffsets == 1) - bcatcstr(glsl, ", 0"); - else - bformata(glsl, ", int%d(0)", ui32NumOffsets); - } - - bcatcstr(glsl, ", component::"); - glsl << TranslateOperandSwizzle(psSrcSamp, OPERAND_4_COMPONENT_MASK_ALL, 0, false); - } - else - { - psContext->m_Reflection.OnDiagnostics("Metal supports gather compare only for the first component.", 0, true); - } - } - } - - bcatcstr(glsl, ")"); - - if (!((ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) || isDepthSampler) || (ui32Flags & TEXSMP_FLAG_GATHER)) - { - // iWriteMaskEnabled is forced off during DecodeOperand because swizzle on sampler uniforms - // does not make sense. But need to re-enable to correctly swizzle this particular instruction. - psSrcTex->iWriteMaskEnabled = 1; - glsl << TranslateOperandSwizzle(psSrcTex, psDest->GetAccessMask(), 0); - } - AddAssignPrologue(numParenthesis); -} - -// Handle cases where vector components are accessed with dynamic index ([] notation). -// A bit ugly hack because compiled HLSL uses byte offsets to access data in structs => we are converting -// the offset back to vector component index in runtime => calculating stuff back and forth. -// TODO: Would be better to eliminate the offset calculation ops and use indexes straight on. Could be tricky though... 
-void ToMetal::TranslateDynamicComponentSelection(const ShaderVarType* psVarType, const Operand* psByteAddr, uint32_t offset, uint32_t mask) -{ - bstring glsl = *psContext->currentGLSLString; - ASSERT(psVarType->Class == SVC_VECTOR); - - bcatcstr(glsl, "["); // Access vector component with [] notation - if (offset > 0) - bcatcstr(glsl, "("); - - // The var containing byte address to the requested element - glsl << TranslateOperand(psByteAddr, TO_FLAG_UNSIGNED_INTEGER, mask); - - if (offset > 0)// If the vector is part of a struct, there is an extra offset in our byte address - bformata(glsl, " - %du)", offset); // Subtract that first - - bcatcstr(glsl, " >> 0x2u"); // Convert byte offset to index: div by four - bcatcstr(glsl, "]"); -} - -void ToMetal::TranslateShaderStorageStore(Instruction* psInst) -{ - bstring glsl = *psContext->currentGLSLString; - int component; - int srcComponent = 0; - - Operand* psDest = 0; - Operand* psDestAddr = 0; - Operand* psDestByteOff = 0; - Operand* psSrc = 0; - - - switch (psInst->eOpcode) - { - case OPCODE_STORE_STRUCTURED: - psDest = &psInst->asOperands[0]; - psDestAddr = &psInst->asOperands[1]; - psDestByteOff = &psInst->asOperands[2]; - psSrc = &psInst->asOperands[3]; - - break; - case OPCODE_STORE_RAW: - case OPCODE_STORE_UAV_TYPED: // Hack typed buffer as raw buf - psDest = &psInst->asOperands[0]; - psDestByteOff = &psInst->asOperands[1]; - psSrc = &psInst->asOperands[2]; - break; - default: - ASSERT(0); - break; - } - - uint32_t dstOffFlag = TO_FLAG_UNSIGNED_INTEGER; - SHADER_VARIABLE_TYPE dstOffType = psDestByteOff->GetDataType(psContext); - if (dstOffType == SVT_INT || dstOffType == SVT_INT16 || dstOffType == SVT_INT12) - dstOffFlag = TO_FLAG_INTEGER; - - for (component = 0; component < 4; component++) - { - ASSERT(psInst->asOperands[0].eSelMode == OPERAND_4_COMPONENT_MASK_MODE); - if (psInst->asOperands[0].ui32CompMask & (1 << component)) - { - psContext->AddIndentation(); - glsl << TranslateOperand(psDest, 
TO_FLAG_DESTINATION | TO_FLAG_NAME_ONLY); - - if (psDestAddr) - { - bcatcstr(glsl, "["); - glsl << TranslateOperand(psDestAddr, TO_FLAG_INTEGER | TO_FLAG_UNSIGNED_INTEGER); - bcatcstr(glsl, "].value"); - } - - bcatcstr(glsl, "[("); - glsl << TranslateOperand(psDestByteOff, dstOffFlag); - if (psInst->eOpcode == OPCODE_STORE_UAV_TYPED) - { - bcatcstr(glsl, ")"); - } - else - { - bcatcstr(glsl, " >> 2"); - if (dstOffFlag == TO_FLAG_UNSIGNED_INTEGER) - bcatcstr(glsl, "u"); - bcatcstr(glsl, ")"); - - if (component != 0) - { - bformata(glsl, " + %d", component); - if (dstOffFlag == TO_FLAG_UNSIGNED_INTEGER) - bcatcstr(glsl, "u"); - } - } - bcatcstr(glsl, "]"); - - //Dest type is currently always a uint array. - bcatcstr(glsl, " = "); - if (psSrc->GetNumSwizzleElements() > 1) - glsl << TranslateOperand(psSrc, TO_FLAG_UNSIGNED_INTEGER, 1 << (srcComponent++)); - else - glsl << TranslateOperand(psSrc, TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_X); - - bformata(glsl, ";\n"); - } - } -} - -void ToMetal::TranslateShaderStorageLoad(Instruction* psInst) -{ - bstring glsl = *psContext->currentGLSLString; - int component; - Operand* psDest = 0; - Operand* psSrcAddr = 0; - Operand* psSrcByteOff = 0; - Operand* psSrc = 0; - - switch (psInst->eOpcode) - { - case OPCODE_LD_STRUCTURED: - psDest = &psInst->asOperands[0]; - psSrcAddr = &psInst->asOperands[1]; - psSrcByteOff = &psInst->asOperands[2]; - psSrc = &psInst->asOperands[3]; - break; - case OPCODE_LD_RAW: - case OPCODE_LD_UAV_TYPED: // Hack typed buffer as raw buf - psDest = &psInst->asOperands[0]; - psSrcByteOff = &psInst->asOperands[1]; - psSrc = &psInst->asOperands[2]; - break; - default: - ASSERT(0); - break; - } - - uint32_t destCount = psDest->GetNumSwizzleElements(); - uint32_t destMask = psDest->GetAccessMask(); - - int numParenthesis = 0; - int firstItemAdded = 0; - SHADER_VARIABLE_TYPE destDataType = psDest->GetDataType(psContext); - uint32_t srcOffFlag = TO_FLAG_UNSIGNED_INTEGER; - SHADER_VARIABLE_TYPE 
srcOffType = psSrcByteOff->GetDataType(psContext); - if (srcOffType == SVT_INT || srcOffType == SVT_INT16 || srcOffType == SVT_INT12) - srcOffFlag = TO_FLAG_INTEGER; - - psContext->AddIndentation(); - AddAssignToDest(psDest, destDataType, destCount, psInst->ui32PreciseMask, numParenthesis); - if (destCount > 1) - { - bformata(glsl, "%s(", GetConstructorForTypeMetal(destDataType, destCount)); - numParenthesis++; - } - for (component = 0; component < 4; component++) - { - bool addedBitcast = false; - if (!(destMask & (1 << component))) - continue; - - if (firstItemAdded) - bcatcstr(glsl, ", "); - else - firstItemAdded = 1; - - // always uint array atm - if (destDataType == SVT_FLOAT) - { - // input already in uints, need bitcast - bcatcstr(glsl, "as_type("); - addedBitcast = true; - } - else if (destDataType == SVT_INT || destDataType == SVT_INT16 || destDataType == SVT_INT12) - { - bcatcstr(glsl, "int("); - addedBitcast = true; - } - - glsl << TranslateOperand(psSrc, TO_FLAG_NAME_ONLY); - - if (psSrcAddr) - { - bcatcstr(glsl, "["); - glsl << TranslateOperand(psSrcAddr, TO_FLAG_UNSIGNED_INTEGER | TO_FLAG_INTEGER); - bcatcstr(glsl, "].value"); - } - bcatcstr(glsl, "[("); - glsl << TranslateOperand(psSrcByteOff, srcOffFlag); - if (psInst->eOpcode == OPCODE_LD_UAV_TYPED) - { - bcatcstr(glsl, ")"); - } - else - { - bcatcstr(glsl, " >> 2"); - if (srcOffFlag == TO_FLAG_UNSIGNED_INTEGER) - bcatcstr(glsl, "u"); - - bformata(glsl, ") + %d", psSrc->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE ? 
psSrc->aui32Swizzle[component] : component); - if (srcOffFlag == TO_FLAG_UNSIGNED_INTEGER) - bcatcstr(glsl, "u"); - } - bcatcstr(glsl, "]"); - - if (addedBitcast) - bcatcstr(glsl, ")"); - } - AddAssignPrologue(numParenthesis); -} - -void ToMetal::TranslateAtomicMemOp(Instruction* psInst) -{ - bstring glsl = *psContext->currentGLSLString; - int numParenthesis = 0; - uint32_t ui32DataTypeFlag = TO_FLAG_INTEGER; - const char* func = ""; - Operand* dest = 0; - Operand* previousValue = 0; - Operand* destAddr = 0; - Operand* src = 0; - Operand* compare = 0; - int texDim = 0; - bool isUint = true; - - switch (psInst->eOpcode) - { - case OPCODE_IMM_ATOMIC_IADD: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_IADD\n"); - } - func = "atomic_fetch_add_explicit"; - previousValue = &psInst->asOperands[0]; - dest = &psInst->asOperands[1]; - destAddr = &psInst->asOperands[2]; - src = &psInst->asOperands[3]; - break; - } - case OPCODE_ATOMIC_IADD: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_IADD\n"); - } - func = "atomic_fetch_add_explicit"; - dest = &psInst->asOperands[0]; - destAddr = &psInst->asOperands[1]; - src = &psInst->asOperands[2]; - break; - } - case OPCODE_IMM_ATOMIC_AND: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_AND\n"); - } - func = "atomic_fetch_and_explicit"; - previousValue = &psInst->asOperands[0]; - dest = &psInst->asOperands[1]; - destAddr = &psInst->asOperands[2]; - src = &psInst->asOperands[3]; - break; - } - case OPCODE_ATOMIC_AND: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_AND\n"); - } - func = "atomic_fetch_and_explicit"; - dest = &psInst->asOperands[0]; - destAddr = &psInst->asOperands[1]; - src 
= &psInst->asOperands[2]; - break; - } - case OPCODE_IMM_ATOMIC_OR: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_OR\n"); - } - func = "atomic_fetch_or_explicit"; - previousValue = &psInst->asOperands[0]; - dest = &psInst->asOperands[1]; - destAddr = &psInst->asOperands[2]; - src = &psInst->asOperands[3]; - break; - } - case OPCODE_ATOMIC_OR: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_OR\n"); - } - func = "atomic_fetch_or_explicit"; - dest = &psInst->asOperands[0]; - destAddr = &psInst->asOperands[1]; - src = &psInst->asOperands[2]; - break; - } - case OPCODE_IMM_ATOMIC_XOR: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_XOR\n"); - } - func = "atomic_fetch_xor_explicit"; - previousValue = &psInst->asOperands[0]; - dest = &psInst->asOperands[1]; - destAddr = &psInst->asOperands[2]; - src = &psInst->asOperands[3]; - break; - } - case OPCODE_ATOMIC_XOR: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_XOR\n"); - } - func = "atomic_fetch_xor_explicit"; - dest = &psInst->asOperands[0]; - destAddr = &psInst->asOperands[1]; - src = &psInst->asOperands[2]; - break; - } - - case OPCODE_IMM_ATOMIC_EXCH: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_EXCH\n"); - } - func = "atomic_exchange_explicit"; - previousValue = &psInst->asOperands[0]; - dest = &psInst->asOperands[1]; - destAddr = &psInst->asOperands[2]; - src = &psInst->asOperands[3]; - break; - } - case OPCODE_IMM_ATOMIC_CMP_EXCH: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, 
"//IMM_ATOMIC_CMP_EXC\n"); - } - func = "atomic_compare_exchange_weak_explicit"; - previousValue = &psInst->asOperands[0]; - dest = &psInst->asOperands[1]; - destAddr = &psInst->asOperands[2]; - compare = &psInst->asOperands[3]; - src = &psInst->asOperands[4]; - break; - } - case OPCODE_ATOMIC_CMP_STORE: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_CMP_STORE\n"); - } - func = "atomic_compare_exchange_weak_explicit"; - previousValue = 0; - dest = &psInst->asOperands[0]; - destAddr = &psInst->asOperands[1]; - compare = &psInst->asOperands[2]; - src = &psInst->asOperands[3]; - break; - } - case OPCODE_IMM_ATOMIC_UMIN: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_UMIN\n"); - } - func = "atomic_fetch_min_explicit"; - previousValue = &psInst->asOperands[0]; - dest = &psInst->asOperands[1]; - destAddr = &psInst->asOperands[2]; - src = &psInst->asOperands[3]; - break; - } - case OPCODE_ATOMIC_UMIN: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_UMIN\n"); - } - func = "atomic_fetch_min_explicit"; - dest = &psInst->asOperands[0]; - destAddr = &psInst->asOperands[1]; - src = &psInst->asOperands[2]; - break; - } - case OPCODE_IMM_ATOMIC_IMIN: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_IMIN\n"); - } - func = "atomic_fetch_min_explicit"; - previousValue = &psInst->asOperands[0]; - dest = &psInst->asOperands[1]; - destAddr = &psInst->asOperands[2]; - src = &psInst->asOperands[3]; - break; - } - case OPCODE_ATOMIC_IMIN: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_IMIN\n"); - } - func = "atomic_fetch_min_explicit"; - dest = 
&psInst->asOperands[0]; - destAddr = &psInst->asOperands[1]; - src = &psInst->asOperands[2]; - break; - } - case OPCODE_IMM_ATOMIC_UMAX: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_UMAX\n"); - } - func = "atomic_fetch_max_explicit"; - previousValue = &psInst->asOperands[0]; - dest = &psInst->asOperands[1]; - destAddr = &psInst->asOperands[2]; - src = &psInst->asOperands[3]; - break; - } - case OPCODE_ATOMIC_UMAX: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_UMAX\n"); - } - func = "atomic_fetch_max_explicit"; - dest = &psInst->asOperands[0]; - destAddr = &psInst->asOperands[1]; - src = &psInst->asOperands[2]; - break; - } - case OPCODE_IMM_ATOMIC_IMAX: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_IMAX\n"); - } - func = "atomic_fetch_max_explicit"; - previousValue = &psInst->asOperands[0]; - dest = &psInst->asOperands[1]; - destAddr = &psInst->asOperands[2]; - src = &psInst->asOperands[3]; - break; - } - case OPCODE_ATOMIC_IMAX: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//ATOMIC_IMAX\n"); - } - func = "atomic_fetch_max_explicit"; - dest = &psInst->asOperands[0]; - destAddr = &psInst->asOperands[1]; - src = &psInst->asOperands[2]; - break; - } - default: - ASSERT(0); - break; - } - - psContext->AddIndentation(); - - const ResourceBinding* psBinding = 0; - if (dest->eType != OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) - { - psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, dest->ui32RegisterNumber, &psBinding); - - if (psBinding->eType == RTYPE_UAV_RWTYPED) - { - isUint = (psBinding->ui32ReturnType == RETURN_TYPE_UINT); - - // Find out if it's texture and of what dimension - switch 
(psBinding->eDimension) - { - case REFLECT_RESOURCE_DIMENSION_TEXTURE1D: - texDim = 1; - break; - case REFLECT_RESOURCE_DIMENSION_TEXTURECUBE: - case REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY: - case REFLECT_RESOURCE_DIMENSION_TEXTURE2D: - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS: - texDim = 2; - break; - case REFLECT_RESOURCE_DIMENSION_TEXTURE3D: - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY: - case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY: - case REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY: - texDim = 3; - break; - case REFLECT_RESOURCE_DIMENSION_BUFFER: // Hack typed buffer as raw buf - break; - default: - ASSERT(0); - break; - } - } - } - - if (texDim > 0) - { - psContext->m_Reflection.OnDiagnostics("Texture atomics are not supported in Metal", 0, true); - return; - } - - if (isUint) - ui32DataTypeFlag = TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_BITCAST_TO_UINT; - else - ui32DataTypeFlag = TO_FLAG_INTEGER | TO_AUTO_BITCAST_TO_INT; - - if (compare) - { - bcatcstr(glsl, "{\n"); - ++psContext->indent; - psContext->AddIndentation(); - bcatcstr(glsl, "uint compare_value = "); - glsl << TranslateOperand(compare, ui32DataTypeFlag); - bcatcstr(glsl, ";\n"); - psContext->AddIndentation(); - } - else if (previousValue) - AddAssignToDest(previousValue, isUint ? 
SVT_UINT : SVT_INT, 1, psInst->ui32PreciseMask, numParenthesis); - - bcatcstr(glsl, func); - bcatcstr(glsl, "("); - - uint32_t destAddrFlag = TO_FLAG_UNSIGNED_INTEGER; - SHADER_VARIABLE_TYPE destAddrType = destAddr->GetDataType(psContext); - if (destAddrType == SVT_INT || destAddrType == SVT_INT16 || destAddrType == SVT_INT12) - destAddrFlag = TO_FLAG_INTEGER; - - if (dest->eType == OPERAND_TYPE_UNORDERED_ACCESS_VIEW) - bcatcstr(glsl, "reinterpret_cast(&"); - else - bcatcstr(glsl, "reinterpret_cast(&"); - glsl << TranslateOperand(dest, TO_FLAG_DESTINATION | TO_FLAG_NAME_ONLY); - bcatcstr(glsl, "["); - glsl << TranslateOperand(destAddr, destAddrFlag, OPERAND_4_COMPONENT_MASK_X); - - if (!psBinding || psBinding->eType != RTYPE_UAV_RWTYPED) - { - // Structured buf if we have both x & y swizzles. Raw buf has only x -> no .value[] - if (destAddr->GetNumSwizzleElements(OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y) == 2) - { - bcatcstr(glsl, "]"); - bcatcstr(glsl, ".value["); - glsl << TranslateOperand(destAddr, destAddrFlag, OPERAND_4_COMPONENT_MASK_Y); - } - - bcatcstr(glsl, " >> 2");//bytes to floats - if (destAddrFlag == TO_FLAG_UNSIGNED_INTEGER) - bcatcstr(glsl, "u"); - } - bcatcstr(glsl, "]), "); - - if (compare) - bcatcstr(glsl, "&compare_value, "); - - glsl << TranslateOperand(src, ui32DataTypeFlag); - bcatcstr(glsl, ", memory_order::memory_order_relaxed"); - if (compare) - bcatcstr(glsl, ", memory_order::memory_order_relaxed"); - bcatcstr(glsl, ")"); - if (previousValue) - { - AddAssignPrologue(numParenthesis); - } - else - bcatcstr(glsl, ";\n"); - - if (compare) - { - if (previousValue) - { - psContext->AddIndentation(); - AddAssignToDest(previousValue, SVT_UINT, 1, psInst->ui32PreciseMask, numParenthesis); - bcatcstr(glsl, "compare_value"); - AddAssignPrologue(numParenthesis); - } - --psContext->indent; - psContext->AddIndentation(); - bcatcstr(glsl, "}\n"); - } -} - -void ToMetal::TranslateConditional( - Instruction* psInst, - bstring glsl) -{ - 
const char* statement = ""; - if (psInst->eOpcode == OPCODE_BREAKC) - { - statement = "break"; - } - else if (psInst->eOpcode == OPCODE_CONTINUEC) - { - statement = "continue"; - } - else if (psInst->eOpcode == OPCODE_RETC) // FIXME! Need to spew out shader epilogue - { - if (psContext->psShader->eShaderType == COMPUTE_SHADER || (psContext->psShader->eShaderType == PIXEL_SHADER && m_StructDefinitions[GetOutputStructName()].m_Members.size() == 0)) - statement = "return"; - else - statement = "return output"; - } - - - int isBool = psInst->asOperands[0].GetDataType(psContext) == SVT_BOOL; - - if (isBool) - { - bcatcstr(glsl, "if("); - if (psInst->eBooleanTestType != INSTRUCTION_TEST_NONZERO) - bcatcstr(glsl, "!"); - glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_BOOL); - if (psInst->eOpcode != OPCODE_IF) - { - bformata(glsl, "){%s;}\n", statement); - } - else - { - bcatcstr(glsl, "){\n"); - } - } - else - { - if (psInst->eBooleanTestType == INSTRUCTION_TEST_ZERO) - { - bcatcstr(glsl, "if(("); - glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_UNSIGNED_INTEGER); - - if (psInst->eOpcode != OPCODE_IF) - { - bformata(glsl, ")==uint(0)){%s;}\n", statement); - } - else - { - bcatcstr(glsl, ")==uint(0)){\n"); - } - } - else - { - ASSERT(psInst->eBooleanTestType == INSTRUCTION_TEST_NONZERO); - bcatcstr(glsl, "if(("); - glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_UNSIGNED_INTEGER); - - if (psInst->eOpcode != OPCODE_IF) - { - bformata(glsl, ")!=uint(0)){%s;}\n", statement); - } - else - { - bcatcstr(glsl, ")!=uint(0)){\n"); - } - } - } -} - -void ToMetal::TranslateInstruction(Instruction* psInst) -{ - bstring glsl = *psContext->currentGLSLString; - int numParenthesis = 0; - - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - // Uncomment to print instruction IDs - //psContext->AddIndentation(); - //bformata(glsl, "//Instruction %d\n", psInst->id); - #if 0 - if (psInst->id == 73) - { - ASSERT(1); //Set breakpoint here to 
debug an instruction from its ID. - } - #endif - } - - switch (psInst->eOpcode) - { - case OPCODE_FTOI: - case OPCODE_FTOU: - { - uint32_t dstCount = psInst->asOperands[0].GetNumSwizzleElements(); - uint32_t srcCount = psInst->asOperands[1].GetNumSwizzleElements(); - SHADER_VARIABLE_TYPE castType = psInst->eOpcode == OPCODE_FTOU ? SVT_UINT : SVT_INT; - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - if (psInst->eOpcode == OPCODE_FTOU) - bcatcstr(glsl, "//FTOU\n"); - else - bcatcstr(glsl, "//FTOI\n"); - } - switch (psInst->asOperands[0].eMinPrecision) - { - case OPERAND_MIN_PRECISION_DEFAULT: - break; - case OPERAND_MIN_PRECISION_SINT_16: - castType = SVT_INT16; - ASSERT(psInst->eOpcode == OPCODE_FTOI); - break; - case OPERAND_MIN_PRECISION_UINT_16: - castType = SVT_UINT16; - ASSERT(psInst->eOpcode == OPCODE_FTOU); - break; - default: - ASSERT(0); // We'd be doing bitcasts into low/mediump ints, not good. - } - psContext->AddIndentation(); - - AddAssignToDest(&psInst->asOperands[0], castType, srcCount, psInst->ui32PreciseMask, numParenthesis); - bcatcstr(glsl, GetConstructorForTypeMetal(castType, dstCount)); - bcatcstr(glsl, "("); // 1 - glsl << TranslateOperand(&psInst->asOperands[1], TO_AUTO_BITCAST_TO_FLOAT, psInst->asOperands[0].GetAccessMask()); - bcatcstr(glsl, ")"); // 1 - AddAssignPrologue(numParenthesis); - break; - } - - case OPCODE_MOV: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//MOV\n"); - } - psContext->AddIndentation(); - - // UNITY SPECIFIC: you can check case 1158280 - // This looks like a hack because it is! There is a bug that is quite hard to reproduce. 
- // When doing data analysis we assume that immediates are ints and hope it will be promoted later - // which is kinda fine unless there is an unfortunate combination happening: - // We operate on 4-component registers - we need different components to be treated as float/int - // but we should not use float operations (as this will mark register as float) - // instead "float" components should be used for MOV and friends to other registers - // and they, in turn, should be used for float ops - // In pseudocode it can look like this: - // var2.xy = var1.xy; var1.xy = var2.xy; // not marked as float explicitly - // bool foo = var1.z | <...> // marked as int - // Now we have immediate that will be treated as int but NOT promoted because we think we have all ints - // var1.w = 1 // var1 is marked int - // What is important is that this temporary is marked as int by us but DX compiler treats it - // as "normal" float (and rightfully so) [or rather - we speak about cases where it does treat it as float] - // It is also important that we speak about temps (otherwise we have explicit data type to use, so promotion works) - // - // At this point we have mov immediate to int temp (which should really be float temp) - { - Operand *pDst = &psInst->asOperands[0], *pSrc = &psInst->asOperands[1]; - if (pDst->GetDataType(psContext) == SVT_INT // dst marked as int - && pDst->eType == OPERAND_TYPE_TEMP // dst is temp - && pSrc->eType == OPERAND_TYPE_IMMEDIATE32 // src is immediate - && psContext->psShader->psIntTempSizes[pDst->ui32RegisterNumber] == 0 // no temp register allocated - ) - { - pDst->aeDataType[0] = pDst->aeDataType[1] = pDst->aeDataType[2] = pDst->aeDataType[3] = SVT_FLOAT; - } - } - - AddMOVBinaryOp(&psInst->asOperands[0], &psInst->asOperands[1], psInst->ui32PreciseMask); - break; - } - case OPCODE_ITOF://signed to float - case OPCODE_UTOF://unsigned to float - { - SHADER_VARIABLE_TYPE castType = SVT_FLOAT; - uint32_t dstCount = 
psInst->asOperands[0].GetNumSwizzleElements(); - uint32_t srcCount = psInst->asOperands[1].GetNumSwizzleElements(); - - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - if (psInst->eOpcode == OPCODE_ITOF) - bcatcstr(glsl, "//ITOF\n"); - else - bcatcstr(glsl, "//UTOF\n"); - } - - switch (psInst->asOperands[0].eMinPrecision) - { - case OPERAND_MIN_PRECISION_DEFAULT: - break; - case OPERAND_MIN_PRECISION_FLOAT_2_8: - castType = SVT_FLOAT10; - break; - case OPERAND_MIN_PRECISION_FLOAT_16: - castType = SVT_FLOAT16; - break; - default: - ASSERT(0); // We'd be doing bitcasts into low/mediump ints, not good. - } - - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], castType, srcCount, psInst->ui32PreciseMask, numParenthesis); - bcatcstr(glsl, GetConstructorForTypeMetal(castType, dstCount)); - bcatcstr(glsl, "("); // 1 - glsl << TranslateOperand(&psInst->asOperands[1], psInst->eOpcode == OPCODE_UTOF ? TO_AUTO_BITCAST_TO_UINT : TO_AUTO_BITCAST_TO_INT, psInst->asOperands[0].GetAccessMask()); - bcatcstr(glsl, ")"); // 1 - AddAssignPrologue(numParenthesis); - break; - } - case OPCODE_MAD: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//MAD\n"); - } - CallHelper3("fma", psInst, 0, 1, 2, 3, 1); - break; - } - case OPCODE_IMAD: - { - uint32_t ui32Flags = TO_FLAG_INTEGER; - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//IMAD\n"); - } - - if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) - { - ui32Flags = TO_FLAG_UNSIGNED_INTEGER; - } - - CallTernaryOp("*", "+", psInst, 0, 1, 2, 3, ui32Flags); - break; - } - case OPCODE_DFMA: - { - uint32_t ui32Flags = TO_FLAG_DOUBLE; - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//DFMA\n"); - } - CallHelper3("fma", psInst, 0, 1, 2, 3, 1, 
ui32Flags); - break; - } - case OPCODE_DADD: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//DADD\n"); - } - CallBinaryOp("+", psInst, 0, 1, 2, SVT_DOUBLE); - break; - } - case OPCODE_IADD: - { - SHADER_VARIABLE_TYPE eType = SVT_INT; - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//IADD\n"); - } - //Is this a signed or unsigned add? - if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) - { - eType = SVT_UINT; - } - CallBinaryOp("+", psInst, 0, 1, 2, eType); - break; - } - case OPCODE_ADD: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//ADD\n"); - } - CallBinaryOp("+", psInst, 0, 1, 2, SVT_FLOAT); - break; - } - case OPCODE_OR: - { - /*Todo: vector version */ - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//OR\n"); - } - if (psInst->asOperands[0].GetDataType(psContext) == SVT_BOOL) - { - uint32_t destMask = psInst->asOperands[0].GetAccessMask(); - - int needsParenthesis = 0; - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_BOOL, psInst->asOperands[0].GetNumSwizzleElements(), psInst->ui32PreciseMask, needsParenthesis); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_BOOL, destMask); - bcatcstr(glsl, " || "); - glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_BOOL, destMask); - AddAssignPrologue(needsParenthesis); - } - else - CallBinaryOp("|", psInst, 0, 1, 2, SVT_UINT); - break; - } - case OPCODE_AND: - { - SHADER_VARIABLE_TYPE eA = psInst->asOperands[1].GetDataType(psContext); - SHADER_VARIABLE_TYPE eB = psInst->asOperands[2].GetDataType(psContext); - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//AND\n"); - } - uint32_t destMask = 
psInst->asOperands[0].GetAccessMask(); - const uint32_t dstSwizCount = psInst->asOperands[0].GetNumSwizzleElements(); - SHADER_VARIABLE_TYPE eDataType = psInst->asOperands[0].GetDataType(psContext); - uint32_t ui32Flags = SVTTypeToFlag(eDataType); - if (psInst->asOperands[0].GetDataType(psContext) == SVT_BOOL) - { - int needsParenthesis = 0; - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_BOOL, psInst->asOperands[0].GetNumSwizzleElements(), psInst->ui32PreciseMask, needsParenthesis); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_BOOL, destMask); - bcatcstr(glsl, " && "); - glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_BOOL, destMask); - AddAssignPrologue(needsParenthesis); - } - else if ((eA == SVT_BOOL || eB == SVT_BOOL) && !(eA == SVT_BOOL && eB == SVT_BOOL)) - { - int boolOp = eA == SVT_BOOL ? 1 : 2; - int otherOp = eA == SVT_BOOL ? 2 : 1; - int needsParenthesis = 0; - uint32_t i; - psContext->AddIndentation(); - - if (dstSwizCount == 1) - { - AddAssignToDest(&psInst->asOperands[0], eDataType, dstSwizCount, psInst->ui32PreciseMask, needsParenthesis); - glsl << TranslateOperand(&psInst->asOperands[boolOp], TO_FLAG_BOOL, destMask); - bcatcstr(glsl, " ? 
"); - glsl << TranslateOperand(&psInst->asOperands[otherOp], ui32Flags, destMask); - bcatcstr(glsl, " : "); - - bcatcstr(glsl, GetConstructorForTypeMetal(eDataType, dstSwizCount)); - bcatcstr(glsl, "("); - switch (eDataType) - { - case SVT_FLOAT: - case SVT_FLOAT10: - case SVT_FLOAT16: - case SVT_DOUBLE: - bcatcstr(glsl, "0.0"); - break; - default: - bcatcstr(glsl, "0"); - } - bcatcstr(glsl, ")"); - } - else if (eDataType == SVT_FLOAT) - { - // We can use select() - AddAssignToDest(&psInst->asOperands[0], eDataType, dstSwizCount, psInst->ui32PreciseMask, needsParenthesis); - bcatcstr(glsl, "select("); - bcatcstr(glsl, GetConstructorForTypeMetal(eDataType, dstSwizCount)); - bcatcstr(glsl, "("); - for (i = 0; i < dstSwizCount; i++) - { - if (i > 0) - bcatcstr(glsl, ", "); - bcatcstr(glsl, "0.0"); - } - bcatcstr(glsl, "), "); - glsl << TranslateOperand(&psInst->asOperands[otherOp], ui32Flags, destMask); - bcatcstr(glsl, ", "); - bcatcstr(glsl, GetConstructorForTypeMetal(SVT_BOOL, dstSwizCount)); - bcatcstr(glsl, "("); - glsl << TranslateOperand(&psInst->asOperands[boolOp], TO_FLAG_BOOL, destMask); - bcatcstr(glsl, ")"); - bcatcstr(glsl, ")"); - } - else - { - AddAssignToDest(&psInst->asOperands[0], SVT_UINT, dstSwizCount, psInst->ui32PreciseMask, needsParenthesis); - bcatcstr(glsl, "("); - bcatcstr(glsl, GetConstructorForTypeMetal(SVT_UINT, dstSwizCount)); - bcatcstr(glsl, "("); - glsl << TranslateOperand(&psInst->asOperands[boolOp], TO_FLAG_BOOL, destMask); - bcatcstr(glsl, ") * 0xffffffffu) & "); - glsl << TranslateOperand(&psInst->asOperands[otherOp], TO_FLAG_UNSIGNED_INTEGER, destMask); - } - - AddAssignPrologue(needsParenthesis); - } - else - { - CallBinaryOp("&", psInst, 0, 1, 2, SVT_UINT); - } - - - break; - } - case OPCODE_GE: - { - /* - dest = vec4(greaterThanEqual(vec4(srcA), vec4(srcB)); - Caveat: The result is a boolean but HLSL asm returns 0xFFFFFFFF/0x0 instead. 
- */ - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//GE\n"); - } - AddComparison(psInst, CMP_GE, TO_FLAG_NONE); - break; - } - case OPCODE_MUL: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//MUL\n"); - } - CallBinaryOp("*", psInst, 0, 1, 2, SVT_FLOAT); - break; - } - case OPCODE_IMUL: - { - SHADER_VARIABLE_TYPE eType = SVT_INT; - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//IMUL\n"); - } - if (psInst->asOperands[1].GetDataType(psContext) == SVT_UINT) - { - eType = SVT_UINT; - } - - ASSERT(psInst->asOperands[0].eType == OPERAND_TYPE_NULL); - - CallBinaryOp("*", psInst, 1, 2, 3, eType); - break; - } - case OPCODE_UDIV: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//UDIV\n"); - } - //destQuotient, destRemainder, src0, src1 - - // There are cases where destQuotient is the same variable as src0 or src1. If that happens, - // we need to compute "%" before the "/" in order to avoid src0 or src1 being overriden first. 
- if ((psInst->asOperands[0].eType != psInst->asOperands[2].eType || psInst->asOperands[0].ui32RegisterNumber != psInst->asOperands[2].ui32RegisterNumber) - && (psInst->asOperands[0].eType != psInst->asOperands[3].eType || psInst->asOperands[0].ui32RegisterNumber != psInst->asOperands[3].ui32RegisterNumber)) - { - CallBinaryOp("/", psInst, 0, 2, 3, SVT_UINT); - CallBinaryOp("%", psInst, 1, 2, 3, SVT_UINT); - } - else - { - CallBinaryOp("%", psInst, 1, 2, 3, SVT_UINT); - CallBinaryOp("/", psInst, 0, 2, 3, SVT_UINT); - } - break; - } - case OPCODE_DIV: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//DIV\n"); - } - CallBinaryOp("/", psInst, 0, 1, 2, SVT_FLOAT); - break; - } - case OPCODE_SINCOS: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//SINCOS\n"); - } - // Need careful ordering if src == dest[0], as then the cos() will be reading from wrong value - if (psInst->asOperands[0].eType == psInst->asOperands[2].eType && - psInst->asOperands[0].ui32RegisterNumber == psInst->asOperands[2].ui32RegisterNumber) - { - // sin() result overwrites source, do cos() first. - // The case where both write the src shouldn't really happen anyway. 
- if (psInst->asOperands[1].eType != OPERAND_TYPE_NULL) - { - CallHelper1("cos", psInst, 1, 2, 1); - } - - if (psInst->asOperands[0].eType != OPERAND_TYPE_NULL) - { - CallHelper1( - "sin", psInst, 0, 2, 1); - } - } - else - { - if (psInst->asOperands[0].eType != OPERAND_TYPE_NULL) - { - CallHelper1("sin", psInst, 0, 2, 1); - } - - if (psInst->asOperands[1].eType != OPERAND_TYPE_NULL) - { - CallHelper1("cos", psInst, 1, 2, 1); - } - } - break; - } - - case OPCODE_DP2: - { - int numParenthesis = 0; - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//DP2\n"); - } - psContext->AddIndentation(); - SHADER_VARIABLE_TYPE dstType = psInst->asOperands[0].GetDataType(psContext); - uint32_t typeFlags = TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_EXPAND_TO_VEC2; - if (CanForceToHalfOperand(&psInst->asOperands[1]) - && CanForceToHalfOperand(&psInst->asOperands[2])) - typeFlags = TO_FLAG_FORCE_HALF | TO_AUTO_EXPAND_TO_VEC2; - - if (dstType != SVT_FLOAT16) - dstType = SVT_FLOAT; - - AddAssignToDest(&psInst->asOperands[0], dstType, 1, psInst->ui32PreciseMask, numParenthesis); - bcatcstr(glsl, "dot("); - glsl << TranslateOperand(&psInst->asOperands[1], typeFlags, 3 /* .xy */); - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[2], typeFlags, 3 /* .xy */); - bcatcstr(glsl, ")"); - AddAssignPrologue(numParenthesis); - break; - } - case OPCODE_DP3: - { - int numParenthesis = 0; - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//DP3\n"); - } - psContext->AddIndentation(); - SHADER_VARIABLE_TYPE dstType = psInst->asOperands[0].GetDataType(psContext); - uint32_t typeFlags = TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_EXPAND_TO_VEC3; - if (CanForceToHalfOperand(&psInst->asOperands[1]) - && CanForceToHalfOperand(&psInst->asOperands[2])) - typeFlags = TO_FLAG_FORCE_HALF | TO_AUTO_EXPAND_TO_VEC3; - - if (dstType != SVT_FLOAT16) - dstType = SVT_FLOAT; 
- - AddAssignToDest(&psInst->asOperands[0], dstType, 1, psInst->ui32PreciseMask, numParenthesis); - bcatcstr(glsl, "dot("); - glsl << TranslateOperand(&psInst->asOperands[1], typeFlags, 7 /* .xyz */); - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[2], typeFlags, 7 /* .xyz */); - bcatcstr(glsl, ")"); - AddAssignPrologue(numParenthesis); - break; - } - case OPCODE_DP4: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//DP4\n"); - } - CallHelper2("dot", psInst, 0, 1, 2, 0); - break; - } - case OPCODE_INE: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//INE\n"); - } - AddComparison(psInst, CMP_NE, TO_FLAG_INTEGER); - break; - } - case OPCODE_NE: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//NE\n"); - } - AddComparison(psInst, CMP_NE, TO_FLAG_NONE); - break; - } - case OPCODE_IGE: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//IGE\n"); - } - AddComparison(psInst, CMP_GE, TO_FLAG_INTEGER); - break; - } - case OPCODE_ILT: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//ILT\n"); - } - AddComparison(psInst, CMP_LT, TO_FLAG_INTEGER); - break; - } - case OPCODE_LT: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//LT\n"); - } - AddComparison(psInst, CMP_LT, TO_FLAG_NONE); - break; - } - case OPCODE_IEQ: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//IEQ\n"); - } - AddComparison(psInst, CMP_EQ, TO_FLAG_INTEGER); - break; - } - case OPCODE_ULT: - { - if (psContext->flags & 
HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//ULT\n"); - } - AddComparison(psInst, CMP_LT, TO_FLAG_UNSIGNED_INTEGER); - break; - } - case OPCODE_UGE: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//UGE\n"); - } - AddComparison(psInst, CMP_GE, TO_FLAG_UNSIGNED_INTEGER); - break; - } - case OPCODE_MOVC: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//MOVC\n"); - } - AddMOVCBinaryOp(&psInst->asOperands[0], &psInst->asOperands[1], &psInst->asOperands[2], &psInst->asOperands[3], psInst->ui32PreciseMask); - break; - } - case OPCODE_SWAPC: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//SWAPC\n"); - } - // TODO needs temps!! - ASSERT(0); - AddMOVCBinaryOp(&psInst->asOperands[0], &psInst->asOperands[2], &psInst->asOperands[4], &psInst->asOperands[3], psInst->ui32PreciseMask); - AddMOVCBinaryOp(&psInst->asOperands[1], &psInst->asOperands[2], &psInst->asOperands[3], &psInst->asOperands[4], psInst->ui32PreciseMask); - break; - } - - case OPCODE_LOG: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//LOG\n"); - } - CallHelper1("log2", psInst, 0, 1, 1); - break; - } - case OPCODE_RSQ: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//RSQ\n"); - } - CallHelper1("rsqrt", psInst, 0, 1, 1); - break; - } - case OPCODE_EXP: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//EXP\n"); - } - CallHelper1("exp2", psInst, 0, 1, 1); - break; - } - case OPCODE_SQRT: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - 
bcatcstr(glsl, "//SQRT\n"); - } - CallHelper1("sqrt", psInst, 0, 1, 1); - break; - } - case OPCODE_ROUND_PI: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//ROUND_PI\n"); - } - CallHelper1("ceil", psInst, 0, 1, 1); - break; - } - case OPCODE_ROUND_NI: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//ROUND_NI\n"); - } - CallHelper1("floor", psInst, 0, 1, 1); - break; - } - case OPCODE_ROUND_Z: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//ROUND_Z\n"); - } - CallHelper1("trunc", psInst, 0, 1, 1); - break; - } - case OPCODE_ROUND_NE: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//ROUND_NE\n"); - } - CallHelper1("rint", psInst, 0, 1, 1); - break; - } - case OPCODE_FRC: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//FRC\n"); - } - CallHelper1("fract", psInst, 0, 1, 1); - break; - } - case OPCODE_IMAX: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//IMAX\n"); - } - CallHelper2Int("max", psInst, 0, 1, 2, 1); - break; - } - case OPCODE_UMAX: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//UMAX\n"); - } - CallHelper2UInt("max", psInst, 0, 1, 2, 1); - break; - } - case OPCODE_MAX: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//MAX\n"); - } - CallHelper2("max", psInst, 0, 1, 2, 1); - break; - } - case OPCODE_IMIN: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//IMIN\n"); - } 
- CallHelper2Int("min", psInst, 0, 1, 2, 1); - break; - } - case OPCODE_UMIN: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//UMIN\n"); - } - CallHelper2UInt("min", psInst, 0, 1, 2, 1); - break; - } - case OPCODE_MIN: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//MIN\n"); - } - CallHelper2("min", psInst, 0, 1, 2, 1); - break; - } - case OPCODE_GATHER4: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//GATHER4\n"); - } - TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER); - break; - } - case OPCODE_GATHER4_PO_C: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//GATHER4_PO_C\n"); - } - TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER | TEXSMP_FLAG_PARAMOFFSET | TEXSMP_FLAG_DEPTHCOMPARE); - break; - } - case OPCODE_GATHER4_PO: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//GATHER4_PO\n"); - } - TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER | TEXSMP_FLAG_PARAMOFFSET); - break; - } - case OPCODE_GATHER4_C: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//GATHER4_C\n"); - } - TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER | TEXSMP_FLAG_DEPTHCOMPARE); - break; - } - case OPCODE_SAMPLE: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//SAMPLE\n"); - } - TranslateTextureSample(psInst, TEXSMP_FLAG_NONE); - break; - } - case OPCODE_SAMPLE_L: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//SAMPLE_L\n"); - } - TranslateTextureSample(psInst, 
TEXSMP_FLAG_LOD); - break; - } - case OPCODE_SAMPLE_C: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//SAMPLE_C\n"); - } - TranslateTextureSample(psInst, TEXSMP_FLAG_DEPTHCOMPARE); - break; - } - case OPCODE_SAMPLE_C_LZ: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//SAMPLE_C_LZ\n"); - } - TranslateTextureSample(psInst, TEXSMP_FLAG_DEPTHCOMPARE | TEXSMP_FLAG_FIRSTLOD); - break; - } - case OPCODE_SAMPLE_D: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//SAMPLE_D\n"); - } - TranslateTextureSample(psInst, TEXSMP_FLAG_GRAD); - break; - } - case OPCODE_SAMPLE_B: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//SAMPLE_B\n"); - } - TranslateTextureSample(psInst, TEXSMP_FLAG_BIAS); - break; - } - case OPCODE_RET: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//RET\n"); - } - if (psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode) - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//--- Post shader code ---\n"); - } - - bconcat(glsl, psContext->psShader->asPhases[psContext->currentPhase].postShaderCode); - - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//--- End post shader code ---\n"); - } - } - psContext->AddIndentation(); - if (psContext->psShader->eShaderType == COMPUTE_SHADER || (psContext->psShader->eShaderType == PIXEL_SHADER && m_StructDefinitions[GetOutputStructName()].m_Members.size() == 0)) - bcatcstr(glsl, "return;\n"); - else - bcatcstr(glsl, "return output;\n"); - - break; - } - case 
OPCODE_INTERFACE_CALL: - { - ASSERT(0); - } - case OPCODE_LABEL: - { - ASSERT(0); // Never seen this - } - case OPCODE_COUNTBITS: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//COUNTBITS\n"); - } - psContext->AddIndentation(); - - // in metal popcount decl is T popcount(T), so it is important that input/output types agree - // enter assembly: when writing swizzle encoding we use 0 to say "source from x" - // now, say, we generate code o.xy = bitcount(i.xy) - // output gets component mask 1,1,0,0 (note that we use bit 1<).<..> will still collapse everything into - // popcount(i.<..>) [well, tweaking swizzle, sure] - // what does that mean is that we can safely take output component count to determine "proper" type - // note that hlsl compiler already checked that things can work out, so it should be fine doing this magic - - const Operand* dst = &psInst->asOperands[0]; - const int dstCompCount = dst->eSelMode == OPERAND_4_COMPONENT_MASK_MODE ? 
dst->ui32CompMask : OPERAND_4_COMPONENT_MASK_ALL; - - glsl << TranslateOperand(dst, TO_FLAG_INTEGER | TO_FLAG_DESTINATION); - bcatcstr(glsl, " = popcount("); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, dstCompCount); - bcatcstr(glsl, ");\n"); - break; - } - case OPCODE_FIRSTBIT_HI: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//FIRSTBIT_HI\n"); - } - DeclareExtraFunction("firstBit_hi", "template UVecType firstBit_hi(const UVecType input) { UVecType res = clz(input); return res; };"); - // TODO implement the 0-case (must return 0xffffffff) - psContext->AddIndentation(); - glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_UNSIGNED_INTEGER | TO_FLAG_DESTINATION); - bcatcstr(glsl, " = firstBit_hi("); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER); - bcatcstr(glsl, ");\n"); - break; - } - case OPCODE_FIRSTBIT_LO: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//FIRSTBIT_LO\n"); - } - // TODO implement the 0-case (must return 0xffffffff) - DeclareExtraFunction("firstBit_lo", "template UVecType firstBit_lo(const UVecType input) { UVecType res = ctz(input); return res; };"); - psContext->AddIndentation(); - glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_UNSIGNED_INTEGER | TO_FLAG_DESTINATION); - bcatcstr(glsl, " = firstBit_lo("); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER); - bcatcstr(glsl, ");\n"); - break; - } - case OPCODE_FIRSTBIT_SHI: //signed high - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//FIRSTBIT_SHI\n"); - } - // TODO Not at all correct for negative values yet. 
- DeclareExtraFunction("firstBit_shi", "template IVecType firstBit_shi(const IVecType input) { IVecType res = clz(input); return res; };"); - psContext->AddIndentation(); - glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER | TO_FLAG_DESTINATION); - bcatcstr(glsl, " = firstBit_shi("); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER); - bcatcstr(glsl, ");\n"); - break; - } - case OPCODE_BFREV: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//BFREV\n"); - } - DeclareExtraFunction("bitReverse", "template UVecType bitReverse(const UVecType input)\n\ -\t\t{ UVecType x = input;\n\ -\t\t\tx = (((x & 0xaaaaaaaa) >> 1) | ((x & 0x55555555) << 1));\n\ -\t\t\tx = (((x & 0xcccccccc) >> 2) | ((x & 0x33333333) << 2));\n\ -\t\t\tx = (((x & 0xf0f0f0f0) >> 4) | ((x & 0x0f0f0f0f) << 4));\n\ -\t\t\tx = (((x & 0xff00ff00) >> 8) | ((x & 0x00ff00ff) << 8));\n\ -\t\t\treturn((x >> 16) | (x << 16));\n\ -\t\t}; "); - psContext->AddIndentation(); - glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER | TO_FLAG_DESTINATION); - bcatcstr(glsl, " = bitReverse("); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER); - bcatcstr(glsl, ");\n"); - break; - } - case OPCODE_BFI: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//BFI\n"); - } - DeclareExtraFunction("BFI", "\ -\t\ttemplate UVecType bitFieldInsert(const UVecType width, const UVecType offset, const UVecType src2, const UVecType src3)\n\ -\t\t{\n\ -\t\t\tUVecType bitmask = (((UVecType(1) << width)-1) << offset) & 0xffffffff;\n\ -\t\t\treturn ((src2 << offset) & bitmask) | (src3 & ~bitmask);\n\ -\t\t}; "); - psContext->AddIndentation(); - - uint32_t destMask = psInst->asOperands[0].GetAccessMask(); - AddAssignToDest(&psInst->asOperands[0], SVT_UINT, psInst->asOperands[0].GetNumSwizzleElements(), psInst->ui32PreciseMask, 
numParenthesis); - bcatcstr(glsl, "bitFieldInsert("); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, destMask); - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_UNSIGNED_INTEGER, destMask); - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[3], TO_FLAG_UNSIGNED_INTEGER, destMask); - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[4], TO_FLAG_UNSIGNED_INTEGER, destMask); - bcatcstr(glsl, ")"); - - AddAssignPrologue(numParenthesis); - break; - } - case OPCODE_CUT: - case OPCODE_EMITTHENCUT_STREAM: - case OPCODE_EMIT: - case OPCODE_EMITTHENCUT: - case OPCODE_CUT_STREAM: - case OPCODE_EMIT_STREAM: - { - ASSERT(0); // Not on metal - } - case OPCODE_REP: - case OPCODE_ENDREP: - { - ASSERT(0); // Shouldn't see these anymore - } - case OPCODE_LOOP: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//LOOP\n"); - } - psContext->AddIndentation(); - - bcatcstr(glsl, "while(true){\n"); - ++psContext->indent; - break; - } - case OPCODE_ENDLOOP: - { - --psContext->indent; - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//ENDLOOP\n"); - } - psContext->AddIndentation(); - bcatcstr(glsl, "}\n"); - break; - } - case OPCODE_BREAK: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//BREAK\n"); - } - psContext->AddIndentation(); - bcatcstr(glsl, "break;\n"); - break; - } - case OPCODE_BREAKC: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//BREAKC\n"); - } - psContext->AddIndentation(); - - TranslateConditional(psInst, glsl); - break; - } - case OPCODE_CONTINUEC: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, 
"//CONTINUEC\n"); - } - psContext->AddIndentation(); - - TranslateConditional(psInst, glsl); - break; - } - case OPCODE_IF: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//IF\n"); - } - psContext->AddIndentation(); - - TranslateConditional(psInst, glsl); - ++psContext->indent; - break; - } - case OPCODE_RETC: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//RETC\n"); - } - psContext->AddIndentation(); - - TranslateConditional(psInst, glsl); - break; - } - case OPCODE_ELSE: - { - --psContext->indent; - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//ELSE\n"); - } - psContext->AddIndentation(); - bcatcstr(glsl, "} else {\n"); - psContext->indent++; - break; - } - case OPCODE_ENDSWITCH: - case OPCODE_ENDIF: - { - --psContext->indent; - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//ENDIF\n"); - } - psContext->AddIndentation(); - bcatcstr(glsl, "}\n"); - break; - } - case OPCODE_CONTINUE: - { - psContext->AddIndentation(); - bcatcstr(glsl, "continue;\n"); - break; - } - case OPCODE_DEFAULT: - { - --psContext->indent; - psContext->AddIndentation(); - bcatcstr(glsl, "default:\n"); - ++psContext->indent; - break; - } - case OPCODE_NOP: - { - break; - } - case OPCODE_SYNC: - { - const uint32_t ui32SyncFlags = psInst->ui32SyncFlags; - - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//SYNC\n"); - } - const bool sync_threadgroup = (ui32SyncFlags & SYNC_THREAD_GROUP_SHARED_MEMORY) != 0; - const bool sync_device = (ui32SyncFlags & (SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GROUP | SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GLOBAL)) != 0; - - const char* barrierFlags = "mem_flags::mem_none"; - if (sync_threadgroup && 
sync_device) barrierFlags = "mem_flags::mem_threadgroup | mem_flags::mem_device"; - else if (sync_threadgroup) barrierFlags = "mem_flags::mem_threadgroup"; - else if (sync_device) barrierFlags = "mem_flags::mem_device"; - - if (ui32SyncFlags & SYNC_THREADS_IN_GROUP) - { - psContext->AddIndentation(); - bformata(glsl, "threadgroup_barrier(%s);\n", barrierFlags); - } - else - { - psContext->AddIndentation(); bformata(glsl, "#if __HAVE_SIMDGROUP_BARRIER__\n"); - psContext->AddIndentation(); bformata(glsl, "simdgroup_barrier(%s);\n", barrierFlags); - psContext->AddIndentation(); bformata(glsl, "#else\n"); - psContext->AddIndentation(); bformata(glsl, "threadgroup_barrier(%s);\n", barrierFlags); - psContext->AddIndentation(); bformata(glsl, "#endif\n"); - } - - break; - } - case OPCODE_SWITCH: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//SWITCH\n"); - } - psContext->AddIndentation(); - bcatcstr(glsl, "switch(int("); - glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER); - bcatcstr(glsl, ")){\n"); - - psContext->indent += 2; - break; - } - case OPCODE_CASE: - { - --psContext->indent; - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//case\n"); - } - psContext->AddIndentation(); - - bcatcstr(glsl, "case "); - glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER); - bcatcstr(glsl, ":\n"); - - ++psContext->indent; - break; - } - case OPCODE_EQ: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//EQ\n"); - } - AddComparison(psInst, CMP_EQ, TO_FLAG_NONE); - break; - } - case OPCODE_USHR: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//USHR\n"); - } - CallBinaryOp(">>", psInst, 0, 1, 2, SVT_UINT); - break; - } - case OPCODE_ISHL: - { - 
SHADER_VARIABLE_TYPE eType = SVT_INT; - - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//ISHL\n"); - } - - if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) - { - eType = SVT_UINT; - } - - CallBinaryOp("<<", psInst, 0, 1, 2, eType); - break; - } - case OPCODE_ISHR: - { - SHADER_VARIABLE_TYPE eType = SVT_INT; - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//ISHR\n"); - } - - if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) - { - eType = SVT_UINT; - } - - CallBinaryOp(">>", psInst, 0, 1, 2, eType); - break; - } - case OPCODE_LD: - case OPCODE_LD_MS: - { - const ResourceBinding* psBinding = 0; - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - if (psInst->eOpcode == OPCODE_LD) - bcatcstr(glsl, "//LD\n"); - else - bcatcstr(glsl, "//LD_MS\n"); - } - - psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, psInst->asOperands[2].ui32RegisterNumber, &psBinding); - - if (psInst->eResDim == RESOURCE_DIMENSION_BUFFER) // Hack typed buffer as raw buf - { - psInst->eOpcode = OPCODE_LD_UAV_TYPED; - psInst->asOperands[1].eSelMode = OPERAND_4_COMPONENT_SELECT_1_MODE; - if (psInst->asOperands[1].eType == OPERAND_TYPE_IMMEDIATE32) - psInst->asOperands[1].iNumComponents = 1; - TranslateShaderStorageLoad(psInst); - break; - } - - if (psInst->bAddressOffset) - { - TranslateTexelFetchOffset(psInst, psBinding, glsl); - } - else - { - TranslateTexelFetch(psInst, psBinding, glsl); - } - break; - } - case OPCODE_DISCARD: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//DISCARD\n"); - } - - psContext->AddIndentation(); - if (psInst->eBooleanTestType == INSTRUCTION_TEST_ZERO) - { - bcatcstr(glsl, "if(("); - glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER); - 
bcatcstr(glsl, ")==0){discard_fragment();}\n"); - } - else - { - ASSERT(psInst->eBooleanTestType == INSTRUCTION_TEST_NONZERO); - bcatcstr(glsl, "if(("); - glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER); - bcatcstr(glsl, ")!=0){discard_fragment();}\n"); - } - break; - } - case OPCODE_LOD: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//LOD\n"); - } - //LOD computes the following vector (ClampedLOD, NonClampedLOD, 0, 0) - - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, 4, psInst->ui32PreciseMask, numParenthesis); - - //If the core language does not have query-lod feature, - //then the extension is used. The name of the function - //changed between extension and core. - if (HaveQueryLod(psContext->psShader->eTargetLanguage)) - { - bcatcstr(glsl, "textureQueryLod("); - } - else - { - bcatcstr(glsl, "textureQueryLOD("); - } - - glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); - bcatcstr(glsl, ","); - TranslateTexCoord( - psContext->psShader->aeResourceDims[psInst->asOperands[2].ui32RegisterNumber], - &psInst->asOperands[1]); - bcatcstr(glsl, ")"); - - //The swizzle on srcResource allows the returned values to be swizzled arbitrarily before they are written to the destination. - - // iWriteMaskEnabled is forced off during DecodeOperand because swizzle on sampler uniforms - // does not make sense. But need to re-enable to correctly swizzle this particular instruction. 
- psInst->asOperands[2].iWriteMaskEnabled = 1; - glsl << TranslateOperandSwizzle(&psInst->asOperands[2], psInst->asOperands[0].GetAccessMask(), 0); - AddAssignPrologue(numParenthesis); - break; - } - case OPCODE_EVAL_CENTROID: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//EVAL_CENTROID\n"); - } - psContext->AddIndentation(); - glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); - bcatcstr(glsl, " = interpolateAtCentroid("); - //interpolateAtCentroid accepts in-qualified variables. - //As long as bytecode only writes vX registers in declarations - //we should be able to use the declared name directly. - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_DECLARATION_NAME); - bcatcstr(glsl, ");\n"); - break; - } - case OPCODE_EVAL_SAMPLE_INDEX: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//EVAL_SAMPLE_INDEX\n"); - } - psContext->AddIndentation(); - glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); - bcatcstr(glsl, " = interpolateAtSample("); - //interpolateAtSample accepts in-qualified variables. - //As long as bytecode only writes vX registers in declarations - //we should be able to use the declared name directly. - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_DECLARATION_NAME); - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_INTEGER); - bcatcstr(glsl, ");\n"); - break; - } - case OPCODE_EVAL_SNAPPED: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//EVAL_SNAPPED\n"); - } - psContext->AddIndentation(); - glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); - bcatcstr(glsl, " = interpolateAtOffset("); - //interpolateAtOffset accepts in-qualified variables. 
- //As long as bytecode only writes vX registers in declarations - //we should be able to use the declared name directly. - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_DECLARATION_NAME); - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_INTEGER); - bcatcstr(glsl, ".xy);\n"); - break; - } - case OPCODE_LD_STRUCTURED: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//LD_STRUCTURED\n"); - } - TranslateShaderStorageLoad(psInst); - break; - } - case OPCODE_LD_UAV_TYPED: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//LD_UAV_TYPED\n"); - } - Operand* psDest = &psInst->asOperands[0]; - Operand* psSrc = &psInst->asOperands[2]; - Operand* psSrcAddr = &psInst->asOperands[1]; - - const ResourceBinding* psRes = 0; - psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, psSrc->ui32RegisterNumber, &psRes); - SHADER_VARIABLE_TYPE srcDataType = ResourceReturnTypeToSVTType(psRes->ui32ReturnType, psRes->ePrecision); - - if (psInst->eResDim == RESOURCE_DIMENSION_BUFFER) // Hack typed buffer as raw buf - { - psSrc->aeDataType[0] = srcDataType; - psSrcAddr->eSelMode = OPERAND_4_COMPONENT_SELECT_1_MODE; - if (psSrcAddr->eType == OPERAND_TYPE_IMMEDIATE32) - psSrcAddr->iNumComponents = 1; - TranslateShaderStorageLoad(psInst); - break; - } - -#define RRD(n) REFLECT_RESOURCE_DIMENSION_ ## n - - // unlike glsl, texture arrays will have index in separate argument - const bool isArray = psRes->eDimension == RRD(TEXTURE1DARRAY) || psRes->eDimension == RRD(TEXTURE2DARRAY) - || psRes->eDimension == RRD(TEXTURE2DMSARRAY) || psRes->eDimension == RRD(TEXTURECUBEARRAY); - - uint32_t flags = TO_FLAG_UNSIGNED_INTEGER, opMask = OPERAND_4_COMPONENT_MASK_ALL; - switch (psRes->eDimension) - { - case RRD(TEXTURE3D): - opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | 
OPERAND_4_COMPONENT_MASK_Z; - flags |= TO_AUTO_EXPAND_TO_VEC3; - break; - case RRD(TEXTURECUBE): case RRD(TEXTURECUBEARRAY): - case RRD(TEXTURE2DARRAY): case RRD(TEXTURE2DMSARRAY): case RRD(TEXTURE2D): case RRD(TEXTURE2DMS): - opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y; - flags |= TO_AUTO_EXPAND_TO_VEC2; - break; - case RRD(TEXTURE1D): case RRD(TEXTURE1DARRAY): - opMask = OPERAND_4_COMPONENT_MASK_X; - break; - default: - ASSERT(0); break; - } - - int srcCount = psSrc->GetNumSwizzleElements(), numParenthesis = 0; - psContext->AddIndentation(); - AddAssignToDest(psDest, srcDataType, srcCount, psInst->ui32PreciseMask, numParenthesis); - glsl << TranslateOperand(psSrc, TO_FLAG_NAME_ONLY); - bcatcstr(glsl, ".read("); - glsl << TranslateOperand(psSrcAddr, flags, opMask); - if (isArray) - { - // NB cube array is handled incorrectly - it needs extra "face" arg - switch (psRes->eDimension) - { - case RRD(TEXTURE1DARRAY): opMask = OPERAND_4_COMPONENT_MASK_Y; break; - case RRD(TEXTURE2DARRAY): case RRD(TEXTURE2DMSARRAY): opMask = OPERAND_4_COMPONENT_MASK_Z; break; - case RRD(TEXTURECUBEARRAY): opMask = OPERAND_4_COMPONENT_MASK_W; break; - default: ASSERT(0); break; - } - - bcatcstr(glsl, ", "); - glsl << TranslateOperand(psSrcAddr, TO_FLAG_UNSIGNED_INTEGER, opMask); - } - bcatcstr(glsl, ")"); - glsl << TranslateOperandSwizzle(psSrc, psDest->ui32CompMask, 0); - AddAssignPrologue(numParenthesis); - -#undef RRD - - break; - } - case OPCODE_STORE_RAW: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//STORE_RAW\n"); - } - TranslateShaderStorageStore(psInst); - break; - } - case OPCODE_STORE_STRUCTURED: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//STORE_STRUCTURED\n"); - } - TranslateShaderStorageStore(psInst); - break; - } - - case OPCODE_STORE_UAV_TYPED: - { - const ResourceBinding* psRes; - int 
foundResource; - - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//STORE_UAV_TYPED\n"); - } - foundResource = psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, - psInst->asOperands[0].ui32RegisterNumber, - &psRes); - ASSERT(foundResource); - - if (psRes->eDimension == REFLECT_RESOURCE_DIMENSION_BUFFER) // Hack typed buffer as raw buf - { - psInst->asOperands[0].aeDataType[0] = ResourceReturnTypeToSVTType(psRes->ui32ReturnType, psRes->ePrecision); - psInst->asOperands[1].eSelMode = OPERAND_4_COMPONENT_SELECT_1_MODE; - if (psInst->asOperands[1].eType == OPERAND_TYPE_IMMEDIATE32) - psInst->asOperands[1].iNumComponents = 1; - TranslateShaderStorageStore(psInst); - break; - } - - psContext->AddIndentation(); - - glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_NAME_ONLY); - bcatcstr(glsl, ".write("); - - #define RRD(n) REFLECT_RESOURCE_DIMENSION_ ## n - - // unlike glsl, texture arrays will have index in separate argument - const bool isArray = psRes->eDimension == RRD(TEXTURE1DARRAY) || psRes->eDimension == RRD(TEXTURE2DARRAY) - || psRes->eDimension == RRD(TEXTURE2DMSARRAY) || psRes->eDimension == RRD(TEXTURECUBEARRAY); - - uint32_t flags = TO_FLAG_UNSIGNED_INTEGER, opMask = OPERAND_4_COMPONENT_MASK_ALL; - switch (psRes->eDimension) - { - case RRD(TEXTURE1D): case RRD(TEXTURE1DARRAY): - opMask = OPERAND_4_COMPONENT_MASK_X; - break; - case RRD(TEXTURE2D): case RRD(TEXTURE2DMS): case RRD(TEXTURE2DARRAY): case RRD(TEXTURE2DMSARRAY): - opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y; - flags |= TO_AUTO_EXPAND_TO_VEC2; - break; - case RRD(TEXTURE3D): case RRD(TEXTURECUBE): case RRD(TEXTURECUBEARRAY): - opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z; - flags |= TO_AUTO_EXPAND_TO_VEC3; - break; - default: - ASSERT(0); - break; - } - - - glsl << TranslateOperand(&psInst->asOperands[2], 
ResourceReturnTypeToFlag(psRes->ui32ReturnType)); - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[1], flags, opMask); - if (isArray) - { - // NB cube array is handled incorrectly - it needs extra "face" arg - flags = TO_FLAG_UNSIGNED_INTEGER; - switch (psRes->eDimension) - { - case RRD(TEXTURE1DARRAY): opMask = OPERAND_4_COMPONENT_MASK_Y; break; - case RRD(TEXTURE2DARRAY): case RRD(TEXTURE2DMSARRAY): opMask = OPERAND_4_COMPONENT_MASK_Z; break; - case RRD(TEXTURECUBEARRAY): opMask = OPERAND_4_COMPONENT_MASK_Z; break; - default: ASSERT(0); break; - } - - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[1], flags, opMask); - } - bformata(glsl, ");\n"); - -#undef RRD - - break; - } - case OPCODE_LD_RAW: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//LD_RAW\n"); - } - TranslateShaderStorageLoad(psInst); - break; - } - - case OPCODE_ATOMIC_CMP_STORE: - case OPCODE_IMM_ATOMIC_AND: - case OPCODE_ATOMIC_AND: - case OPCODE_IMM_ATOMIC_IADD: - case OPCODE_ATOMIC_IADD: - case OPCODE_ATOMIC_OR: - case OPCODE_ATOMIC_XOR: - case OPCODE_ATOMIC_IMAX: - case OPCODE_ATOMIC_IMIN: - case OPCODE_ATOMIC_UMAX: - case OPCODE_ATOMIC_UMIN: - case OPCODE_IMM_ATOMIC_IMAX: - case OPCODE_IMM_ATOMIC_IMIN: - case OPCODE_IMM_ATOMIC_UMAX: - case OPCODE_IMM_ATOMIC_UMIN: - case OPCODE_IMM_ATOMIC_OR: - case OPCODE_IMM_ATOMIC_XOR: - case OPCODE_IMM_ATOMIC_EXCH: - case OPCODE_IMM_ATOMIC_CMP_EXCH: - { - TranslateAtomicMemOp(psInst); - break; - } - case OPCODE_UBFE: - case OPCODE_IBFE: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - if (psInst->eOpcode == OPCODE_UBFE) - bcatcstr(glsl, "//OPCODE_UBFE\n"); - else - bcatcstr(glsl, "//OPCODE_IBFE\n"); - } - - bool isUBFE = psInst->eOpcode == OPCODE_UBFE; - bool isScalar = psInst->asOperands[0].GetNumSwizzleElements() == 1; - - if (isUBFE) - { - if (isScalar) - { - 
DeclareExtraFunction("UBFE", "\ -uint bitFieldExtractU(uint width, uint offset, uint src);\n\ -uint bitFieldExtractU(uint width, uint offset, uint src)\n\ -{\n\ -\tbool isWidthZero = (width == 0);\n\ -\tbool needsClamp = ((width + offset) < 32);\n\ -\tuint clampVersion = src << (32-(width+offset));\n\ -\tclampVersion = clampVersion >> (32 - width);\n\ -\tuint simpleVersion = src >> offset;\n\ -\tuint res = select(simpleVersion, clampVersion, needsClamp);\n\ -\treturn select(res, (uint)0, isWidthZero);\n\ -}; "); - } - else - { - DeclareExtraFunction("UBFEV", "\ -template vec bitFieldExtractU(const vec width, const vec offset, const vec src)\n\ -{\n\ -\tvec isWidthZero = (width == 0);\n\ -\tvec needsClamp = ((width + offset) < 32);\n\ -\tvec clampVersion = src << (32-(width+offset));\n\ -\tclampVersion = clampVersion >> (32 - width);\n\ -\tvec simpleVersion = src >> offset;\n\ -\tvec res = select(simpleVersion, clampVersion, needsClamp);\n\ -\treturn select(res, vec(0), isWidthZero);\n\ -}; "); - } - } - else - { - if (isScalar) - { - DeclareExtraFunction("IBFE", "\ -template int bitFieldExtractI(uint width, uint offset, int src)\n\ -{\n\ -\tbool isWidthZero = (width == 0);\n\ -\tbool needsClamp = ((width + offset) < 32);\n\ -\tint clampVersion = src << (32-(width+offset));\n\ -\tclampVersion = clampVersion >> (32 - width);\n\ -\tint simpleVersion = src >> offset;\n\ -\tint res = select(simpleVersion, clampVersion, needsClamp);\n\ -\treturn select(res, (int)0, isWidthZero);\n\ -}; "); - } - else - { - DeclareExtraFunction("IBFEV", "\ -template vec bitFieldExtractI(const vec width, const vec offset, const vec src)\n\ -{\n\ -\tvec isWidthZero = (width == 0);\n\ -\tvec needsClamp = ((width + offset) < 32);\n\ -\tvec clampVersion = src << (32-(width+offset));\n\ -\tclampVersion = clampVersion >> (32 - width);\n\ -\tvec simpleVersion = src >> offset;\n\ -\tvec res = select(simpleVersion, clampVersion, needsClamp);\n\ -\treturn select(res, vec(0), isWidthZero);\n\ -}; "); 
- } - } - psContext->AddIndentation(); - - uint32_t destMask = psInst->asOperands[0].GetAccessMask(); - uint32_t src2SwizCount = psInst->asOperands[3].GetNumSwizzleElements(destMask); - uint32_t src1SwizCount = psInst->asOperands[2].GetNumSwizzleElements(destMask); - uint32_t src0SwizCount = psInst->asOperands[1].GetNumSwizzleElements(destMask); - uint32_t ui32Flags = 0; - - if (src1SwizCount != src0SwizCount || src2SwizCount != src0SwizCount) - { - uint32_t maxElems = std::max(src2SwizCount, std::max(src1SwizCount, src0SwizCount)); - ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); - } - - AddAssignToDest(&psInst->asOperands[0], isUBFE ? SVT_UINT : SVT_INT, psInst->asOperands[0].GetNumSwizzleElements(), psInst->ui32PreciseMask, numParenthesis); - bcatcstr(glsl, "bitFieldExtract"); - bcatcstr(glsl, isUBFE ? "U" : "I"); - bcatcstr(glsl, "("); - glsl << TranslateOperand(&psInst->asOperands[1], ui32Flags | TO_FLAG_UNSIGNED_INTEGER, destMask); - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[2], ui32Flags | TO_FLAG_UNSIGNED_INTEGER, destMask); - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[3], ui32Flags | (isUBFE ? 
TO_FLAG_UNSIGNED_INTEGER : TO_FLAG_INTEGER), destMask); - bcatcstr(glsl, ")"); - AddAssignPrologue(numParenthesis); - break; - } - case OPCODE_RCP: - { - const uint32_t destElemCount = psInst->asOperands[0].GetNumSwizzleElements(); - const uint32_t srcElemCount = psInst->asOperands[1].GetNumSwizzleElements(); - int numParenthesis = 0; - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//RCP\n"); - } - psContext->AddIndentation(); - - SHADER_VARIABLE_TYPE dstType = psInst->asOperands[0].GetDataType(psContext); - SHADER_VARIABLE_TYPE srcType = psInst->asOperands[1].GetDataType(psContext); - - uint32_t typeFlags = TO_FLAG_NONE; - if (dstType == SVT_FLOAT16 && srcType == SVT_FLOAT16) - { - typeFlags = TO_FLAG_FORCE_HALF; - } - else - srcType = SVT_FLOAT; - - AddAssignToDest(&psInst->asOperands[0], srcType, srcElemCount, psInst->ui32PreciseMask, numParenthesis); - bcatcstr(glsl, GetConstructorForTypeMetal(srcType, destElemCount)); - bcatcstr(glsl, "(1.0) / "); - bcatcstr(glsl, GetConstructorForTypeMetal(srcType, destElemCount)); - bcatcstr(glsl, "("); - numParenthesis++; - glsl << TranslateOperand(&psInst->asOperands[1], typeFlags, psInst->asOperands[0].GetAccessMask()); - AddAssignPrologue(numParenthesis); - break; - } - case OPCODE_F32TOF16: - { - uint32_t writeMask = psInst->asOperands[0].GetAccessMask(); - - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//F32TOF16\n"); - } - - for (int i = 0; i < 4; i++) - { - if ((writeMask & (1 << i)) == 0) - continue; - psContext->AddIndentation(); - psInst->asOperands[0].ui32CompMask = (1 << i); - psInst->asOperands[0].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; - AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, psInst->ui32PreciseMask, numParenthesis); - - bcatcstr(glsl, "as_type(half2("); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE, (1 << i)); - bcatcstr(glsl, ", 
0.0))"); - AddAssignPrologue(numParenthesis); - } - break; - } - case OPCODE_F16TOF32: - { - uint32_t writeMask = psInst->asOperands[0].GetAccessMask(); - - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//F16TOF32\n"); - } - - for (int i = 0; i < 4; i++) - { - if ((writeMask & (1 << i)) == 0) - continue; - psContext->AddIndentation(); - psInst->asOperands[0].ui32CompMask = (1 << i); - psInst->asOperands[0].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; - AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, 1, psInst->ui32PreciseMask, numParenthesis); - - bcatcstr(glsl, "as_type("); - glsl << TranslateOperand(&psInst->asOperands[1], TO_AUTO_BITCAST_TO_UINT, (1 << i)); - bcatcstr(glsl, ").x"); - AddAssignPrologue(numParenthesis); - } - break; - } - case OPCODE_INEG: - { - int numParenthesis = 0; - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//INEG\n"); - } - //dest = 0 - src0 - psContext->AddIndentation(); - - AddAssignToDest(&psInst->asOperands[0], SVT_INT, psInst->asOperands[1].GetNumSwizzleElements(), psInst->ui32PreciseMask, numParenthesis); - - bcatcstr(glsl, "0 - "); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, psInst->asOperands[0].GetAccessMask()); - AddAssignPrologue(numParenthesis); - break; - } - case OPCODE_DERIV_RTX_COARSE: - case OPCODE_DERIV_RTX_FINE: - case OPCODE_DERIV_RTX: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//DERIV_RTX\n"); - } - CallHelper1("dfdx", psInst, 0, 1, 1); - break; - } - case OPCODE_DERIV_RTY_COARSE: - case OPCODE_DERIV_RTY_FINE: - case OPCODE_DERIV_RTY: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//DERIV_RTY\n"); - } - CallHelper1("dfdy", psInst, 0, 1, 1); - break; - } - case OPCODE_LRP: - { - if 
(psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//LRP\n"); - } - CallHelper3("mix", psInst, 0, 2, 3, 1, 1); - break; - } - case OPCODE_DP2ADD: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//DP2ADD\n"); - } - psContext->AddIndentation(); - bool isFP16 = false; - if (CanForceToHalfOperand(&psInst->asOperands[0]) - && CanForceToHalfOperand(&psInst->asOperands[1]) - && CanForceToHalfOperand(&psInst->asOperands[2]) - && CanForceToHalfOperand(&psInst->asOperands[2])) - isFP16 = true; - int parenthesis = 0; - AddAssignToDest(&psInst->asOperands[0], isFP16 ? SVT_FLOAT16 : SVT_FLOAT, 2, psInst->ui32PreciseMask, parenthesis); - - uint32_t flags = TO_AUTO_EXPAND_TO_VEC2; - flags |= isFP16 ? TO_FLAG_FORCE_HALF : TO_AUTO_BITCAST_TO_FLOAT; - - bcatcstr(glsl, "dot("); - glsl << TranslateOperand(&psInst->asOperands[1], flags); - bcatcstr(glsl, ", "); - glsl << TranslateOperand(&psInst->asOperands[2], flags); - bcatcstr(glsl, ") + "); - glsl << TranslateOperand(&psInst->asOperands[3], flags); - AddAssignPrologue(parenthesis); - break; - } - case OPCODE_POW: - { - // TODO Check POW opcode whether it actually needs the abs - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//POW\n"); - } - psContext->AddIndentation(); - glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); - bcatcstr(glsl, " = powr(abs("); - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE); - bcatcstr(glsl, "), "); - glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); - bcatcstr(glsl, ");\n"); - break; - } - - case OPCODE_IMM_ATOMIC_ALLOC: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_ALLOC\n"); - } - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], 
SVT_UINT, 1, psInst->ui32PreciseMask, numParenthesis); - bcatcstr(glsl, "atomic_fetch_add_explicit("); - glsl << ResourceName(RGROUP_UAV, psInst->asOperands[1].ui32RegisterNumber); - bcatcstr(glsl, "_counter, 1, memory_order::memory_order_relaxed)"); - AddAssignPrologue(numParenthesis); - break; - } - case OPCODE_IMM_ATOMIC_CONSUME: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//IMM_ATOMIC_CONSUME\n"); - } - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, psInst->ui32PreciseMask, numParenthesis); - bcatcstr(glsl, "atomic_fetch_sub_explicit("); - glsl << ResourceName(RGROUP_UAV, psInst->asOperands[1].ui32RegisterNumber); - // Metal atomic sub returns previous value. Therefore minus one here to get the correct data index. - bcatcstr(glsl, "_counter, 1, memory_order::memory_order_relaxed) - 1"); - AddAssignPrologue(numParenthesis); - break; - } - - case OPCODE_NOT: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//NOT\n"); - } - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], SVT_INT, psInst->asOperands[1].GetNumSwizzleElements(), psInst->ui32PreciseMask, numParenthesis); - - bcatcstr(glsl, "~("); - numParenthesis++; - glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, psInst->asOperands[0].GetAccessMask()); - AddAssignPrologue(numParenthesis); - break; - } - case OPCODE_XOR: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//XOR\n"); - } - CallBinaryOp("^", psInst, 0, 1, 2, SVT_UINT); - break; - } - case OPCODE_RESINFO: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//RESINFO\n"); - } - - const uint32_t mask = psInst->asOperands[0].GetAccessMask(); - for (int i = 0; i < 4; ++i) - { - if 
((1 << i) & mask) - GetResInfoData(psInst, psInst->asOperands[2].aui32Swizzle[i], i); - } - - break; - } - - case OPCODE_BUFINFO: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//BUFINFO\n"); - } - psContext->m_Reflection.OnDiagnostics("Metal shading language does not support buffer size query from shader. Pass the size to shader as const instead.\n", 0, true); - break; - } - - case OPCODE_SAMPLE_INFO: - { - if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) - { - psContext->AddIndentation(); - bcatcstr(glsl, "//SAMPLE_INFO\n"); - } - const RESINFO_RETURN_TYPE eResInfoReturnType = psInst->eResInfoReturnType; - psContext->AddIndentation(); - AddAssignToDest(&psInst->asOperands[0], eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT ? SVT_UINT : SVT_FLOAT, 1, psInst->ui32PreciseMask, numParenthesis); - bcatcstr(glsl, TranslateOperand(&psInst->asOperands[1], TO_FLAG_NAME_ONLY).c_str()); - bcatcstr(glsl, ".get_num_samples()"); - AddAssignPrologue(numParenthesis); - break; - } - - case OPCODE_DMAX: - case OPCODE_DMIN: - case OPCODE_DMUL: - case OPCODE_DEQ: - case OPCODE_DGE: - case OPCODE_DLT: - case OPCODE_DNE: - case OPCODE_DMOV: - case OPCODE_DMOVC: - case OPCODE_DTOF: - case OPCODE_FTOD: - case OPCODE_DDIV: - case OPCODE_DRCP: - case OPCODE_MSAD: - case OPCODE_DTOI: - case OPCODE_DTOU: - case OPCODE_ITOD: - case OPCODE_UTOD: - default: - { - ASSERT(0); - break; - } - } - - if (psInst->bSaturate) //Saturate is only for floating point data (float opcodes or MOV) - { - int dstCount = psInst->asOperands[0].GetNumSwizzleElements(); - psContext->AddIndentation(); - bool isFP16 = false; - if (psInst->asOperands[0].GetDataType(psContext) == SVT_FLOAT16) - isFP16 = true; - AddAssignToDest(&psInst->asOperands[0], isFP16 ? 
SVT_FLOAT16 : SVT_FLOAT, dstCount, psInst->ui32PreciseMask, numParenthesis); - bcatcstr(glsl, "clamp("); - - glsl << TranslateOperand(&psInst->asOperands[0], isFP16 ? TO_FLAG_FORCE_HALF : TO_AUTO_BITCAST_TO_FLOAT); - if (isFP16) - bcatcstr(glsl, ", 0.0h, 1.0h)"); - else - bcatcstr(glsl, ", 0.0f, 1.0f)"); - AddAssignPrologue(numParenthesis); - } -} - -#if ENABLE_UNIT_TESTS - -#define UNITY_EXTERNAL_TOOL 1 -#include "Projects/PrecompiledHeaders/UnityPrefix.h" // Needed for defines such as ENABLE_CPP_EXCEPTIONS -#include "Runtime/Testing/Testing.h" - -UNIT_TEST_SUITE(ToMetalInstructionTests) -{ - static void TestAddOpAssignToDest(const char* expect, SHADER_VARIABLE_TYPE srcType, uint32_t srcDim, SHADER_VARIABLE_TYPE dstType, uint32_t dstDim) - { - bstring actual = bfromcstralloc(20, ""); - bstring expected = bfromcstralloc(20, expect); - int parenthesis = 0; - AddOpAssignToDest(actual, srcType, srcDim, dstType, dstDim, 0, parenthesis); - CHECK(bstrcmp(actual, expected) == 0); - bdestroy(actual); - bdestroy(expected); - } - - TEST(AddOpAssignToDest_Works) - { - // Different Type - TestAddOpAssignToDest(" = as_type(", SVT_INT, 1, SVT_FLOAT, 1); - TestAddOpAssignToDest(" = uint(", SVT_INT, 1, SVT_UINT, 1); - TestAddOpAssignToDest(" = as_type(", SVT_FLOAT, 1, SVT_INT, 1); - TestAddOpAssignToDest(" = as_type(", SVT_FLOAT, 1, SVT_UINT, 1); - - TestAddOpAssignToDest(" = as_type(", SVT_INT16, 1, SVT_FLOAT16, 1); - TestAddOpAssignToDest(" = ushort(", SVT_INT16, 1, SVT_UINT16, 1); - TestAddOpAssignToDest(" = as_type(", SVT_FLOAT16, 1, SVT_INT16, 1); - TestAddOpAssignToDest(" = as_type(", SVT_FLOAT16, 1, SVT_UINT16, 1); - - // Simply assign - TestAddOpAssignToDest(" = ", SVT_UINT16, 1, SVT_UINT16, 1); - TestAddOpAssignToDest(" = ", SVT_INT, 4, SVT_INT, 2); - - // Up cast - TestAddOpAssignToDest(" = uint(", SVT_UINT16, 1, SVT_UINT, 1); - TestAddOpAssignToDest(" = float(", SVT_FLOAT16, 1, SVT_FLOAT, 1); - TestAddOpAssignToDest(" = int(", SVT_INT16, 1, SVT_INT, 1); - - // Down cast 
- TestAddOpAssignToDest(" = ushort(", SVT_UINT, 1, SVT_UINT16, 1); - TestAddOpAssignToDest(" = half(", SVT_FLOAT, 1, SVT_FLOAT16, 1); - TestAddOpAssignToDest(" = short(", SVT_INT, 1, SVT_INT16, 1); - - // Increase dimensions - TestAddOpAssignToDest(" = float4(", SVT_FLOAT, 1, SVT_FLOAT, 4); - TestAddOpAssignToDest(" = uint3(", SVT_UINT, 1, SVT_UINT, 3); - TestAddOpAssignToDest(" = uint2(", SVT_UINT, 1, SVT_UINT, 2); - - // Decrease dimensions - TestAddOpAssignToDest(" = ", SVT_FLOAT, 4, SVT_FLOAT, 1); - TestAddOpAssignToDest(" = ", SVT_UINT, 3, SVT_UINT, 1); - TestAddOpAssignToDest(" = ", SVT_UINT, 2, SVT_UINT, 1); - - // Reinterop cast + Increase dimensions - TestAddOpAssignToDest(" = as_type(int4(", SVT_INT, 1, SVT_FLOAT, 4); - TestAddOpAssignToDest(" = uint4(", SVT_INT, 1, SVT_UINT, 4); - TestAddOpAssignToDest(" = as_type(float4(", SVT_FLOAT, 1, SVT_INT, 4); - TestAddOpAssignToDest(" = as_type(float4(", SVT_FLOAT, 1, SVT_UINT, 4); - - // Reinterop cast + Decrease dimensions - TestAddOpAssignToDest(" = as_type(", SVT_INT, 4, SVT_FLOAT, 1); - TestAddOpAssignToDest(" = uint(", SVT_INT, 4, SVT_UINT, 1); - TestAddOpAssignToDest(" = as_type(", SVT_FLOAT, 4, SVT_INT, 1); - TestAddOpAssignToDest(" = as_type(", SVT_FLOAT, 4, SVT_UINT, 1); - - // Different precision + Different Type - TestAddOpAssignToDest(" = float4(", SVT_INT16, 4, SVT_FLOAT, 4); - TestAddOpAssignToDest(" = short4(", SVT_FLOAT, 4, SVT_INT16, 4); - - // Sanity check as low precision not used in metal they should fall back - TestAddOpAssignToDest(" = short4(", SVT_FLOAT, 4, SVT_INT12, 4); - TestAddOpAssignToDest(" = half4(", SVT_INT, 4, SVT_FLOAT10, 4); - } -} -#endif diff --git a/third_party/HLSLcc/src/toMetalOperand.cpp b/third_party/HLSLcc/src/toMetalOperand.cpp deleted file mode 100644 index 103d611..0000000 --- a/third_party/HLSLcc/src/toMetalOperand.cpp +++ /dev/null @@ -1,1277 +0,0 @@ -#include -#include "internal_includes/HLSLccToolkit.h" -#include "internal_includes/HLSLCrossCompilerContext.h" 
-#include "hlslcc.h" -#include "internal_includes/debug.h" -#include "internal_includes/Shader.h" -#include "internal_includes/toMetal.h" -#include -#include - -#include -#include - -using namespace HLSLcc; - -#ifdef _MSC_VER -#if _MSC_VER < 1900 -#define snprintf _snprintf -#endif -#endif - -#ifndef fpcheck -#ifdef _MSC_VER -#define fpcheck(x) (_isnan(x) || !_finite(x)) -#else -#define fpcheck(x) (std::isnan(x) || std::isinf(x)) -#endif -#endif // #ifndef fpcheck - - -// Returns nonzero if types are just different precisions of the same underlying type -static bool AreTypesCompatibleMetal(SHADER_VARIABLE_TYPE a, uint32_t ui32TOFlag) -{ - SHADER_VARIABLE_TYPE b = TypeFlagsToSVTType(ui32TOFlag); - - if (a == b) - return true; - - // Special case for array indices: both uint and int are fine - if ((ui32TOFlag & TO_FLAG_INTEGER) && (ui32TOFlag & TO_FLAG_UNSIGNED_INTEGER) && - (a == SVT_INT || a == SVT_INT16 || a == SVT_UINT || a == SVT_UINT16)) - return true; - - return false; -} - -std::string ToMetal::TranslateOperandSwizzle(const Operand* psOperand, uint32_t ui32ComponentMask, int iRebase, bool includeDot /*= true*/) -{ - std::ostringstream oss; - uint32_t accessMask = ui32ComponentMask & psOperand->GetAccessMask(); - if (psOperand->eType == OPERAND_TYPE_INPUT) - { - int regSpace = psOperand->GetRegisterSpace(psContext); - // Skip swizzle for scalar inputs, but only if we haven't redirected them - if (regSpace == 0) - { - if ((psContext->psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] == 0) && - (psContext->psShader->abScalarInput[regSpace][psOperand->ui32RegisterNumber] & accessMask)) - { - return ""; - } - } - else - { - if ((psContext->psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0) && - (psContext->psShader->abScalarInput[regSpace][psOperand->ui32RegisterNumber] & accessMask)) - { - return ""; - } - } - } - if (psOperand->eType == OPERAND_TYPE_OUTPUT) 
- { - int regSpace = psOperand->GetRegisterSpace(psContext); - // Skip swizzle for scalar outputs, but only if we haven't redirected them - if (regSpace == 0) - { - if ((psContext->psShader->asPhases[psContext->currentPhase].acOutputNeedsRedirect[psOperand->ui32RegisterNumber] == 0) && - (psContext->psShader->abScalarOutput[regSpace][psOperand->ui32RegisterNumber] & accessMask)) - { - return ""; - } - } - else - { - if ((psContext->psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0) && - (psContext->psShader->abScalarOutput[regSpace][psOperand->ui32RegisterNumber] & accessMask)) - { - return ""; - } - } - } - - if (psOperand->iWriteMaskEnabled && - psOperand->iNumComponents != 1) - { - //Component Mask - if (psOperand->eSelMode == OPERAND_4_COMPONENT_MASK_MODE) - { - uint32_t mask; - if (psOperand->ui32CompMask != 0) - mask = psOperand->ui32CompMask & ui32ComponentMask; - else - mask = ui32ComponentMask; - - if (mask != 0 && mask != OPERAND_4_COMPONENT_MASK_ALL) - { - if (includeDot) - oss << "."; - if (mask & OPERAND_4_COMPONENT_MASK_X) - { - ASSERT(iRebase == 0); - oss << "x"; - } - if (mask & OPERAND_4_COMPONENT_MASK_Y) - { - ASSERT(iRebase <= 1); - oss << "xy"[1 - iRebase]; - } - if (mask & OPERAND_4_COMPONENT_MASK_Z) - { - ASSERT(iRebase <= 2); - oss << "xyz"[2 - iRebase]; - } - if (mask & OPERAND_4_COMPONENT_MASK_W) - { - ASSERT(iRebase <= 3); - oss << "xyzw"[3 - iRebase]; - } - } - } - else - //Component Swizzle - if (psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) - { - if (ui32ComponentMask != OPERAND_4_COMPONENT_MASK_ALL || - !(psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_X && - psOperand->aui32Swizzle[1] == OPERAND_4_COMPONENT_Y && - psOperand->aui32Swizzle[2] == OPERAND_4_COMPONENT_Z && - psOperand->aui32Swizzle[3] == OPERAND_4_COMPONENT_W - ) - ) - { - uint32_t i; - - if (includeDot) - oss << "."; - - for (i = 0; i < 4; ++i) - { - if (!(ui32ComponentMask & 
(OPERAND_4_COMPONENT_MASK_X << i))) - continue; - - if (psOperand->aui32Swizzle[i] == OPERAND_4_COMPONENT_X) - { - ASSERT(iRebase == 0); - oss << "x"; - } - else if (psOperand->aui32Swizzle[i] == OPERAND_4_COMPONENT_Y) - { - ASSERT(iRebase <= 1); - oss << "xy"[1 - iRebase]; - } - else if (psOperand->aui32Swizzle[i] == OPERAND_4_COMPONENT_Z) - { - ASSERT(iRebase <= 2); - oss << "xyz"[2 - iRebase]; - } - else if (psOperand->aui32Swizzle[i] == OPERAND_4_COMPONENT_W) - { - ASSERT(iRebase <= 3); - oss << "xyzw"[3 - iRebase]; - } - } - } - } - else if (psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE) // ui32ComponentMask is ignored in this case - { - if (includeDot) - oss << "."; - - if (psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_X) - { - ASSERT(iRebase == 0); - oss << "x"; - } - else if (psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_Y) - { - ASSERT(iRebase <= 1); - oss << "xy"[1 - iRebase]; - } - else if (psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_Z) - { - ASSERT(iRebase <= 2); - oss << "xyz"[2 - iRebase]; - } - else if (psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_W) - { - ASSERT(iRebase <= 3); - oss << "xyzw"[3 - iRebase]; - } - } - } - return oss.str(); -} - -std::string ToMetal::TranslateOperandIndex(const Operand* psOperand, int index) -{ - int i = index; - std::ostringstream oss; - ASSERT(index < psOperand->iIndexDims); - - switch (psOperand->eIndexRep[i]) - { - case OPERAND_INDEX_IMMEDIATE32: - { - oss << "[" << psOperand->aui32ArraySizes[i] << "]"; - return oss.str(); - } - case OPERAND_INDEX_RELATIVE: - { - oss << "[" << TranslateOperand(psOperand->m_SubOperands[i].get(), TO_FLAG_INTEGER) << "]"; - return oss.str(); - } - case OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: - { - oss << "[" << TranslateOperand(psOperand->m_SubOperands[i].get(), TO_FLAG_INTEGER) << " + " << psOperand->aui32ArraySizes[i] << "]"; - return oss.str(); - } - default: - { - ASSERT(0); - return ""; - break; - } - } -} - -/*static std::string 
GetBitcastOp(HLSLCrossCompilerContext *psContext, SHADER_VARIABLE_TYPE from, SHADER_VARIABLE_TYPE to, uint32_t numComponents) -{ - if (psContext->psShader->eTargetLanguage == LANG_METAL) - { - std::ostringstream oss; - oss << "as_type<"; - oss << GetConstructorForTypeMetal(to, numComponents); - oss << ">"; - return oss.str(); - } - else - { - if ((to == SVT_FLOAT || to == SVT_FLOAT16 || to == SVT_FLOAT10) && from == SVT_INT) - return "intBitsToFloat"; - else if ((to == SVT_FLOAT || to == SVT_FLOAT16 || to == SVT_FLOAT10) && from == SVT_UINT) - return "uintBitsToFloat"; - else if (to == SVT_INT && from == SVT_FLOAT) - return "floatBitsToInt"; - else if (to == SVT_UINT && from == SVT_FLOAT) - return "floatBitsToUint"; - } - - ASSERT(0); - return "ERROR missing components in GetBitcastOp()"; -}*/ - - -// Helper function to print floats with full precision -static std::string printFloat(float f) -{ - char temp[30]; - - snprintf(temp, 30, "%.9g", f); - char * ePos = strchr(temp, 'e'); - char * pointPos = strchr(temp, '.'); - - if (ePos == NULL && pointPos == NULL && !fpcheck(f)) - return std::string(temp) + ".0"; - else - return std::string(temp); -} - -// Helper function to print out a single 32-bit immediate value in desired format -static std::string printImmediate32(uint32_t value, SHADER_VARIABLE_TYPE eType) -{ - std::ostringstream oss; - int needsParenthesis = 0; - - // Print floats as bit patterns. 
- if ((eType == SVT_FLOAT || eType == SVT_FLOAT16 || eType == SVT_FLOAT10) && fpcheck(*((float *)(&value)))) - { - oss << "as_type("; - eType = SVT_INT; - needsParenthesis = 1; - } - - switch (eType) - { - default: - ASSERT(0); - case SVT_INT: - case SVT_INT16: - case SVT_INT12: - // Need special handling for anything >= uint 0x3fffffff - if (value > 0x3ffffffe) - oss << "int(0x" << std::hex << value << "u)"; - else - oss << "0x" << std::hex << value << ""; - break; - case SVT_UINT: - case SVT_UINT16: - oss << "0x" << std::hex << value << "u"; - break; - case SVT_FLOAT: - case SVT_FLOAT10: - case SVT_FLOAT16: - oss << printFloat(*((float *)(&value))); - break; - case SVT_BOOL: - if (value == 0) - oss << "false"; - else - oss << "true"; - } - if (needsParenthesis) - oss << ")"; - - return oss.str(); -} - -static std::string MakeCBVarName(const std::string &cbName, const std::string &fullName, bool isUnityInstancingBuffer) -{ - // For Unity instancing buffer: "CBufferName.StructTypeName[] -> CBufferName[]". See ToMetal::DeclareConstantBuffer. - if (isUnityInstancingBuffer && !cbName.empty() && cbName[cbName.size() - 1] == '.' 
&& fullName.find_first_of('[') != std::string::npos) - { - return cbName.substr(0, cbName.size() - 1) + fullName.substr(fullName.find_first_of('[')); - } - return cbName + fullName; -} - -std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase) -{ - std::ostringstream oss; - int numParenthesis = 0; - int hasCtor = 0; - int needsBoolUpscale = 0; // If nonzero, bools need * 0xffffffff in them - SHADER_VARIABLE_TYPE requestedType = TypeFlagsToSVTType(ui32TOFlag); - SHADER_VARIABLE_TYPE eType = psOperand->GetDataType(psContext, requestedType); - int numComponents = psOperand->GetNumSwizzleElements(ui32CompMask); - int requestedComponents = 0; - int scalarWithSwizzle = 0; - - *pui32IgnoreSwizzle = 0; - - if (psOperand->eType == OPERAND_TYPE_TEMP) - { - // Check for scalar - if (psContext->psShader->GetTempComponentCount(eType, psOperand->ui32RegisterNumber) == 1 && psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) - { - scalarWithSwizzle = 1; // Going to need a constructor - } - } - - if (psOperand->eType == OPERAND_TYPE_INPUT) - { - // Check for scalar - // You would think checking would be easy but there is a caveat: - // checking abScalarInput might report as scalar, while in reality that was redirected and now is vector so swizzle must be preserved - // as an example consider we have input: - // float2 x; float y; - // and later on we do - // tex2D(xxx, fixed2(x.x, y)); - // in that case we will generate redirect but which ui32RegisterNumber will be used for it is not strictly "specified" - // so we may end up with treating it as scalar (even though it is vector now) - const int redirectInput = psContext->psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber]; - const bool wasRedirected = redirectInput == 0xFF || redirectInput == 0xFE; - - const int scalarInput = 
psContext->psShader->abScalarInput[psOperand->GetRegisterSpace(psContext)][psOperand->ui32RegisterNumber]; - if (!wasRedirected && (scalarInput & psOperand->GetAccessMask()) && (psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE)) - { - scalarWithSwizzle = 1; - *pui32IgnoreSwizzle = 1; - } - } - - if (piRebase) - *piRebase = 0; - - if (ui32TOFlag & TO_AUTO_EXPAND_TO_VEC2) - requestedComponents = 2; - else if (ui32TOFlag & TO_AUTO_EXPAND_TO_VEC3) - requestedComponents = 3; - else if (ui32TOFlag & TO_AUTO_EXPAND_TO_VEC4) - requestedComponents = 4; - - requestedComponents = std::max(requestedComponents, numComponents); - - if (!(ui32TOFlag & (TO_FLAG_DESTINATION | TO_FLAG_NAME_ONLY | TO_FLAG_DECLARATION_NAME))) - { - if (psOperand->eType == OPERAND_TYPE_IMMEDIATE32 || psOperand->eType == OPERAND_TYPE_IMMEDIATE64) - { - // Mark the operand type to match whatever we're asking for in the flags. - ((Operand *)psOperand)->aeDataType[0] = requestedType; - ((Operand *)psOperand)->aeDataType[1] = requestedType; - ((Operand *)psOperand)->aeDataType[2] = requestedType; - ((Operand *)psOperand)->aeDataType[3] = requestedType; - } - - bool bitcast = false; - if (AreTypesCompatibleMetal(eType, ui32TOFlag) == 0) - { - if (CanDoDirectCast(psContext, eType, requestedType)) - { - hasCtor = 1; - if (eType == SVT_BOOL) - { - needsBoolUpscale = 1; - // make sure to wrap the whole thing in parens so the upscale - // multiply only applies to the bool - oss << "("; - numParenthesis++; - } - oss << GetConstructorForType(psContext, requestedType, requestedComponents, false) << "("; - numParenthesis++; - } - else - { - // Direct cast not possible, need to do bitcast. - oss << "as_type<" << GetConstructorForTypeMetal(requestedType, requestedComponents) << ">("; - hasCtor = 1; - bitcast = true; - numParenthesis++; - } - } - - // Add ctor if needed (upscaling). 
Type conversion is already handled above, so here we must - // use the original type to not make type conflicts in bitcasts - bool needsUpscaling = ((numComponents < requestedComponents) || (scalarWithSwizzle != 0)) && (hasCtor == 0 || bitcast); - - // Add constuctor if half precision is forced to avoid template ambiguity error from compiler - bool needsForcedCtor = (ui32TOFlag & TO_FLAG_FORCE_HALF) && (psOperand->eType == OPERAND_TYPE_IMMEDIATE32 || psOperand->eType == OPERAND_TYPE_IMMEDIATE64); - - if (needsForcedCtor) - requestedComponents = std::max(requestedComponents, 1); - - if (needsUpscaling || needsForcedCtor) - { - oss << GetConstructorForType(psContext, eType, requestedComponents, false) << "("; - - numParenthesis++; - hasCtor = 1; - } - } - - - switch (psOperand->eType) - { - case OPERAND_TYPE_IMMEDIATE32: - { - if (psOperand->iNumComponents == 1) - { - oss << printImmediate32(*((unsigned int*)(&psOperand->afImmediates[0])), requestedType); - } - else - { - int i; - int firstItemAdded = 0; - if (hasCtor == 0) - { - oss << GetConstructorForTypeMetal(requestedType, requestedComponents) << "("; - numParenthesis++; - hasCtor = 1; - } - for (i = 0; i < 4; i++) - { - uint32_t uval; - if (!(ui32CompMask & (1 << i))) - continue; - - if (firstItemAdded) - oss << ", "; - uval = *((uint32_t*)(&psOperand->afImmediates[i >= psOperand->iNumComponents ? 
psOperand->iNumComponents - 1 : i])); - oss << printImmediate32(uval, requestedType); - firstItemAdded = 1; - } - oss << ")"; - *pui32IgnoreSwizzle = 1; - numParenthesis--; - } - break; - } - case OPERAND_TYPE_IMMEDIATE64: - { - ASSERT(0); // doubles not supported on Metal - break; - } - case OPERAND_TYPE_INPUT: - { - int regSpace = psOperand->GetRegisterSpace(psContext); - switch (psOperand->iIndexDims) - { - case INDEX_2D: - { - const ShaderInfo::InOutSignature *psSig = NULL; - psContext->psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->ui32CompMask, &psSig); - if (psContext->psShader->eShaderType == HULL_SHADER || psContext->psShader->eShaderType == DOMAIN_SHADER) - { - oss << "input.cp"; - oss << TranslateOperandIndex(psOperand, 0);//Vertex index - oss << "." << psContext->GetDeclaredInputName(psOperand, piRebase, 1, pui32IgnoreSwizzle); - } - else - { - // Not sure if this codepath is active outside hull/domain - oss << psContext->GetDeclaredInputName(psOperand, piRebase, 0, pui32IgnoreSwizzle); - - oss << TranslateOperandIndex(psOperand, 0);//Vertex index - } - break; - } - default: - { - if (psOperand->eIndexRep[0] == OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE) - { - ASSERT(psContext->psShader->aIndexedInput[regSpace][psOperand->ui32RegisterNumber] != 0); - oss << "phase" << psContext->currentPhase << "_Input" << regSpace << "_" << psOperand->ui32RegisterNumber << "["; - oss << TranslateOperand(psOperand->m_SubOperands[0].get(), TO_FLAG_INTEGER); - oss << "]"; - } - else - { - if (psContext->psShader->aIndexedInput[regSpace][psOperand->ui32RegisterNumber] != 0) - { - const uint32_t parentIndex = psContext->psShader->aIndexedInputParents[regSpace][psOperand->ui32RegisterNumber]; - oss << "phase" << psContext->currentPhase << "_Input" << regSpace << "_" << parentIndex << "[" << (psOperand->ui32RegisterNumber - parentIndex) << "]"; - } - else - { - oss << psContext->GetDeclaredInputName(psOperand, piRebase, 0, 
pui32IgnoreSwizzle); - } - } - break; - } - } - break; - } - case OPERAND_TYPE_OUTPUT: - case OPERAND_TYPE_OUTPUT_DEPTH: - case OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL: - case OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL: - { - int stream = 0; - oss << psContext->GetDeclaredOutputName(psOperand, &stream, pui32IgnoreSwizzle, piRebase, 0); - if (psOperand->m_SubOperands[0].get()) - { - oss << "["; - oss << TranslateOperand(psOperand->m_SubOperands[0].get(), TO_AUTO_BITCAST_TO_INT); - oss << "]"; - } - break; - } - case OPERAND_TYPE_TEMP: - { - SHADER_VARIABLE_TYPE eTempType = psOperand->GetDataType(psContext); - - if (psOperand->eSpecialName == NAME_UNDEFINED && psOperand->specialName.length()) - { - oss << psOperand->specialName; - break; - } - - oss << HLSLCC_TEMP_PREFIX; - ASSERT(psOperand->ui32RegisterNumber < 0x10000); // Sanity check after temp splitting. - switch (eTempType) - { - case SVT_FLOAT: - ASSERT(psContext->psShader->psFloatTempSizes[psOperand->ui32RegisterNumber] != 0); - if (psContext->psShader->psFloatTempSizes[psOperand->ui32RegisterNumber] == 1) - *pui32IgnoreSwizzle = 1; - break; - case SVT_FLOAT16: - ASSERT(psContext->psShader->psFloat16TempSizes[psOperand->ui32RegisterNumber] != 0); - oss << ("16_"); - if (psContext->psShader->psFloat16TempSizes[psOperand->ui32RegisterNumber] == 1) - *pui32IgnoreSwizzle = 1; - break; - case SVT_FLOAT10: - ASSERT(psContext->psShader->psFloat10TempSizes[psOperand->ui32RegisterNumber] != 0); - oss << ("10_"); - if (psContext->psShader->psFloat10TempSizes[psOperand->ui32RegisterNumber] == 1) - *pui32IgnoreSwizzle = 1; - break; - case SVT_INT: - ASSERT(psContext->psShader->psIntTempSizes[psOperand->ui32RegisterNumber] != 0); - oss << ("i"); - if (psContext->psShader->psIntTempSizes[psOperand->ui32RegisterNumber] == 1) - *pui32IgnoreSwizzle = 1; - break; - case SVT_INT16: - ASSERT(psContext->psShader->psInt16TempSizes[psOperand->ui32RegisterNumber] != 0); - oss << ("i16_"); - if 
(psContext->psShader->psInt16TempSizes[psOperand->ui32RegisterNumber] == 1) - *pui32IgnoreSwizzle = 1; - break; - case SVT_INT12: - ASSERT(psContext->psShader->psInt12TempSizes[psOperand->ui32RegisterNumber] != 0); - oss << ("i12_"); - if (psContext->psShader->psInt12TempSizes[psOperand->ui32RegisterNumber] == 1) - *pui32IgnoreSwizzle = 1; - break; - case SVT_UINT: - ASSERT(psContext->psShader->psUIntTempSizes[psOperand->ui32RegisterNumber] != 0); - oss << ("u"); - if (psContext->psShader->psUIntTempSizes[psOperand->ui32RegisterNumber] == 1) - *pui32IgnoreSwizzle = 1; - break; - case SVT_UINT16: - ASSERT(psContext->psShader->psUInt16TempSizes[psOperand->ui32RegisterNumber] != 0); - oss << ("u16_"); - if (psContext->psShader->psUInt16TempSizes[psOperand->ui32RegisterNumber] == 1) - *pui32IgnoreSwizzle = 1; - break; - case SVT_DOUBLE: - ASSERT(psContext->psShader->psDoubleTempSizes[psOperand->ui32RegisterNumber] != 0); - oss << ("d"); - if (psContext->psShader->psDoubleTempSizes[psOperand->ui32RegisterNumber] == 1) - *pui32IgnoreSwizzle = 1; - break; - case SVT_BOOL: - ASSERT(psContext->psShader->psBoolTempSizes[psOperand->ui32RegisterNumber] != 0); - oss << ("b"); - if (psContext->psShader->psBoolTempSizes[psOperand->ui32RegisterNumber] == 1) - *pui32IgnoreSwizzle = 1; - break; - default: - ASSERT(0 && "Should never get here!"); - } - oss << psOperand->ui32RegisterNumber; - break; - } - case OPERAND_TYPE_SPECIAL_IMMCONSTINT: - case OPERAND_TYPE_SPECIAL_IMMCONST: - case OPERAND_TYPE_SPECIAL_OUTBASECOLOUR: - case OPERAND_TYPE_SPECIAL_OUTOFFSETCOLOUR: - case OPERAND_TYPE_SPECIAL_FOG: - case OPERAND_TYPE_SPECIAL_ADDRESS: - case OPERAND_TYPE_SPECIAL_LOOPCOUNTER: - case OPERAND_TYPE_SPECIAL_TEXCOORD: - { - ASSERT(0 && "DX9 shaders no longer supported!"); - break; - } - case OPERAND_TYPE_SPECIAL_POSITION: - { - ASSERT(0 && "TODO normal shader support"); -// bcatcstr(glsl, "gl_Position"); - break; - } - case OPERAND_TYPE_SPECIAL_POINTSIZE: - { - ASSERT(0 && "TODO normal 
shader support"); - // bcatcstr(glsl, "gl_PointSize"); - break; - } - case OPERAND_TYPE_CONSTANT_BUFFER: - { - const ConstantBuffer* psCBuf = NULL; - const ShaderVarType* psVarType = NULL; - int32_t index = -1; - std::vector arrayIndices; - bool isArray = false; - bool isFBInput = false; - psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, psOperand->aui32ArraySizes[0], &psCBuf); - ASSERT(psCBuf != NULL); - - if (ui32TOFlag & TO_FLAG_DECLARATION_NAME) - { - pui32IgnoreSwizzle[0] = 1; - } - std::string cbName = ""; - if (psCBuf) - { - //$Globals. - cbName = GetCBName(psCBuf->name); - cbName += "."; - // Drop the constant buffer name from subpass inputs - if (cbName.substr(0, 19) == "hlslcc_SubpassInput") - cbName = ""; - } - - if ((ui32TOFlag & TO_FLAG_DECLARATION_NAME) != TO_FLAG_DECLARATION_NAME) - { - //Work out the variable name. Don't apply swizzle to that variable yet. - int32_t rebase = 0; - - ASSERT(psCBuf != NULL); - - uint32_t componentsNeeded = 1; - if (psOperand->eSelMode != OPERAND_4_COMPONENT_SELECT_1_MODE) - { - uint32_t minSwiz = 3; - uint32_t maxSwiz = 0; - int i; - for (i = 0; i < 4; i++) - { - if ((ui32CompMask & (1 << i)) == 0) - continue; - minSwiz = std::min(minSwiz, psOperand->aui32Swizzle[i]); - maxSwiz = std::max(maxSwiz, psOperand->aui32Swizzle[i]); - } - componentsNeeded = maxSwiz - minSwiz + 1; - } - - // When we have a component mask that doesn't have .x set (this basically only happens when we manually open operands into components) - // We have to pull down the swizzle array to match the first bit that's actually set - uint32_t tmpSwizzle[4] = { 0 }; - int firstBitSet = 0; - if (ui32CompMask == 0) - ui32CompMask = 0xf; - while ((ui32CompMask & (1 << firstBitSet)) == 0) - firstBitSet++; - std::copy(&psOperand->aui32Swizzle[firstBitSet], &psOperand->aui32Swizzle[4], &tmpSwizzle[0]); - - ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], tmpSwizzle, psCBuf, &psVarType, &isArray, &arrayIndices, 
&rebase, psContext->flags); - - // Get a possible dynamic array index - std::string dynamicIndexStr; - bool needsIndexCalcRevert = false; - bool isAoS = ((!isArray && arrayIndices.size() > 0) || (isArray && arrayIndices.size() > 1)); - bool isUnityInstancingBuffer = isAoS && IsUnityFlexibleInstancingBuffer(psCBuf); - Operand *psDynIndexOp = psOperand->GetDynamicIndexOperand(psContext, psVarType, isAoS, &needsIndexCalcRevert); - - if (psDynIndexOp != NULL) - { - SHADER_VARIABLE_TYPE eType = psDynIndexOp->GetDataType(psContext); - uint32_t opFlags = TO_FLAG_INTEGER; - - if (eType != SVT_INT && eType != SVT_UINT) - opFlags = TO_AUTO_BITCAST_TO_INT; - - dynamicIndexStr = TranslateOperand(psDynIndexOp, opFlags, 0x1); // Just take the first component for the index - } - - if (psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE || (componentsNeeded <= psVarType->Columns)) - { - // Simple case: just access one component - std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(psVarType, arrayIndices, dynamicIndexStr, needsIndexCalcRevert, psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES); - - // Special hack for MSAA subpass inputs: in Metal we can only read the "current" sample, so ignore the index - if (strncmp(fullName.c_str(), "hlslcc_fbinput", 14) == 0) - isFBInput = true; - - if (((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) != 0) && ((psVarType->Class == SVC_MATRIX_ROWS) || (psVarType->Class == SVC_MATRIX_COLUMNS))) - { - // We'll need to add the prefix only to the last section of the name - size_t commaPos = fullName.find_last_of('.'); - char prefix[256]; - sprintf(prefix, HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING, psVarType->Rows, psVarType->Columns); - if (commaPos == std::string::npos) - fullName.insert(0, prefix); - else - fullName.insert(commaPos + 1, prefix); - } - - oss << MakeCBVarName(cbName, fullName, isUnityInstancingBuffer); - } - else - { - // Non-simple case: build vec4 and apply mask - uint32_t i; - int32_t tmpRebase; - std::vector 
tmpArrayIndices; - bool tmpIsArray; - int firstItemAdded = 0; - - oss << GetConstructorForTypeMetal(psVarType->Type, GetNumberBitsSet(ui32CompMask)) << "("; - for (i = 0; i < 4; i++) - { - const ShaderVarType *tmpVarType = NULL; - if ((ui32CompMask & (1 << i)) == 0) - continue; - tmpRebase = 0; - if (firstItemAdded != 0) - oss << ", "; - else - firstItemAdded = 1; - - uint32_t tmpSwizzle[4] = { 0 }; - std::copy(&psOperand->aui32Swizzle[i], &psOperand->aui32Swizzle[4], &tmpSwizzle[0]); - - ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], tmpSwizzle, psCBuf, &tmpVarType, &tmpIsArray, &tmpArrayIndices, &tmpRebase, psContext->flags); - std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(tmpVarType, tmpArrayIndices, dynamicIndexStr, needsIndexCalcRevert, psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES); - oss << MakeCBVarName(cbName, fullName, isUnityInstancingBuffer); - - if (tmpVarType->Class != SVC_SCALAR) - { - uint32_t swizzle; - tmpRebase /= 4; // 0 => 0, 4 => 1, 8 => 2, 12 /= 3 - swizzle = psOperand->aui32Swizzle[i] - tmpRebase; - - oss << "." << ("xyzw"[swizzle]); - } - } - oss << ")"; - // Clear rebase, we've already done it. - rebase = 0; - // Also swizzle. - *pui32IgnoreSwizzle = 1; - } - - - if (isArray) - { - index = arrayIndices.back(); - - // Dynamic index is atm supported only at the root array level. Add here only if there is no such parent. 
- bool hasDynamicIndex = !dynamicIndexStr.empty() && (arrayIndices.size() <= 1); - bool hasImmediateIndex = (index != -1) && !(hasDynamicIndex && index == 0); - - // Ignore index altogether on fb inputs - if (isFBInput) - { - // Nothing to do here - } - else if (hasDynamicIndex || hasImmediateIndex) - { - std::ostringstream fullIndexOss; - if (hasDynamicIndex && hasImmediateIndex) - fullIndexOss << "(" << dynamicIndexStr << " + " << index << ")"; - else if (hasDynamicIndex) - fullIndexOss << dynamicIndexStr; - else // hasImmediateStr - fullIndexOss << index; - - if (((psVarType->Class == SVC_MATRIX_COLUMNS) || (psVarType->Class == SVC_MATRIX_ROWS)) && (psVarType->Elements > 1) && ((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) == 0)) - { - // Special handling for old matrix arrays - oss << "[" << fullIndexOss.str() << " / 4]"; - oss << "[" << fullIndexOss.str() << " %% 4]"; - } - else // This path is atm the default - { - oss << "[" << fullIndexOss.str() << "]"; - } - } - } - - if (psVarType->Class == SVC_VECTOR && !*pui32IgnoreSwizzle) - { - switch (rebase) - { - case 4: - { - if (psVarType->Columns == 2) - { - //.x(GLSL) is .y(HLSL). .y(GLSL) is .z(HLSL) - oss << ".xxyx"; - } - else if (psVarType->Columns == 3) - { - //.x(GLSL) is .y(HLSL). .y(GLSL) is .z(HLSL) .z(GLSL) is .w(HLSL) - oss << ".xxyz"; - } - break; - } - case 8: - { - if (psVarType->Columns == 2) - { - //.x(GLSL) is .z(HLSL). .y(GLSL) is .w(HLSL) - oss << ".xxxy"; - } - break; - } - case 0: - default: - { - //No rebase, but extend to vec4. - if (psVarType->Columns == 2) - { - oss << ".xyxx"; - } - else if (psVarType->Columns == 3) - { - oss << ".xyzx"; - } - break; - } - } - } - - if (psVarType->Class == SVC_SCALAR) - { - *pui32IgnoreSwizzle = 1; - - // CB arrays are all declared as 4-component vectors to match DX11 data layout. - // Therefore add swizzle here to access the element corresponding to the scalar var. 
- if ((psVarType->Elements > 0) && (psContext->psShader->eShaderType == COMPUTE_SHADER)) - { - oss << ".x"; - } - } - } - break; - } - case OPERAND_TYPE_RESOURCE: - { - oss << ResourceName(RGROUP_TEXTURE, psOperand->ui32RegisterNumber); - *pui32IgnoreSwizzle = 1; - break; - } - case OPERAND_TYPE_SAMPLER: - { - oss << ResourceName(RGROUP_SAMPLER, psOperand->ui32RegisterNumber); - *pui32IgnoreSwizzle = 1; - break; - } - case OPERAND_TYPE_FUNCTION_BODY: - { - ASSERT(0); - break; - } - case OPERAND_TYPE_INPUT_FORK_INSTANCE_ID: - case OPERAND_TYPE_INPUT_JOIN_INSTANCE_ID: - { - oss << "phaseInstanceID"; // Not a real builtin, but passed as a function parameter. - *pui32IgnoreSwizzle = 1; - break; - } - case OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER: - { - oss << "ImmCB_" << psContext->currentPhase; - oss << TranslateOperandIndex(psOperand, 0); - break; - } - case OPERAND_TYPE_INPUT_DOMAIN_POINT: - { - oss << "mtl_TessCoord"; - break; - } - case OPERAND_TYPE_INPUT_CONTROL_POINT: - { - int ignoreRedirect = 1; - int regSpace = psOperand->GetRegisterSpace(psContext); - - if ((regSpace == 0 && psContext->psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe) || - (regSpace == 1 && psContext->psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe)) - { - ignoreRedirect = 0; - } - - if (ignoreRedirect) - { - oss << "input.cp"; - oss << TranslateOperandIndex(psOperand, 0);//Vertex index - oss << "." 
<< psContext->GetDeclaredInputName(psOperand, piRebase, ignoreRedirect, pui32IgnoreSwizzle); - } - else - { - oss << psContext->GetDeclaredInputName(psOperand, piRebase, ignoreRedirect, pui32IgnoreSwizzle); - oss << TranslateOperandIndex(psOperand, 0);//Vertex index - } - - // Check for scalar - if ((psContext->psShader->abScalarInput[psOperand->GetRegisterSpace(psContext)][psOperand->ui32RegisterNumber] & psOperand->GetAccessMask()) != 0) - *pui32IgnoreSwizzle = 1; - break; - } - case OPERAND_TYPE_NULL: - { - // Null register, used to discard results of operations - oss << "//null"; - break; - } - case OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID: - { - oss << "controlPointID"; - *pui32IgnoreSwizzle = 1; - break; - } - case OPERAND_TYPE_OUTPUT_COVERAGE_MASK: - { - oss << "mtl_CoverageMask"; - *pui32IgnoreSwizzle = 1; - break; - } - case OPERAND_TYPE_INPUT_COVERAGE_MASK: - { - oss << "mtl_CoverageMask"; - //Skip swizzle on scalar types. - *pui32IgnoreSwizzle = 1; - break; - } - case OPERAND_TYPE_INPUT_THREAD_ID://SV_DispatchThreadID - { - oss << "mtl_ThreadID"; - break; - } - case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP://SV_GroupThreadID - { - oss << "mtl_ThreadIDInGroup"; - break; - } - case OPERAND_TYPE_INPUT_THREAD_GROUP_ID://SV_GroupID - { - oss << "mtl_ThreadGroupID"; - break; - } - case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED://SV_GroupIndex - { - if (requestedComponents > 1 && !hasCtor) - { - oss << GetConstructorForType(psContext, eType, requestedComponents, false) << "("; - numParenthesis++; - hasCtor = 1; - } - for (uint32_t i = 0; i < requestedComponents; i++) - { - oss << "mtl_ThreadIndexInThreadGroup"; - if (i < requestedComponents - 1) - oss << ", "; - } - *pui32IgnoreSwizzle = 1; // No swizzle meaningful for scalar. 
- break; - } - case OPERAND_TYPE_UNORDERED_ACCESS_VIEW: - { - oss << ResourceName(RGROUP_UAV, psOperand->ui32RegisterNumber); - *pui32IgnoreSwizzle = 1; - break; - } - case OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY: - { - oss << "TGSM" << psOperand->ui32RegisterNumber; - *pui32IgnoreSwizzle = 1; - break; - } - case OPERAND_TYPE_INPUT_PRIMITIVEID: - { - // Not supported on Metal - ASSERT(0); - break; - } - case OPERAND_TYPE_INDEXABLE_TEMP: - { - oss << "TempArray" << psOperand->aui32ArraySizes[0] << "["; - if (psOperand->aui32ArraySizes[1] != 0 || !psOperand->m_SubOperands[1].get()) - oss << psOperand->aui32ArraySizes[1]; - - if (psOperand->m_SubOperands[1].get()) - { - if (psOperand->aui32ArraySizes[1] != 0) - oss << "+"; - oss << TranslateOperand(psOperand->m_SubOperands[1].get(), TO_FLAG_INTEGER); - } - oss << "]"; - break; - } - case OPERAND_TYPE_STREAM: - { - // Not supported on Metal - ASSERT(0); - break; - } - case OPERAND_TYPE_INPUT_GS_INSTANCE_ID: - { - // Not supported on Metal - ASSERT(0); - break; - } - case OPERAND_TYPE_THIS_POINTER: - { - ASSERT(0); // Nope. 
- break; - } - case OPERAND_TYPE_INPUT_PATCH_CONSTANT: - { - const ShaderInfo::InOutSignature* psIn; - psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psIn); - *piRebase = psIn->iRebase; - switch (psIn->eSystemValueType) - { - case NAME_POSITION: - oss << "mtl_Position"; - break; - case NAME_RENDER_TARGET_ARRAY_INDEX: - oss << "mtl_Layer"; - *pui32IgnoreSwizzle = 1; - break; - case NAME_CLIP_DISTANCE: - // this is temp variable, declaration and redirecting to actual output is handled in DeclareClipPlanes - char tmpName[128]; sprintf(tmpName, "phase%d_ClipDistance%d", psContext->currentPhase, psIn->ui32SemanticIndex); - oss << tmpName; - *pui32IgnoreSwizzle = 1; - break; - case NAME_VIEWPORT_ARRAY_INDEX: - oss << "mtl_ViewPortIndex"; - *pui32IgnoreSwizzle = 1; - break; - case NAME_VERTEX_ID: - oss << "mtl_VertexID"; - *pui32IgnoreSwizzle = 1; - break; - case NAME_INSTANCE_ID: - oss << "mtl_InstanceID"; - *pui32IgnoreSwizzle = 1; - break; - case NAME_IS_FRONT_FACE: - oss << "(mtl_FrontFace ? 
0xffffffffu : uint(0))"; - *pui32IgnoreSwizzle = 1; - break; - case NAME_PRIMITIVE_ID: - // Not on Metal - ASSERT(0); - break; - - // as far as i understand tesselation factors are always coming from tessFactor variable (it is always declared in ToMetal::Translate) - case NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_LINE_DENSITY_TESSFACTOR: - if (psContext->psShader->aIndexedOutput[1][psOperand->ui32RegisterNumber]) - oss << "tessFactor.edgeTessellationFactor"; - else - oss << "tessFactor.edgeTessellationFactor[0]"; - *pui32IgnoreSwizzle = 1; - break; - case NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: - case NAME_FINAL_LINE_DETAIL_TESSFACTOR: - oss << "tessFactor.edgeTessellationFactor[1]"; - *pui32IgnoreSwizzle = 1; - break; - case NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: - case NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: - oss << "tessFactor.edgeTessellationFactor[2]"; - *pui32IgnoreSwizzle = 1; - break; - case NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: - oss << "tessFactor.edgeTessellationFactor[3]"; - *pui32IgnoreSwizzle = 1; - break; - case NAME_FINAL_TRI_INSIDE_TESSFACTOR: - case NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR: - if (psContext->psShader->aIndexedOutput[1][psOperand->ui32RegisterNumber]) - oss << "tessFactor.insideTessellationFactor"; - else - oss << "tessFactor.insideTessellationFactor[0]"; - *pui32IgnoreSwizzle = 1; - break; - case NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR: - oss << "tessFactor.insideTessellationFactor[1]"; - *pui32IgnoreSwizzle = 1; - break; - - default: - const std::string patchPrefix = "patch."; - - if (psContext->psShader->eShaderType == DOMAIN_SHADER) - oss << psContext->inputPrefix << patchPrefix << psIn->semanticName << psIn->ui32SemanticIndex; - else - oss << patchPrefix << psIn->semanticName << psIn->ui32SemanticIndex; - - // Disable swizzles if this is a scalar - if (psContext->psShader->eShaderType == HULL_SHADER) - { - if 
((psContext->psShader->abScalarOutput[1][psOperand->ui32RegisterNumber] & psOperand->GetAccessMask()) != 0) - *pui32IgnoreSwizzle = 1; - } - else - { - if ((psContext->psShader->abScalarInput[1][psOperand->ui32RegisterNumber] & psOperand->GetAccessMask()) != 0) - *pui32IgnoreSwizzle = 1; - } - break; - } - break; - } - default: - { - ASSERT(0); - break; - } - } - - if (hasCtor && (*pui32IgnoreSwizzle == 0)) - { - oss << TranslateOperandSwizzle(psOperand, ui32CompMask, piRebase ? *piRebase : 0); - *pui32IgnoreSwizzle = 1; - } - - if (needsBoolUpscale) - { - if (requestedType == SVT_UINT || requestedType == SVT_UINT16 || requestedType == SVT_UINT8) - oss << ") * 0xffffffffu"; - else - oss << ") * int(0xffffffffu)"; - numParenthesis--; - - oss << ")"; - numParenthesis--; - } - - while (numParenthesis != 0) - { - oss << ")"; - numParenthesis--; - } - return oss.str(); -} - -std::string ToMetal::TranslateOperand(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t ui32ComponentMask) -{ - std::ostringstream oss; - uint32_t ui32IgnoreSwizzle = 0; - int iRebase = 0; - - // in single-component mode there is no need to use mask - if (psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE) - ui32ComponentMask = OPERAND_4_COMPONENT_MASK_ALL; - - if (ui32TOFlag & TO_FLAG_NAME_ONLY) - { - return TranslateVariableName(psOperand, ui32TOFlag, &ui32IgnoreSwizzle, OPERAND_4_COMPONENT_MASK_ALL, &iRebase); - } - - switch (psOperand->eModifier) - { - case OPERAND_MODIFIER_NONE: - { - break; - } - case OPERAND_MODIFIER_NEG: - { - oss << ("(-"); - break; - } - case OPERAND_MODIFIER_ABS: - { - oss << ("abs("); - break; - } - case OPERAND_MODIFIER_ABSNEG: - { - oss << ("-abs("); - break; - } - } - - oss << TranslateVariableName(psOperand, ui32TOFlag, &ui32IgnoreSwizzle, ui32ComponentMask, &iRebase); - - if (!ui32IgnoreSwizzle) - { - oss << TranslateOperandSwizzle(psOperand, ui32ComponentMask, iRebase); - } - - switch (psOperand->eModifier) - { - case OPERAND_MODIFIER_NONE: - { - break; 
- } - case OPERAND_MODIFIER_NEG: - { - oss << (")"); - break; - } - case OPERAND_MODIFIER_ABS: - { - oss << (")"); - break; - } - case OPERAND_MODIFIER_ABSNEG: - { - oss << (")"); - break; - } - } - return oss.str(); -} diff --git a/third_party/imgui/CMakeLists.txt b/third_party/imgui/CMakeLists.txt index a4b931a..5d70358 100644 --- a/third_party/imgui/CMakeLists.txt +++ b/third_party/imgui/CMakeLists.txt @@ -23,6 +23,9 @@ if (WIN32) imgui/backends/imgui_impl_sdl3.h imgui/backends/imgui_impl_vulkan.cpp imgui/backends/imgui_impl_vulkan.h + imgui/backends/imgui_impl_opengl3.cpp + imgui/backends/imgui_impl_opengl3.h + imgui/backends/imgui_impl_opengl3_loader.h imgui/backends/imgui_impl_dx11.h imgui/backends/imgui_impl_dx11.cpp imgui/backends/imgui_impl_dx12.h @@ -37,12 +40,20 @@ elseif(UNIX AND NOT APPLE) imgui/backends/imgui_impl_sdl3.h imgui/backends/imgui_impl_vulkan.cpp imgui/backends/imgui_impl_vulkan.h + imgui/backends/imgui_impl_opengl3.cpp + imgui/backends/imgui_impl_opengl3.h + imgui/backends/imgui_impl_opengl3_loader.h ) target_link_libraries(${PROJECT_NAME} PUBLIC vulkan SDL3-shared) elseif(APPLE) target_sources(${PROJECT_NAME} PRIVATE imgui/backends/imgui_impl_sdl3.cpp imgui/backends/imgui_impl_sdl3.h + imgui/backends/imgui_impl_vulkan.cpp + imgui/backends/imgui_impl_vulkan.h + imgui/backends/imgui_impl_opengl3.cpp + imgui/backends/imgui_impl_opengl3.h + imgui/backends/imgui_impl_opengl3_loader.h imgui/backends/imgui_impl_metal.h imgui/backends/imgui_impl_metal.mm ) diff --git a/third_party/sdl b/third_party/sdl index 7fbd85a..a4969e3 160000 --- a/third_party/sdl +++ b/third_party/sdl @@ -1 +1 @@ -Subproject commit 7fbd85ad5cf30d46ef20628a212bbec3c1ffec2b +Subproject commit a4969e393e3606a6101ae11c31742f81b51c604c diff --git a/third_party/slang/CMakeLists.txt b/third_party/slang/CMakeLists.txt new file mode 100644 index 0000000..8a713b0 --- /dev/null +++ b/third_party/slang/CMakeLists.txt @@ -0,0 +1,23 @@ +project(slang) + +set(CMAKE_CXX_STANDARD 17) 
+set(CMAKE_CXX_STANDARD_REQUIRED ON) + +set(ALL_FILES "") +retrieve_files(ALL_FILES) + +add_library(${PROJECT_NAME} INTERFACE ${ALL_FILES}) + +target_include_directories(${PROJECT_NAME} INTERFACE inc) + +set(LIB_DIR "") +if (WIN32) + set(LIB_DIR ${CMAKE_CURRENT_SOURCE_DIR}/lib/win64) +elseif (UNIX AND NOT APPLE) + set(LIB_DIR ${CMAKE_CURRENT_SOURCE_DIR}/lib/linux64) +elseif (APPLE) + set(LIB_DIR ${CMAKE_CURRENT_SOURCE_DIR}/lib/macos) +endif() + +target_link_directories(${PROJECT_NAME} INTERFACE ${LIB_DIR}) +target_link_libraries(${PROJECT_NAME} INTERFACE ${LIB_DIR}/slang.lib) diff --git a/third_party/slang/inc/prelude/slang-cpp-host-prelude.h b/third_party/slang/inc/prelude/slang-cpp-host-prelude.h new file mode 100644 index 0000000..f69d03e --- /dev/null +++ b/third_party/slang/inc/prelude/slang-cpp-host-prelude.h @@ -0,0 +1,55 @@ +#ifndef SLANG_CPP_HOST_PRELUDE_H +#define SLANG_CPP_HOST_PRELUDE_H + +#include <cstdio> +#include <cmath> +#include <cstring> + +#define SLANG_COM_PTR_ENABLE_REF_OPERATOR 1 + +#include "../source/slang-rt/slang-rt.h" +#include "../slang-com-ptr.h" +#include "slang-cpp-types.h" + +#ifdef SLANG_LLVM +#include "slang-llvm.h" +#else // SLANG_LLVM +# if SLANG_GCC_FAMILY && __GNUC__ < 6 +# include <cmath> +# define SLANG_PRELUDE_STD std:: +# else +# include <math.h> +# define SLANG_PRELUDE_STD +# endif + +# include <assert.h> +# include <stdlib.h> +# include <string.h> +# include <stdint.h> +#endif // SLANG_LLVM + +#if defined(_MSC_VER) +# define SLANG_PRELUDE_SHARED_LIB_EXPORT __declspec(dllexport) +#else +# define SLANG_PRELUDE_SHARED_LIB_EXPORT __attribute__((__visibility__("default"))) +//# define SLANG_PRELUDE_SHARED_LIB_EXPORT __attribute__ ((dllexport)) __attribute__((__visibility__("default"))) +#endif + +#ifdef __cplusplus +# define SLANG_PRELUDE_EXTERN_C extern "C" +# define SLANG_PRELUDE_EXTERN_C_START extern "C" { +# define SLANG_PRELUDE_EXTERN_C_END } +#else +# define SLANG_PRELUDE_EXTERN_C +# define SLANG_PRELUDE_EXTERN_C_START +# define SLANG_PRELUDE_EXTERN_C_END +#endif + +#include "slang-cpp-scalar-intrinsics.h" 
+ +using namespace Slang; + +template<typename TResult, typename... Args> +using Slang_FuncType = TResult(SLANG_MCALL *)(Args...); + +#endif diff --git a/third_party/slang/inc/prelude/slang-cpp-prelude.h b/third_party/slang/inc/prelude/slang-cpp-prelude.h new file mode 100644 index 0000000..2b848dc --- /dev/null +++ b/third_party/slang/inc/prelude/slang-cpp-prelude.h @@ -0,0 +1,316 @@ +#ifndef SLANG_CPP_PRELUDE_H +#define SLANG_CPP_PRELUDE_H + +// Because the signature of isnan, isfinite, and isinf changed in C++, we use the macro +// to use the version in the std namespace. +// https://stackoverflow.com/questions/39130040/cmath-hides-isnan-in-math-h-in-c14-c11 + +#ifdef SLANG_LLVM +#include "slang-llvm.h" +#else // SLANG_LLVM +# if SLANG_GCC_FAMILY && __GNUC__ < 6 +# include <cmath> +# define SLANG_PRELUDE_STD std:: +# else +# include <math.h> +# define SLANG_PRELUDE_STD +# endif + +# include <assert.h> +# include <stdlib.h> +# include <string.h> +# include <stdint.h> +#endif // SLANG_LLVM + +#if defined(_MSC_VER) +# define SLANG_PRELUDE_SHARED_LIB_EXPORT __declspec(dllexport) +#else +# define SLANG_PRELUDE_SHARED_LIB_EXPORT __attribute__((__visibility__("default"))) +//# define SLANG_PRELUDE_SHARED_LIB_EXPORT __attribute__ ((dllexport)) __attribute__((__visibility__("default"))) +#endif + +#ifdef __cplusplus +# define SLANG_PRELUDE_EXTERN_C extern "C" +# define SLANG_PRELUDE_EXTERN_C_START extern "C" { +# define SLANG_PRELUDE_EXTERN_C_END } +#else +# define SLANG_PRELUDE_EXTERN_C +# define SLANG_PRELUDE_EXTERN_C_START +# define SLANG_PRELUDE_EXTERN_C_END +#endif + +#define SLANG_PRELUDE_EXPORT SLANG_PRELUDE_EXTERN_C SLANG_PRELUDE_SHARED_LIB_EXPORT +#define SLANG_PRELUDE_EXPORT_START SLANG_PRELUDE_EXTERN_C_START SLANG_PRELUDE_SHARED_LIB_EXPORT +#define SLANG_PRELUDE_EXPORT_END SLANG_PRELUDE_EXTERN_C_END + +#ifndef INFINITY +// Must overflow for double +# define INFINITY float(1e+300 * 1e+300) +#endif + +#ifndef SLANG_INFINITY +# define SLANG_INFINITY INFINITY +#endif + +// Detect the compiler type + +#ifndef SLANG_COMPILER +# define SLANG_COMPILER + +/* 
+Compiler defines, see http://sourceforge.net/p/predef/wiki/Compilers/ +NOTE that SLANG_VC holds the compiler version - not just 1 or 0 +*/ +# if defined(_MSC_VER) +# if _MSC_VER >= 1900 +# define SLANG_VC 14 +# elif _MSC_VER >= 1800 +# define SLANG_VC 12 +# elif _MSC_VER >= 1700 +# define SLANG_VC 11 +# elif _MSC_VER >= 1600 +# define SLANG_VC 10 +# elif _MSC_VER >= 1500 +# define SLANG_VC 9 +# else +# error "unknown version of Visual C++ compiler" +# endif +# elif defined(__clang__) +# define SLANG_CLANG 1 +# elif defined(__SNC__) +# define SLANG_SNC 1 +# elif defined(__ghs__) +# define SLANG_GHS 1 +# elif defined(__GNUC__) /* note: __clang__, __SNC__, or __ghs__ imply __GNUC__ */ +# define SLANG_GCC 1 +# else +# error "unknown compiler" +# endif +/* +Any compilers not detected by the above logic are now explicitly zeroed out. +*/ +# ifndef SLANG_VC +# define SLANG_VC 0 +# endif +# ifndef SLANG_CLANG +# define SLANG_CLANG 0 +# endif +# ifndef SLANG_SNC +# define SLANG_SNC 0 +# endif +# ifndef SLANG_GHS +# define SLANG_GHS 0 +# endif +# ifndef SLANG_GCC +# define SLANG_GCC 0 +# endif +#endif /* SLANG_COMPILER */ + +/* +The following section attempts to detect the target platform being compiled for. + +If an application defines `SLANG_PLATFORM` before including this header, +they take responsibility for setting any compiler-dependent macros +used later in the file. + +Most applications should not need to touch this section. 
+*/ +#ifndef SLANG_PLATFORM +# define SLANG_PLATFORM +/** +Operating system defines, see http://sourceforge.net/p/predef/wiki/OperatingSystems/ +*/ +# if defined(WINAPI_FAMILY) && WINAPI_FAMILY == WINAPI_PARTITION_APP +# define SLANG_WINRT 1 /* Windows Runtime, either on Windows RT or Windows 8 */ +# elif defined(XBOXONE) +# define SLANG_XBOXONE 1 +# elif defined(_WIN64) /* note: XBOXONE implies _WIN64 */ +# define SLANG_WIN64 1 +# elif defined(_M_PPC) +# define SLANG_X360 1 +# elif defined(_WIN32) /* note: _M_PPC implies _WIN32 */ +# define SLANG_WIN32 1 +# elif defined(__ANDROID__) +# define SLANG_ANDROID 1 +# elif defined(__linux__) || defined(__CYGWIN__) /* note: __ANDROID__ implies __linux__ */ +# define SLANG_LINUX 1 +# elif defined(__APPLE__) && !defined(SLANG_LLVM) +# include "TargetConditionals.h" +# if TARGET_OS_MAC && !TARGET_OS_IPHONE /* TARGET_OS_MAC is 1 on every Apple OS, so iOS-family targets are excluded explicitly */ +# define SLANG_OSX 1 +# else +# define SLANG_IOS 1 +# endif +# elif defined(__APPLE__) +// On `slang-llvm` we can't include "TargetConditionals.h" in general, so for now assume it's OSX. +# define SLANG_OSX 1 +# elif defined(__CELLOS_LV2__) +# define SLANG_PS3 1 +# elif defined(__ORBIS__) +# define SLANG_PS4 1 +# elif defined(__SNC__) && defined(__arm__) +# define SLANG_PSP2 1 +# elif defined(__ghs__) +# define SLANG_WIIU 1 +# else +# error "unknown target platform" +# endif + + +/* +Any platforms not detected by the above logic are now explicitly zeroed out. 
+*/ +# ifndef SLANG_WINRT +# define SLANG_WINRT 0 +# endif +# ifndef SLANG_XBOXONE +# define SLANG_XBOXONE 0 +# endif +# ifndef SLANG_WIN64 +# define SLANG_WIN64 0 +# endif +# ifndef SLANG_X360 +# define SLANG_X360 0 +# endif +# ifndef SLANG_WIN32 +# define SLANG_WIN32 0 +# endif +# ifndef SLANG_ANDROID +# define SLANG_ANDROID 0 +# endif +# ifndef SLANG_LINUX +# define SLANG_LINUX 0 +# endif +# ifndef SLANG_IOS +# define SLANG_IOS 0 +# endif +# ifndef SLANG_OSX +# define SLANG_OSX 0 +# endif +# ifndef SLANG_PS3 +# define SLANG_PS3 0 +# endif +# ifndef SLANG_PS4 +# define SLANG_PS4 0 +# endif +# ifndef SLANG_PSP2 +# define SLANG_PSP2 0 +# endif +# ifndef SLANG_WIIU +# define SLANG_WIIU 0 +# endif +#endif /* SLANG_PLATFORM */ + +/* Shorthands for "families" of compilers/platforms */ +#define SLANG_GCC_FAMILY (SLANG_CLANG || SLANG_SNC || SLANG_GHS || SLANG_GCC) +#define SLANG_WINDOWS_FAMILY (SLANG_WINRT || SLANG_WIN32 || SLANG_WIN64) +#define SLANG_MICROSOFT_FAMILY (SLANG_XBOXONE || SLANG_X360 || SLANG_WINDOWS_FAMILY) +#define SLANG_LINUX_FAMILY (SLANG_LINUX || SLANG_ANDROID) +#define SLANG_APPLE_FAMILY (SLANG_IOS || SLANG_OSX) /* equivalent to #if __APPLE__ */ +#define SLANG_UNIX_FAMILY (SLANG_LINUX_FAMILY || SLANG_APPLE_FAMILY) /* shortcut for unix/posix platforms */ + +// GCC Specific +#if SLANG_GCC_FAMILY +# define SLANG_ALIGN_OF(T) __alignof__(T) + +# define SLANG_BREAKPOINT(id) __builtin_trap() + +// Use this macro instead of offsetof, because gcc produces warning if offsetof is used on a +// non POD type, even though it produces the correct result +# define SLANG_OFFSET_OF(T, ELEMENT) (size_t(&((T*)1)->ELEMENT) - 1) +#endif // SLANG_GCC_FAMILY + +// Microsoft VC specific +#if SLANG_VC +# define SLANG_ALIGN_OF(T) __alignof(T) + +# define SLANG_BREAKPOINT(id) __debugbreak(); + +#endif // SLANG_VC + +// Default impls + +#ifndef SLANG_OFFSET_OF +# define SLANG_OFFSET_OF(X, Y) offsetof(X, Y) +#endif + +#ifndef SLANG_BREAKPOINT +// Make it crash with a write to 0! 
+# define SLANG_BREAKPOINT(id) (*((int*)0) = int(id)); +#endif + +// If slang.h has been included we don't need any of these definitions +#ifndef SLANG_H + +/* Macro for declaring if a method is no throw. Should be set before the return parameter. */ +#ifndef SLANG_NO_THROW +# if SLANG_WINDOWS_FAMILY && !defined(SLANG_DISABLE_EXCEPTIONS) +# define SLANG_NO_THROW __declspec(nothrow) +# endif +#endif +#ifndef SLANG_NO_THROW +# define SLANG_NO_THROW +#endif + +/* The `SLANG_STDCALL` and `SLANG_MCALL` defines are used to set the calling +convention for interface methods. +*/ +#ifndef SLANG_STDCALL +# if SLANG_MICROSOFT_FAMILY +# define SLANG_STDCALL __stdcall +# else +# define SLANG_STDCALL +# endif +#endif +#ifndef SLANG_MCALL +# define SLANG_MCALL SLANG_STDCALL +#endif + +#ifndef SLANG_FORCE_INLINE +# define SLANG_FORCE_INLINE inline +#endif + +// TODO(JS): Should these be in slang-cpp-types.h? +// They are more likely to clash with slang.h + +struct SlangUUID +{ + uint32_t data1; + uint16_t data2; + uint16_t data3; + uint8_t data4[8]; +}; + +typedef int32_t SlangResult; + +struct ISlangUnknown +{ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL queryInterface(SlangUUID const& uuid, void** outObject) = 0; + virtual SLANG_NO_THROW uint32_t SLANG_MCALL addRef() = 0; + virtual SLANG_NO_THROW uint32_t SLANG_MCALL release() = 0; +}; + +#define SLANG_COM_INTERFACE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \ + public: \ + SLANG_FORCE_INLINE static const SlangUUID& getTypeGuid() \ + { \ + static const SlangUUID guid = { a, b, c, d0, d1, d2, d3, d4, d5, d6, d7 }; \ + return guid; \ + } +#endif // SLANG_H + +// Includes + +#include "slang-cpp-scalar-intrinsics.h" +#include "slang-cpp-types.h" + +// TODO(JS): Hack! Output C++ code from slang can copy uninitialized variables. 
+#if defined(_MSC_VER) +# pragma warning(disable : 4700) +#endif + +#ifndef SLANG_UNROLL +# define SLANG_UNROLL +#endif + +#endif diff --git a/third_party/slang/inc/prelude/slang-cpp-scalar-intrinsics.h b/third_party/slang/inc/prelude/slang-cpp-scalar-intrinsics.h new file mode 100644 index 0000000..acbe001 --- /dev/null +++ b/third_party/slang/inc/prelude/slang-cpp-scalar-intrinsics.h @@ -0,0 +1,498 @@ +#ifndef SLANG_PRELUDE_SCALAR_INTRINSICS_H +#define SLANG_PRELUDE_SCALAR_INTRINSICS_H + +#if !defined(SLANG_LLVM) && SLANG_PROCESSOR_X86_64 && SLANG_VC +// If we have visual studio and 64 bit processor, we can assume we have popcnt, and can include x86 intrinsics +# include <intrin.h> +#endif + +#ifndef SLANG_FORCE_INLINE +# define SLANG_FORCE_INLINE inline +#endif + +#ifdef SLANG_PRELUDE_NAMESPACE +namespace SLANG_PRELUDE_NAMESPACE { +#endif + +#ifndef SLANG_PRELUDE_PI +# define SLANG_PRELUDE_PI 3.14159265358979323846 +#endif + + +union Union32 +{ + uint32_t u; + int32_t i; + float f; +}; + +union Union64 +{ + uint64_t u; + int64_t i; + double d; +}; + +// 32 bit cast conversions +SLANG_FORCE_INLINE int32_t _bitCastFloatToInt(float f) { Union32 u; u.f = f; return u.i; } +SLANG_FORCE_INLINE float _bitCastIntToFloat(int32_t i) { Union32 u; u.i = i; return u.f; } +SLANG_FORCE_INLINE uint32_t _bitCastFloatToUInt(float f) { Union32 u; u.f = f; return u.u; } +SLANG_FORCE_INLINE float _bitCastUIntToFloat(uint32_t ui) { Union32 u; u.u = ui; return u.f; } + +// ----------------------------- F16 ----------------------------------------- + + +// This impl is based on FloatToHalf that is in Slang codebase. It is declared inline because this header can be included from more than one translation unit. +SLANG_FORCE_INLINE uint32_t f32tof16(const float value) +{ + const uint32_t inBits = _bitCastFloatToUInt(value); + + // bits initially set to just the sign bit + uint32_t bits = (inBits >> 16) & 0x8000; + // Mantissa can't be used as is, as it holds last bit, for rounding. 
+ uint32_t m = (inBits >> 12) & 0x07ff; + uint32_t e = (inBits >> 23) & 0xff; + + if (e < 103) + { + // It's zero + return bits; + } + if (e == 0xff) + { + // Could be a NAN or INF. Is INF if *input* mantissa is 0. + + // Remove last bit for rounding to make output mantissa. + m >>= 1; + + // We *assume* float16/float32 signaling bit and remaining bits + // semantics are the same. (The signalling bit convention is target specific!). + // Non signal bit's usage within mantissa for a NAN are also target specific. + + // If the m is 0, it could be because the result is INF, but it could also be because all the + // bits that made NAN were dropped as we have less mantissa bits in f16. + + // To fix for this we make non zero if m is 0 and the input mantissa was not. + // This will (typically) produce a signalling NAN. + m += uint32_t(m == 0 && (inBits & 0x007fffffu)); + + // Combine for output + return (bits | 0x7c00u | m); + } + if (e > 142) + { + // INF. + return bits | 0x7c00u; + } + if (e < 113) + { + m |= 0x0800u; + bits |= (m >> (114 - e)) + ((m >> (113 - e)) & 1); + return bits; + } + bits |= ((e - 112) << 10) | (m >> 1); + bits += m & 1; + return bits; +} + +static const float g_f16tof32Magic = _bitCastIntToFloat((127 + (127 - 15)) << 23); + +SLANG_FORCE_INLINE float f16tof32(const uint32_t value) // inline: defined in a header, so it must not emit one external definition per including translation unit +{ + const uint32_t sign = (value & 0x8000) << 16; + uint32_t exponent = (value & 0x7c00) >> 10; + uint32_t mantissa = (value & 0x03ff); + + if (exponent == 0) + { + // If mantissa is 0 we are done, as output is 0. + // If it's not zero we must have a denormal. + if (mantissa) + { + // We have a denormal so use the magic to do exponent adjust + return _bitCastIntToFloat(sign | ((value & 0x7fff) << 13)) * g_f16tof32Magic; + } + } + else + { + // If the exponent is NAN or INF exponent is 0x1f on input. + // If that's the case, we just need to set the exponent to 0xff on output + // and the mantissa can just stay the same. 
If its 0 it's INF, else it is NAN and we just copy the bits + // + // Else we need to correct the exponent in the normalized case. + exponent = (exponent == 0x1F) ? 0xff : (exponent + (-15 + 127)); + } + + return _bitCastUIntToFloat(sign | (exponent << 23) | (mantissa << 13)); +} + +// ----------------------------- F32 ----------------------------------------- + +// Helpers +SLANG_FORCE_INLINE float F32_calcSafeRadians(float radians); + +#ifdef SLANG_LLVM + +SLANG_PRELUDE_EXTERN_C_START + +// Unary +float F32_ceil(float f); +float F32_floor(float f); +float F32_round(float f); +float F32_sin(float f); +float F32_cos(float f); +float F32_tan(float f); +float F32_asin(float f); +float F32_acos(float f); +float F32_atan(float f); +float F32_sinh(float f); +float F32_cosh(float f); +float F32_tanh(float f); +float F32_log2(float f); +float F32_log(float f); +float F32_log10(float f); +float F32_exp2(float f); +float F32_exp(float f); +float F32_abs(float f); +float F32_trunc(float f); +float F32_sqrt(float f); + +bool F32_isnan(float f); +bool F32_isfinite(float f); +bool F32_isinf(float f); + +// Binary +SLANG_FORCE_INLINE float F32_min(float a, float b) { return a < b ? a : b; } +SLANG_FORCE_INLINE float F32_max(float a, float b) { return a > b ? 
a : b; } +float F32_pow(float a, float b); +float F32_fmod(float a, float b); +float F32_remainder(float a, float b); +float F32_atan2(float a, float b); + +float F32_frexp(float x, int* e); + +float F32_modf(float x, float* ip); + +// Ternary +SLANG_FORCE_INLINE float F32_fma(float a, float b, float c) { return a * b + c; } + +SLANG_PRELUDE_EXTERN_C_END + +#else + +// Unary +SLANG_FORCE_INLINE float F32_ceil(float f) { return ::ceilf(f); } +SLANG_FORCE_INLINE float F32_floor(float f) { return ::floorf(f); } +SLANG_FORCE_INLINE float F32_round(float f) { return ::roundf(f); } +SLANG_FORCE_INLINE float F32_sin(float f) { return ::sinf(f); } +SLANG_FORCE_INLINE float F32_cos(float f) { return ::cosf(f); } +SLANG_FORCE_INLINE float F32_tan(float f) { return ::tanf(f); } +SLANG_FORCE_INLINE float F32_asin(float f) { return ::asinf(f); } +SLANG_FORCE_INLINE float F32_acos(float f) { return ::acosf(f); } +SLANG_FORCE_INLINE float F32_atan(float f) { return ::atanf(f); } +SLANG_FORCE_INLINE float F32_sinh(float f) { return ::sinhf(f); } +SLANG_FORCE_INLINE float F32_cosh(float f) { return ::coshf(f); } +SLANG_FORCE_INLINE float F32_tanh(float f) { return ::tanhf(f); } +SLANG_FORCE_INLINE float F32_log2(float f) { return ::log2f(f); } +SLANG_FORCE_INLINE float F32_log(float f) { return ::logf(f); } +SLANG_FORCE_INLINE float F32_log10(float f) { return ::log10f(f); } +SLANG_FORCE_INLINE float F32_exp2(float f) { return ::exp2f(f); } +SLANG_FORCE_INLINE float F32_exp(float f) { return ::expf(f); } +SLANG_FORCE_INLINE float F32_abs(float f) { return ::fabsf(f); } +SLANG_FORCE_INLINE float F32_trunc(float f) { return ::truncf(f); } +SLANG_FORCE_INLINE float F32_sqrt(float f) { return ::sqrtf(f); } + +SLANG_FORCE_INLINE bool F32_isnan(float f) { return SLANG_PRELUDE_STD isnan(f); } +SLANG_FORCE_INLINE bool F32_isfinite(float f) { return SLANG_PRELUDE_STD isfinite(f); } +SLANG_FORCE_INLINE bool F32_isinf(float f) { return SLANG_PRELUDE_STD isinf(f); } + +// Binary 
+SLANG_FORCE_INLINE float F32_min(float a, float b) { return ::fminf(a, b); } +SLANG_FORCE_INLINE float F32_max(float a, float b) { return ::fmaxf(a, b); } +SLANG_FORCE_INLINE float F32_pow(float a, float b) { return ::powf(a, b); } +SLANG_FORCE_INLINE float F32_fmod(float a, float b) { return ::fmodf(a, b); } +SLANG_FORCE_INLINE float F32_remainder(float a, float b) { return ::remainderf(a, b); } +SLANG_FORCE_INLINE float F32_atan2(float a, float b) { return float(::atan2(a, b)); } + +SLANG_FORCE_INLINE float F32_frexp(float x, int* e) { return ::frexpf(x, e); } + +SLANG_FORCE_INLINE float F32_modf(float x, float* ip) +{ + return ::modff(x, ip); +} + +// Ternary +SLANG_FORCE_INLINE float F32_fma(float a, float b, float c) { return ::fmaf(a, b, c); } + +#endif + +SLANG_FORCE_INLINE float F32_calcSafeRadians(float radians) +{ + // Put 0 to 2pi cycles to cycle around 0 to 1 + float a = radians * (1.0f / float(SLANG_PRELUDE_PI * 2)); + // Get truncated fraction, as value in 0 - 1 range + a = a - F32_floor(a); + // Convert back to 0 - 2pi range + return (a * float(SLANG_PRELUDE_PI * 2)); +} + +SLANG_FORCE_INLINE float F32_rsqrt(float f) { return 1.0f / F32_sqrt(f); } +SLANG_FORCE_INLINE float F32_sign(float f) { return ( f == 0.0f) ? f : (( f < 0.0f) ? 
-1.0f : 1.0f); } +SLANG_FORCE_INLINE float F32_frac(float f) { return f - F32_floor(f); } + +SLANG_FORCE_INLINE uint32_t F32_asuint(float f) { Union32 u; u.f = f; return u.u; } +SLANG_FORCE_INLINE int32_t F32_asint(float f) { Union32 u; u.f = f; return u.i; } + +// ----------------------------- F64 ----------------------------------------- + +SLANG_FORCE_INLINE double F64_calcSafeRadians(double radians); + +#ifdef SLANG_LLVM + +SLANG_PRELUDE_EXTERN_C_START + +// Unary +double F64_ceil(double f); +double F64_floor(double f); +double F64_round(double f); +double F64_sin(double f); +double F64_cos(double f); +double F64_tan(double f); +double F64_asin(double f); +double F64_acos(double f); +double F64_atan(double f); +double F64_sinh(double f); +double F64_cosh(double f); +double F64_tanh(double f); +double F64_log2(double f); +double F64_log(double f); +double F64_log10(float f); +double F64_exp2(double f); +double F64_exp(double f); +double F64_abs(double f); +double F64_trunc(double f); +double F64_sqrt(double f); + +bool F64_isnan(double f); +bool F64_isfinite(double f); +bool F64_isinf(double f); + +// Binary +SLANG_FORCE_INLINE double F64_min(double a, double b) { return a < b ? a : b; } +SLANG_FORCE_INLINE double F64_max(double a, double b) { return a > b ? 
a : b; } +double F64_pow(double a, double b); +double F64_fmod(double a, double b); +double F64_remainder(double a, double b); +double F64_atan2(double a, double b); + +double F64_frexp(double x, int* e); + +double F64_modf(double x, double* ip); + +// Ternary +SLANG_FORCE_INLINE double F64_fma(double a, double b, double c) { return a * b + c; } + +SLANG_PRELUDE_EXTERN_C_END + +#else // SLANG_LLVM + +// Unary +SLANG_FORCE_INLINE double F64_ceil(double f) { return ::ceil(f); } +SLANG_FORCE_INLINE double F64_floor(double f) { return ::floor(f); } +SLANG_FORCE_INLINE double F64_round(double f) { return ::round(f); } +SLANG_FORCE_INLINE double F64_sin(double f) { return ::sin(f); } +SLANG_FORCE_INLINE double F64_cos(double f) { return ::cos(f); } +SLANG_FORCE_INLINE double F64_tan(double f) { return ::tan(f); } +SLANG_FORCE_INLINE double F64_asin(double f) { return ::asin(f); } +SLANG_FORCE_INLINE double F64_acos(double f) { return ::acos(f); } +SLANG_FORCE_INLINE double F64_atan(double f) { return ::atan(f); } +SLANG_FORCE_INLINE double F64_sinh(double f) { return ::sinh(f); } +SLANG_FORCE_INLINE double F64_cosh(double f) { return ::cosh(f); } +SLANG_FORCE_INLINE double F64_tanh(double f) { return ::tanh(f); } +SLANG_FORCE_INLINE double F64_log2(double f) { return ::log2(f); } +SLANG_FORCE_INLINE double F64_log(double f) { return ::log(f); } +SLANG_FORCE_INLINE double F64_log10(double f) { return ::log10(f); } // double parameter: a float parameter would round the argument before the call +SLANG_FORCE_INLINE double F64_exp2(double f) { return ::exp2(f); } +SLANG_FORCE_INLINE double F64_exp(double f) { return ::exp(f); } +SLANG_FORCE_INLINE double F64_abs(double f) { return ::fabs(f); } +SLANG_FORCE_INLINE double F64_trunc(double f) { return ::trunc(f); } +SLANG_FORCE_INLINE double F64_sqrt(double f) { return ::sqrt(f); } + + +SLANG_FORCE_INLINE bool F64_isnan(double f) { return SLANG_PRELUDE_STD isnan(f); } +SLANG_FORCE_INLINE bool F64_isfinite(double f) { return SLANG_PRELUDE_STD isfinite(f); } +SLANG_FORCE_INLINE bool F64_isinf(double f) { 
return SLANG_PRELUDE_STD isinf(f); } + +// Binary +SLANG_FORCE_INLINE double F64_min(double a, double b) { return ::fmin(a, b); } +SLANG_FORCE_INLINE double F64_max(double a, double b) { return ::fmax(a, b); } +SLANG_FORCE_INLINE double F64_pow(double a, double b) { return ::pow(a, b); } +SLANG_FORCE_INLINE double F64_fmod(double a, double b) { return ::fmod(a, b); } +SLANG_FORCE_INLINE double F64_remainder(double a, double b) { return ::remainder(a, b); } +SLANG_FORCE_INLINE double F64_atan2(double a, double b) { return ::atan2(a, b); } + +SLANG_FORCE_INLINE double F64_frexp(double x, int* e) { return ::frexp(x, e); } + +SLANG_FORCE_INLINE double F64_modf(double x, double* ip) +{ + return ::modf(x, ip); +} + +// Ternary +SLANG_FORCE_INLINE double F64_fma(double a, double b, double c) { return ::fma(a, b, c); } + +#endif // SLANG_LLVM + +SLANG_FORCE_INLINE double F64_rsqrt(double f) { return 1.0 / F64_sqrt(f); } +SLANG_FORCE_INLINE double F64_sign(double f) { return (f == 0.0) ? f : ((f < 0.0) ? -1.0 : 1.0); } +SLANG_FORCE_INLINE double F64_frac(double f) { return f - F64_floor(f); } + +SLANG_FORCE_INLINE void F64_asuint(double d, uint32_t* low, uint32_t* hi) +{ + Union64 u; + u.d = d; + *low = uint32_t(u.u); + *hi = uint32_t(u.u >> 32); +} + +SLANG_FORCE_INLINE void F64_asint(double d, int32_t* low, int32_t* hi) +{ + Union64 u; + u.d = d; + *low = int32_t(u.u); + *hi = int32_t(u.u >> 32); +} + +SLANG_FORCE_INLINE double F64_calcSafeRadians(double radians) +{ + // Put 0 to 2pi cycles to cycle around 0 to 1 + double a = radians * (1.0f / (SLANG_PRELUDE_PI * 2)); + // Get truncated fraction, as value in 0 - 1 range + a = a - F64_floor(a); + // Convert back to 0 - 2pi range + return (a * (SLANG_PRELUDE_PI * 2)); +} + +// ----------------------------- I32 ----------------------------------------- + +SLANG_FORCE_INLINE int32_t I32_abs(int32_t f) { return (f < 0) ? -f : f; } + +SLANG_FORCE_INLINE int32_t I32_min(int32_t a, int32_t b) { return a < b ? 
a : b; } +SLANG_FORCE_INLINE int32_t I32_max(int32_t a, int32_t b) { return a > b ? a : b; } + +SLANG_FORCE_INLINE float I32_asfloat(int32_t x) { Union32 u; u.i = x; return u.f; } +SLANG_FORCE_INLINE uint32_t I32_asuint(int32_t x) { return uint32_t(x); } +SLANG_FORCE_INLINE double I32_asdouble(int32_t low, int32_t hi ) +{ + Union64 u; + u.u = (uint64_t(hi) << 32) | uint32_t(low); + return u.d; +} + +// ----------------------------- U32 ----------------------------------------- + +SLANG_FORCE_INLINE uint32_t U32_abs(uint32_t f) { return f; } + +SLANG_FORCE_INLINE uint32_t U32_min(uint32_t a, uint32_t b) { return a < b ? a : b; } +SLANG_FORCE_INLINE uint32_t U32_max(uint32_t a, uint32_t b) { return a > b ? a : b; } + +SLANG_FORCE_INLINE float U32_asfloat(uint32_t x) { Union32 u; u.u = x; return u.f; } +SLANG_FORCE_INLINE uint32_t U32_asint(int32_t x) { return uint32_t(x); } + +SLANG_FORCE_INLINE double U32_asdouble(uint32_t low, uint32_t hi) +{ + Union64 u; + u.u = (uint64_t(hi) << 32) | low; + return u.d; +} + + +SLANG_FORCE_INLINE uint32_t U32_countbits(uint32_t v) +{ +#if SLANG_GCC_FAMILY && !defined(SLANG_LLVM) + return __builtin_popcount(v); +#elif SLANG_PROCESSOR_X86_64 && SLANG_VC + return __popcnt(v); +#else + uint32_t c = 0; + while (v) + { + c++; + v &= v - 1; + } + return c; +#endif +} + +// ----------------------------- U64 ----------------------------------------- + +SLANG_FORCE_INLINE uint64_t U64_abs(uint64_t f) { return f; } + +SLANG_FORCE_INLINE uint64_t U64_min(uint64_t a, uint64_t b) { return a < b ? a : b; } +SLANG_FORCE_INLINE uint64_t U64_max(uint64_t a, uint64_t b) { return a > b ? a : b; } + +// TODO(JS): We don't define countbits for 64bit in stdlib currently. +// It's not clear from documentation if it should return 32 or 64 bits, if it exists. +// 32 bits can always hold the result, and will be implicitly promoted. 
+SLANG_FORCE_INLINE uint32_t U64_countbits(uint64_t v) // Returns the number of set bits in v.
+{
+#if SLANG_GCC_FAMILY && !defined(SLANG_LLVM)
+    return uint32_t(__builtin_popcountll(v)); // 'll' variant: 'long' is only 32 bits on LLP64/ILP32 targets
+#elif SLANG_PROCESSOR_X86_64 && SLANG_VC
+    return uint32_t(__popcnt64(v));
+#else // Portable fallback: clear the lowest set bit until none remain.
+    uint32_t c = 0;
+    while (v)
+    {
+        c++;
+        v &= v - 1;
+    }
+    return c;
+#endif
+}
+
+// ----------------------------- I64 -----------------------------------------
+
+SLANG_FORCE_INLINE int64_t I64_abs(int64_t f) { return (f < 0) ? -f : f; }
+
+SLANG_FORCE_INLINE int64_t I64_min(int64_t a, int64_t b) { return a < b ? a : b; }
+SLANG_FORCE_INLINE int64_t I64_max(int64_t a, int64_t b) { return a > b ? a : b; }
+
+
+// ----------------------------- Interlocked ---------------------------------
+
+#if SLANG_LLVM
+
+#else // SLANG_LLVM
+
+#   ifdef _WIN32
+#       include <intrin.h> // declares _InterlockedExchangeAdd
+#   endif
+
+SLANG_FORCE_INLINE void InterlockedAdd(uint32_t* dest, uint32_t value, uint32_t* oldValue) // Atomically adds value to *dest; *oldValue receives the previous contents. Marked inline like the other header-defined functions so that several translation units can include this header.
+{
+#   ifdef _WIN32
+    *oldValue = _InterlockedExchangeAdd((long*)dest, (long)value);
+#   else
+    *oldValue = __sync_fetch_and_add(dest, value);
+#   endif
+}
+
+#endif // SLANG_LLVM
+
+
+// ----------------------- fmod -------------------------- (overloads that pick the F32/F64 implementation by argument type)
+SLANG_FORCE_INLINE float _slang_fmod(float x, float y)
+{
+    return F32_fmod(x, y);
+}
+SLANG_FORCE_INLINE double _slang_fmod(double x, double y)
+{
+    return F64_fmod(x, y);
+}
+
+#ifdef SLANG_PRELUDE_NAMESPACE
+}
+#endif
+
+#endif
diff --git a/third_party/slang/inc/prelude/slang-cpp-types-core.h b/third_party/slang/inc/prelude/slang-cpp-types-core.h
new file mode 100644
index 0000000..25fe472
--- /dev/null
+++ b/third_party/slang/inc/prelude/slang-cpp-types-core.h
@@ -0,0 +1,578 @@
+#ifndef SLANG_PRELUDE_CPP_TYPES_CORE_H
+#define SLANG_PRELUDE_CPP_TYPES_CORE_H
+
+#ifndef SLANG_PRELUDE_ASSERT
+#   ifdef SLANG_PRELUDE_ENABLE_ASSERT
+#       define SLANG_PRELUDE_ASSERT(VALUE) assert(VALUE)
+#   else
+#       define SLANG_PRELUDE_ASSERT(VALUE)
+#   endif
+#endif
+
+// Since we are using unsigned arithmetic, care is needed in this comparison.
+// It is *assumed* that sizeInBytes >= elemSize. Which means (sizeInBytes >= elemSize) >= 0 +// Which means only a single test is needed + +// Asserts for bounds checking. +// It is assumed index/count are unsigned types. +#define SLANG_BOUND_ASSERT(index, count) SLANG_PRELUDE_ASSERT(index < count); +#define SLANG_BOUND_ASSERT_BYTE_ADDRESS(index, elemSize, sizeInBytes) SLANG_PRELUDE_ASSERT(index <= (sizeInBytes - elemSize) && (index & 3) == 0); + +// Macros to zero index if an access is out of range +#define SLANG_BOUND_ZERO_INDEX(index, count) index = (index < count) ? index : 0; +#define SLANG_BOUND_ZERO_INDEX_BYTE_ADDRESS(index, elemSize, sizeInBytes) index = (index <= (sizeInBytes - elemSize)) ? index : 0; + +// The 'FIX' macro define how the index is fixed. The default is to do nothing. If SLANG_ENABLE_BOUND_ZERO_INDEX +// the fix macro will zero the index, if out of range +#ifdef SLANG_ENABLE_BOUND_ZERO_INDEX +# define SLANG_BOUND_FIX(index, count) SLANG_BOUND_ZERO_INDEX(index, count) +# define SLANG_BOUND_FIX_BYTE_ADDRESS(index, elemSize, sizeInBytes) SLANG_BOUND_ZERO_INDEX_BYTE_ADDRESS(index, elemSize, sizeInBytes) +# define SLANG_BOUND_FIX_FIXED_ARRAY(index, count) SLANG_BOUND_ZERO_INDEX(index, count) +#else +# define SLANG_BOUND_FIX(index, count) +# define SLANG_BOUND_FIX_BYTE_ADDRESS(index, elemSize, sizeInBytes) +# define SLANG_BOUND_FIX_FIXED_ARRAY(index, count) +#endif + +#ifndef SLANG_BOUND_CHECK +# define SLANG_BOUND_CHECK(index, count) SLANG_BOUND_ASSERT(index, count) SLANG_BOUND_FIX(index, count) +#endif + +#ifndef SLANG_BOUND_CHECK_BYTE_ADDRESS +# define SLANG_BOUND_CHECK_BYTE_ADDRESS(index, elemSize, sizeInBytes) SLANG_BOUND_ASSERT_BYTE_ADDRESS(index, elemSize, sizeInBytes) SLANG_BOUND_FIX_BYTE_ADDRESS(index, elemSize, sizeInBytes) +#endif + +#ifndef SLANG_BOUND_CHECK_FIXED_ARRAY +# define SLANG_BOUND_CHECK_FIXED_ARRAY(index, count) SLANG_BOUND_ASSERT(index, count) SLANG_BOUND_FIX_FIXED_ARRAY(index, count) +#endif + +struct TypeInfo +{ + size_t 
typeSize; +}; + +template +struct FixedArray +{ + const T& operator[](size_t index) const { SLANG_BOUND_CHECK_FIXED_ARRAY(index, SIZE); return m_data[index]; } + T& operator[](size_t index) { SLANG_BOUND_CHECK_FIXED_ARRAY(index, SIZE); return m_data[index]; } + + T m_data[SIZE]; +}; + +// An array that has no specified size, becomes a 'Array'. This stores the size so it can potentially +// do bounds checking. +template +struct Array +{ + const T& operator[](size_t index) const { SLANG_BOUND_CHECK(index, count); return data[index]; } + T& operator[](size_t index) { SLANG_BOUND_CHECK(index, count); return data[index]; } + + T* data; + size_t count; +}; + +/* Constant buffers become a pointer to the contained type, so ConstantBuffer becomes T* in C++ code. +*/ + +template +struct Vector; + +template +struct Vector +{ + T x; + const T& operator[](size_t /*index*/) const { return x; } + T& operator[](size_t /*index*/) { return x; } + operator T() const { return x; } + Vector() = default; + Vector(T scalar) + { + x = scalar; + } + template + Vector(Vector other) + { + x = (T)other.x; + } + template + Vector(Vector other) + { + int minSize = 1; + if (otherSize < minSize) minSize = otherSize; + for (int i = 0; i < minSize; i++) + (*this)[i] = (T)other[i]; + } +}; + +template +struct Vector +{ + T x, y; + const T& operator[](size_t index) const { return index == 0 ? x : y; } + T& operator[](size_t index) { return index == 0 ? 
x : y; } + Vector() = default; + Vector(T scalar) + { + x = y = scalar; + } + Vector(T _x, T _y) + { + x = _x; + y = _y; + } + template + Vector(Vector other) + { + x = (T)other.x; + y = (T)other.y; + } + template + Vector(Vector other) + { + int minSize = 2; + if (otherSize < minSize) minSize = otherSize; + for (int i = 0; i < minSize; i++) + (*this)[i] = (T)other[i]; + } +}; + +template +struct Vector +{ + T x, y, z; + const T& operator[](size_t index) const { return *((T*)(this) + index); } + T& operator[](size_t index) { return *((T*)(this) + index); } + + Vector() = default; + Vector(T scalar) + { + x = y = z = scalar; + } + Vector(T _x, T _y, T _z) + { + x = _x; + y = _y; + z = _z; + } + template + Vector(Vector other) + { + x = (T)other.x; + y = (T)other.y; + z = (T)other.z; + } + template + Vector(Vector other) + { + int minSize = 3; + if (otherSize < minSize) minSize = otherSize; + for (int i = 0; i < minSize; i++) + (*this)[i] = (T)other[i]; + } +}; + +template +struct Vector +{ + T x, y, z, w; + + const T& operator[](size_t index) const { return *((T*)(this) + index); } + T& operator[](size_t index) { return *((T*)(this) + index); } + Vector() = default; + Vector(T scalar) + { + x = y = z = w = scalar; + } + Vector(T _x, T _y, T _z, T _w) + { + x = _x; + y = _y; + z = _z; + w = _w; + } + template + Vector(Vector other) + { + int minSize = 4; + if (otherSize < minSize) minSize = otherSize; + for (int i = 0; i < minSize; i++) + (*this)[i] = (T)other[i]; + } + +}; + +template +SLANG_FORCE_INLINE Vector _slang_select(Vector condition, Vector v0, Vector v1) +{ + Vector result; + for (int i = 0; i < N; i++) + { + result[i] = condition[i] ? v0[i] : v1[i]; + } + return result; +} + +template +SLANG_FORCE_INLINE T _slang_select(bool condition, T v0, T v1) +{ + return condition ? 
v0 : v1; +} + +template +SLANG_FORCE_INLINE T _slang_vector_get_element(Vector x, int index) +{ + return x[index]; +} + +template +SLANG_FORCE_INLINE const T* _slang_vector_get_element_ptr(const Vector* x, int index) +{ + return &((*const_cast*>(x))[index]); +} + +template +SLANG_FORCE_INLINE T* _slang_vector_get_element_ptr(Vector* x, int index) +{ + return &((*x)[index]); +} + +template +SLANG_FORCE_INLINE Vector _slang_vector_reshape(const Vector other) +{ + Vector result; + for (int i = 0; i < n; i++) + { + OtherT otherElement = T(0); + if (i < m) + otherElement = _slang_vector_get_element(other, i); + *_slang_vector_get_element_ptr(&result, i) = (T)otherElement; + } + return result; +} + +typedef uint32_t uint; + +#define SLANG_VECTOR_BINARY_OP(T, op) \ + template \ + SLANG_FORCE_INLINE Vector operator op(const Vector& thisVal, const Vector& other) \ + { \ + Vector result;\ + for (int i = 0; i < n; i++) \ + result[i] = thisVal[i] op other[i]; \ + return result;\ + } +#define SLANG_VECTOR_BINARY_COMPARE_OP(T, op) \ + template \ + SLANG_FORCE_INLINE Vector operator op(const Vector& thisVal, const Vector& other) \ + { \ + Vector result;\ + for (int i = 0; i < n; i++) \ + result[i] = thisVal[i] op other[i]; \ + return result;\ + } + +#define SLANG_VECTOR_UNARY_OP(T, op) \ + template \ + SLANG_FORCE_INLINE Vector operator op(const Vector& thisVal) \ + { \ + Vector result;\ + for (int i = 0; i < n; i++) \ + result[i] = op thisVal[i]; \ + return result;\ + } +#define SLANG_INT_VECTOR_OPS(T) \ + SLANG_VECTOR_BINARY_OP(T, +)\ + SLANG_VECTOR_BINARY_OP(T, -)\ + SLANG_VECTOR_BINARY_OP(T, *)\ + SLANG_VECTOR_BINARY_OP(T, / )\ + SLANG_VECTOR_BINARY_OP(T, &)\ + SLANG_VECTOR_BINARY_OP(T, |)\ + SLANG_VECTOR_BINARY_OP(T, &&)\ + SLANG_VECTOR_BINARY_OP(T, ||)\ + SLANG_VECTOR_BINARY_OP(T, ^)\ + SLANG_VECTOR_BINARY_OP(T, %)\ + SLANG_VECTOR_BINARY_OP(T, >>)\ + SLANG_VECTOR_BINARY_OP(T, <<)\ + SLANG_VECTOR_BINARY_COMPARE_OP(T, >)\ + SLANG_VECTOR_BINARY_COMPARE_OP(T, <)\ + 
SLANG_VECTOR_BINARY_COMPARE_OP(T, >=)\ + SLANG_VECTOR_BINARY_COMPARE_OP(T, <=)\ + SLANG_VECTOR_BINARY_COMPARE_OP(T, ==)\ + SLANG_VECTOR_BINARY_COMPARE_OP(T, !=)\ + SLANG_VECTOR_UNARY_OP(T, !)\ + SLANG_VECTOR_UNARY_OP(T, ~) +#define SLANG_FLOAT_VECTOR_OPS(T) \ + SLANG_VECTOR_BINARY_OP(T, +)\ + SLANG_VECTOR_BINARY_OP(T, -)\ + SLANG_VECTOR_BINARY_OP(T, *)\ + SLANG_VECTOR_BINARY_OP(T, /)\ + SLANG_VECTOR_UNARY_OP(T, -)\ + SLANG_VECTOR_BINARY_COMPARE_OP(T, >)\ + SLANG_VECTOR_BINARY_COMPARE_OP(T, <)\ + SLANG_VECTOR_BINARY_COMPARE_OP(T, >=)\ + SLANG_VECTOR_BINARY_COMPARE_OP(T, <=)\ + SLANG_VECTOR_BINARY_COMPARE_OP(T, ==)\ + SLANG_VECTOR_BINARY_COMPARE_OP(T, !=) + +SLANG_INT_VECTOR_OPS(bool) +SLANG_INT_VECTOR_OPS(int) +SLANG_INT_VECTOR_OPS(int8_t) +SLANG_INT_VECTOR_OPS(int16_t) +SLANG_INT_VECTOR_OPS(int64_t) +SLANG_INT_VECTOR_OPS(uint) +SLANG_INT_VECTOR_OPS(uint8_t) +SLANG_INT_VECTOR_OPS(uint16_t) +SLANG_INT_VECTOR_OPS(uint64_t) + +SLANG_FLOAT_VECTOR_OPS(float) +SLANG_FLOAT_VECTOR_OPS(double) + +#define SLANG_VECTOR_INT_NEG_OP(T) \ + template\ + Vector operator-(const Vector& thisVal) \ + { \ + Vector result;\ + for (int i = 0; i < N; i++) \ + result[i] = 0 - thisVal[i]; \ + return result;\ + } +SLANG_VECTOR_INT_NEG_OP(int) +SLANG_VECTOR_INT_NEG_OP(int8_t) +SLANG_VECTOR_INT_NEG_OP(int16_t) +SLANG_VECTOR_INT_NEG_OP(int64_t) +SLANG_VECTOR_INT_NEG_OP(uint) +SLANG_VECTOR_INT_NEG_OP(uint8_t) +SLANG_VECTOR_INT_NEG_OP(uint16_t) +SLANG_VECTOR_INT_NEG_OP(uint64_t) + +#define SLANG_FLOAT_VECTOR_MOD(T)\ + template \ + Vector operator%(const Vector& left, const Vector& right) \ + {\ + Vector result;\ + for (int i = 0; i < N; i++) \ + result[i] = _slang_fmod(left[i], right[i]); \ + return result;\ + } + +SLANG_FLOAT_VECTOR_MOD(float) +SLANG_FLOAT_VECTOR_MOD(double) +#undef SLANG_FLOAT_VECTOR_MOD +#undef SLANG_VECTOR_BINARY_OP +#undef SLANG_VECTOR_UNARY_OP +#undef SLANG_INT_VECTOR_OPS +#undef SLANG_FLOAT_VECTOR_OPS +#undef SLANG_VECTOR_INT_NEG_OP +#undef SLANG_FLOAT_VECTOR_MOD + 
+template +struct Matrix +{ + Vector rows[ROWS]; + Vector& operator[](size_t index) { return rows[index]; } + Matrix() = default; + Matrix(T scalar) + { + for (int i = 0; i < ROWS; i++) + rows[i] = Vector(scalar); + } + Matrix(const Vector& row0) + { + rows[0] = row0; + } + Matrix(const Vector& row0, const Vector& row1) + { + rows[0] = row0; + rows[1] = row1; + } + Matrix(const Vector& row0, const Vector& row1, const Vector& row2) + { + rows[0] = row0; + rows[1] = row1; + rows[2] = row2; + } + Matrix(const Vector& row0, const Vector& row1, const Vector& row2, const Vector& row3) + { + rows[0] = row0; + rows[1] = row1; + rows[2] = row2; + rows[3] = row3; + } + template + Matrix(const Matrix& other) + { + int minRow = ROWS; + int minCol = COLS; + if (minRow > otherRow) minRow = otherRow; + if (minCol > otherCol) minCol = otherCol; + for (int i = 0; i < minRow; i++) + for (int j = 0; j < minCol; j++) + rows[i][j] = (T)other.rows[i][j]; + } + Matrix(T v0, T v1, T v2, T v3) + { + rows[0][0] = v0; rows[0][1] = v1; + rows[1][0] = v2; rows[1][1] = v3; + } + Matrix(T v0, T v1, T v2, T v3, T v4, T v5) + { + if (COLS == 3) + { + rows[0][0] = v0; rows[0][1] = v1; rows[0][2] = v2; + rows[1][0] = v3; rows[1][1] = v4; rows[1][2] = v5; + } + else + { + rows[0][0] = v0; rows[0][1] = v1; + rows[1][0] = v2; rows[1][1] = v3; + rows[2][0] = v4; rows[2][1] = v5; + } + } + Matrix(T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7) + { + if (COLS == 4) + { + rows[0][0] = v0; rows[0][1] = v1; rows[0][2] = v2; rows[0][3] = v3; + rows[1][0] = v4; rows[1][1] = v5; rows[1][2] = v6; rows[1][3] = v7; + } + else + { + rows[0][0] = v0; rows[0][1] = v1; + rows[1][0] = v2; rows[1][1] = v3; + rows[2][0] = v4; rows[2][1] = v5; + rows[3][0] = v6; rows[3][1] = v7; + } + } + Matrix(T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8) + { + rows[0][0] = v0; rows[0][1] = v1; rows[0][2] = v2; + rows[1][0] = v3; rows[1][1] = v4; rows[1][2] = v5; + rows[2][0] = v6; rows[2][1] = v7; rows[2][2] = v8; + } + 
Matrix(T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8, T v9, T v10, T v11) + { + if (COLS == 4) + { + rows[0][0] = v0; rows[0][1] = v1; rows[0][2] = v2; rows[0][3] = v3; + rows[1][0] = v4; rows[1][1] = v5; rows[1][2] = v6; rows[1][3] = v7; + rows[2][0] = v8; rows[2][1] = v9; rows[2][2] = v10; rows[2][3] = v11; + } + else + { + rows[0][0] = v0; rows[0][1] = v1; rows[0][2] = v2; + rows[1][0] = v3; rows[1][1] = v4; rows[1][2] = v5; + rows[2][0] = v6; rows[2][1] = v7; rows[2][2] = v8; + rows[3][0] = v9; rows[3][1] = v10; rows[3][2] = v11; + } + } + Matrix(T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15) + { + rows[0][0] = v0; rows[0][1] = v1; rows[0][2] = v2; rows[0][3] = v3; + rows[1][0] = v4; rows[1][1] = v5; rows[1][2] = v6; rows[1][3] = v7; + rows[2][0] = v8; rows[2][1] = v9; rows[2][2] = v10; rows[2][3] = v11; + rows[3][0] = v12; rows[3][1] = v13; rows[3][2] = v14; rows[3][3] = v15; + } +}; + +#define SLANG_MATRIX_BINARY_OP(T, op) \ + template \ + Matrix operator op(const Matrix& thisVal, const Matrix& other) \ + { \ + Matrix result;\ + for (int i = 0; i < R; i++) \ + for (int j = 0; j < C; j++) \ + result.rows[i][j] = thisVal.rows[i][j] op other.rows[i][j]; \ + return result;\ + } + +#define SLANG_MATRIX_UNARY_OP(T, op) \ + template \ + Matrix operator op(const Matrix& thisVal) \ + { \ + Matrix result;\ + for (int i = 0; i < R; i++) \ + for (int j = 0; j < C; j++) \ + result[i].rows[i][j] = op thisVal.rows[i][j]; \ + return result;\ + } +#define SLANG_INT_MATRIX_OPS(T) \ + SLANG_MATRIX_BINARY_OP(T, +)\ + SLANG_MATRIX_BINARY_OP(T, -)\ + SLANG_MATRIX_BINARY_OP(T, *)\ + SLANG_MATRIX_BINARY_OP(T, / )\ + SLANG_MATRIX_BINARY_OP(T, &)\ + SLANG_MATRIX_BINARY_OP(T, |)\ + SLANG_MATRIX_BINARY_OP(T, &&)\ + SLANG_MATRIX_BINARY_OP(T, ||)\ + SLANG_MATRIX_BINARY_OP(T, ^)\ + SLANG_MATRIX_BINARY_OP(T, %)\ + SLANG_MATRIX_UNARY_OP(T, !)\ + SLANG_MATRIX_UNARY_OP(T, ~) +#define SLANG_FLOAT_MATRIX_OPS(T) \ + 
SLANG_MATRIX_BINARY_OP(T, +)\ + SLANG_MATRIX_BINARY_OP(T, -)\ + SLANG_MATRIX_BINARY_OP(T, *)\ + SLANG_MATRIX_BINARY_OP(T, /)\ + SLANG_MATRIX_UNARY_OP(T, -) +SLANG_INT_MATRIX_OPS(int) +SLANG_INT_MATRIX_OPS(int8_t) +SLANG_INT_MATRIX_OPS(int16_t) +SLANG_INT_MATRIX_OPS(int64_t) +SLANG_INT_MATRIX_OPS(uint) +SLANG_INT_MATRIX_OPS(uint8_t) +SLANG_INT_MATRIX_OPS(uint16_t) +SLANG_INT_MATRIX_OPS(uint64_t) + +SLANG_FLOAT_MATRIX_OPS(float) +SLANG_FLOAT_MATRIX_OPS(double) + +#define SLANG_MATRIX_INT_NEG_OP(T) \ + template\ + SLANG_FORCE_INLINE Matrix operator-(Matrix thisVal) \ + { \ + Matrix result;\ + for (int i = 0; i < R; i++) \ + for (int j = 0; j < C; j++) \ + result.rows[i][j] = 0 - thisVal.rows[i][j]; \ + return result;\ + } + SLANG_MATRIX_INT_NEG_OP(int) + SLANG_MATRIX_INT_NEG_OP(int8_t) + SLANG_MATRIX_INT_NEG_OP(int16_t) + SLANG_MATRIX_INT_NEG_OP(int64_t) + SLANG_MATRIX_INT_NEG_OP(uint) + SLANG_MATRIX_INT_NEG_OP(uint8_t) + SLANG_MATRIX_INT_NEG_OP(uint16_t) + SLANG_MATRIX_INT_NEG_OP(uint64_t) + +#define SLANG_FLOAT_MATRIX_MOD(T)\ + template \ + SLANG_FORCE_INLINE Matrix operator%(Matrix left, Matrix right) \ + {\ + Matrix result;\ + for (int i = 0; i < R; i++) \ + for (int j = 0; j < C; j++) \ + result.rows[i][j] = _slang_fmod(left.rows[i][j], right.rows[i][j]); \ + return result;\ + } + + SLANG_FLOAT_MATRIX_MOD(float) + SLANG_FLOAT_MATRIX_MOD(double) +#undef SLANG_FLOAT_MATRIX_MOD +#undef SLANG_MATRIX_BINARY_OP +#undef SLANG_MATRIX_UNARY_OP +#undef SLANG_INT_MATRIX_OPS +#undef SLANG_FLOAT_MATRIX_OPS +#undef SLANG_MATRIX_INT_NEG_OP +#undef SLANG_FLOAT_MATRIX_MOD + +template +TResult slang_bit_cast(TInput val) +{ + return *(TResult*)(&val); +} + +#endif + + diff --git a/third_party/slang/inc/prelude/slang-cpp-types.h b/third_party/slang/inc/prelude/slang-cpp-types.h new file mode 100644 index 0000000..ac66ad9 --- /dev/null +++ b/third_party/slang/inc/prelude/slang-cpp-types.h @@ -0,0 +1,947 @@ +#ifndef SLANG_PRELUDE_CPP_TYPES_H +#define SLANG_PRELUDE_CPP_TYPES_H + 
+#ifdef SLANG_PRELUDE_NAMESPACE +namespace SLANG_PRELUDE_NAMESPACE { +#endif + + +#include "slang-cpp-types-core.h" + +typedef Vector float2; +typedef Vector float3; +typedef Vector float4; + +typedef Vector int2; +typedef Vector int3; +typedef Vector int4; + +typedef Vector uint2; +typedef Vector uint3; +typedef Vector uint4; + +// We can just map `NonUniformResourceIndex` type directly to the index type on CPU, as CPU does not require +// any special handling around such accesses. +typedef size_t NonUniformResourceIndex; + +// ----------------------------- ResourceType ----------------------------------------- + +// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/sm5-object-structuredbuffer-getdimensions +// Missing Load(_In_ int Location, _Out_ uint Status); + +template +struct RWStructuredBuffer +{ + SLANG_FORCE_INLINE T& operator[](size_t index) const { SLANG_BOUND_CHECK(index, count); return data[index]; } + const T& Load(size_t index) const { SLANG_BOUND_CHECK(index, count); return data[index]; } + void GetDimensions(uint32_t* outNumStructs, uint32_t* outStride) { *outNumStructs = uint32_t(count); *outStride = uint32_t(sizeof(T)); } + + T* data; + size_t count; +}; + +template +struct StructuredBuffer +{ + SLANG_FORCE_INLINE const T& operator[](size_t index) const { SLANG_BOUND_CHECK(index, count); return data[index]; } + const T& Load(size_t index) const { SLANG_BOUND_CHECK(index, count); return data[index]; } + void GetDimensions(uint32_t* outNumStructs, uint32_t* outStride) { *outNumStructs = uint32_t(count); *outStride = uint32_t(sizeof(T)); } + + T* data; + size_t count; +}; + + +template +struct RWBuffer +{ + SLANG_FORCE_INLINE T& operator[](size_t index) const { SLANG_BOUND_CHECK(index, count); return data[index]; } + const T& Load(size_t index) const { SLANG_BOUND_CHECK(index, count); return data[index]; } + void GetDimensions(uint32_t* outCount) { *outCount = uint32_t(count); } + + T* data; + size_t count; +}; + +template +struct Buffer 
+{ + SLANG_FORCE_INLINE const T& operator[](size_t index) const { SLANG_BOUND_CHECK(index, count); return data[index]; } + const T& Load(size_t index) const { SLANG_BOUND_CHECK(index, count); return data[index]; } + void GetDimensions(uint32_t* outCount) { *outCount = uint32_t(count); } + + T* data; + size_t count; +}; + +// Missing Load(_In_ int Location, _Out_ uint Status); +struct ByteAddressBuffer +{ + void GetDimensions(uint32_t* outDim) const { *outDim = uint32_t(sizeInBytes); } + uint32_t Load(size_t index) const + { + SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 4, sizeInBytes); + return data[index >> 2]; + } + uint2 Load2(size_t index) const + { + SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 8, sizeInBytes); + const size_t dataIdx = index >> 2; + return uint2{data[dataIdx], data[dataIdx + 1]}; + } + uint3 Load3(size_t index) const + { + SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 12, sizeInBytes); + const size_t dataIdx = index >> 2; + return uint3{data[dataIdx], data[dataIdx + 1], data[dataIdx + 2]}; + } + uint4 Load4(size_t index) const + { + SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 16, sizeInBytes); + const size_t dataIdx = index >> 2; + return uint4{data[dataIdx], data[dataIdx + 1], data[dataIdx + 2], data[dataIdx + 3]}; + } + template + T Load(size_t index) const + { + SLANG_BOUND_CHECK_BYTE_ADDRESS(index, sizeof(T), sizeInBytes); + return *(const T*)(((const char*)data) + index); + } + + const uint32_t* data; + size_t sizeInBytes; //< Must be multiple of 4 +}; + +// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/sm5-object-rwbyteaddressbuffer +// Missing support for Atomic operations +// Missing support for Load with status +struct RWByteAddressBuffer +{ + void GetDimensions(uint32_t* outDim) const { *outDim = uint32_t(sizeInBytes); } + + uint32_t Load(size_t index) const + { + SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 4, sizeInBytes); + return data[index >> 2]; + } + uint2 Load2(size_t index) const + { + SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 8, sizeInBytes); + 
const size_t dataIdx = index >> 2; + return uint2{data[dataIdx], data[dataIdx + 1]}; + } + uint3 Load3(size_t index) const + { + SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 12, sizeInBytes); + const size_t dataIdx = index >> 2; + return uint3{data[dataIdx], data[dataIdx + 1], data[dataIdx + 2]}; + } + uint4 Load4(size_t index) const + { + SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 16, sizeInBytes); + const size_t dataIdx = index >> 2; + return uint4{data[dataIdx], data[dataIdx + 1], data[dataIdx + 2], data[dataIdx + 3]}; + } + template + T Load(size_t index) const + { + SLANG_BOUND_CHECK_BYTE_ADDRESS(index, sizeof(T), sizeInBytes); + return *(const T*)(((const char*)data) + index); + } + + void Store(size_t index, uint32_t v) const + { + SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 4, sizeInBytes); + data[index >> 2] = v; + } + void Store2(size_t index, uint2 v) const + { + SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 8, sizeInBytes); + const size_t dataIdx = index >> 2; + data[dataIdx + 0] = v.x; + data[dataIdx + 1] = v.y; + } + void Store3(size_t index, uint3 v) const + { + SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 12, sizeInBytes); + const size_t dataIdx = index >> 2; + data[dataIdx + 0] = v.x; + data[dataIdx + 1] = v.y; + data[dataIdx + 2] = v.z; + } + void Store4(size_t index, uint4 v) const + { + SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 16, sizeInBytes); + const size_t dataIdx = index >> 2; + data[dataIdx + 0] = v.x; + data[dataIdx + 1] = v.y; + data[dataIdx + 2] = v.z; + data[dataIdx + 3] = v.w; + } + template + void Store(size_t index, T const& value) const + { + SLANG_BOUND_CHECK_BYTE_ADDRESS(index, sizeof(T), sizeInBytes); + *(T*)(((char*)data) + index) = value; + } + + uint32_t* data; + size_t sizeInBytes; //< Must be multiple of 4 +}; + +struct ISamplerState; +struct ISamplerComparisonState; + +struct SamplerState +{ + ISamplerState* state; +}; + +struct SamplerComparisonState +{ + ISamplerComparisonState* state; +}; + +#ifndef SLANG_RESOURCE_SHAPE +# define SLANG_RESOURCE_SHAPE 
+typedef unsigned int SlangResourceShape; +enum +{ + SLANG_RESOURCE_BASE_SHAPE_MASK = 0x0F, + + SLANG_RESOURCE_NONE = 0x00, + + SLANG_TEXTURE_1D = 0x01, + SLANG_TEXTURE_2D = 0x02, + SLANG_TEXTURE_3D = 0x03, + SLANG_TEXTURE_CUBE = 0x04, + SLANG_TEXTURE_BUFFER = 0x05, + + SLANG_STRUCTURED_BUFFER = 0x06, + SLANG_BYTE_ADDRESS_BUFFER = 0x07, + SLANG_RESOURCE_UNKNOWN = 0x08, + SLANG_ACCELERATION_STRUCTURE = 0x09, + + SLANG_RESOURCE_EXT_SHAPE_MASK = 0xF0, + + SLANG_TEXTURE_FEEDBACK_FLAG = 0x10, + SLANG_TEXTURE_ARRAY_FLAG = 0x40, + SLANG_TEXTURE_MULTISAMPLE_FLAG = 0x80, + + SLANG_TEXTURE_1D_ARRAY = SLANG_TEXTURE_1D | SLANG_TEXTURE_ARRAY_FLAG, + SLANG_TEXTURE_2D_ARRAY = SLANG_TEXTURE_2D | SLANG_TEXTURE_ARRAY_FLAG, + SLANG_TEXTURE_CUBE_ARRAY = SLANG_TEXTURE_CUBE | SLANG_TEXTURE_ARRAY_FLAG, + + SLANG_TEXTURE_2D_MULTISAMPLE = SLANG_TEXTURE_2D | SLANG_TEXTURE_MULTISAMPLE_FLAG, + SLANG_TEXTURE_2D_MULTISAMPLE_ARRAY = + SLANG_TEXTURE_2D | SLANG_TEXTURE_MULTISAMPLE_FLAG | SLANG_TEXTURE_ARRAY_FLAG, +}; +#endif + +// +struct TextureDimensions +{ + void reset() + { + shape = 0; + width = height = depth = 0; + numberOfLevels = 0; + arrayElementCount = 0; + } + int getDimSizes(uint32_t outDims[4]) const + { + const auto baseShape = (shape & SLANG_RESOURCE_BASE_SHAPE_MASK); + int count = 0; + switch (baseShape) + { + case SLANG_TEXTURE_1D: + { + outDims[count++] = width; + break; + } + case SLANG_TEXTURE_2D: + { + outDims[count++] = width; + outDims[count++] = height; + break; + } + case SLANG_TEXTURE_3D: + { + outDims[count++] = width; + outDims[count++] = height; + outDims[count++] = depth; + break; + } + case SLANG_TEXTURE_CUBE: + { + outDims[count++] = width; + outDims[count++] = height; + outDims[count++] = 6; + break; + } + } + + if (shape & SLANG_TEXTURE_ARRAY_FLAG) + { + outDims[count++] = arrayElementCount; + } + return count; + } + int getMIPDims(int outDims[3]) const + { + const auto baseShape = (shape & SLANG_RESOURCE_BASE_SHAPE_MASK); + int count = 0; + switch (baseShape) + 
{ + case SLANG_TEXTURE_1D: + { + outDims[count++] = width; + break; + } + case SLANG_TEXTURE_CUBE: + case SLANG_TEXTURE_2D: + { + outDims[count++] = width; + outDims[count++] = height; + break; + } + case SLANG_TEXTURE_3D: + { + outDims[count++] = width; + outDims[count++] = height; + outDims[count++] = depth; + break; + } + } + return count; + } + int calcMaxMIPLevels() const + { + int dims[3]; + const int dimCount = getMIPDims(dims); + for (int count = 1; true; count++) + { + bool allOne = true; + for (int i = 0; i < dimCount; ++i) + { + if (dims[i] > 1) + { + allOne = false; + dims[i] >>= 1; + } + } + if (allOne) + { + return count; + } + } + } + + uint32_t shape; + uint32_t width, height, depth; + uint32_t numberOfLevels; + uint32_t arrayElementCount; ///< For array types, 0 otherwise +}; + + + + + +// Texture + +struct ITexture +{ + virtual TextureDimensions GetDimensions(int mipLevel = -1) = 0; + virtual void Load(const int32_t* v, void* outData, size_t dataSize) = 0; + virtual void Sample(SamplerState samplerState, const float* loc, void* outData, size_t dataSize) = 0; + virtual void SampleLevel(SamplerState samplerState, const float* loc, float level, void* outData, size_t dataSize) = 0; +}; + +template +struct Texture1D +{ + void GetDimensions(uint32_t* outWidth) { *outWidth = texture->GetDimensions().width; } + void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outNumberOfLevels) + { + auto dims = texture->GetDimensions(mipLevel); + *outWidth = dims.width; + *outNumberOfLevels = dims.numberOfLevels; + } + + void GetDimensions(float* outWidth) { *outWidth = texture->GetDimensions().width; } + void GetDimensions(uint32_t mipLevel, float* outWidth, float* outNumberOfLevels) + { + auto dims = texture->GetDimensions(mipLevel); + *outWidth = dims.width; + *outNumberOfLevels = dims.numberOfLevels; + } + + T Load(const int2& loc) const { T out; texture->Load(&loc.x, &out, sizeof(out)); return out; } + T Sample(SamplerState samplerState, float 
loc) const { T out; texture->Sample(samplerState, &loc, &out, sizeof(out)); return out; } + T SampleLevel(SamplerState samplerState, float loc, float level) { T out; texture->SampleLevel(samplerState, &loc, level, &out, sizeof(out)); return out; } + + ITexture* texture; +}; + +template +struct Texture2D +{ + void GetDimensions(uint32_t* outWidth, uint32_t* outHeight) + { + const auto dims = texture->GetDimensions(); + *outWidth = dims.width; + *outHeight = dims.height; + } + void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outHeight, uint32_t* outNumberOfLevels) + { + const auto dims = texture->GetDimensions(mipLevel); + *outWidth = dims.width; + *outHeight = dims.height; + *outNumberOfLevels = dims.numberOfLevels; + } + void GetDimensions(float* outWidth, float* outHeight) + { + const auto dims = texture->GetDimensions(); + *outWidth = dims.width; + *outHeight = dims.height; + } + void GetDimensions(uint32_t mipLevel, float* outWidth, float* outHeight, float* outNumberOfLevels) + { + const auto dims = texture->GetDimensions(mipLevel); + *outWidth = dims.width; + *outHeight = dims.height; + *outNumberOfLevels = dims.numberOfLevels; + } + + T Load(const int3& loc) const { T out; texture->Load(&loc.x, &out, sizeof(out)); return out; } + T Sample(SamplerState samplerState, const float2& loc) const { T out; texture->Sample(samplerState, &loc.x, &out, sizeof(out)); return out; } + T SampleLevel(SamplerState samplerState, const float2& loc, float level) { T out; texture->SampleLevel(samplerState, &loc.x, level, &out, sizeof(out)); return out; } + + ITexture* texture; +}; + +template +struct Texture3D +{ + void GetDimensions(uint32_t* outWidth, uint32_t* outHeight, uint32_t* outDepth) + { + const auto dims = texture->GetDimensions(); + *outWidth = dims.width; + *outHeight = dims.height; + *outDepth = dims.depth; + } + void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outHeight, uint32_t* outDepth, uint32_t* outNumberOfLevels) + { + 
const auto dims = texture->GetDimensions(mipLevel); + *outWidth = dims.width; + *outHeight = dims.height; + *outDepth = dims.depth; + *outNumberOfLevels = dims.numberOfLevels; + } + void GetDimensions(float* outWidth, float* outHeight, float* outDepth) + { + const auto dims = texture->GetDimensions(); + *outWidth = dims.width; + *outHeight = dims.height; + *outDepth = dims.depth; + } + void GetDimensions(uint32_t mipLevel, float* outWidth, float* outHeight, float* outDepth, float* outNumberOfLevels) + { + const auto dims = texture->GetDimensions(mipLevel); + *outWidth = dims.width; + *outHeight = dims.height; + *outDepth = dims.depth; + *outNumberOfLevels = dims.numberOfLevels; + } + + T Load(const int4& loc) const { T out; texture->Load(&loc.x, &out, sizeof(out)); return out; } + T Sample(SamplerState samplerState, const float3& loc) const { T out; texture->Sample(samplerState, &loc.x, &out, sizeof(out)); return out; } + T SampleLevel(SamplerState samplerState, const float3& loc, float level) { T out; texture->SampleLevel(samplerState, &loc.x, level, &out, sizeof(out)); return out; } + + ITexture* texture; +}; + +template +struct TextureCube +{ + void GetDimensions(uint32_t* outWidth, uint32_t* outHeight) + { + const auto dims = texture->GetDimensions(); + *outWidth = dims.width; + *outHeight = dims.height; + } + void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outHeight, uint32_t* outNumberOfLevels) + { + const auto dims = texture->GetDimensions(mipLevel); + *outWidth = dims.width; + *outHeight = dims.height; + *outNumberOfLevels = dims.numberOfLevels; + } + void GetDimensions(float* outWidth, float* outHeight) + { + const auto dims = texture->GetDimensions(); + *outWidth = dims.width; + *outHeight = dims.height; + } + void GetDimensions(uint32_t mipLevel, float* outWidth, float* outHeight, float* outNumberOfLevels) + { + const auto dims = texture->GetDimensions(mipLevel); + *outWidth = dims.width; + *outHeight = dims.height; + 
*outNumberOfLevels = dims.numberOfLevels; + } + + T Sample(SamplerState samplerState, const float3& loc) const { T out; texture->Sample(samplerState, &loc.x, &out, sizeof(out)); return out; } + T SampleLevel(SamplerState samplerState, const float3& loc, float level) { T out; texture->SampleLevel(samplerState, &loc.x, level, &out, sizeof(out)); return out; } + + ITexture* texture; +}; + +template +struct Texture1DArray +{ + void GetDimensions(uint32_t* outWidth, uint32_t* outElements) { auto dims = texture->GetDimensions(); *outWidth = dims.width; *outElements = dims.arrayElementCount; } + void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outElements, uint32_t* outNumberOfLevels) + { + auto dims = texture->GetDimensions(mipLevel); + *outWidth = dims.width; + *outNumberOfLevels = dims.numberOfLevels; + *outElements = dims.arrayElementCount; + } + void GetDimensions(float* outWidth, float* outElements) { auto dims = texture->GetDimensions(); *outWidth = dims.width; *outElements = dims.arrayElementCount; } + void GetDimensions(uint32_t mipLevel, float* outWidth, float* outElements, float* outNumberOfLevels) + { + auto dims = texture->GetDimensions(mipLevel); + *outWidth = dims.width; + *outNumberOfLevels = dims.numberOfLevels; + *outElements = dims.arrayElementCount; + } + + T Load(const int3& loc) const { T out; texture->Load(&loc.x, &out, sizeof(out)); return out; } + T Sample(SamplerState samplerState, const float2& loc) const { T out; texture->Sample(samplerState, &loc.x, &out, sizeof(out)); return out; } + T SampleLevel(SamplerState samplerState, const float2& loc, float level) { T out; texture->SampleLevel(samplerState, &loc.x, level, &out, sizeof(out)); return out; } + + ITexture* texture; +}; + +template +struct Texture2DArray +{ + void GetDimensions(uint32_t* outWidth, uint32_t* outHeight, uint32_t* outElements) + { + auto dims = texture->GetDimensions(); + *outWidth = dims.width; + *outHeight = dims.height; + *outElements = 
dims.arrayElementCount; + } + void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outHeight, uint32_t* outElements, uint32_t* outNumberOfLevels) + { + auto dims = texture->GetDimensions(mipLevel); + *outWidth = dims.width; + *outHeight = dims.height; + *outElements = dims.arrayElementCount; + *outNumberOfLevels = dims.numberOfLevels; + } + // Float overload: width, height and element count are all written through float pointers. + void GetDimensions(float* outWidth, float* outHeight, float* outElements) + { + auto dims = texture->GetDimensions(); + *outWidth = dims.width; + *outHeight = dims.height; + *outElements = dims.arrayElementCount; + } + void GetDimensions(uint32_t mipLevel, float* outWidth, float* outHeight, float* outElements, float* outNumberOfLevels) + { + auto dims = texture->GetDimensions(mipLevel); + *outWidth = dims.width; + *outHeight = dims.height; + *outElements = dims.arrayElementCount; + *outNumberOfLevels = dims.numberOfLevels; + } + + T Load(const int4& loc) const { T out; texture->Load(&loc.x, &out, sizeof(out)); return out; } + T Sample(SamplerState samplerState, const float3& loc) const { T out; texture->Sample(samplerState, &loc.x, &out, sizeof(out)); return out; } + T SampleLevel(SamplerState samplerState, const float3& loc, float level) { T out; texture->SampleLevel(samplerState, &loc.x, level, &out, sizeof(out)); return out; } + + ITexture* texture; +}; + +template +struct TextureCubeArray +{ + void GetDimensions(uint32_t* outWidth, uint32_t* outHeight, uint32_t* outElements) + { + auto dims = texture->GetDimensions(); + *outWidth = dims.width; + *outHeight = dims.height; + *outElements = dims.arrayElementCount; + } + void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outHeight, uint32_t* outElements, uint32_t* outNumberOfLevels) + { + auto dims = texture->GetDimensions(mipLevel); + *outWidth = dims.width; + *outHeight = dims.height; + *outElements = dims.arrayElementCount; + *outNumberOfLevels = dims.numberOfLevels; + } + // Float overload: width, height and element count are all written through float pointers. + void GetDimensions(float* outWidth, float* outHeight, float* 
outElements) + { + auto dims = texture->GetDimensions(); + *outWidth = dims.width; + *outHeight = dims.height; + *outElements = dims.arrayElementCount; + } + void GetDimensions(uint32_t mipLevel, float* outWidth, float* outHeight, float* outElements, float* outNumberOfLevels) + { + auto dims = texture->GetDimensions(mipLevel); + *outWidth = dims.width; + *outHeight = dims.height; + *outElements = dims.arrayElementCount; + *outNumberOfLevels = dims.numberOfLevels; + } + + T Sample(SamplerState samplerState, const float4& loc) const { T out; texture->Sample(samplerState, &loc.x, &out, sizeof(out)); return out; } + T SampleLevel(SamplerState samplerState, const float4& loc, float level) { T out; texture->SampleLevel(samplerState, &loc.x, level, &out, sizeof(out)); return out; } + + ITexture* texture; +}; + +/* !!!!!!!!!!!!!!!!!!!!!!!!!!! RWTexture !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! */ + +struct IRWTexture : ITexture +{ + /// Get the reference to the element at loc. + virtual void* refAt(const uint32_t* loc) = 0; +}; + +template +struct RWTexture1D +{ + void GetDimensions(uint32_t* outWidth) { *outWidth = texture->GetDimensions().width; } + void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outNumberOfLevels) { auto dims = texture->GetDimensions(mipLevel); *outWidth = dims.width; *outNumberOfLevels = dims.numberOfLevels; } + + void GetDimensions(float* outWidth) { *outWidth = texture->GetDimensions().width; } + void GetDimensions(uint32_t mipLevel, float* outWidth, float* outNumberOfLevels) { auto dims = texture->GetDimensions(mipLevel); *outWidth = dims.width; *outNumberOfLevels = dims.numberOfLevels; } + + T Load(int32_t loc) const { T out; texture->Load(&loc, &out, sizeof(out)); return out; } + T& operator[](uint32_t loc) { return *(T*)texture->refAt(&loc); } + IRWTexture* texture; +}; + +template +struct RWTexture2D +{ + void GetDimensions(uint32_t* outWidth, uint32_t* outHeight) + { + const auto dims = texture->GetDimensions(); + *outWidth = 
dims.width; + *outHeight = dims.height; + } + void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outHeight, uint32_t* outNumberOfLevels) + { + const auto dims = texture->GetDimensions(mipLevel); + *outWidth = dims.width; + *outHeight = dims.height; + *outNumberOfLevels = dims.numberOfLevels; + } + void GetDimensions(float* outWidth, float* outHeight) + { + const auto dims = texture->GetDimensions(); + *outWidth = dims.width; + *outHeight = dims.height; + } + void GetDimensions(uint32_t mipLevel, float* outWidth, float* outHeight, float* outNumberOfLevels) + { + const auto dims = texture->GetDimensions(mipLevel); + *outWidth = dims.width; + *outHeight = dims.height; + *outNumberOfLevels = dims.numberOfLevels; + } + + T Load(const int2& loc) const { T out; texture->Load(&loc.x, &out, sizeof(out)); return out; } + T& operator[](const uint2& loc) { return *(T*)texture->refAt(&loc.x); } + IRWTexture* texture; +}; + +template +struct RWTexture3D +{ + void GetDimensions(uint32_t* outWidth, uint32_t* outHeight, uint32_t* outDepth) + { + const auto dims = texture->GetDimensions(); + *outWidth = dims.width; + *outHeight = dims.height; + *outDepth = dims.depth; + } + void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outHeight, uint32_t* outDepth, uint32_t* outNumberOfLevels) + { + const auto dims = texture->GetDimensions(mipLevel); + *outWidth = dims.width; + *outHeight = dims.height; + *outDepth = dims.depth; + *outNumberOfLevels = dims.numberOfLevels; + } + void GetDimensions(float* outWidth, float* outHeight, float* outDepth) + { + const auto dims = texture->GetDimensions(); + *outWidth = dims.width; + *outHeight = dims.height; + *outDepth = dims.depth; + } + void GetDimensions(uint32_t mipLevel, float* outWidth, float* outHeight, float* outDepth, float* outNumberOfLevels) + { + const auto dims = texture->GetDimensions(mipLevel); + *outWidth = dims.width; + *outHeight = dims.height; + *outDepth = dims.depth; + *outNumberOfLevels = 
dims.numberOfLevels; + } + + T Load(const int3& loc) const { T out; texture->Load(&loc.x, &out, sizeof(out)); return out; } + T& operator[](const uint3& loc) { return *(T*)texture->refAt(&loc.x); } + IRWTexture* texture; +}; + + +template +struct RWTexture1DArray +{ + void GetDimensions(uint32_t* outWidth, uint32_t* outElements) + { + auto dims = texture->GetDimensions(); + *outWidth = dims.width; + *outElements = dims.arrayElementCount; + } + void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outElements, uint32_t* outNumberOfLevels) + { + const auto dims = texture->GetDimensions(mipLevel); + *outWidth = dims.width; + *outElements = dims.arrayElementCount; + *outNumberOfLevels = dims.numberOfLevels; + } + void GetDimensions(float* outWidth, float* outElements) + { + auto dims = texture->GetDimensions(); + *outWidth = dims.width; + *outElements = dims.arrayElementCount; + } + void GetDimensions(uint32_t mipLevel, float* outWidth, float* outElements, float* outNumberOfLevels) + { + const auto dims = texture->GetDimensions(mipLevel); + *outWidth = dims.width; + *outElements = dims.arrayElementCount; + *outNumberOfLevels = dims.numberOfLevels; + } + + T Load(int2 loc) const { T out; texture->Load(&loc.x, &out, sizeof(out)); return out; } + T& operator[](uint2 loc) { return *(T*)texture->refAt(&loc.x); } + + IRWTexture* texture; +}; + +template +struct RWTexture2DArray +{ + void GetDimensions(uint32_t* outWidth, uint32_t* outHeight, uint32_t* outElements) + { + auto dims = texture->GetDimensions(); + *outWidth = dims.width; + *outHeight = dims.height; + *outElements = dims.arrayElementCount; + } + void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outHeight, uint32_t* outElements, uint32_t* outNumberOfLevels) + { + const auto dims = texture->GetDimensions(mipLevel); + *outWidth = dims.width; + *outHeight = dims.height; + *outElements = dims.arrayElementCount; + *outNumberOfLevels = dims.numberOfLevels; + } + void GetDimensions(float* 
outWidth, float* outHeight, float* outElements) + { + auto dims = texture->GetDimensions(); + *outWidth = dims.width; + *outHeight = dims.height; + *outElements = dims.arrayElementCount; + } + void GetDimensions(uint32_t mipLevel, float* outWidth, float* outHeight, float* outElements, float* outNumberOfLevels) + { + const auto dims = texture->GetDimensions(mipLevel); + *outWidth = dims.width; + *outHeight = dims.height; + *outElements = dims.arrayElementCount; + *outNumberOfLevels = dims.numberOfLevels; + } + + T Load(const int3& loc) const { T out; texture->Load(&loc.x, &out, sizeof(out)); return out; } + T& operator[](const uint3& loc) { return *(T*)texture->refAt(&loc.x); } + + IRWTexture* texture; +}; + +// FeedbackTexture + +struct FeedbackType {}; +struct SAMPLER_FEEDBACK_MIN_MIP : FeedbackType {}; +struct SAMPLER_FEEDBACK_MIP_REGION_USED : FeedbackType {}; + +struct IFeedbackTexture +{ + virtual TextureDimensions GetDimensions(int mipLevel = -1) = 0; + + // Note here we pass the optional clamp parameter as a pointer. Passing nullptr means no clamp. 
+ // This was preferred over having two function definitions, and having to differentiate their names + virtual void WriteSamplerFeedback(ITexture* tex, SamplerState samp, const float* location, const float* clamp = nullptr) = 0; + virtual void WriteSamplerFeedbackBias(ITexture* tex, SamplerState samp, const float* location, float bias, const float* clamp = nullptr) = 0; + virtual void WriteSamplerFeedbackGrad(ITexture* tex, SamplerState samp, const float* location, const float* ddx, const float* ddy, const float* clamp = nullptr) = 0; + + virtual void WriteSamplerFeedbackLevel(ITexture* tex, SamplerState samp, const float* location, float lod) = 0; +}; + +template +struct FeedbackTexture2D +{ + void GetDimensions(uint32_t* outWidth, uint32_t* outHeight) + { + const auto dims = texture->GetDimensions(); + *outWidth = dims.width; + *outHeight = dims.height; + } + void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outHeight, uint32_t* outNumberOfLevels) + { + const auto dims = texture->GetDimensions(mipLevel); + *outWidth = dims.width; + *outHeight = dims.height; + *outNumberOfLevels = dims.numberOfLevels; + } + void GetDimensions(float* outWidth, float* outHeight) + { + const auto dims = texture->GetDimensions(); + *outWidth = dims.width; + *outHeight = dims.height; + } + void GetDimensions(uint32_t mipLevel, float* outWidth, float* outHeight, float* outNumberOfLevels) + { + const auto dims = texture->GetDimensions(mipLevel); + *outWidth = dims.width; + *outHeight = dims.height; + *outNumberOfLevels = dims.numberOfLevels; + } + + template + void WriteSamplerFeedback(Texture2D tex, SamplerState samp, float2 location, float clamp) { texture->WriteSamplerFeedback(tex.texture, samp, &location.x, &clamp); } + + template + void WriteSamplerFeedbackBias(Texture2D tex, SamplerState samp, float2 location, float bias, float clamp) { texture->WriteSamplerFeedbackBias(tex.texture, samp, &location.x, bias, &clamp); } + + template + void 
WriteSamplerFeedbackGrad(Texture2D tex, SamplerState samp, float2 location, float2 ddx, float2 ddy, float clamp) { texture->WriteSamplerFeedbackGrad(tex.texture, samp, &location.x, &ddx.x, &ddy.x, &clamp); } + + // Level + + template + void WriteSamplerFeedbackLevel(Texture2D tex, SamplerState samp, float2 location, float lod) { texture->WriteSamplerFeedbackLevel(tex.texture, samp, &location.x, lod); } + + // Without Clamp + template + void WriteSamplerFeedback(Texture2D tex, SamplerState samp, float2 location) { texture->WriteSamplerFeedback(tex.texture, samp, &location.x); } + + template + void WriteSamplerFeedbackBias(Texture2D tex, SamplerState samp, float2 location, float bias) { texture->WriteSamplerFeedbackBias(tex.texture, samp, &location.x, bias); } + + template + void WriteSamplerFeedbackGrad(Texture2D tex, SamplerState samp, float2 location, float2 ddx, float2 ddy) { texture->WriteSamplerFeedbackGrad(tex.texture, samp, &location.x, &ddx.x, &ddy.x); } + + IFeedbackTexture* texture; +}; + +template +struct FeedbackTexture2DArray +{ + void GetDimensions(uint32_t* outWidth, uint32_t* outHeight, uint32_t* outElements) + { + auto dims = texture->GetDimensions(); + *outWidth = dims.width; + *outHeight = dims.height; + *outElements = dims.arrayElementCount; + } + void GetDimensions(uint32_t mipLevel, uint32_t* outWidth, uint32_t* outHeight, uint32_t* outElements, uint32_t* outNumberOfLevels) + { + const auto dims = texture->GetDimensions(mipLevel); + *outWidth = dims.width; + *outHeight = dims.height; + *outElements = dims.arrayElementCount; + *outNumberOfLevels = dims.numberOfLevels; + } + void GetDimensions(float* outWidth, float* outHeight, float* outElements) + { + auto dims = texture->GetDimensions(); + *outWidth = dims.width; + *outHeight = dims.height; + *outElements = dims.arrayElementCount; + } + void GetDimensions(uint32_t mipLevel, float* outWidth, float* outHeight, float* outElements, float* outNumberOfLevels) + { + const auto dims = 
texture->GetDimensions(mipLevel); + *outWidth = dims.width; + *outHeight = dims.height; + *outElements = dims.arrayElementCount; + *outNumberOfLevels = dims.numberOfLevels; + } + + template + void WriteSamplerFeedback(Texture2DArray texArray, SamplerState samp, float3 location, float clamp) { texture->WriteSamplerFeedback(texArray.texture, samp, &location.x, &clamp); } + + template + void WriteSamplerFeedbackBias(Texture2DArray texArray, SamplerState samp, float3 location, float bias, float clamp) { texture->WriteSamplerFeedbackBias(texArray.texture, samp, &location.x, bias, &clamp); } + + template + void WriteSamplerFeedbackGrad(Texture2DArray texArray, SamplerState samp, float3 location, float3 ddx, float3 ddy, float clamp) { texture->WriteSamplerFeedbackGrad(texArray.texture, samp, &location.x, &ddx.x, &ddy.x, &clamp); } + + // Level + template + void WriteSamplerFeedbackLevel(Texture2DArray texArray, SamplerState samp, float3 location, float lod) { texture->WriteSamplerFeedbackLevel(texArray.texture, samp, &location.x, lod); } + + // Without Clamp + + template + void WriteSamplerFeedback(Texture2DArray texArray, SamplerState samp, float3 location) { texture->WriteSamplerFeedback(texArray.texture, samp, &location.x); } + + template + void WriteSamplerFeedbackBias(Texture2DArray texArray, SamplerState samp, float3 location, float bias) { texture->WriteSamplerFeedbackBias(texArray.texture, samp, &location.x, bias); } + + template + void WriteSamplerFeedbackGrad(Texture2DArray texArray, SamplerState samp, float3 location, float3 ddx, float3 ddy) { texture->WriteSamplerFeedbackGrad(texArray.texture, samp, &location.x, &ddx.x, &ddy.x); } + + IFeedbackTexture* texture; +}; + +/* Varying input for Compute */ + +/* Used when running a single thread */ +struct ComputeThreadVaryingInput +{ + uint3 groupID; + uint3 groupThreadID; +}; + +struct ComputeVaryingInput +{ + uint3 startGroupID; ///< start groupID + uint3 endGroupID; ///< Non inclusive end groupID +}; + +// The 
uniformEntryPointParams and uniformState must be set to structures that match layout that the kernel expects. +// This can be determined via reflection for example. + +typedef void(*ComputeThreadFunc)(ComputeThreadVaryingInput* varyingInput, void* uniformEntryPointParams, void* uniformState); +typedef void(*ComputeFunc)(ComputeVaryingInput* varyingInput, void* uniformEntryPointParams, void* uniformState); + +#ifdef SLANG_PRELUDE_NAMESPACE +} +#endif + +#endif + + diff --git a/third_party/slang/inc/prelude/slang-cuda-prelude.h b/third_party/slang/inc/prelude/slang-cuda-prelude.h new file mode 100644 index 0000000..38f8a72 --- /dev/null +++ b/third_party/slang/inc/prelude/slang-cuda-prelude.h @@ -0,0 +1,2351 @@ +#define SLANG_PRELUDE_EXPORT + +#ifdef __CUDACC_RTC__ +#define SLANG_CUDA_RTC 1 +#else +#define SLANG_CUDA_RTC 0 +#endif + +#if SLANG_CUDA_RTC + +#else + +#include +#include + +#endif + +// Define SLANG_CUDA_ENABLE_HALF to use the cuda_fp16 include to add half support. +// For this to work NVRTC needs to have the path to the CUDA SDK. +// +// As it stands the includes paths defined for Slang are passed down to NVRTC. Similarly defines defined for the Slang compile +// are passed down. + +#ifdef SLANG_CUDA_ENABLE_HALF +// We don't want half2 operators, because it will implement comparison operators that return a bool(!). We want to generate +// those functions. Doing so means that we will have to define all the other half2 operators. 
+# define __CUDA_NO_HALF2_OPERATORS__ +# include +#endif + +#ifdef SLANG_CUDA_ENABLE_OPTIX +#include +#endif + +// Define slang offsetof implementation +#ifndef SLANG_OFFSET_OF +# define SLANG_OFFSET_OF(type, member) (size_t)((char*)&(((type *)0)->member) - (char*)0) +#endif + +#ifndef SLANG_ALIGN_OF +# define SLANG_ALIGN_OF(type) __alignof__(type) +#endif + +// Must be large enough to cause overflow and therefore infinity +#ifndef SLANG_INFINITY +# define SLANG_INFINITY ((float)(1e+300 * 1e+300)) +#endif + +// For now we'll disable any asserts in this prelude +#define SLANG_PRELUDE_ASSERT(x) + +#ifndef SLANG_CUDA_WARP_SIZE +# define SLANG_CUDA_WARP_SIZE 32 +#endif + +#define SLANG_CUDA_WARP_MASK (SLANG_CUDA_WARP_SIZE - 1) // Used for masking threadIdx.x to the warp lane index +#define SLANG_CUDA_WARP_BITMASK (~int(0)) + +// +#define SLANG_FORCE_INLINE inline + +#define SLANG_CUDA_CALL __device__ + +#define SLANG_FORCE_INLINE inline +#define SLANG_INLINE inline + + +// Since we are using unsigned arithmetic, care is needed in this comparison. +// It is *assumed* that sizeInBytes >= elemSize, which means (sizeInBytes - elemSize) >= 0, +// which means only a single test is needed. + +// Asserts for bounds checking. +// It is assumed index/count are unsigned types. +#define SLANG_BOUND_ASSERT(index, count) SLANG_PRELUDE_ASSERT(index < count); +#define SLANG_BOUND_ASSERT_BYTE_ADDRESS(index, elemSize, sizeInBytes) SLANG_PRELUDE_ASSERT(index <= (sizeInBytes - elemSize) && (index & 3) == 0); + +// Macros to zero index if an access is out of range +#define SLANG_BOUND_ZERO_INDEX(index, count) index = (index < count) ? index : 0; +#define SLANG_BOUND_ZERO_INDEX_BYTE_ADDRESS(index, elemSize, sizeInBytes) index = (index <= (sizeInBytes - elemSize)) ? index : 0; + +// The 'FIX' macros define how the index is fixed. The default is to do nothing. 
If SLANG_ENABLE_BOUND_ZERO_INDEX is defined, +// the fix macro will zero the index if it is out of range +#ifdef SLANG_ENABLE_BOUND_ZERO_INDEX +# define SLANG_BOUND_FIX(index, count) SLANG_BOUND_ZERO_INDEX(index, count) +# define SLANG_BOUND_FIX_BYTE_ADDRESS(index, elemSize, sizeInBytes) SLANG_BOUND_ZERO_INDEX_BYTE_ADDRESS(index, elemSize, sizeInBytes) +# define SLANG_BOUND_FIX_FIXED_ARRAY(index, count) SLANG_BOUND_ZERO_INDEX(index, count) SLANG_BOUND_ZERO_INDEX(index, count) +#else +# define SLANG_BOUND_FIX(index, count) +# define SLANG_BOUND_FIX_BYTE_ADDRESS(index, elemSize, sizeInBytes) +# define SLANG_BOUND_FIX_FIXED_ARRAY(index, count) +#endif + +#ifndef SLANG_BOUND_CHECK +# define SLANG_BOUND_CHECK(index, count) SLANG_BOUND_ASSERT(index, count) SLANG_BOUND_FIX(index, count) +#endif + +#ifndef SLANG_BOUND_CHECK_BYTE_ADDRESS +# define SLANG_BOUND_CHECK_BYTE_ADDRESS(index, elemSize, sizeInBytes) SLANG_BOUND_ASSERT_BYTE_ADDRESS(index, elemSize, sizeInBytes) SLANG_BOUND_FIX_BYTE_ADDRESS(index, elemSize, sizeInBytes) +#endif + +#ifndef SLANG_BOUND_CHECK_FIXED_ARRAY +# define SLANG_BOUND_CHECK_FIXED_ARRAY(index, count) SLANG_BOUND_ASSERT(index, count) SLANG_BOUND_FIX_FIXED_ARRAY(index, count) +#endif + + // This macro controls how out-of-range surface coordinates are handled; + // it can equal + // cudaBoundaryModeClamp, in which case out-of-range coordinates are clamped to the valid range + // cudaBoundaryModeZero, in which case out-of-range reads return zero and out-of-range writes are ignored + // cudaBoundaryModeTrap, in which case out-of-range accesses cause the kernel execution to fail. + +#ifndef SLANG_CUDA_BOUNDARY_MODE +# define SLANG_CUDA_BOUNDARY_MODE cudaBoundaryModeZero + +// Can be one of SLANG_CUDA_PTX_BOUNDARY_MODE. 
Only applies *PTX* emitted CUDA operations +// which currently is just RWTextureRW format writes +// +// .trap causes an execution trap on out-of-bounds addresses +// .clamp stores data at the nearest surface location (sized appropriately) +// .zero drops stores to out-of-bounds addresses + +# define SLANG_PTX_BOUNDARY_MODE "zero" +#endif + +struct TypeInfo +{ + size_t typeSize; +}; + +template +struct FixedArray +{ + SLANG_CUDA_CALL const T& operator[](size_t index) const { SLANG_BOUND_CHECK_FIXED_ARRAY(index, SIZE); return m_data[index]; } + SLANG_CUDA_CALL T& operator[](size_t index) { SLANG_BOUND_CHECK_FIXED_ARRAY(index, SIZE); return m_data[index]; } + + T m_data[SIZE]; +}; + +// An array that has no specified size, becomes a 'Array'. This stores the size so it can potentially +// do bounds checking. +template +struct Array +{ + SLANG_CUDA_CALL const T& operator[](size_t index) const { SLANG_BOUND_CHECK(index, count); return data[index]; } + SLANG_CUDA_CALL T& operator[](size_t index) { SLANG_BOUND_CHECK(index, count); return data[index]; } + + T* data; + size_t count; +}; + +// Typically defined in cuda.h, but we can't ship/rely on that, so just define here +typedef unsigned long long CUtexObject; +typedef unsigned long long CUsurfObject; + +// On CUDA sampler state is actually bound up with the texture object. We have a SamplerState type, +// backed as a pointer, to simplify code generation, with the downside that such a binding will take up +// uniform space, even though it will have no effect. +// TODO(JS): Consider ways to strip use of variables of this type so have no binding, +struct SamplerStateUnused; +typedef SamplerStateUnused* SamplerState; + + +// TODO(JS): Not clear yet if this can be handled on CUDA, by just ignoring. +// For now, just map to the index type. 
+typedef size_t NonUniformResourceIndex; + +// Code generator will generate the specific type +template +struct Matrix; + +typedef int1 bool1; +typedef int2 bool2; +typedef int3 bool3; +typedef int4 bool4; + +#if SLANG_CUDA_RTC + +typedef signed char int8_t; +typedef short int16_t; +typedef int int32_t; +typedef long long int64_t; + +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned int uint32_t; +typedef unsigned long long uint64_t; + +#endif + +typedef long long longlong; +typedef unsigned long long ulonglong; + +typedef unsigned char uchar; +typedef unsigned short ushort; +typedef unsigned int uint; + +union Union32 +{ + uint32_t u; + int32_t i; + float f; +}; + +union Union64 +{ + uint64_t u; + int64_t i; + double d; +}; + +SLANG_FORCE_INLINE SLANG_CUDA_CALL float _slang_fmod(float x, float y) +{ + return ::fmodf(x, y); +} +SLANG_FORCE_INLINE SLANG_CUDA_CALL double _slang_fmod(double x, double y) +{ + return ::fmod(x, y); +} + +#if SLANG_CUDA_ENABLE_HALF + +// Add the other vector half types +struct __half1 { __half x; }; +struct __align__(4) __half3 { __half x, y, z; }; +struct __align__(4) __half4 { __half x, y, z, w; }; +#endif + +#define SLANG_VECTOR_GET_ELEMENT(T) \ + SLANG_FORCE_INLINE SLANG_CUDA_CALL T _slang_vector_get_element(T##1 x, int index) { return ((T*)(&x))[index]; }\ + SLANG_FORCE_INLINE SLANG_CUDA_CALL T _slang_vector_get_element(T##2 x, int index) { return ((T*)(&x))[index]; }\ + SLANG_FORCE_INLINE SLANG_CUDA_CALL T _slang_vector_get_element(T##3 x, int index) { return ((T*)(&x))[index]; }\ + SLANG_FORCE_INLINE SLANG_CUDA_CALL T _slang_vector_get_element(T##4 x, int index) { return ((T*)(&x))[index]; } +SLANG_VECTOR_GET_ELEMENT(int) +SLANG_VECTOR_GET_ELEMENT(uint) +SLANG_VECTOR_GET_ELEMENT(short) +SLANG_VECTOR_GET_ELEMENT(ushort) +SLANG_VECTOR_GET_ELEMENT(char) +SLANG_VECTOR_GET_ELEMENT(uchar) +SLANG_VECTOR_GET_ELEMENT(longlong) +SLANG_VECTOR_GET_ELEMENT(ulonglong) +SLANG_VECTOR_GET_ELEMENT(float) 
+SLANG_VECTOR_GET_ELEMENT(double) + +#define SLANG_VECTOR_GET_ELEMENT_PTR(T) \ + SLANG_FORCE_INLINE SLANG_CUDA_CALL T* _slang_vector_get_element_ptr(T##1* x, int index) { return ((T*)(x)) + index; }\ + SLANG_FORCE_INLINE SLANG_CUDA_CALL T* _slang_vector_get_element_ptr(T##2* x, int index) { return ((T*)(x)) + index; }\ + SLANG_FORCE_INLINE SLANG_CUDA_CALL T* _slang_vector_get_element_ptr(T##3* x, int index) { return ((T*)(x)) + index; }\ + SLANG_FORCE_INLINE SLANG_CUDA_CALL T* _slang_vector_get_element_ptr(T##4* x, int index) { return ((T*)(x)) + index; } +SLANG_VECTOR_GET_ELEMENT_PTR(int) +SLANG_VECTOR_GET_ELEMENT_PTR(uint) +SLANG_VECTOR_GET_ELEMENT_PTR(short) +SLANG_VECTOR_GET_ELEMENT_PTR(ushort) +SLANG_VECTOR_GET_ELEMENT_PTR(char) +SLANG_VECTOR_GET_ELEMENT_PTR(uchar) +SLANG_VECTOR_GET_ELEMENT_PTR(longlong) +SLANG_VECTOR_GET_ELEMENT_PTR(ulonglong) +SLANG_VECTOR_GET_ELEMENT_PTR(float) +SLANG_VECTOR_GET_ELEMENT_PTR(double) + +#if SLANG_CUDA_ENABLE_HALF +SLANG_VECTOR_GET_ELEMENT(__half) +SLANG_VECTOR_GET_ELEMENT_PTR(__half) +#endif + +#define SLANG_CUDA_VECTOR_BINARY_OP(T, n, op) \ + SLANG_FORCE_INLINE SLANG_CUDA_CALL T##n operator op(T##n thisVal, T##n other) \ + { \ + T##n result;\ + for (int i = 0; i < n; i++) \ + *_slang_vector_get_element_ptr(&result, i) = _slang_vector_get_element(thisVal,i) op _slang_vector_get_element(other,i); \ + return result;\ + } +#define SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, op) \ + SLANG_FORCE_INLINE SLANG_CUDA_CALL bool##n operator op(T##n thisVal, T##n other) \ + { \ + bool##n result;\ + for (int i = 0; i < n; i++) \ + *_slang_vector_get_element_ptr(&result, i) = (int)(_slang_vector_get_element(thisVal,i) op _slang_vector_get_element(other,i)); \ + return result;\ + } +#define SLANG_CUDA_VECTOR_UNARY_OP(T, n, op) \ + SLANG_FORCE_INLINE SLANG_CUDA_CALL T##n operator op(T##n thisVal) \ + { \ + T##n result;\ + for (int i = 0; i < n; i++) \ + *_slang_vector_get_element_ptr(&result, i) = op _slang_vector_get_element(thisVal,i); \ + 
return result;\ + } + +#define SLANG_CUDA_VECTOR_INT_OP(T, n) \ + SLANG_CUDA_VECTOR_BINARY_OP(T, n, +)\ + SLANG_CUDA_VECTOR_BINARY_OP(T, n, -)\ + SLANG_CUDA_VECTOR_BINARY_OP(T, n, *)\ + SLANG_CUDA_VECTOR_BINARY_OP(T, n, /)\ + SLANG_CUDA_VECTOR_BINARY_OP(T, n, %)\ + SLANG_CUDA_VECTOR_BINARY_OP(T, n, ^)\ + SLANG_CUDA_VECTOR_BINARY_OP(T, n, &)\ + SLANG_CUDA_VECTOR_BINARY_OP(T, n, |)\ + SLANG_CUDA_VECTOR_BINARY_OP(T, n, &&)\ + SLANG_CUDA_VECTOR_BINARY_OP(T, n, ||)\ + SLANG_CUDA_VECTOR_BINARY_OP(T, n, >>)\ + SLANG_CUDA_VECTOR_BINARY_OP(T, n, <<)\ + SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, >)\ + SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, <)\ + SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, >=)\ + SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, <=)\ + SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, ==)\ + SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, !=)\ + SLANG_CUDA_VECTOR_UNARY_OP(T, n, !)\ + SLANG_CUDA_VECTOR_UNARY_OP(T, n, -)\ + SLANG_CUDA_VECTOR_UNARY_OP(T, n, ~) + +#define SLANG_CUDA_VECTOR_INT_OPS(T) \ + SLANG_CUDA_VECTOR_INT_OP(T, 2) \ + SLANG_CUDA_VECTOR_INT_OP(T, 3) \ + SLANG_CUDA_VECTOR_INT_OP(T, 4) + +SLANG_CUDA_VECTOR_INT_OPS(int) +SLANG_CUDA_VECTOR_INT_OPS(uint) +SLANG_CUDA_VECTOR_INT_OPS(ushort) +SLANG_CUDA_VECTOR_INT_OPS(short) +SLANG_CUDA_VECTOR_INT_OPS(char) +SLANG_CUDA_VECTOR_INT_OPS(uchar) +SLANG_CUDA_VECTOR_INT_OPS(longlong) +SLANG_CUDA_VECTOR_INT_OPS(ulonglong) + +#define SLANG_CUDA_VECTOR_FLOAT_OP(T, n) \ + SLANG_CUDA_VECTOR_BINARY_OP(T, n, +)\ + SLANG_CUDA_VECTOR_BINARY_OP(T, n, -)\ + SLANG_CUDA_VECTOR_BINARY_OP(T, n, *)\ + SLANG_CUDA_VECTOR_BINARY_OP(T, n, /)\ + SLANG_CUDA_VECTOR_BINARY_OP(T, n, &&)\ + SLANG_CUDA_VECTOR_BINARY_OP(T, n, ||)\ + SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, >)\ + SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, <)\ + SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, >=)\ + SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, <=)\ + SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, ==)\ + SLANG_CUDA_VECTOR_BINARY_COMPARE_OP(T, n, !=)\ + SLANG_CUDA_VECTOR_UNARY_OP(T, n, -) 
+#define SLANG_CUDA_VECTOR_FLOAT_OPS(T) \ + SLANG_CUDA_VECTOR_FLOAT_OP(T, 2) \ + SLANG_CUDA_VECTOR_FLOAT_OP(T, 3) \ + SLANG_CUDA_VECTOR_FLOAT_OP(T, 4) + +SLANG_CUDA_VECTOR_FLOAT_OPS(float) +SLANG_CUDA_VECTOR_FLOAT_OPS(double) +#if SLANG_CUDA_ENABLE_HALF +SLANG_CUDA_VECTOR_FLOAT_OPS(__half) +#endif +#define SLANG_CUDA_FLOAT_VECTOR_MOD_IMPL(T, n)\ + SLANG_FORCE_INLINE SLANG_CUDA_CALL T##n operator%(const T##n& left, const T##n& right) \ + {\ + T##n result;\ + for (int i = 0; i < n; i++) \ + *_slang_vector_get_element_ptr(&result, i) = _slang_fmod(_slang_vector_get_element(left,i), _slang_vector_get_element(right,i)); \ + return result;\ + } +#define SLANG_CUDA_FLOAT_VECTOR_MOD(T) \ + SLANG_CUDA_FLOAT_VECTOR_MOD_IMPL(T, 2)\ + SLANG_CUDA_FLOAT_VECTOR_MOD_IMPL(T, 3)\ + SLANG_CUDA_FLOAT_VECTOR_MOD_IMPL(T, 4) + +SLANG_CUDA_FLOAT_VECTOR_MOD(float) +SLANG_CUDA_FLOAT_VECTOR_MOD(double) + +#if SLANG_CUDA_RTC +#define SLANG_MAKE_VECTOR(T) \ + SLANG_FORCE_INLINE SLANG_CUDA_CALL T##2 make_##T##2(T x, T y) { return T##2{x, y}; }\ + SLANG_FORCE_INLINE SLANG_CUDA_CALL T##3 make_##T##3(T x, T y, T z) { return T##3{ x, y, z }; }\ + SLANG_FORCE_INLINE SLANG_CUDA_CALL T##4 make_##T##4(T x, T y, T z, T w) { return T##4{ x, y, z, w }; } +SLANG_MAKE_VECTOR(int) +SLANG_MAKE_VECTOR(uint) +SLANG_MAKE_VECTOR(short) +SLANG_MAKE_VECTOR(ushort) +SLANG_MAKE_VECTOR(char) +SLANG_MAKE_VECTOR(uchar) +SLANG_MAKE_VECTOR(float) +SLANG_MAKE_VECTOR(double) +SLANG_MAKE_VECTOR(longlong) +SLANG_MAKE_VECTOR(ulonglong) +#endif + +#if SLANG_CUDA_ENABLE_HALF +SLANG_MAKE_VECTOR(__half) +#endif + +SLANG_FORCE_INLINE SLANG_CUDA_CALL bool1 make_bool1(bool x) { return bool1{ x }; } +SLANG_FORCE_INLINE SLANG_CUDA_CALL bool2 make_bool2(bool x, bool y) { return bool2{ x, y }; } +SLANG_FORCE_INLINE SLANG_CUDA_CALL bool3 make_bool3(bool x, bool y, bool z) { return bool3{ x, y, z }; } +SLANG_FORCE_INLINE SLANG_CUDA_CALL bool4 make_bool4(bool x, bool y, bool z, bool w) { return bool4{ x, y, z, w }; } + +#if SLANG_CUDA_RTC 
+#define SLANG_MAKE_VECTOR_FROM_SCALAR(T) \ + SLANG_FORCE_INLINE SLANG_CUDA_CALL T##1 make_##T##1(T x) { return T##1{x}; }\ + SLANG_FORCE_INLINE SLANG_CUDA_CALL T##2 make_##T##2(T x) { return make_##T##2(x, x); }\ + SLANG_FORCE_INLINE SLANG_CUDA_CALL T##3 make_##T##3(T x) { return make_##T##3(x, x, x); }\ + SLANG_FORCE_INLINE SLANG_CUDA_CALL T##4 make_##T##4(T x) { return make_##T##4(x, x, x, x); } +#else +#define SLANG_MAKE_VECTOR_FROM_SCALAR(T) \ + SLANG_FORCE_INLINE SLANG_CUDA_CALL T##2 make_##T##2(T x) { return make_##T##2(x, x); }\ + SLANG_FORCE_INLINE SLANG_CUDA_CALL T##3 make_##T##3(T x) { return make_##T##3(x, x, x); }\ + SLANG_FORCE_INLINE SLANG_CUDA_CALL T##4 make_##T##4(T x) { return make_##T##4(x, x, x, x); } +#endif +SLANG_MAKE_VECTOR_FROM_SCALAR(int) +SLANG_MAKE_VECTOR_FROM_SCALAR(uint) +SLANG_MAKE_VECTOR_FROM_SCALAR(short) +SLANG_MAKE_VECTOR_FROM_SCALAR(ushort) +SLANG_MAKE_VECTOR_FROM_SCALAR(char) +SLANG_MAKE_VECTOR_FROM_SCALAR(uchar) +SLANG_MAKE_VECTOR_FROM_SCALAR(longlong) +SLANG_MAKE_VECTOR_FROM_SCALAR(ulonglong) +SLANG_MAKE_VECTOR_FROM_SCALAR(float) +SLANG_MAKE_VECTOR_FROM_SCALAR(double) +#if SLANG_CUDA_ENABLE_HALF +SLANG_MAKE_VECTOR_FROM_SCALAR(__half) +#endif + +#define SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(Fn,T,N) \ + SLANG_FORCE_INLINE SLANG_CUDA_CALL T##N Fn(T##N* address, T##N val) \ + {\ + T##N result; \ + for (int i = 0; i < N; i++) \ + *_slang_vector_get_element_ptr(&result, i) = Fn(_slang_vector_get_element_ptr(address, i), _slang_vector_get_element(val, i)); \ + return result; \ + }\ + +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 900 +SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(atomicAdd, float, 2) +SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(atomicAdd, float, 4) +#endif +SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(atomicAdd, float, 3) +SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(atomicAdd, int, 2) +SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(atomicAdd, int, 3) +SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(atomicAdd, int, 4) +SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(atomicAdd, 
uint, 2) +SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(atomicAdd, uint, 3) +SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(atomicAdd, uint, 4) +SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(atomicAdd, ulonglong, 2) +SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(atomicAdd, ulonglong, 3) +SLANG_CUDA_VECTOR_ATOMIC_BINARY_IMPL(atomicAdd, ulonglong, 4) + +template +struct GetVectorTypeImpl {}; + +#define GET_VECTOR_TYPE_IMPL(T, n)\ +template<>\ +struct GetVectorTypeImpl\ +{\ + typedef T##n type;\ + static SLANG_FORCE_INLINE SLANG_CUDA_CALL T##n fromScalar(T v) { return make_##T##n(v); } \ +}; +#define GET_VECTOR_TYPE_IMPL_N(T)\ + GET_VECTOR_TYPE_IMPL(T, 1)\ + GET_VECTOR_TYPE_IMPL(T, 2)\ + GET_VECTOR_TYPE_IMPL(T, 3)\ + GET_VECTOR_TYPE_IMPL(T, 4) + +GET_VECTOR_TYPE_IMPL_N(int) +GET_VECTOR_TYPE_IMPL_N(uint) +GET_VECTOR_TYPE_IMPL_N(short) +GET_VECTOR_TYPE_IMPL_N(ushort) +GET_VECTOR_TYPE_IMPL_N(char) +GET_VECTOR_TYPE_IMPL_N(uchar) +GET_VECTOR_TYPE_IMPL_N(longlong) +GET_VECTOR_TYPE_IMPL_N(ulonglong) +GET_VECTOR_TYPE_IMPL_N(float) +GET_VECTOR_TYPE_IMPL_N(double) +#if SLANG_CUDA_ENABLE_HALF +GET_VECTOR_TYPE_IMPL_N(__half) +#endif +template +using Vector = typename GetVectorTypeImpl::type; + +template +SLANG_FORCE_INLINE SLANG_CUDA_CALL Vector _slang_vector_reshape(const Vector other) +{ + Vector result; + for (int i = 0; i < n; i++) + { + OtherT otherElement = T(0); + if (i < m) + otherElement = _slang_vector_get_element(other, i); + *_slang_vector_get_element_ptr(&result, i) = (T)otherElement; + } + return result; +} + +template +struct Matrix +{ + Vector rows[ROWS]; + SLANG_FORCE_INLINE SLANG_CUDA_CALL Vector& operator[](size_t index) { return rows[index]; } +}; + + +template +SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix makeMatrix(T scalar) +{ + Matrix result; + for (int i = 0; i < ROWS; i++) + result.rows[i] = GetVectorTypeImpl::fromScalar(scalar); + return result; + +} + +template +SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix makeMatrix(const Vector& row0) +{ + Matrix result; + result.rows[0] = row0; + return 
result; +} + +template +SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix makeMatrix(const Vector& row0, const Vector& row1) +{ + Matrix result; + result.rows[0] = row0; + result.rows[1] = row1; + return result; +} + +template +SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix makeMatrix(const Vector& row0, const Vector& row1, const Vector& row2) +{ + Matrix result; + result.rows[0] = row0; + result.rows[1] = row1; + result.rows[2] = row2; + return result; +} + +template +SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix makeMatrix(const Vector& row0, const Vector& row1, const Vector& row2, const Vector& row3) +{ + Matrix result; + result.rows[0] = row0; + result.rows[1] = row1; + result.rows[2] = row2; + result.rows[3] = row3; + return result; +} + +template +SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix makeMatrix(const Matrix& other) +{ + Matrix result; + int minRow = ROWS; + int minCol = COLS; + if (minRow > otherRow) minRow = otherRow; + if (minCol > otherCol) minCol = otherCol; + for (int i = 0; i < minRow; i++) + for (int j = 0; j < minCol; j++) + *_slang_vector_get_element_ptr(result.rows + i, j) = (T)_slang_vector_get_element(other.rows[i], j); + return result; +} + +template +SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix makeMatrix(T v0, T v1, T v2, T v3) +{ + Matrix rs; + rs.rows[0].x = v0; rs.rows[0].y = v1; + rs.rows[1].x = v2; rs.rows[1].y = v3; + return rs; +} + +template +SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix makeMatrix(T v0, T v1, T v2, T v3, T v4, T v5) +{ + Matrix rs; + if (COLS == 3) + { + rs.rows[0].x = v0; rs.rows[0].y = v1; rs.rows[0].z = v2; + rs.rows[1].x = v3; rs.rows[1].y = v4; rs.rows[1].z = v5; + } + else + { + rs.rows[0].x = v0; rs.rows[0].y = v1; + rs.rows[1].x = v2; rs.rows[1].y = v3; + rs.rows[2].x = v4; rs.rows[2].y = v5; + } + return rs; + +} + +template +SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix makeMatrix(T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7) +{ + Matrix rs; + if (COLS == 4) + { + rs.rows[0].x = v0; rs.rows[0].y = v1; rs.rows[0].z = v2; 
rs.rows[0].w = v3; + rs.rows[1].x = v4; rs.rows[1].y = v5; rs.rows[1].z = v6; rs.rows[1].w = v7; + } + else + { + rs.rows[0].x = v0; rs.rows[0].y = v1; + rs.rows[1].x = v2; rs.rows[1].y = v3; + rs.rows[2].x = v4; rs.rows[2].y = v5; + rs.rows[3].x = v6; rs.rows[3].y = v7; + } + return rs; +} + +template +SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix makeMatrix(T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8) +{ + Matrix rs; + rs.rows[0].x = v0; rs.rows[0].y = v1; rs.rows[0].z = v2; + rs.rows[1].x = v3; rs.rows[1].y = v4; rs.rows[1].z = v5; + rs.rows[2].x = v6; rs.rows[2].y = v7; rs.rows[2].z = v8; + return rs; +} + +template +SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix makeMatrix(T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8, T v9, T v10, T v11) +{ + Matrix rs; + if (COLS == 4) + { + rs.rows[0].x = v0; rs.rows[0].y = v1; rs.rows[0].z = v2; rs.rows[0].w = v3; + rs.rows[1].x = v4; rs.rows[1].y = v5; rs.rows[1].z = v6; rs.rows[1].w = v7; + rs.rows[2].x = v8; rs.rows[2].y = v9; rs.rows[2].z = v10; rs.rows[2].w = v11; + } + else + { + rs.rows[0].x = v0; rs.rows[0].y = v1; rs.rows[0].z = v2; + rs.rows[1].x = v3; rs.rows[1].y = v4; rs.rows[1].z = v5; + rs.rows[2].x = v6; rs.rows[2].y = v7; rs.rows[2].z = v8; + rs.rows[3].x = v9; rs.rows[3].y = v10; rs.rows[3].z = v11; + } + return rs; +} + +template +SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix makeMatrix(T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15) +{ + Matrix rs; + rs.rows[0].x = v0; rs.rows[0].y = v1; rs.rows[0].z = v2; rs.rows[0].w = v3; + rs.rows[1].x = v4; rs.rows[1].y = v5; rs.rows[1].z = v6; rs.rows[1].w = v7; + rs.rows[2].x = v8; rs.rows[2].y = v9; rs.rows[2].z = v10; rs.rows[2].w = v11; + rs.rows[3].x = v12; rs.rows[3].y = v13; rs.rows[3].z = v14; rs.rows[3].w = v15; + return rs; +} + +#define SLANG_MATRIX_BINARY_OP(T, op) \ + template \ + SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix operator op(const Matrix& thisVal, const Matrix& other) \ + { \ + Matrix 
result;\ + for (int i = 0; i < R; i++) \ + for (int j = 0; j < C; j++) \ + *_slang_vector_get_element_ptr(result.rows+i,j) = _slang_vector_get_element(thisVal.rows[i], j) op _slang_vector_get_element(other.rows[i], j); \ + return result;\ + } + +#define SLANG_MATRIX_UNARY_OP(T, op) \ + template \ + SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix operator op(const Matrix& thisVal) \ + { \ + Matrix result;\ + for (int i = 0; i < R; i++) \ + for (int j = 0; j < C; j++) \ + *_slang_vector_get_element_ptr(result.rows+i,j) = op _slang_vector_get_element(thisVal.rows[i], j); \ + return result;\ + } +#define SLANG_INT_MATRIX_OPS(T) \ + SLANG_MATRIX_BINARY_OP(T, +)\ + SLANG_MATRIX_BINARY_OP(T, -)\ + SLANG_MATRIX_BINARY_OP(T, *)\ + SLANG_MATRIX_BINARY_OP(T, / )\ + SLANG_MATRIX_BINARY_OP(T, &)\ + SLANG_MATRIX_BINARY_OP(T, |)\ + SLANG_MATRIX_BINARY_OP(T, &&)\ + SLANG_MATRIX_BINARY_OP(T, ||)\ + SLANG_MATRIX_BINARY_OP(T, ^)\ + SLANG_MATRIX_BINARY_OP(T, %)\ + SLANG_MATRIX_UNARY_OP(T, !)\ + SLANG_MATRIX_UNARY_OP(T, ~) +#define SLANG_FLOAT_MATRIX_OPS(T) \ + SLANG_MATRIX_BINARY_OP(T, +)\ + SLANG_MATRIX_BINARY_OP(T, -)\ + SLANG_MATRIX_BINARY_OP(T, *)\ + SLANG_MATRIX_BINARY_OP(T, /)\ + SLANG_MATRIX_UNARY_OP(T, -) +SLANG_INT_MATRIX_OPS(int) +SLANG_INT_MATRIX_OPS(uint) +SLANG_INT_MATRIX_OPS(short) +SLANG_INT_MATRIX_OPS(ushort) +SLANG_INT_MATRIX_OPS(char) +SLANG_INT_MATRIX_OPS(uchar) +SLANG_INT_MATRIX_OPS(longlong) +SLANG_INT_MATRIX_OPS(ulonglong) +SLANG_FLOAT_MATRIX_OPS(float) +SLANG_FLOAT_MATRIX_OPS(double) +#if SLANG_CUDA_ENABLE_HALF +SLANG_FLOAT_MATRIX_OPS(__half) +#endif +#define SLANG_MATRIX_INT_NEG_OP(T) \ + template\ + SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix operator-(Matrix thisVal) \ + { \ + Matrix result;\ + for (int i = 0; i < R; i++) \ + for (int j = 0; j < C; j++) \ + *_slang_vector_get_element_ptr(result.rows+i,j) = 0 - _slang_vector_get_element(thisVal.rows[i], j); \ + return result;\ + } + SLANG_MATRIX_INT_NEG_OP(int) + SLANG_MATRIX_INT_NEG_OP(uint) + 
SLANG_MATRIX_INT_NEG_OP(short) + SLANG_MATRIX_INT_NEG_OP(ushort) + SLANG_MATRIX_INT_NEG_OP(char) + SLANG_MATRIX_INT_NEG_OP(uchar) + SLANG_MATRIX_INT_NEG_OP(longlong) + SLANG_MATRIX_INT_NEG_OP(ulonglong) + +#define SLANG_FLOAT_MATRIX_MOD(T)\ + template \ + SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix operator%(Matrix left, Matrix right) \ + {\ + Matrix result;\ + for (int i = 0; i < R; i++) \ + for (int j = 0; j < C; j++) \ + *_slang_vector_get_element_ptr(result.rows+i,j) = _slang_fmod(_slang_vector_get_element(left.rows[i], j), _slang_vector_get_element(right.rows[i], j)); \ + return result;\ + } + + SLANG_FLOAT_MATRIX_MOD(float) + SLANG_FLOAT_MATRIX_MOD(double) +#if SLANG_CUDA_ENABLE_HALF + template + SLANG_FORCE_INLINE SLANG_CUDA_CALL Matrix<__half, R, C> operator%(Matrix<__half, R, C> left, Matrix<__half, R, C> right) + { + Matrix<__half, R, C> result; + for (int i = 0; i < R; i++) + for (int j = 0; j < C; j++) + * _slang_vector_get_element_ptr(result.rows + i, j) = __float2half(_slang_fmod(__half2float(_slang_vector_get_element(left.rows[i], j)), __half2float(_slang_vector_get_element(right.rows[i], j)))); + return result; + } +#endif +#undef SLANG_FLOAT_MATRIX_MOD +#undef SLANG_MATRIX_BINARY_OP +#undef SLANG_MATRIX_UNARY_OP +#undef SLANG_INT_MATRIX_OPS +#undef SLANG_FLOAT_MATRIX_OPS +#undef SLANG_MATRIX_INT_NEG_OP +#undef SLANG_FLOAT_MATRIX_MOD + +#define SLANG_SELECT_IMPL(T, N)\ +SLANG_FORCE_INLINE SLANG_CUDA_CALL Vector _slang_select(bool##N condition, Vector v0, Vector v1) \ +{ \ + Vector result; \ + for (int i = 0; i < N; i++) \ + { \ + *_slang_vector_get_element_ptr(&result, i) = _slang_vector_get_element(condition, i) ? 
_slang_vector_get_element(v0, i) : _slang_vector_get_element(v1, i); \ + } \ + return result; \ +} +#define SLANG_SELECT_T(T)\ + SLANG_SELECT_IMPL(T, 2)\ + SLANG_SELECT_IMPL(T, 3)\ + SLANG_SELECT_IMPL(T, 4) + +SLANG_SELECT_T(int) +SLANG_SELECT_T(uint) +SLANG_SELECT_T(short) +SLANG_SELECT_T(ushort) +SLANG_SELECT_T(char) +SLANG_SELECT_T(uchar) +SLANG_SELECT_T(float) +SLANG_SELECT_T(double) + +template +SLANG_FORCE_INLINE SLANG_CUDA_CALL T _slang_select(bool condition, T v0, T v1) +{ + return condition ? v0 : v1; +} + +// +// Half support +// + +#if SLANG_CUDA_ENABLE_HALF +SLANG_SELECT_T(__half) + +// Convenience functions ushort -> half + +SLANG_FORCE_INLINE SLANG_CUDA_CALL __half2 __ushort_as_half(const ushort2& i) { return __halves2half2(__ushort_as_half(i.x), __ushort_as_half(i.y)); } +SLANG_FORCE_INLINE SLANG_CUDA_CALL __half3 __ushort_as_half(const ushort3& i) { return __half3{__ushort_as_half(i.x), __ushort_as_half(i.y), __ushort_as_half(i.z)}; } +SLANG_FORCE_INLINE SLANG_CUDA_CALL __half4 __ushort_as_half(const ushort4& i) { return __half4{ __ushort_as_half(i.x), __ushort_as_half(i.y), __ushort_as_half(i.z), __ushort_as_half(i.w) }; } + +// Convenience functions half -> ushort + +SLANG_FORCE_INLINE SLANG_CUDA_CALL ushort2 __half_as_ushort(const __half2& i) { return make_ushort2(__half_as_ushort(i.x), __half_as_ushort(i.y)); } +SLANG_FORCE_INLINE SLANG_CUDA_CALL ushort3 __half_as_ushort(const __half3& i) { return make_ushort3(__half_as_ushort(i.x), __half_as_ushort(i.y), __half_as_ushort(i.z)); } +SLANG_FORCE_INLINE SLANG_CUDA_CALL ushort4 __half_as_ushort(const __half4& i) { return make_ushort4(__half_as_ushort(i.x), __half_as_ushort(i.y), __half_as_ushort(i.z), __half_as_ushort(i.w)); } + +// This is a little bit of a hack. 
Fortunately CUDA has the definitions of the templated types in +// include/surface_indirect_functions.h +// Here we find the template definition requires a specialization of __nv_isurf_trait to allow +// a specialization of the surface write functions. +// This *isn't* a problem on the read functions as they don't have a return type that uses this mechanism + +template<> struct __nv_isurf_trait<__half> { typedef void type; }; +template<> struct __nv_isurf_trait<__half2> { typedef void type; }; +template<> struct __nv_isurf_trait<__half4> { typedef void type; }; + +#define SLANG_DROP_PARENS(...) __VA_ARGS__ + +#define SLANG_SURFACE_READ(FUNC_NAME, TYPE_ARGS, ARGS) \ +template <> \ +SLANG_FORCE_INLINE SLANG_CUDA_CALL __half FUNC_NAME<__half>(cudaSurfaceObject_t surfObj, SLANG_DROP_PARENS TYPE_ARGS, cudaSurfaceBoundaryMode boundaryMode) \ +{ \ + return __ushort_as_half(FUNC_NAME(surfObj, SLANG_DROP_PARENS ARGS, boundaryMode)); \ +} \ +\ +template <> \ +SLANG_FORCE_INLINE SLANG_CUDA_CALL __half2 FUNC_NAME<__half2>(cudaSurfaceObject_t surfObj, SLANG_DROP_PARENS TYPE_ARGS, cudaSurfaceBoundaryMode boundaryMode) \ +{ \ + return __ushort_as_half(FUNC_NAME(surfObj, SLANG_DROP_PARENS ARGS, boundaryMode)); \ +} \ +\ +template <> \ +SLANG_FORCE_INLINE SLANG_CUDA_CALL __half4 FUNC_NAME<__half4>(cudaSurfaceObject_t surfObj, SLANG_DROP_PARENS TYPE_ARGS, cudaSurfaceBoundaryMode boundaryMode) \ +{ \ + return __ushort_as_half(FUNC_NAME(surfObj, SLANG_DROP_PARENS ARGS, boundaryMode)); \ +} + +SLANG_SURFACE_READ(surf1Dread, (int x), (x)) +SLANG_SURFACE_READ(surf2Dread, (int x, int y), (x, y)) +SLANG_SURFACE_READ(surf3Dread, (int x, int y, int z), (x, y, z)) +SLANG_SURFACE_READ(surf1DLayeredread, (int x, int layer), (x, layer)) +SLANG_SURFACE_READ(surf2DLayeredread, (int x, int y, int layer), (x, y, layer)) +SLANG_SURFACE_READ(surfCubemapread, (int x, int y, int face), (x, y, face)) +SLANG_SURFACE_READ(surfCubemapLayeredread, (int x, int y, int layerFace), (x, y, layerFace)) + +#define 
SLANG_SURFACE_WRITE(FUNC_NAME, TYPE_ARGS, ARGS) \ +template <> \ +SLANG_FORCE_INLINE SLANG_CUDA_CALL void FUNC_NAME<__half>(__half data, cudaSurfaceObject_t surfObj, SLANG_DROP_PARENS TYPE_ARGS, cudaSurfaceBoundaryMode boundaryMode) \ +{ \ + FUNC_NAME(__half_as_ushort(data), surfObj, SLANG_DROP_PARENS ARGS, boundaryMode); \ +} \ +\ +template <> \ +SLANG_FORCE_INLINE SLANG_CUDA_CALL void FUNC_NAME<__half2>(__half2 data, cudaSurfaceObject_t surfObj, SLANG_DROP_PARENS TYPE_ARGS, cudaSurfaceBoundaryMode boundaryMode) \ +{ \ + FUNC_NAME(__half_as_ushort(data), surfObj, SLANG_DROP_PARENS ARGS, boundaryMode); \ +} \ +\ +template <> \ +SLANG_FORCE_INLINE SLANG_CUDA_CALL void FUNC_NAME<__half4>(__half4 data, cudaSurfaceObject_t surfObj, SLANG_DROP_PARENS TYPE_ARGS, cudaSurfaceBoundaryMode boundaryMode) \ +{ \ + FUNC_NAME(__half_as_ushort(data), surfObj, SLANG_DROP_PARENS ARGS, boundaryMode); \ +} + +SLANG_SURFACE_WRITE(surf1Dwrite, (int x), (x)) +SLANG_SURFACE_WRITE(surf2Dwrite, (int x, int y), (x, y)) +SLANG_SURFACE_WRITE(surf3Dwrite, (int x, int y, int z), (x, y, z)) +SLANG_SURFACE_WRITE(surf1DLayeredwrite, (int x, int layer), (x, layer)) +SLANG_SURFACE_WRITE(surf2DLayeredwrite, (int x, int y, int layer), (x, y, layer)) +SLANG_SURFACE_WRITE(surfCubemapwrite, (int x, int y, int face), (x, y, face)) +SLANG_SURFACE_WRITE(surfCubemapLayeredwrite, (int x, int y, int layerFace), (x, y, layerFace)) + +// ! Hack to test out reading !!! 
+// Only works converting *from* half + +//template +//SLANG_FORCE_INLINE SLANG_CUDA_CALL T surf2Dread_convert(cudaSurfaceObject_t surfObj, int x, int y, cudaSurfaceBoundaryMode boundaryMode); + +#define SLANG_SURFACE_READ_HALF_CONVERT(FUNC_NAME, TYPE_ARGS, ARGS) \ +\ +template \ +SLANG_FORCE_INLINE SLANG_CUDA_CALL T FUNC_NAME##_convert(cudaSurfaceObject_t surfObj, SLANG_DROP_PARENS TYPE_ARGS, cudaSurfaceBoundaryMode boundaryMode); \ +\ +template <> \ +SLANG_FORCE_INLINE SLANG_CUDA_CALL float FUNC_NAME##_convert(cudaSurfaceObject_t surfObj, SLANG_DROP_PARENS TYPE_ARGS, cudaSurfaceBoundaryMode boundaryMode) \ +{ \ + return __ushort_as_half(FUNC_NAME(surfObj, SLANG_DROP_PARENS ARGS, boundaryMode)); \ +} \ +\ +template <> \ +SLANG_FORCE_INLINE SLANG_CUDA_CALL float2 FUNC_NAME##_convert(cudaSurfaceObject_t surfObj, SLANG_DROP_PARENS TYPE_ARGS, cudaSurfaceBoundaryMode boundaryMode) \ +{ \ + const __half2 v = __ushort_as_half(FUNC_NAME(surfObj, SLANG_DROP_PARENS ARGS, boundaryMode)); \ + return float2{v.x, v.y}; \ +} \ +\ +template <> \ +SLANG_FORCE_INLINE SLANG_CUDA_CALL float4 FUNC_NAME##_convert(cudaSurfaceObject_t surfObj, SLANG_DROP_PARENS TYPE_ARGS, cudaSurfaceBoundaryMode boundaryMode) \ +{ \ + const __half4 v = __ushort_as_half(FUNC_NAME(surfObj, SLANG_DROP_PARENS ARGS, boundaryMode)); \ + return float4{v.x, v.y, v.z, v.w}; \ +} + +SLANG_SURFACE_READ_HALF_CONVERT(surf1Dread, (int x), (x)) +SLANG_SURFACE_READ_HALF_CONVERT(surf2Dread, (int x, int y), (x, y)) +SLANG_SURFACE_READ_HALF_CONVERT(surf3Dread, (int x, int y, int z), (x, y, z)) + +#endif + +// Support for doing format conversion when writing to a surface/RWTexture + +// NOTE! For normal surface access x values are *byte* addressed. +// For the _convert versions they are *not*. They don't need to be because sust.p does not require it. 
+
+// Primary templates for the format-converting surface writes; only the float, float2 and float4
+// specializations below are defined in this section.
+// The boundaryMode argument is not read by the specializations: the mode suffix of the PTX
+// instruction comes from the SLANG_PTX_BOUNDARY_MODE macro.
+template <typename T>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf1Dwrite_convert(T, cudaSurfaceObject_t surfObj, int x, cudaSurfaceBoundaryMode boundaryMode);
+template <typename T>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert(T, cudaSurfaceObject_t surfObj, int x, int y, cudaSurfaceBoundaryMode boundaryMode);
+template <typename T>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert(T, cudaSurfaceObject_t surfObj, int x, int y, int z, cudaSurfaceBoundaryMode boundaryMode);
+
+// https://docs.nvidia.com/cuda/inline-ptx-assembly/index.html
+// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#surface-instructions-sust
+
+// Float
+
+template <>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf1Dwrite_convert(float v, cudaSurfaceObject_t surfObj, int x, cudaSurfaceBoundaryMode boundaryMode)
+{
+    asm volatile ( "{sust.p.1d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1}], {%2};}\n\t" :: "l"(surfObj),"r"(x),"f"(v));
+}
+
+template <>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert(float v, cudaSurfaceObject_t surfObj, int x, int y, cudaSurfaceBoundaryMode boundaryMode)
+{
+    asm volatile ( "{sust.p.2d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2}], {%3};}\n\t" :: "l"(surfObj),"r"(x),"r"(y),"f"(v));
+}
+
+// The 3D variants use the .3d geometry. PTX takes a four-element coordinate vector for 3d
+// surfaces and ignores the fourth element, so z (%3) is repeated as padding.
+template <>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert(float v, cudaSurfaceObject_t surfObj, int x, int y, int z, cudaSurfaceBoundaryMode boundaryMode)
+{
+    asm volatile ( "{sust.p.3d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2,%3,%3}], {%4};}\n\t" :: "l"(surfObj),"r"(x),"r"(y),"r"(z),"f"(v));
+}
+
+// Float2
+// NOTE(review): the multi-component writes pass a vector operand without a .v2/.v4 modifier - confirm against the PTX ISA.
+
+template <>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf1Dwrite_convert(float2 v, cudaSurfaceObject_t surfObj, int x, cudaSurfaceBoundaryMode boundaryMode)
+{
+    const float vx = v.x, vy = v.y;
+    asm volatile ( "{sust.p.1d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1}], {%2,%3};}\n\t" :: "l"(surfObj),"r"(x),"f"(vx),"f"(vy));
+}
+
+template <>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert(float2 v, cudaSurfaceObject_t surfObj, int x, int y, cudaSurfaceBoundaryMode boundaryMode)
+{
+    const float vx = v.x, vy = v.y;
+    asm volatile ( "{sust.p.2d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2}], {%3,%4};}\n\t" :: "l"(surfObj),"r"(x),"r"(y),"f"(vx),"f"(vy));
+}
+
+template <>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert(float2 v, cudaSurfaceObject_t surfObj, int x, int y, int z, cudaSurfaceBoundaryMode boundaryMode)
+{
+    const float vx = v.x, vy = v.y;
+    asm volatile ( "{sust.p.3d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2,%3,%3}], {%4,%5};}\n\t" :: "l"(surfObj),"r"(x),"r"(y),"r"(z),"f"(vx),"f"(vy));
+}
+
+// Float4
+template <>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf1Dwrite_convert(float4 v, cudaSurfaceObject_t surfObj, int x, cudaSurfaceBoundaryMode boundaryMode)
+{
+    const float vx = v.x, vy = v.y, vz = v.z, vw = v.w;
+    asm volatile ( "{sust.p.1d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1}], {%2,%3,%4,%5};}\n\t" :: "l"(surfObj),"r"(x),"f"(vx),"f"(vy),"f"(vz),"f"(vw));
+}
+
+template <>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf2Dwrite_convert(float4 v, cudaSurfaceObject_t surfObj, int x, int y, cudaSurfaceBoundaryMode boundaryMode)
+{
+    const float vx = v.x, vy = v.y, vz = v.z, vw = v.w;
+    asm volatile ( "{sust.p.2d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2}], {%3,%4,%5,%6};}\n\t" :: "l"(surfObj),"r"(x),"r"(y),"f"(vx),"f"(vy),"f"(vz),"f"(vw));
+}
+
+template <>
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void surf3Dwrite_convert(float4 v, cudaSurfaceObject_t surfObj, int x, int y, int z, cudaSurfaceBoundaryMode boundaryMode)
+{
+    const float vx = v.x, vy = v.y, vz = v.z, vw = v.w;
+    asm volatile ( "{sust.p.3d.b32." SLANG_PTX_BOUNDARY_MODE " [%0, {%1,%2,%3,%3}], {%4,%5,%6,%7};}\n\t" :: "l"(surfObj),"r"(x),"r"(y),"r"(z),"f"(vx),"f"(vy),"f"(vz),"f"(vw));
+}
+
+// ----------------------------- F32 -----------------------------------------
+
+// Unary: thin wrappers over the single-precision math functions
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_ceil(float f) { return ::ceilf(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_floor(float f) { return ::floorf(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_round(float f) { return ::roundf(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_sin(float f) { return ::sinf(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_cos(float f) { return ::cosf(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void F32_sincos(float f, float* s, float* c) { ::sincosf(f, s, c); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_tan(float f) { return ::tanf(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_asin(float f) { return ::asinf(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_acos(float f) { return ::acosf(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_atan(float f) { return ::atanf(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_sinh(float f) { return ::sinhf(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_cosh(float f) { return ::coshf(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_tanh(float f) { return ::tanhf(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_log2(float f) { return ::log2f(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_log(float f) { return ::logf(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_log10(float f) { return ::log10f(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_exp2(float f) { return ::exp2f(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_exp(float f) { return ::expf(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_abs(float f) { return ::fabsf(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_trunc(float f) { return ::truncf(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_sqrt(float f) { return ::sqrtf(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_rsqrt(float f) { return ::rsqrtf(f); }
+// Returns f itself for zero, otherwise -1 or +1.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_sign(float f) { return ( f == 0.0f) ? f : (( f < 0.0f) ? -1.0f : 1.0f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_frac(float f) { return f - F32_floor(f); }
+
+SLANG_FORCE_INLINE SLANG_CUDA_CALL bool F32_isnan(float f) { return isnan(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL bool F32_isfinite(float f) { return isfinite(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL bool F32_isinf(float f) { return isinf(f); }
+
+// Binary
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_min(float a, float b) { return ::fminf(a, b); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_max(float a, float b) { return ::fmaxf(a, b); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_pow(float a, float b) { return ::powf(a, b); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_fmod(float a, float b) { return ::fmodf(a, b); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_remainder(float a, float b) { return ::remainderf(a, b); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_atan2(float a, float b) { return float(::atan2(a, b)); }
+
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_frexp(float x, int* e) { return frexpf(x, e); }
+
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_modf(float x, float* ip)
+{
+    return ::modff(x, ip);
+}
+
+// Bit reinterpretation through Union32 (no numeric conversion).
+SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t F32_asuint(float f) { Union32 u; u.f = f; return u.u; }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL int32_t F32_asint(float f) { Union32 u; u.f = f; return u.i; }
+
+// Ternary
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float F32_fma(float a, float b, float c) { return ::fmaf(a, b, c); }
+
+
+// ----------------------------- F64 -----------------------------------------
+
+// Unary
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_ceil(double f) { return ::ceil(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_floor(double f) { return ::floor(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_round(double f) { return ::round(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_sin(double f) { return ::sin(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_cos(double f) { return ::cos(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void F64_sincos(double f, double* s, double* c) { ::sincos(f, s, c); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_tan(double f) { return ::tan(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_asin(double f) { return ::asin(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_acos(double f) { return ::acos(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_atan(double f) { return ::atan(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_sinh(double f) { return ::sinh(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_cosh(double f) { return ::cosh(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_tanh(double f) { return ::tanh(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_log2(double f) { return ::log2(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_log(double f) { return ::log(f); }
+// Takes double like every other F64_ helper, so the argument is not rounded to float first.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_log10(double f) { return ::log10(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_exp2(double f) { return ::exp2(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_exp(double f) { return ::exp(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_abs(double f) { return ::fabs(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_trunc(double f) { return ::trunc(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_sqrt(double f) { return ::sqrt(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_rsqrt(double f) { return ::rsqrt(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_sign(double f) { return (f == 0.0) ? f : ((f < 0.0) ? -1.0 : 1.0); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_frac(double f) { return f - F64_floor(f); }
+
+SLANG_FORCE_INLINE SLANG_CUDA_CALL bool F64_isnan(double f) { return isnan(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL bool F64_isfinite(double f) { return isfinite(f); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL bool F64_isinf(double f) { return isinf(f); }
+
+// Binary
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_min(double a, double b) { return ::fmin(a, b); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_max(double a, double b) { return ::fmax(a, b); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_pow(double a, double b) { return ::pow(a, b); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_fmod(double a, double b) { return ::fmod(a, b); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_remainder(double a, double b) { return ::remainder(a, b); }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_atan2(double a, double b) { return ::atan2(a, b); }
+
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_frexp(double x, int* e) { return ::frexp(x, e); }
+
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_modf(double x, double* ip)
+{
+    return ::modf(x, ip);
+}
+
+// Splits the 64-bit pattern of d into its low and high 32-bit words.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void F64_asuint(double d, uint32_t* low, uint32_t* hi)
+{
+    Union64 u;
+    u.d = d;
+    *low = uint32_t(u.u);
+    *hi = uint32_t(u.u >> 32);
+}
+
+SLANG_FORCE_INLINE SLANG_CUDA_CALL void F64_asint(double d, int32_t* low, int32_t* hi)
+{
+    Union64 u;
+    u.d = d;
+    *low = int32_t(u.u);
+    *hi = int32_t(u.u >> 32);
+}
+
+// Ternary
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double F64_fma(double a, double b, double c) { return ::fma(a, b, c); }
+
+// ----------------------------- I32 -----------------------------------------
+
+// Unary
+SLANG_FORCE_INLINE SLANG_CUDA_CALL int32_t I32_abs(int32_t f) { return (f < 0) ? -f : f; }
+
+// Binary
+SLANG_FORCE_INLINE SLANG_CUDA_CALL int32_t I32_min(int32_t a, int32_t b) { return a < b ? a : b; }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL int32_t I32_max(int32_t a, int32_t b) { return a > b ? a : b; }
+
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float I32_asfloat(int32_t x) { Union32 u; u.i = x; return u.f; }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t I32_asuint(int32_t x) { return uint32_t(x); }
+// Builds a double from two 32-bit words; low goes through uint32_t so it is not sign-extended into the high word.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double I32_asdouble(int32_t low, int32_t hi )
+{
+    Union64 u;
+    u.u = (uint64_t(hi) << 32) | uint32_t(low);
+    return u.d;
+}
+
+// ----------------------------- U32 -----------------------------------------
+
+// Unary
+SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t U32_abs(uint32_t f) { return f; }
+
+// Binary
+SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t U32_min(uint32_t a, uint32_t b) { return a < b ? a : b; }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t U32_max(uint32_t a, uint32_t b) { return a > b ? a : b; }
+
+SLANG_FORCE_INLINE SLANG_CUDA_CALL float U32_asfloat(uint32_t x) { Union32 u; u.u = x; return u.f; }
+// Mirror image of I32_asuint: unsigned in, signed out.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL int32_t U32_asint(uint32_t x) { return int32_t(x); }
+
+SLANG_FORCE_INLINE SLANG_CUDA_CALL double U32_asdouble(uint32_t low, uint32_t hi)
+{
+    Union64 u;
+    u.u = (uint64_t(hi) << 32) | low;
+    return u.d;
+}
+
+SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t U32_countbits(uint32_t v)
+{
+    // https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH__INTRINSIC__INT.html#group__CUDA__MATH__INTRINSIC__INT_1g43c9c7d2b9ebf202ff1ef5769989be46
+    return __popc(v);
+}
+
+
+// ----------------------------- I64 -----------------------------------------
+
+SLANG_FORCE_INLINE SLANG_CUDA_CALL int64_t I64_abs(int64_t f) { return (f < 0) ? -f : f; }
+
+SLANG_FORCE_INLINE SLANG_CUDA_CALL int64_t I64_min(int64_t a, int64_t b) { return a < b ? a : b; }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL int64_t I64_max(int64_t a, int64_t b) { return a > b ? a : b; }
+
+// ----------------------------- U64 -----------------------------------------
+
+// The U64 helpers return uint64_t so results above INT64_MAX are not reinterpreted as negative.
+SLANG_FORCE_INLINE SLANG_CUDA_CALL uint64_t U64_abs(uint64_t f) { return f; }
+
+SLANG_FORCE_INLINE SLANG_CUDA_CALL uint64_t U64_min(uint64_t a, uint64_t b) { return a < b ? a : b; }
+SLANG_FORCE_INLINE SLANG_CUDA_CALL uint64_t U64_max(uint64_t a, uint64_t b) { return a > b ? a : b; }
+
+SLANG_FORCE_INLINE SLANG_CUDA_CALL uint32_t U64_countbits(uint64_t v)
+{
+    // https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH__INTRINSIC__INT.html#group__CUDA__MATH__INTRINSIC__INT_1g43c9c7d2b9ebf202ff1ef5769989be46
+    return __popcll(v);
+}
+
+
+// ----------------------------- ResourceType -----------------------------------------
+
+
+// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/sm5-object-structuredbuffer-getdimensions
+// Missing Load(_In_ int Location, _Out_ uint Status);
+
+// Read-only view over an array of T. The element count is stored (and bounds are checked through
+// SLANG_BOUND_CHECK) unless SLANG_CUDA_STRUCTURED_BUFFER_NO_COUNT is defined.
+template <typename T>
+struct StructuredBuffer
+{
+    SLANG_CUDA_CALL const T& operator[](size_t index) const
+    {
+#ifndef SLANG_CUDA_STRUCTURED_BUFFER_NO_COUNT
+        SLANG_BOUND_CHECK(index, count);
+#endif
+        return data[index];
+    }
+
+    SLANG_CUDA_CALL const T& Load(size_t index) const
+    {
+#ifndef SLANG_CUDA_STRUCTURED_BUFFER_NO_COUNT
+        SLANG_BOUND_CHECK(index, count);
+#endif
+        return data[index];
+    }
+
+#ifndef SLANG_CUDA_STRUCTURED_BUFFER_NO_COUNT
+    SLANG_CUDA_CALL void GetDimensions(uint32_t* outNumStructs, uint32_t* outStride) { *outNumStructs = uint32_t(count); *outStride = uint32_t(sizeof(T)); }
+#endif
+
+    T* data;
+#ifndef SLANG_CUDA_STRUCTURED_BUFFER_NO_COUNT
+    size_t count;
+#endif
+};
+
+// Writable variant: operator[] returns a mutable reference into the same storage.
+template <typename T>
+struct RWStructuredBuffer : StructuredBuffer<T>
+{
+    SLANG_CUDA_CALL T& operator[](size_t index) const
+    {
+#ifndef SLANG_CUDA_STRUCTURED_BUFFER_NO_COUNT
+        SLANG_BOUND_CHECK(index, this->count);
+#endif
+        return this->data[index];
+    }
+};
+
+// Missing Load(_In_ int Location, _Out_ uint Status);
+struct ByteAddressBuffer
+{
+    SLANG_CUDA_CALL void GetDimensions(uint32_t* outDim) const { *outDim =
uint32_t(sizeInBytes); } + SLANG_CUDA_CALL uint32_t Load(size_t index) const + { + SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 4, sizeInBytes); + return data[index >> 2]; + } + SLANG_CUDA_CALL uint2 Load2(size_t index) const + { + SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 8, sizeInBytes); + const size_t dataIdx = index >> 2; + return uint2{data[dataIdx], data[dataIdx + 1]}; + } + SLANG_CUDA_CALL uint3 Load3(size_t index) const + { + SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 12, sizeInBytes); + const size_t dataIdx = index >> 2; + return uint3{data[dataIdx], data[dataIdx + 1], data[dataIdx + 2]}; + } + SLANG_CUDA_CALL uint4 Load4(size_t index) const + { + SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 16, sizeInBytes); + const size_t dataIdx = index >> 2; + return uint4{data[dataIdx], data[dataIdx + 1], data[dataIdx + 2], data[dataIdx + 3]}; + } + template + SLANG_CUDA_CALL T Load(size_t index) const + { + SLANG_BOUND_CHECK_BYTE_ADDRESS(index, sizeof(T), sizeInBytes); + T data; + memcpy(&data, ((const char*)this->data) + index, sizeof(T)); + return data; + } + + const uint32_t* data; + size_t sizeInBytes; //< Must be multiple of 4 +}; + +// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/sm5-object-rwbyteaddressbuffer +// Missing support for Atomic operations +// Missing support for Load with status +struct RWByteAddressBuffer +{ + SLANG_CUDA_CALL void GetDimensions(uint32_t* outDim) const { *outDim = uint32_t(sizeInBytes); } + + SLANG_CUDA_CALL uint32_t Load(size_t index) const + { + SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 4, sizeInBytes); + return data[index >> 2]; + } + SLANG_CUDA_CALL uint2 Load2(size_t index) const + { + SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 8, sizeInBytes); + const size_t dataIdx = index >> 2; + return uint2{data[dataIdx], data[dataIdx + 1]}; + } + SLANG_CUDA_CALL uint3 Load3(size_t index) const + { + SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 12, sizeInBytes); + const size_t dataIdx = index >> 2; + return uint3{data[dataIdx], data[dataIdx + 1], data[dataIdx + 
2]}; + } + SLANG_CUDA_CALL uint4 Load4(size_t index) const + { + SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 16, sizeInBytes); + const size_t dataIdx = index >> 2; + return uint4{data[dataIdx], data[dataIdx + 1], data[dataIdx + 2], data[dataIdx + 3]}; + } + template + SLANG_CUDA_CALL T Load(size_t index) const + { + SLANG_BOUND_CHECK_BYTE_ADDRESS(index, sizeof(T), sizeInBytes); + T data; + memcpy(&data, ((const char*)this->data) + index, sizeof(T)); + return data; + } + + SLANG_CUDA_CALL void Store(size_t index, uint32_t v) const + { + SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 4, sizeInBytes); + data[index >> 2] = v; + } + SLANG_CUDA_CALL void Store2(size_t index, uint2 v) const + { + SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 8, sizeInBytes); + const size_t dataIdx = index >> 2; + data[dataIdx + 0] = v.x; + data[dataIdx + 1] = v.y; + } + SLANG_CUDA_CALL void Store3(size_t index, uint3 v) const + { + SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 12, sizeInBytes); + const size_t dataIdx = index >> 2; + data[dataIdx + 0] = v.x; + data[dataIdx + 1] = v.y; + data[dataIdx + 2] = v.z; + } + SLANG_CUDA_CALL void Store4(size_t index, uint4 v) const + { + SLANG_BOUND_CHECK_BYTE_ADDRESS(index, 16, sizeInBytes); + const size_t dataIdx = index >> 2; + data[dataIdx + 0] = v.x; + data[dataIdx + 1] = v.y; + data[dataIdx + 2] = v.z; + data[dataIdx + 3] = v.w; + } + template + SLANG_CUDA_CALL void Store(size_t index, T const& value) const + { + SLANG_BOUND_CHECK_BYTE_ADDRESS(index, sizeof(T), sizeInBytes); + memcpy((char*)data + index, &value, sizeof(T)); + } + + /// Can be used in stdlib to gain access + template + SLANG_CUDA_CALL T* _getPtrAt(size_t index) + { + SLANG_BOUND_CHECK_BYTE_ADDRESS(index, sizeof(T), sizeInBytes); + return (T*)(((char*)data) + index); + } + + uint32_t* data; + size_t sizeInBytes; //< Must be multiple of 4 +}; + + +// ---------------------- Wave -------------------------------------- + +// TODO(JS): It appears that cuda does not have a simple way to get a lane index. 
+// +// Another approach could be... +// laneId = ((threadIdx.z * blockDim.y + threadIdx.y) * blockDim.x + threadIdx.x) & SLANG_CUDA_WARP_MASK +// If that is really true another way to do this, would be for code generator to add this function +// with the [numthreads] baked in. +// +// For now I'll just assume you have a launch that makes the following correct if the kernel uses WaveGetLaneIndex() +#ifndef SLANG_USE_ASM_LANE_ID + __forceinline__ __device__ uint32_t _getLaneId() +{ + // If the launch is (or I guess some multiple of the warp size) + // we try this mechanism, which is apparently faster. + return threadIdx.x & SLANG_CUDA_WARP_MASK; +} +#else +__forceinline__ __device__ uint32_t _getLaneId() +{ + // https://stackoverflow.com/questions/44337309/whats-the-most-efficient-way-to-calculate-the-warp-id-lane-id-in-a-1-d-grid# + // This mechanism is not the fastest way to do it, and that is why the other mechanism + // is the default. But the other mechanism relies on a launch that makes the assumption + // true. + unsigned ret; + asm volatile ("mov.u32 %0, %laneid;" : "=r"(ret)); + return ret; +} +#endif + +typedef int WarpMask; + +// It appears that the __activemask() cannot always be used because +// threads need to be converged. +// +// For CUDA the article claims mask has to be used carefully +// https://devblogs.nvidia.com/using-cuda-warp-level-primitives/ +// With the Warp intrinsics there is no mask, and it's just the 'active lanes'. +// __activemask() though does not require there is convergence, so that doesn't work. +// +// '__ballot_sync' produces a convergance. +// +// From the CUDA docs: +// ```For __all_sync, __any_sync, and __ballot_sync, a mask must be passed that specifies the threads +// participating in the call. A bit, representing the thread's lane ID, must be set for each participating thread +// to ensure they are properly converged before the intrinsic is executed by the hardware. 
All active threads named +// in mask must execute the same intrinsic with the same mask, or the result is undefined.``` +// +// Currently there isn't a mechanism to correctly get the mask without it being passed through. +// Doing so will most likely require some changes to slang code generation to track masks, for now then we use +// _getActiveMask. + +// Return mask of all the lanes less than the current lane +__forceinline__ __device__ WarpMask _getLaneLtMask() +{ + return (int(1) << _getLaneId()) - 1; +} + +// TODO(JS): +// THIS IS NOT CORRECT! That determining the appropriate active mask requires appropriate +// mask tracking. +__forceinline__ __device__ WarpMask _getActiveMask() +{ + return __ballot_sync(__activemask(), true); +} + +// Return a mask suitable for the 'MultiPrefix' style functions +__forceinline__ __device__ WarpMask _getMultiPrefixMask(int mask) +{ + return mask; +} + +// Note! Note will return true if mask is 0, but thats okay, because there must be one +// lane active to execute anything +__inline__ __device__ bool _waveIsSingleLane(WarpMask mask) +{ + return (mask & (mask - 1)) == 0; +} + +// Returns the power of 2 size of run of set bits. Returns 0 if not a suitable run. +// Examples: +// 0b00000000'00000000'00000000'11111111 -> 8 +// 0b11111111'11111111'11111111'11111111 -> 32 +// 0b00000000'00000000'00000000'00011111 -> 0 (since 5 is not a power of 2) +// 0b00000000'00000000'00000000'11110000 -> 0 (since the run of bits does not start at the LSB) +// 0b00000000'00000000'00000000'00100111 -> 0 (since it is not a single contiguous run) +__inline__ __device__ int _waveCalcPow2Offset(WarpMask mask) +{ + // This should be the most common case, so fast path it + if (mask == SLANG_CUDA_WARP_BITMASK) + { + return SLANG_CUDA_WARP_SIZE; + } + // Is it a contiguous run of bits? 
+ if ((mask & (mask + 1)) == 0) + { + // const int offsetSize = __ffs(mask + 1) - 1; + const int offset = 32 - __clz(mask); + // Is it a power of 2 size + if ((offset & (offset - 1)) == 0) + { + return offset; + } + } + return 0; +} + +__inline__ __device__ bool _waveIsFirstLane() +{ + const WarpMask mask = __activemask(); + // We special case bit 0, as that most warps are expected to be fully active. + + // mask & -mask, isolates the lowest set bit. + //return (mask & 1 ) || ((mask & -mask) == (1 << _getLaneId())); + + // This mechanism is most similar to what was in an nVidia post, so assume it is prefered. + return (mask & 1 ) || ((__ffs(mask) - 1) == _getLaneId()); +} + +template +struct WaveOpOr +{ + __inline__ __device__ static T getInitial(T a) { return 0; } + __inline__ __device__ static T doOp(T a, T b) { return a | b; } +}; + +template +struct WaveOpAnd +{ + __inline__ __device__ static T getInitial(T a) { return ~T(0); } + __inline__ __device__ static T doOp(T a, T b) { return a & b; } +}; + +template +struct WaveOpXor +{ + __inline__ __device__ static T getInitial(T a) { return 0; } + __inline__ __device__ static T doOp(T a, T b) { return a ^ b; } + __inline__ __device__ static T doInverse(T a, T b) { return a ^ b; } +}; + +template +struct WaveOpAdd +{ + __inline__ __device__ static T getInitial(T a) { return 0; } + __inline__ __device__ static T doOp(T a, T b) { return a + b; } + __inline__ __device__ static T doInverse(T a, T b) { return a - b; } +}; + +template +struct WaveOpMul +{ + __inline__ __device__ static T getInitial(T a) { return T(1); } + __inline__ __device__ static T doOp(T a, T b) { return a * b; } + // Using this inverse for int is probably undesirable - because in general it requires T to have more precision + // There is also a performance aspect to it, where divides are generally significantly slower + __inline__ __device__ static T doInverse(T a, T b) { return a / b; } +}; + +template +struct WaveOpMax +{ + __inline__ __device__ 
static T getInitial(T a) { return a; } + __inline__ __device__ static T doOp(T a, T b) { return a > b ? a : b; } +}; + +template +struct WaveOpMin +{ + __inline__ __device__ static T getInitial(T a) { return a; } + __inline__ __device__ static T doOp(T a, T b) { return a < b ? a : b; } +}; + +template +struct ElementTypeTrait; + +// Scalar +template <> struct ElementTypeTrait { typedef int Type; }; +template <> struct ElementTypeTrait { typedef uint Type; }; +template <> struct ElementTypeTrait { typedef float Type; }; +template <> struct ElementTypeTrait { typedef double Type; }; +template <> struct ElementTypeTrait { typedef uint64_t Type; }; +template <> struct ElementTypeTrait { typedef int64_t Type; }; + +// Vector +template <> struct ElementTypeTrait { typedef int Type; }; +template <> struct ElementTypeTrait { typedef int Type; }; +template <> struct ElementTypeTrait { typedef int Type; }; +template <> struct ElementTypeTrait { typedef int Type; }; + +template <> struct ElementTypeTrait { typedef uint Type; }; +template <> struct ElementTypeTrait { typedef uint Type; }; +template <> struct ElementTypeTrait { typedef uint Type; }; +template <> struct ElementTypeTrait { typedef uint Type; }; + +template <> struct ElementTypeTrait { typedef float Type; }; +template <> struct ElementTypeTrait { typedef float Type; }; +template <> struct ElementTypeTrait { typedef float Type; }; +template <> struct ElementTypeTrait { typedef float Type; }; + +template <> struct ElementTypeTrait { typedef double Type; }; +template <> struct ElementTypeTrait { typedef double Type; }; +template <> struct ElementTypeTrait { typedef double Type; }; +template <> struct ElementTypeTrait { typedef double Type; }; + +// Matrix +template +struct ElementTypeTrait > +{ + typedef T Type; +}; + +// Scalar +template +__device__ T _waveReduceScalar(WarpMask mask, T val) +{ + const int offsetSize = _waveCalcPow2Offset(mask); + if (offsetSize > 0) + { + // Fast path O(log2(activeLanes)) + for (int 
offset = offsetSize >> 1; offset > 0; offset >>= 1) + { + val = INTF::doOp(val, __shfl_xor_sync(mask, val, offset)); + } + } + else if (!_waveIsSingleLane(mask)) + { + T result = INTF::getInitial(val); + int remaining = mask; + while (remaining) + { + const int laneBit = remaining & -remaining; + // Get the sourceLane + const int srcLane = __ffs(laneBit) - 1; + // Broadcast (can also broadcast to self) + result = INTF::doOp(result, __shfl_sync(mask, val, srcLane)); + remaining &= ~laneBit; + } + return result; + } + return val; +} + + +// Multiple values +template +__device__ void _waveReduceMultiple(WarpMask mask, T* val) +{ + const int offsetSize = _waveCalcPow2Offset(mask); + if (offsetSize > 0) + { + // Fast path O(log2(activeLanes)) + for (int offset = offsetSize >> 1; offset > 0; offset >>= 1) + { + for (size_t i = 0; i < COUNT; ++i) + { + val[i] = INTF::doOp(val[i], __shfl_xor_sync(mask, val[i], offset)); + } + } + } + else if (!_waveIsSingleLane(mask)) + { + // Copy the original + T originalVal[COUNT]; + for (size_t i = 0; i < COUNT; ++i) + { + const T v = val[i]; + originalVal[i] = v; + val[i] = INTF::getInitial(v); + } + + int remaining = mask; + while (remaining) + { + const int laneBit = remaining & -remaining; + // Get the sourceLane + const int srcLane = __ffs(laneBit) - 1; + // Broadcast (can also broadcast to self) + for (size_t i = 0; i < COUNT; ++i) + { + val[i] = INTF::doOp(val[i], __shfl_sync(mask, originalVal[i], srcLane)); + } + remaining &= ~laneBit; + } + } +} + +template +__device__ void _waveReduceMultiple(WarpMask mask, T* val) +{ + typedef typename ElementTypeTrait::Type ElemType; + _waveReduceMultiple(mask, (ElemType*)val); +} + +template +__inline__ __device__ T _waveOr(WarpMask mask, T val) { return _waveReduceScalar, T>(mask, val); } + +template +__inline__ __device__ T _waveAnd(WarpMask mask, T val) { return _waveReduceScalar, T>(mask, val); } + +template +__inline__ __device__ T _waveXor(WarpMask mask, T val) { return 
_waveReduceScalar, T>(mask, val); } + +template +__inline__ __device__ T _waveProduct(WarpMask mask, T val) { return _waveReduceScalar, T>(mask, val); } + +template +__inline__ __device__ T _waveSum(WarpMask mask, T val) { return _waveReduceScalar, T>(mask, val); } + +template +__inline__ __device__ T _waveMin(WarpMask mask, T val) { return _waveReduceScalar, T>(mask, val); } + +template +__inline__ __device__ T _waveMax(WarpMask mask, T val) { return _waveReduceScalar, T>(mask, val); } + +// Fast-path specializations when CUDA warp reduce operators are available +#if __CUDA_ARCH__ >= 800 // 8.x or higher +template<> +__inline__ __device__ unsigned _waveOr(WarpMask mask, unsigned val) { return __reduce_or_sync(mask, val); } + +template<> +__inline__ __device__ unsigned _waveAnd(WarpMask mask, unsigned val) { return __reduce_and_sync(mask, val); } + +template<> +__inline__ __device__ unsigned _waveXor(WarpMask mask, unsigned val) { return __reduce_xor_sync(mask, val); } + +template<> +__inline__ __device__ unsigned _waveSum(WarpMask mask, unsigned val) { return __reduce_add_sync(mask, val); } + +template<> +__inline__ __device__ int _waveSum(WarpMask mask, int val) { return __reduce_add_sync(mask, val); } + +template<> +__inline__ __device__ unsigned _waveMin(WarpMask mask, unsigned val) { return __reduce_min_sync(mask, val); } + +template<> +__inline__ __device__ int _waveMin(WarpMask mask, int val) { return __reduce_min_sync(mask, val); } + +template<> +__inline__ __device__ unsigned _waveMax(WarpMask mask, unsigned val) { return __reduce_max_sync(mask, val); } + +template<> +__inline__ __device__ int _waveMax(WarpMask mask, int val) { return __reduce_max_sync(mask, val); } +#endif + + +// Multiple + +template +__inline__ __device__ T _waveOrMultiple(WarpMask mask, T val) { typedef typename ElementTypeTrait::Type ElemType; _waveReduceMultiple >(mask, &val); return val; } + +template +__inline__ __device__ T _waveAndMultiple(WarpMask mask, T val) { typedef typename 
ElementTypeTrait::Type ElemType; _waveReduceMultiple >(mask, &val); return val; } + +template +__inline__ __device__ T _waveXorMultiple(WarpMask mask, T val) { typedef typename ElementTypeTrait::Type ElemType; _waveReduceMultiple >(mask, &val); return val; } + +template +__inline__ __device__ T _waveProductMultiple(WarpMask mask, T val) { typedef typename ElementTypeTrait::Type ElemType; _waveReduceMultiple >(mask, &val); return val; } + +template +__inline__ __device__ T _waveSumMultiple(WarpMask mask, T val) { typedef typename ElementTypeTrait::Type ElemType; _waveReduceMultiple >(mask, &val); return val; } + +template +__inline__ __device__ T _waveMinMultiple(WarpMask mask, T val) { typedef typename ElementTypeTrait::Type ElemType; _waveReduceMultiple >(mask, &val); return val; } + +template +__inline__ __device__ T _waveMaxMultiple(WarpMask mask, T val) { typedef typename ElementTypeTrait::Type ElemType; _waveReduceMultiple >(mask, &val); return val; } + + +template +__inline__ __device__ bool _waveAllEqual(WarpMask mask, T val) +{ + int pred; + __match_all_sync(mask, val, &pred); + return pred != 0; +} + +template +__inline__ __device__ bool _waveAllEqualMultiple(WarpMask mask, T inVal) +{ + typedef typename ElementTypeTrait::Type ElemType; + const size_t count = sizeof(T) / sizeof(ElemType); + int pred; + const ElemType* src = (const ElemType*)&inVal; + for (size_t i = 0; i < count; ++i) + { + __match_all_sync(mask, src[i], &pred); + if (pred == 0) + { + return false; + } + } + return true; +} + +template +__inline__ __device__ T _waveReadFirst(WarpMask mask, T val) +{ + const int lowestLaneId = __ffs(mask) - 1; + return __shfl_sync(mask, val, lowestLaneId); +} + +template +__inline__ __device__ T _waveReadFirstMultiple(WarpMask mask, T inVal) +{ + typedef typename ElementTypeTrait::Type ElemType; + const size_t count = sizeof(T) / sizeof(ElemType); + T outVal; + const ElemType* src = (const ElemType*)&inVal; + ElemType* dst = (ElemType*)&outVal; + const int 
lowestLaneId = __ffs(mask) - 1; + for (size_t i = 0; i < count; ++i) + { + dst[i] = __shfl_sync(mask, src[i], lowestLaneId); + } + return outVal; +} + +template +__inline__ __device__ T _waveShuffleMultiple(WarpMask mask, T inVal, int lane) +{ + typedef typename ElementTypeTrait::Type ElemType; + const size_t count = sizeof(T) / sizeof(ElemType); + T outVal; + const ElemType* src = (const ElemType*)&inVal; + ElemType* dst = (ElemType*)&outVal; + for (size_t i = 0; i < count; ++i) + { + dst[i] = __shfl_sync(mask, src[i], lane); + } + return outVal; +} + +// Scalar + +// Invertable means that when we get to the end of the reduce, we can remove val (to make exclusive), using +// the inverse of the op. +template +__device__ T _wavePrefixInvertableScalar(WarpMask mask, T val) +{ + const int offsetSize = _waveCalcPow2Offset(mask); + + const int laneId = _getLaneId(); + T result; + if (offsetSize > 0) + { + // Sum is calculated inclusive of this lanes value + result = val; + for (int i = 1; i < offsetSize; i += i) + { + const T readVal = __shfl_up_sync(mask, result, i, offsetSize); + if (laneId >= i) + { + result = INTF::doOp(result, readVal); + } + } + // Remove val from the result, by applyin inverse + result = INTF::doInverse(result, val); + } + else + { + result = INTF::getInitial(val); + if (!_waveIsSingleLane(mask)) + { + int remaining = mask; + while (remaining) + { + const int laneBit = remaining & -remaining; + // Get the sourceLane + const int srcLane = __ffs(laneBit) - 1; + // Broadcast (can also broadcast to self) + const T readValue = __shfl_sync(mask, val, srcLane); + // Only accumulate if srcLane is less than this lane + if (srcLane < laneId) + { + result = INTF::doOp(result, readValue); + } + remaining &= ~laneBit; + } + } + } + return result; +} + + +// This implementation separately tracks the value to be propogated, and the value +// that is the final result +template +__device__ T _wavePrefixScalar(WarpMask mask, T val) +{ + const int offsetSize = 
_waveCalcPow2Offset(mask); + + const int laneId = _getLaneId(); + T result = INTF::getInitial(val); + if (offsetSize > 0) + { + // For transmitted value we will do it inclusively with this lanes value + // For the result we do not include the lanes value. This means an extra multiply for each iteration + // but means we don't need to have a divide at the end and also removes overflow issues in that scenario. + for (int i = 1; i < offsetSize; i += i) + { + const T readVal = __shfl_up_sync(mask, val, i, offsetSize); + if (laneId >= i) + { + result = INTF::doOp(result, readVal); + val = INTF::doOp(val, readVal); + } + } + } + else + { + if (!_waveIsSingleLane(mask)) + { + int remaining = mask; + while (remaining) + { + const int laneBit = remaining & -remaining; + // Get the sourceLane + const int srcLane = __ffs(laneBit) - 1; + // Broadcast (can also broadcast to self) + const T readValue = __shfl_sync(mask, val, srcLane); + // Only accumulate if srcLane is less than this lane + if (srcLane < laneId) + { + result = INTF::doOp(result, readValue); + } + remaining &= ~laneBit; + } + } + } + return result; +} + + +template +__device__ T _waveOpCopy(T* dst, const T* src) +{ + for (size_t j = 0; j < COUNT; ++j) + { + dst[j] = src[j]; + } +} + + +template +__device__ T _waveOpDoInverse(T* inOut, const T* val) +{ + for (size_t j = 0; j < COUNT; ++j) + { + inOut[j] = INTF::doInverse(inOut[j], val[j]); + } +} + +template +__device__ T _waveOpSetInitial(T* out, const T* val) +{ + for (size_t j = 0; j < COUNT; ++j) + { + out[j] = INTF::getInitial(val[j]); + } +} + +template +__device__ T _wavePrefixInvertableMultiple(WarpMask mask, T* val) +{ + const int offsetSize = _waveCalcPow2Offset(mask); + + const int laneId = _getLaneId(); + T originalVal[COUNT]; + _waveOpCopy(originalVal, val); + + if (offsetSize > 0) + { + // Sum is calculated inclusive of this lanes value + for (int i = 1; i < offsetSize; i += i) + { + // TODO(JS): Note that here I don't split the laneId outside so 
it's only tested once. + // This may be better but it would also mean that there would be shfl between lanes + // that are on different (albeit identical) instructions. So this seems more likely to + // work as expected with everything in lock step. + for (size_t j = 0; j < COUNT; ++j) + { + const T readVal = __shfl_up_sync(mask, val[j], i, offsetSize); + if (laneId >= i) + { + val[j] = INTF::doOp(val[j], readVal); + } + } + } + // Remove originalVal from the result, by applyin inverse + _waveOpDoInverse(val, originalVal); + } + else + { + _waveOpSetInitial(val, val); + if (!_waveIsSingleLane(mask)) + { + int remaining = mask; + while (remaining) + { + const int laneBit = remaining & -remaining; + // Get the sourceLane + const int srcLane = __ffs(laneBit) - 1; + + for (size_t j = 0; j < COUNT; ++j) + { + // Broadcast (can also broadcast to self) + const T readValue = __shfl_sync(mask, originalVal[j], srcLane); + // Only accumulate if srcLane is less than this lane + if (srcLane < laneId) + { + val[j] = INTF::doOp(val[j], readValue); + } + remaining &= ~laneBit; + } + } + } + } +} + +template +__device__ T _wavePrefixMultiple(WarpMask mask, T* val) +{ + const int offsetSize = _waveCalcPow2Offset(mask); + + const int laneId = _getLaneId(); + + T work[COUNT]; + _waveOpCopy(work, val); + _waveOpSetInitial(val, val); + + if (offsetSize > 0) + { + // For transmitted value we will do it inclusively with this lanes value + // For the result we do not include the lanes value. This means an extra op for each iteration + // but means we don't need to have a divide at the end and also removes overflow issues in that scenario. 
+ for (int i = 1; i < offsetSize; i += i) + { + for (size_t j = 0; j < COUNT; ++j) + { + const T readVal = __shfl_up_sync(mask, work[j], i, offsetSize); + if (laneId >= i) + { + work[j] = INTF::doOp(work[j], readVal); + val[j] = INTF::doOp(val[j], readVal); + } + } + } + } + else + { + if (!_waveIsSingleLane(mask)) + { + int remaining = mask; + while (remaining) + { + const int laneBit = remaining & -remaining; + // Get the sourceLane + const int srcLane = __ffs(laneBit) - 1; + + for (size_t j = 0; j < COUNT; ++j) + { + // Broadcast (can also broadcast to self) + const T readValue = __shfl_sync(mask, work[j], srcLane); + // Only accumulate if srcLane is less than this lane + if (srcLane < laneId) + { + val[j] = INTF::doOp(val[j], readValue); + } + } + remaining &= ~laneBit; + } + } + } +} + +template +__inline__ __device__ T _wavePrefixProduct(WarpMask mask, T val) { return _wavePrefixScalar, T>(mask, val); } + +template +__inline__ __device__ T _wavePrefixSum(WarpMask mask, T val) { return _wavePrefixInvertableScalar, T>(mask, val); } + +template +__inline__ __device__ T _wavePrefixXor(WarpMask mask, T val) { return _wavePrefixInvertableScalar, T>(mask, val); } + +template +__inline__ __device__ T _wavePrefixOr(WarpMask mask, T val) { return _wavePrefixScalar, T>(mask, val); } + +template +__inline__ __device__ T _wavePrefixAnd(WarpMask mask, T val) { return _wavePrefixScalar, T>(mask, val); } + + +template +__inline__ __device__ T _wavePrefixProductMultiple(WarpMask mask, T val) +{ + typedef typename ElementTypeTrait::Type ElemType; + _wavePrefixInvertableMultiple, ElemType, sizeof(T) / sizeof(ElemType)>(mask, (ElemType*)&val); + return val; +} + +template +__inline__ __device__ T _wavePrefixSumMultiple(WarpMask mask, T val) +{ + typedef typename ElementTypeTrait::Type ElemType; + _wavePrefixInvertableMultiple, ElemType, sizeof(T) / sizeof(ElemType)>(mask, (ElemType*)&val); + return val; +} + +template +__inline__ __device__ T _wavePrefixXorMultiple(WarpMask 
mask, T val) +{ + typedef typename ElementTypeTrait::Type ElemType; + _wavePrefixInvertableMultiple, ElemType, sizeof(T) / sizeof(ElemType)>(mask, (ElemType*)&val); + return val; +} + +template +__inline__ __device__ T _wavePrefixOrMultiple(WarpMask mask, T val) +{ + typedef typename ElementTypeTrait::Type ElemType; + _wavePrefixMultiple, ElemType, sizeof(T) / sizeof(ElemType)>(mask, (ElemType*)&val); + return val; +} + +template +__inline__ __device__ T _wavePrefixAndMultiple(WarpMask mask, T val) +{ + typedef typename ElementTypeTrait::Type ElemType; + _wavePrefixMultiple, ElemType, sizeof(T) / sizeof(ElemType)>(mask, (ElemType*)&val); + return val; +} + +template +__inline__ __device__ uint4 _waveMatchScalar(WarpMask mask, T val) +{ + int pred; + return make_uint4(__match_all_sync(mask, val, &pred), 0, 0, 0); +} + +template +__inline__ __device__ uint4 _waveMatchMultiple(WarpMask mask, const T& inVal) +{ + typedef typename ElementTypeTrait::Type ElemType; + const size_t count = sizeof(T) / sizeof(ElemType); + int pred; + const ElemType* src = (const ElemType*)&inVal; + uint matchBits = 0xffffffff; + for (size_t i = 0; i < count && matchBits; ++i) + { + matchBits = matchBits & __match_all_sync(mask, src[i], &pred); + } + return make_uint4(matchBits, 0, 0, 0); +} + +__device__ uint getAt(dim3 a, int b) +{ + SLANG_PRELUDE_ASSERT(b >= 0 && b < 3); + return (&a.x)[b]; +} +__device__ uint3 operator*(uint3 a, dim3 b) +{ + uint3 r; + r.x = a.x * b.x; + r.y = a.y * b.y; + r.z = a.z * b.z; + return r; +} + +template +__inline__ __device__ TResult slang_bit_cast(TInput val) +{ + return *(TResult*)(&val); +} + +/* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! */ + + +/* Type that defines the uniform entry point params. The actual content of this type is dependent on the entry point parameters, and can be +found via reflection or defined such that it matches the shader appropriately. 
+*/ +struct UniformEntryPointParams; +struct UniformState; + +// ---------------------- OptiX Ray Payload -------------------------------------- +#ifdef SLANG_CUDA_ENABLE_OPTIX +struct RayDesc +{ + float3 Origin; + float TMin; + float3 Direction; + float TMax; +}; + +static __forceinline__ __device__ +void *unpackOptiXRayPayloadPointer(uint32_t i0, uint32_t i1) +{ + const uint64_t uptr = static_cast(i0) << 32 | i1; + void* ptr = reinterpret_cast(uptr); + return ptr; +} + +static __forceinline__ __device__ +void packOptiXRayPayloadPointer(void* ptr, uint32_t& i0, uint32_t& i1) +{ + const uint64_t uptr = reinterpret_cast(ptr); + i0 = uptr >> 32; + i1 = uptr & 0x00000000ffffffff; +} + +static __forceinline__ __device__ void *getOptiXRayPayloadPtr() +{ + const uint32_t u0 = optixGetPayload_0(); + const uint32_t u1 = optixGetPayload_1(); + return unpackOptiXRayPayloadPointer(u0, u1); +} + +template +__forceinline__ __device__ void *traceOptiXRay( + OptixTraversableHandle AccelerationStructure, + uint32_t RayFlags, + uint32_t InstanceInclusionMask, + uint32_t RayContributionToHitGroupIndex, + uint32_t MultiplierForGeometryContributionToHitGroupIndex, + uint32_t MissShaderIndex, + RayDesc Ray, + T *Payload +) { + uint32_t r0, r1; + packOptiXRayPayloadPointer((void*)Payload, r0, r1); + optixTrace( + AccelerationStructure, + Ray.Origin, + Ray.Direction, + Ray.TMin, + Ray.TMax, + 0.f, /* Time for motion blur, currently unsupported in slang */ + InstanceInclusionMask, + RayFlags, + RayContributionToHitGroupIndex, + MultiplierForGeometryContributionToHitGroupIndex, + MissShaderIndex, + r0, r1 + ); +} + +#endif + +static const int kSlangTorchTensorMaxDim = 5; + +// TensorView +struct TensorView +{ + uint8_t* data; + uint32_t strides[kSlangTorchTensorMaxDim]; + uint32_t sizes[kSlangTorchTensorMaxDim]; + uint32_t dimensionCount; + + template + __device__ T* data_ptr() + { + return reinterpret_cast(data); + } + + template + __device__ T* data_ptr_at(uint32_t index) + { + uint64_t 
offset = strides[0] * index; + return reinterpret_cast(data + offset); + } + + template + __device__ T* data_ptr_at(uint2 index) + { + uint64_t offset = strides[0] * index.x + strides[1] * index.y; + return reinterpret_cast(data + offset); + } + + template + __device__ T* data_ptr_at(uint3 index) + { + uint64_t offset = strides[0] * index.x + strides[1] * index.y + strides[2] * index.z; + return reinterpret_cast(data + offset); + } + + template + __device__ T* data_ptr_at(uint4 index) + { + uint64_t offset = strides[0] * index.x + strides[1] * index.y + strides[2] * index.z + strides[3] * index.w; + return reinterpret_cast(data + offset); + } + + template + __device__ T* data_ptr_at(uint index[N]) + { + uint64_t offset = 0; + for (unsigned int i = 0; i < N; ++i) + { + offset += strides[i] * index[i]; + } + return reinterpret_cast(data + offset); + } + + template + __device__ T& load(uint32_t x) + { + return *reinterpret_cast(data + strides[0] * x); + } + template + __device__ T& load(uint32_t x, uint32_t y) + { + return *reinterpret_cast(data + strides[0] * x + strides[1] * y); + } + template + __device__ T& load(uint2 index) + { + return *reinterpret_cast(data + strides[0] * index.x + strides[1] * index.y); + } + template + __device__ T& load(uint32_t x, uint32_t y, uint32_t z) + { + return *reinterpret_cast(data + strides[0] * x + strides[1] * y + strides[2] * z); + } + template + __device__ T& load(uint3 index) + { + return *reinterpret_cast(data + strides[0] * index.x + strides[1] * index.y + strides[2] * index.z); + } + template + __device__ T& load(uint32_t x, uint32_t y, uint32_t z, uint32_t w) + { + return *reinterpret_cast(data + strides[0] * x + strides[1] * y + strides[2] * z + strides[3] * w); + } + template + __device__ T& load(uint4 index) + { + return *reinterpret_cast(data + strides[0] * index.x + strides[1] * index.y + strides[2] * index.z + strides[3] * index.w); + } + template + __device__ T& load(uint32_t i0, uint32_t i1, uint32_t i2, uint32_t 
i3, uint32_t i4) + { + return *reinterpret_cast(data + strides[0] * i0 + strides[1] * i1 + strides[2] * i2 + strides[3] * i3 + strides[4] * i4); + } + + // Generic version of load + template + __device__ T& load(uint index[N]) + { + uint64_t offset = 0; + for (unsigned int i = 0; i < N; ++i) + { + offset += strides[i] * index[i]; + } + return *reinterpret_cast(data + offset); + } + + template + __device__ void store(uint32_t x, T val) + { + *reinterpret_cast(data + strides[0] * x) = val; + } + template + __device__ void store(uint32_t x, uint32_t y, T val) + { + *reinterpret_cast(data + strides[0] * x + strides[1] * y) = val; + } + template + __device__ void store(uint2 index, T val) + { + *reinterpret_cast(data + strides[0] * index.x + strides[1] * index.y) = val; + } + template + __device__ void store(uint32_t x, uint32_t y, uint32_t z, T val) + { + *reinterpret_cast(data + strides[0] * x + strides[1] * y + strides[2] * z) = val; + } + template + __device__ void store(uint3 index, T val) + { + *reinterpret_cast(data + strides[0] * index.x + strides[1] * index.y + strides[2] * index.z) = val; + } + template + __device__ void store(uint32_t x, uint32_t y, uint32_t z, uint32_t w, T val) + { + *reinterpret_cast( + data + strides[0] * x + strides[1] * y + strides[2] * z + strides[3] * w) = val; + } + template + __device__ void store(uint4 index, T val) + { + *reinterpret_cast(data + strides[0] * index.x + strides[1] * index.y + strides[2] * index.z + strides[3] * index.w) = val; + } + template + __device__ void store(uint32_t i0, uint32_t i1, uint32_t i2, uint32_t i3, uint32_t i4, T val) + { + *reinterpret_cast(data + strides[0] * i0 + strides[1] * i1 + strides[2] * i2 + strides[3] * i3 + strides[4] * i4) = val; + } + + // Generic version + template + __device__ void store(uint index[N], T val) + { + uint64_t offset = 0; + for (unsigned int i = 0; i < N; ++i) + { + offset += strides[i] * index[i]; + } + *reinterpret_cast(data + offset) = val; + } +}; diff --git 
a/third_party/slang/inc/prelude/slang-hlsl-prelude.h b/third_party/slang/inc/prelude/slang-hlsl-prelude.h new file mode 100644 index 0000000..4774217 --- /dev/null +++ b/third_party/slang/inc/prelude/slang-hlsl-prelude.h @@ -0,0 +1,4 @@ +#ifdef SLANG_HLSL_ENABLE_NVAPI +#include "nvHLSLExtns.h" +#endif +#pragma warning(disable: 3557) diff --git a/third_party/slang/inc/prelude/slang-llvm.h b/third_party/slang/inc/prelude/slang-llvm.h new file mode 100644 index 0000000..b413805 --- /dev/null +++ b/third_party/slang/inc/prelude/slang-llvm.h @@ -0,0 +1,398 @@ +#ifndef SLANG_LLVM_H +#define SLANG_LLVM_H + +// TODO(JS): +// Disable exception declspecs, as not supported on LLVM without some extra options. +// We could enable with `-fms-extensions` +#define SLANG_DISABLE_EXCEPTIONS 1 + +#ifndef SLANG_PRELUDE_ASSERT +# ifdef SLANG_PRELUDE_ENABLE_ASSERT +extern "C" void assertFailure(const char* msg); +# define SLANG_PRELUDE_EXPECT(VALUE, MSG) if(VALUE) {} else assertFailure("assertion failed: '" MSG "'") +# define SLANG_PRELUDE_ASSERT(VALUE) SLANG_PRELUDE_EXPECT(VALUE, #VALUE) +# else // SLANG_PRELUDE_ENABLE_ASSERT +# define SLANG_PRELUDE_EXPECT(VALUE, MSG) +# define SLANG_PRELUDE_ASSERT(x) +# endif // SLANG_PRELUDE_ENABLE_ASSERT +#endif + +/* +Taken from stddef.h +*/ + +typedef __PTRDIFF_TYPE__ ptrdiff_t; +typedef __SIZE_TYPE__ size_t; +typedef __SIZE_TYPE__ rsize_t; + +//typedef __WCHAR_TYPE__ wchar_t; + +#if defined(__need_NULL) +#undef NULL +#ifdef __cplusplus +# if !defined(__MINGW32__) && !defined(_MSC_VER) +# define NULL __null +# else +# define NULL 0 +# endif +#else +# define NULL ((void*)0) +#endif +#ifdef __cplusplus +#if defined(_MSC_EXTENSIONS) && defined(_NATIVE_NULLPTR_SUPPORTED) +namespace std { typedef decltype(nullptr) nullptr_t; } +using ::std::nullptr_t; +#endif +#endif +#undef __need_NULL +#endif /* defined(__need_NULL) */ + + +/* +The following are taken verbatim from stdint.h from Clang in LLVM. Only 8/16/32/64 types are needed. 
+*/ + +// LLVM/Clang types such that we can use LLVM/Clang without headers for C++ output from Slang + +#ifdef __INT64_TYPE__ +# ifndef __int8_t_defined /* glibc sys/types.h also defines int64_t*/ +typedef __INT64_TYPE__ int64_t; +# endif /* __int8_t_defined */ +typedef __UINT64_TYPE__ uint64_t; +# define __int_least64_t int64_t +# define __uint_least64_t uint64_t +#endif /* __INT64_TYPE__ */ + +#ifdef __int_least64_t +typedef __int_least64_t int_least64_t; +typedef __uint_least64_t uint_least64_t; +typedef __int_least64_t int_fast64_t; +typedef __uint_least64_t uint_fast64_t; +#endif /* __int_least64_t */ + +#ifdef __INT32_TYPE__ + +# ifndef __int8_t_defined /* glibc sys/types.h also defines int32_t*/ +typedef __INT32_TYPE__ int32_t; +# endif /* __int8_t_defined */ + +# ifndef __uint32_t_defined /* more glibc compatibility */ +# define __uint32_t_defined +typedef __UINT32_TYPE__ uint32_t; +# endif /* __uint32_t_defined */ + +# define __int_least32_t int32_t +# define __uint_least32_t uint32_t +#endif /* __INT32_TYPE__ */ + +#ifdef __int_least32_t +typedef __int_least32_t int_least32_t; +typedef __uint_least32_t uint_least32_t; +typedef __int_least32_t int_fast32_t; +typedef __uint_least32_t uint_fast32_t; +#endif /* __int_least32_t */ + +#ifdef __INT16_TYPE__ +#ifndef __int8_t_defined /* glibc sys/types.h also defines int16_t*/ +typedef __INT16_TYPE__ int16_t; +#endif /* __int8_t_defined */ +typedef __UINT16_TYPE__ uint16_t; +# define __int_least16_t int16_t +# define __uint_least16_t uint16_t +#endif /* __INT16_TYPE__ */ + +#ifdef __int_least16_t +typedef __int_least16_t int_least16_t; +typedef __uint_least16_t uint_least16_t; +typedef __int_least16_t int_fast16_t; +typedef __uint_least16_t uint_fast16_t; +#endif /* __int_least16_t */ + +#ifdef __INT8_TYPE__ +#ifndef __int8_t_defined /* glibc sys/types.h also defines int8_t*/ +typedef __INT8_TYPE__ int8_t; +#endif /* __int8_t_defined */ +typedef __UINT8_TYPE__ uint8_t; +# define __int_least8_t int8_t +# define 
__uint_least8_t uint8_t +#endif /* __INT8_TYPE__ */ + +#ifdef __int_least8_t +typedef __int_least8_t int_least8_t; +typedef __uint_least8_t uint_least8_t; +typedef __int_least8_t int_fast8_t; +typedef __uint_least8_t uint_fast8_t; +#endif /* __int_least8_t */ + +/* prevent glibc sys/types.h from defining conflicting types */ +#ifndef __int8_t_defined +# define __int8_t_defined +#endif /* __int8_t_defined */ + +/* C99 7.18.1.4 Integer types capable of holding object pointers. + */ +#define __stdint_join3(a,b,c) a ## b ## c + +#ifndef _INTPTR_T +#ifndef __intptr_t_defined +typedef __INTPTR_TYPE__ intptr_t; +#define __intptr_t_defined +#define _INTPTR_T +#endif +#endif + +#ifndef _UINTPTR_T +typedef __UINTPTR_TYPE__ uintptr_t; +#define _UINTPTR_T +#endif + +/* C99 7.18.1.5 Greatest-width integer types. + */ +typedef __INTMAX_TYPE__ intmax_t; +typedef __UINTMAX_TYPE__ uintmax_t; + +/* C99 7.18.4 Macros for minimum-width integer constants. + * + * The standard requires that integer constant macros be defined for all the + * minimum-width types defined above. As 8-, 16-, 32-, and 64-bit minimum-width + * types are required, the corresponding integer constant macros are defined + * here. This implementation also defines minimum-width types for every other + * integer width that the target implements, so corresponding macros are + * defined below, too. + * + * These macros are defined using the same successive-shrinking approach as + * the type definitions above. It is likewise important that macros are defined + * in order of decending width. + * + * Note that C++ should not check __STDC_CONSTANT_MACROS here, contrary to the + * claims of the C standard (see C++ 18.3.1p2, [cstdint.syn]). 
+ */ + +#define __int_c_join(a, b) a ## b +#define __int_c(v, suffix) __int_c_join(v, suffix) +#define __uint_c(v, suffix) __int_c_join(v##U, suffix) + +#ifdef __INT64_TYPE__ +# ifdef __INT64_C_SUFFIX__ +# define __int64_c_suffix __INT64_C_SUFFIX__ +# else +# undef __int64_c_suffix +# endif /* __INT64_C_SUFFIX__ */ +#endif /* __INT64_TYPE__ */ + +#ifdef __int_least64_t +# ifdef __int64_c_suffix +# define INT64_C(v) __int_c(v, __int64_c_suffix) +# define UINT64_C(v) __uint_c(v, __int64_c_suffix) +# else +# define INT64_C(v) v +# define UINT64_C(v) v ## U +# endif /* __int64_c_suffix */ +#endif /* __int_least64_t */ + + +#ifdef __INT32_TYPE__ +# ifdef __INT32_C_SUFFIX__ +# define __int32_c_suffix __INT32_C_SUFFIX__ +#else +# undef __int32_c_suffix +# endif /* __INT32_C_SUFFIX__ */ +#endif /* __INT32_TYPE__ */ + +#ifdef __int_least32_t +# ifdef __int32_c_suffix +# define INT32_C(v) __int_c(v, __int32_c_suffix) +# define UINT32_C(v) __uint_c(v, __int32_c_suffix) +# else +# define INT32_C(v) v +# define UINT32_C(v) v ## U +# endif /* __int32_c_suffix */ +#endif /* __int_least32_t */ + +#ifdef __INT16_TYPE__ +# ifdef __INT16_C_SUFFIX__ +# define __int16_c_suffix __INT16_C_SUFFIX__ +#else +# undef __int16_c_suffix +# endif /* __INT16_C_SUFFIX__ */ +#endif /* __INT16_TYPE__ */ + +#ifdef __int_least16_t +# ifdef __int16_c_suffix +# define INT16_C(v) __int_c(v, __int16_c_suffix) +# define UINT16_C(v) __uint_c(v, __int16_c_suffix) +# else +# define INT16_C(v) v +# define UINT16_C(v) v ## U +# endif /* __int16_c_suffix */ +#endif /* __int_least16_t */ + + +#ifdef __INT8_TYPE__ +# ifdef __INT8_C_SUFFIX__ +# define __int8_c_suffix __INT8_C_SUFFIX__ +#else +# undef __int8_c_suffix +# endif /* __INT8_C_SUFFIX__ */ +#endif /* __INT8_TYPE__ */ + +#ifdef __int_least8_t +# ifdef __int8_c_suffix +# define INT8_C(v) __int_c(v, __int8_c_suffix) +# define UINT8_C(v) __uint_c(v, __int8_c_suffix) +# else +# define INT8_C(v) v +# define UINT8_C(v) v ## U +# endif /* __int8_c_suffix */ 
+#endif /* __int_least8_t */ + +/* C99 7.18.2.1 Limits of exact-width integer types. + * C99 7.18.2.2 Limits of minimum-width integer types. + * C99 7.18.2.3 Limits of fastest minimum-width integer types. + * + * The presence of limit macros are completely optional in C99. This + * implementation defines limits for all of the types (exact- and + * minimum-width) that it defines above, using the limits of the minimum-width + * type for any types that do not have exact-width representations. + * + * As in the type definitions, this section takes an approach of + * successive-shrinking to determine which limits to use for the standard (8, + * 16, 32, 64) bit widths when they don't have exact representations. It is + * therefore important that the definitions be kept in order of decending + * widths. + * + * Note that C++ should not check __STDC_LIMIT_MACROS here, contrary to the + * claims of the C standard (see C++ 18.3.1p2, [cstdint.syn]). + */ + +#ifdef __INT64_TYPE__ +# define INT64_MAX INT64_C( 9223372036854775807) +# define INT64_MIN (-INT64_C( 9223372036854775807)-1) +# define UINT64_MAX UINT64_C(18446744073709551615) +# define __INT_LEAST64_MIN INT64_MIN +# define __INT_LEAST64_MAX INT64_MAX +# define __UINT_LEAST64_MAX UINT64_MAX +#endif /* __INT64_TYPE__ */ + +#ifdef __INT_LEAST64_MIN +# define INT_LEAST64_MIN __INT_LEAST64_MIN +# define INT_LEAST64_MAX __INT_LEAST64_MAX +# define UINT_LEAST64_MAX __UINT_LEAST64_MAX +# define INT_FAST64_MIN __INT_LEAST64_MIN +# define INT_FAST64_MAX __INT_LEAST64_MAX +# define UINT_FAST64_MAX __UINT_LEAST64_MAX +#endif /* __INT_LEAST64_MIN */ + +#ifdef __INT32_TYPE__ +# define INT32_MAX INT32_C(2147483647) +# define INT32_MIN (-INT32_C(2147483647)-1) +# define UINT32_MAX UINT32_C(4294967295) +# define __INT_LEAST32_MIN INT32_MIN +# define __INT_LEAST32_MAX INT32_MAX +# define __UINT_LEAST32_MAX UINT32_MAX +#endif /* __INT32_TYPE__ */ + +#ifdef __INT_LEAST32_MIN +# define INT_LEAST32_MIN __INT_LEAST32_MIN +# define 
INT_LEAST32_MAX __INT_LEAST32_MAX +# define UINT_LEAST32_MAX __UINT_LEAST32_MAX +# define INT_FAST32_MIN __INT_LEAST32_MIN +# define INT_FAST32_MAX __INT_LEAST32_MAX +# define UINT_FAST32_MAX __UINT_LEAST32_MAX +#endif /* __INT_LEAST32_MIN */ + +#ifdef __INT16_TYPE__ +#define INT16_MAX INT16_C(32767) +#define INT16_MIN (-INT16_C(32767)-1) +#define UINT16_MAX UINT16_C(65535) +# define __INT_LEAST16_MIN INT16_MIN +# define __INT_LEAST16_MAX INT16_MAX +# define __UINT_LEAST16_MAX UINT16_MAX +#endif /* __INT16_TYPE__ */ + +#ifdef __INT_LEAST16_MIN +# define INT_LEAST16_MIN __INT_LEAST16_MIN +# define INT_LEAST16_MAX __INT_LEAST16_MAX +# define UINT_LEAST16_MAX __UINT_LEAST16_MAX +# define INT_FAST16_MIN __INT_LEAST16_MIN +# define INT_FAST16_MAX __INT_LEAST16_MAX +# define UINT_FAST16_MAX __UINT_LEAST16_MAX +#endif /* __INT_LEAST16_MIN */ + + +#ifdef __INT8_TYPE__ +# define INT8_MAX INT8_C(127) +# define INT8_MIN (-INT8_C(127)-1) +# define UINT8_MAX UINT8_C(255) +# define __INT_LEAST8_MIN INT8_MIN +# define __INT_LEAST8_MAX INT8_MAX +# define __UINT_LEAST8_MAX UINT8_MAX +#endif /* __INT8_TYPE__ */ + +#ifdef __INT_LEAST8_MIN +# define INT_LEAST8_MIN __INT_LEAST8_MIN +# define INT_LEAST8_MAX __INT_LEAST8_MAX +# define UINT_LEAST8_MAX __UINT_LEAST8_MAX +# define INT_FAST8_MIN __INT_LEAST8_MIN +# define INT_FAST8_MAX __INT_LEAST8_MAX +# define UINT_FAST8_MAX __UINT_LEAST8_MAX +#endif /* __INT_LEAST8_MIN */ + +/* Some utility macros */ +#define __INTN_MIN(n) __stdint_join3( INT, n, _MIN) +#define __INTN_MAX(n) __stdint_join3( INT, n, _MAX) +#define __UINTN_MAX(n) __stdint_join3(UINT, n, _MAX) +#define __INTN_C(n, v) __stdint_join3( INT, n, _C(v)) +#define __UINTN_C(n, v) __stdint_join3(UINT, n, _C(v)) + +/* C99 7.18.2.4 Limits of integer types capable of holding object pointers. */ +/* C99 7.18.3 Limits of other integer types. 
*/ + +#define INTPTR_MIN (-__INTPTR_MAX__-1) +#define INTPTR_MAX __INTPTR_MAX__ +#define UINTPTR_MAX __UINTPTR_MAX__ +#define PTRDIFF_MIN (-__PTRDIFF_MAX__-1) +#define PTRDIFF_MAX __PTRDIFF_MAX__ +#define SIZE_MAX __SIZE_MAX__ + +/* ISO9899:2011 7.20 (C11 Annex K): Define RSIZE_MAX if __STDC_WANT_LIB_EXT1__ + * is enabled. */ +#if defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1 +#define RSIZE_MAX (SIZE_MAX >> 1) +#endif + +/* C99 7.18.2.5 Limits of greatest-width integer types. */ +#define INTMAX_MIN (-__INTMAX_MAX__-1) +#define INTMAX_MAX __INTMAX_MAX__ +#define UINTMAX_MAX __UINTMAX_MAX__ + +/* C99 7.18.3 Limits of other integer types. */ +#define SIG_ATOMIC_MIN __INTN_MIN(__SIG_ATOMIC_WIDTH__) +#define SIG_ATOMIC_MAX __INTN_MAX(__SIG_ATOMIC_WIDTH__) +#ifdef __WINT_UNSIGNED__ +# define WINT_MIN __UINTN_C(__WINT_WIDTH__, 0) +# define WINT_MAX __UINTN_MAX(__WINT_WIDTH__) +#else +# define WINT_MIN __INTN_MIN(__WINT_WIDTH__) +# define WINT_MAX __INTN_MAX(__WINT_WIDTH__) +#endif + +#ifndef WCHAR_MAX +# define WCHAR_MAX __WCHAR_MAX__ +#endif +#ifndef WCHAR_MIN +# if __WCHAR_MAX__ == __INTN_MAX(__WCHAR_WIDTH__) +# define WCHAR_MIN __INTN_MIN(__WCHAR_WIDTH__) +# else +# define WCHAR_MIN __UINTN_C(__WCHAR_WIDTH__, 0) +# endif +#endif + +/* 7.18.4.2 Macros for greatest-width integer constants. */ +#define INTMAX_C(v) __int_c(v, __INTMAX_C_SUFFIX__) +#define UINTMAX_C(v) __int_c(v, __UINTMAX_C_SUFFIX__) + + +#endif // SLANG_LLVM_H + + diff --git a/third_party/slang/inc/prelude/slang-torch-prelude.h b/third_party/slang/inc/prelude/slang-torch-prelude.h new file mode 100644 index 0000000..a2e4a19 --- /dev/null +++ b/third_party/slang/inc/prelude/slang-torch-prelude.h @@ -0,0 +1,149 @@ +// Prelude for PyTorch cpp binding. 
+ +#include +#include +#include +#include +#include +#include + +#ifdef SLANG_LLVM +#include "slang-llvm.h" +#else // SLANG_LLVM +# if SLANG_GCC_FAMILY && __GNUC__ < 6 +# include +# define SLANG_PRELUDE_STD std:: +# else +# include +# define SLANG_PRELUDE_STD +# endif + +# include +# include +# include +# include +#endif // SLANG_LLVM + +#include "../source/core/slang-string.h" + +#if defined(_MSC_VER) +# define SLANG_PRELUDE_SHARED_LIB_EXPORT __declspec(dllexport) +#else +# define SLANG_PRELUDE_SHARED_LIB_EXPORT __attribute__((__visibility__("default"))) +//# define SLANG_PRELUDE_SHARED_LIB_EXPORT __attribute__ ((dllexport)) __attribute__((__visibility__("default"))) +#endif + +#ifdef __cplusplus +# define SLANG_PRELUDE_EXTERN_C extern "C" +# define SLANG_PRELUDE_EXTERN_C_START extern "C" { +# define SLANG_PRELUDE_EXTERN_C_END } +#else +# define SLANG_PRELUDE_EXTERN_C +# define SLANG_PRELUDE_EXTERN_C_START +# define SLANG_PRELUDE_EXTERN_C_END +#endif + +#define SLANG_PRELUDE_NAMESPACE + +#ifndef SLANG_NO_THROW +# define SLANG_NO_THROW +#endif +#ifndef SLANG_STDCALL +# define SLANG_STDCALL +#endif +#ifndef SLANG_MCALL +# define SLANG_MCALL SLANG_STDCALL +#endif +#ifndef SLANG_FORCE_INLINE +# define SLANG_FORCE_INLINE inline +#endif +#include "slang-cpp-types-core.h" +#include "slang-cpp-scalar-intrinsics.h" + + +static const int kSlangTorchTensorMaxDim = 5; + +struct TensorView +{ + uint8_t* data; + uint32_t strides[kSlangTorchTensorMaxDim]; + uint32_t sizes[kSlangTorchTensorMaxDim]; + uint32_t dimensionCount; +}; + + +TensorView make_tensor_view(torch::Tensor val, const char* name, torch::ScalarType targetScalarType, bool requireContiguous) +{ + // We're currently not trying to implicitly cast or transfer to device for two reasons: + // 1. There appears to be a bug with .to() where successive calls after the first one fail. + // 2. Silent casts like this can cause large memory allocations & unexpected overheads. + // It's better to be explicit. 
+ + // Expect tensors to be on CUDA device + if (!val.device().is_cuda()) + throw std::runtime_error(std::string(name).append(": tensor is not on CUDA device.").c_str()); + + // Expect tensors to be the right type. + if (val.dtype() != targetScalarType) + throw std::runtime_error(std::string(name).append(": tensor is not of the expected type.").c_str()); + + // Check that the tensor is contiguous + if (requireContiguous && !val.is_contiguous()) + throw std::runtime_error(std::string(name).append(": tensor is not contiguous.").c_str()); + + TensorView res = {}; + res.dimensionCount = val.dim(); + res.data = nullptr; + size_t elementSize = 4; + + switch (val.scalar_type()) + { + case torch::kInt8: + case torch::kUInt8: + elementSize = 1; + res.data = (uint8_t*)val.data_ptr(); + break; + case torch::kBFloat16: + elementSize = 2; + res.data = (uint8_t*)val.data_ptr(); + break; + case torch::kInt16: + elementSize = 2; + res.data = (uint8_t*)val.data_ptr(); + break; + case torch::kFloat32: + elementSize = 4; + res.data = (uint8_t*)val.data_ptr(); + break; + case torch::kInt32: + elementSize = 4; + res.data = (uint8_t*)val.data_ptr(); + break; + case torch::kFloat64: + elementSize = 8; + res.data = (uint8_t*)val.data_ptr(); + break; + case torch::kInt64: + elementSize = 8; + res.data = (uint8_t*)val.data_ptr(); + break; + case torch::kBool: + elementSize = 1; + res.data = (uint8_t*)val.data_ptr(); + break; + } + + if (val.dim() > kSlangTorchTensorMaxDim) + throw std::runtime_error(std::string(name).append(": number of dimensions exceeds limit (").append(std::to_string(kSlangTorchTensorMaxDim)).append(")").c_str()); + + for (int i = 0; i < val.dim(); ++i) + { + res.strides[i] = val.stride(i) * elementSize; + res.sizes[i] = val.size(i); + } + if (!res.data) + throw std::runtime_error(std::string(name).append(": data pointer is invalid.").c_str()); + return res; +} + +#define SLANG_PRELUDE_EXPORT diff --git a/third_party/slang/inc/slang-com-helper.h 
b/third_party/slang/inc/slang-com-helper.h new file mode 100644 index 0000000..fc8b7de --- /dev/null +++ b/third_party/slang/inc/slang-com-helper.h @@ -0,0 +1,134 @@ +#ifndef SLANG_COM_HELPER_H +#define SLANG_COM_HELPER_H + +/** \file slang-com-helper.h +*/ + +#include "slang.h" +#include + +/* !!!!!!!!!!!!!!!!!!!!! Macros to help checking SlangResult !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/ + +/*! Set SLANG_HANDLE_RESULT_FAIL(x) to code to be executed whenever an error occurs, and is detected by one of the macros */ +#ifndef SLANG_HANDLE_RESULT_FAIL +# define SLANG_HANDLE_RESULT_FAIL(x) +#endif + +//! Helper macro, that makes it easy to add result checking to calls in functions/methods that themselves return Result. +#define SLANG_RETURN_ON_FAIL(x) { SlangResult _res = (x); if (SLANG_FAILED(_res)) { SLANG_HANDLE_RESULT_FAIL(_res); return _res; } } +//! Helper macro that can be used to test the return value from a call, and will return in a void method/function +#define SLANG_RETURN_VOID_ON_FAIL(x) { SlangResult _res = (x); if (SLANG_FAILED(_res)) { SLANG_HANDLE_RESULT_FAIL(_res); return; } } +//! Helper macro that will return false on failure. +#define SLANG_RETURN_FALSE_ON_FAIL(x) { SlangResult _res = (x); if (SLANG_FAILED(_res)) { SLANG_HANDLE_RESULT_FAIL(_res); return false; } } +//! Helper macro that will return nullptr on failure. +#define SLANG_RETURN_NULL_ON_FAIL(x) { SlangResult _res = (x); if (SLANG_FAILED(_res)) { SLANG_HANDLE_RESULT_FAIL(_res); return nullptr; } } + +//! Helper macro that will assert if the return code from a call is failure, also returns the failure. +#define SLANG_ASSERT_ON_FAIL(x) { SlangResult _res = (x); if (SLANG_FAILED(_res)) { assert(false); return _res; } } +//! Helper macro that will assert if the result from a call is a failure, also returns. +#define SLANG_ASSERT_VOID_ON_FAIL(x) { SlangResult _res = (x); if (SLANG_FAILED(_res)) { assert(false); return; } } + +/* !!!!!!!!!!!!!!!!!!!!!!! 
C++ helpers !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!*/ + +#if defined(__cplusplus) +namespace Slang { + +// Alias SlangResult to Slang::Result +typedef SlangResult Result; +// Alias SlangUUID to Slang::Guid +typedef SlangUUID Guid; + +} // namespace Slang + +// Operator == and != for Guid/SlangUUID + +SLANG_FORCE_INLINE bool operator==(const Slang::Guid& aIn, const Slang::Guid& bIn) +{ + using namespace Slang; + // Use the largest type the honors the alignment of Guid + typedef uint32_t CmpType; + union GuidCompare + { + Guid guid; + CmpType data[sizeof(Guid) / sizeof(CmpType)]; + }; + // Type pun - so compiler can 'see' the pun and not break aliasing rules + const CmpType* a = reinterpret_cast(aIn).data; + const CmpType* b = reinterpret_cast(bIn).data; + // Make the guid comparison a single branch, by not using short circuit + return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3])) == 0; +} + +SLANG_FORCE_INLINE bool operator!=(const Slang::Guid& a, const Slang::Guid& b) +{ + return !(a == b); +} + +/* !!!!!!!! Macros to simplify implementing COM interfaces !!!!!!!!!!!!!!!!!!!!!!!!!!!! */ + +/* Assumes underlying implementation has a member m_refCount that is initialized to 0 and can have ++ and -- operate on it. +For SLANG_IUNKNOWN_QUERY_INTERFACE to work - must have a method 'getInterface' that returns valid pointers for the Guid, or nullptr +if not found. 
*/ + +#define SLANG_IUNKNOWN_QUERY_INTERFACE \ +SLANG_NO_THROW SlangResult SLANG_MCALL queryInterface(SlangUUID const& uuid, void** outObject) SLANG_OVERRIDE \ +{ \ + ISlangUnknown* intf = getInterface(uuid); \ + if (intf) \ + { \ + addRef(); \ + *outObject = intf; \ + return SLANG_OK;\ + } \ + return SLANG_E_NO_INTERFACE;\ +} + +#define SLANG_IUNKNOWN_ADD_REF \ +SLANG_NO_THROW uint32_t SLANG_MCALL addRef() \ +{ \ + return ++m_refCount; \ +} + +#define SLANG_IUNKNOWN_RELEASE \ +SLANG_NO_THROW uint32_t SLANG_MCALL release() \ +{ \ + --m_refCount; \ + if (m_refCount == 0) \ + { \ + delete this; \ + return 0; \ + } \ + return m_refCount; \ +} + +#define SLANG_IUNKNOWN_ALL \ + SLANG_IUNKNOWN_QUERY_INTERFACE \ + SLANG_IUNKNOWN_ADD_REF \ + SLANG_IUNKNOWN_RELEASE + +// ------------------------ RefObject IUnknown ----------------------------- + +#define SLANG_REF_OBJECT_IUNKNOWN_QUERY_INTERFACE \ +SLANG_NO_THROW SlangResult SLANG_MCALL queryInterface(SlangUUID const& uuid, void** outObject) SLANG_OVERRIDE \ +{ \ + void* intf = getInterface(uuid); \ + if (intf) \ + { \ + addReference(); \ + *outObject = intf; \ + return SLANG_OK;\ + } \ + return SLANG_E_NO_INTERFACE;\ +} + +#define SLANG_REF_OBJECT_IUNKNOWN_ADD_REF SLANG_NO_THROW uint32_t SLANG_MCALL addRef() SLANG_OVERRIDE { return (uint32_t)addReference(); } +#define SLANG_REF_OBJECT_IUNKNOWN_RELEASE SLANG_NO_THROW uint32_t SLANG_MCALL release() SLANG_OVERRIDE { return (uint32_t)releaseReference(); } + +# define SLANG_REF_OBJECT_IUNKNOWN_ALL \ + SLANG_REF_OBJECT_IUNKNOWN_QUERY_INTERFACE \ + SLANG_REF_OBJECT_IUNKNOWN_ADD_REF \ + SLANG_REF_OBJECT_IUNKNOWN_RELEASE + +#endif // defined(__cplusplus) + +#endif diff --git a/third_party/slang/inc/slang-com-ptr.h b/third_party/slang/inc/slang-com-ptr.h new file mode 100644 index 0000000..00cc9db --- /dev/null +++ b/third_party/slang/inc/slang-com-ptr.h @@ -0,0 +1,160 @@ +#ifndef SLANG_COM_PTR_H +#define SLANG_COM_PTR_H + +#include "slang-com-helper.h" + +#include +#include + 
+namespace Slang { + +/*! \brief ComPtr is a simple smart pointer that manages types which implement COM based interfaces. +\details A class that implements a COM, must derive from the IUnknown interface or a type that matches +it's layout exactly (such as ISlangUnknown). Trying to use this template with a class that doesn't follow +these rules, will lead to undefined behavior. +This is a 'strong' pointer type, and will AddRef when a non null pointer is set and Release when the pointer +leaves scope. +Using 'detach' allows a pointer to be removed from the management of the ComPtr. +To set the smart pointer to null, there is the method setNull, or alternatively just assign SLANG_NULL/nullptr. + +One edge case using the template is that sometimes you want access as a pointer to a pointer. Sometimes this +is to write into the smart pointer, other times to pass as an array. To handle these different behaviors +there are the methods readRef and writeRef, which are used instead of the & (ref) operator. For example + +\code +Void doSomething(ID3D12Resource** resources, IndexT numResources); +// ... +ComPtr resources[3]; +doSomething(resources[0].readRef(), SLANG_COUNT_OF(resource)); +\endcode + +A more common scenario writing to the pointer + +\code +IUnknown* unk = ...; + +ComPtr resource; +Result res = unk->QueryInterface(resource.writeRef()); +\endcode +*/ + +// Enum to force initializing as an attach (without adding a reference) +enum InitAttach +{ + INIT_ATTACH +}; + +template +class ComPtr +{ +public: + typedef T Type; + typedef ComPtr ThisType; + typedef ISlangUnknown* Ptr; + + /// Constructors + /// Default Ctor. Sets to nullptr + SLANG_FORCE_INLINE ComPtr() :m_ptr(nullptr) {} + SLANG_FORCE_INLINE ComPtr(std::nullptr_t) : m_ptr(nullptr) {} + /// Sets, and ref counts. 
+ SLANG_FORCE_INLINE explicit ComPtr(T* ptr) :m_ptr(ptr) { if (ptr) ((Ptr)ptr)->addRef(); } + /// The copy ctor + SLANG_FORCE_INLINE ComPtr(const ThisType& rhs) : m_ptr(rhs.m_ptr) { if (m_ptr) ((Ptr)m_ptr)->addRef(); } + + /// Ctor without adding to ref count. + SLANG_FORCE_INLINE explicit ComPtr(InitAttach, T* ptr) :m_ptr(ptr) { } + /// Ctor without adding to ref count + SLANG_FORCE_INLINE ComPtr(InitAttach, const ThisType& rhs) : m_ptr(rhs.m_ptr) { } + +#ifdef SLANG_HAS_MOVE_SEMANTICS + /// Move Ctor + SLANG_FORCE_INLINE ComPtr(ThisType&& rhs) : m_ptr(rhs.m_ptr) { rhs.m_ptr = nullptr; } + /// Move assign + SLANG_FORCE_INLINE ComPtr& operator=(ThisType&& rhs) { T* swap = m_ptr; m_ptr = rhs.m_ptr; rhs.m_ptr = swap; return *this; } +#endif + + /// Destructor releases the pointer, assuming it is set + SLANG_FORCE_INLINE ~ComPtr() { if (m_ptr) ((Ptr)m_ptr)->release(); } + + // !!! Operators !!! + + /// Returns the dumb pointer + SLANG_FORCE_INLINE operator T *() const { return m_ptr; } + + SLANG_FORCE_INLINE T& operator*() { return *m_ptr; } + /// For making method invocations through the smart pointer work through the dumb pointer + SLANG_FORCE_INLINE T* operator->() const { return m_ptr; } + + /// Assign + SLANG_FORCE_INLINE const ThisType &operator=(const ThisType& rhs); + /// Assign from dumb ptr + SLANG_FORCE_INLINE T* operator=(T* in); + + /// Get the pointer and don't ref + SLANG_FORCE_INLINE T* get() const { return m_ptr; } + /// Release a contained nullptr pointer if set + SLANG_FORCE_INLINE void setNull(); + + /// Detach + SLANG_FORCE_INLINE T* detach() { T* ptr = m_ptr; m_ptr = nullptr; return ptr; } + /// Set to a pointer without changing the ref count + SLANG_FORCE_INLINE void attach(T* in) { m_ptr = in; } + + /// Get ready for writing (nulls contents) + SLANG_FORCE_INLINE T** writeRef() { setNull(); return &m_ptr; } + /// Get for read access + SLANG_FORCE_INLINE T*const* readRef() const { return &m_ptr; } + + /// Swap + void swap(ThisType& rhs); + 
+protected: + /// Gets the address of the dumb pointer. + // Disabled: use writeRef and readRef to get a reference based on usage. +#ifndef SLANG_COM_PTR_ENABLE_REF_OPERATOR + SLANG_FORCE_INLINE T** operator&() = delete; +#endif + + T* m_ptr; +}; + +//---------------------------------------------------------------------------- +template +void ComPtr::setNull() +{ + if (m_ptr) + { + ((Ptr)m_ptr)->release(); + m_ptr = nullptr; + } +} +//---------------------------------------------------------------------------- +template +const ComPtr& ComPtr::operator=(const ThisType& rhs) +{ + if (rhs.m_ptr) ((Ptr)rhs.m_ptr)->addRef(); + if (m_ptr) ((Ptr)m_ptr)->release(); + m_ptr = rhs.m_ptr; + return *this; +} +//---------------------------------------------------------------------------- +template +T* ComPtr::operator=(T* ptr) +{ + if (ptr) ((Ptr)ptr)->addRef(); + if (m_ptr) ((Ptr)m_ptr)->release(); + m_ptr = ptr; + return m_ptr; +} +//---------------------------------------------------------------------------- +template +void ComPtr::swap(ThisType& rhs) +{ + T* tmp = m_ptr; + m_ptr = rhs.m_ptr; + rhs.m_ptr = tmp; +} + +} // namespace Slang + +#endif // SLANG_COM_PTR_H diff --git a/third_party/slang/inc/slang-gfx.h b/third_party/slang/inc/slang-gfx.h new file mode 100644 index 0000000..9b45d23 --- /dev/null +++ b/third_party/slang/inc/slang-gfx.h @@ -0,0 +1,2705 @@ +// render.h +#pragma once + +#include +#include + +#include "slang.h" +#include "slang-com-ptr.h" + + +#if defined(SLANG_GFX_DYNAMIC) +# if defined(_MSC_VER) +# ifdef SLANG_GFX_DYNAMIC_EXPORT +# define SLANG_GFX_API SLANG_DLL_EXPORT +# else +# define SLANG_GFX_API __declspec(dllimport) +# endif +# else +// TODO: need to consider compiler capabilities +//# ifdef SLANG_DYNAMIC_EXPORT +# define SLANG_GFX_API SLANG_DLL_EXPORT +//# endif +# endif +#endif + +#ifndef SLANG_GFX_API +# define SLANG_GFX_API +#endif + +// Needed for building on cygwin with gcc +#undef Always +#undef None + +// GLOBAL TODO: doc comments +// 
GLOBAL TODO: Rationalize integer types (not a smush of uint/int/Uint/Int/etc) +// - need typedefs in gfx namespace for Count, Index, Size, Offset (ex. DeviceAddress) +// - Index and Count are for arrays, and indexing into array - like things(XY coordinates of pixels, etc.) +// - Count is also for anything where we need to measure how many of something there are. This includes things like extents. +// - Offset and Size are almost always for bytes and things measured in bytes. +namespace gfx { + +using Slang::ComPtr; + +typedef SlangResult Result; + +// Had to move here, because Options needs types defined here +typedef SlangInt Int; +typedef SlangUInt UInt; +typedef uint64_t DeviceAddress; +typedef int GfxIndex; +typedef int GfxCount; +typedef size_t Size; +typedef size_t Offset; + +const uint64_t kTimeoutInfinite = 0xFFFFFFFFFFFFFFFF; + +enum class StructType +{ + D3D12DeviceExtendedDesc, D3D12ExperimentalFeaturesDesc +}; + +// TODO: Rename to Stage +enum class StageType +{ + Unknown, + Vertex, + Hull, + Domain, + Geometry, + Fragment, + Compute, + RayGeneration, + Intersection, + AnyHit, + ClosestHit, + Miss, + Callable, + Amplification, + Mesh, + CountOf, +}; + +// TODO: Implementation or backend or something else? +enum class DeviceType +{ + Unknown, + Default, + DirectX11, + DirectX12, + OpenGl, + Vulkan, + CPU, + CUDA, + CountOf, +}; + +// TODO: Why does this exist it should go poof +enum class ProjectionStyle +{ + Unknown, + OpenGl, + DirectX, + Vulkan, + CountOf, +}; + +// TODO: This should also go poof +/// The style of the binding +enum class BindingStyle +{ + Unknown, + DirectX, + OpenGl, + Vulkan, + CPU, + CUDA, + CountOf, +}; + +// TODO: Is this actually a flag when there are no bit fields? +enum class AccessFlag +{ + None, + Read, + Write, +}; + +// TODO: Needed? 
Shouldn't be hard-coded if so +const GfxCount kMaxRenderTargetCount = 8; + +class ITransientResourceHeap; + +enum class ShaderModuleSourceType +{ + SlangSource, // a slang source string in memory. + SlangModuleBinary, // a slang module binary code in memory. + SlangSourceFile, // a slang source from file. + SlangModuleBinaryFile, // a slang module binary code from file. +}; + +class IShaderProgram: public ISlangUnknown +{ +public: + // Defines how linking should be performed for a shader program. + enum class LinkingStyle + { + // Compose all entry-points in a single program, then compile all entry-points together with the same + // set of root shader arguments. + SingleProgram, + + // Link and compile each entry-point individually, potentially with different specializations. + SeparateEntryPointCompilation + }; + + struct Desc + { + // TODO: Tess doesn't like this but doesn't know what to do about it + // The linking style of this program. + LinkingStyle linkingStyle = LinkingStyle::SingleProgram; + + // The global scope or a Slang composite component that represents the entire program. + slang::IComponentType* slangGlobalScope; + + // Number of separate entry point components in the `slangEntryPoints` array to link in. + // If set to 0, then `slangGlobalScope` must contain Slang EntryPoint components. + // If not 0, then `slangGlobalScope` must not contain any EntryPoint components. + GfxCount entryPointCount = 0; + + // An array of Slang entry points. The size of the array must be `entryPointCount`. + // Each element must define only 1 Slang EntryPoint. + slang::IComponentType** slangEntryPoints = nullptr; + }; + + struct CreateDesc2 + { + ShaderModuleSourceType sourceType; + void* sourceData; + Size sourceDataSize; + + // Number of entry points to include in the shader program. 0 means include all entry points + // defined in the module. + GfxCount entryPointCount = 0; + // Names of entry points to include in the shader program. 
The size of the array must be + // `entryPointCount`. + const char** entryPointNames = nullptr; + }; + + virtual SLANG_NO_THROW slang::TypeReflection* SLANG_MCALL findTypeByName(const char* name) = 0; +}; +#define SLANG_UUID_IShaderProgram \ + { \ + 0x9d32d0ad, 0x915c, 0x4ffd, { 0x91, 0xe2, 0x50, 0x85, 0x54, 0xa0, 0x4a, 0x76 } \ + } + +// TODO: Confirm with Yong that we really want this naming convention +// TODO: Rename to what? +// Dont' change without keeping in sync with Format +#define GFX_FORMAT(x) \ + x( Unknown, 0, 0) \ + \ + x(R32G32B32A32_TYPELESS, 16, 1) \ + x(R32G32B32_TYPELESS, 12, 1) \ + x(R32G32_TYPELESS, 8, 1) \ + x(R32_TYPELESS, 4, 1) \ + \ + x(R16G16B16A16_TYPELESS, 8, 1) \ + x(R16G16_TYPELESS, 4, 1) \ + x(R16_TYPELESS, 2, 1) \ + \ + x(R8G8B8A8_TYPELESS, 4, 1) \ + x(R8G8_TYPELESS, 2, 1) \ + x(R8_TYPELESS, 1, 1) \ + x(B8G8R8A8_TYPELESS, 4, 1) \ + \ + x(R32G32B32A32_FLOAT, 16, 1) \ + x(R32G32B32_FLOAT, 12, 1) \ + x(R32G32_FLOAT, 8, 1) \ + x(R32_FLOAT, 4, 1) \ + \ + x(R16G16B16A16_FLOAT, 8, 1) \ + x(R16G16_FLOAT, 4, 1) \ + x(R16_FLOAT, 2, 1) \ + \ + x(R32G32B32A32_UINT, 16, 1) \ + x(R32G32B32_UINT, 12, 1) \ + x(R32G32_UINT, 8, 1) \ + x(R32_UINT, 4, 1) \ + \ + x(R16G16B16A16_UINT, 8, 1) \ + x(R16G16_UINT, 4, 1) \ + x(R16_UINT, 2, 1) \ + \ + x(R8G8B8A8_UINT, 4, 1) \ + x(R8G8_UINT, 2, 1) \ + x(R8_UINT, 1, 1) \ + \ + x(R32G32B32A32_SINT, 16, 1) \ + x(R32G32B32_SINT, 12, 1) \ + x(R32G32_SINT, 8, 1) \ + x(R32_SINT, 4, 1) \ + \ + x(R16G16B16A16_SINT, 8, 1) \ + x(R16G16_SINT, 4, 1) \ + x(R16_SINT, 2, 1) \ + \ + x(R8G8B8A8_SINT, 4, 1) \ + x(R8G8_SINT, 2, 1) \ + x(R8_SINT, 1, 1) \ + \ + x(R16G16B16A16_UNORM, 8, 1) \ + x(R16G16_UNORM, 4, 1) \ + x(R16_UNORM, 2, 1) \ + \ + x(R8G8B8A8_UNORM, 4, 1) \ + x(R8G8B8A8_UNORM_SRGB, 4, 1) \ + x(R8G8_UNORM, 2, 1) \ + x(R8_UNORM, 1, 1) \ + x(B8G8R8A8_UNORM, 4, 1) \ + x(B8G8R8A8_UNORM_SRGB, 4, 1) \ + x(B8G8R8X8_UNORM, 4, 1) \ + x(B8G8R8X8_UNORM_SRGB, 4, 1) \ + \ + x(R16G16B16A16_SNORM, 8, 1) \ + x(R16G16_SNORM, 4, 1) \ + 
x(R16_SNORM, 2, 1) \ + \ + x(R8G8B8A8_SNORM, 4, 1) \ + x(R8G8_SNORM, 2, 1) \ + x(R8_SNORM, 1, 1) \ + \ + x(D32_FLOAT, 4, 1) \ + x(D16_UNORM, 2, 1) \ + x(D32_FLOAT_S8_UINT, 8, 1) \ + x(R32_FLOAT_X32_TYPELESS, 8, 1) \ + \ + x(B4G4R4A4_UNORM, 2, 1) \ + x(B5G6R5_UNORM, 2, 1) \ + x(B5G5R5A1_UNORM, 2, 1) \ + \ + x(R9G9B9E5_SHAREDEXP, 4, 1) \ + x(R10G10B10A2_TYPELESS, 4, 1) \ + x(R10G10B10A2_UNORM, 4, 1) \ + x(R10G10B10A2_UINT, 4, 1) \ + x(R11G11B10_FLOAT, 4, 1) \ + \ + x(BC1_UNORM, 8, 16) \ + x(BC1_UNORM_SRGB, 8, 16) \ + x(BC2_UNORM, 16, 16) \ + x(BC2_UNORM_SRGB, 16, 16) \ + x(BC3_UNORM, 16, 16) \ + x(BC3_UNORM_SRGB, 16, 16) \ + x(BC4_UNORM, 8, 16) \ + x(BC4_SNORM, 8, 16) \ + x(BC5_UNORM, 16, 16) \ + x(BC5_SNORM, 16, 16) \ + x(BC6H_UF16, 16, 16) \ + x(BC6H_SF16, 16, 16) \ + x(BC7_UNORM, 16, 16) \ + x(BC7_UNORM_SRGB, 16, 16) + +// TODO: This should be generated from above +// TODO: enum class should be explicitly uint32_t or whatever's appropriate +/// Different formats of things like pixels or elements of vertices +/// NOTE! Any change to this type (adding, removing, changing order) - must also be reflected in changes GFX_FORMAT +enum class Format +{ + // D3D formats omitted: 19-22, 44-47, 65-66, 68-70, 73, 76, 79, 82, 88-89, 92-94, 97, 100-114 + // These formats are omitted due to lack of a corresponding Vulkan format. D24_UNORM_S8_UINT (DXGI_FORMAT 45) + // has a matching Vulkan format but is also omitted as it is only supported by Nvidia. 
+ Unknown, + + R32G32B32A32_TYPELESS, + R32G32B32_TYPELESS, + R32G32_TYPELESS, + R32_TYPELESS, + + R16G16B16A16_TYPELESS, + R16G16_TYPELESS, + R16_TYPELESS, + + R8G8B8A8_TYPELESS, + R8G8_TYPELESS, + R8_TYPELESS, + B8G8R8A8_TYPELESS, + + R32G32B32A32_FLOAT, + R32G32B32_FLOAT, + R32G32_FLOAT, + R32_FLOAT, + + R16G16B16A16_FLOAT, + R16G16_FLOAT, + R16_FLOAT, + + R32G32B32A32_UINT, + R32G32B32_UINT, + R32G32_UINT, + R32_UINT, + + R16G16B16A16_UINT, + R16G16_UINT, + R16_UINT, + + R8G8B8A8_UINT, + R8G8_UINT, + R8_UINT, + + R32G32B32A32_SINT, + R32G32B32_SINT, + R32G32_SINT, + R32_SINT, + + R16G16B16A16_SINT, + R16G16_SINT, + R16_SINT, + + R8G8B8A8_SINT, + R8G8_SINT, + R8_SINT, + + R16G16B16A16_UNORM, + R16G16_UNORM, + R16_UNORM, + + R8G8B8A8_UNORM, + R8G8B8A8_UNORM_SRGB, + R8G8_UNORM, + R8_UNORM, + B8G8R8A8_UNORM, + B8G8R8A8_UNORM_SRGB, + B8G8R8X8_UNORM, + B8G8R8X8_UNORM_SRGB, + + R16G16B16A16_SNORM, + R16G16_SNORM, + R16_SNORM, + + R8G8B8A8_SNORM, + R8G8_SNORM, + R8_SNORM, + + D32_FLOAT, + D16_UNORM, + D32_FLOAT_S8_UINT, + R32_FLOAT_X32_TYPELESS, + + B4G4R4A4_UNORM, + B5G6R5_UNORM, + B5G5R5A1_UNORM, + + R9G9B9E5_SHAREDEXP, + R10G10B10A2_TYPELESS, + R10G10B10A2_UNORM, + R10G10B10A2_UINT, + R11G11B10_FLOAT, + + BC1_UNORM, + BC1_UNORM_SRGB, + BC2_UNORM, + BC2_UNORM_SRGB, + BC3_UNORM, + BC3_UNORM_SRGB, + BC4_UNORM, + BC4_SNORM, + BC5_UNORM, + BC5_SNORM, + BC6H_UF16, + BC6H_SF16, + BC7_UNORM, + BC7_UNORM_SRGB, + + _Count, +}; + +// TODO: Aspect = Color, Depth, Stencil, etc. +// TODO: Channel = R, G, B, A, D, S, etc. +// TODO: Pick : pixel or texel +// TODO: Block is a good term for what it is +// TODO: Width/Height/Depth/whatever should not be used. We should use extentX, extentY, etc. +struct FormatInfo +{ + GfxCount channelCount; ///< The amount of channels in the format. Only set if the channelType is set + uint8_t channelType; ///< One of SlangScalarType None if type isn't made up of elements of type. TODO: Change to uint32_t? 
+ + Size blockSizeInBytes; ///< The size of a block in bytes. + GfxCount pixelsPerBlock; ///< The number of pixels contained in a block. + GfxCount blockWidth; ///< The width of a block in pixels. + GfxCount blockHeight; ///< The height of a block in pixels. +}; + +enum class InputSlotClass +{ + PerVertex, PerInstance +}; + +struct InputElementDesc +{ + char const* semanticName; ///< The name of the corresponding parameter in shader code. + GfxIndex semanticIndex; ///< The index of the corresponding parameter in shader code. Only needed if multiple parameters share a semantic name. + Format format; ///< The format of the data being fetched for this element. + Offset offset; ///< The offset in bytes of this element from the start of the corresponding chunk of vertex stream data. + GfxIndex bufferSlotIndex; ///< The index of the vertex stream to fetch this element's data from. +}; + +struct VertexStreamDesc +{ + Size stride; ///< The stride in bytes for this vertex stream. + InputSlotClass slotClass; ///< Whether the stream contains per-vertex or per-instance data. + GfxCount instanceDataStepRate; ///< How many instances to draw per chunk of data. +}; + +enum class PrimitiveType +{ + Point, Line, Triangle, Patch +}; + +enum class PrimitiveTopology +{ + TriangleList, TriangleStrip, PointList, LineList, LineStrip +}; + +enum class ResourceState +{ + Undefined, + General, + PreInitialized, + VertexBuffer, + IndexBuffer, + ConstantBuffer, + StreamOutput, + ShaderResource, + UnorderedAccess, + RenderTarget, + DepthRead, + DepthWrite, + Present, + IndirectArgument, + CopySource, + CopyDestination, + ResolveSource, + ResolveDestination, + AccelerationStructure, + AccelerationStructureBuildInput, + PixelShaderResource, + NonPixelShaderResource, + _Count +}; + +struct ResourceStateSet +{ +public: + void add(ResourceState state) { m_bitFields |= (1LL << (uint32_t)state); } + template void add(ResourceState s, TResourceState... 
states) + { + add(s); + add(states...); + } + bool contains(ResourceState state) const { return (m_bitFields & (1LL << (uint32_t)state)) != 0; } + ResourceStateSet() + : m_bitFields(0) + {} + ResourceStateSet(const ResourceStateSet& other) = default; + ResourceStateSet(ResourceState state) { add(state); } + template ResourceStateSet(TResourceState... states) + { + add(states...); + } + + ResourceStateSet operator&(const ResourceStateSet& that) const + { + ResourceStateSet result; + result.m_bitFields = this->m_bitFields & that.m_bitFields; + return result; + } + +private: + uint64_t m_bitFields = 0; + void add() {} +}; + + +/// Describes how memory for the resource should be allocated for CPU access. +enum class MemoryType +{ + DeviceLocal, + Upload, + ReadBack, +}; + +enum class InteropHandleAPI +{ + Unknown, + D3D12, // A D3D12 object pointer. + Vulkan, // A general Vulkan object handle. + CUDA, // A general CUDA object handle. + Win32, // A general Win32 HANDLE. + FileDescriptor, // A file descriptor. + DeviceAddress, // A device address. + D3D12CpuDescriptorHandle, // A D3D12_CPU_DESCRIPTOR_HANDLE value. +}; + +struct InteropHandle +{ + InteropHandleAPI api = InteropHandleAPI::Unknown; + uint64_t handleValue = 0; +}; + +// Declare opaque type +class IInputLayout : public ISlangUnknown +{ +public: + struct Desc + { + InputElementDesc const* inputElements = nullptr; + GfxCount inputElementCount = 0; + VertexStreamDesc const* vertexStreams = nullptr; + GfxCount vertexStreamCount = 0; + }; +}; +#define SLANG_UUID_IInputLayout \ + { \ + 0x45223711, 0xa84b, 0x455c, { 0xbe, 0xfa, 0x49, 0x37, 0x42, 0x1e, 0x8e, 0x2e } \ + } + +class IResource: public ISlangUnknown +{ +public: + /// The type of resource. + /// NOTE! 
The order needs to be such that all texture types are at or after Texture1D (otherwise isTexture won't work correctly) + enum class Type + { + Unknown, ///< Unknown + Buffer, ///< A buffer (like a constant/index/vertex buffer) + Texture1D, ///< A 1d texture + Texture2D, ///< A 2d texture + Texture3D, ///< A 3d texture + TextureCube, ///< A cubemap consists of 6 Texture2D like faces + _Count, + }; + + /// Base class for Descs + struct DescBase + { + Type type = Type::Unknown; + ResourceState defaultState = ResourceState::Undefined; + ResourceStateSet allowedStates = ResourceStateSet(); + MemoryType memoryType = MemoryType::DeviceLocal; + InteropHandle existingHandle = {}; + bool isShared = false; + }; + + virtual SLANG_NO_THROW Type SLANG_MCALL getType() = 0; + virtual SLANG_NO_THROW Result SLANG_MCALL getNativeResourceHandle(InteropHandle* outHandle) = 0; + virtual SLANG_NO_THROW Result SLANG_MCALL getSharedHandle(InteropHandle* outHandle) = 0; + + virtual SLANG_NO_THROW Result SLANG_MCALL setDebugName(const char* name) = 0; + virtual SLANG_NO_THROW const char* SLANG_MCALL getDebugName() = 0; + +}; +#define SLANG_UUID_IResource \ + { \ + 0xa0e39f34, 0x8398, 0x4522, { 0x95, 0xc2, 0xeb, 0xc0, 0xf9, 0x84, 0xef, 0x3f } \ + } + +struct MemoryRange +{ + // TODO: Change to Offset/Size? + uint64_t offset; + uint64_t size; +}; + +class IBufferResource: public IResource +{ +public: + struct Desc: public DescBase + { + Size sizeInBytes = 0; ///< Total size in bytes + Size elementSize = 0; ///< Get the element stride. 
If > 0, this is a structured buffer + Format format = Format::Unknown; + }; + + virtual SLANG_NO_THROW Desc* SLANG_MCALL getDesc() = 0; + virtual SLANG_NO_THROW DeviceAddress SLANG_MCALL getDeviceAddress() = 0; + virtual SLANG_NO_THROW Result SLANG_MCALL map(MemoryRange* rangeToRead, void** outPointer) = 0; + virtual SLANG_NO_THROW Result SLANG_MCALL unmap(MemoryRange* writtenRange) = 0; +}; +#define SLANG_UUID_IBufferResource \ + { \ + 0x1b274efe, 0x5e37, 0x492b, { 0x82, 0x6e, 0x7e, 0xe7, 0xe8, 0xf5, 0xa4, 0x9b } \ + } + +struct DepthStencilClearValue +{ + float depth = 1.0f; + uint32_t stencil = 0; +}; +union ColorClearValue +{ + float floatValues[4]; + uint32_t uintValues[4]; +}; +struct ClearValue +{ + ColorClearValue color = {{0.0f, 0.0f, 0.0f, 0.0f}}; + DepthStencilClearValue depthStencil; +}; + +struct BufferRange +{ + // TODO: Change to Index and Count? + uint64_t firstElement; + uint64_t elementCount; +}; + +enum class TextureAspect : uint32_t +{ + Default = 0, + Color = 0x00000001, + Depth = 0x00000002, + Stencil = 0x00000004, + MetaData = 0x00000008, + Plane0 = 0x00000010, + Plane1 = 0x00000020, + Plane2 = 0x00000040, + + DepthStencil = Depth | Stencil, +}; + +struct SubresourceRange +{ + TextureAspect aspectMask; + GfxIndex mipLevel; + GfxCount mipLevelCount; + GfxIndex baseArrayLayer; // For Texture3D, this is WSlice. + GfxCount layerCount; // For cube maps, this is a multiple of 6. 
+}; + +class ITextureResource: public IResource +{ +public: + static const GfxCount kRemainingTextureSize = 0xffffffff; + struct Offset3D + { + GfxIndex x = 0; + GfxIndex y = 0; + GfxIndex z = 0; + Offset3D() = default; + Offset3D(GfxIndex _x, GfxIndex _y, GfxIndex _z) :x(_x), y(_y), z(_z) {} + }; + + struct SampleDesc + { + GfxCount numSamples = 1; ///< Number of samples per pixel + int quality = 0; ///< The quality measure for the samples + }; + + struct Extents + { + GfxCount width = 0; ///< Width in pixels + GfxCount height = 0; ///< Height in pixels (if 2d or 3d) + GfxCount depth = 0; ///< Depth (if 3d) + }; + + struct Desc: public DescBase + { + Extents size; + + GfxCount arraySize = 0; ///< Array size + + GfxCount numMipLevels = 0; ///< Number of mip levels - if 0 will create all mip levels + Format format; ///< The resources format + SampleDesc sampleDesc; ///< How the resource is sampled + ClearValue* optimalClearValue = nullptr; + }; + + /// Data for a single subresource of a texture. + /// + /// Each subresource is a tensor with `1 <= rank <= 3`, + /// where the rank is deterined by the base shape of the + /// texture (Buffer, 1D, 2D, 3D, or Cube). For the common + /// case of a 2D texture, `rank == 2` and each subresource + /// is a 2D image. + /// + /// Subresource tensors must be stored in a row-major layout, + /// so that the X axis strides over texels, the Y axis strides + /// over 1D rows of texels, and the Z axis strides over 2D + /// "layers" of texels. + /// + /// For a texture with multiple mip levels or array elements, + /// each mip level and array element is stores as a distinct + /// subresource. When indexing into an array of subresources, + /// the index of a subresoruce for mip level `m` and array + /// index `a` is `m + a*mipLevelCount`. + /// + struct SubresourceData + { + /// Pointer to texel data for the subresource tensor. + void const* data; + + /// Stride in bytes between rows of the subresource tensor. 
+ /// + /// This is the number of bytes to add to a pointer to a texel + /// at (X,Y,Z) to get to a texel at (X,Y+1,Z). + /// + /// Devices may not support all possible values for `strideY`. + /// In particular, they may only support strictly positive strides. + /// + gfx::Size strideY; + + /// Stride in bytes between layers of the subresource tensor. + /// + /// This is the number of bytes to add to a pointer to a texel + /// at (X,Y,Z) to get to a texel at (X,Y,Z+1). + /// + /// Devices may not support all possible values for `strideZ`. + /// In particular, they may only support strictly positive strides. + /// + gfx::Size strideZ; + }; + + virtual SLANG_NO_THROW Desc* SLANG_MCALL getDesc() = 0; +}; +#define SLANG_UUID_ITextureResource \ + { \ + 0xcf88a31c, 0x6187, 0x46c5, { 0xa4, 0xb7, 0xeb, 0x58, 0xc7, 0x33, 0x40, 0x17 } \ + } + + +enum class ComparisonFunc : uint8_t +{ + Never = 0x0, + Less = 0x1, + Equal = 0x2, + LessEqual = 0x3, + Greater = 0x4, + NotEqual = 0x5, + GreaterEqual = 0x6, + Always = 0x7, +}; + +enum class TextureFilteringMode +{ + Point, + Linear, +}; + +enum class TextureAddressingMode +{ + Wrap, + ClampToEdge, + ClampToBorder, + MirrorRepeat, + MirrorOnce, +}; + +enum class TextureReductionOp +{ + Average, + Comparison, + Minimum, + Maximum, +}; + +class ISamplerState : public ISlangUnknown +{ +public: + struct Desc + { + TextureFilteringMode minFilter = TextureFilteringMode::Linear; + TextureFilteringMode magFilter = TextureFilteringMode::Linear; + TextureFilteringMode mipFilter = TextureFilteringMode::Linear; + TextureReductionOp reductionOp = TextureReductionOp::Average; + TextureAddressingMode addressU = TextureAddressingMode::Wrap; + TextureAddressingMode addressV = TextureAddressingMode::Wrap; + TextureAddressingMode addressW = TextureAddressingMode::Wrap; + float mipLODBias = 0.0f; + uint32_t maxAnisotropy = 1; + ComparisonFunc comparisonFunc = ComparisonFunc::Never; + float borderColor[4] = { 1.0f, 1.0f, 1.0f, 1.0f }; + float minLOD = 
-FLT_MAX; + float maxLOD = FLT_MAX; + }; + + /// Returns a native API handle representing this sampler state object. + /// When using D3D12, this will be a D3D12_CPU_DESCRIPTOR_HANDLE. + /// When using Vulkan, this will be a VkSampler. + virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(InteropHandle* outNativeHandle) = 0; +}; +#define SLANG_UUID_ISamplerState \ + { \ + 0x8b8055df, 0x9377, 0x401d, { 0x91, 0xff, 0x3f, 0xa3, 0xbf, 0x66, 0x64, 0xf4 } \ + } + +class IResourceView : public ISlangUnknown +{ +public: + enum class Type + { + Unknown, + + RenderTarget, + DepthStencil, + ShaderResource, + UnorderedAccess, + AccelerationStructure, + + CountOf_, + }; + + struct RenderTargetDesc + { + // The resource shape of this render target view. + IResource::Type shape; + }; + + struct Desc + { + Type type; + Format format; + + // Required fields for `RenderTarget` and `DepthStencil` views. + RenderTargetDesc renderTarget; + // Specifies the range of a texture resource for a ShaderRsource/UnorderedAccess/RenderTarget/DepthStencil view. + SubresourceRange subresourceRange; + // Specifies the range of a buffer resource for a ShaderResource/UnorderedAccess view. + BufferRange bufferRange; + // Specifies the element size in bytes of a structured buffer. Pass 0 for a raw buffer view. + Size bufferElementSize; + }; + virtual SLANG_NO_THROW Desc* SLANG_MCALL getViewDesc() = 0; + + /// Returns a native API handle representing this resource view object. + /// When using D3D12, this will be a D3D12_CPU_DESCRIPTOR_HANDLE or a buffer device address depending + /// on the type of the resource view. + /// When using Vulkan, this will be a VkImageView, VkBufferView, VkAccelerationStructure or a VkBuffer + /// depending on the type of the resource view. 
+ virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(InteropHandle* outNativeHandle) = 0; +}; +#define SLANG_UUID_IResourceView \ + { \ + 0x7b6c4926, 0x884, 0x408c, { 0xad, 0x8a, 0x50, 0x3a, 0x8e, 0x23, 0x98, 0xa4 } \ + } + +class IAccelerationStructure : public IResourceView +{ +public: + enum class Kind + { + TopLevel, + BottomLevel + }; + + struct BuildFlags + { + // The enum values are intentionally consistent with + // D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS. + enum Enum + { + None, + AllowUpdate = 1, + AllowCompaction = 2, + PreferFastTrace = 4, + PreferFastBuild = 8, + MinimizeMemory = 16, + PerformUpdate = 32 + }; + }; + + enum class GeometryType + { + Triangles, ProcedurePrimitives + }; + + struct GeometryFlags + { + // The enum values are intentionally consistent with + // D3D12_RAYTRACING_GEOMETRY_FLAGS. + enum Enum + { + None, + Opaque = 1, + NoDuplicateAnyHitInvocation = 2 + }; + }; + + struct TriangleDesc + { + DeviceAddress transform3x4; + Format indexFormat; + Format vertexFormat; + GfxCount indexCount; + GfxCount vertexCount; + DeviceAddress indexData; + DeviceAddress vertexData; + Size vertexStride; + }; + + struct ProceduralAABB + { + float minX; + float minY; + float minZ; + float maxX; + float maxY; + float maxZ; + }; + + struct ProceduralAABBDesc + { + /// Number of AABBs. + GfxCount count; + + /// Pointer to an array of `ProceduralAABB` values in device memory. + DeviceAddress data; + + /// Stride in bytes of the AABB values array. + Size stride; + }; + + struct GeometryDesc + { + GeometryType type; + GeometryFlags::Enum flags; + union + { + TriangleDesc triangles; + ProceduralAABBDesc proceduralAABBs; + } content; + }; + + struct GeometryInstanceFlags + { + // The enum values are kept consistent with D3D12_RAYTRACING_INSTANCE_FLAGS + // and VkGeometryInstanceFlagBitsKHR. 
+ enum Enum : uint32_t + { + None = 0, + TriangleFacingCullDisable = 0x00000001, + TriangleFrontCounterClockwise = 0x00000002, + ForceOpaque = 0x00000004, + NoOpaque = 0x00000008 + }; + }; + + // TODO: Should any of these be changed? + // The layout of this struct is intentionally consistent with D3D12_RAYTRACING_INSTANCE_DESC + // and VkAccelerationStructureInstanceKHR. + struct InstanceDesc + { + float transform[3][4]; + uint32_t instanceID : 24; + uint32_t instanceMask : 8; + uint32_t instanceContributionToHitGroupIndex : 24; + uint32_t flags : 8; // Combination of GeometryInstanceFlags::Enum values. + DeviceAddress accelerationStructure; + }; + + struct PrebuildInfo + { + Size resultDataMaxSize; + Size scratchDataSize; + Size updateScratchDataSize; + }; + + struct BuildInputs + { + Kind kind; + + BuildFlags::Enum flags; + + GfxCount descCount; + + /// Array of `InstanceDesc` values in device memory. + /// Used when `kind` is `TopLevel`. + DeviceAddress instanceDescs; + + /// Array of `GeometryDesc` values. + /// Used when `kind` is `BottomLevel`. + const GeometryDesc* geometryDescs; + }; + + struct CreateDesc + { + Kind kind; + IBufferResource* buffer; + Offset offset; + Size size; + }; + + struct BuildDesc + { + BuildInputs inputs; + IAccelerationStructure* source; + IAccelerationStructure* dest; + DeviceAddress scratchData; + }; + + virtual SLANG_NO_THROW DeviceAddress SLANG_MCALL getDeviceAddress() = 0; +}; +#define SLANG_UUID_IAccelerationStructure \ + { \ + 0xa5cdda3c, 0x1d4e, 0x4df7, { 0x8e, 0xf2, 0xb7, 0x3f, 0xce, 0x4, 0xde, 0x3b } \ + } + +class IFence : public ISlangUnknown +{ +public: + struct Desc + { + uint64_t initialValue = 0; + bool isShared = false; + }; + + /// Returns the currently signaled value on the device. + virtual SLANG_NO_THROW Result SLANG_MCALL getCurrentValue(uint64_t* outValue) = 0; + + /// Signals the fence from the host with the specified value. 
+ virtual SLANG_NO_THROW Result SLANG_MCALL setCurrentValue(uint64_t value) = 0; + + virtual SLANG_NO_THROW Result SLANG_MCALL getSharedHandle(InteropHandle* outHandle) = 0; + virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(InteropHandle* outNativeHandle) = 0; +}; +#define SLANG_UUID_IFence \ + { \ + 0x7fe1c283, 0xd3f4, 0x48ed, { 0xaa, 0xf3, 0x1, 0x51, 0x96, 0x4e, 0x7c, 0xb5 } \ + } + +struct ShaderOffset +{ + SlangInt uniformOffset = 0; // TODO: Change to Offset? + GfxIndex bindingRangeIndex = 0; + GfxIndex bindingArrayIndex = 0; + uint32_t getHashCode() const + { + return (uint32_t)(((bindingRangeIndex << 20) + bindingArrayIndex) ^ uniformOffset); + } + bool operator==(const ShaderOffset& other) const + { + return uniformOffset == other.uniformOffset + && bindingRangeIndex == other.bindingRangeIndex + && bindingArrayIndex == other.bindingArrayIndex; + } + bool operator!=(const ShaderOffset& other) const + { + return !this->operator==(other); + } + bool operator<(const ShaderOffset& other) const + { + if (bindingRangeIndex < other.bindingRangeIndex) + return true; + if (bindingRangeIndex > other.bindingRangeIndex) + return false; + if (bindingArrayIndex < other.bindingArrayIndex) + return true; + if (bindingArrayIndex > other.bindingArrayIndex) + return false; + return uniformOffset < other.uniformOffset; + } + bool operator<=(const ShaderOffset& other) const { return (*this == other) || (*this) < other; } + bool operator>(const ShaderOffset& other) const { return other < *this; } + bool operator>=(const ShaderOffset& other) const { return other <= *this; } +}; + +enum class ShaderObjectContainerType +{ + None, Array, StructuredBuffer +}; + +class IShaderObject : public ISlangUnknown +{ +public: + virtual SLANG_NO_THROW slang::TypeLayoutReflection* SLANG_MCALL getElementTypeLayout() = 0; + virtual SLANG_NO_THROW ShaderObjectContainerType SLANG_MCALL getContainerType() = 0; + virtual SLANG_NO_THROW GfxCount SLANG_MCALL getEntryPointCount() = 0; + virtual 
SLANG_NO_THROW Result SLANG_MCALL + getEntryPoint(GfxIndex index, IShaderObject** entryPoint) = 0; + virtual SLANG_NO_THROW Result SLANG_MCALL + setData(ShaderOffset const& offset, void const* data, Size size) = 0; + virtual SLANG_NO_THROW Result SLANG_MCALL + getObject(ShaderOffset const& offset, IShaderObject** object) = 0; + virtual SLANG_NO_THROW Result SLANG_MCALL + setObject(ShaderOffset const& offset, IShaderObject* object) = 0; + virtual SLANG_NO_THROW Result SLANG_MCALL + setResource(ShaderOffset const& offset, IResourceView* resourceView) = 0; + virtual SLANG_NO_THROW Result SLANG_MCALL + setSampler(ShaderOffset const& offset, ISamplerState* sampler) = 0; + virtual SLANG_NO_THROW Result SLANG_MCALL setCombinedTextureSampler( + ShaderOffset const& offset, IResourceView* textureView, ISamplerState* sampler) = 0; + + /// Manually overrides the specialization argument for the sub-object binding at `offset`. + /// Specialization arguments are passed to the shader compiler to specialize the type + /// of interface-typed shader parameters. + virtual SLANG_NO_THROW Result SLANG_MCALL setSpecializationArgs( + ShaderOffset const& offset, + const slang::SpecializationArg* args, + GfxCount count) = 0; + + virtual SLANG_NO_THROW Result SLANG_MCALL getCurrentVersion( + ITransientResourceHeap* transientHeap, + IShaderObject** outObject) = 0; + + virtual SLANG_NO_THROW const void* SLANG_MCALL getRawData() = 0; + + virtual SLANG_NO_THROW Size SLANG_MCALL getSize() = 0; + + /// Use the provided constant buffer instead of the internally created one. 
+ virtual SLANG_NO_THROW Result SLANG_MCALL setConstantBufferOverride(IBufferResource* constantBuffer) = 0; + + + inline ComPtr getObject(ShaderOffset const& offset) + { + ComPtr object = nullptr; + SLANG_RETURN_NULL_ON_FAIL(getObject(offset, object.writeRef())); + return object; + } + inline ComPtr getEntryPoint(GfxIndex index) + { + ComPtr entryPoint = nullptr; + SLANG_RETURN_NULL_ON_FAIL(getEntryPoint(index, entryPoint.writeRef())); + return entryPoint; + } +}; +#define SLANG_UUID_IShaderObject \ + { \ + 0xc1fa997e, 0x5ca2, 0x45ae, { 0x9b, 0xcb, 0xc4, 0x35, 0x9e, 0x85, 0x5, 0x85 } \ + } + +enum class StencilOp : uint8_t +{ + Keep, + Zero, + Replace, + IncrementSaturate, + DecrementSaturate, + Invert, + IncrementWrap, + DecrementWrap, +}; + +enum class FillMode : uint8_t +{ + Solid, + Wireframe, +}; + +enum class CullMode : uint8_t +{ + None, + Front, + Back, +}; + +enum class FrontFaceMode : uint8_t +{ + CounterClockwise, + Clockwise, +}; + +struct DepthStencilOpDesc +{ + StencilOp stencilFailOp = StencilOp::Keep; + StencilOp stencilDepthFailOp = StencilOp::Keep; + StencilOp stencilPassOp = StencilOp::Keep; + ComparisonFunc stencilFunc = ComparisonFunc::Always; +}; + +struct DepthStencilDesc +{ + bool depthTestEnable = false; + bool depthWriteEnable = true; + ComparisonFunc depthFunc = ComparisonFunc::Less; + + bool stencilEnable = false; + uint32_t stencilReadMask = 0xFFFFFFFF; + uint32_t stencilWriteMask = 0xFFFFFFFF; + DepthStencilOpDesc frontFace; + DepthStencilOpDesc backFace; + + uint32_t stencilRef = 0; +}; + +struct RasterizerDesc +{ + FillMode fillMode = FillMode::Solid; + CullMode cullMode = CullMode::None; + FrontFaceMode frontFace = FrontFaceMode::CounterClockwise; + int32_t depthBias = 0; + float depthBiasClamp = 0.0f; + float slopeScaledDepthBias = 0.0f; + bool depthClipEnable = true; + bool scissorEnable = false; + bool multisampleEnable = false; + bool antialiasedLineEnable = false; + bool enableConservativeRasterization = false; + uint32_t 
forcedSampleCount = 0; +}; + +enum class LogicOp +{ + NoOp, +}; + +enum class BlendOp +{ + Add, + Subtract, + ReverseSubtract, + Min, + Max, +}; + +enum class BlendFactor +{ + Zero, + One, + SrcColor, + InvSrcColor, + SrcAlpha, + InvSrcAlpha, + DestAlpha, + InvDestAlpha, + DestColor, + InvDestColor, + SrcAlphaSaturate, + BlendColor, + InvBlendColor, + SecondarySrcColor, + InvSecondarySrcColor, + SecondarySrcAlpha, + InvSecondarySrcAlpha, +}; + +namespace RenderTargetWriteMask +{ + typedef uint8_t Type; + enum + { + EnableNone = 0, + EnableRed = 0x01, + EnableGreen = 0x02, + EnableBlue = 0x04, + EnableAlpha = 0x08, + EnableAll = 0x0F, + }; +}; +typedef RenderTargetWriteMask::Type RenderTargetWriteMaskT; + +struct AspectBlendDesc +{ + BlendFactor srcFactor = BlendFactor::One; + BlendFactor dstFactor = BlendFactor::Zero; + BlendOp op = BlendOp::Add; +}; + +struct TargetBlendDesc +{ + AspectBlendDesc color; + AspectBlendDesc alpha; + bool enableBlend = false; + LogicOp logicOp = LogicOp::NoOp; + RenderTargetWriteMaskT writeMask = RenderTargetWriteMask::EnableAll; +}; + +struct BlendDesc +{ + TargetBlendDesc targets[kMaxRenderTargetCount]; + GfxCount targetCount = 0; + + bool alphaToCoverageEnable = false; +}; + +class IFramebufferLayout : public ISlangUnknown +{ +public: + struct TargetLayout + { + Format format; + GfxCount sampleCount; + }; + struct Desc + { + GfxCount renderTargetCount; + TargetLayout* renderTargets = nullptr; + TargetLayout* depthStencil = nullptr; + }; +}; +#define SLANG_UUID_IFramebufferLayout \ + { \ + 0xa838785, 0xc13a, 0x4832, { 0xad, 0x88, 0x64, 0x6, 0xb5, 0x4b, 0x5e, 0xba } \ + } + +struct GraphicsPipelineStateDesc +{ + IShaderProgram* program = nullptr; + + IInputLayout* inputLayout = nullptr; + IFramebufferLayout* framebufferLayout = nullptr; + PrimitiveType primitiveType = PrimitiveType::Triangle; + DepthStencilDesc depthStencil; + RasterizerDesc rasterizer; + BlendDesc blend; +}; + +struct ComputePipelineStateDesc +{ + IShaderProgram* 
program = nullptr; + void* d3d12RootSignatureOverride = nullptr; +}; + +struct RayTracingPipelineFlags +{ + enum Enum : uint32_t + { + None = 0, + SkipTriangles = 1, + SkipProcedurals = 2, + }; +}; + +struct HitGroupDesc +{ + const char* hitGroupName = nullptr; + const char* closestHitEntryPoint = nullptr; + const char* anyHitEntryPoint = nullptr; + const char* intersectionEntryPoint = nullptr; +}; + +struct RayTracingPipelineStateDesc +{ + IShaderProgram* program = nullptr; + GfxCount hitGroupCount = 0; + const HitGroupDesc* hitGroups = nullptr; + int maxRecursion = 0; + Size maxRayPayloadSize = 0; + Size maxAttributeSizeInBytes = 8; + RayTracingPipelineFlags::Enum flags = RayTracingPipelineFlags::None; +}; + +class IShaderTable : public ISlangUnknown +{ +public: + // Specifies the bytes to overwrite into a record in the shader table. + struct ShaderRecordOverwrite + { + Offset offset; // Offset within the shader record. + Size size; // Number of bytes to overwrite. + uint8_t data[8]; // Content to overwrite. 
+ }; + + struct Desc + { + GfxCount rayGenShaderCount; + const char** rayGenShaderEntryPointNames; + const ShaderRecordOverwrite* rayGenShaderRecordOverwrites; + + GfxCount missShaderCount; + const char** missShaderEntryPointNames; + const ShaderRecordOverwrite* missShaderRecordOverwrites; + + GfxCount hitGroupCount; + const char** hitGroupNames; + const ShaderRecordOverwrite* hitGroupRecordOverwrites; + + IShaderProgram* program; + }; +}; +#define SLANG_UUID_IShaderTable \ + { \ + 0xa721522c, 0xdf31, 0x4c2f, { 0xa5, 0xe7, 0x3b, 0xe0, 0x12, 0x4b, 0x31, 0x78 } \ + } + +class IPipelineState : public ISlangUnknown +{ +public: + virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(InteropHandle* outHandle) = 0; +}; +#define SLANG_UUID_IPipelineState \ + { \ + 0xca7e57d, 0x8a90, 0x44f3, { 0xbd, 0xb1, 0xfe, 0x9b, 0x35, 0x3f, 0x5a, 0x72 } \ + } + + +struct ScissorRect +{ + int32_t minX; + int32_t minY; + int32_t maxX; + int32_t maxY; +}; + +struct Viewport +{ + float originX = 0.0f; + float originY = 0.0f; + float extentX = 0.0f; + float extentY = 0.0f; + float minZ = 0.0f; + float maxZ = 1.0f; +}; + +class IFramebuffer : public ISlangUnknown +{ +public: + struct Desc + { + GfxCount renderTargetCount; + IResourceView* const* renderTargetViews; + IResourceView* depthStencilView; + IFramebufferLayout* layout; + }; +}; +#define SLANG_UUID_IFrameBuffer \ + { \ + 0xf0c0d9a, 0x4ef3, 0x4e18, { 0x9b, 0xa9, 0x34, 0x60, 0xea, 0x69, 0x87, 0x95 } \ + } + +struct WindowHandle +{ + enum class Type + { + Unknown, + Win32Handle, + NSViewHandle, + XLibHandle, + }; + Type type; + intptr_t handleValues[2]; + static WindowHandle FromHwnd(void* hwnd) + { + WindowHandle handle = {}; + handle.type = WindowHandle::Type::Win32Handle; + handle.handleValues[0] = (intptr_t)(hwnd); + return handle; + } + static WindowHandle FromNSView(void* nsview) + { + WindowHandle handle = {}; + handle.type = WindowHandle::Type::NSViewHandle; + handle.handleValues[0] = (intptr_t)(nsview); + return handle; + } 
+ static WindowHandle FromXWindow(void* xdisplay, uint32_t xwindow) + { + WindowHandle handle = {}; + handle.type = WindowHandle::Type::XLibHandle; + handle.handleValues[0] = (intptr_t)(xdisplay); + handle.handleValues[1] = xwindow; + return handle; + } +}; + +struct FaceMask +{ + enum Enum + { + Front = 1, Back = 2 + }; +}; + +class IRenderPassLayout : public ISlangUnknown +{ +public: + enum class TargetLoadOp + { + Load, Clear, DontCare + }; + enum class TargetStoreOp + { + Store, DontCare + }; + struct TargetAccessDesc + { + TargetLoadOp loadOp; + TargetLoadOp stencilLoadOp; + TargetStoreOp storeOp; + TargetStoreOp stencilStoreOp; + ResourceState initialState; + ResourceState finalState; + }; + struct Desc + { + IFramebufferLayout* framebufferLayout = nullptr; + GfxCount renderTargetCount; + TargetAccessDesc* renderTargetAccess = nullptr; + TargetAccessDesc* depthStencilAccess = nullptr; + }; +}; +#define SLANG_UUID_IRenderPassLayout \ + { \ + 0xdaab0b1a, 0xf45d, 0x4ae9, { 0xbf, 0x2c, 0xe0, 0xbb, 0x76, 0x7d, 0xfa, 0xd1 } \ + } + +enum class QueryType +{ + Timestamp, + AccelerationStructureCompactedSize, + AccelerationStructureSerializedSize, + AccelerationStructureCurrentSize, +}; + +class IQueryPool : public ISlangUnknown +{ +public: + struct Desc + { + QueryType type; + GfxCount count; + }; +public: + virtual SLANG_NO_THROW Result SLANG_MCALL getResult(GfxIndex queryIndex, GfxCount count, uint64_t* data) = 0; + virtual SLANG_NO_THROW Result SLANG_MCALL reset() = 0; +}; +#define SLANG_UUID_IQueryPool \ + { 0xc2cc3784, 0x12da, 0x480a, { 0xa8, 0x74, 0x8b, 0x31, 0x96, 0x1c, 0xa4, 0x36 } } + + +class ICommandEncoder : public ISlangUnknown +{ + SLANG_COM_INTERFACE( 0x77ea6383, 0xbe3d, 0x40aa, { 0x8b, 0x45, 0xfd, 0xf0, 0xd7, 0x5b, 0xfa, 0x34 }); +public: + virtual SLANG_NO_THROW void SLANG_MCALL endEncoding() = 0; + virtual SLANG_NO_THROW void SLANG_MCALL writeTimestamp(IQueryPool* queryPool, GfxIndex queryIndex) = 0; +}; + +struct IndirectDispatchArguments +{ + 
GfxCount ThreadGroupCountX; + GfxCount ThreadGroupCountY; + GfxCount ThreadGroupCountZ; +}; + +struct IndirectDrawArguments +{ + GfxCount VertexCountPerInstance; + GfxCount InstanceCount; + GfxIndex StartVertexLocation; + GfxIndex StartInstanceLocation; +}; + +struct IndirectDrawIndexedArguments +{ + GfxCount IndexCountPerInstance; + GfxCount InstanceCount; + GfxIndex StartIndexLocation; + GfxIndex BaseVertexLocation; + GfxIndex StartInstanceLocation; +}; + +struct SamplePosition +{ + int8_t x; + int8_t y; +}; + +struct ClearResourceViewFlags +{ + enum Enum : uint32_t + { + None = 0, + ClearDepth = 1, + ClearStencil = 2, + FloatClearValues = 4 + }; +}; + +class IResourceCommandEncoder : public ICommandEncoder +{ + // {F99A00E9-ED50-4088-8A0E-3B26755031EA} + SLANG_COM_INTERFACE(0xf99a00e9, 0xed50, 0x4088, { 0x8a, 0xe, 0x3b, 0x26, 0x75, 0x50, 0x31, 0xea }); + +public: + virtual SLANG_NO_THROW void SLANG_MCALL copyBuffer( + IBufferResource* dst, + Offset dstOffset, + IBufferResource* src, + Offset srcOffset, + Size size) = 0; + + /// Copies texture from src to dst. If dstSubresource and srcSubresource has mipLevelCount = 0 + /// and layerCount = 0, the entire resource is being copied and dstOffset, srcOffset and extent + /// arguments are ignored. + virtual SLANG_NO_THROW void SLANG_MCALL copyTexture( + ITextureResource* dst, + ResourceState dstState, + SubresourceRange dstSubresource, + ITextureResource::Offset3D dstOffset, + ITextureResource* src, + ResourceState srcState, + SubresourceRange srcSubresource, + ITextureResource::Offset3D srcOffset, + ITextureResource::Extents extent) = 0; + + /// Copies texture to a buffer. Each row is aligned to kTexturePitchAlignment. 
+ virtual SLANG_NO_THROW void SLANG_MCALL copyTextureToBuffer( + IBufferResource* dst, + Offset dstOffset, + Size dstSize, + Size dstRowStride, + ITextureResource* src, + ResourceState srcState, + SubresourceRange srcSubresource, + ITextureResource::Offset3D srcOffset, + ITextureResource::Extents extent) = 0; + virtual SLANG_NO_THROW void SLANG_MCALL uploadTextureData( + ITextureResource* dst, + SubresourceRange subResourceRange, + ITextureResource::Offset3D offset, + ITextureResource::Extents extent, + ITextureResource::SubresourceData* subResourceData, + GfxCount subResourceDataCount) = 0; + virtual SLANG_NO_THROW void SLANG_MCALL + uploadBufferData(IBufferResource* dst, Offset offset, Size size, void* data) = 0; + virtual SLANG_NO_THROW void SLANG_MCALL textureBarrier( + GfxCount count, ITextureResource* const* textures, ResourceState src, ResourceState dst) = 0; + virtual SLANG_NO_THROW void SLANG_MCALL textureSubresourceBarrier( + ITextureResource* texture, + SubresourceRange subresourceRange, + ResourceState src, + ResourceState dst) = 0; + virtual SLANG_NO_THROW void SLANG_MCALL bufferBarrier( + GfxCount count, IBufferResource* const* buffers, ResourceState src, ResourceState dst) = 0; + virtual SLANG_NO_THROW void SLANG_MCALL clearResourceView( + IResourceView* view, ClearValue* clearValue, ClearResourceViewFlags::Enum flags) = 0; + virtual SLANG_NO_THROW void SLANG_MCALL resolveResource( + ITextureResource* source, + ResourceState sourceState, + SubresourceRange sourceRange, + ITextureResource* dest, + ResourceState destState, + SubresourceRange destRange) = 0; + virtual SLANG_NO_THROW void SLANG_MCALL resolveQuery( + IQueryPool* queryPool, + GfxIndex index, + GfxCount count, + IBufferResource* buffer, + Offset offset) = 0; + virtual SLANG_NO_THROW void SLANG_MCALL beginDebugEvent(const char* name, float rgbColor[3]) = 0; + virtual SLANG_NO_THROW void SLANG_MCALL endDebugEvent() = 0; + inline void textureBarrier(ITextureResource* texture, ResourceState 
src, ResourceState dst) + { + textureBarrier(1, &texture, src, dst); + } + inline void bufferBarrier(IBufferResource* buffer, ResourceState src, ResourceState dst) + { + bufferBarrier(1, &buffer, src, dst); + } +}; + +class IRenderCommandEncoder : public IResourceCommandEncoder +{ + // {7A8D56D0-53E6-4AD6-85F7-D14DC110FDCE} + SLANG_COM_INTERFACE(0x7a8d56d0, 0x53e6, 0x4ad6, { 0x85, 0xf7, 0xd1, 0x4d, 0xc1, 0x10, 0xfd, 0xce }) +public: + // Sets the current pipeline state. This method returns a transient shader object for + // writing shader parameters. This shader object will not retain any resources or + // sub-shader-objects bound to it. The user must be responsible for ensuring that any + // resources or shader objects that is set into `outRootShaderObject` stays alive during + // the execution of the command buffer. + virtual SLANG_NO_THROW Result SLANG_MCALL + bindPipeline(IPipelineState* state, IShaderObject** outRootShaderObject) = 0; + inline IShaderObject* bindPipeline(IPipelineState* state) + { + IShaderObject* rootObject = nullptr; + SLANG_RETURN_NULL_ON_FAIL(bindPipeline(state, &rootObject)); + return rootObject; + } + + // Sets the current pipeline state along with a pre-created mutable root shader object. + virtual SLANG_NO_THROW Result SLANG_MCALL + bindPipelineWithRootObject(IPipelineState* state, IShaderObject* rootObject) = 0; + + virtual SLANG_NO_THROW void + SLANG_MCALL setViewports(GfxCount count, const Viewport* viewports) = 0; + virtual SLANG_NO_THROW void + SLANG_MCALL setScissorRects(GfxCount count, const ScissorRect* scissors) = 0; + + /// Sets the viewport, and sets the scissor rect to match the viewport. 
+ inline void setViewportAndScissor(Viewport const& viewport) + { + setViewports(1, &viewport); + ScissorRect rect = {}; + rect.maxX = static_cast(viewport.extentX); + rect.maxY = static_cast(viewport.extentY); + setScissorRects(1, &rect); + } + + virtual SLANG_NO_THROW void SLANG_MCALL setPrimitiveTopology(PrimitiveTopology topology) = 0; + virtual SLANG_NO_THROW void SLANG_MCALL setVertexBuffers( + GfxIndex startSlot, + GfxCount slotCount, + IBufferResource* const* buffers, + const Offset* offsets) = 0; + inline void setVertexBuffer( + GfxIndex slot, IBufferResource* buffer, Offset offset = 0) + { + setVertexBuffers(slot, 1, &buffer, &offset); + } + + virtual SLANG_NO_THROW void SLANG_MCALL + setIndexBuffer(IBufferResource* buffer, Format indexFormat, Offset offset = 0) = 0; + virtual SLANG_NO_THROW Result SLANG_MCALL + draw(GfxCount vertexCount, GfxIndex startVertex = 0) = 0; + virtual SLANG_NO_THROW Result SLANG_MCALL + drawIndexed(GfxCount indexCount, GfxIndex startIndex = 0, GfxIndex baseVertex = 0) = 0; + virtual SLANG_NO_THROW Result SLANG_MCALL drawIndirect( + GfxCount maxDrawCount, + IBufferResource* argBuffer, + Offset argOffset, + IBufferResource* countBuffer = nullptr, + Offset countOffset = 0) = 0; + virtual SLANG_NO_THROW Result SLANG_MCALL drawIndexedIndirect( + GfxCount maxDrawCount, + IBufferResource* argBuffer, + Offset argOffset, + IBufferResource* countBuffer = nullptr, + Offset countOffset = 0) = 0; + virtual SLANG_NO_THROW void SLANG_MCALL setStencilReference(uint32_t referenceValue) = 0; + virtual SLANG_NO_THROW Result SLANG_MCALL setSamplePositions( + GfxCount samplesPerPixel, GfxCount pixelCount, const SamplePosition* samplePositions) = 0; + virtual SLANG_NO_THROW Result SLANG_MCALL drawInstanced( + GfxCount vertexCount, + GfxCount instanceCount, + GfxIndex startVertex, + GfxIndex startInstanceLocation) = 0; + virtual SLANG_NO_THROW Result SLANG_MCALL drawIndexedInstanced( + GfxCount indexCount, + GfxCount instanceCount, + GfxIndex 
startIndexLocation, + GfxIndex baseVertexLocation, + GfxIndex startInstanceLocation) = 0; + virtual SLANG_NO_THROW Result SLANG_MCALL + drawMeshTasks(int x, int y, int z) = 0; +}; + +class IComputeCommandEncoder : public IResourceCommandEncoder +{ + // {88AA9322-82F7-4FE6-A68A-29C7FE798737} + SLANG_COM_INTERFACE(0x88aa9322, 0x82f7, 0x4fe6, { 0xa6, 0x8a, 0x29, 0xc7, 0xfe, 0x79, 0x87, 0x37 }) + +public: + // Sets the current pipeline state. This method returns a transient shader object for + // writing shader parameters. This shader object will not retain any resources or + // sub-shader-objects bound to it. The user must be responsible for ensuring that any + // resources or shader objects that is set into `outRooShaderObject` stays alive during + // the execution of the command buffer. + virtual SLANG_NO_THROW Result SLANG_MCALL + bindPipeline(IPipelineState* state, IShaderObject** outRootShaderObject) = 0; + inline IShaderObject* bindPipeline(IPipelineState* state) + { + IShaderObject* rootObject = nullptr; + SLANG_RETURN_NULL_ON_FAIL(bindPipeline(state, &rootObject)); + return rootObject; + } + // Sets the current pipeline state along with a pre-created mutable root shader object. 
+ virtual SLANG_NO_THROW Result SLANG_MCALL + bindPipelineWithRootObject(IPipelineState* state, IShaderObject* rootObject) = 0; + virtual SLANG_NO_THROW Result SLANG_MCALL dispatchCompute(int x, int y, int z) = 0; + virtual SLANG_NO_THROW Result SLANG_MCALL dispatchComputeIndirect(IBufferResource* cmdBuffer, Offset offset) = 0; +}; + +enum class AccelerationStructureCopyMode +{ + Clone, Compact +}; + +struct AccelerationStructureQueryDesc +{ + QueryType queryType; + + IQueryPool* queryPool; + + GfxIndex firstQueryIndex; +}; + +class IRayTracingCommandEncoder : public IResourceCommandEncoder +{ + SLANG_COM_INTERFACE(0x9a672b87, 0x5035, 0x45e3, { 0x96, 0x7c, 0x1f, 0x85, 0xcd, 0xb3, 0x63, 0x4f }) +public: + virtual SLANG_NO_THROW void SLANG_MCALL buildAccelerationStructure( + const IAccelerationStructure::BuildDesc& desc, + GfxCount propertyQueryCount, + AccelerationStructureQueryDesc* queryDescs) = 0; + virtual SLANG_NO_THROW void SLANG_MCALL copyAccelerationStructure( + IAccelerationStructure* dest, + IAccelerationStructure* src, + AccelerationStructureCopyMode mode) = 0; + virtual SLANG_NO_THROW void SLANG_MCALL queryAccelerationStructureProperties( + GfxCount accelerationStructureCount, + IAccelerationStructure* const* accelerationStructures, + GfxCount queryCount, + AccelerationStructureQueryDesc* queryDescs) = 0; + virtual SLANG_NO_THROW void SLANG_MCALL + serializeAccelerationStructure(DeviceAddress dest, IAccelerationStructure* source) = 0; + virtual SLANG_NO_THROW void SLANG_MCALL + deserializeAccelerationStructure(IAccelerationStructure* dest, DeviceAddress source) = 0; + + virtual SLANG_NO_THROW void SLANG_MCALL + bindPipeline(IPipelineState* state, IShaderObject** outRootObject) = 0; + // Sets the current pipeline state along with a pre-created mutable root shader object. 
+ virtual SLANG_NO_THROW Result SLANG_MCALL + bindPipelineWithRootObject(IPipelineState* state, IShaderObject* rootObject) = 0; + + /// Issues a dispatch command to start ray tracing workload with a ray tracing pipeline. + /// `rayGenShaderIndex` specifies the index into the shader table that identifies the ray generation shader. + virtual SLANG_NO_THROW Result SLANG_MCALL dispatchRays( + GfxIndex rayGenShaderIndex, + IShaderTable* shaderTable, + GfxCount width, + GfxCount height, + GfxCount depth) = 0; +}; + +class ICommandBuffer : public ISlangUnknown +{ +public: + // Only one encoder may be open at a time. User must call `ICommandEncoder::endEncoding` + // before calling other `encode*Commands` methods. + // Once `endEncoding` is called, the `ICommandEncoder` object becomes obsolete and is + // invalid for further use. To continue recording, the user must request a new encoder + // object by calling one of the `encode*Commands` methods again. + virtual SLANG_NO_THROW void SLANG_MCALL encodeRenderCommands( + IRenderPassLayout* renderPass, + IFramebuffer* framebuffer, + IRenderCommandEncoder** outEncoder) = 0; + inline IRenderCommandEncoder* + encodeRenderCommands(IRenderPassLayout* renderPass, IFramebuffer* framebuffer) + { + IRenderCommandEncoder* result; + encodeRenderCommands(renderPass, framebuffer, &result); + return result; + } + + virtual SLANG_NO_THROW void SLANG_MCALL + encodeComputeCommands(IComputeCommandEncoder** outEncoder) = 0; + inline IComputeCommandEncoder* encodeComputeCommands() + { + IComputeCommandEncoder* result; + encodeComputeCommands(&result); + return result; + } + + virtual SLANG_NO_THROW void SLANG_MCALL + encodeResourceCommands(IResourceCommandEncoder** outEncoder) = 0; + inline IResourceCommandEncoder* encodeResourceCommands() + { + IResourceCommandEncoder* result; + encodeResourceCommands(&result); + return result; + } + + virtual SLANG_NO_THROW void SLANG_MCALL + encodeRayTracingCommands(IRayTracingCommandEncoder** outEncoder) = 0; 
+ inline IRayTracingCommandEncoder* encodeRayTracingCommands() + { + IRayTracingCommandEncoder* result; + encodeRayTracingCommands(&result); + return result; + } + + virtual SLANG_NO_THROW void SLANG_MCALL close() = 0; + + virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(InteropHandle* outHandle) = 0; +}; +#define SLANG_UUID_ICommandBuffer \ + { \ + 0x5d56063f, 0x91d4, 0x4723, { 0xa7, 0xa7, 0x7a, 0x15, 0xaf, 0x93, 0xeb, 0x48 } \ + } + +class ICommandBufferD3D12 : public ICommandBuffer +{ +public: + virtual SLANG_NO_THROW void SLANG_MCALL invalidateDescriptorHeapBinding() = 0; + virtual SLANG_NO_THROW void SLANG_MCALL ensureInternalDescriptorHeapsBound() = 0; +}; +#define SLANG_UUID_ICommandBufferD3D12 \ + { \ + 0xd56b7616, 0x6c14, 0x4841, { 0x9d, 0x9c, 0x7b, 0x7f, 0xdb, 0x9f, 0xd9, 0xb8 } \ + } + +class ICommandQueue : public ISlangUnknown +{ +public: + enum class QueueType + { + Graphics + }; + struct Desc + { + QueueType type; + }; + + // For D3D12, this is the pointer to the queue. For Vulkan, this is the queue itself. + typedef uint64_t NativeHandle; + + virtual SLANG_NO_THROW const Desc& SLANG_MCALL getDesc() = 0; + + virtual SLANG_NO_THROW void SLANG_MCALL executeCommandBuffers( + GfxCount count, + ICommandBuffer* const* commandBuffers, + IFence* fenceToSignal, + uint64_t newFenceValue) = 0; + inline void executeCommandBuffer( + ICommandBuffer* commandBuffer, IFence* fenceToSignal = nullptr, uint64_t newFenceValue = 0) + { + executeCommandBuffers(1, &commandBuffer, fenceToSignal, newFenceValue); + } + + virtual SLANG_NO_THROW Result SLANG_MCALL getNativeHandle(InteropHandle* outHandle) = 0; + + virtual SLANG_NO_THROW void SLANG_MCALL waitOnHost() = 0; + + /// Queues a device side wait for the given fences. 
+ virtual SLANG_NO_THROW Result SLANG_MCALL + waitForFenceValuesOnDevice(GfxCount fenceCount, IFence** fences, uint64_t* waitValues) = 0; +}; +#define SLANG_UUID_ICommandQueue \ + { \ + 0x14e2bed0, 0xad0, 0x4dc8, { 0xb3, 0x41, 0x6, 0x3f, 0xe7, 0x2d, 0xbf, 0xe } \ + } + +class ITransientResourceHeap : public ISlangUnknown +{ +public: + struct Flags + { + enum Enum + { + None = 0, + AllowResizing = 0x1, + }; + }; + struct Desc + { + Flags::Enum flags; + Size constantBufferSize; + GfxCount samplerDescriptorCount; + GfxCount uavDescriptorCount; + GfxCount srvDescriptorCount; + GfxCount constantBufferDescriptorCount; + GfxCount accelerationStructureDescriptorCount; + }; + + // Waits until GPU commands issued before last call to `finish()` has been completed, and resets + // all transient resources holds by the heap. + // This method must be called before using the transient heap to issue new GPU commands. + // In most situations this method should be called at the beginning of each frame. + virtual SLANG_NO_THROW Result SLANG_MCALL synchronizeAndReset() = 0; + + // Must be called when the application has done using this heap to issue commands. In most situations + // this method should be called at the end of each frame. + virtual SLANG_NO_THROW Result SLANG_MCALL finish() = 0; + + // Command buffers are one-time use. Once it is submitted to the queue via + // `executeCommandBuffers` a command buffer is no longer valid to be used any more. Command + // buffers must be closed before submission. The current D3D12 implementation has a limitation + // that only one command buffer maybe recorded at a time. User must finish recording a command + // buffer before creating another command buffer. 
+ virtual SLANG_NO_THROW Result SLANG_MCALL + createCommandBuffer(ICommandBuffer** outCommandBuffer) = 0; + inline ComPtr createCommandBuffer() + { + ComPtr result; + SLANG_RETURN_NULL_ON_FAIL(createCommandBuffer(result.writeRef())); + return result; + } +}; +#define SLANG_UUID_ITransientResourceHeap \ + { \ + 0xcd48bd29, 0xee72, 0x41b8, { 0xbc, 0xff, 0xa, 0x2b, 0x3a, 0xaa, 0x6d, 0xeb } \ + } + +class ITransientResourceHeapD3D12 : public ISlangUnknown +{ +public: + enum class DescriptorType + { + ResourceView, Sampler + }; + virtual SLANG_NO_THROW Result SLANG_MCALL allocateTransientDescriptorTable( + DescriptorType type, + GfxCount count, + Offset& outDescriptorOffset, + void** outD3DDescriptorHeapHandle) = 0; +}; +#define SLANG_UUID_ITransientResourceHeapD3D12 \ + { \ + 0x9bc6a8bc, 0x5f7a, 0x454a, { 0x93, 0xef, 0x3b, 0x10, 0x5b, 0xb7, 0x63, 0x7e } \ + } + +class ISwapchain : public ISlangUnknown +{ +public: + struct Desc + { + Format format; + GfxCount width, height; + GfxCount imageCount; + ICommandQueue* queue; + bool enableVSync; + }; + virtual SLANG_NO_THROW const Desc& SLANG_MCALL getDesc() = 0; + + /// Returns the back buffer image at `index`. + virtual SLANG_NO_THROW Result SLANG_MCALL + getImage(GfxIndex index, ITextureResource** outResource) = 0; + + /// Present the next image in the swapchain. + virtual SLANG_NO_THROW Result SLANG_MCALL present() = 0; + + /// Returns the index of next back buffer image that will be presented in the next + /// `present` call. If the swapchain is invalid/out-of-date, this method returns -1. + virtual SLANG_NO_THROW int SLANG_MCALL acquireNextImage() = 0; + + /// Resizes the back buffers of this swapchain. All render target views and framebuffers + /// referencing the back buffer images must be freed before calling this method. + virtual SLANG_NO_THROW Result SLANG_MCALL resize(GfxCount width, GfxCount height) = 0; + + // Check if the window is occluded. 
+ virtual SLANG_NO_THROW bool SLANG_MCALL isOccluded() = 0; + + // Toggle full screen mode. + virtual SLANG_NO_THROW Result SLANG_MCALL setFullScreenMode(bool mode) = 0; +}; +#define SLANG_UUID_ISwapchain \ + { \ + 0xbe91ba6c, 0x784, 0x4308, { 0xa1, 0x0, 0x19, 0xc3, 0x66, 0x83, 0x44, 0xb2 } \ + } + +struct AdapterLUID +{ + uint8_t luid[16]; + + bool operator==(const AdapterLUID& other) const + { + for (size_t i = 0; i < sizeof(AdapterLUID::luid); ++i) + if (luid[i] != other.luid[i]) + return false; + return true; + } + bool operator!=(const AdapterLUID& other) const + { + return !this->operator==(other); + } +}; + +struct AdapterInfo +{ + // Descriptive name of the adapter. + char name[128]; + + // Unique identifier for the vendor (only available for D3D and Vulkan). + uint32_t vendorID; + + // Unique identifier for the physical device among devices from the vendor (only available for D3D and Vulkan) + uint32_t deviceID; + + // Logically unique identifier of the adapter. + AdapterLUID luid; +}; + +class AdapterList +{ +public: + AdapterList(ISlangBlob* blob) : m_blob(blob) {} + + const AdapterInfo* getAdapters() const + { + return reinterpret_cast(m_blob ? m_blob->getBufferPointer() : nullptr); + } + + GfxCount getCount() const + { + return (GfxCount)(m_blob ? m_blob->getBufferSize() / sizeof(AdapterInfo) : 0); + } + +private: + ComPtr m_blob; +}; + +struct DeviceLimits +{ + /// Maximum dimension for 1D textures. + uint32_t maxTextureDimension1D; + /// Maximum dimensions for 2D textures. + uint32_t maxTextureDimension2D; + /// Maximum dimensions for 3D textures. + uint32_t maxTextureDimension3D; + /// Maximum dimensions for cube textures. + uint32_t maxTextureDimensionCube; + /// Maximum number of texture layers. + uint32_t maxTextureArrayLayers; + + /// Maximum number of vertex input elements in a graphics pipeline. + uint32_t maxVertexInputElements; + /// Maximum offset of a vertex input element in the vertex stream. 
+ uint32_t maxVertexInputElementOffset; + /// Maximum number of vertex streams in a graphics pipeline. + uint32_t maxVertexStreams; + /// Maximum stride of a vertex stream. + uint32_t maxVertexStreamStride; + + /// Maximum number of threads per thread group. + uint32_t maxComputeThreadsPerGroup; + /// Maximum dimensions of a thread group. + uint32_t maxComputeThreadGroupSize[3]; + /// Maximum number of thread groups per dimension in a single dispatch. + uint32_t maxComputeDispatchThreadGroups[3]; + + /// Maximum number of viewports per pipeline. + uint32_t maxViewports; + /// Maximum viewport dimensions. + uint32_t maxViewportDimensions[2]; + /// Maximum framebuffer dimensions. + uint32_t maxFramebufferDimensions[3]; + + /// Maximum samplers visible in a shader stage. + uint32_t maxShaderVisibleSamplers; +}; + +struct DeviceInfo +{ + DeviceType deviceType; + + DeviceLimits limits; + + BindingStyle bindingStyle; + + ProjectionStyle projectionStyle; + + /// An projection matrix that ensures x, y mapping to pixels + /// is the same on all targets + float identityProjectionMatrix[16]; + + /// The name of the graphics API being used by this device. + const char* apiName = nullptr; + + /// The name of the graphics adapter. + const char* adapterName = nullptr; + + /// The clock frequency used in timestamp queries. + uint64_t timestampFrequency = 0; +}; + +enum class DebugMessageType +{ + Info, Warning, Error +}; +enum class DebugMessageSource +{ + Layer, Driver, Slang +}; +class IDebugCallback +{ +public: + virtual SLANG_NO_THROW void SLANG_MCALL + handleMessage(DebugMessageType type, DebugMessageSource source, const char* message) = 0; +}; + +class IDevice : public ISlangUnknown +{ +public: + struct SlangDesc + { + slang::IGlobalSession* slangGlobalSession = nullptr; // (optional) A slang global session object. If null will create automatically. 
+ + SlangMatrixLayoutMode defaultMatrixLayoutMode = SLANG_MATRIX_LAYOUT_ROW_MAJOR; + + char const* const* searchPaths = nullptr; + GfxCount searchPathCount = 0; + + slang::PreprocessorMacroDesc const* preprocessorMacros = nullptr; + GfxCount preprocessorMacroCount = 0; + + const char* targetProfile = nullptr; // (optional) Target shader profile. If null this will be set to platform dependent default. + SlangFloatingPointMode floatingPointMode = SLANG_FLOATING_POINT_MODE_DEFAULT; + SlangOptimizationLevel optimizationLevel = SLANG_OPTIMIZATION_LEVEL_DEFAULT; + SlangTargetFlags targetFlags = kDefaultTargetFlags; + SlangLineDirectiveMode lineDirectiveMode = SLANG_LINE_DIRECTIVE_MODE_DEFAULT; + }; + + struct ShaderCacheDesc + { + // The root directory for the shader cache. If not set, shader cache is disabled. + const char* shaderCachePath = nullptr; + // The maximum number of entries stored in the cache. By default, there is no limit. + GfxCount maxEntryCount = 0; + }; + + struct InteropHandles + { + InteropHandle handles[3] = {}; + }; + + struct Desc + { + // The underlying API/Platform of the device. + DeviceType deviceType = DeviceType::Default; + // The device's handles (if they exist) and their associated API. For D3D12, this contains a single InteropHandle + // for the ID3D12Device. For Vulkan, the first InteropHandle is the VkInstance, the second is the VkPhysicalDevice, + // and the third is the VkDevice. For CUDA, this only contains a single value for the CUDADevice. + InteropHandles existingDeviceHandles; + // LUID of the adapter to use. Use getGfxAdapters() to get a list of available adapters. + const AdapterLUID* adapterLUID = nullptr; + // Number of required features. + GfxCount requiredFeatureCount = 0; + // Array of required feature names, whose size is `requiredFeatureCount`. + const char** requiredFeatures = nullptr; + // A command dispatcher object that intercepts and handles actual low-level API call. 
+ ISlangUnknown* apiCommandDispatcher = nullptr; + // The slot (typically UAV) used to identify NVAPI intrinsics. If >=0 NVAPI is required. + GfxIndex nvapiExtnSlot = -1; + // Configurations for the shader cache. + ShaderCacheDesc shaderCache = {}; + // Configurations for Slang compiler. + SlangDesc slang = {}; + + GfxCount extendedDescCount = 0; + void** extendedDescs = nullptr; + }; + + virtual SLANG_NO_THROW Result SLANG_MCALL getNativeDeviceHandles(InteropHandles* outHandles) = 0; + + virtual SLANG_NO_THROW bool SLANG_MCALL hasFeature(const char* feature) = 0; + + /// Returns a list of features supported by the renderer. + virtual SLANG_NO_THROW Result SLANG_MCALL getFeatures(const char** outFeatures, Size bufferSize, GfxCount* outFeatureCount) = 0; + + virtual SLANG_NO_THROW Result SLANG_MCALL getFormatSupportedResourceStates(Format format, ResourceStateSet* outStates) = 0; + + virtual SLANG_NO_THROW Result SLANG_MCALL getSlangSession(slang::ISession** outSlangSession) = 0; + + inline ComPtr getSlangSession() + { + ComPtr result; + getSlangSession(result.writeRef()); + return result; + } + + virtual SLANG_NO_THROW Result SLANG_MCALL createTransientResourceHeap( + const ITransientResourceHeap::Desc& desc, + ITransientResourceHeap** outHeap) = 0; + inline ComPtr createTransientResourceHeap( + const ITransientResourceHeap::Desc& desc) + { + ComPtr result; + createTransientResourceHeap(desc, result.writeRef()); + return result; + } + + /// Create a texture resource. + /// + /// If `initData` is non-null, then it must point to an array of + /// `ITextureResource::SubresourceData` with one element for each + /// subresource of the texture being created. + /// + /// The number of subresources in a texture is: + /// + /// effectiveElementCount * mipLevelCount + /// + /// where the effective element count is computed as: + /// + /// effectiveElementCount = (isArray ? arrayElementCount : 1) * (isCube ? 
6 : 1); + /// + virtual SLANG_NO_THROW Result SLANG_MCALL createTextureResource( + const ITextureResource::Desc& desc, + const ITextureResource::SubresourceData* initData, + ITextureResource** outResource) = 0; + + /// Create a texture resource. initData holds the initialize data to set the contents of the texture when constructed. + inline SLANG_NO_THROW ComPtr createTextureResource( + const ITextureResource::Desc& desc, + const ITextureResource::SubresourceData* initData = nullptr) + { + ComPtr resource; + SLANG_RETURN_NULL_ON_FAIL(createTextureResource(desc, initData, resource.writeRef())); + return resource; + } + + virtual SLANG_NO_THROW Result SLANG_MCALL createTextureFromNativeHandle( + InteropHandle handle, + const ITextureResource::Desc& srcDesc, + ITextureResource** outResource) = 0; + + virtual SLANG_NO_THROW Result SLANG_MCALL createTextureFromSharedHandle( + InteropHandle handle, + const ITextureResource::Desc& srcDesc, + const Size size, + ITextureResource** outResource) = 0; + + /// Create a buffer resource + virtual SLANG_NO_THROW Result SLANG_MCALL createBufferResource( + const IBufferResource::Desc& desc, + const void* initData, + IBufferResource** outResource) = 0; + + inline SLANG_NO_THROW ComPtr createBufferResource( + const IBufferResource::Desc& desc, + const void* initData = nullptr) + { + ComPtr resource; + SLANG_RETURN_NULL_ON_FAIL(createBufferResource(desc, initData, resource.writeRef())); + return resource; + } + + virtual SLANG_NO_THROW Result SLANG_MCALL createBufferFromNativeHandle( + InteropHandle handle, + const IBufferResource::Desc& srcDesc, + IBufferResource** outResource) = 0; + + virtual SLANG_NO_THROW Result SLANG_MCALL createBufferFromSharedHandle( + InteropHandle handle, + const IBufferResource::Desc& srcDesc, + IBufferResource** outResource) = 0; + + virtual SLANG_NO_THROW Result SLANG_MCALL + createSamplerState(ISamplerState::Desc const& desc, ISamplerState** outSampler) = 0; + + inline ComPtr 
createSamplerState(ISamplerState::Desc const& desc) + { + ComPtr sampler; + SLANG_RETURN_NULL_ON_FAIL(createSamplerState(desc, sampler.writeRef())); + return sampler; + } + + virtual SLANG_NO_THROW Result SLANG_MCALL createTextureView( + ITextureResource* texture, IResourceView::Desc const& desc, IResourceView** outView) = 0; + + inline ComPtr createTextureView(ITextureResource* texture, IResourceView::Desc const& desc) + { + ComPtr view; + SLANG_RETURN_NULL_ON_FAIL(createTextureView(texture, desc, view.writeRef())); + return view; + } + + virtual SLANG_NO_THROW Result SLANG_MCALL createBufferView( + IBufferResource* buffer, + IBufferResource* counterBuffer, + IResourceView::Desc const& desc, + IResourceView** outView) = 0; + + inline ComPtr createBufferView( + IBufferResource* buffer, IBufferResource* counterBuffer, IResourceView::Desc const& desc) + { + ComPtr view; + SLANG_RETURN_NULL_ON_FAIL(createBufferView(buffer, counterBuffer, desc, view.writeRef())); + return view; + } + + virtual SLANG_NO_THROW Result SLANG_MCALL + createFramebufferLayout(IFramebufferLayout::Desc const& desc, IFramebufferLayout** outFrameBuffer) = 0; + inline ComPtr createFramebufferLayout(IFramebufferLayout::Desc const& desc) + { + ComPtr fb; + SLANG_RETURN_NULL_ON_FAIL(createFramebufferLayout(desc, fb.writeRef())); + return fb; + } + + virtual SLANG_NO_THROW Result SLANG_MCALL + createFramebuffer(IFramebuffer::Desc const& desc, IFramebuffer** outFrameBuffer) = 0; + inline ComPtr createFramebuffer(IFramebuffer::Desc const& desc) + { + ComPtr fb; + SLANG_RETURN_NULL_ON_FAIL(createFramebuffer(desc, fb.writeRef())); + return fb; + } + + virtual SLANG_NO_THROW Result SLANG_MCALL createRenderPassLayout( + const IRenderPassLayout::Desc& desc, + IRenderPassLayout** outRenderPassLayout) = 0; + inline ComPtr createRenderPassLayout(const IRenderPassLayout::Desc& desc) + { + ComPtr rs; + SLANG_RETURN_NULL_ON_FAIL(createRenderPassLayout(desc, rs.writeRef())); + return rs; + } + + virtual 
SLANG_NO_THROW Result SLANG_MCALL createSwapchain( + ISwapchain::Desc const& desc, WindowHandle window, ISwapchain** outSwapchain) = 0; + inline ComPtr createSwapchain(ISwapchain::Desc const& desc, WindowHandle window) + { + ComPtr swapchain; + SLANG_RETURN_NULL_ON_FAIL(createSwapchain(desc, window, swapchain.writeRef())); + return swapchain; + } + + virtual SLANG_NO_THROW Result SLANG_MCALL createInputLayout( + IInputLayout::Desc const& desc, IInputLayout** outLayout) = 0; + + inline ComPtr createInputLayout(IInputLayout::Desc const& desc) + { + ComPtr layout; + SLANG_RETURN_NULL_ON_FAIL(createInputLayout(desc, layout.writeRef())); + return layout; + } + + inline Result createInputLayout(Size vertexSize, InputElementDesc const* inputElements, GfxCount inputElementCount, IInputLayout** outLayout) + { + VertexStreamDesc streamDesc = { vertexSize, InputSlotClass::PerVertex, 0 }; + + IInputLayout::Desc inputLayoutDesc = {}; + inputLayoutDesc.inputElementCount = inputElementCount; + inputLayoutDesc.inputElements = inputElements; + inputLayoutDesc.vertexStreamCount = 1; + inputLayoutDesc.vertexStreams = &streamDesc; + return createInputLayout(inputLayoutDesc, outLayout); + } + + inline ComPtr createInputLayout(Size vertexSize, InputElementDesc const* inputElements, GfxCount inputElementCount) + { + ComPtr layout; + SLANG_RETURN_NULL_ON_FAIL(createInputLayout(vertexSize, inputElements, inputElementCount, layout.writeRef())); + return layout; + } + + virtual SLANG_NO_THROW Result SLANG_MCALL + createCommandQueue(const ICommandQueue::Desc& desc, ICommandQueue** outQueue) = 0; + inline ComPtr createCommandQueue(const ICommandQueue::Desc& desc) + { + ComPtr queue; + SLANG_RETURN_NULL_ON_FAIL(createCommandQueue(desc, queue.writeRef())); + return queue; + } + + virtual SLANG_NO_THROW Result SLANG_MCALL createShaderObject( + slang::TypeReflection* type, + ShaderObjectContainerType container, + IShaderObject** outObject) = 0; + + inline ComPtr 
createShaderObject(slang::TypeReflection* type) + { + ComPtr<IShaderObject> object; + SLANG_RETURN_NULL_ON_FAIL(createShaderObject(type, ShaderObjectContainerType::None, object.writeRef())); + return object; + } + + virtual SLANG_NO_THROW Result SLANG_MCALL createMutableShaderObject( + slang::TypeReflection* type, + ShaderObjectContainerType container, + IShaderObject** outObject) = 0; + + virtual SLANG_NO_THROW Result SLANG_MCALL createShaderObjectFromTypeLayout( + slang::TypeLayoutReflection* typeLayout, IShaderObject** outObject) = 0; + + virtual SLANG_NO_THROW Result SLANG_MCALL createMutableShaderObjectFromTypeLayout( + slang::TypeLayoutReflection* typeLayout, IShaderObject** outObject) = 0; + + virtual SLANG_NO_THROW Result SLANG_MCALL createMutableRootShaderObject( + IShaderProgram* program, + IShaderObject** outObject) = 0; + + virtual SLANG_NO_THROW Result SLANG_MCALL + createShaderTable(const IShaderTable::Desc& desc, IShaderTable** outTable) = 0; + + virtual SLANG_NO_THROW Result SLANG_MCALL createProgram( + const IShaderProgram::Desc& desc, + IShaderProgram** outProgram, + ISlangBlob** outDiagnosticBlob = nullptr) = 0; + + inline ComPtr<IShaderProgram> createProgram(const IShaderProgram::Desc& desc) + { + ComPtr<IShaderProgram> program; + SLANG_RETURN_NULL_ON_FAIL(createProgram(desc, program.writeRef())); + return program; + } + + virtual SLANG_NO_THROW Result SLANG_MCALL createProgram2( + const IShaderProgram::CreateDesc2& createDesc, + IShaderProgram** outProgram, + ISlangBlob** outDiagnosticBlob = nullptr) = 0; + + virtual SLANG_NO_THROW Result SLANG_MCALL createGraphicsPipelineState( + const GraphicsPipelineStateDesc& desc, + IPipelineState** outState) = 0; + + inline ComPtr<IPipelineState> createGraphicsPipelineState( + const GraphicsPipelineStateDesc& desc) + { + ComPtr<IPipelineState> state; + SLANG_RETURN_NULL_ON_FAIL(createGraphicsPipelineState(desc, state.writeRef())); + return state; + } + + virtual SLANG_NO_THROW Result SLANG_MCALL createComputePipelineState( + const ComputePipelineStateDesc& desc, + IPipelineState** 
outState) = 0; + + inline ComPtr<IPipelineState> createComputePipelineState( + const ComputePipelineStateDesc& desc) + { + ComPtr<IPipelineState> state; + SLANG_RETURN_NULL_ON_FAIL(createComputePipelineState(desc, state.writeRef())); + return state; + } + + virtual SLANG_NO_THROW Result SLANG_MCALL createRayTracingPipelineState( + const RayTracingPipelineStateDesc& desc, IPipelineState** outState) = 0; + + /// Reads back a texture resource and stores the result in `outBlob`. + virtual SLANG_NO_THROW SlangResult SLANG_MCALL readTextureResource( + ITextureResource* resource, + ResourceState state, + ISlangBlob** outBlob, + Size* outRowPitch, + Size* outPixelSize) = 0; + + virtual SLANG_NO_THROW SlangResult SLANG_MCALL readBufferResource( + IBufferResource* buffer, + Offset offset, + Size size, + ISlangBlob** outBlob) = 0; + + /// Gets the `DeviceInfo` describing this device. + virtual SLANG_NO_THROW const DeviceInfo& SLANG_MCALL getDeviceInfo() const = 0; + + virtual SLANG_NO_THROW Result SLANG_MCALL createQueryPool( + const IQueryPool::Desc& desc, IQueryPool** outPool) = 0; + + + virtual SLANG_NO_THROW Result SLANG_MCALL getAccelerationStructurePrebuildInfo( + const IAccelerationStructure::BuildInputs& buildInputs, + IAccelerationStructure::PrebuildInfo* outPrebuildInfo) = 0; + + virtual SLANG_NO_THROW Result SLANG_MCALL createAccelerationStructure( + const IAccelerationStructure::CreateDesc& desc, + IAccelerationStructure** outView) = 0; + + virtual SLANG_NO_THROW Result SLANG_MCALL + createFence(const IFence::Desc& desc, IFence** outFence) = 0; + + /// Waits on the host for the fences to signal. + /// `timeout` is in nanoseconds, can be set to `kTimeoutInfinite`. 
+ virtual SLANG_NO_THROW Result SLANG_MCALL waitForFences( + GfxCount fenceCount, + IFence** fences, + uint64_t* values, + bool waitForAll, + uint64_t timeout) = 0; + + virtual SLANG_NO_THROW Result SLANG_MCALL getTextureAllocationInfo( + const ITextureResource::Desc& desc, Size* outSize, Size* outAlignment) = 0; + + virtual SLANG_NO_THROW Result SLANG_MCALL getTextureRowAlignment(Size* outAlignment) = 0; +}; + +#define SLANG_UUID_IDevice \ + { \ + 0x715bdf26, 0x5135, 0x11eb, { 0xAE, 0x93, 0x02, 0x42, 0xAC, 0x13, 0x00, 0x02 } \ + } + +struct ShaderCacheStats +{ + GfxCount hitCount; + GfxCount missCount; + GfxCount entryCount; +}; + +// These are exclusively used to track hit/miss counts for shader cache entries. Entry hit and +// miss counts specifically indicate if the file containing relevant shader code was found in +// the cache, while the general hit and miss counts indicate whether the file was both found and +// up-to-date. +class IShaderCache : public ISlangUnknown +{ +public: + virtual SLANG_NO_THROW Result SLANG_MCALL clearShaderCache() = 0; + virtual SLANG_NO_THROW Result SLANG_MCALL getShaderCacheStats(ShaderCacheStats* outStats) = 0; + virtual SLANG_NO_THROW Result SLANG_MCALL resetShaderCacheStats() = 0; +}; + +#define SLANG_UUID_IShaderCache \ + { \ + 0x8eccc8ec, 0x5c04, 0x4a51, { 0x99, 0x75, 0x13, 0xf8, 0xfe, 0xa1, 0x59, 0xf3 } \ + } + +class IPipelineCreationAPIDispatcher : public ISlangUnknown +{ +public: + virtual SLANG_NO_THROW Result SLANG_MCALL createComputePipelineState( + IDevice* device, + slang::IComponentType* program, + void* pipelineDesc, + void** outPipelineState) = 0; + virtual SLANG_NO_THROW Result SLANG_MCALL createGraphicsPipelineState( + IDevice* device, + slang::IComponentType* program, + void* pipelineDesc, + void** outPipelineState) = 0; + virtual SLANG_NO_THROW Result SLANG_MCALL createMeshPipelineState( + IDevice* device, + slang::IComponentType* program, + void* pipelineDesc, + void** outPipelineState) = 0; + virtual 
SLANG_NO_THROW Result SLANG_MCALL + beforeCreateRayTracingState(IDevice* device, slang::IComponentType* program) = 0; + virtual SLANG_NO_THROW Result SLANG_MCALL + afterCreateRayTracingState(IDevice* device, slang::IComponentType* program) = 0; +}; +#define SLANG_UUID_IPipelineCreationAPIDispatcher \ + { \ + 0xc3d5f782, 0xeae1, 0x4da6, { 0xab, 0x40, 0x75, 0x32, 0x31, 0x2, 0xb7, 0xdc } \ + } + + +// Global public functions + +extern "C" +{ + /// Checks if format is compressed + SLANG_GFX_API bool SLANG_MCALL gfxIsCompressedFormat(Format format); + + /// Checks if format is typeless + SLANG_GFX_API bool SLANG_MCALL gfxIsTypelessFormat(Format format); + + /// Gets information about the format + SLANG_GFX_API SlangResult SLANG_MCALL gfxGetFormatInfo(Format format, FormatInfo* outInfo); + + /// Gets a list of available adapters for a given device type + SLANG_GFX_API SlangResult SLANG_MCALL gfxGetAdapters(DeviceType type, ISlangBlob** outAdaptersBlob); + + /// Creates a device as described by `desc` and returns it through `outDevice`. + SLANG_GFX_API SlangResult SLANG_MCALL + gfxCreateDevice(const IDevice::Desc* desc, IDevice** outDevice); + + /// Reports current set of live objects in gfx. + /// Currently this only calls D3D's ReportLiveObjects. + SLANG_GFX_API SlangResult SLANG_MCALL gfxReportLiveObjects(); + + /// Sets a callback for receiving debug messages. + /// The layer does not hold a strong reference to the callback object. + /// The user is responsible for keeping the callback object alive. + SLANG_GFX_API SlangResult SLANG_MCALL + gfxSetDebugCallback(IDebugCallback* callback); + + /// Enables debug layer. The debug layer will check all `gfx` calls and verify that uses are valid. 
+ SLANG_GFX_API void SLANG_MCALL gfxEnableDebugLayer(); + + SLANG_GFX_API const char* SLANG_MCALL gfxGetDeviceTypeName(DeviceType type); +} + +/// Gets a list of available adapters for a given device type +inline AdapterList gfxGetAdapters(DeviceType type) +{ + ComPtr blob; + gfxGetAdapters(type, blob.writeRef()); + return AdapterList(blob); +} + +// Extended descs. +struct D3D12ExperimentalFeaturesDesc +{ + StructType structType = StructType::D3D12ExperimentalFeaturesDesc; + uint32_t numFeatures; + const void* featureIIDs; + void* configurationStructs; + uint32_t* configurationStructSizes; +}; + +struct D3D12DeviceExtendedDesc +{ + StructType structType = StructType::D3D12DeviceExtendedDesc; + const char* rootParameterShaderAttributeName = nullptr; + bool debugBreakOnD3D12Error = false; + uint32_t highestShaderModel = 0; +}; + +} diff --git a/third_party/slang/inc/slang-tag-version.h b/third_party/slang/inc/slang-tag-version.h new file mode 100644 index 0000000..e0ef88a --- /dev/null +++ b/third_party/slang/inc/slang-tag-version.h @@ -0,0 +1 @@ +#define SLANG_TAG_VERSION "v2024.0.0" diff --git a/third_party/slang/inc/slang.h b/third_party/slang/inc/slang.h new file mode 100644 index 0000000..c4fb555 --- /dev/null +++ b/third_party/slang/inc/slang.h @@ -0,0 +1,4819 @@ +#ifndef SLANG_H +#define SLANG_H + +/** \file slang.h + +The Slang API provides services to compile, reflect, and specialize code +written in the Slang shading language. +*/ + +/* +The following section attempts to detect the compiler and version in use. + +If an application defines `SLANG_COMPILER` before including this header, +they take responsibility for setting any compiler-dependent macros +used later in the file. + +Most applications should not need to touch this section. 
+*/ +#ifndef SLANG_COMPILER +# define SLANG_COMPILER + +/* +Compiler defines, see http://sourceforge.net/p/predef/wiki/Compilers/ +NOTE that SLANG_VC holds the compiler version - not just 1 or 0 +*/ +# if defined(_MSC_VER) +# if _MSC_VER >= 1900 +# define SLANG_VC 14 +# elif _MSC_VER >= 1800 +# define SLANG_VC 12 +# elif _MSC_VER >= 1700 +# define SLANG_VC 11 +# elif _MSC_VER >= 1600 +# define SLANG_VC 10 +# elif _MSC_VER >= 1500 +# define SLANG_VC 9 +# else +# error "unknown version of Visual C++ compiler" +# endif +# elif defined(__clang__) +# define SLANG_CLANG 1 +# elif defined(__SNC__) +# define SLANG_SNC 1 +# elif defined(__ghs__) +# define SLANG_GHS 1 +# elif defined(__GNUC__) /* note: __clang__, __SNC__, or __ghs__ imply __GNUC__ */ +# define SLANG_GCC 1 +# else +# error "unknown compiler" +# endif +/* +Any compilers not detected by the above logic are now explicitly zeroed out. +*/ +# ifndef SLANG_VC +# define SLANG_VC 0 +# endif +# ifndef SLANG_CLANG +# define SLANG_CLANG 0 +# endif +# ifndef SLANG_SNC +# define SLANG_SNC 0 +# endif +# ifndef SLANG_GHS +# define SLANG_GHS 0 +# endif +# ifndef SLANG_GCC +# define SLANG_GCC 0 +# endif +#endif /* SLANG_COMPILER */ + +/* +The following section attempts to detect the target platform being compiled for. + +If an application defines `SLANG_PLATFORM` before including this header, +they take responsibility for setting any platform-dependent macros +used later in the file. + +Most applications should not need to touch this section. 
+*/ +#ifndef SLANG_PLATFORM +# define SLANG_PLATFORM +/** +Operating system defines, see http://sourceforge.net/p/predef/wiki/OperatingSystems/ +*/ +# if defined(WINAPI_FAMILY) && WINAPI_FAMILY == WINAPI_PARTITION_APP +# define SLANG_WINRT 1 /* Windows Runtime, either on Windows RT or Windows 8 */ +# elif defined(XBOXONE) +# define SLANG_XBOXONE 1 +# elif defined(_WIN64) /* note: XBOXONE implies _WIN64 */ +# define SLANG_WIN64 1 +# elif defined(_M_PPC) +# define SLANG_X360 1 +# elif defined(_WIN32) /* note: _M_PPC implies _WIN32 */ +# define SLANG_WIN32 1 +# elif defined(__ANDROID__) +# define SLANG_ANDROID 1 +# elif defined(__linux__) || defined(__CYGWIN__) /* note: __ANDROID__ implies __linux__ */ +# define SLANG_LINUX 1 +# elif defined(__APPLE__) +# include "TargetConditionals.h" +# if TARGET_OS_MAC /* NOTE(review): Apple's TargetConditionals.h appears to set TARGET_OS_MAC on iOS as well, which would make the SLANG_IOS branch unreachable - confirm */ +# define SLANG_OSX 1 +# else +# define SLANG_IOS 1 +# endif +# elif defined(__CELLOS_LV2__) +# define SLANG_PS3 1 +# elif defined(__ORBIS__) +# define SLANG_PS4 1 +# elif defined(__SNC__) && defined(__arm__) +# define SLANG_PSP2 1 +# elif defined(__ghs__) +# define SLANG_WIIU 1 +# else +# error "unknown target platform" +# endif +/* +Any platforms not detected by the above logic are now explicitly zeroed out. 
+*/ +# ifndef SLANG_WINRT +# define SLANG_WINRT 0 +# endif +# ifndef SLANG_XBOXONE +# define SLANG_XBOXONE 0 +# endif +# ifndef SLANG_WIN64 +# define SLANG_WIN64 0 +# endif +# ifndef SLANG_X360 +# define SLANG_X360 0 +# endif +# ifndef SLANG_WIN32 +# define SLANG_WIN32 0 +# endif +# ifndef SLANG_ANDROID +# define SLANG_ANDROID 0 +# endif +# ifndef SLANG_LINUX +# define SLANG_LINUX 0 +# endif +# ifndef SLANG_IOS +# define SLANG_IOS 0 +# endif +# ifndef SLANG_OSX +# define SLANG_OSX 0 +# endif +# ifndef SLANG_PS3 +# define SLANG_PS3 0 +# endif +# ifndef SLANG_PS4 +# define SLANG_PS4 0 +# endif +# ifndef SLANG_PSP2 +# define SLANG_PSP2 0 +# endif +# ifndef SLANG_WIIU +# define SLANG_WIIU 0 +# endif +#endif /* SLANG_PLATFORM */ + +/* Shorthands for "families" of compilers/platforms */ +#define SLANG_GCC_FAMILY (SLANG_CLANG || SLANG_SNC || SLANG_GHS || SLANG_GCC) +#define SLANG_WINDOWS_FAMILY (SLANG_WINRT || SLANG_WIN32 || SLANG_WIN64) +#define SLANG_MICROSOFT_FAMILY (SLANG_XBOXONE || SLANG_X360 || SLANG_WINDOWS_FAMILY) +#define SLANG_LINUX_FAMILY (SLANG_LINUX || SLANG_ANDROID) +#define SLANG_APPLE_FAMILY (SLANG_IOS || SLANG_OSX) /* equivalent to #if __APPLE__ */ +#define SLANG_UNIX_FAMILY (SLANG_LINUX_FAMILY || SLANG_APPLE_FAMILY) /* shortcut for unix/posix platforms */ + +/* Macros concerning DirectX */ +#if !defined(SLANG_CONFIG_DX_ON_VK) || !SLANG_CONFIG_DX_ON_VK +# define SLANG_ENABLE_DXVK 0 +# define SLANG_ENABLE_VKD3D 0 +#else +# define SLANG_ENABLE_DXVK 1 +# define SLANG_ENABLE_VKD3D 1 +#endif + +#if SLANG_WINDOWS_FAMILY +# define SLANG_ENABLE_DIRECTX 1 +# define SLANG_ENABLE_DXGI_DEBUG 1 +# define SLANG_ENABLE_DXBC_SUPPORT 1 +# define SLANG_ENABLE_PIX 1 +#elif SLANG_LINUX_FAMILY +# define SLANG_ENABLE_DIRECTX (SLANG_ENABLE_DXVK || SLANG_ENABLE_VKD3D) +# define SLANG_ENABLE_DXGI_DEBUG 0 +# define SLANG_ENABLE_DXBC_SUPPORT 0 +# define SLANG_ENABLE_PIX 0 +#else +# define SLANG_ENABLE_DIRECTX 0 +# define SLANG_ENABLE_DXGI_DEBUG 0 +# define SLANG_ENABLE_DXBC_SUPPORT 
0 +# define SLANG_ENABLE_PIX 0 +#endif + +/* Macro for declaring if a method is no throw. Should be set before the return parameter. */ +#ifndef SLANG_NO_THROW +# if SLANG_WINDOWS_FAMILY && !defined(SLANG_DISABLE_EXCEPTIONS) +# define SLANG_NO_THROW __declspec(nothrow) +# endif +#endif +#ifndef SLANG_NO_THROW +# define SLANG_NO_THROW +#endif + +/* The `SLANG_STDCALL` and `SLANG_MCALL` defines are used to set the calling +convention for interface methods. +*/ +#ifndef SLANG_STDCALL +# if SLANG_MICROSOFT_FAMILY +# define SLANG_STDCALL __stdcall +# else +# define SLANG_STDCALL +# endif +#endif +#ifndef SLANG_MCALL +# define SLANG_MCALL SLANG_STDCALL +#endif + + +#if !defined(SLANG_STATIC) && !defined(SLANG_DYNAMIC) + #define SLANG_DYNAMIC +#endif + +#if defined(_MSC_VER) +# define SLANG_DLL_EXPORT __declspec(dllexport) +#else +# if 0 && __GNUC__ >= 4 +// Didn't work on latest gcc on linux.. so disable for now +// https://gcc.gnu.org/wiki/Visibility +# define SLANG_DLL_EXPORT __attribute__ ((dllexport)) +# else +# define SLANG_DLL_EXPORT __attribute__((__visibility__("default"))) +# endif +#endif + +#if defined(SLANG_DYNAMIC) +# if defined(_MSC_VER) +# ifdef SLANG_DYNAMIC_EXPORT +# define SLANG_API SLANG_DLL_EXPORT +# else +# define SLANG_API __declspec(dllimport) +# endif +# else + // TODO: need to consider compiler capabilities +//# ifdef SLANG_DYNAMIC_EXPORT +# define SLANG_API SLANG_DLL_EXPORT +//# endif +# endif +#endif + +#ifndef SLANG_API +# define SLANG_API +#endif + +// GCC Specific +#if SLANG_GCC_FAMILY + +# define SLANG_NO_INLINE __attribute__((noinline)) +# define SLANG_FORCE_INLINE inline __attribute__((always_inline)) +# define SLANG_BREAKPOINT(id) __builtin_trap(); +# define SLANG_ALIGN_OF(T) __alignof__(T) + +// Use the builtin directly so we don't need to have an include of stddef.h +# define SLANG_OFFSET_OF(T, ELEMENT) __builtin_offsetof(T, ELEMENT) +#endif // SLANG_GCC_FAMILY + +#ifndef SLANG_OFFSET_OF +# define SLANG_OFFSET_OF(T, ELEMENT) 
(size_t(&((T*)1)->ELEMENT) - 1) +#endif + +// Microsoft VC specific +#if SLANG_MICROSOFT_FAMILY +# define SLANG_NO_INLINE __declspec(noinline) +# define SLANG_FORCE_INLINE __forceinline +# define SLANG_BREAKPOINT(id) __debugbreak(); +# define SLANG_ALIGN_OF(T) __alignof(T) + +# define SLANG_INT64(x) (x##i64) +# define SLANG_UINT64(x) (x##ui64) +#endif // SLANG_MICROSOFT_FAMILY + +#ifndef SLANG_FORCE_INLINE +# define SLANG_FORCE_INLINE inline +#endif +#ifndef SLANG_NO_INLINE +# define SLANG_NO_INLINE +#endif + +#ifndef SLANG_COMPILE_TIME_ASSERT +# define SLANG_COMPILE_TIME_ASSERT(x) static_assert(x) +#endif + +#ifndef SLANG_OFFSET_OF +# define SLANG_OFFSET_OF(X, Y) offsetof(X, Y) +#endif + +#ifndef SLANG_BREAKPOINT +// Make it crash with a write to 0! +# define SLANG_BREAKPOINT(id) (*((int*)0) = int(id)); +#endif + +// Use for getting the number of elements of a standard C array. +// Use 0[x] here to catch the case where x has an overloaded subscript operator +#define SLANG_COUNT_OF(x) (SlangSSizeT(sizeof(x)/sizeof(0[x]))) +/// SLANG_INLINE exists to have a way to inline consistent with SLANG_ALWAYS_INLINE +#define SLANG_INLINE inline + +// If explicitly disabled and not set, set to not available +#if !defined(SLANG_HAS_EXCEPTIONS) && defined(SLANG_DISABLE_EXCEPTIONS) +# define SLANG_HAS_EXCEPTIONS 0 +#endif + +// If not set, the default is exceptions are available +#ifndef SLANG_HAS_EXCEPTIONS +# define SLANG_HAS_EXCEPTIONS 1 +#endif + +// Other defines +#define SLANG_STRINGIZE_HELPER(X) #X +#define SLANG_STRINGIZE(X) SLANG_STRINGIZE_HELPER(X) + +#define SLANG_CONCAT_HELPER(X, Y) X##Y +#define SLANG_CONCAT(X, Y) SLANG_CONCAT_HELPER(X, Y) + +#ifndef SLANG_UNUSED +# define SLANG_UNUSED(v) (void)v; +#endif + +// Used for doing constant literals +#ifndef SLANG_INT64 +# define SLANG_INT64(x) (x##ll) +#endif +#ifndef SLANG_UINT64 +# define SLANG_UINT64(x) (x##ull) +#endif + + +#ifdef __cplusplus +# define SLANG_EXTERN_C extern "C" +#else +# define SLANG_EXTERN_C +#endif + 
+#ifdef __cplusplus +// C++ specific macros +// Clang +#if SLANG_CLANG +# if (__clang_major__*10 + __clang_minor__) >= 33 +# define SLANG_HAS_MOVE_SEMANTICS 1 +# define SLANG_HAS_ENUM_CLASS 1 +# define SLANG_OVERRIDE override +# endif + +// Gcc +#elif SLANG_GCC_FAMILY +// Check for C++11 +# if (__cplusplus >= 201103L) +# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 405 +# define SLANG_HAS_MOVE_SEMANTICS 1 +# endif +# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 406 +# define SLANG_HAS_ENUM_CLASS 1 +# endif +# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 407 +# define SLANG_OVERRIDE override +# endif +# endif + +// TODO(JS): Not used in previous code. Left here as may be useful on some other version. +// #define SLANG_RETURN_NEVER __attribute__((__noreturn__)) + +# define SLANG_RETURN_NEVER [[noreturn]] + +# endif // SLANG_GCC_FAMILY + +// Visual Studio + +# if SLANG_VC +// C4481: nonstandard extension used: override specifier 'override' +# if _MSC_VER < 1700 +# pragma warning(disable : 4481) +# endif +# define SLANG_OVERRIDE override +# if _MSC_VER >= 1600 +# define SLANG_HAS_MOVE_SEMANTICS 1 +# endif +# if _MSC_VER >= 1700 +# define SLANG_HAS_ENUM_CLASS 1 +# endif + +# define SLANG_RETURN_NEVER __declspec(noreturn) + +# endif // SLANG_VC + +// Set non set +# ifndef SLANG_OVERRIDE +# define SLANG_OVERRIDE +# endif +# ifndef SLANG_HAS_ENUM_CLASS +# define SLANG_HAS_ENUM_CLASS 0 +# endif +# ifndef SLANG_HAS_MOVE_SEMANTICS +# define SLANG_HAS_MOVE_SEMANTICS 0 +# endif + +#endif // __cplusplus + +#ifndef SLANG_RETURN_NEVER +# define SLANG_RETURN_NEVER [[noreturn]] +#endif // SLANG_RETURN_NEVER + +/* Macros for detecting processor */ +#if defined(_M_ARM) || defined(__ARM_EABI__) +// This is special case for nVidia tegra +# define SLANG_PROCESSOR_ARM 1 +#elif defined(__i386__) || defined(_M_IX86) +# define SLANG_PROCESSOR_X86 1 +#elif defined(_M_AMD64) || defined(_M_X64) || defined(__amd64) || defined(__x86_64) +# define SLANG_PROCESSOR_X86_64 1 +#elif defined(_PPC_) || 
defined(__ppc__) || defined(__POWERPC__) || defined(_M_PPC) +# if defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__) || defined(__64BIT__) || defined(_LP64) || defined(__LP64__) +# define SLANG_PROCESSOR_POWER_PC_64 1 +# else +# define SLANG_PROCESSOR_POWER_PC 1 +# endif +#elif defined(__arm__) +# define SLANG_PROCESSOR_ARM 1 +#elif defined(_M_ARM64) || defined(__aarch64__) +# define SLANG_PROCESSOR_ARM_64 1 +#endif + +#ifndef SLANG_PROCESSOR_ARM +# define SLANG_PROCESSOR_ARM 0 +#endif + +#ifndef SLANG_PROCESSOR_ARM_64 +# define SLANG_PROCESSOR_ARM_64 0 +#endif + +#ifndef SLANG_PROCESSOR_X86 +# define SLANG_PROCESSOR_X86 0 +#endif + +#ifndef SLANG_PROCESSOR_X86_64 +# define SLANG_PROCESSOR_X86_64 0 +#endif + +#ifndef SLANG_PROCESSOR_POWER_PC +# define SLANG_PROCESSOR_POWER_PC 0 +#endif + +#ifndef SLANG_PROCESSOR_POWER_PC_64 +# define SLANG_PROCESSOR_POWER_PC_64 0 +#endif + +// Processor families + +#define SLANG_PROCESSOR_FAMILY_X86 (SLANG_PROCESSOR_X86_64 | SLANG_PROCESSOR_X86) +#define SLANG_PROCESSOR_FAMILY_ARM (SLANG_PROCESSOR_ARM | SLANG_PROCESSOR_ARM_64) +#define SLANG_PROCESSOR_FAMILY_POWER_PC (SLANG_PROCESSOR_POWER_PC_64 | SLANG_PROCESSOR_POWER_PC) + +// Pointer size +#define SLANG_PTR_IS_64 (SLANG_PROCESSOR_ARM_64 | SLANG_PROCESSOR_X86_64 | SLANG_PROCESSOR_POWER_PC_64) +#define SLANG_PTR_IS_32 (SLANG_PTR_IS_64 ^ 1) + +// Processor features +#if SLANG_PROCESSOR_FAMILY_X86 +# define SLANG_LITTLE_ENDIAN 1 +# define SLANG_UNALIGNED_ACCESS 1 +#elif SLANG_PROCESSOR_FAMILY_ARM +# if defined(__ARMEB__) +# define SLANG_BIG_ENDIAN 1 +# else +# define SLANG_LITTLE_ENDIAN 1 +# endif +#elif SLANG_PROCESSOR_FAMILY_POWER_PC +# define SLANG_BIG_ENDIAN 1 +#endif + +#ifndef SLANG_LITTLE_ENDIAN +# define SLANG_LITTLE_ENDIAN 0 +#endif + +#ifndef SLANG_BIG_ENDIAN +# define SLANG_BIG_ENDIAN 0 +#endif + +#ifndef SLANG_UNALIGNED_ACCESS +# define SLANG_UNALIGNED_ACCESS 0 +#endif + +// One endianess must be set +#if ((SLANG_BIG_ENDIAN | SLANG_LITTLE_ENDIAN) == 0) +# 
error "Couldn't determine endianess" +#endif + +#ifndef SLANG_NO_INTTYPES +#include +#endif // ! SLANG_NO_INTTYPES + +#ifndef SLANG_NO_STDDEF +#include +#endif // ! SLANG_NO_STDDEF + +#ifdef __cplusplus +extern "C" +{ +#endif + /*! + @mainpage Introduction + + API Reference: slang.h + + @file slang.h + */ + + typedef uint32_t SlangUInt32; + typedef int32_t SlangInt32; + + // Use SLANG_PTR_ macros to determine SlangInt/SlangUInt types. + // This is used over say using size_t/ptrdiff_t/intptr_t/uintptr_t, because on some targets, these types are distinct from + // their uint_t/int_t equivalents and so produce ambiguity with function overloading. + // + // SlangSizeT is helpful as on some compilers size_t is distinct from a regular integer type and so overloading doesn't work. + // Casting to SlangSizeT works around this. +#if SLANG_PTR_IS_64 + typedef int64_t SlangInt; + typedef uint64_t SlangUInt; + + typedef int64_t SlangSSizeT; + typedef uint64_t SlangSizeT; +#else + typedef int32_t SlangInt; + typedef uint32_t SlangUInt; + + typedef int32_t SlangSSizeT; + typedef uint32_t SlangSizeT; +#endif + + typedef bool SlangBool; + + + /*! + @brief Severity of a diagnostic generated by the compiler. + Values come from the enum below, with higher values representing more severe + conditions, and all values >= SLANG_SEVERITY_ERROR indicating compilation + failure. + */ + typedef int SlangSeverityIntegral; + enum SlangSeverity : SlangSeverityIntegral + { + SLANG_SEVERITY_DISABLED = 0, /**< A message that is disabled, filtered out. */ + SLANG_SEVERITY_NOTE, /**< An informative message. */ + SLANG_SEVERITY_WARNING, /**< A warning, which indicates a possible proble. */ + SLANG_SEVERITY_ERROR, /**< An error, indicating that compilation failed. */ + SLANG_SEVERITY_FATAL, /**< An unrecoverable error, which forced compilation to abort. */ + SLANG_SEVERITY_INTERNAL, /**< An internal error, indicating a logic error in the compiler. 
*/ + }; + + typedef int SlangDiagnosticFlags; + enum + { + SLANG_DIAGNOSTIC_FLAG_VERBOSE_PATHS = 0x01, + SLANG_DIAGNOSTIC_FLAG_TREAT_WARNINGS_AS_ERRORS = 0x02 + }; + + typedef int SlangBindableResourceIntegral; + enum SlangBindableResourceType : SlangBindableResourceIntegral + { + SLANG_NON_BINDABLE = 0, + SLANG_TEXTURE, + SLANG_SAMPLER, + SLANG_UNIFORM_BUFFER, + SLANG_STORAGE_BUFFER, + }; + + /* NOTE! To keep binary compatibility care is needed with this enum! + + * To add value, only add at the bottom (before COUNT_OF) + * To remove a value, add _DEPRECATED as a suffix, but leave in the list + + This will make the enum values stable, and compatible with libraries that might not use the latest + enum values. + */ + typedef int SlangCompileTargetIntegral; + enum SlangCompileTarget : SlangCompileTargetIntegral + { + SLANG_TARGET_UNKNOWN, + SLANG_TARGET_NONE, + SLANG_GLSL, + SLANG_GLSL_VULKAN, //< deprecated: just use `SLANG_GLSL` + SLANG_GLSL_VULKAN_ONE_DESC, //< deprecated + SLANG_HLSL, + SLANG_SPIRV, + SLANG_SPIRV_ASM, + SLANG_DXBC, + SLANG_DXBC_ASM, + SLANG_DXIL, + SLANG_DXIL_ASM, + SLANG_C_SOURCE, ///< The C language + SLANG_CPP_SOURCE, ///< C++ code for shader kernels. + SLANG_HOST_EXECUTABLE, ///< Standalone binary executable (for hosting CPU/OS) + SLANG_SHADER_SHARED_LIBRARY, ///< A shared library/Dll for shader kernels (for hosting CPU/OS) + SLANG_SHADER_HOST_CALLABLE, ///< A CPU target that makes the compiled shader code available to be run immediately + SLANG_CUDA_SOURCE, ///< Cuda source + SLANG_PTX, ///< PTX + SLANG_CUDA_OBJECT_CODE, ///< Object code that contains CUDA functions. + SLANG_OBJECT_CODE, ///< Object code that can be used for later linking + SLANG_HOST_CPP_SOURCE, ///< C++ code for host library or executable. + SLANG_HOST_HOST_CALLABLE, ///< Host callable host code (ie non kernel/shader) + SLANG_CPP_PYTORCH_BINDING, ///< C++ PyTorch binding code. 
+ SLANG_TARGET_COUNT_OF, + }; + + /* A "container format" describes the way that the outputs + for multiple files, entry points, targets, etc. should be + combined into a single artifact for output. */ + typedef int SlangContainerFormatIntegral; + enum SlangContainerFormat : SlangContainerFormatIntegral + { + /* Don't generate a container. */ + SLANG_CONTAINER_FORMAT_NONE, + + /* Generate a container in the `.slang-module` format, + which includes reflection information, compiled kernels, etc. */ + SLANG_CONTAINER_FORMAT_SLANG_MODULE, + }; + + typedef int SlangPassThroughIntegral; + enum SlangPassThrough : SlangPassThroughIntegral + { + SLANG_PASS_THROUGH_NONE, + SLANG_PASS_THROUGH_FXC, + SLANG_PASS_THROUGH_DXC, + SLANG_PASS_THROUGH_GLSLANG, + SLANG_PASS_THROUGH_SPIRV_DIS, + SLANG_PASS_THROUGH_CLANG, ///< Clang C/C++ compiler + SLANG_PASS_THROUGH_VISUAL_STUDIO, ///< Visual studio C/C++ compiler + SLANG_PASS_THROUGH_GCC, ///< GCC C/C++ compiler + SLANG_PASS_THROUGH_GENERIC_C_CPP, ///< Generic C or C++ compiler, which is decided by the source type + SLANG_PASS_THROUGH_NVRTC, ///< NVRTC Cuda compiler + SLANG_PASS_THROUGH_LLVM, ///< LLVM 'compiler' - includes LLVM and Clang + SLANG_PASS_THROUGH_SPIRV_OPT, ///< SPIRV-opt + SLANG_PASS_THROUGH_COUNT_OF, + }; + + /* Defines an archive type used to holds a 'file system' type structure. */ + typedef int SlangArchiveTypeIntegral; + enum SlangArchiveType : SlangArchiveTypeIntegral + { + SLANG_ARCHIVE_TYPE_UNDEFINED, + SLANG_ARCHIVE_TYPE_ZIP, + SLANG_ARCHIVE_TYPE_RIFF, ///< Riff container with no compression + SLANG_ARCHIVE_TYPE_RIFF_DEFLATE, + SLANG_ARCHIVE_TYPE_RIFF_LZ4, + SLANG_ARCHIVE_TYPE_COUNT_OF, + }; + + /*! + Flags to control compilation behavior. 
+ */ + typedef unsigned int SlangCompileFlags; + enum + { + /* Do as little mangling of names as possible, to try to preserve original names */ + SLANG_COMPILE_FLAG_NO_MANGLING = 1 << 3, + + /* Skip code generation step, just check the code and generate layout */ + SLANG_COMPILE_FLAG_NO_CODEGEN = 1 << 4, + + /* Obfuscate shader names on release products */ + SLANG_COMPILE_FLAG_OBFUSCATE = 1 << 5, + + /* Deprecated flags: kept around to allow existing applications to + compile. Note that the relevant features will still be left in + their default state. */ + SLANG_COMPILE_FLAG_NO_CHECKING = 0, + SLANG_COMPILE_FLAG_SPLIT_MIXED_TYPES = 0, + }; + + /*! + @brief Flags to control code generation behavior of a compilation target */ + typedef unsigned int SlangTargetFlags; + enum + { + /* When compiling for a D3D Shader Model 5.1 or higher target, allocate + distinct register spaces for parameter blocks. + + @deprecated This behavior is now enabled unconditionally. + */ + SLANG_TARGET_FLAG_PARAMETER_BLOCKS_USE_REGISTER_SPACES = 1 << 4, + + /* When set, will generate target code that contains all entrypoints defined + in the input source or specified via the `spAddEntryPoint` function in a + single output module (library/source file). + */ + SLANG_TARGET_FLAG_GENERATE_WHOLE_PROGRAM = 1 << 8, + + /* When set, will dump out the IR between intermediate compilation steps.*/ + SLANG_TARGET_FLAG_DUMP_IR = 1 << 9, + + /* When set, will generate SPIRV directly rather than via glslang. */ + SLANG_TARGET_FLAG_GENERATE_SPIRV_DIRECTLY = 1 << 10, + }; +#if defined(SLANG_CONFIG_DEFAULT_SPIRV_DIRECT) + constexpr static SlangTargetFlags kDefaultTargetFlags = SLANG_TARGET_FLAG_GENERATE_SPIRV_DIRECTLY; +#else + constexpr static SlangTargetFlags kDefaultTargetFlags = 0; +#endif + + /*! + @brief Options to control floating-point precision guarantees for a target. 
+ */ + typedef unsigned int SlangFloatingPointModeIntegral; + enum SlangFloatingPointMode : SlangFloatingPointModeIntegral + { + SLANG_FLOATING_POINT_MODE_DEFAULT = 0, + SLANG_FLOATING_POINT_MODE_FAST, + SLANG_FLOATING_POINT_MODE_PRECISE, + }; + + /*! + @brief Options to control emission of `#line` directives + */ + typedef unsigned int SlangLineDirectiveModeIntegral; + enum SlangLineDirectiveMode : SlangLineDirectiveModeIntegral + { + SLANG_LINE_DIRECTIVE_MODE_DEFAULT = 0, /**< Default behavior: pick behavior base on target. */ + SLANG_LINE_DIRECTIVE_MODE_NONE, /**< Don't emit line directives at all. */ + SLANG_LINE_DIRECTIVE_MODE_STANDARD, /**< Emit standard C-style `#line` directives. */ + SLANG_LINE_DIRECTIVE_MODE_GLSL, /**< Emit GLSL-style directives with file *number* instead of name */ + SLANG_LINE_DIRECTIVE_MODE_SOURCE_MAP, /**< Use a source map to track line mappings (ie no #line will appear in emitting source) */ + }; + + typedef int SlangSourceLanguageIntegral; + enum SlangSourceLanguage : SlangSourceLanguageIntegral + { + SLANG_SOURCE_LANGUAGE_UNKNOWN, + SLANG_SOURCE_LANGUAGE_SLANG, + SLANG_SOURCE_LANGUAGE_HLSL, + SLANG_SOURCE_LANGUAGE_GLSL, + SLANG_SOURCE_LANGUAGE_C, + SLANG_SOURCE_LANGUAGE_CPP, + SLANG_SOURCE_LANGUAGE_CUDA, + SLANG_SOURCE_LANGUAGE_SPIRV, + SLANG_SOURCE_LANGUAGE_COUNT_OF, + }; + + typedef unsigned int SlangProfileIDIntegral; + enum SlangProfileID : SlangProfileIDIntegral + { + SLANG_PROFILE_UNKNOWN, + }; + + + typedef SlangInt32 SlangCapabilityIDIntegral; + enum SlangCapabilityID : SlangCapabilityIDIntegral + { + SLANG_CAPABILITY_UNKNOWN = 0, + }; + + typedef unsigned int SlangMatrixLayoutModeIntegral; + enum SlangMatrixLayoutMode : SlangMatrixLayoutModeIntegral + { + SLANG_MATRIX_LAYOUT_MODE_UNKNOWN = 0, + SLANG_MATRIX_LAYOUT_ROW_MAJOR, + SLANG_MATRIX_LAYOUT_COLUMN_MAJOR, + }; + + typedef SlangUInt32 SlangStageIntegral; + enum SlangStage : SlangStageIntegral + { + SLANG_STAGE_NONE, + SLANG_STAGE_VERTEX, + SLANG_STAGE_HULL, + 
SLANG_STAGE_DOMAIN, + SLANG_STAGE_GEOMETRY, + SLANG_STAGE_FRAGMENT, + SLANG_STAGE_COMPUTE, + SLANG_STAGE_RAY_GENERATION, + SLANG_STAGE_INTERSECTION, + SLANG_STAGE_ANY_HIT, + SLANG_STAGE_CLOSEST_HIT, + SLANG_STAGE_MISS, + SLANG_STAGE_CALLABLE, + SLANG_STAGE_MESH, + SLANG_STAGE_AMPLIFICATION, + + // alias: + SLANG_STAGE_PIXEL = SLANG_STAGE_FRAGMENT, + }; + + typedef SlangUInt32 SlangDebugInfoLevelIntegral; + enum SlangDebugInfoLevel : SlangDebugInfoLevelIntegral + { + SLANG_DEBUG_INFO_LEVEL_NONE = 0, /**< Don't emit debug information at all. */ + SLANG_DEBUG_INFO_LEVEL_MINIMAL, /**< Emit as little debug information as possible, while still supporting stack traces. */ + SLANG_DEBUG_INFO_LEVEL_STANDARD, /**< Emit whatever is the standard level of debug information for each target. */ + SLANG_DEBUG_INFO_LEVEL_MAXIMAL, /**< Emit as much debug information as possible for each target. */ + + }; + + /* Describes the debugging information format produced during a compilation. */ + typedef SlangUInt32 SlangDebugInfoFormatIntegral; + enum SlangDebugInfoFormat : SlangDebugInfoFormatIntegral + { + SLANG_DEBUG_INFO_FORMAT_DEFAULT, ///< Use the default debugging format for the target + SLANG_DEBUG_INFO_FORMAT_C7, ///< CodeView C7 format (typically means debugging information is embedded in the binary) + SLANG_DEBUG_INFO_FORMAT_PDB, ///< Program database + + SLANG_DEBUG_INFO_FORMAT_STABS, ///< Stabs + SLANG_DEBUG_INFO_FORMAT_COFF, ///< COFF debug info + SLANG_DEBUG_INFO_FORMAT_DWARF, ///< DWARF debug info (we may want to support specifying the version) + + SLANG_DEBUG_INFO_FORMAT_COUNT_OF, + }; + + typedef SlangUInt32 SlangOptimizationLevelIntegral; + enum SlangOptimizationLevel : SlangOptimizationLevelIntegral + { + SLANG_OPTIMIZATION_LEVEL_NONE = 0, /**< Don't optimize at all. */ + SLANG_OPTIMIZATION_LEVEL_DEFAULT, /**< Default optimization level: balance code quality and compilation time. */ + SLANG_OPTIMIZATION_LEVEL_HIGH, /**< Optimize aggressively. 
*/ + SLANG_OPTIMIZATION_LEVEL_MAXIMAL, /**< Include optimizations that may take a very long time, or may involve severe space-vs-speed tradeoffs */ + }; + + /** A result code for a Slang API operation. + + This type is generally compatible with the Windows API `HRESULT` type. In particular, negative values indicate + failure results, while zero or positive results indicate success. + + In general, Slang APIs always return a zero result on success, unless documented otherwise. Strictly speaking + a negative value indicates an error, a positive (or 0) value indicates success. This can be tested for with the macros + SLANG_SUCCEEDED(x) or SLANG_FAILED(x). + + It can represent if the call was successful or not. It can also specify in an extensible manner what facility + produced the result (as the integral 'facility') as well as what caused it (as an integral 'code'). + Under the covers SlangResult is represented as a int32_t. + + SlangResult is designed to be compatible with COM HRESULT. + + It's layout in bits is as follows + + Severity | Facility | Code + ---------|----------|----- + 31 | 30-16 | 15-0 + + Severity - 1 fail, 0 is success - as SlangResult is signed 32 bits, means negative number indicates failure. + Facility is where the error originated from. Code is the code specific to the facility. + + Result codes have the following styles, + 1) SLANG_name + 2) SLANG_s_f_name + 3) SLANG_s_name + + where s is S for success, E for error + f is the short version of the facility name + + Style 1 is reserved for SLANG_OK and SLANG_FAIL as they are so commonly used. + + It is acceptable to expand 'f' to a longer name to differentiate a name or drop if unique without it. + ie for a facility 'DRIVER' it might make sense to have an error of the form SLANG_E_DRIVER_OUT_OF_MEMORY + */ + + typedef int32_t SlangResult; + + //! Use to test if a result was failure. Never use result != SLANG_OK to test for failure, as there may be successful codes != SLANG_OK. 
+#define SLANG_FAILED(status) ((status) < 0) + //! Use to test if a result succeeded. Never use result == SLANG_OK to test for success, as will detect other successful codes as a failure. +#define SLANG_SUCCEEDED(status) ((status) >= 0) + + //! Get the facility the result is associated with +#define SLANG_GET_RESULT_FACILITY(r) ((int32_t)(((r) >> 16) & 0x7fff)) + //! Get the result code for the facility +#define SLANG_GET_RESULT_CODE(r) ((int32_t)((r) & 0xffff)) + +#define SLANG_MAKE_ERROR(fac, code) ((((int32_t)(fac)) << 16) | ((int32_t)(code)) | int32_t(0x80000000)) +#define SLANG_MAKE_SUCCESS(fac, code) ((((int32_t)(fac)) << 16) | ((int32_t)(code))) + + /*************************** Facilities ************************************/ + + //! Facilities compatible with windows COM - only use if known code is compatible +#define SLANG_FACILITY_WIN_GENERAL 0 +#define SLANG_FACILITY_WIN_INTERFACE 4 +#define SLANG_FACILITY_WIN_API 7 + + //! Base facility -> so as to not clash with HRESULT values (values in 0x200 range do not appear used) +#define SLANG_FACILITY_BASE 0x200 + + /*! Facilities numbers must be unique across a project to make the resulting result a unique number. + It can be useful to have a consistent short name for a facility, as used in the name prefix */ +#define SLANG_FACILITY_CORE SLANG_FACILITY_BASE + /* Facility for codes, that are not uniquely defined/protected. Can be used to pass back a specific error without requiring system wide facility uniqueness. Codes + should never be part of a public API. */ +#define SLANG_FACILITY_INTERNAL SLANG_FACILITY_BASE + 1 + + /// Base for external facilities. Facilities should be unique across modules. +#define SLANG_FACILITY_EXTERNAL_BASE 0x210 + + /* ************************ Win COM compatible Results ******************************/ + // https://msdn.microsoft.com/en-us/library/windows/desktop/aa378137(v=vs.85).aspx + + //! 
SLANG_OK indicates success, and is equivalent to SLANG_MAKE_SUCCESS(SLANG_FACILITY_WIN_GENERAL, 0) +#define SLANG_OK 0 + //! SLANG_FAIL is the generic failure code - meaning a serious error occurred and the call couldn't complete +#define SLANG_FAIL SLANG_MAKE_ERROR(SLANG_FACILITY_WIN_GENERAL, 0x4005) + +#define SLANG_MAKE_WIN_GENERAL_ERROR(code) SLANG_MAKE_ERROR(SLANG_FACILITY_WIN_GENERAL, code) + + //! Functionality is not implemented +#define SLANG_E_NOT_IMPLEMENTED SLANG_MAKE_WIN_GENERAL_ERROR(0x4001) + //! Interface not be found +#define SLANG_E_NO_INTERFACE SLANG_MAKE_WIN_GENERAL_ERROR(0x4002) + //! Operation was aborted (did not correctly complete) +#define SLANG_E_ABORT SLANG_MAKE_WIN_GENERAL_ERROR(0x4004) + + //! Indicates that a handle passed in as parameter to a method is invalid. +#define SLANG_E_INVALID_HANDLE SLANG_MAKE_ERROR(SLANG_FACILITY_WIN_API, 6) + //! Indicates that an argument passed in as parameter to a method is invalid. +#define SLANG_E_INVALID_ARG SLANG_MAKE_ERROR(SLANG_FACILITY_WIN_API, 0x57) + //! Operation could not complete - ran out of memory +#define SLANG_E_OUT_OF_MEMORY SLANG_MAKE_ERROR(SLANG_FACILITY_WIN_API, 0xe) + + /* *************************** other Results **************************************/ + +#define SLANG_MAKE_CORE_ERROR(code) SLANG_MAKE_ERROR(SLANG_FACILITY_CORE, code) + + // Supplied buffer is too small to be able to complete +#define SLANG_E_BUFFER_TOO_SMALL SLANG_MAKE_CORE_ERROR(1) + //! Used to identify a Result that has yet to be initialized. + //! It defaults to failure such that if used incorrectly will fail, as similar in concept to using an uninitialized variable. +#define SLANG_E_UNINITIALIZED SLANG_MAKE_CORE_ERROR(2) + //! Returned from an async method meaning the output is invalid (thus an error), but a result for the request is pending, and will be returned on a subsequent call with the async handle. +#define SLANG_E_PENDING SLANG_MAKE_CORE_ERROR(3) + //! 
Indicates a file/resource could not be opened +#define SLANG_E_CANNOT_OPEN SLANG_MAKE_CORE_ERROR(4) + //! Indicates a file/resource could not be found +#define SLANG_E_NOT_FOUND SLANG_MAKE_CORE_ERROR(5) + //! An unhandled internal failure (typically from unhandled exception) +#define SLANG_E_INTERNAL_FAIL SLANG_MAKE_CORE_ERROR(6) + //! Could not complete because some underlying feature (hardware or software) was not available +#define SLANG_E_NOT_AVAILABLE SLANG_MAKE_CORE_ERROR(7) + //! Could not complete because the operation times out. +#define SLANG_E_TIME_OUT SLANG_MAKE_CORE_ERROR(8) + + /** A "Universally Unique Identifier" (UUID) + + The Slang API uses UUIDs to identify interfaces when + using `queryInterface`. + + This type is compatible with the `GUID` type defined + by the Component Object Model (COM), but Slang is + not dependent on COM. + */ + struct SlangUUID + { + uint32_t data1; + uint16_t data2; + uint16_t data3; + uint8_t data4[8]; + }; + +// Place at the start of an interface with the guid. +// Guid should be specified as SLANG_COM_INTERFACE(0x00000000, 0x0000, 0x0000, { 0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46 }) +// NOTE: it's the typical guid struct definition, without the surrounding {} +// It is not necessary to use the multiple parameters (we can wrap in parens), but this is simple. +#define SLANG_COM_INTERFACE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \ + public: \ + SLANG_FORCE_INLINE constexpr static SlangUUID getTypeGuid() \ + { \ + return { a, b, c, d0, d1, d2, d3, d4, d5, d6, d7 }; \ + } + +// Sometimes it's useful to associate a guid with a class to identify it. This macro can used for this, +// and the guid extracted via the getTypeGuid() function defined in the type +#define SLANG_CLASS_GUID(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \ + SLANG_FORCE_INLINE constexpr static SlangUUID getTypeGuid() \ + { \ + return { a, b, c, d0, d1, d2, d3, d4, d5, d6, d7 }; \ + } + +// Helper to fill in pairs of GUIDs and return pointers. 
This ensures that the +// type of the GUID passed matches the pointer type, and that it is derived +// from ISlangUnknown, +// TODO(c++20): would is_derived_from be more appropriate here for private inheritance of ISlangUnknown? +// +// with : void createFoo(SlangUUID, void**); +// Slang::ComPtr<Bar> myBar; +// call with: createFoo(SLANG_IID_PPV_ARGS(myBar.writeRef())) +// to call : createFoo(Bar::getTypeGuid(), (void**)(myBar.writeRef())) +#define SLANG_IID_PPV_ARGS(ppType) \ + std::decay_t<decltype(**(ppType))>::getTypeGuid(), \ + ((void)[]{static_assert(std::is_base_of_v<ISlangUnknown, std::decay_t<decltype(**(ppType))>>);}, reinterpret_cast<void**>(ppType)) + + + /** Base interface for components exchanged through the API. + + This interface definition is compatible with the COM `IUnknown`, + and uses the same UUID, but Slang does not require applications + to use or initialize COM. + */ + struct ISlangUnknown + { + SLANG_COM_INTERFACE(0x00000000, 0x0000, 0x0000, { 0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46 }) + + virtual SLANG_NO_THROW SlangResult SLANG_MCALL queryInterface(SlangUUID const& uuid, void** outObject) = 0; + virtual SLANG_NO_THROW uint32_t SLANG_MCALL addRef() = 0; + virtual SLANG_NO_THROW uint32_t SLANG_MCALL release() = 0; + + /* + Inline methods are provided to allow the above operations to be called + using their traditional COM names/signatures: + */ + SlangResult QueryInterface(struct _GUID const& uuid, void** outObject) { return queryInterface(*(SlangUUID const*)&uuid, outObject); } + uint32_t AddRef() { return addRef(); } + uint32_t Release() { return release(); } + }; + #define SLANG_UUID_ISlangUnknown ISlangUnknown::getTypeGuid() + + + /* An interface to provide a mechanism to cast, that doesn't require ref counting + and doesn't have to return a pointer to a ISlangUnknown derived class */ + class ISlangCastable : public ISlangUnknown + { + SLANG_COM_INTERFACE(0x87ede0e1, 0x4852, 0x44b0, { 0x8b, 0xf2, 0xcb, 0x31, 0x87, 0x4d, 0xe2, 0x39 }); + + /// Can be used to cast to interfaces without reference counting. 
+ /// Also provides access to internal implementations, when they provide a guid + /// Can simulate a 'generated' interface as long as kept in scope by cast from. + virtual SLANG_NO_THROW void* SLANG_MCALL castAs(const SlangUUID& guid) = 0; + }; + + class ISlangClonable : public ISlangCastable + { + SLANG_COM_INTERFACE(0x1ec36168, 0xe9f4, 0x430d, { 0xbb, 0x17, 0x4, 0x8a, 0x80, 0x46, 0xb3, 0x1f }); + + /// Note the use of guid is for the desired interface/object. + /// The object is returned *not* ref counted. Any type that can implements the interface, + /// derives from ICastable, and so (not withstanding some other issue) will always return + /// an ICastable interface which other interfaces/types are accessible from via castAs + SLANG_NO_THROW virtual void* SLANG_MCALL clone(const SlangUUID& guid) = 0; + }; + + /** A "blob" of binary data. + + This interface definition is compatible with the `ID3DBlob` and `ID3D10Blob` interfaces. + */ + struct ISlangBlob : public ISlangUnknown + { + SLANG_COM_INTERFACE(0x8BA5FB08, 0x5195, 0x40e2, { 0xAC, 0x58, 0x0D, 0x98, 0x9C, 0x3A, 0x01, 0x02 }) + + virtual SLANG_NO_THROW void const* SLANG_MCALL getBufferPointer() = 0; + virtual SLANG_NO_THROW size_t SLANG_MCALL getBufferSize() = 0; + }; + #define SLANG_UUID_ISlangBlob ISlangBlob::getTypeGuid() + + /* Can be requested from ISlangCastable cast to indicate the contained chars are null terminated. + */ + struct SlangTerminatedChars + { + SLANG_CLASS_GUID(0xbe0db1a8, 0x3594, 0x4603, { 0xa7, 0x8b, 0xc4, 0x86, 0x84, 0x30, 0xdf, 0xbb }); + operator const char*() const { return chars; } + char chars[1]; + }; + + /** A (real or virtual) file system. + + Slang can make use of this interface whenever it would otherwise try to load files + from disk, allowing applications to hook and/or override filesystem access from + the compiler. + + It is the responsibility of + the caller of any method that returns a ISlangBlob to release the blob when it is no + longer used (using 'release'). 
+ */ + + struct ISlangFileSystem : public ISlangCastable + { + SLANG_COM_INTERFACE(0x003A09FC, 0x3A4D, 0x4BA0, { 0xAD, 0x60, 0x1F, 0xD8, 0x63, 0xA9, 0x15, 0xAB }) + + /** Load a file from `path` and return a blob of its contents + @param path The path to load from, as a null-terminated UTF-8 string. + @param outBlob A destination pointer to receive the blob of the file contents. + @returns A `SlangResult` to indicate success or failure in loading the file. + + NOTE! This is a *binary* load - the blob should contain the exact same bytes + as are found in the backing file. + + If load is successful, the implementation should create a blob to hold + the file's content, store it to `outBlob`, and return 0. + If the load fails, the implementation should return a failure status + (any negative value will do). + */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL loadFile( + char const* path, + ISlangBlob** outBlob) = 0; + }; + #define SLANG_UUID_ISlangFileSystem ISlangFileSystem::getTypeGuid() + + + typedef void(*SlangFuncPtr)(void); + + /** + (DEPRECATED) ISlangSharedLibrary + */ + struct ISlangSharedLibrary_Dep1: public ISlangUnknown + { + SLANG_COM_INTERFACE( 0x9c9d5bc5, 0xeb61, 0x496f,{ 0x80, 0xd7, 0xd1, 0x47, 0xc4, 0xa2, 0x37, 0x30 }) + + virtual SLANG_NO_THROW void* SLANG_MCALL findSymbolAddressByName(char const* name) = 0; + }; + #define SLANG_UUID_ISlangSharedLibrary_Dep1 ISlangSharedLibrary_Dep1::getTypeGuid() + + /** An interface that can be used to encapsulate access to a shared library. An implementation + does not have to implement the library as a shared library + */ + struct ISlangSharedLibrary : public ISlangCastable + { + SLANG_COM_INTERFACE(0x70dbc7c4, 0xdc3b, 0x4a07, { 0xae, 0x7e, 0x75, 0x2a, 0xf6, 0xa8, 0x15, 0x55 }) + + /** Get a function by name. If the library is unloaded will only return nullptr. 
@param name The name of the function + @return The function pointer related to the name or nullptr if not found + */ + SLANG_FORCE_INLINE SlangFuncPtr findFuncByName(char const* name) { return (SlangFuncPtr)findSymbolAddressByName(name); } + + /** Get a symbol by name. If the library is unloaded will only return nullptr. + @param name The name of the symbol + @return The pointer related to the name or nullptr if not found + */ + virtual SLANG_NO_THROW void* SLANG_MCALL findSymbolAddressByName(char const* name) = 0; + }; + #define SLANG_UUID_ISlangSharedLibrary ISlangSharedLibrary::getTypeGuid() + + struct ISlangSharedLibraryLoader: public ISlangUnknown + { + SLANG_COM_INTERFACE(0x6264ab2b, 0xa3e8, 0x4a06, { 0x97, 0xf1, 0x49, 0xbc, 0x2d, 0x2a, 0xb1, 0x4d }) + + /** Load a shared library. In typical usage the library name should *not* contain any platform + specific elements. For example on windows a dll name should *not* be passed with a '.dll' extension, + and similarly on linux a shared library should *not* be passed with the 'lib' prefix and '.so' extension + @param path The unadorned filename and/or path for the shared library + @param sharedLibraryOut Holds the shared library if successfully loaded */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL loadSharedLibrary( + const char* path, + ISlangSharedLibrary** sharedLibraryOut) = 0; + }; + #define SLANG_UUID_ISlangSharedLibraryLoader ISlangSharedLibraryLoader::getTypeGuid() + + /* Type that identifies how a path should be interpreted */ + typedef unsigned int SlangPathTypeIntegral; + enum SlangPathType : SlangPathTypeIntegral + { + SLANG_PATH_TYPE_DIRECTORY, /**< Path specified specifies a directory. */ + SLANG_PATH_TYPE_FILE, /**< Path specified is to a file. */ + }; + + /* Callback to enumerate the contents of a directory in a ISlangFileSystemExt. 
+ The name is the name of a file system object (directory/file) in the specified path (ie it is without a path) */ + typedef void (*FileSystemContentsCallBack)(SlangPathType pathType, const char* name, void* userData); + + /* Determines how paths map to files on the OS file system */ + enum class OSPathKind : uint8_t + { + None, ///< Paths do not map to the file system + Direct, ///< Paths map directly to the file system + OperatingSystem, ///< Only paths gained via PathKind::OperatingSystem map to the operating system file system + }; + + /* Used to determine what kind of path is required from an input path */ + enum class PathKind + { + /// Given a path, returns a simplified version of that path. + /// This typically means removing '..' and/or '.' from the path. + /// A simplified path must point to the same object as the original. + Simplified, + + /// Given a path, returns a 'canonical path' to the item. + /// This may be the operating system 'canonical path' that is the unique path to the item. + /// + /// If the item exists the returned canonical path should always be usable to access the item. + /// + /// If the item the path specifies doesn't exist, the canonical path may not be returnable + /// or be a path simplification. + /// Not all file systems support canonical paths. + Canonical, + + /// Given a path returns a path such that it is suitable to be displayed to the user. + /// + /// For example if the file system is a zip file - it might include the path to the zip + /// container as well as the path to the specific file. + /// + /// NOTE! The display path won't necessarily work on the file system to access the item + Display, + + /// Get the path to the item on the *operating system* file system, if available. + OperatingSystem, + + CountOf, + }; + + /** An extended file system abstraction. 
+ + Implementing and using this interface over ISlangFileSystem gives much more control over how paths + are managed, as well as how it is determined if two files 'are the same'. + + All paths as input char*, or output as ISlangBlobs are always encoded as UTF-8 strings. + Blobs that contain strings are always zero terminated. + */ + struct ISlangFileSystemExt : public ISlangFileSystem + { + SLANG_COM_INTERFACE(0x5fb632d2, 0x979d, 0x4481, { 0x9f, 0xee, 0x66, 0x3c, 0x3f, 0x14, 0x49, 0xe1 }) + + /** Get a uniqueIdentity which uniquely identifies an object of the file system. + + Given a path, returns a 'uniqueIdentity' which ideally is the same value for the same object on the file system. + + The uniqueIdentity is used to compare if two paths are the same - which amongst other things allows Slang to + cache source contents internally. It is also used for #pragma once functionality. + + A *requirement* is for any implementation is that two paths can only return the same uniqueIdentity if the + contents of the two files are *identical*. If an implementation breaks this constraint it can produce incorrect compilation. + If an implementation cannot *strictly* identify *the same* files, this will only have an effect on #pragma once behavior. + + The string for the uniqueIdentity is held zero terminated in the ISlangBlob of outUniqueIdentity. + + Note that there are many ways a uniqueIdentity may be generated for a file. For example it could be the + 'canonical path' - assuming it is available and unambiguous for a file system. Another possible mechanism + could be to store the filename combined with the file date time to uniquely identify it. + + The client must ensure the blob be released when no longer used, otherwise memory will leak. + + NOTE! Ideally this method would be called 'getPathUniqueIdentity' but for historical reasons and + backward compatibility it's name remains with 'File' even though an implementation should be made to work + with directories too. 
+ + @param path + @param outUniqueIdentity + @returns A `SlangResult` to indicate success or failure getting the uniqueIdentity. + */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL getFileUniqueIdentity( + const char* path, + ISlangBlob** outUniqueIdentity) = 0; + + /** Calculate a path combining the 'fromPath' with 'path' + + The client must ensure the blob be released when no longer used, otherwise memory will leak. + + @param fromPathType How to interpret the from path - as a file or a directory. + @param fromPath The from path. + @param path Path to be determined relative to the fromPath + @param pathOut Holds the string which is the relative path. The string is held in the blob zero terminated. + @returns A `SlangResult` to indicate success or failure in loading the file. + */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL calcCombinedPath( + SlangPathType fromPathType, + const char* fromPath, + const char* path, + ISlangBlob** pathOut) = 0; + + /** Gets the type of path that path is on the file system. + @param path + @param pathTypeOut + @returns SLANG_OK if located and type is known, else an error. SLANG_E_NOT_FOUND if not found. + */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL getPathType( + const char* path, + SlangPathType* pathTypeOut) = 0; + + /** Get a path based on the kind. + + @param kind The kind of path wanted + @param path The input path + @param outPath The output path held in a blob + @returns SLANG_OK if successfully simplified the path (SLANG_E_NOT_IMPLEMENTED if not implemented, or some other error code) + */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL getPath( + PathKind kind, + const char* path, + ISlangBlob** outPath) = 0; + + /** Clears any cached information */ + virtual SLANG_NO_THROW void SLANG_MCALL clearCache() = 0; + + /** Enumerate the contents of the path + + Note that for normal Slang operation it isn't necessary to enumerate contents this can return SLANG_E_NOT_IMPLEMENTED. 
+ + @param The path to enumerate + @param callback This callback is called for each entry in the path. + @param userData This is passed to the callback + @returns SLANG_OK if successful + */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL enumeratePathContents( + const char* path, + FileSystemContentsCallBack callback, + void* userData) = 0; + + /** Returns how paths map to the OS file system + + @returns OSPathKind that describes how paths map to the Operating System file system + */ + virtual SLANG_NO_THROW OSPathKind SLANG_MCALL getOSPathKind() = 0; + }; + + #define SLANG_UUID_ISlangFileSystemExt ISlangFileSystemExt::getTypeGuid() + + struct ISlangMutableFileSystem : public ISlangFileSystemExt + { + SLANG_COM_INTERFACE(0xa058675c, 0x1d65, 0x452a, { 0x84, 0x58, 0xcc, 0xde, 0xd1, 0x42, 0x71, 0x5 }) + + /** Write data to the specified path. + + @param path The path for data to be saved to + @param data The data to be saved + @param size The size of the data in bytes + @returns SLANG_OK if successful (SLANG_E_NOT_IMPLEMENTED if not implemented, or some other error code) + */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL saveFile( + const char* path, + const void* data, + size_t size) = 0; + + /** Write data in the form of a blob to the specified path. + + Depending on the implementation writing a blob might be faster/use less memory. It is assumed the + blob is *immutable* and that an implementation can reference count it. + + It is not guaranteed loading the same file will return the *same* blob - just a blob with same + contents. + + @param path The path for data to be saved to + @param dataBlob The data to be saved + @returns SLANG_OK if successful (SLANG_E_NOT_IMPLEMENTED if not implemented, or some other error code) + */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL saveFileBlob( + const char* path, + ISlangBlob* dataBlob) = 0; + + /** Remove the entry in the path (directory of file). 
Will only delete an empty directory, if not empty + will return an error. + + @param path The path to remove + @returns SLANG_OK if successful + */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL remove( + const char* path) = 0; + + /** Create a directory. + + The path to the directory must exist + + @param path To the directory to create. The parent path *must* exist otherwise will return an error. + @returns SLANG_OK if successful + */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL createDirectory( + const char* path) = 0; + }; + + #define SLANG_UUID_ISlangMutableFileSystem ISlangMutableFileSystem::getTypeGuid() + + /* Identifies different types of writer target*/ + typedef unsigned int SlangWriterChannelIntegral; + enum SlangWriterChannel : SlangWriterChannelIntegral + { + SLANG_WRITER_CHANNEL_DIAGNOSTIC, + SLANG_WRITER_CHANNEL_STD_OUTPUT, + SLANG_WRITER_CHANNEL_STD_ERROR, + SLANG_WRITER_CHANNEL_COUNT_OF, + }; + + typedef unsigned int SlangWriterModeIntegral; + enum SlangWriterMode : SlangWriterModeIntegral + { + SLANG_WRITER_MODE_TEXT, + SLANG_WRITER_MODE_BINARY, + }; + + /** A stream typically of text, used for outputting diagnostic as well as other information. + */ + struct ISlangWriter : public ISlangUnknown + { + SLANG_COM_INTERFACE(0xec457f0e, 0x9add, 0x4e6b,{ 0x85, 0x1c, 0xd7, 0xfa, 0x71, 0x6d, 0x15, 0xfd }) + + /** Begin an append buffer. + NOTE! Only one append buffer can be active at any time. + @param maxNumChars The maximum of chars that will be appended + @returns The start of the buffer for appending to. */ + virtual SLANG_NO_THROW char* SLANG_MCALL beginAppendBuffer(size_t maxNumChars) = 0; + /** Ends the append buffer, and is equivalent to a write of the append buffer. + NOTE! That an endAppendBuffer is not necessary if there are no characters to write. 
+ @param buffer is the start of the data to append and must be identical to last value returned from beginAppendBuffer + @param numChars must be a value less than or equal to what was returned from last call to beginAppendBuffer + @returns Result, will be SLANG_OK on success */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL endAppendBuffer(char* buffer, size_t numChars) = 0; + /** Write text to the writer + @param chars The characters to write out + @param numChars The amount of characters + @returns SLANG_OK on success */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL write(const char* chars, size_t numChars) = 0; + /** Flushes any content to the output */ + virtual SLANG_NO_THROW void SLANG_MCALL flush() = 0; + /** Determines if the writer stream is to the console, and can be used to alter the output + @returns Returns true if is a console writer */ + virtual SLANG_NO_THROW SlangBool SLANG_MCALL isConsole() = 0; + /** Set the mode for the writer to use + @param mode The mode to use + @returns SLANG_OK on success */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL setMode(SlangWriterMode mode) = 0; + }; + + #define SLANG_UUID_ISlangWriter ISlangWriter::getTypeGuid() + + namespace slang { + struct IGlobalSession; + struct ICompileRequest; + + } // namespace slang + + /*! + @brief An instance of the Slang library. + */ + typedef slang::IGlobalSession SlangSession; + + + typedef struct SlangProgramLayout SlangProgramLayout; + + /*! + @brief A request for one or more compilation actions to be performed. + */ + typedef struct slang::ICompileRequest SlangCompileRequest; + + + /*! + @brief Initialize an instance of the Slang library. + */ + SLANG_API SlangSession* spCreateSession(const char* deprecated = 0); + + /*! + @brief Clean up after an instance of the Slang library. 
+ */ + SLANG_API void spDestroySession( + SlangSession* session); + + /** @see slang::IGlobalSession::setSharedLibraryLoader + */ + SLANG_API void spSessionSetSharedLibraryLoader( + SlangSession* session, + ISlangSharedLibraryLoader* loader); + + /** @see slang::IGlobalSession::getSharedLibraryLoader + */ + SLANG_API ISlangSharedLibraryLoader* spSessionGetSharedLibraryLoader( + SlangSession* session); + + /** @see slang::IGlobalSession::checkCompileTargetSupport + */ + SLANG_API SlangResult spSessionCheckCompileTargetSupport( + SlangSession* session, + SlangCompileTarget target); + + /** @see slang::IGlobalSession::checkPassThroughSupport + */ + SLANG_API SlangResult spSessionCheckPassThroughSupport( + SlangSession* session, + SlangPassThrough passThrough + ); + + /** @see slang::IGlobalSession::addBuiltins + */ + SLANG_API void spAddBuiltins( + SlangSession* session, + char const* sourcePath, + char const* sourceString); + + /*! + @brief Callback type used for diagnostic output. + */ + typedef void(*SlangDiagnosticCallback)( + char const* message, + void* userData); + + /*! + @brief Get the build version 'tag' string. The string is the same as produced via `git describe --tags` + for the project. If Slang is built separately from the automated build scripts + the contents will by default be 'unknown'. Any string can be set by changing the + contents of 'slang-tag-version.h' file and recompiling the project. + + This function will return exactly the same result as the method getBuildTag string on IGlobalSession. + + An advantage of using this function over the method is that doing so does not require the creation of + a session, which can be a fairly costly operation. + + @return The build tag string + */ + SLANG_API const char* spGetBuildTagString(); + + /* @see slang::IGlobalSession::createCompileRequest + */ + SLANG_API SlangCompileRequest* spCreateCompileRequest( + SlangSession* session); + + /*! + @brief Destroy a compile request. 
Note a request is a COM object and can be destroyed via 'Release'. + */ + SLANG_API void spDestroyCompileRequest( + SlangCompileRequest* request); + + /*! @see slang::ICompileRequest::setFileSystem */ + SLANG_API void spSetFileSystem( + SlangCompileRequest* request, + ISlangFileSystem* fileSystem); + + /*! @see slang::ICompileRequest::setCompileFlags */ + SLANG_API void spSetCompileFlags( + SlangCompileRequest* request, + SlangCompileFlags flags); + + /*! @see slang::ICompileRequest::getCompileFlags */ + SLANG_API SlangCompileFlags spGetCompileFlags( + SlangCompileRequest* request); + + /*! @see slang::ICompileRequest::setDumpIntermediates */ + SLANG_API void spSetDumpIntermediates( + SlangCompileRequest* request, + int enable); + + /*! @see slang::ICompileRequest::setDumpIntermediatePrefix */ + SLANG_API void spSetDumpIntermediatePrefix( + SlangCompileRequest* request, + const char* prefix); + + /*! DEPRECATED: use `spSetTargetLineDirectiveMode` instead. + @see slang::ICompileRequest::setLineDirectiveMode */ + SLANG_API void spSetLineDirectiveMode( + SlangCompileRequest* request, + SlangLineDirectiveMode mode); + + /*! @see slang::ICompileRequest::setTargetLineDirectiveMode */ + SLANG_API void spSetTargetLineDirectiveMode( + SlangCompileRequest* request, + int targetIndex, + SlangLineDirectiveMode mode); + + /*! @see slang::ICompileRequest::setTargetForceGLSLScalarBufferLayout */ + SLANG_API void spSetTargetForceGLSLScalarBufferLayout( + SlangCompileRequest* request, + int targetIndex, + bool forceScalarLayout); + + /*! @see slang::ICompileRequest::setCodeGenTarget */ + SLANG_API void spSetCodeGenTarget( + SlangCompileRequest* request, + SlangCompileTarget target); + + /*! @see slang::ICompileRequest::addCodeGenTarget */ + SLANG_API int spAddCodeGenTarget( + SlangCompileRequest* request, + SlangCompileTarget target); + + /*! 
@see slang::ICompileRequest::setTargetProfile */ + SLANG_API void spSetTargetProfile( + SlangCompileRequest* request, + int targetIndex, + SlangProfileID profile); + + /*! @see slang::ICompileRequest::setTargetFlags */ + SLANG_API void spSetTargetFlags( + SlangCompileRequest* request, + int targetIndex, + SlangTargetFlags flags); + + + + /*! @see slang::ICompileRequest::setTargetFloatingPointMode */ + SLANG_API void spSetTargetFloatingPointMode( + SlangCompileRequest* request, + int targetIndex, + SlangFloatingPointMode mode); + + /*! @see slang::ICompileRequest::addTargetCapability */ + SLANG_API void spAddTargetCapability( + slang::ICompileRequest* request, + int targetIndex, + SlangCapabilityID capability); + + /* DEPRECATED: use `spSetMatrixLayoutMode` instead. */ + SLANG_API void spSetTargetMatrixLayoutMode( + SlangCompileRequest* request, + int targetIndex, + SlangMatrixLayoutMode mode); + + /*! @see slang::ICompileRequest::setMatrixLayoutMode */ + SLANG_API void spSetMatrixLayoutMode( + SlangCompileRequest* request, + SlangMatrixLayoutMode mode); + + /*! @see slang::ICompileRequest::setDebugInfoLevel */ + SLANG_API void spSetDebugInfoLevel( + SlangCompileRequest* request, + SlangDebugInfoLevel level); + + /*! @see slang::ICompileRequest::setDebugInfoFormat */ + SLANG_API void spSetDebugInfoFormat( + SlangCompileRequest* request, + SlangDebugInfoFormat format); + + /*! @see slang::ICompileRequest::setOptimizationLevel */ + SLANG_API void spSetOptimizationLevel( + SlangCompileRequest* request, + SlangOptimizationLevel level); + + + + /*! @see slang::ICompileRequest::setOutputContainerFormat */ + SLANG_API void spSetOutputContainerFormat( + SlangCompileRequest* request, + SlangContainerFormat format); + + /*! @see slang::ICompileRequest::setPassThrough */ + SLANG_API void spSetPassThrough( + SlangCompileRequest* request, + SlangPassThrough passThrough); + + /*! 
@see slang::ICompileRequest::setDiagnosticCallback */ + SLANG_API void spSetDiagnosticCallback( + SlangCompileRequest* request, + SlangDiagnosticCallback callback, + void const* userData); + + /*! @see slang::ICompileRequest::setWriter */ + SLANG_API void spSetWriter( + SlangCompileRequest* request, + SlangWriterChannel channel, + ISlangWriter* writer); + + /*! @see slang::ICompileRequest::getWriter */ + SLANG_API ISlangWriter* spGetWriter( + SlangCompileRequest* request, + SlangWriterChannel channel); + + /*! @see slang::ICompileRequest::addSearchPath */ + SLANG_API void spAddSearchPath( + SlangCompileRequest* request, + const char* searchDir); + + /*! @see slang::ICompileRequest::addPreprocessorDefine */ + SLANG_API void spAddPreprocessorDefine( + SlangCompileRequest* request, + const char* key, + const char* value); + + /*! @see slang::ICompileRequest::processCommandLineArguments */ + SLANG_API SlangResult spProcessCommandLineArguments( + SlangCompileRequest* request, + char const* const* args, + int argCount); + + /*! @see slang::ICompileRequest::addTranslationUnit */ + SLANG_API int spAddTranslationUnit( + SlangCompileRequest* request, + SlangSourceLanguage language, + char const* name); + + + /*! @see slang::ICompileRequest::setDefaultModuleName */ + SLANG_API void spSetDefaultModuleName( + SlangCompileRequest* request, + const char* defaultModuleName); + + /*! @see slang::ICompileRequest::addTranslationUnitPreprocessorDefine */ + SLANG_API void spTranslationUnit_addPreprocessorDefine( + SlangCompileRequest* request, + int translationUnitIndex, + const char* key, + const char* value); + + + /*! @see slang::ICompileRequest::addTranslationUnitSourceFile */ + SLANG_API void spAddTranslationUnitSourceFile( + SlangCompileRequest* request, + int translationUnitIndex, + char const* path); + + /*!
@see slang::ICompileRequest::addTranslationUnitSourceString */ + SLANG_API void spAddTranslationUnitSourceString( + SlangCompileRequest* request, + int translationUnitIndex, + char const* path, + char const* source); + + + /*! @see slang::ICompileRequest::addLibraryReference */ + SLANG_API SlangResult spAddLibraryReference( + SlangCompileRequest* request, + const void* libData, + size_t libDataSize); + + /*! @see slang::ICompileRequest::addTranslationUnitSourceStringSpan */ + SLANG_API void spAddTranslationUnitSourceStringSpan( + SlangCompileRequest* request, + int translationUnitIndex, + char const* path, + char const* sourceBegin, + char const* sourceEnd); + + /*! @see slang::ICompileRequest::addTranslationUnitSourceBlob */ + SLANG_API void spAddTranslationUnitSourceBlob( + SlangCompileRequest* request, + int translationUnitIndex, + char const* path, + ISlangBlob* sourceBlob); + + /*! @see slang::IGlobalSession::findProfile */ + SLANG_API SlangProfileID spFindProfile( + SlangSession* session, + char const* name); + + /*! @see slang::IGlobalSession::findCapability */ + SLANG_API SlangCapabilityID spFindCapability( + SlangSession* session, + char const* name); + + /*! @see slang::ICompileRequest::addEntryPoint */ + SLANG_API int spAddEntryPoint( + SlangCompileRequest* request, + int translationUnitIndex, + char const* name, + SlangStage stage); + + /*! @see slang::ICompileRequest::addEntryPointEx */ + SLANG_API int spAddEntryPointEx( + SlangCompileRequest* request, + int translationUnitIndex, + char const* name, + SlangStage stage, + int genericArgCount, + char const** genericArgs); + + /*! @see slang::ICompileRequest::setGlobalGenericArgs */ + SLANG_API SlangResult spSetGlobalGenericArgs( + SlangCompileRequest* request, + int genericArgCount, + char const** genericArgs); + + /*! 
@see slang::ICompileRequest::setTypeNameForGlobalExistentialTypeParam */ + SLANG_API SlangResult spSetTypeNameForGlobalExistentialTypeParam( + SlangCompileRequest* request, + int slotIndex, + char const* typeName); + + /*! @see slang::ICompileRequest::setTypeNameForEntryPointExistentialTypeParam */ + SLANG_API SlangResult spSetTypeNameForEntryPointExistentialTypeParam( + SlangCompileRequest* request, + int entryPointIndex, + int slotIndex, + char const* typeName); + + /*! @see slang::ICompileRequest::compile */ + SLANG_API SlangResult spCompile( + SlangCompileRequest* request); + + + /*! @see slang::ICompileRequest::getDiagnosticOutput */ + SLANG_API char const* spGetDiagnosticOutput( + SlangCompileRequest* request); + + /*! @see slang::ICompileRequest::getDiagnosticOutputBlob */ + SLANG_API SlangResult spGetDiagnosticOutputBlob( + SlangCompileRequest* request, + ISlangBlob** outBlob); + + + /*! @see slang::ICompileRequest::getDependencyFileCount */ + SLANG_API int + spGetDependencyFileCount( + SlangCompileRequest* request); + + /*! @see slang::ICompileRequest::getDependencyFilePath */ + SLANG_API char const* + spGetDependencyFilePath( + SlangCompileRequest* request, + int index); + + /*! @see slang::ICompileRequest::getTranslationUnitCount */ + SLANG_API int + spGetTranslationUnitCount( + SlangCompileRequest* request); + + /*! @see slang::ICompileRequest::getEntryPointSource */ + SLANG_API char const* spGetEntryPointSource( + SlangCompileRequest* request, + int entryPointIndex); + + /*! @see slang::ICompileRequest::getEntryPointCode */ + SLANG_API void const* spGetEntryPointCode( + SlangCompileRequest* request, + int entryPointIndex, + size_t* outSize); + + /*! @see slang::ICompileRequest::getEntryPointCodeBlob */ + SLANG_API SlangResult spGetEntryPointCodeBlob( + SlangCompileRequest* request, + int entryPointIndex, + int targetIndex, + ISlangBlob** outBlob); + + /*! 
@see slang::ICompileRequest::getEntryPointHostCallable */ + SLANG_API SlangResult spGetEntryPointHostCallable( + SlangCompileRequest* request, + int entryPointIndex, + int targetIndex, + ISlangSharedLibrary** outSharedLibrary); + + /*! @see slang::ICompileRequest::getTargetCodeBlob */ + SLANG_API SlangResult spGetTargetCodeBlob( + SlangCompileRequest* request, + int targetIndex, + ISlangBlob** outBlob); + + /*! @see slang::ICompileRequest::getTargetHostCallable */ + SLANG_API SlangResult spGetTargetHostCallable( + SlangCompileRequest* request, + int targetIndex, + ISlangSharedLibrary** outSharedLibrary); + + /*! @see slang::ICompileRequest::getCompileRequestCode */ + SLANG_API void const* spGetCompileRequestCode( + SlangCompileRequest* request, + size_t* outSize); + + /*! @see slang::ICompileRequest::getContainerCode */ + SLANG_API SlangResult spGetContainerCode( + SlangCompileRequest* request, + ISlangBlob** outBlob); + + /*! @see slang::ICompileRequest::loadRepro */ + SLANG_API SlangResult spLoadRepro( + SlangCompileRequest* request, + ISlangFileSystem* fileSystem, + const void* data, + size_t size); + + /*! @see slang::ICompileRequest::saveRepro */ + SLANG_API SlangResult spSaveRepro( + SlangCompileRequest* request, + ISlangBlob** outBlob + ); + + /*! @see slang::ICompileRequest::enableReproCapture */ + SLANG_API SlangResult spEnableReproCapture( + SlangCompileRequest* request); + + + /** Extract contents of a repro. + + Writes the contained files and manifest with their 'unique' names into fileSystem. For more details read the + docs/repro.md documentation. + + @param session The slang session + @param reproData Holds the repro data + @param reproDataSize The size of the repro data + @param fileSystem File system that the contents of the repro will be written to + @returns A `SlangResult` to indicate success or failure. 
+ */ + SLANG_API SlangResult spExtractRepro( + SlangSession* session, + const void* reproData, + size_t reproDataSize, + ISlangMutableFileSystem* fileSystem); + + /* Turns a repro into a file system. + + Makes the contents of the repro available as a file system - that is able to access the files with the same + paths as were used on the original repro file system. + + @param session The slang session + @param reproData The repro data + @param reproDataSize The size of the repro data + @param replaceFileSystem Will attempt to load by unique names from this file system before using contents of the repro. Optional. + @param outFileSystem The file system that can be used to access contents + @returns A `SlangResult` to indicate success or failure. + */ + SLANG_API SlangResult spLoadReproAsFileSystem( + SlangSession* session, + const void* reproData, + size_t reproDataSize, + ISlangFileSystem* replaceFileSystem, + ISlangFileSystemExt** outFileSystem); + + /*! @see slang::ICompileRequest::overrideDiagnosticSeverity */ + SLANG_API void spOverrideDiagnosticSeverity( + SlangCompileRequest* request, + SlangInt messageID, + SlangSeverity overrideSeverity); + + /*! @see slang::ICompileRequest::getDiagnosticFlags */ + SLANG_API SlangDiagnosticFlags spGetDiagnosticFlags(SlangCompileRequest* request); + + /*! 
@see slang::ICompileRequest::setDiagnosticFlags */ + SLANG_API void spSetDiagnosticFlags(SlangCompileRequest* request, SlangDiagnosticFlags flags); + + /* + Forward declarations of types used in the reflection interface; + */ + + typedef struct SlangProgramLayout SlangProgramLayout; + typedef struct SlangEntryPoint SlangEntryPoint; + typedef struct SlangEntryPointLayout SlangEntryPointLayout; + + typedef struct SlangReflectionModifier SlangReflectionModifier; + typedef struct SlangReflectionType SlangReflectionType; + typedef struct SlangReflectionTypeLayout SlangReflectionTypeLayout; + typedef struct SlangReflectionVariable SlangReflectionVariable; + typedef struct SlangReflectionVariableLayout SlangReflectionVariableLayout; + typedef struct SlangReflectionTypeParameter SlangReflectionTypeParameter; + typedef struct SlangReflectionUserAttribute SlangReflectionUserAttribute; + + /* + Type aliases to maintain backward compatibility. + */ + typedef SlangProgramLayout SlangReflection; + typedef SlangEntryPointLayout SlangReflectionEntryPoint; + + // get reflection data from a compilation request + SLANG_API SlangReflection* spGetReflection( + SlangCompileRequest* request); + + // type reflection + + typedef unsigned int SlangTypeKindIntegral; + enum SlangTypeKind : SlangTypeKindIntegral + { + SLANG_TYPE_KIND_NONE, + SLANG_TYPE_KIND_STRUCT, + SLANG_TYPE_KIND_ARRAY, + SLANG_TYPE_KIND_MATRIX, + SLANG_TYPE_KIND_VECTOR, + SLANG_TYPE_KIND_SCALAR, + SLANG_TYPE_KIND_CONSTANT_BUFFER, + SLANG_TYPE_KIND_RESOURCE, + SLANG_TYPE_KIND_SAMPLER_STATE, + SLANG_TYPE_KIND_TEXTURE_BUFFER, + SLANG_TYPE_KIND_SHADER_STORAGE_BUFFER, + SLANG_TYPE_KIND_PARAMETER_BLOCK, + SLANG_TYPE_KIND_GENERIC_TYPE_PARAMETER, + SLANG_TYPE_KIND_INTERFACE, + SLANG_TYPE_KIND_OUTPUT_STREAM, + SLANG_TYPE_KIND_MESH_OUTPUT, + SLANG_TYPE_KIND_SPECIALIZED, + SLANG_TYPE_KIND_FEEDBACK, + SLANG_TYPE_KIND_POINTER, + SLANG_TYPE_KIND_COUNT, + }; + + typedef unsigned int SlangScalarTypeIntegral; + enum SlangScalarType : 
SlangScalarTypeIntegral + { + SLANG_SCALAR_TYPE_NONE, + SLANG_SCALAR_TYPE_VOID, + SLANG_SCALAR_TYPE_BOOL, + SLANG_SCALAR_TYPE_INT32, + SLANG_SCALAR_TYPE_UINT32, + SLANG_SCALAR_TYPE_INT64, + SLANG_SCALAR_TYPE_UINT64, + SLANG_SCALAR_TYPE_FLOAT16, + SLANG_SCALAR_TYPE_FLOAT32, + SLANG_SCALAR_TYPE_FLOAT64, + SLANG_SCALAR_TYPE_INT8, + SLANG_SCALAR_TYPE_UINT8, + SLANG_SCALAR_TYPE_INT16, + SLANG_SCALAR_TYPE_UINT16, + SLANG_SCALAR_TYPE_INTPTR, + SLANG_SCALAR_TYPE_UINTPTR + }; + +#ifndef SLANG_RESOURCE_SHAPE +# define SLANG_RESOURCE_SHAPE + typedef unsigned int SlangResourceShapeIntegral; + enum SlangResourceShape : SlangResourceShapeIntegral + { + SLANG_RESOURCE_BASE_SHAPE_MASK = 0x0F, + + SLANG_RESOURCE_NONE = 0x00, + + SLANG_TEXTURE_1D = 0x01, + SLANG_TEXTURE_2D = 0x02, + SLANG_TEXTURE_3D = 0x03, + SLANG_TEXTURE_CUBE = 0x04, + SLANG_TEXTURE_BUFFER = 0x05, + + SLANG_STRUCTURED_BUFFER = 0x06, + SLANG_BYTE_ADDRESS_BUFFER = 0x07, + SLANG_RESOURCE_UNKNOWN = 0x08, + SLANG_ACCELERATION_STRUCTURE = 0x09, + + SLANG_RESOURCE_EXT_SHAPE_MASK = 0xF0, + + SLANG_TEXTURE_FEEDBACK_FLAG = 0x10, + SLANG_TEXTURE_SHADOW_FLAG = 0x20, + SLANG_TEXTURE_ARRAY_FLAG = 0x40, + SLANG_TEXTURE_MULTISAMPLE_FLAG = 0x80, + + SLANG_TEXTURE_1D_ARRAY = SLANG_TEXTURE_1D | SLANG_TEXTURE_ARRAY_FLAG, + SLANG_TEXTURE_2D_ARRAY = SLANG_TEXTURE_2D | SLANG_TEXTURE_ARRAY_FLAG, + SLANG_TEXTURE_CUBE_ARRAY = SLANG_TEXTURE_CUBE | SLANG_TEXTURE_ARRAY_FLAG, + + SLANG_TEXTURE_2D_MULTISAMPLE = SLANG_TEXTURE_2D | SLANG_TEXTURE_MULTISAMPLE_FLAG, + SLANG_TEXTURE_2D_MULTISAMPLE_ARRAY = SLANG_TEXTURE_2D | SLANG_TEXTURE_MULTISAMPLE_FLAG | SLANG_TEXTURE_ARRAY_FLAG, + }; +#endif + typedef unsigned int SlangResourceAccessIntegral; + enum SlangResourceAccess : SlangResourceAccessIntegral + { + SLANG_RESOURCE_ACCESS_NONE, + SLANG_RESOURCE_ACCESS_READ, + SLANG_RESOURCE_ACCESS_READ_WRITE, + SLANG_RESOURCE_ACCESS_RASTER_ORDERED, + SLANG_RESOURCE_ACCESS_APPEND, + SLANG_RESOURCE_ACCESS_CONSUME, + SLANG_RESOURCE_ACCESS_WRITE, + 
SLANG_RESOURCE_ACCESS_FEEDBACK, + SLANG_RESOURCE_ACCESS_UNKNOWN = 0x7FFFFFFF, + }; + + typedef unsigned int SlangParameterCategoryIntegral; + enum SlangParameterCategory : SlangParameterCategoryIntegral + { + SLANG_PARAMETER_CATEGORY_NONE, + SLANG_PARAMETER_CATEGORY_MIXED, + SLANG_PARAMETER_CATEGORY_CONSTANT_BUFFER, + SLANG_PARAMETER_CATEGORY_SHADER_RESOURCE, + SLANG_PARAMETER_CATEGORY_UNORDERED_ACCESS, + SLANG_PARAMETER_CATEGORY_VARYING_INPUT, + SLANG_PARAMETER_CATEGORY_VARYING_OUTPUT, + SLANG_PARAMETER_CATEGORY_SAMPLER_STATE, + SLANG_PARAMETER_CATEGORY_UNIFORM, + SLANG_PARAMETER_CATEGORY_DESCRIPTOR_TABLE_SLOT, + SLANG_PARAMETER_CATEGORY_SPECIALIZATION_CONSTANT, + SLANG_PARAMETER_CATEGORY_PUSH_CONSTANT_BUFFER, + + // HLSL register `space`, Vulkan GLSL `set` + SLANG_PARAMETER_CATEGORY_REGISTER_SPACE, + + // TODO: Ellie, Both APIs treat mesh outputs as more or less varying output, + // Does it deserve to be represented here?? + + // A parameter whose type is to be specialized by a global generic type argument + SLANG_PARAMETER_CATEGORY_GENERIC, + + SLANG_PARAMETER_CATEGORY_RAY_PAYLOAD, + SLANG_PARAMETER_CATEGORY_HIT_ATTRIBUTES, + SLANG_PARAMETER_CATEGORY_CALLABLE_PAYLOAD, + SLANG_PARAMETER_CATEGORY_SHADER_RECORD, + + // An existential type parameter represents a "hole" that + // needs to be filled with a concrete type to enable + // generation of specialized code. + // + // Consider this example: + // + // struct MyParams + // { + // IMaterial material; + // ILight lights[3]; + // }; + // + // This `MyParams` type introduces two existential type parameters: + // one for `material` and one for `lights`. Even though `lights` + // is an array, it only introduces one type parameter, because + // we need to have a *single* concrete type for all the array + // elements to be able to generate specialized code.
+ // + SLANG_PARAMETER_CATEGORY_EXISTENTIAL_TYPE_PARAM, + + // An existential object parameter represents a value + // that needs to be passed in to provide data for some + // interface-type shader parameter. + // + // Consider this example: + // + // struct MyParams + // { + // IMaterial material; + // ILight lights[3]; + // }; + // + // This `MyParams` type introduces four existential object parameters: + // one for `material` and three for `lights` (one for each array + // element). This is consistent with the number of interface-type + // "objects" that are being passed through to the shader. + // + SLANG_PARAMETER_CATEGORY_EXISTENTIAL_OBJECT_PARAM, + + // The register space offset for the sub-elements that occupy register spaces. + SLANG_PARAMETER_CATEGORY_SUB_ELEMENT_REGISTER_SPACE, + + // + SLANG_PARAMETER_CATEGORY_COUNT, + + + // DEPRECATED: + SLANG_PARAMETER_CATEGORY_VERTEX_INPUT = SLANG_PARAMETER_CATEGORY_VARYING_INPUT, + SLANG_PARAMETER_CATEGORY_FRAGMENT_OUTPUT = SLANG_PARAMETER_CATEGORY_VARYING_OUTPUT, + }; + + /** Types of API-managed bindings that a parameter might use. + + `SlangBindingType` represents the distinct types of binding ranges that might be + understood by an underlying graphics API or cross-API abstraction layer. + Several of the enumeration cases here correspond to cases of `VkDescriptorType` + defined by the Vulkan API. Note however that the values of this enumeration + are not the same as those of any particular API.
+ + The `SlangBindingType` enumeration is distinct from `SlangParameterCategory` + because `SlangParameterCategory` differentiates the types of parameters for + the purposes of layout, where the layout rules of some targets will treat + parameters of different types as occupying the same binding space for layout + (e.g., in SPIR-V both a `Texture2D` and `SamplerState` use the same space of + `binding` indices, and are not allowed to overlap), while those same types + map to different types of bindings in the API (e.g., both textures and samplers + use different `VkDescriptorType` values). + + When you want to answer "what register/binding did this parameter use?" you + should use `SlangParameterCategory`. + + When you want to answer "what type of descriptor range should this parameter use?" + you should use `SlangBindingType`. + */ + typedef SlangUInt32 SlangBindingTypeIntegral; + enum SlangBindingType : SlangBindingTypeIntegral + { + SLANG_BINDING_TYPE_UNKNOWN = 0, + + SLANG_BINDING_TYPE_SAMPLER, + SLANG_BINDING_TYPE_TEXTURE, + SLANG_BINDING_TYPE_CONSTANT_BUFFER, + SLANG_BINDING_TYPE_PARAMETER_BLOCK, + SLANG_BINDING_TYPE_TYPED_BUFFER, + SLANG_BINDING_TYPE_RAW_BUFFER, + SLANG_BINDING_TYPE_COMBINED_TEXTURE_SAMPLER, + SLANG_BINDING_TYPE_INPUT_RENDER_TARGET, + SLANG_BINDING_TYPE_INLINE_UNIFORM_DATA, + SLANG_BINDING_TYPE_RAY_TRACING_ACCELERATION_STRUCTURE, + + SLANG_BINDING_TYPE_VARYING_INPUT, + SLANG_BINDING_TYPE_VARYING_OUTPUT, + + SLANG_BINDING_TYPE_EXISTENTIAL_VALUE, + SLANG_BINDING_TYPE_PUSH_CONSTANT, + + SLANG_BINDING_TYPE_MUTABLE_FLAG = 0x100, + + SLANG_BINDING_TYPE_MUTABLE_TETURE = SLANG_BINDING_TYPE_TEXTURE | SLANG_BINDING_TYPE_MUTABLE_FLAG, + SLANG_BINDING_TYPE_MUTABLE_TYPED_BUFFER = SLANG_BINDING_TYPE_TYPED_BUFFER | SLANG_BINDING_TYPE_MUTABLE_FLAG, + SLANG_BINDING_TYPE_MUTABLE_RAW_BUFFER = SLANG_BINDING_TYPE_RAW_BUFFER | SLANG_BINDING_TYPE_MUTABLE_FLAG, + + SLANG_BINDING_TYPE_BASE_MASK = 0x00FF, + SLANG_BINDING_TYPE_EXT_MASK = 0xFF00, + }; + + typedef
SlangUInt32 SlangLayoutRulesIntegral; + enum SlangLayoutRules : SlangLayoutRulesIntegral + { + SLANG_LAYOUT_RULES_DEFAULT, + }; + + typedef SlangUInt32 SlangModifierIDIntegral; + enum SlangModifierID : SlangModifierIDIntegral + { + SLANG_MODIFIER_SHARED, + }; + + // User Attribute + SLANG_API char const* spReflectionUserAttribute_GetName(SlangReflectionUserAttribute* attrib); + SLANG_API unsigned int spReflectionUserAttribute_GetArgumentCount(SlangReflectionUserAttribute* attrib); + SLANG_API SlangReflectionType* spReflectionUserAttribute_GetArgumentType(SlangReflectionUserAttribute* attrib, unsigned int index); + SLANG_API SlangResult spReflectionUserAttribute_GetArgumentValueInt(SlangReflectionUserAttribute* attrib, unsigned int index, int * rs); + SLANG_API SlangResult spReflectionUserAttribute_GetArgumentValueFloat(SlangReflectionUserAttribute* attrib, unsigned int index, float * rs); + + /** Returns the string-typed value of a user attribute argument + The string returned is not null-terminated. The length of the string is returned via `outSize`. + If index is out of range, or if the specified argument is not a string, the function will return nullptr.
+ */ + SLANG_API const char* spReflectionUserAttribute_GetArgumentValueString(SlangReflectionUserAttribute* attrib, unsigned int index, size_t * outSize); + + // Type Reflection + + SLANG_API SlangTypeKind spReflectionType_GetKind(SlangReflectionType* type); + SLANG_API unsigned int spReflectionType_GetUserAttributeCount(SlangReflectionType* type); + SLANG_API SlangReflectionUserAttribute* spReflectionType_GetUserAttribute(SlangReflectionType* type, unsigned int index); + SLANG_API SlangReflectionUserAttribute* spReflectionType_FindUserAttributeByName(SlangReflectionType* type, char const* name); + + SLANG_API unsigned int spReflectionType_GetFieldCount(SlangReflectionType* type); + SLANG_API SlangReflectionVariable* spReflectionType_GetFieldByIndex(SlangReflectionType* type, unsigned index); + + /** Returns the number of elements in the given type. + + This operation is valid for vector and array types. For other types it returns zero. + + When invoked on an unbounded-size array it will return `SLANG_UNBOUNDED_SIZE`, + which is defined to be `~size_t(0)`. + + If the size of a type cannot be statically computed, perhaps because it depends on + a generic parameter that has not been bound to a specific value, this function returns zero. 
+ */ + SLANG_API size_t spReflectionType_GetElementCount(SlangReflectionType* type); + + #define SLANG_UNBOUNDED_SIZE (~size_t(0)) + + SLANG_API SlangReflectionType* spReflectionType_GetElementType(SlangReflectionType* type); + + SLANG_API unsigned int spReflectionType_GetRowCount(SlangReflectionType* type); + SLANG_API unsigned int spReflectionType_GetColumnCount(SlangReflectionType* type); + SLANG_API SlangScalarType spReflectionType_GetScalarType(SlangReflectionType* type); + + SLANG_API SlangResourceShape spReflectionType_GetResourceShape(SlangReflectionType* type); + SLANG_API SlangResourceAccess spReflectionType_GetResourceAccess(SlangReflectionType* type); + SLANG_API SlangReflectionType* spReflectionType_GetResourceResultType(SlangReflectionType* type); + + SLANG_API char const* spReflectionType_GetName(SlangReflectionType* type); + + // Type Layout Reflection + + SLANG_API SlangReflectionType* spReflectionTypeLayout_GetType(SlangReflectionTypeLayout* type); + SLANG_API SlangTypeKind spReflectionTypeLayout_getKind(SlangReflectionTypeLayout* type); + SLANG_API size_t spReflectionTypeLayout_GetSize(SlangReflectionTypeLayout* type, SlangParameterCategory category); + SLANG_API size_t spReflectionTypeLayout_GetStride(SlangReflectionTypeLayout* type, SlangParameterCategory category); + SLANG_API int32_t spReflectionTypeLayout_getAlignment(SlangReflectionTypeLayout* type, SlangParameterCategory category); + + SLANG_API SlangReflectionVariableLayout* spReflectionTypeLayout_GetFieldByIndex(SlangReflectionTypeLayout* type, unsigned index); + + SLANG_API SlangInt spReflectionTypeLayout_findFieldIndexByName(SlangReflectionTypeLayout* typeLayout, const char* nameBegin, const char* nameEnd); + + SLANG_API SlangReflectionVariableLayout* spReflectionTypeLayout_GetExplicitCounter(SlangReflectionTypeLayout* typeLayout); + + SLANG_API size_t spReflectionTypeLayout_GetElementStride(SlangReflectionTypeLayout* type, SlangParameterCategory category); + SLANG_API 
SlangReflectionTypeLayout* spReflectionTypeLayout_GetElementTypeLayout(SlangReflectionTypeLayout* type); + SLANG_API SlangReflectionVariableLayout* spReflectionTypeLayout_GetElementVarLayout(SlangReflectionTypeLayout* type); + SLANG_API SlangReflectionVariableLayout* spReflectionTypeLayout_getContainerVarLayout(SlangReflectionTypeLayout* type); + + SLANG_API SlangParameterCategory spReflectionTypeLayout_GetParameterCategory(SlangReflectionTypeLayout* type); + + SLANG_API unsigned spReflectionTypeLayout_GetCategoryCount(SlangReflectionTypeLayout* type); + SLANG_API SlangParameterCategory spReflectionTypeLayout_GetCategoryByIndex(SlangReflectionTypeLayout* type, unsigned index); + + SLANG_API SlangMatrixLayoutMode spReflectionTypeLayout_GetMatrixLayoutMode(SlangReflectionTypeLayout* type); + + SLANG_API int spReflectionTypeLayout_getGenericParamIndex(SlangReflectionTypeLayout* type); + + SLANG_API SlangReflectionTypeLayout* spReflectionTypeLayout_getPendingDataTypeLayout(SlangReflectionTypeLayout* type); + + SLANG_API SlangReflectionVariableLayout* spReflectionTypeLayout_getSpecializedTypePendingDataVarLayout(SlangReflectionTypeLayout* type); + SLANG_API SlangInt spReflectionType_getSpecializedTypeArgCount(SlangReflectionType* type); + SLANG_API SlangReflectionType* spReflectionType_getSpecializedTypeArgType(SlangReflectionType* type, SlangInt index); + + SLANG_API SlangInt spReflectionTypeLayout_getBindingRangeCount(SlangReflectionTypeLayout* typeLayout); + SLANG_API SlangBindingType spReflectionTypeLayout_getBindingRangeType(SlangReflectionTypeLayout* typeLayout, SlangInt index); + SLANG_API SlangInt spReflectionTypeLayout_isBindingRangeSpecializable(SlangReflectionTypeLayout* typeLayout, SlangInt index); + SLANG_API SlangInt spReflectionTypeLayout_getBindingRangeBindingCount(SlangReflectionTypeLayout* typeLayout, SlangInt index); + SLANG_API SlangReflectionTypeLayout* spReflectionTypeLayout_getBindingRangeLeafTypeLayout(SlangReflectionTypeLayout* typeLayout, 
SlangInt index); + SLANG_API SlangReflectionVariable* spReflectionTypeLayout_getBindingRangeLeafVariable(SlangReflectionTypeLayout* typeLayout, SlangInt index); + SLANG_API SlangInt spReflectionTypeLayout_getFieldBindingRangeOffset(SlangReflectionTypeLayout* typeLayout, SlangInt fieldIndex); + SLANG_API SlangInt spReflectionTypeLayout_getExplicitCounterBindingRangeOffset(SlangReflectionTypeLayout* inTypeLayout); + + SLANG_API SlangInt spReflectionTypeLayout_getBindingRangeDescriptorSetIndex(SlangReflectionTypeLayout* typeLayout, SlangInt index); + SLANG_API SlangInt spReflectionTypeLayout_getBindingRangeFirstDescriptorRangeIndex(SlangReflectionTypeLayout* typeLayout, SlangInt index); + SLANG_API SlangInt spReflectionTypeLayout_getBindingRangeDescriptorRangeCount(SlangReflectionTypeLayout* typeLayout, SlangInt index); + + SLANG_API SlangInt spReflectionTypeLayout_getDescriptorSetCount(SlangReflectionTypeLayout* typeLayout); + SLANG_API SlangInt spReflectionTypeLayout_getDescriptorSetSpaceOffset(SlangReflectionTypeLayout* typeLayout, SlangInt setIndex); + SLANG_API SlangInt spReflectionTypeLayout_getDescriptorSetDescriptorRangeCount(SlangReflectionTypeLayout* typeLayout, SlangInt setIndex); + SLANG_API SlangInt spReflectionTypeLayout_getDescriptorSetDescriptorRangeIndexOffset(SlangReflectionTypeLayout* typeLayout, SlangInt setIndex, SlangInt rangeIndex); + SLANG_API SlangInt spReflectionTypeLayout_getDescriptorSetDescriptorRangeDescriptorCount(SlangReflectionTypeLayout* typeLayout, SlangInt setIndex, SlangInt rangeIndex); + SLANG_API SlangBindingType spReflectionTypeLayout_getDescriptorSetDescriptorRangeType(SlangReflectionTypeLayout* typeLayout, SlangInt setIndex, SlangInt rangeIndex); + SLANG_API SlangParameterCategory spReflectionTypeLayout_getDescriptorSetDescriptorRangeCategory(SlangReflectionTypeLayout* typeLayout, SlangInt setIndex, SlangInt rangeIndex); + + SLANG_API SlangInt spReflectionTypeLayout_getSubObjectRangeCount(SlangReflectionTypeLayout* 
typeLayout); + SLANG_API SlangInt spReflectionTypeLayout_getSubObjectRangeBindingRangeIndex(SlangReflectionTypeLayout* typeLayout, SlangInt subObjectRangeIndex); + SLANG_API SlangInt spReflectionTypeLayout_getSubObjectRangeSpaceOffset(SlangReflectionTypeLayout* typeLayout, SlangInt subObjectRangeIndex); + SLANG_API SlangReflectionVariableLayout* spReflectionTypeLayout_getSubObjectRangeOffset(SlangReflectionTypeLayout* typeLayout, SlangInt subObjectRangeIndex); + +#if 0 + SLANG_API SlangInt spReflectionTypeLayout_getSubObjectRangeCount(SlangReflectionTypeLayout* typeLayout); + SLANG_API SlangInt spReflectionTypeLayout_getSubObjectRangeObjectCount(SlangReflectionTypeLayout* typeLayout, SlangInt index); + SLANG_API SlangInt spReflectionTypeLayout_getSubObjectRangeBindingRangeIndex(SlangReflectionTypeLayout* typeLayout, SlangInt index); + SLANG_API SlangReflectionTypeLayout* spReflectionTypeLayout_getSubObjectRangeTypeLayout(SlangReflectionTypeLayout* typeLayout, SlangInt index); + + SLANG_API SlangInt spReflectionTypeLayout_getSubObjectRangeDescriptorRangeCount(SlangReflectionTypeLayout* typeLayout, SlangInt subObjectRangeIndex); + SLANG_API SlangBindingType spReflectionTypeLayout_getSubObjectRangeDescriptorRangeBindingType(SlangReflectionTypeLayout* typeLayout, SlangInt subObjectRangeIndex, SlangInt bindingRangeIndexInSubObject); + SLANG_API SlangInt spReflectionTypeLayout_getSubObjectRangeDescriptorRangeBindingCount(SlangReflectionTypeLayout* typeLayout, SlangInt subObjectRangeIndex, SlangInt bindingRangeIndexInSubObject); + SLANG_API SlangInt spReflectionTypeLayout_getSubObjectRangeDescriptorRangeIndexOffset(SlangReflectionTypeLayout* typeLayout, SlangInt subObjectRangeIndex, SlangInt bindingRangeIndexInSubObject); + SLANG_API SlangInt spReflectionTypeLayout_getSubObjectRangeDescriptorRangeSpaceOffset(SlangReflectionTypeLayout* typeLayout, SlangInt subObjectRangeIndex, SlangInt bindingRangeIndexInSubObject); +#endif + + // Variable Reflection + + SLANG_API char 
const* spReflectionVariable_GetName(SlangReflectionVariable* var); + SLANG_API SlangReflectionType* spReflectionVariable_GetType(SlangReflectionVariable* var); + SLANG_API SlangReflectionModifier* spReflectionVariable_FindModifier(SlangReflectionVariable* var, SlangModifierID modifierID); + SLANG_API unsigned int spReflectionVariable_GetUserAttributeCount(SlangReflectionVariable* var); + SLANG_API SlangReflectionUserAttribute* spReflectionVariable_GetUserAttribute(SlangReflectionVariable* var, unsigned int index); + SLANG_API SlangReflectionUserAttribute* spReflectionVariable_FindUserAttributeByName(SlangReflectionVariable* var, SlangSession * session, char const* name); + + // Variable Layout Reflection + + SLANG_API SlangReflectionVariable* spReflectionVariableLayout_GetVariable(SlangReflectionVariableLayout* var); + + SLANG_API SlangReflectionTypeLayout* spReflectionVariableLayout_GetTypeLayout(SlangReflectionVariableLayout* var); + + SLANG_API size_t spReflectionVariableLayout_GetOffset(SlangReflectionVariableLayout* var, SlangParameterCategory category); + SLANG_API size_t spReflectionVariableLayout_GetSpace(SlangReflectionVariableLayout* var, SlangParameterCategory category); + + SLANG_API char const* spReflectionVariableLayout_GetSemanticName(SlangReflectionVariableLayout* var); + SLANG_API size_t spReflectionVariableLayout_GetSemanticIndex(SlangReflectionVariableLayout* var); + + /** Get the stage that a variable belongs to (if any). + + A variable "belongs" to a specific stage when it is a varying input/output + parameter either defined as part of the parameter list for an entry + point *or* at the global scope of a stage-specific GLSL code file (e.g., + an `in` parameter in a GLSL `.vs` file belongs to the vertex stage). 
+ */ + SLANG_API SlangStage spReflectionVariableLayout_getStage( + SlangReflectionVariableLayout* var); + + + SLANG_API SlangReflectionVariableLayout* spReflectionVariableLayout_getPendingDataLayout(SlangReflectionVariableLayout* var); + + // Shader Parameter Reflection + + typedef SlangReflectionVariableLayout SlangReflectionParameter; + + SLANG_API unsigned spReflectionParameter_GetBindingIndex(SlangReflectionParameter* parameter); + SLANG_API unsigned spReflectionParameter_GetBindingSpace(SlangReflectionParameter* parameter); + + SLANG_API SlangResult spIsParameterLocationUsed( + SlangCompileRequest* request, + SlangInt entryPointIndex, + SlangInt targetIndex, + SlangParameterCategory category, // is this a `t` register? `s` register? + SlangUInt spaceIndex, // `space` for D3D12, `set` for Vulkan + SlangUInt registerIndex, // `register` for D3D12, `binding` for Vulkan + bool& outUsed); + + // Entry Point Reflection + + SLANG_API char const* spReflectionEntryPoint_getName( + SlangReflectionEntryPoint* entryPoint); + + SLANG_API char const* spReflectionEntryPoint_getNameOverride( + SlangReflectionEntryPoint* entryPoint); + + SLANG_API unsigned spReflectionEntryPoint_getParameterCount( + SlangReflectionEntryPoint* entryPoint); + + SLANG_API SlangReflectionVariableLayout* spReflectionEntryPoint_getParameterByIndex( + SlangReflectionEntryPoint* entryPoint, + unsigned index); + + SLANG_API SlangStage spReflectionEntryPoint_getStage(SlangReflectionEntryPoint* entryPoint); + + SLANG_API void spReflectionEntryPoint_getComputeThreadGroupSize( + SlangReflectionEntryPoint* entryPoint, + SlangUInt axisCount, + SlangUInt* outSizeAlongAxis); + + SLANG_API int spReflectionEntryPoint_usesAnySampleRateInput( + SlangReflectionEntryPoint* entryPoint); + + SLANG_API SlangReflectionVariableLayout* spReflectionEntryPoint_getVarLayout( + SlangReflectionEntryPoint* entryPoint); + + SLANG_API SlangReflectionVariableLayout* spReflectionEntryPoint_getResultVarLayout( + 
SlangReflectionEntryPoint* entryPoint); + + SLANG_API int spReflectionEntryPoint_hasDefaultConstantBuffer( + SlangReflectionEntryPoint* entryPoint); + + // SlangReflectionTypeParameter + SLANG_API char const* spReflectionTypeParameter_GetName(SlangReflectionTypeParameter* typeParam); + SLANG_API unsigned spReflectionTypeParameter_GetIndex(SlangReflectionTypeParameter* typeParam); + SLANG_API unsigned spReflectionTypeParameter_GetConstraintCount(SlangReflectionTypeParameter* typeParam); + SLANG_API SlangReflectionType* spReflectionTypeParameter_GetConstraintByIndex(SlangReflectionTypeParameter* typeParam, unsigned int index); + + // Shader Reflection + + SLANG_API unsigned spReflection_GetParameterCount(SlangReflection* reflection); + SLANG_API SlangReflectionParameter* spReflection_GetParameterByIndex(SlangReflection* reflection, unsigned index); + + SLANG_API unsigned int spReflection_GetTypeParameterCount(SlangReflection* reflection); + SLANG_API SlangReflectionTypeParameter* spReflection_GetTypeParameterByIndex(SlangReflection* reflection, unsigned int index); + SLANG_API SlangReflectionTypeParameter* spReflection_FindTypeParameter(SlangReflection* reflection, char const* name); + + SLANG_API SlangReflectionType* spReflection_FindTypeByName(SlangReflection* reflection, char const* name); + SLANG_API SlangReflectionTypeLayout* spReflection_GetTypeLayout(SlangReflection* reflection, SlangReflectionType* reflectionType, SlangLayoutRules rules); + + SLANG_API SlangUInt spReflection_getEntryPointCount(SlangReflection* reflection); + SLANG_API SlangReflectionEntryPoint* spReflection_getEntryPointByIndex(SlangReflection* reflection, SlangUInt index); + SLANG_API SlangReflectionEntryPoint* spReflection_findEntryPointByName(SlangReflection* reflection, char const* name); + + SLANG_API SlangUInt spReflection_getGlobalConstantBufferBinding(SlangReflection* reflection); + SLANG_API size_t spReflection_getGlobalConstantBufferSize(SlangReflection* reflection); + + SLANG_API 
SlangReflectionType* spReflection_specializeType( + SlangReflection* reflection, + SlangReflectionType* type, + SlangInt specializationArgCount, + SlangReflectionType* const* specializationArgs, + ISlangBlob** outDiagnostics); + + /// Get the number of hashed strings + SLANG_API SlangUInt spReflection_getHashedStringCount( + SlangReflection* reflection); + + /// Get a hashed string. The number of chars is written in outCount. + /// The count does *NOT* including terminating 0. The returned string will be 0 terminated. + SLANG_API const char* spReflection_getHashedString( + SlangReflection* reflection, + SlangUInt index, + size_t* outCount); + + /// Compute a string hash. + /// Count should *NOT* include terminating zero. + SLANG_API SlangUInt32 spComputeStringHash(const char* chars, size_t count); + + /// Get a type layout representing reflection information for the global-scope prameters. + SLANG_API SlangReflectionTypeLayout* spReflection_getGlobalParamsTypeLayout( + SlangReflection* reflection); + + /// Get a variable layout representing reflection information for the global-scope prameters. 
+ SLANG_API SlangReflectionVariableLayout* spReflection_getGlobalParamsVarLayout( + SlangReflection* reflection); + +#ifdef __cplusplus +} + +/* Helper interfaces for C++ users */ +namespace slang +{ + struct BufferReflection; + struct TypeLayoutReflection; + struct TypeReflection; + struct VariableLayoutReflection; + struct VariableReflection; + + struct UserAttribute + { + char const* getName() + { + return spReflectionUserAttribute_GetName((SlangReflectionUserAttribute*)this); + } + uint32_t getArgumentCount() + { + return (uint32_t)spReflectionUserAttribute_GetArgumentCount((SlangReflectionUserAttribute*)this); + } + TypeReflection* getArgumentType(uint32_t index) + { + return (TypeReflection*)spReflectionUserAttribute_GetArgumentType((SlangReflectionUserAttribute*)this, index); + } + SlangResult getArgumentValueInt(uint32_t index, int * value) + { + return spReflectionUserAttribute_GetArgumentValueInt((SlangReflectionUserAttribute*)this, index, value); + } + SlangResult getArgumentValueFloat(uint32_t index, float * value) + { + return spReflectionUserAttribute_GetArgumentValueFloat((SlangReflectionUserAttribute*)this, index, value); + } + const char* getArgumentValueString(uint32_t index, size_t * outSize) + { + return spReflectionUserAttribute_GetArgumentValueString((SlangReflectionUserAttribute*)this, index, outSize); + } + }; + + struct TypeReflection + { + enum class Kind + { + None = SLANG_TYPE_KIND_NONE, + Struct = SLANG_TYPE_KIND_STRUCT, + Array = SLANG_TYPE_KIND_ARRAY, + Matrix = SLANG_TYPE_KIND_MATRIX, + Vector = SLANG_TYPE_KIND_VECTOR, + Scalar = SLANG_TYPE_KIND_SCALAR, + ConstantBuffer = SLANG_TYPE_KIND_CONSTANT_BUFFER, + Resource = SLANG_TYPE_KIND_RESOURCE, + SamplerState = SLANG_TYPE_KIND_SAMPLER_STATE, + TextureBuffer = SLANG_TYPE_KIND_TEXTURE_BUFFER, + ShaderStorageBuffer = SLANG_TYPE_KIND_SHADER_STORAGE_BUFFER, + ParameterBlock = SLANG_TYPE_KIND_PARAMETER_BLOCK, + GenericTypeParameter = SLANG_TYPE_KIND_GENERIC_TYPE_PARAMETER, + Interface = 
SLANG_TYPE_KIND_INTERFACE, + OutputStream = SLANG_TYPE_KIND_OUTPUT_STREAM, + Specialized = SLANG_TYPE_KIND_SPECIALIZED, + Feedback = SLANG_TYPE_KIND_FEEDBACK, + Pointer = SLANG_TYPE_KIND_POINTER, + }; + + enum ScalarType : SlangScalarTypeIntegral + { + None = SLANG_SCALAR_TYPE_NONE, + Void = SLANG_SCALAR_TYPE_VOID, + Bool = SLANG_SCALAR_TYPE_BOOL, + Int32 = SLANG_SCALAR_TYPE_INT32, + UInt32 = SLANG_SCALAR_TYPE_UINT32, + Int64 = SLANG_SCALAR_TYPE_INT64, + UInt64 = SLANG_SCALAR_TYPE_UINT64, + Float16 = SLANG_SCALAR_TYPE_FLOAT16, + Float32 = SLANG_SCALAR_TYPE_FLOAT32, + Float64 = SLANG_SCALAR_TYPE_FLOAT64, + Int8 = SLANG_SCALAR_TYPE_INT8, + UInt8 = SLANG_SCALAR_TYPE_UINT8, + Int16 = SLANG_SCALAR_TYPE_INT16, + UInt16 = SLANG_SCALAR_TYPE_UINT16, + }; + + Kind getKind() + { + return (Kind) spReflectionType_GetKind((SlangReflectionType*) this); + } + + // only useful if `getKind() == Kind::Struct` + unsigned int getFieldCount() + { + return spReflectionType_GetFieldCount((SlangReflectionType*) this); + } + + VariableReflection* getFieldByIndex(unsigned int index) + { + return (VariableReflection*) spReflectionType_GetFieldByIndex((SlangReflectionType*) this, index); + } + + bool isArray() { return getKind() == TypeReflection::Kind::Array; } + + TypeReflection* unwrapArray() + { + TypeReflection* type = this; + while( type->isArray() ) + { + type = type->getElementType(); + } + return type; + } + + // only useful if `getKind() == Kind::Array` + size_t getElementCount() + { + return spReflectionType_GetElementCount((SlangReflectionType*) this); + } + + size_t getTotalArrayElementCount() + { + if(!isArray()) return 0; + size_t result = 1; + TypeReflection* type = this; + for(;;) + { + if(!type->isArray()) + return result; + + result *= type->getElementCount(); + type = type->getElementType(); + } + } + + TypeReflection* getElementType() + { + return (TypeReflection*) spReflectionType_GetElementType((SlangReflectionType*) this); + } + + unsigned getRowCount() + { + return 
spReflectionType_GetRowCount((SlangReflectionType*) this); + } + + unsigned getColumnCount() + { + return spReflectionType_GetColumnCount((SlangReflectionType*) this); + } + + ScalarType getScalarType() + { + return (ScalarType) spReflectionType_GetScalarType((SlangReflectionType*) this); + } + + TypeReflection* getResourceResultType() + { + return (TypeReflection*) spReflectionType_GetResourceResultType((SlangReflectionType*) this); + } + + SlangResourceShape getResourceShape() + { + return spReflectionType_GetResourceShape((SlangReflectionType*) this); + } + + SlangResourceAccess getResourceAccess() + { + return spReflectionType_GetResourceAccess((SlangReflectionType*) this); + } + + char const* getName() + { + return spReflectionType_GetName((SlangReflectionType*) this); + } + + unsigned int getUserAttributeCount() + { + return spReflectionType_GetUserAttributeCount((SlangReflectionType*)this); + } + UserAttribute* getUserAttributeByIndex(unsigned int index) + { + return (UserAttribute*)spReflectionType_GetUserAttribute((SlangReflectionType*)this, index); + } + UserAttribute* findUserAttributeByName(char const* name) + { + return (UserAttribute*)spReflectionType_FindUserAttributeByName((SlangReflectionType*)this, name); + } + }; + + enum ParameterCategory : SlangParameterCategoryIntegral + { + // TODO: these aren't scoped... 
+ None = SLANG_PARAMETER_CATEGORY_NONE, + Mixed = SLANG_PARAMETER_CATEGORY_MIXED, + ConstantBuffer = SLANG_PARAMETER_CATEGORY_CONSTANT_BUFFER, + ShaderResource = SLANG_PARAMETER_CATEGORY_SHADER_RESOURCE, + UnorderedAccess = SLANG_PARAMETER_CATEGORY_UNORDERED_ACCESS, + VaryingInput = SLANG_PARAMETER_CATEGORY_VARYING_INPUT, + VaryingOutput = SLANG_PARAMETER_CATEGORY_VARYING_OUTPUT, + SamplerState = SLANG_PARAMETER_CATEGORY_SAMPLER_STATE, + Uniform = SLANG_PARAMETER_CATEGORY_UNIFORM, + DescriptorTableSlot = SLANG_PARAMETER_CATEGORY_DESCRIPTOR_TABLE_SLOT, + SpecializationConstant = SLANG_PARAMETER_CATEGORY_SPECIALIZATION_CONSTANT, + PushConstantBuffer = SLANG_PARAMETER_CATEGORY_PUSH_CONSTANT_BUFFER, + RegisterSpace = SLANG_PARAMETER_CATEGORY_REGISTER_SPACE, + GenericResource = SLANG_PARAMETER_CATEGORY_GENERIC, + + RayPayload = SLANG_PARAMETER_CATEGORY_RAY_PAYLOAD, + HitAttributes = SLANG_PARAMETER_CATEGORY_HIT_ATTRIBUTES, + CallablePayload = SLANG_PARAMETER_CATEGORY_CALLABLE_PAYLOAD, + + ShaderRecord = SLANG_PARAMETER_CATEGORY_SHADER_RECORD, + + ExistentialTypeParam = SLANG_PARAMETER_CATEGORY_EXISTENTIAL_TYPE_PARAM, + ExistentialObjectParam = SLANG_PARAMETER_CATEGORY_EXISTENTIAL_OBJECT_PARAM, + + SubElementRegisterSpace = SLANG_PARAMETER_CATEGORY_SUB_ELEMENT_REGISTER_SPACE, + + // DEPRECATED: + VertexInput = SLANG_PARAMETER_CATEGORY_VERTEX_INPUT, + FragmentOutput = SLANG_PARAMETER_CATEGORY_FRAGMENT_OUTPUT, + }; + + enum class BindingType : SlangBindingTypeIntegral + { + Unknown = SLANG_BINDING_TYPE_UNKNOWN, + + Sampler = SLANG_BINDING_TYPE_SAMPLER, + Texture = SLANG_BINDING_TYPE_TEXTURE, + ConstantBuffer = SLANG_BINDING_TYPE_CONSTANT_BUFFER, + ParameterBlock = SLANG_BINDING_TYPE_PARAMETER_BLOCK, + TypedBuffer = SLANG_BINDING_TYPE_TYPED_BUFFER, + RawBuffer = SLANG_BINDING_TYPE_RAW_BUFFER, + CombinedTextureSampler = SLANG_BINDING_TYPE_COMBINED_TEXTURE_SAMPLER, + InputRenderTarget = SLANG_BINDING_TYPE_INPUT_RENDER_TARGET, + InlineUniformData = 
SLANG_BINDING_TYPE_INLINE_UNIFORM_DATA, + RayTracingAccelerationStructure = SLANG_BINDING_TYPE_RAY_TRACING_ACCELERATION_STRUCTURE, + VaryingInput = SLANG_BINDING_TYPE_VARYING_INPUT, + VaryingOutput = SLANG_BINDING_TYPE_VARYING_OUTPUT, + ExistentialValue = SLANG_BINDING_TYPE_EXISTENTIAL_VALUE, + PushConstant = SLANG_BINDING_TYPE_PUSH_CONSTANT, + + MutableFlag = SLANG_BINDING_TYPE_MUTABLE_FLAG, + + MutableTexture = SLANG_BINDING_TYPE_MUTABLE_TETURE, + MutableTypedBuffer = SLANG_BINDING_TYPE_MUTABLE_TYPED_BUFFER, + MutableRawBuffer = SLANG_BINDING_TYPE_MUTABLE_RAW_BUFFER, + + BaseMask = SLANG_BINDING_TYPE_BASE_MASK, + ExtMask = SLANG_BINDING_TYPE_EXT_MASK, + }; + + struct TypeLayoutReflection + { + TypeReflection* getType() + { + return (TypeReflection*) spReflectionTypeLayout_GetType((SlangReflectionTypeLayout*) this); + } + + TypeReflection::Kind getKind() + { + return (TypeReflection::Kind) spReflectionTypeLayout_getKind((SlangReflectionTypeLayout*) this); + } + + size_t getSize(SlangParameterCategory category = SLANG_PARAMETER_CATEGORY_UNIFORM) + { + return spReflectionTypeLayout_GetSize((SlangReflectionTypeLayout*) this, category); + } + + size_t getStride(SlangParameterCategory category = SLANG_PARAMETER_CATEGORY_UNIFORM) + { + return spReflectionTypeLayout_GetStride((SlangReflectionTypeLayout*) this, category); + } + + int32_t getAlignment(SlangParameterCategory category = SLANG_PARAMETER_CATEGORY_UNIFORM) + { + return spReflectionTypeLayout_getAlignment((SlangReflectionTypeLayout*) this, category); + } + + unsigned int getFieldCount() + { + return getType()->getFieldCount(); + } + + VariableLayoutReflection* getFieldByIndex(unsigned int index) + { + return (VariableLayoutReflection*) spReflectionTypeLayout_GetFieldByIndex((SlangReflectionTypeLayout*) this, index); + } + + SlangInt findFieldIndexByName(char const* nameBegin, char const* nameEnd = nullptr) + { + return spReflectionTypeLayout_findFieldIndexByName((SlangReflectionTypeLayout*) this, nameBegin, 
nameEnd); + } + + VariableLayoutReflection* getExplicitCounter() + { + return (VariableLayoutReflection*) spReflectionTypeLayout_GetExplicitCounter((SlangReflectionTypeLayout*) this); + } + + bool isArray() { return getType()->isArray(); } + + TypeLayoutReflection* unwrapArray() + { + TypeLayoutReflection* typeLayout = this; + while( typeLayout->isArray() ) + { + typeLayout = typeLayout->getElementTypeLayout(); + } + return typeLayout; + } + + // only useful if `getKind() == Kind::Array` + size_t getElementCount() + { + return getType()->getElementCount(); + } + + size_t getTotalArrayElementCount() + { + return getType()->getTotalArrayElementCount(); + } + + size_t getElementStride(SlangParameterCategory category) + { + return spReflectionTypeLayout_GetElementStride((SlangReflectionTypeLayout*) this, category); + } + + TypeLayoutReflection* getElementTypeLayout() + { + return (TypeLayoutReflection*) spReflectionTypeLayout_GetElementTypeLayout((SlangReflectionTypeLayout*) this); + } + + VariableLayoutReflection* getElementVarLayout() + { + return (VariableLayoutReflection*)spReflectionTypeLayout_GetElementVarLayout((SlangReflectionTypeLayout*) this); + } + + VariableLayoutReflection* getContainerVarLayout() + { + return (VariableLayoutReflection*)spReflectionTypeLayout_getContainerVarLayout((SlangReflectionTypeLayout*) this); + } + + // How is this type supposed to be bound? 
+ ParameterCategory getParameterCategory() + { + return (ParameterCategory) spReflectionTypeLayout_GetParameterCategory((SlangReflectionTypeLayout*) this); + } + + unsigned int getCategoryCount() + { + return spReflectionTypeLayout_GetCategoryCount((SlangReflectionTypeLayout*) this); + } + + ParameterCategory getCategoryByIndex(unsigned int index) + { + return (ParameterCategory) spReflectionTypeLayout_GetCategoryByIndex((SlangReflectionTypeLayout*) this, index); + } + + unsigned getRowCount() + { + return getType()->getRowCount(); + } + + unsigned getColumnCount() + { + return getType()->getColumnCount(); + } + + TypeReflection::ScalarType getScalarType() + { + return getType()->getScalarType(); + } + + TypeReflection* getResourceResultType() + { + return getType()->getResourceResultType(); + } + + SlangResourceShape getResourceShape() + { + return getType()->getResourceShape(); + } + + SlangResourceAccess getResourceAccess() + { + return getType()->getResourceAccess(); + } + + char const* getName() + { + return getType()->getName(); + } + + SlangMatrixLayoutMode getMatrixLayoutMode() + { + return spReflectionTypeLayout_GetMatrixLayoutMode((SlangReflectionTypeLayout*) this); + } + + int getGenericParamIndex() + { + return spReflectionTypeLayout_getGenericParamIndex( + (SlangReflectionTypeLayout*) this); + } + + TypeLayoutReflection* getPendingDataTypeLayout() + { + return (TypeLayoutReflection*) spReflectionTypeLayout_getPendingDataTypeLayout( + (SlangReflectionTypeLayout*) this); + } + + VariableLayoutReflection* getSpecializedTypePendingDataVarLayout() + { + return (VariableLayoutReflection*) spReflectionTypeLayout_getSpecializedTypePendingDataVarLayout( + (SlangReflectionTypeLayout*) this); + } + + SlangInt getBindingRangeCount() + { + return spReflectionTypeLayout_getBindingRangeCount( + (SlangReflectionTypeLayout*) this); + } + + BindingType getBindingRangeType(SlangInt index) + { + return (BindingType) spReflectionTypeLayout_getBindingRangeType( + 
(SlangReflectionTypeLayout*) this, + index); + } + + bool isBindingRangeSpecializable(SlangInt index) + { + return (bool)spReflectionTypeLayout_isBindingRangeSpecializable( + (SlangReflectionTypeLayout*)this, + index); + + } + + SlangInt getBindingRangeBindingCount(SlangInt index) + { + return spReflectionTypeLayout_getBindingRangeBindingCount( + (SlangReflectionTypeLayout*) this, + index); + } + + /* + SlangInt getBindingRangeIndexOffset(SlangInt index) + { + return spReflectionTypeLayout_getBindingRangeIndexOffset( + (SlangReflectionTypeLayout*) this, + index); + } + + SlangInt getBindingRangeSpaceOffset(SlangInt index) + { + return spReflectionTypeLayout_getBindingRangeSpaceOffset( + (SlangReflectionTypeLayout*) this, + index); + } + */ + + SlangInt getFieldBindingRangeOffset(SlangInt fieldIndex) + { + return spReflectionTypeLayout_getFieldBindingRangeOffset( + (SlangReflectionTypeLayout*) this, + fieldIndex); + } + + SlangInt getExplicitCounterBindingRangeOffset() + { + return spReflectionTypeLayout_getExplicitCounterBindingRangeOffset( + (SlangReflectionTypeLayout*) this); + } + + TypeLayoutReflection* getBindingRangeLeafTypeLayout(SlangInt index) + { + return (TypeLayoutReflection*) spReflectionTypeLayout_getBindingRangeLeafTypeLayout( + (SlangReflectionTypeLayout*) this, + index); + } + + VariableReflection* getBindingRangeLeafVariable(SlangInt index) + { + return (VariableReflection*)spReflectionTypeLayout_getBindingRangeLeafVariable( + (SlangReflectionTypeLayout*)this, index); + } + + SlangInt getBindingRangeDescriptorSetIndex(SlangInt index) + { + return spReflectionTypeLayout_getBindingRangeDescriptorSetIndex( + (SlangReflectionTypeLayout*) this, + index); + } + + SlangInt getBindingRangeFirstDescriptorRangeIndex(SlangInt index) + { + return spReflectionTypeLayout_getBindingRangeFirstDescriptorRangeIndex( + (SlangReflectionTypeLayout*) this, + index); + } + + SlangInt getBindingRangeDescriptorRangeCount(SlangInt index) + { + return 
spReflectionTypeLayout_getBindingRangeDescriptorRangeCount( + (SlangReflectionTypeLayout*) this, + index); + } + + SlangInt getDescriptorSetCount() + { + return spReflectionTypeLayout_getDescriptorSetCount( + (SlangReflectionTypeLayout*) this); + } + + SlangInt getDescriptorSetSpaceOffset(SlangInt setIndex) + { + return spReflectionTypeLayout_getDescriptorSetSpaceOffset( + (SlangReflectionTypeLayout*) this, + setIndex); + } + + SlangInt getDescriptorSetDescriptorRangeCount(SlangInt setIndex) + { + return spReflectionTypeLayout_getDescriptorSetDescriptorRangeCount( + (SlangReflectionTypeLayout*) this, + setIndex); + } + + SlangInt getDescriptorSetDescriptorRangeIndexOffset(SlangInt setIndex, SlangInt rangeIndex) + { + return spReflectionTypeLayout_getDescriptorSetDescriptorRangeIndexOffset( + (SlangReflectionTypeLayout*) this, + setIndex, + rangeIndex); + } + + SlangInt getDescriptorSetDescriptorRangeDescriptorCount(SlangInt setIndex, SlangInt rangeIndex) + { + return spReflectionTypeLayout_getDescriptorSetDescriptorRangeDescriptorCount( + (SlangReflectionTypeLayout*) this, + setIndex, + rangeIndex); + } + + BindingType getDescriptorSetDescriptorRangeType(SlangInt setIndex, SlangInt rangeIndex) + { + return (BindingType) spReflectionTypeLayout_getDescriptorSetDescriptorRangeType( + (SlangReflectionTypeLayout*) this, + setIndex, + rangeIndex); + } + + ParameterCategory getDescriptorSetDescriptorRangeCategory(SlangInt setIndex, SlangInt rangeIndex) + { + return (ParameterCategory) spReflectionTypeLayout_getDescriptorSetDescriptorRangeCategory( + (SlangReflectionTypeLayout*) this, + setIndex, + rangeIndex); + } + + SlangInt getSubObjectRangeCount() + { + return spReflectionTypeLayout_getSubObjectRangeCount( + (SlangReflectionTypeLayout*) this); + } + + SlangInt getSubObjectRangeBindingRangeIndex(SlangInt subObjectRangeIndex) + { + return spReflectionTypeLayout_getSubObjectRangeBindingRangeIndex( + (SlangReflectionTypeLayout*) this, + subObjectRangeIndex); + } + + 
SlangInt getSubObjectRangeSpaceOffset(SlangInt subObjectRangeIndex) + { + return spReflectionTypeLayout_getSubObjectRangeSpaceOffset( + (SlangReflectionTypeLayout*) this, + subObjectRangeIndex); + } + + VariableLayoutReflection* getSubObjectRangeOffset(SlangInt subObjectRangeIndex) + { + return (VariableLayoutReflection*) spReflectionTypeLayout_getSubObjectRangeOffset( + (SlangReflectionTypeLayout*) this, + subObjectRangeIndex); + } + }; + + struct Modifier + { + enum ID : SlangModifierIDIntegral + { + Shared = SLANG_MODIFIER_SHARED, + }; + }; + + struct VariableReflection + { + char const* getName() + { + return spReflectionVariable_GetName((SlangReflectionVariable*) this); + } + + TypeReflection* getType() + { + return (TypeReflection*) spReflectionVariable_GetType((SlangReflectionVariable*) this); + } + + Modifier* findModifier(Modifier::ID id) + { + return (Modifier*) spReflectionVariable_FindModifier((SlangReflectionVariable*) this, (SlangModifierID) id); + } + + unsigned int getUserAttributeCount() + { + return spReflectionVariable_GetUserAttributeCount((SlangReflectionVariable*)this); + } + UserAttribute* getUserAttributeByIndex(unsigned int index) + { + return (UserAttribute*)spReflectionVariable_GetUserAttribute((SlangReflectionVariable*)this, index); + } + UserAttribute* findUserAttributeByName(SlangSession* session, char const* name) + { + return (UserAttribute*)spReflectionVariable_FindUserAttributeByName((SlangReflectionVariable*)this, session, name); + } + }; + + struct VariableLayoutReflection + { + VariableReflection* getVariable() + { + return (VariableReflection*) spReflectionVariableLayout_GetVariable((SlangReflectionVariableLayout*) this); + } + + char const* getName() + { + return getVariable()->getName(); + } + + Modifier* findModifier(Modifier::ID id) + { + return getVariable()->findModifier(id); + } + + TypeLayoutReflection* getTypeLayout() + { + return (TypeLayoutReflection*) 
spReflectionVariableLayout_GetTypeLayout((SlangReflectionVariableLayout*) this); + } + + ParameterCategory getCategory() + { + return getTypeLayout()->getParameterCategory(); + } + + unsigned int getCategoryCount() + { + return getTypeLayout()->getCategoryCount(); + } + + ParameterCategory getCategoryByIndex(unsigned int index) + { + return getTypeLayout()->getCategoryByIndex(index); + } + + + size_t getOffset(SlangParameterCategory category = SLANG_PARAMETER_CATEGORY_UNIFORM) + { + return spReflectionVariableLayout_GetOffset((SlangReflectionVariableLayout*) this, category); + } + + TypeReflection* getType() + { + return getVariable()->getType(); + } + + unsigned getBindingIndex() + { + return spReflectionParameter_GetBindingIndex((SlangReflectionVariableLayout*) this); + } + + unsigned getBindingSpace() + { + return spReflectionParameter_GetBindingSpace((SlangReflectionVariableLayout*) this); + } + + size_t getBindingSpace(SlangParameterCategory category) + { + return spReflectionVariableLayout_GetSpace((SlangReflectionVariableLayout*) this, category); + } + + char const* getSemanticName() + { + return spReflectionVariableLayout_GetSemanticName((SlangReflectionVariableLayout*) this); + } + + size_t getSemanticIndex() + { + return spReflectionVariableLayout_GetSemanticIndex((SlangReflectionVariableLayout*) this); + } + + SlangStage getStage() + { + return spReflectionVariableLayout_getStage((SlangReflectionVariableLayout*) this); + } + + VariableLayoutReflection* getPendingDataLayout() + { + return (VariableLayoutReflection*) spReflectionVariableLayout_getPendingDataLayout((SlangReflectionVariableLayout*) this); + } + }; + + struct EntryPointReflection + { + char const* getName() + { + return spReflectionEntryPoint_getName((SlangReflectionEntryPoint*) this); + } + + char const* getNameOverride() + { + return spReflectionEntryPoint_getNameOverride((SlangReflectionEntryPoint*)this); + } + + unsigned getParameterCount() + { + return 
spReflectionEntryPoint_getParameterCount((SlangReflectionEntryPoint*) this); + } + + VariableLayoutReflection* getParameterByIndex(unsigned index) + { + return (VariableLayoutReflection*) spReflectionEntryPoint_getParameterByIndex((SlangReflectionEntryPoint*) this, index); + } + + SlangStage getStage() + { + return spReflectionEntryPoint_getStage((SlangReflectionEntryPoint*) this); + } + + void getComputeThreadGroupSize( + SlangUInt axisCount, + SlangUInt* outSizeAlongAxis) + { + return spReflectionEntryPoint_getComputeThreadGroupSize((SlangReflectionEntryPoint*) this, axisCount, outSizeAlongAxis); + } + + bool usesAnySampleRateInput() + { + return 0 != spReflectionEntryPoint_usesAnySampleRateInput((SlangReflectionEntryPoint*) this); + } + + VariableLayoutReflection* getVarLayout() + { + return (VariableLayoutReflection*) spReflectionEntryPoint_getVarLayout((SlangReflectionEntryPoint*) this); + } + + TypeLayoutReflection* getTypeLayout() + { + return getVarLayout()->getTypeLayout(); + } + + VariableLayoutReflection* getResultVarLayout() + { + return (VariableLayoutReflection*) spReflectionEntryPoint_getResultVarLayout((SlangReflectionEntryPoint*) this); + } + + bool hasDefaultConstantBuffer() + { + return spReflectionEntryPoint_hasDefaultConstantBuffer((SlangReflectionEntryPoint*) this) != 0; + } + }; + typedef EntryPointReflection EntryPointLayout; + + struct TypeParameterReflection + { + char const* getName() + { + return spReflectionTypeParameter_GetName((SlangReflectionTypeParameter*) this); + } + unsigned getIndex() + { + return spReflectionTypeParameter_GetIndex((SlangReflectionTypeParameter*) this); + } + unsigned getConstraintCount() + { + return spReflectionTypeParameter_GetConstraintCount((SlangReflectionTypeParameter*) this); + } + TypeReflection* getConstraintByIndex(int index) + { + return (TypeReflection*)spReflectionTypeParameter_GetConstraintByIndex((SlangReflectionTypeParameter*) this, index); + } + }; + + enum class LayoutRules : 
SlangLayoutRulesIntegral + { + Default = SLANG_LAYOUT_RULES_DEFAULT, + }; + + typedef struct ShaderReflection ProgramLayout; + + struct ShaderReflection + { + unsigned getParameterCount() + { + return spReflection_GetParameterCount((SlangReflection*) this); + } + + unsigned getTypeParameterCount() + { + return spReflection_GetTypeParameterCount((SlangReflection*) this); + } + + TypeParameterReflection* getTypeParameterByIndex(unsigned index) + { + return (TypeParameterReflection*)spReflection_GetTypeParameterByIndex((SlangReflection*) this, index); + } + + TypeParameterReflection* findTypeParameter(char const* name) + { + return (TypeParameterReflection*)spReflection_FindTypeParameter((SlangReflection*)this, name); + } + + VariableLayoutReflection* getParameterByIndex(unsigned index) + { + return (VariableLayoutReflection*) spReflection_GetParameterByIndex((SlangReflection*) this, index); + } + + static ProgramLayout* get(SlangCompileRequest* request) + { + return (ProgramLayout*) spGetReflection(request); + } + + SlangUInt getEntryPointCount() + { + return spReflection_getEntryPointCount((SlangReflection*) this); + } + + EntryPointReflection* getEntryPointByIndex(SlangUInt index) + { + return (EntryPointReflection*) spReflection_getEntryPointByIndex((SlangReflection*) this, index); + } + + SlangUInt getGlobalConstantBufferBinding() + { + return spReflection_getGlobalConstantBufferBinding((SlangReflection*)this); + } + + size_t getGlobalConstantBufferSize() + { + return spReflection_getGlobalConstantBufferSize((SlangReflection*)this); + } + + TypeReflection* findTypeByName(const char* name) + { + return (TypeReflection*)spReflection_FindTypeByName( + (SlangReflection*) this, + name); + } + + TypeLayoutReflection* getTypeLayout( + TypeReflection* type, + LayoutRules rules = LayoutRules::Default) + { + return (TypeLayoutReflection*)spReflection_GetTypeLayout( + (SlangReflection*) this, + (SlangReflectionType*)type, + SlangLayoutRules(rules)); + } + + 
EntryPointReflection* findEntryPointByName(const char* name) + { + return (EntryPointReflection*)spReflection_findEntryPointByName( + (SlangReflection*) this, + name); + } + + TypeReflection* specializeType( + TypeReflection* type, + SlangInt specializationArgCount, + TypeReflection* const* specializationArgs, + ISlangBlob** outDiagnostics) + { + return (TypeReflection*) spReflection_specializeType( + (SlangReflection*) this, + (SlangReflectionType*) type, + specializationArgCount, + (SlangReflectionType* const*) specializationArgs, + outDiagnostics); + } + + SlangUInt getHashedStringCount() const { return spReflection_getHashedStringCount((SlangReflection*)this); } + + const char* getHashedString(SlangUInt index, size_t* outCount) const + { + return spReflection_getHashedString((SlangReflection*)this, index, outCount); + } + + TypeLayoutReflection* getGlobalParamsTypeLayout() + { + return (TypeLayoutReflection*) spReflection_getGlobalParamsTypeLayout((SlangReflection*) this); + } + + VariableLayoutReflection* getGlobalParamsVarLayout() + { + return (VariableLayoutReflection*) spReflection_getGlobalParamsVarLayout((SlangReflection*) this); + } + }; + + typedef uint32_t CompileStdLibFlags; + struct CompileStdLibFlag + { + enum Enum : CompileStdLibFlags + { + WriteDocumentation = 0x1, + }; + }; + + typedef ISlangBlob IBlob; + + struct IComponentType; + struct ITypeConformance; + struct IGlobalSession; + struct IModule; + struct ISession; + + struct SessionDesc; + struct SpecializationArg; + struct TargetDesc; + + /** A global session for interaction with the Slang library. + + An application may create and re-use a single global session across + multiple sessions, in order to amortize startups costs (in current + Slang this is mostly the cost of loading the Slang standard library). + + The global session is currently *not* thread-safe and objects created from + a single global session should only be used from a single thread at + a time. 
+ */ + struct IGlobalSession : public ISlangUnknown + { + SLANG_COM_INTERFACE(0xc140b5fd, 0xc78, 0x452e, { 0xba, 0x7c, 0x1a, 0x1e, 0x70, 0xc7, 0xf7, 0x1c }) + + /** Create a new session for loading and compiling code. + */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL createSession( + SessionDesc const& desc, + ISession** outSession) = 0; + + /** Look up the internal ID of a profile by its `name`. + + Profile IDs are *not* guaranteed to be stable across versions + of the Slang library, so clients are expected to look up + profiles by name at runtime. + */ + virtual SLANG_NO_THROW SlangProfileID SLANG_MCALL findProfile( + char const* name) = 0; + + /** Set the path that downstream compilers (aka back end compilers) will + be looked from. + @param passThrough Identifies the downstream compiler + @param path The path to find the downstream compiler (shared library/dll/executable) + + For back ends that are dlls/shared libraries, it will mean the path will + be prefixed with the path when calls are made out to ISlangSharedLibraryLoader. + For executables - it will look for executables along the path */ + virtual SLANG_NO_THROW void SLANG_MCALL setDownstreamCompilerPath( + SlangPassThrough passThrough, + char const* path) = 0; + + /** DEPRECATED: Use setLanguagePrelude + + Set the 'prelude' for generated code for a 'downstream compiler'. + @param passThrough The downstream compiler for generated code that will have the prelude applied to it. + @param preludeText The text added pre-pended verbatim before the generated source + + That for pass-through usage, prelude is not pre-pended, preludes are for code generation only. + */ + virtual SLANG_NO_THROW void SLANG_MCALL setDownstreamCompilerPrelude( + SlangPassThrough passThrough, + const char* preludeText) = 0; + + /** DEPRECATED: Use getLanguagePrelude + + Get the 'prelude' for generated code for a 'downstream compiler'. 
+ @param passThrough The downstream compiler for generated code that will have the prelude applied to it. + @param outPrelude On exit holds a blob that holds the string of the prelude. + */ + virtual SLANG_NO_THROW void SLANG_MCALL getDownstreamCompilerPrelude( + SlangPassThrough passThrough, + ISlangBlob** outPrelude) = 0; + + /** Get the build version 'tag' string. The string is the same as produced via `git describe --tags` + for the project. If Slang is built separately from the automated build scripts + the contents will by default be 'unknown'. Any string can be set by changing the + contents of 'slang-tag-version.h' file and recompiling the project. + + This method will return exactly the same result as the free function spGetBuildTagString. + + @return The build tag string + */ + virtual SLANG_NO_THROW const char* SLANG_MCALL getBuildTagString() = 0; + + /* For a given source language set the default compiler. + If a default cannot be chosen (for example the target cannot be achieved by the default), + the default will not be used. + + @param sourceLanguage the source language + @param defaultCompiler the default compiler for that language + @return + */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL setDefaultDownstreamCompiler( + SlangSourceLanguage sourceLanguage, + SlangPassThrough defaultCompiler) = 0; + + /* For a source type get the default compiler + + @param sourceLanguage the source language + @return The downstream compiler for that source language */ + virtual SlangPassThrough SLANG_MCALL getDefaultDownstreamCompiler( + SlangSourceLanguage sourceLanguage) = 0; + + /* Set the 'prelude' placed before generated code for a specific language type. + + @param sourceLanguage The language the prelude should be inserted on. + @param preludeText The text added pre-pended verbatim before the generated source + + Note! That for pass-through usage, prelude is not pre-pended, preludes are for code generation only. 
+ */ + virtual SLANG_NO_THROW void SLANG_MCALL setLanguagePrelude( + SlangSourceLanguage sourceLanguage, + const char* preludeText) = 0; + + /** Get the 'prelude' associated with a specific source language. + @param sourceLanguage The source language whose prelude is requested. + @param outPrelude On exit holds a blob that holds the string of the prelude. + */ + virtual SLANG_NO_THROW void SLANG_MCALL getLanguagePrelude( + SlangSourceLanguage sourceLanguage, + ISlangBlob** outPrelude) = 0; + + /** Create a compile request. + */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL createCompileRequest( + slang::ICompileRequest** outCompileRequest) = 0; + + /** Add new builtin declarations to be used in subsequent compiles. + */ + virtual SLANG_NO_THROW void SLANG_MCALL addBuiltins( + char const* sourcePath, + char const* sourceString) = 0; + + /** Set the session shared library loader. If this changes the loader, it may cause shared libraries to be unloaded + @param loader The loader to set. Setting nullptr sets the default loader. + */ + virtual SLANG_NO_THROW void SLANG_MCALL setSharedLibraryLoader( + ISlangSharedLibraryLoader* loader) = 0; + + /** Gets the currently set shared library loader + @return The currently set loader.
If nullptr is returned, the default loader is in use + */ + virtual SLANG_NO_THROW ISlangSharedLibraryLoader* SLANG_MCALL getSharedLibraryLoader() = 0; + + /** Returns SLANG_OK if the compilation target is supported for this session + + @param target The compilation target to test + @return SLANG_OK if the target is available + SLANG_E_NOT_IMPLEMENTED if not implemented in this build + SLANG_E_NOT_FOUND if other resources (such as shared libraries) required to make target work could not be found + SLANG_FAIL other kinds of failures */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL checkCompileTargetSupport( + SlangCompileTarget target) = 0; + + /** Returns SLANG_OK if the pass-through compiler is supported for this session + @param passThrough The pass-through compiler to test + @return SLANG_OK if the pass-through compiler is available + SLANG_E_NOT_IMPLEMENTED if not implemented in this build + SLANG_E_NOT_FOUND if other resources (such as shared libraries) required to make it work could not be found + SLANG_FAIL other kinds of failures */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL checkPassThroughSupport( + SlangPassThrough passThrough) = 0; + + /** Compile from (embedded source) the StdLib on the session. + Will return a failure if there is already a StdLib available + NOTE! API is experimental and not ready for production code + @param flags to control compilation + */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL compileStdLib(CompileStdLibFlags flags) = 0; + + /** Load the StdLib. Currently loads modules from the file system. + @param stdLib Start address of the serialized stdlib + @param stdLibSizeInBytes The size in bytes of the serialized stdlib + + NOTE!
API is experimental and not ready for production code + */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL loadStdLib(const void* stdLib, size_t stdLibSizeInBytes) = 0; + + /** Save the StdLib modules to the file system + @param archiveType The type of archive used to hold the stdlib + @param outBlob The serialized blob containing the standard library + + NOTE! API is experimental and not ready for production code */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL saveStdLib(SlangArchiveType archiveType, ISlangBlob** outBlob) = 0; + + /** Look up the internal ID of a capability by its `name`. + + Capability IDs are *not* guaranteed to be stable across versions + of the Slang library, so clients are expected to look up + capabilities by name at runtime. + */ + virtual SLANG_NO_THROW SlangCapabilityID SLANG_MCALL findCapability( + char const* name) = 0; + + /** Set the downstream/pass through compiler to be used for a transition from the source type to the target type + @param source The source 'code gen target' + @param target The target 'code gen target' + @param compiler The compiler/pass through to use for the transition from source to target + */ + virtual SLANG_NO_THROW void SLANG_MCALL setDownstreamCompilerForTransition(SlangCompileTarget source, SlangCompileTarget target, SlangPassThrough compiler) = 0; + + /** Get the downstream/pass through compiler for a transition specified by source and target + @param source The source 'code gen target' + @param target The target 'code gen target' + @return The compiler that is used for the transition. Returns SLANG_PASS_THROUGH_NONE if it is not defined + */ + virtual SLANG_NO_THROW SlangPassThrough SLANG_MCALL getDownstreamCompilerForTransition(SlangCompileTarget source, SlangCompileTarget target) = 0; + + /** Get the time in seconds spent in the slang and downstream compiler.
+ */ + virtual SLANG_NO_THROW void SLANG_MCALL getCompilerElapsedTime(double* outTotalTime, double* outDownstreamTime) = 0; + + /** Specify a spirv.core.grammar.json file to load and use when + * parsing and checking any SPIR-V code + */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL setSPIRVCoreGrammar( + char const* jsonPath) = 0; + }; + + #define SLANG_UUID_IGlobalSession IGlobalSession::getTypeGuid() + + /*! + @brief A request for one or more compilation actions to be performed. + */ + struct ICompileRequest : public ISlangUnknown + { + SLANG_COM_INTERFACE( 0x96d33993, 0x317c, 0x4db5, { 0xaf, 0xd8, 0x66, 0x6e, 0xe7, 0x72, 0x48, 0xe2 } ) + + /** Set the filesystem hook to use for a compile request + + The provided `fileSystem` will be used to load any files that + need to be loaded during processing of the compile `request`. + This includes: + + - Source files loaded via `spAddTranslationUnitSourceFile` + - Files referenced via `#include` + - Files loaded to resolve `#import` operations + */ + virtual SLANG_NO_THROW void SLANG_MCALL setFileSystem( + ISlangFileSystem* fileSystem) = 0; + + /*! + @brief Set flags to be used for compilation. + */ + virtual SLANG_NO_THROW void SLANG_MCALL setCompileFlags( + SlangCompileFlags flags) = 0; + + /*! + @brief Returns the compilation flags previously set with `setCompileFlags` + */ + virtual SLANG_NO_THROW SlangCompileFlags SLANG_MCALL getCompileFlags() = 0; + + /*! + @brief Set whether to dump intermediate results (for debugging) or not. + */ + virtual SLANG_NO_THROW void SLANG_MCALL setDumpIntermediates( + int enable) = 0; + + virtual SLANG_NO_THROW void SLANG_MCALL setDumpIntermediatePrefix( + const char* prefix) = 0; + + /*! + @brief Set whether (and how) `#line` directives should be output. + */ + virtual SLANG_NO_THROW void SLANG_MCALL setLineDirectiveMode( + SlangLineDirectiveMode mode) = 0; + + /*! + @brief Sets the target for code generation. + @param target The code generation target.
Possible values are: + - SLANG_GLSL. Generates GLSL code. + - SLANG_HLSL. Generates HLSL code. + - SLANG_SPIRV. Generates SPIR-V code. + */ + virtual SLANG_NO_THROW void SLANG_MCALL setCodeGenTarget( + SlangCompileTarget target) = 0; + + /*! + @brief Add a code-generation target to be used. + */ + virtual SLANG_NO_THROW int SLANG_MCALL addCodeGenTarget( + SlangCompileTarget target) = 0; + + virtual SLANG_NO_THROW void SLANG_MCALL setTargetProfile( + int targetIndex, + SlangProfileID profile) = 0; + + virtual SLANG_NO_THROW void SLANG_MCALL setTargetFlags( + int targetIndex, + SlangTargetFlags flags) = 0; + + + /*! + @brief Set the floating point mode (e.g., precise or fast) to use for a target. + */ + virtual SLANG_NO_THROW void SLANG_MCALL setTargetFloatingPointMode( + int targetIndex, + SlangFloatingPointMode mode) = 0; + + /* DEPRECATED: use `spSetMatrixLayoutMode` instead. */ + virtual SLANG_NO_THROW void SLANG_MCALL setTargetMatrixLayoutMode( + int targetIndex, + SlangMatrixLayoutMode mode) = 0; + + virtual SLANG_NO_THROW void SLANG_MCALL setMatrixLayoutMode( + SlangMatrixLayoutMode mode) = 0; + + /*! + @brief Set the level of debug information to produce. + */ + virtual SLANG_NO_THROW void SLANG_MCALL setDebugInfoLevel( + SlangDebugInfoLevel level) = 0; + + /*! + @brief Set the level of optimization to perform. + */ + virtual SLANG_NO_THROW void SLANG_MCALL setOptimizationLevel( + SlangOptimizationLevel level) = 0; + + + + /*! + @brief Set the container format to be used for binary output.
+ */ + virtual SLANG_NO_THROW void SLANG_MCALL setOutputContainerFormat( + SlangContainerFormat format) = 0; + + virtual SLANG_NO_THROW void SLANG_MCALL setPassThrough( + SlangPassThrough passThrough) = 0; + + + virtual SLANG_NO_THROW void SLANG_MCALL setDiagnosticCallback( + SlangDiagnosticCallback callback, + void const* userData) = 0; + + virtual SLANG_NO_THROW void SLANG_MCALL setWriter( + SlangWriterChannel channel, + ISlangWriter* writer) = 0; + + virtual SLANG_NO_THROW ISlangWriter* SLANG_MCALL getWriter( + SlangWriterChannel channel) = 0; + + /*! + @brief Add a path to use when searching for referenced files. + This will be used for both `#include` directives and also for explicit `__import` declarations. + @param searchDir The additional search directory. + */ + virtual SLANG_NO_THROW void SLANG_MCALL addSearchPath( + const char* searchDir) = 0; + + /*! + @brief Add a macro definition to be used during preprocessing. + @param key The name of the macro to define. + @param value The value of the macro to define. + */ + virtual SLANG_NO_THROW void SLANG_MCALL addPreprocessorDefine( + const char* key, + const char* value) = 0; + + /*! + @brief Set options using arguments as if specified via command line. + @return Returns SlangResult. On success SLANG_SUCCEEDED(result) is true. + */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL processCommandLineArguments( + char const* const* args, + int argCount) = 0; + + /** Add a distinct translation unit to the compilation request + + `name` is optional. + Returns the zero-based index of the translation unit created. + */ + virtual SLANG_NO_THROW int SLANG_MCALL addTranslationUnit( + SlangSourceLanguage language, + char const* name) = 0; + + + /** Set a default module name. Translation units will default to this module name if one is not + passed. If not set each translation unit will get a unique name.
+ */ + virtual SLANG_NO_THROW void SLANG_MCALL setDefaultModuleName( + const char* defaultModuleName) = 0; + + /** Add a preprocessor definition that is scoped to a single translation unit. + + @param translationUnitIndex The index of the translation unit to get the definition. + @param key The name of the macro to define. + @param value The value of the macro to define. + */ + virtual SLANG_NO_THROW void SLANG_MCALL addTranslationUnitPreprocessorDefine( + int translationUnitIndex, + const char* key, + const char* value) = 0; + + + /** Add a source file to the given translation unit. + + If a user-defined file system has been specified via + `spSetFileSystem`, then it will be used to load the + file at `path`. Otherwise, Slang will use the OS + file system. + + This function does *not* search for a file using + the registered search paths (`spAddSearchPath`), + and instead uses the given `path` as-is. + */ + virtual SLANG_NO_THROW void SLANG_MCALL addTranslationUnitSourceFile( + int translationUnitIndex, + char const* path) = 0; + + /** Add a source string to the given translation unit. + + @param translationUnitIndex The index of the translation unit to add source to. + @param path The file-system path that should be assumed for the source code. + @param source A null-terminated UTF-8 encoded string of source code. + + The implementation will make a copy of the source code data. + An application may free the buffer immediately after this call returns. + + The `path` will be used in any diagnostic output, as well + as to determine the base path when resolving relative + `#include`s. + */ + virtual SLANG_NO_THROW void SLANG_MCALL addTranslationUnitSourceString( + int translationUnitIndex, + char const* path, + char const* source) = 0; + + + /** Add a slang library - such that its contents can be referenced during linking. + This is equivalent to the -r command line option.
+ + @param libData The library data + @param libDataSize The size of the library data + */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL addLibraryReference( + const void* libData, + size_t libDataSize) = 0; + + /** Add a source string to the given translation unit. + + @param translationUnitIndex The index of the translation unit to add source to. + @param path The file-system path that should be assumed for the source code. + @param sourceBegin A pointer to a buffer of UTF-8 encoded source code. + @param sourceEnd A pointer to the end of the buffer specified in `sourceBegin` + + The implementation will make a copy of the source code data. + An application may free the buffer immediately after this call returns. + + The `path` will be used in any diagnostic output, as well + as to determine the base path when resolving relative + `#include`s. + */ + virtual SLANG_NO_THROW void SLANG_MCALL addTranslationUnitSourceStringSpan( + int translationUnitIndex, + char const* path, + char const* sourceBegin, + char const* sourceEnd) = 0; + + /** Add a blob of source code to the given translation unit. + + @param translationUnitIndex The index of the translation unit to add source to. + @param path The file-system path that should be assumed for the source code. + @param sourceBlob A blob containing UTF-8 encoded source code. + + The compile request will retain a reference to the blob. + + The `path` will be used in any diagnostic output, as well + as to determine the base path when resolving relative + `#include`s.
+ */ + virtual SLANG_NO_THROW void SLANG_MCALL addTranslationUnitSourceBlob( + int translationUnitIndex, + char const* path, + ISlangBlob* sourceBlob) = 0; + + /** Add an entry point in a particular translation unit + */ + virtual SLANG_NO_THROW int SLANG_MCALL addEntryPoint( + int translationUnitIndex, + char const* name, + SlangStage stage) = 0; + + /** Add an entry point in a particular translation unit, + with additional arguments that specify the concrete + type names for entry-point generic type parameters. + */ + virtual SLANG_NO_THROW int SLANG_MCALL addEntryPointEx( + int translationUnitIndex, + char const* name, + SlangStage stage, + int genericArgCount, + char const** genericArgs) = 0; + + /** Specify the arguments to use for global generic parameters. + */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL setGlobalGenericArgs( + int genericArgCount, + char const** genericArgs) = 0; + + /** Specify the concrete type to be used for a global "existential slot." + + Every shader parameter (or leaf field of a `struct`-type shader parameter) + that has an interface or array-of-interface type introduces an existential + slot. The number of slots consumed by a shader parameter, and the starting + slot of each parameter can be queried via the reflection API using + `SLANG_PARAMETER_CATEGORY_EXISTENTIAL_TYPE_PARAM`. + + In order to generate specialized code, a concrete type needs to be specified + for each existential slot. This function specifies the name of the type + (or in general a type *expression*) to use for a specific slot at the + global scope. + */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL setTypeNameForGlobalExistentialTypeParam( + int slotIndex, + char const* typeName) = 0; + + /** Specify the concrete type to be used for an entry-point "existential slot." + + Every shader parameter (or leaf field of a `struct`-type shader parameter) + that has an interface or array-of-interface type introduces an existential + slot.
The number of slots consumed by a shader parameter, and the starting + slot of each parameter can be queried via the reflection API using + `SLANG_PARAMETER_CATEGORY_EXISTENTIAL_TYPE_PARAM`. + + In order to generate specialized code, a concrete type needs to be specified + for each existential slot. This function specifies the name of the type + (or in general a type *expression*) to use for a specific slot at the + entry-point scope. + */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL setTypeNameForEntryPointExistentialTypeParam( + int entryPointIndex, + int slotIndex, + char const* typeName) = 0; + + /** Enable or disable an experimental, best-effort GLSL frontend + */ + virtual SLANG_NO_THROW void SLANG_MCALL setAllowGLSLInput( + bool value) = 0; + + /** Execute the compilation request. + + @returns SlangResult, SLANG_OK on success. Use SLANG_SUCCEEDED() and SLANG_FAILED() to test SlangResult. + */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL compile() = 0; + + + /** Get any diagnostic messages reported by the compiler. + + @returns A null-terminated UTF-8 encoded string of diagnostic messages. + + The returned pointer is only guaranteed to be valid + until `request` is destroyed. Applications that wish to + hold on to the diagnostic output for longer should use + `getDiagnosticOutputBlob`. + */ + virtual SLANG_NO_THROW char const* SLANG_MCALL getDiagnosticOutput() = 0; + + /** Get diagnostic messages reported by the compiler. + + @param outBlob A pointer to receive a blob holding a null-terminated UTF-8 encoded string of diagnostic messages. + @returns A `SlangResult` indicating success or failure. + */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL getDiagnosticOutputBlob( + ISlangBlob** outBlob) = 0; + + + /** Get the number of files that this compilation depended on. + + This includes both the explicit source files, as well as any + additional files that were transitively referenced (e.g., via + a `#include` directive).
+ */ + virtual SLANG_NO_THROW int SLANG_MCALL getDependencyFileCount() = 0; + + /** Get the path to a file this compilation depended on. + */ + virtual SLANG_NO_THROW char const* SLANG_MCALL getDependencyFilePath( + int index) = 0; + + /** Get the number of translation units associated with the compilation request + */ + virtual SLANG_NO_THROW int SLANG_MCALL getTranslationUnitCount() = 0; + + /** Get the output source code associated with a specific entry point. + + The lifetime of the output pointer is the same as `request`. + */ + virtual SLANG_NO_THROW char const* SLANG_MCALL getEntryPointSource( + int entryPointIndex) = 0; + + /** Get the output bytecode associated with a specific entry point. + + The lifetime of the output pointer is the same as `request`. + */ + virtual SLANG_NO_THROW void const* SLANG_MCALL getEntryPointCode( + int entryPointIndex, + size_t* outSize) = 0; + + /** Get the output code associated with a specific entry point. + + @param entryPointIndex The index of the entry point to get code for. + @param targetIndex The index of the target to get code for (default: zero). + @param outBlob A pointer that will receive the blob of code + @returns A `SlangResult` to indicate success or failure. + */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL getEntryPointCodeBlob( + int entryPointIndex, + int targetIndex, + ISlangBlob** outBlob) = 0; + + /** Get entry point 'callable' functions accessible through the ISlangSharedLibrary interface. + + Note that the functions remain in scope as long as the ISlangSharedLibrary interface is in scope. + + NOTE! Requires a compilation target of SLANG_HOST_CALLABLE. + + @param entryPointIndex The index of the entry point to get code for. + @param targetIndex The index of the target to get code for (default: zero). + @param outSharedLibrary A pointer to a ISharedLibrary interface which functions can be queried on. + @returns A `SlangResult` to indicate success or failure.
+ */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL getEntryPointHostCallable( + int entryPointIndex, + int targetIndex, + ISlangSharedLibrary** outSharedLibrary) = 0; + + /** Get the output code associated with a specific target. + + @param targetIndex The index of the target to get code for (default: zero). + @param outBlob A pointer that will receive the blob of code + @returns A `SlangResult` to indicate success or failure. + */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL getTargetCodeBlob( + int targetIndex, + ISlangBlob** outBlob) = 0; + + /** Get 'callable' functions for a target accessible through the ISlangSharedLibrary interface. + + Note that the functions remain in scope as long as the ISlangSharedLibrary interface is in scope. + + NOTE! Requires a compilation target of SLANG_HOST_CALLABLE. + + @param targetIndex The index of the target to get code for (default: zero). + @param outSharedLibrary A pointer to a ISharedLibrary interface which functions can be queried on. + @returns A `SlangResult` to indicate success or failure. + */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL getTargetHostCallable( + int targetIndex, + ISlangSharedLibrary** outSharedLibrary) = 0; + + /** Get the output bytecode associated with an entire compile request. + + The lifetime of the output pointer is the same as `request` and the last spCompile. + + @param outSize The size of the container's contents in bytes. Will be zero if there is no code available. + @returns Pointer to start of the contained data, or nullptr if there is no code available. + */ + virtual SLANG_NO_THROW void const* SLANG_MCALL getCompileRequestCode( + size_t* outSize) = 0; + + /** Get the compilation result as a file system. + The result is not written to the actual OS file system, but is made available as an + in-memory representation. + */ + virtual SLANG_NO_THROW ISlangMutableFileSystem* SLANG_MCALL getCompileRequestResultAsFileSystem() = 0; + + /** Return the container code as a blob.
The container blob is created as part of a compilation (with spCompile), + and a container is produced with a suitable ContainerFormat. + + @param outBlob The blob containing the container data. + @returns A `SlangResult` to indicate success or failure. + */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL getContainerCode( + ISlangBlob** outBlob) = 0; + + /** Load a repro from the specified memory. + + Should only be performed on a newly created request. + + NOTE! When using the fileSystem, files will be loaded via their `unique names` as if they are part of the flat file system. This + mechanism is described more fully in docs/repro.md. + + @param fileSystem An (optional) filesystem. Pass nullptr to just use contents of repro held in data. + @param data The data to load from. + @param size The size of the data to load from. + @returns A `SlangResult` to indicate success or failure. + */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL loadRepro( + ISlangFileSystem* fileSystem, + const void* data, + size_t size) = 0; + + /** Save repro state. Should *typically* be performed after spCompile, so that everything + that is needed for a compilation is available. + + @param outBlob Blob that will hold the serialized state + @returns A `SlangResult` to indicate success or failure. + */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL saveRepro( + ISlangBlob** outBlob) = 0; + + /** Enable repro capture. + + Should be set after any ISlangFileSystem has been set, but before any compilation. It ensures that everything + that the ISlangFileSystem accesses will be correctly recorded. + Note that if a ISlangFileSystem/ISlangFileSystemExt isn't explicitly set (ie the default is used), then the + request will automatically be set up to record everything appropriate. + + @returns A `SlangResult` to indicate success or failure. + */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL enableReproCapture() = 0; + + /** Get the (linked) program for a compile request.
+ + The linked program will include all of the global-scope modules for the + translation units in the program, plus any modules that they `import` + (transitively), specialized to any global specialization arguments that + were provided via the API. + */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL getProgram( + slang::IComponentType** outProgram) = 0; + + /** Get the (partially linked) component type for an entry point. + + The returned component type will include the entry point at the + given index, and will be specialized using any specialization arguments + that were provided for it via the API. + + The returned component will *not* include the modules representing + the global scope and its dependencies/specialization, so a client + program will typically want to compose this component type with + the one returned by `spCompileRequest_getProgram` to get a complete + and usable component type from which kernel code can be requested. + */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL getEntryPoint( + SlangInt entryPointIndex, + slang::IComponentType** outEntryPoint) = 0; + + /** Get the (un-linked) module for a translation unit. + + The returned module will not be linked against any dependencies, + nor against any entry points (even entry points declared inside + the module). Similarly, the module will not be specialized + to the arguments that might have been provided via the API. + + This function provides an atomic unit of loaded code that + is suitable for looking up types and entry points in the + given module, and for linking together to produce a composite + program that matches the needs of an application. + */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL getModule( + SlangInt translationUnitIndex, + slang::IModule** outModule) = 0; + + /** Get the `ISession` handle behind the `SlangCompileRequest`. + TODO(JS): Arguably this should just return the session pointer.
+ */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL getSession( + slang::ISession** outSession) = 0; + + /** Get reflection data from a compilation request */ + virtual SLANG_NO_THROW SlangReflection* SLANG_MCALL getReflection() = 0; + + /** Make output specially handled for command line output */ + virtual SLANG_NO_THROW void SLANG_MCALL setCommandLineCompilerMode() = 0; + + /** Add a defined capability that should be assumed available on the target */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL addTargetCapability( + SlangInt targetIndex, + SlangCapabilityID capability) = 0; + + /** Get the (linked) program for a compile request, including all entry points. + + The resulting program will include all of the global-scope modules for the + translation units in the program, plus any modules that they `import` + (transitively), specialized to any global specialization arguments that + were provided via the API, as well as all entry points specified for compilation, + specialized to their entry-point specialization arguments. + */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL getProgramWithEntryPoints( + slang::IComponentType** outProgram) = 0; + + virtual SLANG_NO_THROW SlangResult SLANG_MCALL isParameterLocationUsed( + SlangInt entryPointIndex, + SlangInt targetIndex, + SlangParameterCategory category, + SlangUInt spaceIndex, + SlangUInt registerIndex, + bool& outUsed) = 0; + + /** Set the line directive mode for a target. + */ + virtual SLANG_NO_THROW void SLANG_MCALL setTargetLineDirectiveMode( + SlangInt targetIndex, + SlangLineDirectiveMode mode) = 0; + + /** Set whether to use scalar buffer layouts for GLSL/Vulkan targets. + If true, the generated GLSL/Vulkan code will use `scalar` layout for storage buffers. + If false, the resulting code will use std430 for storage buffers.
+ */ + virtual SLANG_NO_THROW void SLANG_MCALL setTargetForceGLSLScalarBufferLayout(int targetIndex, bool forceScalarLayout) = 0; + + /** Overrides the severity of a specific diagnostic message. + + @param messageID Numeric identifier of the message to override, + as defined in the 1st parameter of the DIAGNOSTIC macro. + @param overrideSeverity New severity of the message. If the message is originally Error or Fatal, + the new severity cannot be lower than that. + */ + virtual SLANG_NO_THROW void SLANG_MCALL overrideDiagnosticSeverity( + SlangInt messageID, + SlangSeverity overrideSeverity) = 0; + + /** Returns the currently active flags of the request's diagnostic sink. */ + virtual SLANG_NO_THROW SlangDiagnosticFlags SLANG_MCALL getDiagnosticFlags() = 0; + + /** Sets the flags of the request's diagnostic sink. + The previously specified flags are discarded. */ + virtual SLANG_NO_THROW void SLANG_MCALL setDiagnosticFlags(SlangDiagnosticFlags flags) = 0; + + /** Set the debug format to be used for debugging information */ + virtual SLANG_NO_THROW void SLANG_MCALL setDebugInfoFormat(SlangDebugInfoFormat debugFormat) = 0; + + virtual SLANG_NO_THROW void SLANG_MCALL setEnableEffectAnnotations(bool value) = 0; + + virtual SLANG_NO_THROW void SLANG_MCALL setReportDownstreamTime(bool value) = 0; + + virtual SLANG_NO_THROW void SLANG_MCALL setReportPerfBenchmark(bool value) = 0; + + }; + + #define SLANG_UUID_ICompileRequest ICompileRequest::getTypeGuid() + + /** Description of a code generation target. + */ + struct TargetDesc + { + /** The size of this structure, in bytes. + */ + size_t structureSize = sizeof(TargetDesc); + + /** The target format to generate code for (e.g., SPIR-V, DXIL, etc.) + */ + SlangCompileTarget format = SLANG_TARGET_UNKNOWN; + + /** The compilation profile supported by the target (e.g., "Shader Model 5.1") + */ + SlangProfileID profile = SLANG_PROFILE_UNKNOWN; + + /** Flags for the code generation target. Currently unused.
*/ + SlangTargetFlags flags = kDefaultTargetFlags; + + /** Default mode to use for floating-point operations on the target. + */ + SlangFloatingPointMode floatingPointMode = SLANG_FLOATING_POINT_MODE_DEFAULT; + + /** The line directive mode for output source code. + */ + SlangLineDirectiveMode lineDirectiveMode = SLANG_LINE_DIRECTIVE_MODE_DEFAULT; + + /** Whether to force `scalar` layout for GLSL shader storage buffers. + */ + bool forceGLSLScalarBufferLayout = false; + }; + + typedef uint32_t SessionFlags; + enum + { + kSessionFlags_None = 0, + + /** Use application-specific policy for semantics of the `shared` keyword. + + This is a legacy/compatibility flag to help an existing Slang client + migrate to new language features, and should *not* be used by other + clients. This feature may be removed in a future release without a + deprecation warning, and this bit may be re-used for another feature. + You have been warned. + */ + kSessionFlag_FalcorCustomSharedKeywordSemantics = 1 << 0, + }; + + struct PreprocessorMacroDesc + { + const char* name; + const char* value; + }; + + struct SessionDesc + { + /** The size of this structure, in bytes. + */ + size_t structureSize = sizeof(SessionDesc); + + /** Code generation targets to include in the session. + */ + TargetDesc const* targets = nullptr; + SlangInt targetCount = 0; + + /** Flags to configure the session. + */ + SessionFlags flags = kSessionFlags_None; + + /** Default layout to assume for variables with matrix types. + */ + SlangMatrixLayoutMode defaultMatrixLayoutMode = SLANG_MATRIX_LAYOUT_ROW_MAJOR; + + /** Paths to use when searching for `#include`d or `import`ed files.
+ */ + char const* const* searchPaths = nullptr; + SlangInt searchPathCount = 0; + + PreprocessorMacroDesc const* preprocessorMacros = nullptr; + SlangInt preprocessorMacroCount = 0; + + ISlangFileSystem* fileSystem = nullptr; + + bool enableEffectAnnotations = false; + bool allowGLSLSyntax = false; + }; + + enum class ContainerType + { + None, UnsizedArray, StructuredBuffer, ConstantBuffer, ParameterBlock + }; + + /** A session provides a scope for code that is loaded. + + A session can be used to load modules of Slang source code, + and to request target-specific compiled binaries and layout + information. + + In order to be able to load code, the session owns a set + of active "search paths" for resolving `#include` directives + and `import` declarations, as well as a set of global + preprocessor definitions that will be used for all code + that gets `import`ed in the session. + + If multiple user shaders are loaded in the same session, + and import the same module (e.g., two source files do `import X`) + then there will only be one copy of `X` loaded within the session. + + In order to be able to generate target code, the session + owns a list of available compilation targets, which specify + code generation options. + + Code loaded and compiled within a session is owned by the session + and will remain resident in memory until the session is released. + Applications wishing to control the memory usage for compiled + and loaded code should use multiple sessions. + */ + struct ISession : public ISlangUnknown + { + SLANG_COM_INTERFACE( 0x67618701, 0xd116, 0x468f, { 0xab, 0x3b, 0x47, 0x4b, 0xed, 0xce, 0xe, 0x3d } ) + + /** Get the global session that was used to create this session. + */ + virtual SLANG_NO_THROW IGlobalSession* SLANG_MCALL getGlobalSession() = 0; + + /** Load a module as it would be by code using `import`.
+ */ + virtual SLANG_NO_THROW IModule* SLANG_MCALL loadModule( + const char* moduleName, + IBlob** outDiagnostics = nullptr) = 0; + + /** Load a module from Slang source code. + */ + virtual SLANG_NO_THROW IModule* SLANG_MCALL loadModuleFromSource( + const char* moduleName, + const char* path, + slang::IBlob* source, + slang::IBlob** outDiagnostics = nullptr) = 0; + + /** Combine multiple component types to create a composite component type. + + The `componentTypes` array must contain `componentTypeCount` pointers + to component types that were loaded or created using the same session. + + The shader parameters and specialization parameters of the composite will + be the union of those in `componentTypes`. The relative order of child + component types is significant, and will affect the order in which + parameters are reflected and laid out. + + The entry-point functions of the composite will be the union of those in + `componentTypes`, and will follow the ordering of `componentTypes`. + + The requirements of the composite component type will be a subset of + those in `componentTypes`. If an entry in `componentTypes` has a requirement + that can be satisfied by another entry, then the composition will + satisfy the requirement and it will not appear as a requirement of + the composite. If multiple entries in `componentTypes` have a requirement + for the same type, then only the first such requirement will be retained + on the composite. The relative ordering of requirements on the composite + will otherwise match that of `componentTypes`. + + If any diagnostics are generated during creation of the composite, they + will be written to `outDiagnostics`. If an error is encountered, the + function will return null. + + It is an error to create a composite component type that recursively + aggregates a single module more than once.
+ */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL createCompositeComponentType( + IComponentType* const* componentTypes, + SlangInt componentTypeCount, + IComponentType** outCompositeComponentType, + ISlangBlob** outDiagnostics = nullptr) = 0; + + /** Specialize a type based on type arguments. + */ + virtual SLANG_NO_THROW TypeReflection* SLANG_MCALL specializeType( + TypeReflection* type, + SpecializationArg const* specializationArgs, + SlangInt specializationArgCount, + ISlangBlob** outDiagnostics = nullptr) = 0; + + + /** Get the layout of `type` on the target chosen by `targetIndex`. + */ + virtual SLANG_NO_THROW TypeLayoutReflection* SLANG_MCALL getTypeLayout( + TypeReflection* type, + SlangInt targetIndex = 0, + LayoutRules rules = LayoutRules::Default, + ISlangBlob** outDiagnostics = nullptr) = 0; + + /** Get a container type from `elementType`. For example, given type `T`, returns + a type that represents `StructuredBuffer<T>`. + + @param `elementType`: the element type to wrap around. + @param `containerType`: the type of the container to wrap `elementType` in. + @param `outDiagnostics`: a blob to receive diagnostic messages. + */ + virtual SLANG_NO_THROW TypeReflection* SLANG_MCALL getContainerType( + TypeReflection* elementType, + ContainerType containerType, + ISlangBlob** outDiagnostics = nullptr) = 0; + + /** Return a `TypeReflection` that represents the `__Dynamic` type. + This type can be used as a specialization argument to indicate using + dynamic dispatch. + */ + virtual SLANG_NO_THROW TypeReflection* SLANG_MCALL getDynamicType() = 0; + + /** Get the mangled name for a type RTTI object. + */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL getTypeRTTIMangledName( + TypeReflection* type, + ISlangBlob** outNameBlob) = 0; + + /** Get the mangled name for a type witness.
+ */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL getTypeConformanceWitnessMangledName( + TypeReflection* type, + TypeReflection* interfaceType, + ISlangBlob** outNameBlob) = 0; + + /** Get the sequential ID used to identify a type witness in a dynamic object. + */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL getTypeConformanceWitnessSequentialID( + slang::TypeReflection* type, + slang::TypeReflection* interfaceType, + uint32_t* outId) = 0; + + /** Create a request to load/compile front-end code. + */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL createCompileRequest( + SlangCompileRequest** outCompileRequest) = 0; + + + /** Creates an `IComponentType` that represents a type's conformance to an interface. + The retrieved `ITypeConformance` objects can be included in a composite `IComponentType` + to explicitly specify which implementation types should be included in the final compiled + code. For example, if a module defines an `IMaterial` interface and `AMaterial`, + `BMaterial`, `CMaterial` types that implement the interface, the user can exclude the + `CMaterial` implementation from the resulting shader code by explicitly adding + `AMaterial:IMaterial` and `BMaterial:IMaterial` conformances to a composite + `IComponentType` and get entry point code from it. The resulting code will not have + anything related to `CMaterial` in the dynamic dispatch logic. If the user does not + explicitly include any `TypeConformances` to an interface type, all implementations to + that interface will be included by default. By linking an `ITypeConformance`, the user is + also given the opportunity to specify the dispatch ID of the implementation type. If + `conformanceIdOverride` is -1, there will be no override behavior and Slang will + automatically assign IDs to implementation types. The automatically assigned IDs can be + queried via `ISession::getTypeConformanceWitnessSequentialID`.
+ + Returns SLANG_OK if succeeds, or SLANG_FAIL if `type` does not conform to `interfaceType`. + */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL createTypeConformanceComponentType( + slang::TypeReflection* type, + slang::TypeReflection* interfaceType, + ITypeConformance** outConformance, + SlangInt conformanceIdOverride, + ISlangBlob** outDiagnostics) = 0; + }; + + #define SLANG_UUID_ISession ISession::getTypeGuid() + + /** A component type is a unit of shader code layout, reflection, and linking. + + A component type is a unit of shader code that can be included into + a linked and compiled shader program. Each component type may have: + + * Zero or more uniform shader parameters, representing textures, + buffers, etc. that the code in the component depends on. + + * Zero or more *specialization* parameters, which are type or + value parameters that can be used to synthesize specialized + versions of the component type. + + * Zero or more entry points, which are the individually invocable + kernels that can have final code generated. + + * Zero or more *requirements*, which are other component + types on which the component type depends. + + One example of a component type is a module of Slang code: + + * The global-scope shader parameters declared in the module are + the parameters when considered as a component type. + + * Any global-scope generic or interface type parameters introduce + specialization parameters for the module. + + * A module does not by default include any entry points when + considered as a component type (although the code of the + module might *declare* some entry points). + + * Any other modules that are `import`ed in the source code + become requirements of the module, when considered as a + component type. + + An entry point is another example of a component type: + + * The `uniform` parameters of the entry point function are + its shader parameters when considered as a component type. 
+ + * Any generic or interface-type parameters of the entry point + introduce specialization parameters. + + * An entry point component type exposes a single entry point (itself). + + * An entry point has one requirement for the module in which + it was defined. + + Component types can be manipulated in a few ways: + + * Multiple component types can be combined into a composite, which + combines all of their code, parameters, etc. + + * A component type can be specialized, by "plugging in" types and + values for its specialization parameters. + + * A component type can be laid out for a particular target, giving + offsets/bindings to the shader parameters it contains. + + * Generated kernel code can be requested for entry points. + + */ + struct IComponentType : public ISlangUnknown + { + SLANG_COM_INTERFACE(0x5bc42be8, 0x5c50, 0x4929, { 0x9e, 0x5e, 0xd1, 0x5e, 0x7c, 0x24, 0x1, 0x5f }) + + /** Get the runtime session that this component type belongs to. + */ + virtual SLANG_NO_THROW ISession* SLANG_MCALL getSession() = 0; + + /** Get the layout for this program for the chosen `targetIndex`. + + The resulting layout will establish offsets/bindings for all + of the global and entry-point shader parameters in the + component type. + + If this component type has specialization parameters (that is, + it is not fully specialized), then the resulting layout may + be incomplete, and plugging in arguments for generic specialization + parameters may result in a component type that doesn't have + a compatible layout. If the component type only uses + interface-type specialization parameters, then the layout + for a specialization should be compatible with an unspecialized + layout (all parameters in the unspecialized layout will have + the same offset/binding in the specialized layout). + + If this component type is combined into a composite, then + the absolute offsets/bindings of parameters may not stay the same. 
+ If the shader parameters in a component type don't make + use of explicit binding annotations (e.g., `register(...)`), + then the *relative* offset of shader parameters will stay + the same when it is used in a composition. + */ + virtual SLANG_NO_THROW ProgramLayout* SLANG_MCALL getLayout( + SlangInt targetIndex = 0, + IBlob** outDiagnostics = nullptr) = 0; + + /** Get the number of (unspecialized) specialization parameters for the component type. + */ + virtual SLANG_NO_THROW SlangInt SLANG_MCALL getSpecializationParamCount() = 0; + + /** Get the compiled code for the entry point at `entryPointIndex` for the chosen `targetIndex`. + + Entry point code can only be computed for a component type that + has no specialization parameters (it must be fully specialized) + and that has no requirements (it must be fully linked). + + If code has not already been generated for the given entry point and target, + then a compilation error may be detected, in which case `outDiagnostics` + (if non-null) will be filled in with a blob of messages diagnosing the error. + */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL getEntryPointCode( + SlangInt entryPointIndex, + SlangInt targetIndex, + IBlob** outCode, + IBlob** outDiagnostics = nullptr) = 0; + + /** Get the compilation result as a file system. + + Has the same requirements as getEntryPointCode. + + The result is not written to the actual OS file system, but is made available as an + in-memory representation. + */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL getResultAsFileSystem( + SlangInt entryPointIndex, + SlangInt targetIndex, + ISlangMutableFileSystem** outFileSystem) = 0; + + /** Compute a hash for the entry point at `entryPointIndex` for the chosen `targetIndex`. + + This computes a hash based on all the dependencies for this component type as well as the + target settings affecting the compiler backend.
The computed hash is used as a key for caching + the output of the compiler backend to implement shader caching. + */ + virtual SLANG_NO_THROW void SLANG_MCALL getEntryPointHash( + SlangInt entryPointIndex, + SlangInt targetIndex, + IBlob** outHash) = 0; + + /** Specialize the component by binding its specialization parameters to concrete arguments. + + The `specializationArgs` array must have `specializationArgCount` entries, and + this must match the number of specialization parameters on this component type. + + If any diagnostics (errors or warnings) are produced, they will be written to `outDiagnostics`. + */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL specialize( + SpecializationArg const* specializationArgs, + SlangInt specializationArgCount, + IComponentType** outSpecializedComponentType, + ISlangBlob** outDiagnostics = nullptr) = 0; + + /** Link this component type against all of its unsatisfied dependencies. + + A component type may have unsatisfied dependencies. For example, a module + depends on any other modules it `import`s, and an entry point depends + on the module that defined it. + + A user can manually satisfy dependencies by creating a composite + component type, and when doing so they retain full control over + the relative ordering of shader parameters in the resulting layout. + + It is an error to try to generate/access compiled kernel code for + a component type with unresolved dependencies, so if dependencies + remain after whatever manual composition steps an application + cares to perform, the `link()` function can be used to automatically + compose in any remaining dependencies. The order of parameters + (and hence the global layout) that results will be deterministic, + but is not currently documented.
+ */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL link( + IComponentType** outLinkedComponentType, + ISlangBlob** outDiagnostics = nullptr) = 0; + + /** Get entry point 'callable' functions accessible through the ISlangSharedLibrary interface. + + The functions remain in scope as long as the ISlangSharedLibrary interface is in scope. + + NOTE! Requires a compilation target of SLANG_HOST_CALLABLE. + + @param entryPointIndex The index of the entry point to get code for. + @param targetIndex The index of the target to get code for (default: zero). + @param outSharedLibrary A pointer to a ISharedLibrary interface which functions can be queried on. + @returns A `SlangResult` to indicate success or failure. + */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL getEntryPointHostCallable( + int entryPointIndex, + int targetIndex, + ISlangSharedLibrary** outSharedLibrary, + slang::IBlob** outDiagnostics = 0) = 0; + + /** Get a new ComponentType object that represents a renamed entry point. + + The current object must be a single EntryPoint, or a CompositeComponentType or + SpecializedComponentType that contains one EntryPoint component. + */ + virtual SLANG_NO_THROW SlangResult SLANG_MCALL renameEntryPoint( + const char* newName, IComponentType** outEntryPoint) = 0; + }; + #define SLANG_UUID_IComponentType IComponentType::getTypeGuid() + + struct IEntryPoint : public IComponentType + { + SLANG_COM_INTERFACE(0x8f241361, 0xf5bd, 0x4ca0, { 0xa3, 0xac, 0x2, 0xf7, 0xfa, 0x24, 0x2, 0xb8 }) + }; + + #define SLANG_UUID_IEntryPoint IEntryPoint::getTypeGuid() + + struct ITypeConformance : public IComponentType + { + SLANG_COM_INTERFACE(0x73eb3147, 0xe544, 0x41b5, { 0xb8, 0xf0, 0xa2, 0x44, 0xdf, 0x21, 0x94, 0xb }) + }; + #define SLANG_UUID_ITypeConformance ITypeConformance::getTypeGuid() + + /** A module is the granularity of shader code compilation and loading. + + In most cases a module corresponds to a single compile "translation unit." 
+ This will often be a single `.slang` or `.hlsl` file and everything it + `#include`s. + + Notably, a module `M` does *not* include the things it `import`s, as these + are distinct modules that `M` depends on. There is a directed graph of + module dependencies, and all modules in the graph must belong to the + same session (`ISession`). + + A module establishes a namespace for looking up types, functions, etc. + */ + struct IModule : public IComponentType + { + SLANG_COM_INTERFACE(0xc720e64, 0x8722, 0x4d31, { 0x89, 0x90, 0x63, 0x8a, 0x98, 0xb1, 0xc2, 0x79 }) + + virtual SLANG_NO_THROW SlangResult SLANG_MCALL findEntryPointByName( + char const* name, + IEntryPoint** outEntryPoint) = 0; + + /// Get the number of entry points defined in the module. An entry point defined in a module + /// is by default not included in the linkage, so calls to `IComponentType::getEntryPointCount` + /// on an `IModule` instance will always return 0. However `IModule::getDefinedEntryPointCount` + /// will return the number of defined entry points. + virtual SLANG_NO_THROW SlangInt32 SLANG_MCALL getDefinedEntryPointCount() = 0; + /// Get the entry point at `index` among the entry points defined in the module. + virtual SLANG_NO_THROW SlangResult SLANG_MCALL + getDefinedEntryPoint(SlangInt32 index, IEntryPoint** outEntryPoint) = 0; + }; + + #define SLANG_UUID_IModule IModule::getTypeGuid() + + /** Argument used for specialization to types/values. + */ + struct SpecializationArg + { + enum class Kind : int32_t + { + Unknown, /**< An invalid specialization argument. */ + Type, /**< Specialize to a type. */ + }; + + /** The kind of specialization argument. */ + Kind kind; + union + { + /** A type specialization argument, used for `Kind::Type`.
*/ + TypeReflection* type; + }; + + static SpecializationArg fromType(TypeReflection* inType) + { + SpecializationArg rs; + rs.kind = Kind::Type; + rs.type = inType; + return rs; + } + }; +} + +// Passed into functions to create globalSession to identify the API version client code is +// using. +#define SLANG_API_VERSION 0 + +/* Create a global session, with built in StdLib. + +@param apiVersion Pass in SLANG_API_VERSION +@param outGlobalSession (out)The created global session. +*/ +SLANG_EXTERN_C SLANG_API SlangResult slang_createGlobalSession( + SlangInt apiVersion, + slang::IGlobalSession** outGlobalSession); + +/* Create a global session, but do not set up the stdlib. The stdlib can +then be loaded via loadStdLib or compileStdLib + +@param apiVersion Pass in SLANG_API_VERSION +@param outGlobalSession (out)The created global session that doesn't have a StdLib setup. + +NOTE! API is experimental and not ready for production code +*/ +SLANG_EXTERN_C SLANG_API SlangResult slang_createGlobalSessionWithoutStdLib( + SlangInt apiVersion, + slang::IGlobalSession** outGlobalSession); + +/* Returns a blob that contains the serialized stdlib. +Returns nullptr if there isn't an embedded stdlib. 
+*/ +SLANG_API ISlangBlob* slang_getEmbeddedStdLib(); + +namespace slang +{ + inline SlangResult createGlobalSession( + slang::IGlobalSession** outGlobalSession) + { + return slang_createGlobalSession(SLANG_API_VERSION, outGlobalSession); + } +} + +/** @see slang::ICompileRequest::getProgram +*/ +SLANG_EXTERN_C SLANG_API SlangResult spCompileRequest_getProgram( + SlangCompileRequest* request, + slang::IComponentType** outProgram); + +/** @see slang::ICompileRequest::getProgramWithEntryPoints +*/ +SLANG_EXTERN_C SLANG_API SlangResult spCompileRequest_getProgramWithEntryPoints( + SlangCompileRequest* request, + slang::IComponentType** outProgram); + +/** @see slang::ICompileRequest::getEntryPoint +*/ +SLANG_EXTERN_C SLANG_API SlangResult spCompileRequest_getEntryPoint( + SlangCompileRequest* request, + SlangInt entryPointIndex, + slang::IComponentType** outEntryPoint); + +/** @see slang::ICompileRequest::getModule +*/ +SLANG_EXTERN_C SLANG_API SlangResult spCompileRequest_getModule( + SlangCompileRequest* request, + SlangInt translationUnitIndex, + slang::IModule** outModule); + +/** @see slang::ICompileRequest::getSession +*/ +SLANG_EXTERN_C SLANG_API SlangResult spCompileRequest_getSession( + SlangCompileRequest* request, + slang::ISession** outSession); +#endif + +/* DEPRECATED DEFINITIONS + +Everything below this point represents deprecated APIs/definition that are only +being kept around for source/binary compatibility with old client code. New +code should not use any of these declarations, and the Slang API will drop these +declarations over time. 
+*/ + +#ifdef __cplusplus +extern "C" { +#endif + +#define SLANG_ERROR_INSUFFICIENT_BUFFER SLANG_E_BUFFER_TOO_SMALL +#define SLANG_ERROR_INVALID_PARAMETER SLANG_E_INVALID_ARG + +SLANG_API char const* spGetTranslationUnitSource( + SlangCompileRequest* request, + int translationUnitIndex); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/third_party/slang/lib/win64/gfx.dll b/third_party/slang/lib/win64/gfx.dll new file mode 100644 index 0000000..adb4c7a Binary files /dev/null and b/third_party/slang/lib/win64/gfx.dll differ diff --git a/third_party/slang/lib/win64/gfx.lib b/third_party/slang/lib/win64/gfx.lib new file mode 100644 index 0000000..e846f80 Binary files /dev/null and b/third_party/slang/lib/win64/gfx.lib differ diff --git a/third_party/slang/lib/win64/slang-glslang.dll b/third_party/slang/lib/win64/slang-glslang.dll new file mode 100644 index 0000000..388f4f5 Binary files /dev/null and b/third_party/slang/lib/win64/slang-glslang.dll differ diff --git a/third_party/slang/lib/win64/slang-llvm.dll b/third_party/slang/lib/win64/slang-llvm.dll new file mode 100644 index 0000000..31b596f Binary files /dev/null and b/third_party/slang/lib/win64/slang-llvm.dll differ diff --git a/third_party/slang/lib/win64/slang-rt.dll b/third_party/slang/lib/win64/slang-rt.dll new file mode 100644 index 0000000..3f8a76c Binary files /dev/null and b/third_party/slang/lib/win64/slang-rt.dll differ diff --git a/third_party/slang/lib/win64/slang-rt.lib b/third_party/slang/lib/win64/slang-rt.lib new file mode 100644 index 0000000..4700757 Binary files /dev/null and b/third_party/slang/lib/win64/slang-rt.lib differ diff --git a/third_party/slang/lib/win64/slang.dll b/third_party/slang/lib/win64/slang.dll new file mode 100644 index 0000000..23e3ae2 Binary files /dev/null and b/third_party/slang/lib/win64/slang.dll differ diff --git a/third_party/slang/lib/win64/slang.lib b/third_party/slang/lib/win64/slang.lib new file mode 100644 index 0000000..5cd47dd Binary files /dev/null 
and b/third_party/slang/lib/win64/slang.lib differ diff --git a/third_party/slang/lib/win64/slangc.exe b/third_party/slang/lib/win64/slangc.exe new file mode 100644 index 0000000..f6835d3 Binary files /dev/null and b/third_party/slang/lib/win64/slangc.exe differ diff --git a/third_party/slang/lib/win64/slangd.exe b/third_party/slang/lib/win64/slangd.exe new file mode 100644 index 0000000..fbe9386 Binary files /dev/null and b/third_party/slang/lib/win64/slangd.exe differ