diff --git a/D3D11Engine/D2DSettingsDialog.cpp b/D3D11Engine/D2DSettingsDialog.cpp index a85c130c..14e28e57 100644 --- a/D3D11Engine/D2DSettingsDialog.cpp +++ b/D3D11Engine/D2DSettingsDialog.cpp @@ -370,13 +370,11 @@ XRESULT D2DSettingsDialog::InitControls() { SV_Slider* visualFXDDSlider = new SV_Slider( MainView, MainPanel ); visualFXDDSlider->SetPositionAndSize( D2D1::Point2F( 10, 22 ), D2D1::SizeF( 150, 15 ) ); - - visualFXDDSlider->AlignUnder( visualFXDDLabel, 5 ); - + visualFXDDSlider->AlignUnder( visualFXDDLabel, 5 ); visualFXDDSlider->SetDataToUpdate( &Engine::GAPI->GetRendererState().RendererSettings.VisualFXDrawRadius ); visualFXDDSlider->SetIsIntegralSlider( true ); visualFXDDSlider->SetDisplayMultiplier( 0.001f ); - visualFXDDSlider->SetMinMax( 0.0f, 30000.0f ); + visualFXDDSlider->SetMinMax( 0.0f, 10000.0f ); visualFXDDSlider->SetValue( Engine::GAPI->GetRendererState().RendererSettings.VisualFXDrawRadius ); SV_Label* worldDDLabel = new SV_Label( MainView, MainPanel ); diff --git a/D3D11Engine/D3D11Effect.cpp b/D3D11Engine/D3D11Effect.cpp index d5f3f456..ac107e36 100644 --- a/D3D11Engine/D3D11Effect.cpp +++ b/D3D11Engine/D3D11Effect.cpp @@ -34,14 +34,14 @@ D3D11Effect::~D3D11Effect() { HRESULT LoadTextureArray( Microsoft::WRL::ComPtr pd3dDevice, Microsoft::WRL::ComPtr context, char* sTexturePrefix, int iNumTextures, ID3D11Texture2D** ppTex2D, ID3D11ShaderResourceView** ppSRV ); /** Fills a vector of random raindrop data */ -void D3D11Effect::FillRandomRaindropData( std::vector& data ) { +void D3D11Effect::FillRandomRaindropData( std::vector& data ) { /** Base taken from Nvidias Rain-Sample **/ float radius = Engine::GAPI->GetRendererState().RendererSettings.RainRadiusRange; float height = Engine::GAPI->GetRendererState().RendererSettings.RainHeightRange; for ( size_t i = 0; i < data.size(); i++ ) { - ParticleInstanceInfo raindrop; + RainParticleInstanceInfo raindrop; //use rejection sampling to generate random points inside a circle of radius 1 centered 
at 0, 0 float SeedX; float SeedZ; @@ -118,15 +118,15 @@ XRESULT D3D11Effect::DrawRain() { e->CreateVertexBuffer( &RainBufferInitial ); UINT numParticles = Engine::GAPI->GetRendererState().RendererSettings.RainNumParticles; - std::vector particles( numParticles ); + std::vector particles( numParticles ); // Fill the vector with random raindrop data FillRandomRaindropData( particles ); // Create vertexbuffers - RainBufferInitial->Init( &particles[0], particles.size() * sizeof( ParticleInstanceInfo ), (D3D11VertexBuffer::EBindFlags)(D3D11VertexBuffer::B_VERTEXBUFFER), D3D11VertexBuffer::U_DEFAULT, D3D11VertexBuffer::CA_NONE, "D3D11Effect::DrawRain::RainBufferInitial" ); - RainBufferDrawFrom->Init( &particles[0], particles.size() * sizeof( ParticleInstanceInfo ), (D3D11VertexBuffer::EBindFlags)(D3D11VertexBuffer::B_VERTEXBUFFER | D3D11VertexBuffer::B_STREAM_OUT), D3D11VertexBuffer::U_DEFAULT, D3D11VertexBuffer::CA_NONE, "D3D11Effect::DrawRain::RainBufferDrawFrom" ); - RainBufferStreamTo->Init( &particles[0], particles.size() * sizeof( ParticleInstanceInfo ), (D3D11VertexBuffer::EBindFlags)(D3D11VertexBuffer::B_VERTEXBUFFER | D3D11VertexBuffer::B_STREAM_OUT), D3D11VertexBuffer::U_DEFAULT, D3D11VertexBuffer::CA_NONE, "D3D11Effect::DrawRain::RainBufferStreamTo" ); + RainBufferInitial->Init( &particles[0], particles.size() * sizeof( RainParticleInstanceInfo ), (D3D11VertexBuffer::EBindFlags)(D3D11VertexBuffer::B_VERTEXBUFFER), D3D11VertexBuffer::U_DEFAULT, D3D11VertexBuffer::CA_NONE, "D3D11Effect::DrawRain::RainBufferInitial" ); + RainBufferDrawFrom->Init( &particles[0], particles.size() * sizeof( RainParticleInstanceInfo ), (D3D11VertexBuffer::EBindFlags)(D3D11VertexBuffer::B_VERTEXBUFFER | D3D11VertexBuffer::B_STREAM_OUT), D3D11VertexBuffer::U_DEFAULT, D3D11VertexBuffer::CA_NONE, "D3D11Effect::DrawRain::RainBufferDrawFrom" ); + RainBufferStreamTo->Init( &particles[0], particles.size() * sizeof( RainParticleInstanceInfo ), 
(D3D11VertexBuffer::EBindFlags)(D3D11VertexBuffer::B_VERTEXBUFFER | D3D11VertexBuffer::B_STREAM_OUT), D3D11VertexBuffer::U_DEFAULT, D3D11VertexBuffer::CA_NONE, "D3D11Effect::DrawRain::RainBufferStreamTo" ); firstFrame = true; @@ -146,7 +146,7 @@ XRESULT D3D11Effect::DrawRain() { firstFrame = false; - UINT stride = sizeof( ParticleInstanceInfo ); + UINT stride = sizeof( RainParticleInstanceInfo ); UINT offset = 0; // Bind buffer to draw from last frame @@ -156,6 +156,7 @@ XRESULT D3D11Effect::DrawRain() { e->GetContext()->SOSetTargets( 1, RainBufferStreamTo->GetVertexBuffer().GetAddressOf(), &offset ); // Apply shaders + e->GetContext()->PSSetShader( nullptr, nullptr, 0 ); particleAdvanceVS->Apply(); streamOutGS->Apply(); @@ -236,7 +237,7 @@ XRESULT D3D11Effect::DrawRain() { e->GetContext()->PSSetShaderResources( 0, 1, RainTextureArraySRV.GetAddressOf() ); // Draw the vertexbuffer - e->DrawVertexBuffer( RainBufferDrawFrom, numParticles, sizeof( ParticleInstanceInfo ) ); + e->DrawVertexBuffer( RainBufferDrawFrom, numParticles, sizeof( RainParticleInstanceInfo ) ); // Reset this e->GetContext()->IASetPrimitiveTopology( D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST ); diff --git a/D3D11Engine/D3D11Effect.h b/D3D11Engine/D3D11Effect.h index 4a874c32..a374ca0f 100644 --- a/D3D11Engine/D3D11Effect.h +++ b/D3D11Engine/D3D11Effect.h @@ -28,7 +28,7 @@ class D3D11Effect { protected: /** Fills a vector of random raindrop data */ - void FillRandomRaindropData( std::vector& data ); + void FillRandomRaindropData( std::vector& data ); /** Rain */ D3D11VertexBuffer* RainBufferInitial; diff --git a/D3D11Engine/D3D11GraphicsEngine.cpp b/D3D11Engine/D3D11GraphicsEngine.cpp index cfc5fab6..a1b24382 100644 --- a/D3D11Engine/D3D11GraphicsEngine.cpp +++ b/D3D11Engine/D3D11GraphicsEngine.cpp @@ -867,7 +867,7 @@ XRESULT D3D11GraphicsEngine::OnResize( INT2 newSize ) { GetDevice().Get(), Resolution.x, Resolution.y, DXGI_FORMAT_R16G16_FLOAT ); GBuffer1_Normals = std::make_unique( - GetDevice().Get(), 
Resolution.x, Resolution.y, DXGI_FORMAT_R16G16B16A16_FLOAT ); + GetDevice().Get(), Resolution.x, Resolution.y, DXGI_FORMAT_R8G8B8A8_SNORM ); GBuffer0_Diffuse = std::make_unique( GetDevice().Get(), Resolution.x, Resolution.y, DXGI_FORMAT_B8G8R8A8_UNORM ); @@ -5692,10 +5692,6 @@ void D3D11GraphicsEngine::DrawDecalList( const std::vector& decals, XMMATRIX mat = view * world * offset * scale; - ParticleInstanceInfo ii; - ii.scale = float2( 50, 50 ); - ii.color = 0xFFFFFFFF; - Engine::GAPI->SetWorldTransformXM( mat ); SetupVS_ExPerInstanceConstantBuffer(); diff --git a/D3D11Engine/D3D11ShaderManager.cpp b/D3D11Engine/D3D11ShaderManager.cpp index 79d14c90..9ed1c7eb 100644 --- a/D3D11Engine/D3D11ShaderManager.cpp +++ b/D3D11Engine/D3D11ShaderManager.cpp @@ -116,12 +116,12 @@ XRESULT D3D11ShaderManager::Init() { Shaders.push_back( ShaderInfo( "VS_ParticlePoint", "VS_ParticlePoint.hlsl", "v", 11 ) ); Shaders.back().cBufferSizes.push_back( sizeof( VS_ExConstantBuffer_PerFrame ) ); - Shaders.push_back( ShaderInfo( "VS_ParticlePointShaded", "VS_ParticlePointShaded.hlsl", "v", 11 ) ); + Shaders.push_back( ShaderInfo( "VS_ParticlePointShaded", "VS_ParticlePointShaded.hlsl", "v", 13 ) ); Shaders.back().cBufferSizes.push_back( sizeof( VS_ExConstantBuffer_PerFrame ) ); Shaders.back().cBufferSizes.push_back( sizeof( ParticlePointShadingConstantBuffer ) ); - Shaders.push_back( ShaderInfo( "VS_AdvanceRain", "VS_AdvanceRain.hlsl", "v", 11 ) ); + Shaders.push_back( ShaderInfo( "VS_AdvanceRain", "VS_AdvanceRain.hlsl", "v", 13 ) ); Shaders.back().cBufferSizes.push_back( sizeof( AdvanceRainConstantBuffer ) ); Shaders.push_back( ShaderInfo( "VS_Ocean", "VS_Ocean.hlsl", "v", 1 ) ); @@ -348,7 +348,7 @@ XRESULT D3D11ShaderManager::Init() { Shaders.push_back( ShaderInfo( "GS_Cubemap", "GS_Cubemap.hlsl", "g" ) ); Shaders.back().cBufferSizes.push_back( sizeof( CubemapGSConstantBuffer ) ); - Shaders.push_back( ShaderInfo( "GS_ParticleStreamOut", "VS_AdvanceRain.hlsl", "g", 11 ) ); + 
Shaders.push_back( ShaderInfo( "GS_ParticleStreamOut", "VS_AdvanceRain.hlsl", "g", 13 ) ); Shaders.back().cBufferSizes.push_back( sizeof( ParticleGSInfoConstantBuffer ) ); m.Name = "NORMALMAPPING"; diff --git a/D3D11Engine/D3D11VShader.cpp b/D3D11Engine/D3D11VShader.cpp index 10e19681..12524735 100644 --- a/D3D11Engine/D3D11VShader.cpp +++ b/D3D11Engine/D3D11VShader.cpp @@ -136,7 +136,7 @@ XRESULT D3D11VShader::LoadShader( const char* vertexShader, int layout, const st { { "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, { "DIFFUSE", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, - { "SIZE", 0, DXGI_FORMAT_R32G32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "SIZE", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, { "TYPE", 0, DXGI_FORMAT_R32_UINT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, { "VELOCITY", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, }; @@ -151,6 +151,15 @@ XRESULT D3D11VShader::LoadShader( const char* vertexShader, int layout, const st { "INSTANCE_REMAP_INDEX", 0, DXGI_FORMAT_R32_UINT, 1, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_INSTANCE_DATA, 1}, }; + const D3D11_INPUT_ELEMENT_DESC layout13[] = + { + { "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "DIFFUSE", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "SIZE", 0, DXGI_FORMAT_R32G32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "TYPE", 0, DXGI_FORMAT_R32_UINT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "VELOCITY", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + }; + switch ( layout ) 
{ case 1: LE( engine->GetDevice()->CreateInputLayout( layout1, ARRAYSIZE( layout1 ), vsBlob->GetBufferPointer(), @@ -211,6 +220,11 @@ XRESULT D3D11VShader::LoadShader( const char* vertexShader, int layout, const st LE( engine->GetDevice()->CreateInputLayout( layout12, ARRAYSIZE( layout12 ), vsBlob->GetBufferPointer(), vsBlob->GetBufferSize(), InputLayout.ReleaseAndGetAddressOf() ) ); break; + + case 13: + LE( engine->GetDevice()->CreateInputLayout( layout13, ARRAYSIZE( layout13 ), vsBlob->GetBufferPointer(), + vsBlob->GetBufferSize(), InputLayout.ReleaseAndGetAddressOf() ) ); + break; } return XR_SUCCESS; diff --git a/D3D11Engine/DLLMain.cpp b/D3D11Engine/DLLMain.cpp index a63ea182..add84e13 100644 --- a/D3D11Engine/DLLMain.cpp +++ b/D3D11Engine/DLLMain.cpp @@ -25,6 +25,12 @@ extern "C" { _declspec(dllexport) DWORD AmdPowerXpressRequestHighPerformance = 0x00000001; } +ZQuantizeHalfFloat QuantizeHalfFloat; +ZQuantizeHalfFloat_X4 QuantizeHalfFloat_X4; +ZUnquantizeHalfFloat UnquantizeHalfFloat; +ZUnquantizeHalfFloat_X4 UnquantizeHalfFloat_X4; +ZUnquantizeHalfFloat_X4 UnquantizeHalfFloat_X8; + static HINSTANCE hLThis = 0; typedef void (WINAPI* DirectDrawSimple)(); @@ -38,6 +44,170 @@ WinMainFunc originalWinMain = reinterpret_cast(GothicMemoryLocation bool FeatureLevel10Compatibility = false; bool GMPModeActive = false; +unsigned short QuantizeHalfFloat_Scalar( float input ) +{ + union { float f; unsigned int ui; } u = { input }; + unsigned int ui = u.ui; + + int s = ( ui >> 16 ) & 0x8000; + int em = ui & 0x7fffffff; + + int h = ( em - ( 112 << 23 ) + ( 1 << 12 ) ) >> 13; + h = ( em < ( 113 << 23 ) ) ? 0 : h; + h = ( em >= ( 143 << 23 ) ) ? 0x7c00 : h; + h = ( em > ( 255 << 23 ) ) ? 
0x7e00 : h; + return static_cast(s | h); +} + +void QuantizeHalfFloats_X4_SSE2( float* input, unsigned short* output ) +{ + __m128i v = _mm_castps_si128( _mm_load_ps( input ) ); + __m128i s = _mm_and_si128( _mm_srli_epi32( v, 16 ), _mm_set1_epi32( 0x8000 ) ); + __m128i em = _mm_and_si128( v, _mm_set1_epi32( 0x7FFFFFFF ) ); + __m128i h = _mm_srli_epi32( _mm_sub_epi32( em, _mm_set1_epi32( 0x37FFF000 ) ), 13 ); + + __m128i mask = _mm_cmplt_epi32( em, _mm_set1_epi32( 0x38800000 ) ); + h = _mm_or_si128( _mm_and_si128( mask, _mm_setzero_si128() ), _mm_andnot_si128( mask, h ) ); + + mask = _mm_cmpgt_epi32( em, _mm_set1_epi32( 0x47800000 - 1 ) ); + h = _mm_or_si128( _mm_and_si128( mask, _mm_set1_epi32( 0x7C00 ) ), _mm_andnot_si128( mask, h ) ); + + mask = _mm_cmpgt_epi32( em, _mm_set1_epi32( 0x7F800000 ) ); + h = _mm_or_si128( _mm_and_si128( mask, _mm_set1_epi32( 0x7E00 ) ), _mm_andnot_si128( mask, h ) ); + + // We need to stay in int16_t range due to signed saturation + __m128i halfs = _mm_sub_epi32( _mm_or_si128( s, h ), _mm_set1_epi32( 32768 ) ); + _mm_store_sd( reinterpret_cast(output), _mm_castsi128_pd( _mm_add_epi16( _mm_packs_epi32( halfs, halfs ), _mm_set1_epi16( 32768 ) ) ) ); +} + +void QuantizeHalfFloats_X4_SSE41( float* input, unsigned short* output ) +{ + __m128i v = _mm_castps_si128( _mm_load_ps( input ) ); + __m128i s = _mm_and_si128( _mm_srli_epi32( v, 16 ), _mm_set1_epi32( 0x8000 ) ); + __m128i em = _mm_and_si128( v, _mm_set1_epi32( 0x7FFFFFFF ) ); + __m128i h = _mm_srli_epi32( _mm_sub_epi32( em, _mm_set1_epi32( 0x37FFF000 ) ), 13 ); + + __m128i mask = _mm_cmplt_epi32( em, _mm_set1_epi32( 0x38800000 ) ); + h = _mm_blendv_epi8( h, _mm_setzero_si128(), mask ); + + mask = _mm_cmpgt_epi32( em, _mm_set1_epi32( 0x47800000 - 1 ) ); + h = _mm_blendv_epi8( h, _mm_set1_epi32( 0x7C00 ), mask ); + + mask = _mm_cmpgt_epi32( em, _mm_set1_epi32( 0x7F800000 ) ); + h = _mm_blendv_epi8( h, _mm_set1_epi32( 0x7E00 ), mask ); + + __m128i halfs = _mm_or_si128( s, h ); + 
_mm_store_sd( reinterpret_cast(output), _mm_castsi128_pd( _mm_packus_epi32( halfs, halfs ) ) ); +} + +#ifdef _XM_AVX_INTRINSICS_ +unsigned short QuantizeHalfFloat_F16C( float input ) +{ + return static_cast(_mm_cvtsi128_si32( _mm_cvtps_ph( _mm_set_ss( input ), _MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC ) )); +} + +void QuantizeHalfFloats_X4_F16C( float* input, unsigned short* output ) +{ + _mm_store_sd( reinterpret_cast(output), _mm_castsi128_pd( _mm_cvtps_ph( _mm_load_ps( input ), _MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC ) ) ); +} +#endif + +float UnquantizeHalfFloat_Scalar( unsigned short input ) +{ + unsigned int s = input & 0x8000; + unsigned int m = input & 0x03FF; + unsigned int e = input & 0x7C00; + e += 0x0001C000; + + float out; + unsigned int r = (s << 16) | (m << 13) | (e << 13); + memcpy( &out, &r, sizeof( float ) ); + return out; +} + +void UnquantizeHalfFloat_X4_SSE2( unsigned short* input, float* output ) +{ + const __m128i mask_zero = _mm_setzero_si128(); + const __m128i mask_s = _mm_set1_epi16( 0x8000 ); + const __m128i mask_m = _mm_set1_epi16( 0x03FF ); + const __m128i mask_e = _mm_set1_epi16( 0x7C00 ); + const __m128i bias_e = _mm_set1_epi32( 0x0001C000 ); + + __m128i halfs = _mm_loadl_epi64( reinterpret_cast(input) ); + + __m128i s = _mm_and_si128( halfs, mask_s ); + __m128i m = _mm_and_si128( halfs, mask_m ); + __m128i e = _mm_and_si128( halfs, mask_e ); + + __m128i s4 = _mm_unpacklo_epi16( s, mask_zero ); + s4 = _mm_slli_epi32( s4, 16 ); + + __m128i m4 = _mm_unpacklo_epi16( m, mask_zero ); + m4 = _mm_slli_epi32( m4, 13 ); + + __m128i e4 = _mm_unpacklo_epi16( e, mask_zero ); + e4 = _mm_add_epi32( e4, bias_e ); + e4 = _mm_slli_epi32( e4, 13 ); + + _mm_store_si128( reinterpret_cast<__m128i*>(output), _mm_or_si128( s4, _mm_or_si128( e4, m4 ) ) ); +} + +void UnquantizeHalfFloat_X8_SSE2( unsigned short* input, float* output ) +{ + const __m128i mask_zero = _mm_setzero_si128(); + const __m128i mask_s = _mm_set1_epi16( 0x8000 ); + const __m128i 
mask_m = _mm_set1_epi16( 0x03FF ); + const __m128i mask_e = _mm_set1_epi16( 0x7C00 ); + const __m128i bias_e = _mm_set1_epi32( 0x0001C000 ); + + __m128i halfs = _mm_load_si128( reinterpret_cast(input) ); + + __m128i s = _mm_and_si128( halfs, mask_s ); + __m128i m = _mm_and_si128( halfs, mask_m ); + __m128i e = _mm_and_si128( halfs, mask_e ); + + __m128i s4 = _mm_unpacklo_epi16( s, mask_zero ); + s4 = _mm_slli_epi32( s4, 16 ); + + __m128i m4 = _mm_unpacklo_epi16( m, mask_zero ); + m4 = _mm_slli_epi32( m4, 13 ); + + __m128i e4 = _mm_unpacklo_epi16( e, mask_zero ); + e4 = _mm_add_epi32( e4, bias_e ); + e4 = _mm_slli_epi32( e4, 13 ); + + _mm_store_si128( reinterpret_cast<__m128i*>(output + 0), _mm_or_si128( s4, _mm_or_si128( e4, m4 ) ) ); + + s4 = _mm_unpackhi_epi16( s, mask_zero ); + s4 = _mm_slli_epi32( s4, 16 ); + + m4 = _mm_unpackhi_epi16( m, mask_zero ); + m4 = _mm_slli_epi32( m4, 13 ); + + e4 = _mm_unpackhi_epi16( e, mask_zero ); + e4 = _mm_add_epi32( e4, bias_e ); + e4 = _mm_slli_epi32( e4, 13 ); + + _mm_store_si128( reinterpret_cast<__m128i*>(output + 4), _mm_or_si128( s4, _mm_or_si128( e4, m4 ) ) ); +} + +#ifdef _XM_AVX_INTRINSICS_ +float UnquantizeHalfFloat_F16C( unsigned short input ) +{ + return _mm_cvtss_f32( _mm_cvtph_ps( _mm_cvtsi32_si128( input ) ) ); +} + +void UnquantizeHalfFloat_X4_F16C( unsigned short* input, float* output ) +{ + _mm_store_ps( output, _mm_cvtph_ps( _mm_loadl_epi64( reinterpret_cast(input) ) ) ); +} + +void UnquantizeHalfFloat_X8_F16C( unsigned short* input, float* output ) +{ + _mm256_store_ps( output, _mm256_cvtph_ps( _mm_load_si128( reinterpret_cast(input) ) ) ); +} +#endif + void SignalHandler( int signal ) { LogInfo() << "Signal:" << signal; throw "!Access Violation!"; @@ -206,6 +376,29 @@ void CheckPlatformSupport() { #elif __SSE__ support_message( "SSE", InstructionSet::SSE() ); #endif + +#ifdef _XM_AVX_INTRINSICS_ + if ( InstructionSet::F16C() ) { + QuantizeHalfFloat = QuantizeHalfFloat_F16C; + QuantizeHalfFloat_X4 = 
QuantizeHalfFloats_X4_F16C; + UnquantizeHalfFloat = UnquantizeHalfFloat_F16C; + UnquantizeHalfFloat_X4 = UnquantizeHalfFloat_X4_F16C; + UnquantizeHalfFloat_X8 = UnquantizeHalfFloat_X8_F16C; + } else +#endif + if ( InstructionSet::SSE41() ) { + QuantizeHalfFloat = QuantizeHalfFloat_Scalar; + QuantizeHalfFloat_X4 = QuantizeHalfFloats_X4_SSE41; + UnquantizeHalfFloat = UnquantizeHalfFloat_Scalar; + UnquantizeHalfFloat_X4 = UnquantizeHalfFloat_X4_SSE2; + UnquantizeHalfFloat_X8 = UnquantizeHalfFloat_X8_SSE2; + } else { + QuantizeHalfFloat = QuantizeHalfFloat_Scalar; + QuantizeHalfFloat_X4 = QuantizeHalfFloats_X4_SSE2; + UnquantizeHalfFloat = UnquantizeHalfFloat_Scalar; + UnquantizeHalfFloat_X4 = UnquantizeHalfFloat_X4_SSE2; + UnquantizeHalfFloat_X8 = UnquantizeHalfFloat_X8_SSE2; + } } #if defined(BUILD_GOTHIC_2_6_fix) diff --git a/D3D11Engine/GothicAPI.cpp b/D3D11Engine/GothicAPI.cpp index 19752561..1ef0d5eb 100644 --- a/D3D11Engine/GothicAPI.cpp +++ b/D3D11Engine/GothicAPI.cpp @@ -1853,6 +1853,126 @@ SkeletalMeshVisualInfo* GothicAPI::LoadzCModelData( oCNPC* npc ) { return mi; } +int GothicAPI::GetLowestLODNumPolys_SkeletalMesh( zCModel* model ) { + int numPolys = 0; + + SkeletalMeshVisualInfo* skeletalMesh = nullptr; + zCVob* homeVob = model->GetHomeVob(); + if ( homeVob && homeVob->GetVobType() == zVOB_TYPE_NSC ) { + oCNPC* npc = static_cast(homeVob); + auto it = SkeletalMeshNpcs.find( npc ); + if ( it != SkeletalMeshNpcs.end() ) { + skeletalMesh = it->second; + } + } else { + std::string str = model->GetVisualName(); + if ( str.empty() ) { // Happens when the model has no skeletal-mesh + zSTRING mds = model->GetModelName(); + str = mds.ToChar(); + mds.Delete(); + } + + auto it = SkeletalMeshVisuals.find( str ); + if ( it != SkeletalMeshVisuals.end() ) { + skeletalMesh = it->second; + } + } + + if ( skeletalMesh ) { + for ( auto const& itm : skeletalMesh->SkeletalMeshes ) { + for ( auto& mesh : itm.second ) { + numPolys += static_cast(mesh->Indices.size() / 3); + } + 
} + } + return numPolys; +} + +float3* GothicAPI::GetLowestLODPoly_SkeletalMesh( zCModel* model, const int polyId, float3*& polyNormal ) { + static float3 returnPositions[3]; + size_t polyIndex = static_cast(polyId) * 3; + static float3 defaultNormal; defaultNormal = float3( 0.f, 1.f, 0.f ); polyNormal = &defaultNormal; // static storage (like returnPositions): the address of a temporary would dangle as soon as the statement ends + + SkeletalMeshVisualInfo* skeletalMesh = nullptr; + zCVob* homeVob = model->GetHomeVob(); + if ( homeVob && homeVob->GetVobType() == zVOB_TYPE_NSC ) { + oCNPC* npc = static_cast(homeVob); + auto it = SkeletalMeshNpcs.find( npc ); + if ( it != SkeletalMeshNpcs.end() ) { + skeletalMesh = it->second; + } + } else { + std::string str = model->GetVisualName(); + if ( str.empty() ) { // Happens when the model has no skeletal-mesh + zSTRING mds = model->GetModelName(); + str = mds.ToChar(); + mds.Delete(); + } + + auto it = SkeletalMeshVisuals.find( str ); + if ( it != SkeletalMeshVisuals.end() ) { + skeletalMesh = it->second; + } + } + + if ( skeletalMesh ) { + for ( auto const& itm : skeletalMesh->SkeletalMeshes ) { + for ( auto& mesh : itm.second ) { + if ( polyIndex >= mesh->Indices.size() ) { + polyIndex -= mesh->Indices.size(); + } else { + float fatness = model->GetModelFatness(); + std::vector transforms; + model->GetBoneTransforms( &transforms ); + + for ( int i = 0; i < 3; ++i ) { + VERTEX_INDEX _polyId = mesh->Indices[polyIndex + i]; + ExSkelVertexStruct& _polyVert = mesh->Vertices[_polyId]; + + alignas(32) float floats_0[8]; + alignas(32) float floats_1[8]; + alignas(16) unsigned short half2float_0[8] = { _polyVert.Position[0][0], _polyVert.Position[0][1], _polyVert.Position[0][2], _polyVert.weights[0], + _polyVert.Position[1][0], _polyVert.Position[1][1], _polyVert.Position[1][2], _polyVert.weights[1] }; + alignas(16) unsigned short half2float_1[8] = { _polyVert.Position[2][0], _polyVert.Position[2][1], _polyVert.Position[2][2], _polyVert.weights[2], + _polyVert.Position[3][0], _polyVert.Position[3][1], _polyVert.Position[3][2], _polyVert.weights[3] }; + UnquantizeHalfFloat_X8( half2float_0, floats_0 
); + UnquantizeHalfFloat_X8( half2float_1, floats_1 ); + + XMVECTOR position = XMVectorZero(); + position += XMVectorReplicate( floats_0[3] ) * XMVector3Transform( + XMVectorSet( floats_0[0], floats_0[1], floats_0[2], 1.f ), + XMMatrixTranspose( XMLoadFloat4x4( &transforms[_polyVert.boneIndices[0]] ) ) ); + + position += XMVectorReplicate( floats_0[7] ) * XMVector3Transform( + XMVectorSet( floats_0[4], floats_0[5], floats_0[6], 1.f ), + XMMatrixTranspose( XMLoadFloat4x4( &transforms[_polyVert.boneIndices[1]] ) ) ); + + position += XMVectorReplicate( floats_1[3] ) * XMVector3Transform( + XMVectorSet( floats_1[0], floats_1[1], floats_1[2], 1.f ), + XMMatrixTranspose( XMLoadFloat4x4( &transforms[_polyVert.boneIndices[2]] ) ) ); + + position += XMVectorReplicate( floats_1[7] ) * XMVector3Transform( + XMVectorSet( floats_1[4], floats_1[5], floats_1[6], 1.f ), + XMMatrixTranspose( XMLoadFloat4x4( &transforms[_polyVert.boneIndices[3]] ) ) ); + + position += XMVectorReplicate( fatness ) * XMLoadFloat3( reinterpret_cast(&_polyVert.BindPoseNormal) ) ; + + // world matrix is applied later when particle calculate world position + XMMATRIX scale = XMMatrixScalingFromVector( model->GetModelScaleXM() ); + XMStoreFloat3( reinterpret_cast(&returnPositions[i]), XMVector3Transform( position, XMMatrixTranspose( scale ) ) ); + } + return returnPositions; + } + } + } + } + + returnPositions[0] = float3( 0.f, 0.f, 0.f ); + returnPositions[1] = float3( 0.f, 0.f, 0.f ); + returnPositions[2] = float3( 0.f, 0.f, 0.f ); + return returnPositions; +} + // TODO: REMOVE THIS! 
#include "D3D11GraphicsEngine.h" @@ -1916,7 +2036,7 @@ void GothicAPI::DrawSkeletalMeshVob( SkeletalVobInfo* vi, float distance, bool u // Get the bone transforms std::vector transforms; - model->GetBoneTransforms( &transforms, vi->Vob ); + model->GetBoneTransforms( &transforms ); if ( updateState ) { // Update attachments @@ -2148,7 +2268,7 @@ void GothicAPI::DrawSkeletalVN() { // Get the bone transforms std::vector transforms; - model->GetBoneTransforms( &transforms, vi->Vob ); + model->GetBoneTransforms( &transforms ); if ( !static_cast(vi->VisualInfo)->SkeletalMeshes.empty() ) { g->DrawSkeletalVertexNormals( vi, transforms, 0xFFFFFF, fatness ); @@ -2268,21 +2388,14 @@ void GothicAPI::DrawParticleFX( zCVob* source, zCParticleFX* fx, ParticleFrameDa // Generate instance info part.emplace_back(); ParticleInstanceInfo& ii = part.back(); - ii.scale = XMFLOAT2( p->Size.x, p->Size.y ); - ii.drawMode = 0; + ii.scale = float3( p->Size.x, p->Size.y, 0.f ); // Construct world matrix - int alignment = fx->GetEmitter()->GetVisAlignment(); - if ( alignment == zPARTICLE_ALIGNMENT_XY ) { - ii.drawMode = 2; - } else if ( alignment == zPARTICLE_ALIGNMENT_VELOCITY || alignment == zPARTICLE_ALIGNMENT_VELOCITY_3D ) { - ii.drawMode = 3; - } // TODO: Y-Locked! 
- - if ( !fx->GetEmitter()->GetVisIsQuadPoly() ) { - ii.scale.x *= 0.5f; - ii.scale.y *= 0.5f; + ii.drawMode = fx->GetEmitter()->GetVisAlignment(); + if ( fx->GetEmitter()->GetVisIsQuadPoly() ) { + ii.drawMode += 10; } + float4 color; color.x = p->Color.x / 255.0f; color.y = p->Color.y / 255.0f; @@ -2300,17 +2413,19 @@ void GothicAPI::DrawParticleFX( zCVob* source, zCParticleFX* fx, ParticleFrameDa ii.color = color; ii.velocity = p->Vel; + if ( fx->GetEmitter()->GetVisAlignment() == 2 ) { + if ( zCVob* connectedVob = fx->GetConnectedVob() ) { + XMFLOAT4X4* worldMatrix = connectedVob->GetWorldMatrixPtr(); + ii.scale = float3( worldMatrix->m[0][0] * p->Size.x, worldMatrix->m[1][0] * p->Size.x, worldMatrix->m[2][0] * p->Size.x ); // column 0 (rows 0..2) scaled by width; GS_Billboard reads vSize as the right vector for alignment 2 + ii.velocity = float3( worldMatrix->m[0][2] * p->Size.y, worldMatrix->m[1][2] * p->Size.y, worldMatrix->m[2][2] * p->Size.y ); // column 2 (rows 0..2) scaled by height; GS_Billboard reads vVelocity as the up vector for alignment 2 + } + } + fx->UpdateParticle( p ); i++; } } - /* - Liker@WoG: -11.12.2020 14:58 https://forum.worldofplayers.de/forum/threads/1546222-Yet-Another-D3D11-Renderer?p=26626374&viewfull=1#post26626374 -11.12.2020 16:19 https://forum.worldofplayers.de/forum/threads/1546222-Yet-Another-D3D11-Renderer?p=26626530&viewfull=1#post26626530 -14.12.2020 20:25 https://forum.worldofplayers.de/forum/threads/1546222-Yet-Another-D3D11-Renderer?p=26628056&viewfull=1#post26628056 - */ // Create new particles? 
fx->CreateParticlesUpdateDependencies(); diff --git a/D3D11Engine/GothicAPI.h b/D3D11Engine/GothicAPI.h index 19208ea8..12651fc1 100644 --- a/D3D11Engine/GothicAPI.h +++ b/D3D11Engine/GothicAPI.h @@ -664,6 +664,10 @@ class GothicAPI { SkeletalMeshVisualInfo* LoadzCModelData( zCModel* model ); SkeletalMeshVisualInfo* LoadzCModelData( oCNPC* npc ); + /** Returns lowest lod of zCModel polys */ + int GetLowestLODNumPolys_SkeletalMesh( zCModel* model ); + float3* GetLowestLODPoly_SkeletalMesh( zCModel* model, const int polyId, float3*& polyNormal ); + /** Prints a message to the screen for the given amount of time */ void PrintMessageTimed( const INT2& position, const std::string& strMessage, float time = 3000.0f, DWORD color = 0xFFFFFFFF ); diff --git a/D3D11Engine/GothicGraphicsState.h b/D3D11Engine/GothicGraphicsState.h index f1380bff..86999ab9 100644 --- a/D3D11Engine/GothicGraphicsState.h +++ b/D3D11Engine/GothicGraphicsState.h @@ -545,12 +545,7 @@ struct GothicRendererSettings { IndoorVobDrawRadius = 5000.0f; OutdoorVobDrawRadius = 30000.0f; SkeletalMeshDrawRadius = 6000.0f; - VisualFXDrawRadius = 10000.0f; - -#if BUILD_SPACER_NET - VisualFXDrawRadius = 16000.0f; -#endif - + VisualFXDrawRadius = 8000.0f; OutdoorSmallVobDrawRadius = 10000.0f; SmallVobSize = 1500.0f; diff --git a/D3D11Engine/GothicMemoryLocations2_6_fix.h b/D3D11Engine/GothicMemoryLocations2_6_fix.h index c05c4bc7..c1adca6f 100644 --- a/D3D11Engine/GothicMemoryLocations2_6_fix.h +++ b/D3D11Engine/GothicMemoryLocations2_6_fix.h @@ -383,6 +383,8 @@ struct GothicMemoryLocations { static const unsigned int Offset_NumActiveAnis = 0x34; static const unsigned int Offset_AniChannels = 0x38; static const unsigned int GetVisualName = 0x0057DF60; + static const unsigned int GetLowestLODNumPolys = 0x00579490; + static const unsigned int GetLowestLODPoly = 0x005794B0; }; struct zCModelAni { diff --git a/D3D11Engine/HookExceptionFilter.h b/D3D11Engine/HookExceptionFilter.h index e4e8dc55..f530fc4e 100644 --- 
a/D3D11Engine/HookExceptionFilter.h +++ b/D3D11Engine/HookExceptionFilter.h @@ -60,4 +60,4 @@ static void __AddDbgFuncCall( const std::string& fn, int threadID, bool out ) { LogInfo() << "Exception caught!"; \ \ } - */ \ No newline at end of file + */ diff --git a/D3D11Engine/HookedFunctions.h b/D3D11Engine/HookedFunctions.h index 907cd6bf..8e30273e 100644 --- a/D3D11Engine/HookedFunctions.h +++ b/D3D11Engine/HookedFunctions.h @@ -59,7 +59,6 @@ typedef int( __thiscall* oCSpawnManagerCheckRemoveNpc )(void*, oCNPC*); typedef void( __thiscall* oCSpawnManagerCheckInsertNpc )(void*); typedef void( __thiscall* zCVobSetVisual )(void*, zCVisual*); - typedef int( __thiscall* zCTex_D3DXTEX_BuildSurfaces )(void*, int); typedef int( __thiscall* zCTextureLoadResourceData )(void*); typedef int( __thiscall* zCThreadSuspendThread )(void*); @@ -72,6 +71,9 @@ typedef void( __fastcall* oCWorldRemoveFromLists )(void*, zCVob*); typedef int( __thiscall* zCModelPrototypeLoadModelASC )(void*, class zSTRING const&); typedef int( __thiscall* zCModelPrototypeReadMeshAndTreeMSB )(void*, int&, class zCFileBIN&); +typedef int( __thiscall* zCModelGetLowestLODNumPolys )(void*); +typedef float3*( __thiscall* zCModelGetLowestLODPoly )(void*, const int, float3*&); + typedef DWORD( __cdecl* GetInformationManagerProc )(); #ifdef BUILD_GOTHIC_1_08k @@ -148,6 +150,8 @@ struct HookedFunctionInfo { #endif #ifdef BUILD_GOTHIC_2_6_fix GenericThiscall original_zCActiveSndAutoCalcObstruction = reinterpret_cast(GothicMemoryLocations::zCActiveSnd::AutoCalcObstruction); // Not usable - only for hooking + zCModelGetLowestLODNumPolys original_zCModelGetLowestLODNumPolys = reinterpret_cast(GothicMemoryLocations::zCModel::GetLowestLODNumPolys); + zCModelGetLowestLODPoly original_zCModelGetLowestLODPoly = reinterpret_cast(GothicMemoryLocations::zCModel::GetLowestLODPoly); #endif //zCModelPrototypeLoadModelASC original_zCModelPrototypeLoadModelASC = 
reinterpret_cast(GothicMemoryLocations::zCModelPrototype::LoadModelASC); //zCModelPrototypeReadMeshAndTreeMSB original_zCModelPrototypeReadMeshAndTreeMSB = reinterpret_cast(GothicMemoryLocations::zCModelPrototype::ReadMeshAndTreeMSB); diff --git a/D3D11Engine/Shaders/GS_Billboard.hlsl b/D3D11Engine/Shaders/GS_Billboard.hlsl index 2c13752b..e663ce79 100644 --- a/D3D11Engine/Shaders/GS_Billboard.hlsl +++ b/D3D11Engine/Shaders/GS_Billboard.hlsl @@ -20,79 +20,61 @@ struct PS_INPUT void GSMain(point VS_OUTPUT input[1], inout TriangleStream OutputStream) { float3 planeNormal = input[0].vPosition - CameraPosition; - //planeNormal.y = 0.0f; // For tree bilboard planeNormal = normalize(-planeNormal); + float3 position = input[0].vPosition; float3 upVector; float3 rightVector; - - - - //input[0].vSize *= 0.5f; - - //rightVector = rightVector * 100.0f; - //upVector *= 100.0f; - // Construct vertices - // We get the points by using the billboards right vector and the billboards height + int visIsQuadPoly = int(step(10.0, float(input[0].type))); + int visOrientation = input[0].type - (10 * visIsQuadPoly); + float3 vert[4]; - - if(input[0].type == 3) + if (visOrientation == 2) + { + rightVector = input[0].vSize; + upVector = input[0].vVelocity; + } + else if (visOrientation == 3) { - // Make up/right vectors along the velocity-vector - float3 velYPos = normalize(input[0].vVelocity); - float3 velXPos = normalize(cross(planeNormal, velYPos)); - - //velYPos = normalize(cross(planeNormal, velXPos)); + float3 velYPos = normalize(input[0].vVelocity); + float3 velXPos = normalize(cross(planeNormal, velYPos)); - rightVector = velXPos; - upVector = velYPos; - }else if(input[0].type == 2) - { - // xz-plane - upVector = float3(0.0f, 0.0f, 1.0f); - rightVector = float3(1.0f,0.0f,0.0f); // FIXME: Maybe rotate this with the vob? 
- }else + rightVector = velXPos * input[0].vSize.x; + upVector = velYPos * input[0].vSize.y; + } + else if (visOrientation == 1) { - // Construct up and right vectors - upVector = float3(0.0f, 1.0f, 0.0f); - rightVector = normalize(cross(planeNormal, upVector)); - - // Construct better up-vector - upVector = normalize(cross(planeNormal, rightVector)); + float3 velYPos = normalize(input[0].vVelocity); + float3 velXPos = normalize(cross(planeNormal, velYPos)); + velYPos = normalize(cross(planeNormal, velXPos)); + + rightVector = velXPos * input[0].vSize.x; + upVector = velYPos * input[0].vSize.y; } - - if(input[0].type == 5) - { - //upVector = float3(0.0f, 1.0f, 0.0f); - //rightVector = float3(1.0f,0.0f,0.0f); // FIXME: Maybe rotate this with the vob? - - // Scale vectors - rightVector *= input[0].vSize.x; - upVector *= input[0].vSize.y; - }else - { - // Scale vectors - rightVector *= input[0].vSize.x; - upVector *= input[0].vSize.y; + else + { + upVector = float3(0.0f, 1.0f, 0.0f) * input[0].vSize.y; + rightVector = float3(1.0f, 0.0f, 0.0f) * input[0].vSize.x; + + position += float3(input[0].vSize.x * 0.5, -input[0].vSize.y * 0.5, 0.0) * float(1 - visIsQuadPoly); } - vert[0] = input[0].vPosition - rightVector - upVector; // Get bottom left vertex - vert[1] = input[0].vPosition + rightVector - upVector; // Get bottom right vertex - vert[2] = input[0].vPosition - rightVector + upVector; // Get top left vertex - vert[3] = input[0].vPosition + rightVector + upVector; // Get top right vertex + vert[0] = position - rightVector + upVector; // Get top left vertex + vert[1] = position + rightVector + upVector; // Get top right vertex + vert[2] = position - rightVector - upVector; // Get bottom left vertex + vert[3] = position + rightVector - upVector; // Get bottom right vertex - // Get billboards texture coordinates float2 texCoord[4]; texCoord[0] = float2(0, 1); texCoord[1] = float2(1, 1); texCoord[2] = float2(0, 0); texCoord[3] = float2(1, 0); - // Append the two 
triangles to the stream + // Append triangles to the stream PS_INPUT outputVert = (PS_INPUT)0; - for(int i = 0; i < 4; i++) + for (int i = 0; i < 4; i++) { outputVert.vPosition = mul(float4(vert[i], 1.0f), M_ViewProj); outputVert.vTexcoord = texCoord[i]; diff --git a/D3D11Engine/Shaders/GS_Raindrops.hlsl b/D3D11Engine/Shaders/GS_Raindrops.hlsl index 2f94c8df..5d4c405b 100644 --- a/D3D11Engine/Shaders/GS_Raindrops.hlsl +++ b/D3D11Engine/Shaders/GS_Raindrops.hlsl @@ -1,4 +1,4 @@ -#include +#include cbuffer ParticleGSInfo : register( b2 ) { diff --git a/D3D11Engine/Shaders/VS_ParticlePoint.hlsl b/D3D11Engine/Shaders/VS_ParticlePoint.hlsl index c506e3cf..c2aa4805 100644 --- a/D3D11Engine/Shaders/VS_ParticlePoint.hlsl +++ b/D3D11Engine/Shaders/VS_ParticlePoint.hlsl @@ -16,7 +16,7 @@ struct VS_INPUT { float3 vPosition : POSITION; float4 vDiffuse : DIFFUSE; - float2 vSize : SIZE; + float3 vSize : SIZE; unsigned int type : TYPE; float3 vVelocity : VELOCITY; }; @@ -25,7 +25,7 @@ struct VS_OUTPUT { float3 vPosition : POSITION; float4 vDiffuse : DIFFUSE; - float2 vSize : SIZE; + float3 vSize : SIZE; int type : TYPE; float3 vVelocity : VELOCITY; }; diff --git a/D3D11Engine/WorldConverter.cpp b/D3D11Engine/WorldConverter.cpp index dae9c47f..b7230a94 100644 --- a/D3D11Engine/WorldConverter.cpp +++ b/D3D11Engine/WorldConverter.cpp @@ -353,6 +353,10 @@ HRESULT WorldConverter::ConvertWorldMesh( zCPolygon** polys, unsigned int numPol } // Flag portals so that we can apply a different PS shader later + if ( poly->GetPolyFlags()->PortalPoly ) { + continue; + } + /* if ( poly->GetPolyFlags()->PortalPoly ) { zCMaterial* polymat = poly->GetMaterial(); if ( zCTexture* tex = polymat->GetTextureSingle() ) { @@ -367,6 +371,7 @@ HRESULT WorldConverter::ConvertWorldMesh( zCPolygon** polys, unsigned int numPol continue; } } + */ // Calculate midpoint of this triange to get the section XMFLOAT3 avgPos; @@ -783,11 +788,16 @@ void WorldConverter::ExtractSkeletalMeshFromVob( zCModel* model, 
SkeletalMeshVis // Get index and weight if ( n < 4 ) { - vx.weights[n] = quantizeHalfFloat( weightEntry.Weight ); + alignas(16) float floats[4] = { weightEntry.VertexPosition.x, weightEntry.VertexPosition.y, + weightEntry.VertexPosition.z, weightEntry.Weight }; + alignas(16) unsigned short halfs[4]; + QuantizeHalfFloat_X4( floats, halfs ); + + vx.weights[n] = halfs[3]; vx.boneIndices[n] = weightEntry.NodeIndex; - vx.Position[n][0] = quantizeHalfFloat( weightEntry.VertexPosition.x ); - vx.Position[n][1] = quantizeHalfFloat( weightEntry.VertexPosition.y ); - vx.Position[n][2] = quantizeHalfFloat( weightEntry.VertexPosition.z ); + vx.Position[n][0] = halfs[0]; + vx.Position[n][1] = halfs[1]; + vx.Position[n][2] = halfs[2]; } } diff --git a/D3D11Engine/WorldObjects.h b/D3D11Engine/WorldObjects.h index 0472b3c2..70094721 100644 --- a/D3D11Engine/WorldObjects.h +++ b/D3D11Engine/WorldObjects.h @@ -29,6 +29,14 @@ struct ParticleRenderInfo { }; struct ParticleInstanceInfo { + float3 position; + float4 color; + float3 scale; + int drawMode; // 0 = billboard, 1 = y-locked billboard, 2 = y-plane, 3 = velo aligned + float3 velocity; +}; + +struct RainParticleInstanceInfo { float3 position; float4 color; float2 scale; diff --git a/D3D11Engine/pch.h b/D3D11Engine/pch.h index b174c701..a55e1c1d 100644 --- a/D3D11Engine/pch.h +++ b/D3D11Engine/pch.h @@ -33,7 +33,7 @@ using namespace DirectX; #define ENABLE_TESSELATION 0 #ifndef VERSION_NUMBER -#define VERSION_NUMBER "17.8-dev15" +#define VERSION_NUMBER "17.8-dev16" #endif __declspec(selectany) const char* VERSION_NUMBER_STR = VERSION_NUMBER; @@ -61,17 +61,13 @@ void DebugWrite_i( LPCSTR lpDebugMessage, void* thisptr ); /** Computes the size in bytes of the given FVF */ int ComputeFVFSize( DWORD fvf ); -inline unsigned short quantizeHalfFloat( float v ) -{ - union { float f; unsigned int ui; } u = { v }; - unsigned int ui = u.ui; +typedef unsigned short (*ZQuantizeHalfFloat)(float input); +typedef void 
(*ZQuantizeHalfFloat_X4)(float* input, unsigned short* output); +typedef float (*ZUnquantizeHalfFloat)(unsigned short input); +typedef void (*ZUnquantizeHalfFloat_X4)(unsigned short* input, float* output); - int s = ( ui >> 16 ) & 0x8000; - int em = ui & 0x7fffffff; - - int h = ( em - ( 112 << 23 ) + ( 1 << 12 ) ) >> 13; - h = ( em < ( 113 << 23 ) ) ? 0 : h; - h = ( em >= ( 143 << 23 ) ) ? 0x7c00 : h; - h = ( em > ( 255 << 23 ) ) ? 0x7e00 : h; - return static_cast(s | h); -} +extern ZQuantizeHalfFloat QuantizeHalfFloat; +extern ZQuantizeHalfFloat_X4 QuantizeHalfFloat_X4; +extern ZUnquantizeHalfFloat UnquantizeHalfFloat; +extern ZUnquantizeHalfFloat_X4 UnquantizeHalfFloat_X4; +extern ZUnquantizeHalfFloat_X4 UnquantizeHalfFloat_X8; diff --git a/D3D11Engine/zCModel.h b/D3D11Engine/zCModel.h index 1151585a..1f8b13d3 100644 --- a/D3D11Engine/zCModel.h +++ b/D3D11Engine/zCModel.h @@ -185,8 +185,24 @@ class zCModel : public zCVisual { byte unsmoothAnisFix[] = {0x75, 0x00, 0xC7, 0x44, 0x24, 0x78, 0x01, 0x00, 0x00, 0x00}; // Replaces a jnz in AdvanceAnis - Thanks to killer-m! 
memcpy((void *)GothicMemoryLocations::zCModel::RPL_AniQuality, unsmoothAnisFix, sizeof(unsmoothAnisFix)); #endif*/ + +#ifdef BUILD_GOTHIC_2_6_fix + DetourAttach( &reinterpret_cast(HookedFunctions::OriginalFunctions.original_zCModelGetLowestLODNumPolys), Hooked_zCModelGetLowestLODNumPolys ); + DetourAttach( &reinterpret_cast(HookedFunctions::OriginalFunctions.original_zCModelGetLowestLODPoly), Hooked_zCModelGetLowestLODPoly ); +#endif + } + + /** Fix particle emitter setup */ +#ifdef BUILD_GOTHIC_2_6_fix + static int __fastcall Hooked_zCModelGetLowestLODNumPolys( void* thisptr ) { + return Engine::GAPI->GetLowestLODNumPolys_SkeletalMesh( static_cast(thisptr) ); } + static float3* __fastcall Hooked_zCModelGetLowestLODPoly( void* thisptr, void*, const int polyId, float3*& polyNormal ) { + return Engine::GAPI->GetLowestLODPoly_SkeletalMesh( static_cast(thisptr), polyId, polyNormal ); + } +#endif + /** Creates an array of matrices for the bone transforms */ void __fastcall RenderNodeList( zTRenderContext& renderContext, zCArray& boneTransforms, zCRenderLightContainer& lightContainer, int lightingMode = 0 ) { reinterpret_cast&, zCRenderLightContainer&, int )> @@ -298,7 +314,7 @@ class zCModel : public zCVisual { } /** Fills a vector of (viewspace) bone-transformation matrices for this frame */ - void GetBoneTransforms( std::vector* transforms, zCVob* vob = nullptr ) { + void GetBoneTransforms( std::vector* transforms ) { zCArray* nodeList = GetNodeList(); if ( !nodeList ) return; @@ -334,7 +350,7 @@ class zCModel : public zCVisual { } zCVob* GetHomeVob() { - return reinterpret_cast(THISPTR_OFFSET( GothicMemoryLocations::zCModel::Offset_HomeVob )); + return *reinterpret_cast(THISPTR_OFFSET( GothicMemoryLocations::zCModel::Offset_HomeVob )); } private: diff --git a/D3D11Engine/zCParticleFX.h b/D3D11Engine/zCParticleFX.h index a8379fa9..c5ce9d03 100644 --- a/D3D11Engine/zCParticleFX.h +++ b/D3D11Engine/zCParticleFX.h @@ -7,12 +7,6 @@ #include "zCTimer.h" #include 
"zCPolyStrip.h" -enum EZParticleAlignment { - zPARTICLE_ALIGNMENT_VELOCITY = 1, - zPARTICLE_ALIGNMENT_XY = 2, - zPARTICLE_ALIGNMENT_VELOCITY_3D = 3, -}; - class zSTRING; class zCPolyStrip; class zCMesh;