Skip to content

Commit

Permalink
Merge pull request #19334 from hrydgard/improved-provoking-vertex-fix
Browse files Browse the repository at this point in the history
Improved provoking vertex fix
  • Loading branch information
hrydgard authored Jul 17, 2024
2 parents 5202d05 + 7738899 commit b8f0558
Show file tree
Hide file tree
Showing 13 changed files with 103 additions and 130 deletions.
2 changes: 2 additions & 0 deletions Common/GPU/D3D11/thin3d_d3d11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,8 @@ D3D11DrawContext::D3D11DrawContext(ID3D11Device *device, ID3D11DeviceContext *de
caps_.blendMinMaxSupported = true;
caps_.multiSampleLevelsMask = 1; // More could be supported with some work.

caps_.provokingVertexLast = false; // D3D has it first, unfortunately. (and no way to change it).

caps_.presentInstantModeChange = true;
caps_.presentMaxInterval = 4;
caps_.presentModesSupported = PresentMode::FIFO | PresentMode::IMMEDIATE;
Expand Down
2 changes: 2 additions & 0 deletions Common/GPU/D3D9/thin3d_d3d9.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -786,6 +786,8 @@ D3D9Context::D3D9Context(IDirect3D9 *d3d, IDirect3D9Ex *d3dEx, int adapterId, ID
caps_.presentMaxInterval = 1;
caps_.presentModesSupported = PresentMode::FIFO;

caps_.provokingVertexLast = false; // D3D has it first, unfortunately (and no way to change it).

if ((caps.RasterCaps & D3DPRASTERCAPS_ANISOTROPY) != 0 && caps.MaxAnisotropy > 1) {
caps_.anisoSupported = true;
}
Expand Down
3 changes: 3 additions & 0 deletions Common/GPU/OpenGL/thin3d_gl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -598,6 +598,9 @@ OpenGLContext::OpenGLContext(bool canChangeSwapInterval) : renderManager_(frameT
// GLES has no support for logic framebuffer operations. There doesn't even seem to exist any such extensions.
caps_.logicOpSupported = !gl_extensions.IsGLES;

// Always the case in GL (which is what we want for PSP flat shade).
caps_.provokingVertexLast = true;

// Interesting potential hack for emulating GL_DEPTH_CLAMP (use a separate varying, force depth in fragment shader):
// This will induce a performance penalty on many architectures though so a blanket enable of this
// is probably not a good idea.
Expand Down
3 changes: 3 additions & 0 deletions Common/GPU/Vulkan/thin3d_vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -913,6 +913,9 @@ VKContext::VKContext(VulkanContext *vulkan, bool useRenderThread)
caps_.sampleRateShadingSupported = vulkan->GetDeviceFeatures().enabled.standard.sampleRateShading != 0;
caps_.textureSwizzleSupported = true;

// Note that it must also be enabled on the pipelines (which we do).
caps_.provokingVertexLast = vulkan->GetDeviceFeatures().enabled.provokingVertex.provokingVertexLast;

// Present mode stuff
caps_.presentMaxInterval = 1;
caps_.presentInstantModeChange = false; // TODO: Fix this with some work in VulkanContext
Expand Down
2 changes: 1 addition & 1 deletion Common/GPU/thin3d.h
Original file line number Diff line number Diff line change
Expand Up @@ -612,7 +612,7 @@ struct DeviceCaps {
bool setMaxFrameLatencySupported;
bool textureSwizzleSupported;
bool requiresHalfPixelOffset;

bool provokingVertexLast; // GL behavior, what the PSP does
bool verySlowShaderCompiler;

// From the other backends, we can detect if D3D9 support is known bad (like on Xe) and disable it.
Expand Down
8 changes: 4 additions & 4 deletions GPU/Common/IndexGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,13 +86,13 @@ void IndexGenerator::AddList(int numVerts, int indexOffset, bool clockwise) {

alignas(16) static const u16 offsets_clockwise[24] = {
0, (u16)(0 + 1), (u16)(0 + 2),
1, (u16)(1 + 2), (u16)(1 + 1),
(u16)(1 + 1), 1, (u16)(1 + 2),
2, (u16)(2 + 1), (u16)(2 + 2),
3, (u16)(3 + 2), (u16)(3 + 1),
(u16)(3 + 1), 3, (u16)(3 + 2),
4, (u16)(4 + 1), (u16)(4 + 2),
5, (u16)(5 + 2), (u16)(5 + 1),
(u16)(5 + 1), 5, (u16)(5 + 2),
6, (u16)(6 + 1), (u16)(6 + 2),
7, (u16)(7 + 2), (u16)(7 + 1),
(u16)(7 + 1), 7, (u16)(7 + 2),
};

alignas(16) static const uint16_t offsets_counter_clockwise[24] = {
Expand Down
108 changes: 36 additions & 72 deletions GPU/Common/SoftwareTransformCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,33 +125,6 @@ static bool IsReallyAClear(const TransformedVertex *transformed, int numVerts, f
return true;
}

static int ColorIndexOffset(int prim, GEShadeMode shadeMode, bool clearMode) {
if (shadeMode != GE_SHADE_FLAT || clearMode) {
return 0;
}

switch (prim) {
case GE_PRIM_LINES:
case GE_PRIM_LINE_STRIP:
return 1;

case GE_PRIM_TRIANGLES:
case GE_PRIM_TRIANGLE_STRIP:
return 2;

case GE_PRIM_TRIANGLE_FAN:
return 1;

case GE_PRIM_RECTANGLES:
// We already use BR color when expanding, so no need to offset.
return 0;

default:
break;
}
return 0;
}

void SoftwareTransform::SetProjMatrix(const float mtx[14], bool invertedX, bool invertedY, const Lin::Vec3 &trans, const Lin::Vec3 &scale) {
memcpy(&projMatrix_.m, mtx, 16 * sizeof(float));

Expand Down Expand Up @@ -202,11 +175,6 @@ void SoftwareTransform::Transform(int prim, u32 vertType, const DecVtxFormat &de
fog_slope = std::signbit(fog_slope) ? -65535.0f : 65535.0f;
}

int provokeIndOffset = 0;
if (params_.provokeFlatFirst) {
provokeIndOffset = ColorIndexOffset(prim, gstate.getShadeMode(), gstate.isModeClear());
}

VertexReader reader(decoded, decVtxFormat, vertType);
if (throughmode) {
const u32 materialAmbientRGBA = gstate.getMaterialAmbientRGBA();
Expand All @@ -221,13 +189,7 @@ void SoftwareTransform::Transform(int prim, u32 vertType, const DecVtxFormat &de
vert.pos_w = 1.0f;

if (hasColor) {
if (provokeIndOffset != 0 && index + provokeIndOffset < numDecodedVerts) {
reader.Goto(index + provokeIndOffset);
vert.color0_32 = reader.ReadColor0_8888();
reader.Goto(index);
} else {
vert.color0_32 = reader.ReadColor0_8888();
}
vert.color0_32 = reader.ReadColor0_8888();
} else {
vert.color0_32 = materialAmbientRGBA;
}
Expand Down Expand Up @@ -268,10 +230,7 @@ void SoftwareTransform::Transform(int prim, u32 vertType, const DecVtxFormat &de
if (reader.hasUV())
reader.ReadUV(ruv);

// Read all the provoking vertex values here.
Vec4f unlitColor;
if (provokeIndOffset != 0 && index + provokeIndOffset < numDecodedVerts)
reader.Goto(index + provokeIndOffset);
if (reader.hasColor0())
reader.ReadColor0(unlitColor.AsArray());
else
Expand Down Expand Up @@ -342,34 +301,14 @@ void SoftwareTransform::Transform(int prim, u32 vertType, const DecVtxFormat &de
break;

case GE_PROJMAP_NORMALIZED_NORMAL: // Use normalized normal as source
// Flat uses the vertex normal, not provoking.
if (provokeIndOffset == 0) {
source = normal.Normalized(cpu_info.bSSE4_1);
} else {
reader.Goto(index);
if (reader.hasNormal())
reader.ReadNrm(source.AsArray());
if (gstate.areNormalsReversed())
source = -source;
source.Normalize();
}
source = normal.Normalized(cpu_info.bSSE4_1);
if (!reader.hasNormal()) {
ERROR_LOG_REPORT(Log::G3D, "Normal projection mapping without normal?");
}
break;

case GE_PROJMAP_NORMAL: // Use non-normalized normal as source!
// Flat uses the vertex normal, not provoking.
if (provokeIndOffset == 0) {
source = normal;
} else {
// Need to read the normal for this vertex and weight it again..
reader.Goto(index);
if (reader.hasNormal())
reader.ReadNrm(source.AsArray());
if (gstate.areNormalsReversed())
source = -source;
}
source = normal;
if (!reader.hasNormal()) {
ERROR_LOG_REPORT(Log::G3D, "Normal projection mapping without normal?");
}
Expand Down Expand Up @@ -497,13 +436,11 @@ void SoftwareTransform::BuildDrawingParams(int prim, int vertexCount, u32 vertTy

if (prim == GE_PRIM_RECTANGLES) {
if (!ExpandRectangles(vertexCount, numDecodedVerts, vertsSize, inds, indsSize, transformed, transformedExpanded, numTrans, throughmode, &result->pixelMapped)) {
result->drawIndexed = false;
result->drawNumTrans = 0;
result->pixelMapped = false;
return;
}
result->drawBuffer = transformedExpanded;
result->drawIndexed = true;

// We don't know the color until here, so we have to do it now, instead of in StateMapping.
// Might want to reconsider the order of things later...
Expand All @@ -521,25 +458,20 @@ void SoftwareTransform::BuildDrawingParams(int prim, int vertexCount, u32 vertTy
} else if (prim == GE_PRIM_POINTS) {
result->pixelMapped = false;
if (!ExpandPoints(vertexCount, numDecodedVerts, vertsSize, inds, indsSize, transformed, transformedExpanded, numTrans, throughmode)) {
result->drawIndexed = false;
result->drawNumTrans = 0;
return;
}
result->drawBuffer = transformedExpanded;
result->drawIndexed = true;
} else if (prim == GE_PRIM_LINES) {
result->pixelMapped = false;
if (!ExpandLines(vertexCount, numDecodedVerts, vertsSize, inds, indsSize, transformed, transformedExpanded, numTrans, throughmode)) {
result->drawIndexed = false;
result->drawNumTrans = 0;
return;
}
result->drawBuffer = transformedExpanded;
result->drawIndexed = true;
} else {
// We can simply draw the unexpanded buffer.
numTrans = vertexCount;
result->drawIndexed = true;
result->pixelMapped = false;

// If we don't support custom cull in the shader, process it here.
Expand Down Expand Up @@ -635,7 +567,7 @@ void SoftwareTransform::BuildDrawingParams(int prim, int vertexCount, u32 vertTy
gpuStats.numClears++;
}

result->action = SW_DRAW_PRIMITIVES;
result->action = SW_DRAW_INDEXED;
result->drawNumTrans = numTrans;
}

Expand Down Expand Up @@ -758,6 +690,38 @@ bool SoftwareTransform::ExpandRectangles(int vertexCount, int &numDecodedVerts,
return true;
}

// In-place. So, better not be doing this on GPU memory!
void IndexBufferProvokingLastToFirst(int prim, u16 *inds, int indsSize) {
switch (prim) {
case GE_PRIM_LINES:
// Swap every two indices.
for (int i = 0; i < indsSize - 1; i += 2) {
u16 temp = inds[i];
inds[i] = inds[i + 1];
inds[i + 1] = temp;
}
break;
case GE_PRIM_TRIANGLES:
// Rotate the triangle so the last becomes the first, without changing the winding order.
// This could be done with a series of pshufb.
for (int i = 0; i < indsSize - 2; i += 3) {
u16 temp = inds[i + 2];
inds[i + 2] = inds[i + 1];
inds[i + 1] = inds[i];
inds[i] = temp;
}
break;
case GE_PRIM_POINTS:
// Nothing to do,
break;
case GE_PRIM_RECTANGLES:
// Nothing to do, already using the 2nd vertex.
break;
default:
_dbg_assert_msg_(false, "IndexBufferProvokingFirstToLast: Only works with plain indexed primitives, no strips or fans")
}
}

bool SoftwareTransform::ExpandLines(int vertexCount, int &numDecodedVerts, int vertsSize, u16 *&inds, int indsSize, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode) {
// Before we start, do a sanity check - does the output fit?
if ((vertexCount / 2) * 6 > indsSize) {
Expand Down
10 changes: 6 additions & 4 deletions GPU/Common/SoftwareTransformCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class TextureCacheCommon;

enum SoftwareTransformAction {
SW_NOT_READY,
SW_DRAW_PRIMITIVES,
SW_DRAW_INDEXED,
SW_CLEAR,
};

Expand All @@ -44,8 +44,6 @@ struct SoftwareTransformResult {

TransformedVertex *drawBuffer;
int drawNumTrans;
bool drawIndexed;

bool pixelMapped;
};

Expand All @@ -57,11 +55,15 @@ struct SoftwareTransformParams {
TextureCacheCommon *texCache;
bool allowClear;
bool allowSeparateAlphaClear;
bool provokeFlatFirst;
bool flippedY;
bool usesHalfZ;
};

// Converts an index buffer to make the provoking vertex the last.
// In-place. So, better not be doing this on GPU memory!
// TODO: We could do this already during index decode.
void IndexBufferProvokingLastToFirst(int prim, u16 *inds, int indsSize);

class SoftwareTransform {
public:
SoftwareTransform(SoftwareTransformParams &params) : params_(params) {}
Expand Down
4 changes: 3 additions & 1 deletion GPU/D3D11/D3D11Util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@ std::vector<uint8_t> CompileShaderToBytecodeD3D11(const char *code, size_t codeS
if (trimmed.find("pow(f, e) will not work for negative f") != std::string::npos) {
continue;
}
WARN_LOG(Log::G3D, "%.*s", (int)trimmed.length(), trimmed.data());
if (trimmed.size() > 1) { // ignore single nulls, not sure how they appear.
WARN_LOG(Log::G3D, "%.*s", (int)trimmed.length(), trimmed.data());
}
}
} else {
ERROR_LOG(Log::G3D, "%s: %s\n\n%s", "errors", errors.c_str(), numberedCode.c_str());
Expand Down
27 changes: 14 additions & 13 deletions GPU/D3D11/DrawEngineD3D11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -382,10 +382,15 @@ void DrawEngineD3D11::DoFlush() {
params.texCache = textureCache_;
params.allowClear = true;
params.allowSeparateAlphaClear = false; // D3D11 doesn't support separate alpha clears
params.provokeFlatFirst = true;
params.flippedY = false;
params.usesHalfZ = true;

if (gstate.getShadeMode() == GE_SHADE_FLAT) {
// We need to rotate the index buffer to simulate a different provoking vertex.
// We do this before line expansion etc.
IndexBufferProvokingLastToFirst(prim, inds, vertexCount);
}

// We need correct viewport values in gstate_c already.
if (gstate_c.IsDirty(DIRTY_VIEWPORTSCISSOR_STATE)) {
ViewportAndScissor vpAndScissor;
Expand Down Expand Up @@ -424,7 +429,7 @@ void DrawEngineD3D11::DoFlush() {

ApplyDrawStateLate(result.setStencil, result.stencilValue);

if (result.action == SW_DRAW_PRIMITIVES) {
if (result.action == SW_DRAW_INDEXED) {
D3D11VertexShader *vshader;
D3D11FragmentShader *fshader;
shaderManager_->GetShaders(prim, dec_, &vshader, &fshader, pipelineState_, false, false, decOptions_.expandAllWeightsToFloat, true);
Expand Down Expand Up @@ -452,17 +457,13 @@ void DrawEngineD3D11::DoFlush() {
pushVerts_->EndPush(context_);
ID3D11Buffer *buf = pushVerts_->Buf();
context_->IASetVertexBuffers(0, 1, &buf, &stride, &vOffset);
if (result.drawIndexed) {
UINT iOffset;
int iSize = sizeof(uint16_t) * result.drawNumTrans;
uint8_t *iptr = pushInds_->BeginPush(context_, &iOffset, iSize);
memcpy(iptr, inds, iSize);
pushInds_->EndPush(context_);
context_->IASetIndexBuffer(pushInds_->Buf(), DXGI_FORMAT_R16_UINT, iOffset);
context_->DrawIndexed(result.drawNumTrans, 0, 0);
} else {
context_->Draw(result.drawNumTrans, 0);
}
UINT iOffset;
int iSize = sizeof(uint16_t) * result.drawNumTrans;
uint8_t *iptr = pushInds_->BeginPush(context_, &iOffset, iSize);
memcpy(iptr, inds, iSize);
pushInds_->EndPush(context_);
context_->IASetIndexBuffer(pushInds_->Buf(), DXGI_FORMAT_R16_UINT, iOffset);
context_->DrawIndexed(result.drawNumTrans, 0, 0);
} else if (result.action == SW_CLEAR) {
u32 clearColor = result.color;
float clearDepth = result.depth;
Expand Down
16 changes: 9 additions & 7 deletions GPU/Directx9/DrawEngineDX9.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -334,10 +334,16 @@ void DrawEngineDX9::DoFlush() {
params.texCache = textureCache_;
params.allowClear = true;
params.allowSeparateAlphaClear = false;
params.provokeFlatFirst = true;
params.flippedY = false;
params.usesHalfZ = true;

if (gstate.getShadeMode() == GE_SHADE_FLAT) {
// We need to rotate the index buffer to simulate a different provoking vertex.
// We do this before line expansion etc.
int indexCount = RemainingIndices(inds);
IndexBufferProvokingLastToFirst(prim, inds, vertexCount);
}

// We need correct viewport values in gstate_c already.
if (gstate_c.IsDirty(DIRTY_VIEWPORTSCISSOR_STATE)) {
ViewportAndScissor vpAndScissor;
Expand Down Expand Up @@ -382,7 +388,7 @@ void DrawEngineDX9::DoFlush() {

VSShader *vshader = shaderManager_->ApplyShader(false, false, dec_, decOptions_.expandAllWeightsToFloat, true, pipelineState_);

if (result.action == SW_DRAW_PRIMITIVES) {
if (result.action == SW_DRAW_INDEXED) {
if (result.setStencil) {
dxstate.stencilFunc.set(D3DCMP_ALWAYS);
dxstate.stencilRef.set(result.stencilValue);
Expand All @@ -393,11 +399,7 @@ void DrawEngineDX9::DoFlush() {
// Might help for text drawing.

device_->SetVertexDeclaration(transformedVertexDecl_);
if (result.drawIndexed) {
device_->DrawIndexedPrimitiveUP(d3d_prim[prim], 0, numDecodedVerts_, D3DPrimCount(d3d_prim[prim], result.drawNumTrans), inds, D3DFMT_INDEX16, result.drawBuffer, sizeof(TransformedVertex));
} else {
device_->DrawPrimitiveUP(d3d_prim[prim], D3DPrimCount(d3d_prim[prim], result.drawNumTrans), result.drawBuffer, sizeof(TransformedVertex));
}
device_->DrawIndexedPrimitiveUP(d3d_prim[prim], 0, numDecodedVerts_, D3DPrimCount(d3d_prim[prim], result.drawNumTrans), inds, D3DFMT_INDEX16, result.drawBuffer, sizeof(TransformedVertex));
} else if (result.action == SW_CLEAR) {
u32 clearColor = result.color;
float clearDepth = result.depth;
Expand Down
Loading

0 comments on commit b8f0558

Please sign in to comment.