Browse Source

Inline triLinearInterpolationNoEase and triLinearInterpolation (#12421)

Performance profiling on Linux AMD64 showed this to be a significant bottleneck. The non-inlined functions are expensive due to XMM registers spilling onto the stack.
paradust7 1 year ago
parent
commit
7ffc0268df
1 changed files with 28 additions and 61 deletions
  1. 28 61
      src/noise.cpp

+ 28 - 61
src/noise.cpp

@@ -38,15 +38,6 @@
 // Unsigned magic seed prevents undefined behavior.
 #define NOISE_MAGIC_SEED 1013U
 
-typedef float (*Interp2dFxn)(
-		float v00, float v10, float v01, float v11,
-		float x, float y);
-
-typedef float (*Interp3dFxn)(
-		float v000, float v100, float v010, float v110,
-		float v001, float v101, float v011, float v111,
-		float x, float y, float z);
-
 FlagDesc flagdesc_noiseparams[] = {
 	{"defaults",    NOISE_FLAG_DEFAULTS},
 	{"eased",       NOISE_FLAG_EASED},
@@ -198,47 +189,34 @@ inline float linearInterpolation(float v0, float v1, float t)
 inline float biLinearInterpolation(
 	float v00, float v10,
 	float v01, float v11,
-	float x, float y)
-{
-	float tx = easeCurve(x);
-	float ty = easeCurve(y);
-	float u = linearInterpolation(v00, v10, tx);
-	float v = linearInterpolation(v01, v11, tx);
-	return linearInterpolation(u, v, ty);
-}
-
-
-inline float biLinearInterpolationNoEase(
-	float v00, float v10,
-	float v01, float v11,
-	float x, float y)
+	float x, float y,
+	bool eased)
 {
+	// Inlining will optimize this branch out when possible
+	if (eased) {
+		x = easeCurve(x);
+		y = easeCurve(y);
+	}
 	float u = linearInterpolation(v00, v10, x);
 	float v = linearInterpolation(v01, v11, x);
 	return linearInterpolation(u, v, y);
 }
 
 
-float triLinearInterpolation(
-	float v000, float v100, float v010, float v110,
-	float v001, float v101, float v011, float v111,
-	float x, float y, float z)
-{
-	float tx = easeCurve(x);
-	float ty = easeCurve(y);
-	float tz = easeCurve(z);
-	float u = biLinearInterpolationNoEase(v000, v100, v010, v110, tx, ty);
-	float v = biLinearInterpolationNoEase(v001, v101, v011, v111, tx, ty);
-	return linearInterpolation(u, v, tz);
-}
-
-float triLinearInterpolationNoEase(
+inline float triLinearInterpolation(
 	float v000, float v100, float v010, float v110,
 	float v001, float v101, float v011, float v111,
-	float x, float y, float z)
+	float x, float y, float z,
+	bool eased)
 {
-	float u = biLinearInterpolationNoEase(v000, v100, v010, v110, x, y);
-	float v = biLinearInterpolationNoEase(v001, v101, v011, v111, x, y);
+	// Inlining will optimize this branch out when possible
+	if (eased) {
+		x = easeCurve(x);
+		y = easeCurve(y);
+		z = easeCurve(z);
+	}
+	float u = biLinearInterpolation(v000, v100, v010, v110, x, y, false);
+	float v = biLinearInterpolation(v001, v101, v011, v111, x, y, false);
 	return linearInterpolation(u, v, z);
 }
 
@@ -256,10 +234,7 @@ float noise2d_gradient(float x, float y, s32 seed, bool eased)
 	float v01 = noise2d(x0, y0+1, seed);
 	float v11 = noise2d(x0+1, y0+1, seed);
 	// Interpolate
-	if (eased)
-		return biLinearInterpolation(v00, v10, v01, v11, xl, yl);
-
-	return biLinearInterpolationNoEase(v00, v10, v01, v11, xl, yl);
+	return biLinearInterpolation(v00, v10, v01, v11, xl, yl, eased);
 }
 
 
@@ -283,17 +258,11 @@ float noise3d_gradient(float x, float y, float z, s32 seed, bool eased)
 	float v011 = noise3d(x0,     y0 + 1, z0 + 1, seed);
 	float v111 = noise3d(x0 + 1, y0 + 1, z0 + 1, seed);
 	// Interpolate
-	if (eased) {
-		return triLinearInterpolation(
-			v000, v100, v010, v110,
-			v001, v101, v011, v111,
-			xl, yl, zl);
-	}
-
-	return triLinearInterpolationNoEase(
+	return triLinearInterpolation(
 		v000, v100, v010, v110,
 		v001, v101, v011, v111,
-		xl, yl, zl);
+		xl, yl, zl,
+		eased);
 }
 
 
@@ -518,9 +487,6 @@ void Noise::gradientMap2D(
 	s32 x0, y0;
 
 	bool eased = np.flags & (NOISE_FLAG_DEFAULTS | NOISE_FLAG_EASED);
-	Interp2dFxn interpolate = eased ?
-		biLinearInterpolation : biLinearInterpolationNoEase;
-
 	x0 = std::floor(x);
 	y0 = std::floor(y);
 	u = x - (float)x0;
@@ -547,7 +513,8 @@ void Noise::gradientMap2D(
 		u = orig_u;
 		noisex = 0;
 		for (i = 0; i != sx; i++) {
-			gradient_buf[index++] = interpolate(v00, v10, v01, v11, u, v);
+			gradient_buf[index++] =
+				biLinearInterpolation(v00, v10, v01, v11, u, v, eased);
 
 			u += step_x;
 			if (u >= 1.0) {
@@ -583,8 +550,7 @@ void Noise::gradientMap3D(
 	u32 nlx, nly, nlz;
 	s32 x0, y0, z0;
 
-	Interp3dFxn interpolate = (np.flags & NOISE_FLAG_EASED) ?
-		triLinearInterpolation : triLinearInterpolationNoEase;
+	bool eased = np.flags & NOISE_FLAG_EASED;
 
 	x0 = std::floor(x);
 	y0 = std::floor(y);
@@ -625,10 +591,11 @@ void Noise::gradientMap3D(
 			u = orig_u;
 			noisex = 0;
 			for (i = 0; i != sx; i++) {
-				gradient_buf[index++] = interpolate(
+				gradient_buf[index++] = triLinearInterpolation(
 					v000, v100, v010, v110,
 					v001, v101, v011, v111,
-					u, v, w);
+					u, v, w,
+					eased);
 
 				u += step_x;
 				if (u >= 1.0) {