diff --git a/core/math/color.h b/core/math/color.h
index e17b8c9fd70..70fad78acbd 100644
--- a/core/math/color.h
+++ b/core/math/color.h
@@ -129,33 +129,46 @@ struct [[nodiscard]] Color {
 	}
 
 	_FORCE_INLINE_ uint32_t to_rgbe9995() const {
-		const float pow2to9 = 512.0f;
-		const float B = 15.0f;
-		const float N = 9.0f;
+		// https://github.com/microsoft/DirectX-Graphics-Samples/blob/v10.0.19041.0/MiniEngine/Core/Color.cpp
+		static const float kMaxVal = float(0x1FF << 7);
+		static const float kMinVal = float(1.f / (1 << 16));
 
-		float sharedexp = 65408.000f; // Result of: ((pow2to9 - 1.0f) / pow2to9) * powf(2.0f, 31.0f - 15.0f)
+		// Clamp RGB to [0, 1.FF*2^16]
+		const float _r = CLAMP(r, 0.0f, kMaxVal);
+		const float _g = CLAMP(g, 0.0f, kMaxVal);
+		const float _b = CLAMP(b, 0.0f, kMaxVal);
 
-		float cRed = MAX(0.0f, MIN(sharedexp, r));
-		float cGreen = MAX(0.0f, MIN(sharedexp, g));
-		float cBlue = MAX(0.0f, MIN(sharedexp, b));
+		// Compute the maximum channel, no less than 1.0*2^-15
+		const float MaxChannel = MAX(MAX(_r, _g), MAX(_b, kMinVal));
 
-		float cMax = MAX(cRed, MAX(cGreen, cBlue));
+		// Take the exponent of the maximum channel (rounding up the 9th bit) and
+		// add 15 to it.  When added to the channels, it causes the implicit '1.0'
+		// bit and the first 8 mantissa bits to be shifted down to the low 9 bits
+		// of the mantissa, rounding the truncated bits.
+		union {
+			float f;
+			int32_t i;
+		} R, G, B, E;
 
-		float expp = MAX(-B - 1.0f, floor(Math::log(cMax) / (real_t)Math_LN2)) + 1.0f + B;
+		E.f = MaxChannel;
+		E.i += 0x07804000; // Add 15 to the exponent and 0x4000 to the mantissa
+		E.i &= 0x7F800000; // Zero the mantissa
 
-		float sMax = (float)floor((cMax / Math::pow(2.0f, expp - B - N)) + 0.5f);
+		// This shifts the 9-bit values we need into the lowest bits, rounding as
+		// needed. Note that if the channel has a smaller exponent than the max
+		// channel, it will shift even more.  This is intentional.
+		R.f = _r + E.f;
+		G.f = _g + E.f;
+		B.f = _b + E.f;
 
-		float exps = expp + 1.0f;
+		// Convert the Bias to the correct exponent in the upper 5 bits.
+		E.i <<= 4;
+		E.i += 0x10000000;
 
-		if (0.0f <= sMax && sMax < pow2to9) {
-			exps = expp;
-		}
-
-		float sRed = Math::floor((cRed / pow(2.0f, exps - B - N)) + 0.5f);
-		float sGreen = Math::floor((cGreen / pow(2.0f, exps - B - N)) + 0.5f);
-		float sBlue = Math::floor((cBlue / pow(2.0f, exps - B - N)) + 0.5f);
-
-		return (uint32_t(Math::fast_ftoi(sRed)) & 0x1FF) | ((uint32_t(Math::fast_ftoi(sGreen)) & 0x1FF) << 9) | ((uint32_t(Math::fast_ftoi(sBlue)) & 0x1FF) << 18) | ((uint32_t(Math::fast_ftoi(exps)) & 0x1F) << 27);
+		// Combine the fields. RGB floats have unwanted data in the upper 9
+		// bits. Only red needs to mask them off because green and blue shift
+		// it out to the left.
+		return E.i | (B.i << 18) | (G.i << 9) | (R.i & 511);
 	}
 
 	_FORCE_INLINE_ Color blend(const Color &p_over) const {
diff --git a/modules/hdr/image_loader_hdr.cpp b/modules/hdr/image_loader_hdr.cpp
index c49c62a08b6..ba59bb25eee 100644
--- a/modules/hdr/image_loader_hdr.cpp
+++ b/modules/hdr/image_loader_hdr.cpp
@@ -68,9 +68,11 @@ Error ImageLoaderHDR::load_image(Ref<Image> p_image, Ref<FileAccess> f, BitField
 	imgdata.resize(height * width * (int)sizeof(uint32_t));
 
 	{
-		uint8_t *w = imgdata.ptrw();
+		uint8_t *ptr = imgdata.ptrw();
 
-		uint8_t *ptr = (uint8_t *)w;
+		Vector<uint8_t> temp_read_data;
+		temp_read_data.resize(128);
+		uint8_t *temp_read_ptr = temp_read_data.ptrw();
 
 		if (width < 8 || width >= 32768) {
 			// Read flat data
@@ -113,8 +115,9 @@ Error ImageLoaderHDR::load_image(Ref<Image> p_image, Ref<FileAccess> f, BitField
 							}
 						} else {
 							// Dump
+							f->get_buffer(temp_read_ptr, count);
 							for (int z = 0; z < count; ++z) {
-								ptr[(j * width + i++) * 4 + k] = f->get_8();
+								ptr[(j * width + i++) * 4 + k] = temp_read_ptr[z];
 							}
 						}
 					}
@@ -122,20 +125,27 @@ Error ImageLoaderHDR::load_image(Ref<Image> p_image, Ref<FileAccess> f, BitField
 			}
 		}
 
+		const bool force_linear = p_flags & FLAG_FORCE_LINEAR;
+
 		//convert
 		for (int i = 0; i < width * height; i++) {
-			float exp = pow(2.0f, ptr[3] - 128.0f);
+			int e = ptr[3] - 128;
 
-			Color c(
-					ptr[0] * exp / 255.0,
-					ptr[1] * exp / 255.0,
-					ptr[2] * exp / 255.0);
+			if (force_linear || (e < -15 || e > 15)) {
+				float exp = pow(2.0f, e);
+				Color c(ptr[0] * exp / 255.0, ptr[1] * exp / 255.0, ptr[2] * exp / 255.0);
 
-			if (p_flags & FLAG_FORCE_LINEAR) {
-				c = c.srgb_to_linear();
+				if (force_linear) {
+					c = c.srgb_to_linear();
+				}
+
+				*(uint32_t *)ptr = c.to_rgbe9995();
+			} else {
+				// https://github.com/george-steel/rgbe-rs/blob/e7cc33b7f42b4eb3272c166dac75385e48687c92/src/types.rs#L123-L129
+				uint32_t e5 = (uint32_t)(e + 15);
+				*(uint32_t *)ptr = ((e5 << 27) | ((uint32_t)ptr[2] << 19) | ((uint32_t)ptr[1] << 10) | ((uint32_t)ptr[0] << 1));
 			}
 
-			*(uint32_t *)ptr = c.to_rgbe9995();
 			ptr += 4;
 		}
 	}
diff --git a/modules/hdr/image_loader_hdr.h b/modules/hdr/image_loader_hdr.h
index 9821db059e1..0a8e91fb9e5 100644
--- a/modules/hdr/image_loader_hdr.h
+++ b/modules/hdr/image_loader_hdr.h
@@ -37,6 +37,7 @@ class ImageLoaderHDR : public ImageFormatLoader {
 public:
 	virtual Error load_image(Ref<Image> p_image, Ref<FileAccess> f, BitField<ImageFormatLoader::LoaderFlags> p_flags, float p_scale);
 	virtual void get_recognized_extensions(List<String> *p_extensions) const;
+
 	ImageLoaderHDR();
 };