Merge pull request #63430 from lawnjelly/audioserver_optimize

This commit is contained in:
Rémi Verschelde 2022-08-08 15:44:04 +02:00 committed by GitHub
commit ee3bad3f92
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -261,7 +261,14 @@ void AudioServer::_driver_process(int p_frames, int32_t *p_buffer) {
//master master, send to output //master master, send to output
int cs = master->channels.size(); int cs = master->channels.size();
// The per-frame stride is cs * 2 samples; subtract 1 because dest is already advanced by one between the left and right samples.
uintptr_t stride_minus_one = (cs * 2) - 1;
for (int k = 0; k < cs; k++) { for (int k = 0; k < cs; k++) {
// destination start for data will be the same in all cases
int32_t *dest = &p_buffer[(from_buf * (cs * 2)) + (k * 2)];
if (master->channels[k].active) { if (master->channels[k].active) {
const AudioFrame *buf = master->channels[k].buffer.ptr(); const AudioFrame *buf = master->channels[k].buffer.ptr();
@ -269,18 +276,25 @@ void AudioServer::_driver_process(int p_frames, int32_t *p_buffer) {
float l = CLAMP(buf[from + j].l, -1.0, 1.0); float l = CLAMP(buf[from + j].l, -1.0, 1.0);
int32_t vl = l * ((1 << 20) - 1); int32_t vl = l * ((1 << 20) - 1);
int32_t vl2 = (vl < 0 ? -1 : 1) * (ABS(vl) << 11); int32_t vl2 = (vl < 0 ? -1 : 1) * (ABS(vl) << 11);
p_buffer[(from_buf + j) * (cs * 2) + k * 2 + 0] = vl2; *dest = vl2;
dest++;
float r = CLAMP(buf[from + j].r, -1.0, 1.0); float r = CLAMP(buf[from + j].r, -1.0, 1.0);
int32_t vr = r * ((1 << 20) - 1); int32_t vr = r * ((1 << 20) - 1);
int32_t vr2 = (vr < 0 ? -1 : 1) * (ABS(vr) << 11); int32_t vr2 = (vr < 0 ? -1 : 1) * (ABS(vr) << 11);
p_buffer[(from_buf + j) * (cs * 2) + k * 2 + 1] = vr2; *dest = vr2;
dest += stride_minus_one;
} }
} else { } else {
// Bizarrely, profiling indicates that detecting the common case of cs == 1
// and k == 0 and using memset is SLOWER than setting the samples individually.
// (Perhaps the compiler optimizes this loop into something faster than memset.)
for (int j = 0; j < to_copy; j++) { for (int j = 0; j < to_copy; j++) {
p_buffer[(from_buf + j) * (cs * 2) + k * 2 + 0] = 0; *dest = 0;
p_buffer[(from_buf + j) * (cs * 2) + k * 2 + 1] = 0; dest++;
*dest = 0;
dest += stride_minus_one;
} }
} }
} }