Fix UAC interleaved copy.

This commit is contained in:
HiFiPhile 2023-09-19 15:42:52 +02:00
parent 46f7cf4da2
commit 9d0251f7a6
1 changed files with 51 additions and 62 deletions

View File

@@ -631,68 +631,59 @@ static bool audiod_rx_done_cb(uint8_t rhport, audiod_function_t* audio, uint16_t
// Decoding according to 2.3.1.5 Audio Streams // Decoding according to 2.3.1.5 Audio Streams
// Helper function // Helper function
static inline uint8_t * audiod_interleaved_copy_bytes_fast_decode(uint16_t const nBytesToCopy, void * dst, uint8_t * dst_end, uint8_t * src, uint8_t const n_ff_used) static inline uint8_t * audiod_interleaved_copy_bytes_fast_decode(uint16_t const nBytesToCopy, uint8_t * dst, uint8_t * dst_end, uint8_t * src, uint8_t const n_ff_used)
{ {
// This function is an optimized version of // Because one FIFO contains 2 channels, data is always aligned to (nBytesToCopy * 2)
// while((uint8_t *)dst < dst_end)
// {
// memcpy(dst, src, nBytesToCopy);
// dst = (uint8_t *)dst + nBytesToCopy;
// src += nBytesToCopy * n_ff_used;
// }
// Optimize for fast half word copies
typedef struct{
uint16_t val;
} __attribute((__packed__)) unaligned_uint16_t;
// Optimize for fast word copies
typedef struct{
uint32_t val;
} __attribute((__packed__)) unaligned_uint32_t;
switch (nBytesToCopy) switch (nBytesToCopy)
{ {
case 1: case 1:
while((uint8_t *)dst < dst_end) while((uint8_t *)dst < dst_end)
{ {
*(uint8_t *)dst++ = *src; *(uint16_t*)dst = *(uint16_t*)src;
src += n_ff_used; src += 2;
dst += 2;
src += 2 * (n_ff_used - 1);
} }
break; break;
case 2: case 2:
while((uint8_t *)dst < dst_end) while((uint8_t *)dst < dst_end)
{ {
*(unaligned_uint16_t*)dst = *(unaligned_uint16_t*)src; *(uint32_t*)dst = *(uint32_t*)src;
dst += 2; src += 4;
src += 2 * n_ff_used; dst += 4;
src += 4 * (n_ff_used - 1);
} }
break; break;
case 3: case 3:
while((uint8_t *)dst < dst_end) while((uint8_t *)dst < dst_end)
{ {
// memcpy(dst, src, 3); *(uint16_t*)dst = *(uint16_t*)src;
// dst = (uint8_t *)dst + 3; src += 2;
// src += 3 * n_ff_used; dst += 2;
*(uint16_t*)dst = *(uint16_t*)src;
// TODO: Is there a faster way to copy 3 bytes? src += 2;
*(uint8_t *)dst++ = *src++; dst += 2;
*(uint8_t *)dst++ = *src++; *(uint16_t*)dst = *(uint16_t*)src;
*(uint8_t *)dst++ = *src++; src += 2;
dst += 2;
src += 3 * (n_ff_used - 1); src += 6 * (n_ff_used - 1);
} }
break; break;
case 4: case 4:
while((uint8_t *)dst < dst_end) while((uint8_t *)dst < dst_end)
{ {
*(unaligned_uint32_t*)dst = *(unaligned_uint32_t*)src; *(uint32_t*)dst++ = *(uint32_t*)src++;
src += 4;
dst += 4; dst += 4;
src += 4 * n_ff_used; *(uint32_t*)dst++ = *(uint32_t*)src++;
src += 4;
dst += 4;
src += 8 * (n_ff_used - 1);
} }
break; break;
} }
@@ -946,57 +937,55 @@ range [-1, +1)
// Helper function // Helper function
static inline uint8_t * audiod_interleaved_copy_bytes_fast_encode(uint16_t const nBytesToCopy, uint8_t * src, uint8_t * src_end, uint8_t * dst, uint8_t const n_ff_used) static inline uint8_t * audiod_interleaved_copy_bytes_fast_encode(uint16_t const nBytesToCopy, uint8_t * src, uint8_t * src_end, uint8_t * dst, uint8_t const n_ff_used)
{ {
// Optimize for fast half word copies // Because one FIFO contains 2 channels, data is always aligned to (nBytesToCopy * 2)
typedef struct{
uint16_t val;
} __attribute((__packed__)) unaligned_uint16_t;
// Optimize for fast word copies
typedef struct{
uint32_t val;
} __attribute((__packed__)) unaligned_uint32_t;
switch (nBytesToCopy) switch (nBytesToCopy)
{ {
case 1: case 1:
while(src < src_end) while(src < src_end)
{ {
*dst = *src++; *(uint16_t*)dst = *(uint16_t*)src;
dst += n_ff_used; src += 2;
dst += 2;
dst += 2 * (n_ff_used - 1);
} }
break; break;
case 2: case 2:
while(src < src_end) while(src < src_end)
{ {
*(unaligned_uint16_t*)dst = *(unaligned_uint16_t*)src; *(uint32_t*)dst = *(uint32_t*)src;
src += 2; src += 4;
dst += 2 * n_ff_used; dst += 4;
dst += 4 * (n_ff_used - 1);
} }
break; break;
case 3: case 3:
while(src < src_end) while(src < src_end)
{ {
// memcpy(dst, src, 3); *(uint16_t*)dst = *(uint16_t*)src;
// src = (uint8_t *)src + 3; src += 2;
// dst += 3 * n_ff_used; dst += 2;
*(uint16_t*)dst = *(uint16_t*)src;
// TODO: Is there a faster way to copy 3 bytes? src += 2;
*dst++ = *src++; dst += 2;
*dst++ = *src++; *(uint16_t*)dst = *(uint16_t*)src;
*dst++ = *src++; src += 2;
dst += 2;
dst += 3 * (n_ff_used - 1); dst += 6 * (n_ff_used - 1);
} }
break; break;
case 4: case 4:
while(src < src_end) while(src < src_end)
{ {
*(unaligned_uint32_t*)dst = *(unaligned_uint32_t*)src; *(uint32_t*)dst++ = *(uint32_t*)src++;
src += 4; src += 4;
dst += 4 * n_ff_used; dst += 4;
*(uint32_t*)dst++ = *(uint32_t*)src++;
src += 4;
dst += 4;
dst += 8 * (n_ff_used - 1);
} }
break; break;
} }