formats: Improve start code lookup

Refactored so the code is somewhat readable. Added few missing cases
for partial start codes.
This commit is contained in:
Joni Räsänen 2022-08-31 16:22:21 +03:00
parent cc8a144b04
commit fef90f154c
1 changed files with 145 additions and 122 deletions

View File

@ -21,6 +21,7 @@
#define PTR_DIFF(a, b) ((ptrdiff_t)((char *)(a) - (char *)(b)))
// see https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
#define haszero64_le(v) (((v) - 0x0101010101010101) & ~(v) & 0x8080808080808080UL)
#define haszero32_le(v) (((v) - 0x01010101) & ~(v) & 0x80808080UL)
@ -40,30 +41,35 @@ constexpr int GARBAGE_COLLECTION_INTERVAL_MS = 100;
// any value less than 30 minutes is ok here, since that is how long it takes to go through all timestamps
constexpr int TIME_TO_KEEP_TRACK_OF_PREVIOUS_FRAMES_MS = 5000;
static inline uint8_t __find_h26x_start(uint32_t value,bool& additional_byte)
static inline uint8_t determine_start_prefix_precense(uint32_t value, bool& additional_byte)
{
additional_byte = false;
#if __BYTE_ORDER == __LITTLE_ENDIAN
uint16_t u = (value >> 16) & 0xffff;
uint16_t l = (value >> 0) & 0xffff;
uint16_t cur_ls = (value >> 16) & 0xffff;
uint16_t cur_ms = (value >> 0) & 0xffff;
bool t1 = (l == 0);
bool t2 = ((u & 0xff) == 0x01);
bool t3 = (u == 0x0100);
bool t4 = (((l >> 8) & 0xff) == 0);
// zeros in more significant bytes
bool ms4z = (cur_ms == 0);
bool ms2z = (((cur_ms >> 8) & 0xff) == 0);
// possible start code end in less significant bytes
bool ls2s = ((cur_ls & 0xff) == 0x01);
bool ls4s = (cur_ls == 0x0100);
#else
uint16_t u = (value >> 0) & 0xffff;
uint16_t l = (value >> 16) & 0xffff;
uint16_t cur_ls = (value >> 0) & 0xffff;
uint16_t cur_ms = (value >> 16) & 0xffff;
bool t1 = (l == 0);
bool t2 = (((u >> 8) & 0xff) == 0x01);
bool t3 = (u == 0x0001);
bool t4 = ((l & 0xff) == 0);
bool ms4z = ( cur_ms == 0);
bool ms2z = ((cur_ms & 0xff) == 0);
bool ls2s = (((cur_ls >> 8) & 0xff) == 0x01);
bool ls4s = ( cur_ls == 0x0001);
#endif
if (t1) {
if (ms4z) {
/* 0x00000001 */
if (t3)
if (ls4s)
return 4;
/* "value" definitely has a start code (0x000001XX), but at this
@ -71,9 +77,9 @@ static inline uint8_t __find_h26x_start(uint32_t value,bool& additional_byte)
*
* Return 5 to indicate that start length could not be determined
* and that caller must check previous dword's last byte for 0x00 */
if (t2)
if (ls2s)
return 5;
} else if (t4 && t3) {
} else if (ms2z && ls4s) {
/* 0xXX000001 */
additional_byte = true;
return 4;
@ -110,163 +116,180 @@ ssize_t uvgrtp::formats::h26x::find_h26x_start_code(
uint8_t *data,
size_t len,
size_t offset,
uint8_t& start_len
)
uint8_t& start_len)
{
bool prev_z = false;
bool cur_z = false;
size_t pos = offset;
size_t rpos = len - (len % 8) - 1;
uint8_t *ptr = data + offset;
uint8_t *tmp = nullptr;
uint8_t lb = 0;
uint32_t prev = UINT32_MAX;
if (data == nullptr || len < offset || len < 1)
{
UVG_LOG_WARN("Invalid parameter found for start code lookup");
return -1;
}
uint64_t prefetch = UINT64_MAX;
uint32_t value = UINT32_MAX;
bool prev_had_zero = false;
bool cur_has_zero = false;
size_t pos = offset;
size_t last_byte_position = len - (len % 8) - 1;
/* We can get rid of the bounds check when looping through
* non-zero 8 byte chunks by setting the last byte to zero.
*
* This added zero will make the last 8 byte zero check to fail
* and when we get out of the loop we can check if we've reached the end */
lb = data[rpos];
data[rpos] = 0;
uint8_t temp_last_byte = data[last_byte_position];
data[last_byte_position] = 0;
uint32_t prev_value32 = UINT32_MAX;
uint32_t cur_value32 = UINT32_MAX;
uint64_t prefetch64 = UINT64_MAX;
while (pos + 8 < len) {
prefetch = *(uint64_t *)ptr;
if (!prev_z)
if (!prev_had_zero)
{
// since we know that start code prefix has zeros, we find the next dword that has zeros
while (!cur_has_zero && pos + 8 < len)
{
prefetch64 = *(uint64_t*)(data + pos);
#if __BYTE_ORDER == __LITTLE_ENDIAN
cur_z = haszero64_le(prefetch);
cur_has_zero = haszero64_le(prefetch64);
#else
cur_z = haszero64_be(prefetch)
cur_has_zero = haszero64_be(prefetch64);
#endif
if (!cur_z) {
/* pos is not used in the following loop so it makes little sense to
* update it on every iteration. Faster way to do the loop is to save
* ptr's current value before loop, update only ptr in the loop and when
* the loop is exited, calculate the difference between tmp and ptr to get
* the number of iterations done * 8 */
tmp = ptr;
do {
ptr += 8;
prefetch = *(uint64_t*)ptr;
#if __BYTE_ORDER == __LITTLE_ENDIAN
cur_z = haszero64_le(prefetch);
#else
cur_z = haszero64_be(prefetch);
#endif
} while (!cur_z);
pos += PTR_DIFF(ptr, tmp);
if (pos + 8 >= len)
break;
if (!cur_has_zero)
{
pos += 8;
}
}
}
value = *(uint32_t *)ptr;
if (cur_z)
#if __BYTE_ORDER == __LITTLE_ENDIAN
cur_z = haszero32_le(value);
#else
cur_z = haszero32_be(value);
#endif
if (!prev_z && !cur_z)
goto end;
/* Previous dword had zeros but this doesn't. The only way there might be a start code
* is if the most significant byte of current dword is 0x01 */
if (prev_z && !cur_z) {
#if __BYTE_ORDER == __LITTLE_ENDIAN
/* previous dword: 0xXX000000 or 0xXXXX0000 and current dword 0x01XXXXXX */
if (((value >> 0) & 0xff) == 0x01 && ((prev >> 16) & 0xffff) == 0) {
start_len = (((prev >> 8) & 0xffffff) == 0) ? 4 : 3;
#else
if (((value >> 24) & 0xff) == 0x01 && ((prev >> 0) & 0xffff) == 0) {
start_len = (((prev >> 0) & 0xffffff) == 0) ? 4 : 3;
#endif
data[rpos] = lb;
return pos + 1;
}
if (pos + 8 < len)
{
cur_value32 = *(uint32_t*)(data + pos);
}
if (cur_has_zero)
{
#if __BYTE_ORDER == __LITTLE_ENDIAN
cur_has_zero = haszero32_le(cur_value32);
#else
cur_has_zero = haszero32_be(cur_value32);
#endif
}
if (prev_had_zero || cur_has_zero)
{
/* Previous dword had zeros but this doesn't. The only way there might be a start code
* is if the most significant byte of current dword is 0x01 */
if (prev_had_zero && !cur_has_zero) {
/* previous dword: 0xXX000000 or 0xXXXX0000 and current dword 0x01XXXXXX */
#if __BYTE_ORDER == __LITTLE_ENDIAN
if (((cur_value32 >> 0) & 0xff) == 0x01 && ((prev_value32 >> 16) & 0xffff) == 0) {
start_len = (((prev_value32 >> 8) & 0xffffff) == 0) ? 4 : 3;
#else
if (((cur_value32 >> 24) & 0xff) == 0x01 && ((prev_value32 >> 0) & 0xffff) == 0) {
start_len = (((prev_value32 >> 0) & 0xffffff) == 0) ? 4 : 3;
#endif
data[last_byte_position] = temp_last_byte;
return pos + 1;
}
}
// find out if the current value as a whole contains start code prefix
bool additional_byte = false;
uint8_t ret = __find_h26x_start(value, additional_byte);
uint8_t ret = determine_start_prefix_precense(cur_value32, additional_byte);
start_len = ret;
if (ret > 0) {
if (ret == 5) {
// ret 5 means we don't know how long the start code is so we check it
ret = 3;
#if __BYTE_ORDER == __LITTLE_ENDIAN
start_len = (((prev >> 24) & 0xff) == 0) ? 4 : 3;
start_len = (((prev_value32 >> 24) & 0xff) == 0) ? 4 : 3;
#else
start_len = (((prev >> 0) & 0xff) == 0) ? 4 : 3;
start_len = (((prev_value32 >> 0) & 0xff) == 0) ? 4 : 3;
#endif
}
if (additional_byte) start_len--;
data[rpos] = lb;
if (additional_byte)
{
--start_len;
}
data[last_byte_position] = temp_last_byte;
return pos + ret;
}
// see if the start code prefix is split between previous and this dword
#if __BYTE_ORDER == __LITTLE_ENDIAN
uint16_t u = (value >> 16) & 0xffff;
uint16_t l = (value >> 0) & 0xffff;
uint16_t p = (prev >> 16) & 0xffff;
uint16_t cur_ls = (cur_value32 >> 16) & 0xffff; // current less significant word
uint16_t cur_ms = (cur_value32 >> 0) & 0xffff; // corrent more significant word
uint16_t prev_ls = (prev_value32 >> 16) & 0xffff; // previous less significant word
// previous has 4 zeros
bool p4z = (prev_ls == 0);
// previous has 2 zeros
bool p2z = (((prev_ls >> 8) & 0xff) == 0);
// current has 2 bytes of possible start code
bool c2s = (((cur_ms >> 8) & 0xff) == 0x01);
// previous has 4 bytes of possible start code
bool c4s = (cur_ms == 0x0100); // current starts with 0001
// previous has 6 bytes of start code
bool c6s = (cur_ms == 0x0000 && (cur_ls & 0xff) == 0x01); // current is 000001XX
bool t1 = ((p & 0xffff) == 0);
bool t2 = (((p >> 8) & 0xff) == 0);
bool t4 = (l == 0x0100);
bool t5 = (l == 0x0000 && u == 0x01);
#else
uint16_t u = (value >> 0) & 0xffff;
uint16_t l = (value >> 16) & 0xffff;
uint16_t p = (prev >> 0) & 0xffff;
uint16_t cur_ls = (cur_value32 >> 0) & 0xffff;
uint16_t cur_ms = (cur_value32 >> 16) & 0xffff;
uint16_t prev_ls = (prev_value32 >> 0) & 0xffff;
bool t1 = ((p & 0xffff) == 0);
bool t2 = ((p & 0xff) == 0);
bool t4 = (l == 0x0001);
bool t5 = (l == 0x0000 && u == 0x01);
bool p4z = (prev_ls == 0);
bool p2z = ((prev_ls & 0xff) == 0);
bool c2s = ((cur_ms & 0xff) == 0x01);
bool c4s = (cur_ms == 0x0001);
bool c6s = (cur_ms == 0x0000 && ((cur_ls >> 8) & 0xff) == 0x01);
#endif
if (t1 && t4) {
/* previous dword 0xxxxx0000 and current dword is 0x0001XXXX */
if (t4) {
// all possible start code modes between two bytes
if (p4z) {
// previous dword 0xXXXX0000
if (c4s) {
// current dword is 0x0001XXXX
start_len = 4;
data[rpos] = lb;
data[last_byte_position] = temp_last_byte;
return pos + 2;
}
/* Previous dwod was 0xXXXXXX00 */
} else if (t2) {
/* Current dword is 0x000001XX */
if (t5) {
else if (c2s)
{
// current dword is 0x01XXXX
start_len = 3;
data[last_byte_position] = temp_last_byte;
return pos + 2;
}
} else if (p2z) {
// Previous dword was 0xXXXXXX00
if (c6s) {
// Current dword is 0x000001XX
start_len = 4;
data[rpos] = lb;
data[last_byte_position] = temp_last_byte;
return pos + 3;
}
/* Current dword is 0x0001XXXX */
else if (t4) {
else if (c4s) {
// Current dword is 0x0001XXXX
start_len = 3;
data[rpos] = lb;
data[last_byte_position] = temp_last_byte;
return pos + 2;
}
}
}
end:
prev_z = cur_z;
prev_had_zero = cur_has_zero;
pos += get_start_code_range();
ptr += get_start_code_range();
prev = value;
prev_value32 = cur_value32;
}
data[rpos] = lb;
data[last_byte_position] = temp_last_byte;
return -1;
}