776 if (num_passes > 1 && lengths2 == 0)
778 OJPH_WARN(0x00010001,
"A malformed codeblock that has more than "
779 "one coding pass, but zero length for "
780 "2nd and potential 3rd pass.");
786 OJPH_WARN(0x00010002,
"We do not support more than 3 coding passes; "
787 "This codeblocks has %d passes.",
827 ui32 p = 62 - missing_msbs;
833 OJPH_WARN(0x00010006,
"Wrong codeblock length.");
839 lcup = (int)lengths1;
841 scup = (((int)coded_data[lcup-1]) << 4) + (coded_data[lcup-2] & 0xF);
842 if (scup < 2 || scup > lcup || scup > 4079)
860 ui16 scratch[8 * 513] = {0};
868 ui32 sstr = ((width + 2u) + 7u) & ~7u;
870 ui32 mmsbp2 = missing_msbs + 2;
882 mel_init(&mel, coded_data, lcup, scup);
894 for (
ui32 x = 0; x < width; sp += 4)
913 t0 = (run == -1) ? t0 : 0;
927 c_q = ((t0 & 0x10U) << 3) | ((t0 & 0xE0U) << 2);
936 t1 =
vlc_tbl0[c_q + (vlc_val & 0x7F)];
939 if (c_q == 0 && x < width)
944 t1 = (run == -1) ? t1 : 0;
949 t1 = x < width ? t1 : 0;
958 c_q = ((t1 & 0x10U) << 3) | ((t1 & 0xE0U) << 2);
966 ui32 uvlc_mode = ((t0 & 0x8U) << 3) | ((t1 & 0x8U) << 4);
967 if (uvlc_mode == 0xc0)
971 uvlc_mode += (run == -1) ? 0x40 : 0;
983 ui32 idx = uvlc_mode + (
ui32)(vlc_val & 0x3F);
990 ui32 len = uvlc_entry & 0xF;
991 ui32 tmp = (
ui32)(vlc_val&((1<<len)-1));
995 len = uvlc_entry & 0x7;
997 ui16 u_q0 = (
ui16)((uvlc_entry & 7) + (tmp & ~(0xFFU << len)));
998 ui16 u_q1 = (
ui16)((uvlc_entry >> 3) + (tmp >> len));
1001 ui16 u_ext;
bool cond0, cond1;
1002 cond0 = u_q0 - (u_bias & 0x3) > 32;
1003 u_ext = (
ui16)(cond0 ? (vlc_val & 0xF) : 0);
1005 u_q0 = (
ui16)(u_q0 + (u_ext << 2));
1006 sp[1] = (
ui16)(u_q0 + 1);
1007 cond1 = u_q1 - (u_bias >> 2) > 32;
1008 u_ext = (
ui16)(cond1 ? (vlc_val & 0xF) : 0);
1010 u_q1 = (
ui16)(u_q1 + (u_ext << 2));
1011 sp[3] = (
ui16)(u_q1 + 1);
1016 for (
ui32 y = 2; y < height; y += 2)
1019 ui16 *sp = scratch + (y >> 1) * sstr;
1021 for (
ui32 x = 0; x < width; sp += 4)
1027 c_q |= ((sp[0 - (
si32)sstr] & 0xA0U) << 2);
1028 c_q |= ((sp[2 - (
si32)sstr] & 0x20U) << 4);
1044 t0 = (run == -1) ? t0 : 0;
1059 c_q = ((t0 & 0x40U) << 2) | ((t0 & 0x80U) << 1);
1061 c_q |= sp[0 - (
si32)sstr] & 0x80;
1063 c_q |= ((sp[2 - (
si32)sstr] & 0xA0U) << 2);
1064 c_q |= ((sp[4 - (
si32)sstr] & 0x20U) << 4);
1073 t1 =
vlc_tbl1[ c_q + (vlc_val & 0x7F)];
1076 if (c_q == 0 && x < width)
1081 t1 = (run == -1) ? t1 : 0;
1086 t1 = x < width ? t1 : 0;
1096 c_q = ((t1 & 0x40U) << 2) | ((t1 & 0x80U) << 1);
1098 c_q |= sp[2 - (
si32)sstr] & 0x80;
1106 ui32 uvlc_mode = ((t0 & 0x8U) << 3) | ((t1 & 0x8U) << 4);
1112 ui32 len = uvlc_entry & 0xF;
1113 ui32 tmp = (
ui32)(vlc_val&((1<<len)-1));
1117 len = uvlc_entry & 0x7;
1119 ui16 u_q0 = (
ui16)((uvlc_entry & 7) + (tmp & ~(0xFFU << len)));
1120 ui16 u_q1 = (
ui16)((uvlc_entry >> 3) + (tmp >> len));
1123 ui16 u_ext;
bool cond0, cond1;
1125 u_ext = (
ui16)(cond0 ? (vlc_val & 0xF) : 0);
1127 u_q0 = (
ui16)(u_q0 + (u_ext << 2));
1130 u_ext = (
ui16)(cond1 ? (vlc_val & 0xF) : 0);
1132 u_q1 = (
ui16)(u_q1 + (u_ext << 2));
1147 const int v_n_size = 512 + 4;
1148 ui64 v_n_scratch[v_n_size] = {0};
1153 const ui16 *sp = scratch;
1154 ui64 *vp = v_n_scratch;
1155 ui64 *dp = decoded_data;
1158 for (
ui32 x = 0; x < width; sp += 2, ++vp)
1168 if (inf & (1 << (4 + bit)))
1172 ui32 m_n = U_q - ((inf >> (12 + bit)) & 1);
1176 v_n = ms_val & ((1ULL << m_n) - 1);
1177 v_n |= (
ui64)((inf >> (8 + bit)) & 1) << m_n;
1181 val |= (v_n + 2) << (p - 1);
1188 if (inf & (1 << (4 + bit)))
1192 ui32 m_n = U_q - ((inf >> (12 + bit)) & 1);
1196 v_n = ms_val & ((1ULL << m_n) - 1);
1197 v_n |= (
ui64)((inf >> (8 + bit)) & 1) << m_n;
1201 val |= (v_n + 2) << (p - 1);
1204 vp[0] = prev_v_n | v_n;
1212 if (inf & (1 << (4 + bit)))
1216 ui32 m_n = U_q - ((inf >> (12 + bit)) & 1);
1220 v_n = ms_val & ((1ULL << m_n) - 1);
1221 v_n |= (
ui64)((inf >> (8 + bit)) & 1) << m_n;
1225 val |= (v_n + 2) << (p - 1);
1232 if (inf & (1 << (4 + bit)))
1236 ui32 m_n = U_q - ((inf >> (12 + bit)) & 1);
1240 v_n = ms_val & ((1ULL << m_n) - 1);
1241 v_n |= (
ui64)((inf >> (8 + bit)) & 1) << m_n;
1245 val |= (v_n + 2) << (p - 1);
1254 for (
ui32 y = 2; y < height; y += 2)
1256 const ui16 *sp = scratch + (y >> 1) * sstr;
1257 ui64 *vp = v_n_scratch;
1258 ui64 *dp = decoded_data + y * stride;
1261 for (
ui32 x = 0; x < width; sp += 2, ++vp)
1266 ui32 gamma = inf & 0xF0; gamma &= gamma - 0x10;
1268 ui32 kappa = gamma ? emax : 1;
1270 ui32 U_q = u_q + kappa;
1277 if (inf & (1 << (4 + bit)))
1281 ui32 m_n = U_q - ((inf >> (12 + bit)) & 1);
1285 v_n = ms_val & ((1ULL << m_n) - 1);
1286 v_n |= (
ui64)((inf >> (8+bit)) & 1) << m_n;
1290 val |= (v_n + 2) << (p - 1);
1297 if (inf & (1 << (4 + bit)))
1301 ui32 m_n = U_q - ((inf >> (12 + bit)) & 1);
1305 v_n = ms_val & ((1ULL << m_n) - 1);
1306 v_n |= (
ui64)((inf >> (8+bit)) & 1) << m_n;
1310 val |= (v_n + 2) << (p - 1);
1313 vp[0] = prev_v_n | v_n;
1321 if (inf & (1 << (4 + bit)))
1325 ui32 m_n = U_q - ((inf >> (12 + bit)) & 1);
1329 v_n = ms_val & ((1ULL << m_n) - 1);
1330 v_n |= (
ui64)((inf >> (8+bit)) & 1) << m_n;
1334 val |= (v_n + 2) << (p - 1);
1341 if (inf & (1 << (4 + bit)))
1345 ui32 m_n = U_q - ((inf >> (12 + bit)) & 1);
1349 v_n = ms_val & ((1ULL << m_n) - 1);
1350 v_n |= (
ui64)((inf >> (8+bit)) & 1) << m_n;
1354 val |= (v_n + 2) << (p - 1);
1371 ui16*
const sigma = scratch;
1373 ui32 mstr = (width + 3u) >> 2;
1375 mstr = ((mstr + 2u) + 7u) & ~7u;
1382 for (y = 0; y < height; y += 4)
1384 ui16* sp = scratch + (y >> 1) * sstr;
1385 ui16* dp = sigma + (y >> 2) * mstr;
1386 for (
ui32 x = 0; x < width; x += 4, sp += 4, ++dp) {
1387 ui32 t0 = 0, t1 = 0;
1388 t0 = ((sp[0 ] & 0x30u) >> 4) | ((sp[0 ] & 0xC0u) >> 2);
1389 t0 |= ((sp[2 ] & 0x30u) << 4) | ((sp[2 ] & 0xC0u) << 6);
1390 t1 = ((sp[0+sstr] & 0x30u) >> 2) | ((sp[0+sstr] & 0xC0u) );
1391 t1 |= ((sp[2+sstr] & 0x30u) << 6) | ((sp[2+sstr] & 0xC0u) << 8);
1392 dp[0] = (
ui16)(t0 | t1);
1398 ui16* dp = sigma + (y >> 2) * mstr;
1399 for (
ui32 x = 0; x < width; x += 4, ++dp)
1416 ui16 prev_row_sig[256 + 8] = {0};
1419 frwd_init<0>(&sigprop, coded_data + lengths1, (
int)lengths2);
1421 for (
ui32 y = 0; y < height; y += 4)
1423 ui32 pattern = 0xFFFFu;
1424 if (height - y < 4) {
1426 if (height - y < 3) {
1436 ui16 *prev_sig = prev_row_sig;
1437 ui16 *cur_sig = sigma + (y >> 2) * mstr;
1438 ui64 *dpp = decoded_data + y * stride;
1439 for (
ui32 x = 0; x < width; x += 4, ++cur_sig, ++prev_sig)
1444 pattern = pattern >> (s * 4);
1459 ui32 ns = *(
ui32*)(cur_sig + mstr);
1460 ui32 u = (ps & 0x88888888) >> 3;
1462 u |= (ns & 0x11111111) << 3;
1467 mbr |= (cs & 0x77777777) << 1;
1468 mbr |= (cs & 0xEEEEEEEE) >> 1;
1487 ui32 col_mask = 0xFu;
1488 ui32 inv_sig = ~cs & pattern;
1489 for (
int i = 0; i < 16; i += 4, col_mask <<= 4)
1491 if ((col_mask & new_sig) == 0)
1495 ui32 sample_mask = 0x1111u & col_mask;
1496 if (new_sig & sample_mask)
1498 new_sig &= ~sample_mask;
1501 ui32 t = 0x33u << i;
1502 new_sig |= t & inv_sig;
1508 if (new_sig & sample_mask)
1510 new_sig &= ~sample_mask;
1513 ui32 t = 0x76u << i;
1514 new_sig |= t & inv_sig;
1520 if (new_sig & sample_mask)
1522 new_sig &= ~sample_mask;
1525 ui32 t = 0xECu << i;
1526 new_sig |= t & inv_sig;
1532 if (new_sig & sample_mask)
1534 new_sig &= ~sample_mask;
1537 ui32 t = 0xC8u << i;
1538 new_sig |= t & inv_sig;
1549 ui64 val = 3u << (p - 2);
1551 for (
int i = 0; i < 4; ++i, ++dp, col_mask <<= 4)
1553 if ((col_mask & new_sig) == 0)
1557 ui32 sample_mask = 0x1111u & col_mask;
1558 if (new_sig & sample_mask)
1561 dp[0] = (cwd << 63) | val;
1565 sample_mask += sample_mask;
1566 if (new_sig & sample_mask)
1568 assert(dp[stride] == 0);
1569 dp[stride] = (cwd << 63) | val;
1573 sample_mask += sample_mask;
1574 if (new_sig & sample_mask)
1576 assert(dp[2 * stride] == 0);
1577 dp[2 * stride] = (cwd << 63) | val;
1581 sample_mask += sample_mask;
1582 if (new_sig & sample_mask)
1584 assert(dp[3 * stride] == 0);
1585 dp[3 * stride] = (cwd << 63) | val;
1594 *prev_sig = (
ui16)(new_sig);
1598 new_sig |= (t & 0x7777) << 1;
1599 new_sig |= (t & 0xEEEE) >> 1;
1612 rev_init_mrp(&magref, coded_data, (
int)lengths1, (
int)lengths2);
1614 for (
ui32 y = 0; y < height; y += 4)
1616 ui32 *cur_sig = (
ui32*)(sigma + (y >> 2) * mstr);
1617 ui64 *dpp = decoded_data + y * stride;
1618 ui64 half = 1ULL << (p - 2);
1619 for (
ui32 i = 0; i < width; i += 8)
1625 ui32 sig = *cur_sig++;
1626 ui32 col_mask = 0xFu;
1629 for (
int j = 0; j < 8; ++j)
1633 ui64 *dp = dpp + i + j;
1634 ui32 sample_mask = 0x11111111u & col_mask;
1636 for (
int k = 0; k < 4; ++k) {
1637 if (sig & sample_mask)
1640 assert((dp[0] & half) == 0);
1642 sym = (1 - sym) << (p - 1);
1647 sample_mask += sample_mask;