/******************************************************************** * * * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * * by the Xiph.Org Foundation http://www.xiph.org/ * * * ******************************************************************** function: last mod: $Id: tokenize.c 16503 2009-08-22 18:14:02Z giles $ ********************************************************************/ #include <stdlib.h> #include <string.h> #include "encint.h" static int oc_make_eob_token(int _run_count){ if(_run_count<4)return OC_DCT_EOB1_TOKEN+_run_count-1; else{ int cat; cat=OC_ILOGNZ_32(_run_count)-3; cat=OC_MINI(cat,3); return OC_DCT_REPEAT_RUN0_TOKEN+cat; } } static int oc_make_eob_token_full(int _run_count,int *_eb){ if(_run_count<4){ *_eb=0; return OC_DCT_EOB1_TOKEN+_run_count-1; } else{ int cat; cat=OC_ILOGNZ_32(_run_count)-3; cat=OC_MINI(cat,3); *_eb=_run_count-OC_BYTE_TABLE32(4,8,16,0,cat); return OC_DCT_REPEAT_RUN0_TOKEN+cat; } } /*Returns the number of blocks ended by an EOB token.*/ static int oc_decode_eob_token(int _token,int _eb){ return (0x20820C41U>>_token*5&0x1F)+_eb; } /*TODO: This is now only used during DCT tokenization, and never for runs; it should be simplified.*/ static int oc_make_dct_token_full(int _zzi,int _zzj,int _val,int *_eb){ int neg; int zero_run; int token; int eb; neg=_val<0; _val=abs(_val); zero_run=_zzj-_zzi; if(zero_run>0){ int adj; /*Implement a minor restriction on stack 1 so that we know during DC fixups that extending a dctrun token from stack 1 will never overflow.*/ adj=_zzi!=1; if(_val<2&&zero_run<17+adj){ if(zero_run<6){ token=OC_DCT_RUN_CAT1A+zero_run-1; eb=neg; } else if(zero_run<10){ token=OC_DCT_RUN_CAT1B; eb=zero_run-6+(neg<<2); } else{ token=OC_DCT_RUN_CAT1C; eb=zero_run-10+(neg<<3); } } else if(_val<4&&zero_run<3+adj){ if(zero_run<2){ token=OC_DCT_RUN_CAT2A; eb=_val-2+(neg<<1); } else{ token=OC_DCT_RUN_CAT2B; eb=zero_run-2+(_val-2<<1)+(neg<<2); } } else{ if(zero_run<9)token=OC_DCT_SHORT_ZRL_TOKEN; else token=OC_DCT_ZRL_TOKEN; eb=zero_run-1; } } else if(_val<3){ token=OC_ONE_TOKEN+(_val-1<<1)+neg; eb=0; } else if(_val<7){ token=OC_DCT_VAL_CAT2+_val-3; eb=neg; } else if(_val<9){ token=OC_DCT_VAL_CAT3; eb=_val-7+(neg<<1); } else if(_val<13){ token=OC_DCT_VAL_CAT4; eb=_val-9+(neg<<2); } else if(_val<21){ token=OC_DCT_VAL_CAT5; eb=_val-13+(neg<<3); } else if(_val<37){ token=OC_DCT_VAL_CAT6; eb=_val-21+(neg<<4); } else if(_val<69){ token=OC_DCT_VAL_CAT7; eb=_val-37+(neg<<5); } else{ token=OC_DCT_VAL_CAT8; eb=_val-69+(neg<<9); } *_eb=eb; return token; } /*Token logging to allow a few fragments of efficient rollback. Late SKIP analysis is tied up in the tokenization process, so we need to be able to undo a fragment's tokens on a whim.*/ static const unsigned char OC_ZZI_HUFF_OFFSET[64]={ 0,16,16,16,16,16,32,32, 32,32,32,32,32,32,32,48, 48,48,48,48,48,48,48,48, 48,48,48,48,64,64,64,64, 64,64,64,64,64,64,64,64, 64,64,64,64,64,64,64,64, 64,64,64,64,64,64,64,64 }; static int oc_token_bits(oc_enc_ctx *_enc,int _huffi,int _zzi,int _token){ return _enc->huff_codes[_huffi+OC_ZZI_HUFF_OFFSET[_zzi]][_token].nbits +OC_DCT_TOKEN_EXTRA_BITS[_token]; } static void oc_enc_tokenlog_checkpoint(oc_enc_ctx *_enc, oc_token_checkpoint *_cp,int _pli,int _zzi){ _cp->pli=_pli; _cp->zzi=_zzi; _cp->eob_run=_enc->eob_run[_pli][_zzi]; _cp->ndct_tokens=_enc->ndct_tokens[_pli][_zzi]; } void oc_enc_tokenlog_rollback(oc_enc_ctx *_enc, const oc_token_checkpoint *_stack,int _n){ int i; for(i=_n;i-->0;){ int pli; int zzi; pli=_stack[i].pli; zzi=_stack[i].zzi; _enc->eob_run[pli][zzi]=_stack[i].eob_run; _enc->ndct_tokens[pli][zzi]=_stack[i].ndct_tokens; } } static void oc_enc_token_log(oc_enc_ctx *_enc, int _pli,int _zzi,int _token,int _eb){ ptrdiff_t ti; ti=_enc->ndct_tokens[_pli][_zzi]++; _enc->dct_tokens[_pli][_zzi][ti]=(unsigned char)_token; _enc->extra_bits[_pli][_zzi][ti]=(ogg_uint16_t)_eb; } static void oc_enc_eob_log(oc_enc_ctx *_enc, int _pli,int _zzi,int _run_count){ int token; int eb; token=oc_make_eob_token_full(_run_count,&eb); oc_enc_token_log(_enc,_pli,_zzi,token,eb); } void oc_enc_tokenize_start(oc_enc_ctx *_enc){ memset(_enc->ndct_tokens,0,sizeof(_enc->ndct_tokens)); memset(_enc->eob_run,0,sizeof(_enc->eob_run)); memset(_enc->dct_token_offs,0,sizeof(_enc->dct_token_offs)); memset(_enc->dc_pred_last,0,sizeof(_enc->dc_pred_last)); } typedef struct oc_quant_token oc_quant_token; /*A single node in the Viterbi trellis. We maintain up to 2 of these per coefficient: - A token to code if the value is zero (EOB, zero run, or combo token). - A token to code if the value is not zero (DCT value token).*/ struct oc_quant_token{ unsigned char next; signed char token; ogg_int16_t eb; ogg_uint32_t cost; int bits; int qc; }; /*Tokenizes the AC coefficients, possibly adjusting the quantization, and then dequantizes and de-zig-zags the result. The DC coefficient is not preserved; it should be restored by the caller.*/ int oc_enc_tokenize_ac(oc_enc_ctx *_enc,int _pli,ptrdiff_t _fragi, ogg_int16_t *_qdct,const ogg_uint16_t *_dequant,const ogg_int16_t *_dct, int _zzi,oc_token_checkpoint **_stack,int _acmin){ oc_token_checkpoint *stack; ogg_int64_t zflags; ogg_int64_t nzflags; ogg_int64_t best_flags; ogg_uint32_t d2_accum[64]; oc_quant_token tokens[64][2]; ogg_uint16_t *eob_run; const unsigned char *dct_fzig_zag; ogg_uint32_t cost; int bits; int eob; int token; int eb; int next; int huffi; int zzi; int ti; int zzj; int qc; huffi=_enc->huff_idxs[_enc->state.frame_type][1][_pli+1>>1]; eob_run=_enc->eob_run[_pli]; memset(tokens[0],0,sizeof(tokens[0])); best_flags=nzflags=0; zflags=1; d2_accum[0]=0; zzj=64; for(zzi=OC_MINI(_zzi,63);zzi>0;zzi--){ ogg_int32_t lambda; ogg_uint32_t best_cost; int best_bits=best_bits; int best_next=best_next; int best_token=best_token; int best_eb=best_eb; int best_qc=best_qc; int flush_bits; ogg_uint32_t d2; int dq; int e; int c; int s; int tj; lambda=_enc->lambda; qc=_qdct[zzi]; s=-(qc<0); qc=qc+s^s; c=_dct[OC_FZIG_ZAG[zzi]]; if(qc<=1){ ogg_uint32_t sum_d2; int nzeros; int dc_reserve; /*The hard case: try a zero run.*/ if(!qc){ /*Skip runs that are already quantized to zeros. If we considered each zero coefficient in turn, we might theoretically find a better way to partition long zero runs (e.g., a run of > 17 zeros followed by a 1 might be better coded as a short zero run followed by a combo token, rather than the longer zero token followed by a 1 value token), but zeros are so common that this becomes very computationally expensive (quadratic instead of linear in the number of coefficients), for a marginal gain.*/ while(zzi>1&&!_qdct[zzi-1])zzi--; /*The distortion of coefficients originally quantized to zero is treated as zero (since we'll never quantize them to anything else).*/ d2=0; } else{ c=c+s^s; d2=c*(ogg_int32_t)c; } eob=eob_run[zzi]; nzeros=zzj-zzi; zzj&=63; sum_d2=d2+d2_accum[zzj]; d2_accum[zzi]=sum_d2; flush_bits=eob>0?oc_token_bits(_enc,huffi,zzi,oc_make_eob_token(eob)):0; /*We reserve 1 spot for combo run tokens that start in the 1st AC stack to ensure they can be extended to include the DC coefficient if necessary; this greatly simplifies stack-rewriting later on.*/ dc_reserve=zzi+62>>6; best_cost=0xFFFFFFFF; for(;;){ if(nzflags>>zzj&1){ int cat; int val; int val_s; int zzk; int tk; next=tokens[zzj][1].next; tk=next&1; zzk=next>>1; /*Try a pure zero run to this point.*/ cat=nzeros+55>>6; token=OC_DCT_SHORT_ZRL_TOKEN+cat; bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); d2=sum_d2-d2_accum[zzj]; cost=d2+lambda*bits+tokens[zzj][1].cost; if(cost<=best_cost){ best_next=(zzj<<1)+1; best_token=token; best_eb=nzeros-1; best_cost=cost; best_bits=bits+tokens[zzj][1].bits; best_qc=0; } if(nzeros<16+dc_reserve){ val=_qdct[zzj]; val_s=-(val<0); val=val+val_s^val_s; if(val<=2){ /*Try a +/- 1 combo token.*/ if(nzeros<6){ token=OC_DCT_RUN_CAT1A+nzeros-1; eb=-val_s; } else{ cat=nzeros+54>>6; token=OC_DCT_RUN_CAT1B+cat; eb=(-val_s<<cat+2)+nzeros-6-(cat<<2); } e=(_dct[OC_FZIG_ZAG[zzj]]+val_s^val_s)-_dequant[zzj]; d2=e*(ogg_int32_t)e+sum_d2-d2_accum[zzj]; bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); cost=d2+lambda*bits+tokens[zzk][tk].cost; if(cost<=best_cost){ best_next=next; best_token=token; best_eb=eb; best_cost=cost; best_bits=bits+tokens[zzk][tk].bits; best_qc=1+val_s^val_s; } } if(nzeros<2+dc_reserve&&2<=val&&val<=4){ /*Try a +/- 2/3 combo token.*/ cat=nzeros>>1; token=OC_DCT_RUN_CAT2A+cat; bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); val=2+((val+val_s^val_s)>2); e=(_dct[OC_FZIG_ZAG[zzj]]+val_s^val_s)-_dequant[zzj]*val; d2=e*(ogg_int32_t)e+sum_d2-d2_accum[zzj]; cost=d2+lambda*bits+tokens[zzk][tk].cost; if(cost<=best_cost){ best_cost=cost; best_bits=bits+tokens[zzk][tk].bits; best_next=next; best_token=token; best_eb=(-val_s<<1+cat)+(val-2<<cat)+(nzeros-1>>1); best_qc=val+val_s^val_s; } } } /*zzj can't be coded as a zero, so stop trying to extend the run.*/ if(!(zflags>>zzj&1))break; } /*We could try to consider _all_ potentially non-zero coefficients, but if we already found a bunch of them not worth coding, it's fairly unlikely they would now be worth coding from this position; skipping them saves a lot of work.*/ zzj=(tokens[zzj][0].next>>1)-(tokens[zzj][0].qc!=0)&63; if(zzj==0){ /*We made it all the way to the end of the block; try an EOB token.*/ if(eob<4095){ bits=oc_token_bits(_enc,huffi,zzi,oc_make_eob_token(eob+1)) -flush_bits; } else bits=oc_token_bits(_enc,huffi,zzi,OC_DCT_EOB1_TOKEN); cost=sum_d2+bits*lambda; /*If the best route so far is still a pure zero run to the end of the block, force coding it as an EOB. Even if it's not optimal for this block, it has a good chance of getting combined with an EOB token from subsequent blocks, saving bits overall.*/ if(cost<=best_cost||best_token<=OC_DCT_ZRL_TOKEN&&zzi+best_eb==63){ best_next=0; /*This token is just a marker; in reality we may not emit any tokens, but update eob_run[] instead.*/ best_token=OC_DCT_EOB1_TOKEN; best_eb=0; best_cost=cost; best_bits=bits; best_qc=0; } break; } nzeros=zzj-zzi; } tokens[zzi][0].next=(unsigned char)best_next; tokens[zzi][0].token=(signed char)best_token; tokens[zzi][0].eb=(ogg_int16_t)best_eb; tokens[zzi][0].cost=best_cost; tokens[zzi][0].bits=best_bits; tokens[zzi][0].qc=best_qc; zflags|=(ogg_int64_t)1<<zzi; if(qc){ dq=_dequant[zzi]; if(zzi<_acmin)lambda=0; e=dq-c; d2=e*(ogg_int32_t)e; token=OC_ONE_TOKEN-s; bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); zzj=zzi+1&63; tj=best_flags>>zzj&1; next=(zzj<<1)+tj; tokens[zzi][1].next=(unsigned char)next; tokens[zzi][1].token=(signed char)token; tokens[zzi][1].eb=0; tokens[zzi][1].cost=d2+lambda*bits+tokens[zzj][tj].cost; tokens[zzi][1].bits=bits+tokens[zzj][tj].bits; tokens[zzi][1].qc=1+s^s; nzflags|=(ogg_int64_t)1<<zzi; best_flags|= (ogg_int64_t)(tokens[zzi][1].cost<tokens[zzi][0].cost)<<zzi; } } else{ eob=eob_run[zzi]; if(zzi<_acmin)lambda=0; c=c+s^s; dq=_dequant[zzi]; /*No zero run can extend past this point.*/ d2_accum[zzi]=0; flush_bits=eob>0?oc_token_bits(_enc,huffi,zzi,oc_make_eob_token(eob)):0; if(qc<=2){ e=2*dq-c; d2=e*(ogg_int32_t)e; best_token=OC_TWO_TOKEN-s; best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token); best_cost=d2+lambda*best_bits; e-=dq; d2=e*(ogg_int32_t)e; token=OC_ONE_TOKEN-s; bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); cost=d2+lambda*bits; if(cost<=best_cost){ best_token=token; best_bits=bits; best_cost=cost; qc--; } best_eb=0; } else if(qc<=3){ e=3*dq-c; d2=e*(ogg_int32_t)e; best_token=OC_DCT_VAL_CAT2; best_eb=-s; best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token); best_cost=d2+lambda*best_bits; e-=dq; d2=e*(ogg_int32_t)e; token=OC_TWO_TOKEN-s; bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); cost=d2+lambda*bits; if(cost<=best_cost){ best_token=token; best_eb=0; best_bits=bits; best_cost=cost; qc--; } } else if(qc<=6){ e=qc*dq-c; d2=e*(ogg_int32_t)e; best_token=OC_DCT_VAL_CAT2+qc-3; best_eb=-s; best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token); best_cost=d2+lambda*best_bits; e-=dq; d2=e*(ogg_int32_t)e; token=best_token-1; bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); cost=d2+lambda*bits; if(cost<=best_cost){ best_token=token; best_bits=bits; best_cost=cost; qc--; } } else if(qc<=8){ e=qc*dq-c; d2=e*(ogg_int32_t)e; best_token=OC_DCT_VAL_CAT3; best_eb=(-s<<1)+qc-7; best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token); best_cost=d2+lambda*best_bits; e=6*dq-c; d2=e*(ogg_int32_t)e; token=OC_DCT_VAL_CAT2+3; bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); cost=d2+lambda*bits; if(cost<=best_cost){ best_token=token; best_eb=-s; best_bits=bits; best_cost=cost; qc=6; } } else if(qc<=12){ e=qc*dq-c; d2=e*(ogg_int32_t)e; best_token=OC_DCT_VAL_CAT4; best_eb=(-s<<2)+qc-9; best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token); best_cost=d2+lambda*best_bits; e=8*dq-c; d2=e*(ogg_int32_t)e; token=best_token-1; bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); cost=d2+lambda*bits; if(cost<=best_cost){ best_token=token; best_eb=(-s<<1)+1; best_bits=bits; best_cost=cost; qc=8; } } else if(qc<=20){ e=qc*dq-c; d2=e*(ogg_int32_t)e; best_token=OC_DCT_VAL_CAT5; best_eb=(-s<<3)+qc-13; best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token); best_cost=d2+lambda*best_bits; e=12*dq-c; d2=e*(ogg_int32_t)e; token=best_token-1; bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); cost=d2+lambda*bits; if(cost<=best_cost){ best_token=token; best_eb=(-s<<2)+3; best_bits=bits; best_cost=cost; qc=12; } } else if(qc<=36){ e=qc*dq-c; d2=e*(ogg_int32_t)e; best_token=OC_DCT_VAL_CAT6; best_eb=(-s<<4)+qc-21; best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token); best_cost=d2+lambda*best_bits; e=20*dq-c; d2=e*(ogg_int32_t)e; token=best_token-1; bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); cost=d2+lambda*bits; if(cost<=best_cost){ best_token=token; best_eb=(-s<<3)+7; best_bits=bits; best_cost=cost; qc=20; } } else if(qc<=68){ e=qc*dq-c; d2=e*(ogg_int32_t)e; best_token=OC_DCT_VAL_CAT7; best_eb=(-s<<5)+qc-37; best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token); best_cost=d2+lambda*best_bits; e=36*dq-c; d2=e*(ogg_int32_t)e; token=best_token-1; bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); cost=d2+lambda*bits; if(cost<best_cost){ best_token=token; best_eb=(-s<<4)+15; best_bits=bits; best_cost=cost; qc=36; } } else{ e=qc*dq-c; d2=e*(ogg_int32_t)e; best_token=OC_DCT_VAL_CAT8; best_eb=(-s<<9)+qc-69; best_bits=flush_bits+oc_token_bits(_enc,huffi,zzi,best_token); best_cost=d2+lambda*best_bits; e=68*dq-c; d2=e*(ogg_int32_t)e; token=best_token-1; bits=flush_bits+oc_token_bits(_enc,huffi,zzi,token); cost=d2+lambda*bits; if(cost<best_cost){ best_token=token; best_eb=(-s<<5)+31; best_bits=bits; best_cost=cost; qc=68; } } zzj=zzi+1&63; tj=best_flags>>zzj&1; next=(zzj<<1)+tj; tokens[zzi][1].next=(unsigned char)next; tokens[zzi][1].token=(signed char)best_token; tokens[zzi][1].eb=best_eb; tokens[zzi][1].cost=best_cost+tokens[zzj][tj].cost; tokens[zzi][1].bits=best_bits+tokens[zzj][tj].bits; tokens[zzi][1].qc=qc+s^s; nzflags|=(ogg_int64_t)1<<zzi; best_flags|=(ogg_int64_t)1<<zzi; } zzj=zzi; } /*Emit the tokens from the best path through the trellis.*/ stack=*_stack; /*We blow away the first entry here so that things vectorize better. The DC coefficient is not actually stored in the array yet.*/ for(zzi=0;zzi<64;zzi++)_qdct[zzi]=0; dct_fzig_zag=_enc->state.opt_data.dct_fzig_zag; zzi=1; ti=best_flags>>1&1; bits=tokens[zzi][ti].bits; do{ oc_enc_tokenlog_checkpoint(_enc,stack++,_pli,zzi); eob=eob_run[zzi]; if(tokens[zzi][ti].token<OC_NDCT_EOB_TOKEN_MAX){ if(++eob>=4095){ oc_enc_eob_log(_enc,_pli,zzi,eob); eob=0; } eob_run[zzi]=eob; /*We don't include the actual EOB cost for this block in the return value. It will be paid for by the fragment that terminates the EOB run.*/ bits-=tokens[zzi][ti].bits; zzi=_zzi; break; } /*Emit pending EOB run if any.*/ if(eob>0){ oc_enc_eob_log(_enc,_pli,zzi,eob); eob_run[zzi]=0; } oc_enc_token_log(_enc,_pli,zzi,tokens[zzi][ti].token,tokens[zzi][ti].eb); next=tokens[zzi][ti].next; qc=tokens[zzi][ti].qc; zzj=(next>>1)-1&63; /*TODO: It may be worth saving the dequantized coefficient in the trellis above; we had to compute it to measure the error anyway.*/ _qdct[dct_fzig_zag[zzj]]=(ogg_int16_t)(qc*(int)_dequant[zzj]); zzi=next>>1; ti=next&1; } while(zzi); *_stack=stack; return bits; } void oc_enc_pred_dc_frag_rows(oc_enc_ctx *_enc, int _pli,int _fragy0,int _frag_yend){ const oc_fragment_plane *fplane; const oc_fragment *frags; ogg_int16_t *frag_dc; ptrdiff_t fragi; int *pred_last; int nhfrags; int fragx; int fragy; fplane=_enc->state.fplanes+_pli; frags=_enc->state.frags; frag_dc=_enc->frag_dc; pred_last=_enc->dc_pred_last[_pli]; nhfrags=fplane->nhfrags; fragi=fplane->froffset+_fragy0*nhfrags; for(fragy=_fragy0;fragy<_frag_yend;fragy++){ if(fragy==0){ /*For the first row, all of the cases reduce to just using the previous predictor for the same reference frame.*/ for(fragx=0;fragx<nhfrags;fragx++,fragi++){ if(frags[fragi].coded){ int ref; ref=OC_FRAME_FOR_MODE(frags[fragi].mb_mode); frag_dc[fragi]=(ogg_int16_t)(frags[fragi].dc-pred_last[ref]); pred_last[ref]=frags[fragi].dc; } } } else{ const oc_fragment *u_frags; int l_ref; int ul_ref; int u_ref; u_frags=frags-nhfrags; l_ref=-1; ul_ref=-1; u_ref=u_frags[fragi].coded?OC_FRAME_FOR_MODE(u_frags[fragi].mb_mode):-1; for(fragx=0;fragx<nhfrags;fragx++,fragi++){ int ur_ref; if(fragx+1>=nhfrags)ur_ref=-1; else{ ur_ref=u_frags[fragi+1].coded? OC_FRAME_FOR_MODE(u_frags[fragi+1].mb_mode):-1; } if(frags[fragi].coded){ int pred; int ref; ref=OC_FRAME_FOR_MODE(frags[fragi].mb_mode); /*We break out a separate case based on which of our neighbors use the same reference frames. This is somewhat faster than trying to make a generic case which handles all of them, since it reduces lots of poorly predicted jumps to one switch statement, and also lets a number of the multiplications be optimized out by strength reduction.*/ switch((l_ref==ref)|(ul_ref==ref)<<1| (u_ref==ref)<<2|(ur_ref==ref)<<3){ default:pred=pred_last[ref];break; case 1: case 3:pred=frags[fragi-1].dc;break; case 2:pred=u_frags[fragi-1].dc;break; case 4: case 6: case 12:pred=u_frags[fragi].dc;break; case 5:pred=(frags[fragi-1].dc+u_frags[fragi].dc)/2;break; case 8:pred=u_frags[fragi+1].dc;break; case 9: case 11: case 13:{ pred=(75*frags[fragi-1].dc+53*u_frags[fragi+1].dc)/128; }break; case 10:pred=(u_frags[fragi-1].dc+u_frags[fragi+1].dc)/2;break; case 14:{ pred=(3*(u_frags[fragi-1].dc+u_frags[fragi+1].dc) +10*u_frags[fragi].dc)/16; }break; case 7: case 15:{ int p0; int p1; int p2; p0=frags[fragi-1].dc; p1=u_frags[fragi-1].dc; p2=u_frags[fragi].dc; pred=(29*(p0+p2)-26*p1)/32; if(abs(pred-p2)>128)pred=p2; else if(abs(pred-p0)>128)pred=p0; else if(abs(pred-p1)>128)pred=p1; }break; } frag_dc[fragi]=(ogg_int16_t)(frags[fragi].dc-pred); pred_last[ref]=frags[fragi].dc; l_ref=ref; } else l_ref=-1; ul_ref=u_ref; u_ref=ur_ref; } } } } void oc_enc_tokenize_dc_frag_list(oc_enc_ctx *_enc,int _pli, const ptrdiff_t *_coded_fragis,ptrdiff_t _ncoded_fragis, int _prev_ndct_tokens1,int _prev_eob_run1){ const ogg_int16_t *frag_dc; ptrdiff_t fragii; unsigned char *dct_tokens0; unsigned char *dct_tokens1; ogg_uint16_t *extra_bits0; ogg_uint16_t *extra_bits1; ptrdiff_t ti0; ptrdiff_t ti1r; ptrdiff_t ti1w; int eob_run0; int eob_run1; int neobs1; int token; int eb; int token1=token1; int eb1=eb1; /*Return immediately if there are no coded fragments; otherwise we'd flush any trailing EOB run into the AC 1 list and never read it back out.*/ if(_ncoded_fragis<=0)return; frag_dc=_enc->frag_dc; dct_tokens0=_enc->dct_tokens[_pli][0]; dct_tokens1=_enc->dct_tokens[_pli][1]; extra_bits0=_enc->extra_bits[_pli][0]; extra_bits1=_enc->extra_bits[_pli][1]; ti0=_enc->ndct_tokens[_pli][0]; ti1w=ti1r=_prev_ndct_tokens1; eob_run0=_enc->eob_run[_pli][0]; /*Flush any trailing EOB run for the 1st AC coefficient. This is needed to allow us to track tokens to the end of the list.*/ eob_run1=_enc->eob_run[_pli][1]; if(eob_run1>0)oc_enc_eob_log(_enc,_pli,1,eob_run1); /*If there was an active EOB run at the start of the 1st AC stack, read it in and decode it.*/ if(_prev_eob_run1>0){ token1=dct_tokens1[ti1r]; eb1=extra_bits1[ti1r]; ti1r++; eob_run1=oc_decode_eob_token(token1,eb1); /*Consume the portion of the run that came before these fragments.*/ neobs1=eob_run1-_prev_eob_run1; } else eob_run1=neobs1=0; for(fragii=0;fragii<_ncoded_fragis;fragii++){ int val; /*All tokens in the 1st AC coefficient stack are regenerated as the DC coefficients are produced. This can be done in-place; stack 1 cannot get larger.*/ if(!neobs1){ /*There's no active EOB run in stack 1; read the next token.*/ token1=dct_tokens1[ti1r]; eb1=extra_bits1[ti1r]; ti1r++; if(token1<OC_NDCT_EOB_TOKEN_MAX){ neobs1=oc_decode_eob_token(token1,eb1); /*It's an EOB run; add it to the current (inactive) one. Because we may have moved entries to stack 0, we may have an opportunity to merge two EOB runs in stack 1.*/ eob_run1+=neobs1; } } val=frag_dc[_coded_fragis[fragii]]; if(val){ /*There was a non-zero DC value, so there's no alteration to stack 1 for this fragment; just code the stack 0 token.*/ /*Flush any pending EOB run.*/ if(eob_run0>0){ token=oc_make_eob_token_full(eob_run0,&eb); dct_tokens0[ti0]=(unsigned char)token; extra_bits0[ti0]=(ogg_uint16_t)eb; ti0++; eob_run0=0; } token=oc_make_dct_token_full(0,0,val,&eb); dct_tokens0[ti0]=(unsigned char)token; extra_bits0[ti0]=(ogg_uint16_t)eb; ti0++; } else{ /*Zero DC value; that means the entry in stack 1 might need to be coded from stack 0. This requires a stack 1 fixup.*/ if(neobs1>0){ /*We're in the middle of an active EOB run in stack 1. Move it to stack 0.*/ if(++eob_run0>=4095){ token=oc_make_eob_token_full(eob_run0,&eb); dct_tokens0[ti0]=(unsigned char)token; extra_bits0[ti0]=(ogg_uint16_t)eb; ti0++; eob_run0=0; } eob_run1--; } else{ /*No active EOB run in stack 1, so we can't extend one in stack 0. Flush it if we've got it.*/ if(eob_run0>0){ token=oc_make_eob_token_full(eob_run0,&eb); dct_tokens0[ti0]=(unsigned char)token; extra_bits0[ti0]=(ogg_uint16_t)eb; ti0++; eob_run0=0; } /*Stack 1 token is one of: a pure zero run token, a single coefficient token, or a zero run/coefficient combo token. A zero run token is expanded and moved to token stack 0, and the stack 1 entry dropped. A single coefficient value may be transformed into combo token that is moved to stack 0, or if it cannot be combined, it is left alone and a single length-1 zero run is emitted in stack 0. A combo token is extended and moved to stack 0. During AC coding, we restrict the run lengths on combo tokens for stack 1 to guarantee we can extend them.*/ switch(token1){ case OC_DCT_SHORT_ZRL_TOKEN:{ if(eb1<7){ dct_tokens0[ti0]=OC_DCT_SHORT_ZRL_TOKEN; extra_bits0[ti0]=(ogg_uint16_t)(eb1+1); ti0++; /*Don't write the AC coefficient back out.*/ continue; } /*Fall through.*/ } case OC_DCT_ZRL_TOKEN:{ dct_tokens0[ti0]=OC_DCT_ZRL_TOKEN; extra_bits0[ti0]=(ogg_uint16_t)(eb1+1); ti0++; /*Don't write the AC coefficient back out.*/ }continue; case OC_ONE_TOKEN: case OC_MINUS_ONE_TOKEN:{ dct_tokens0[ti0]=OC_DCT_RUN_CAT1A; extra_bits0[ti0]=(ogg_uint16_t)(token1-OC_ONE_TOKEN); ti0++; /*Don't write the AC coefficient back out.*/ }continue; case OC_TWO_TOKEN: case OC_MINUS_TWO_TOKEN:{ dct_tokens0[ti0]=OC_DCT_RUN_CAT2A; extra_bits0[ti0]=(ogg_uint16_t)(token1-OC_TWO_TOKEN<<1); ti0++; /*Don't write the AC coefficient back out.*/ }continue; case OC_DCT_VAL_CAT2:{ dct_tokens0[ti0]=OC_DCT_RUN_CAT2A; extra_bits0[ti0]=(ogg_uint16_t)((eb1<<1)+1); ti0++; /*Don't write the AC coefficient back out.*/ }continue; case OC_DCT_RUN_CAT1A: case OC_DCT_RUN_CAT1A+1: case OC_DCT_RUN_CAT1A+2: case OC_DCT_RUN_CAT1A+3:{ dct_tokens0[ti0]=(unsigned char)(token1+1); extra_bits0[ti0]=(ogg_uint16_t)eb1; ti0++; /*Don't write the AC coefficient back out.*/ }continue; case OC_DCT_RUN_CAT1A+4:{ dct_tokens0[ti0]=OC_DCT_RUN_CAT1B; extra_bits0[ti0]=(ogg_uint16_t)(eb1<<2); ti0++; /*Don't write the AC coefficient back out.*/ }continue; case OC_DCT_RUN_CAT1B:{ if((eb1&3)<3){ dct_tokens0[ti0]=OC_DCT_RUN_CAT1B; extra_bits0[ti0]=(ogg_uint16_t)(eb1+1); ti0++; /*Don't write the AC coefficient back out.*/ continue; } eb1=((eb1&4)<<1)-1; /*Fall through.*/ } case OC_DCT_RUN_CAT1C:{ dct_tokens0[ti0]=OC_DCT_RUN_CAT1C; extra_bits0[ti0]=(ogg_uint16_t)(eb1+1); ti0++; /*Don't write the AC coefficient back out.*/ }continue; case OC_DCT_RUN_CAT2A:{ eb1=(eb1<<1)-1; /*Fall through.*/ } case OC_DCT_RUN_CAT2B:{ dct_tokens0[ti0]=OC_DCT_RUN_CAT2B; extra_bits0[ti0]=(ogg_uint16_t)(eb1+1); ti0++; /*Don't write the AC coefficient back out.*/ }continue; } /*We can't merge tokens, write a short zero run and keep going.*/ dct_tokens0[ti0]=OC_DCT_SHORT_ZRL_TOKEN; extra_bits0[ti0]=0; ti0++; } } if(!neobs1){ /*Flush any (inactive) EOB run.*/ if(eob_run1>0){ token=oc_make_eob_token_full(eob_run1,&eb); dct_tokens1[ti1w]=(unsigned char)token; extra_bits1[ti1w]=(ogg_uint16_t)eb; ti1w++; eob_run1=0; } /*There's no active EOB run, so log the current token.*/ dct_tokens1[ti1w]=(unsigned char)token1; extra_bits1[ti1w]=(ogg_uint16_t)eb1; ti1w++; } else{ /*Otherwise consume one EOB from the current run.*/ neobs1--; /*If we have more than 4095 EOBs outstanding in stack1, flush the run.*/ if(eob_run1-neobs1>=4095){ token=oc_make_eob_token_full(4095,&eb); dct_tokens1[ti1w]=(unsigned char)token; extra_bits1[ti1w]=(ogg_uint16_t)eb; ti1w++; eob_run1-=4095; } } } /*Save the current state.*/ _enc->ndct_tokens[_pli][0]=ti0; _enc->ndct_tokens[_pli][1]=ti1w; _enc->eob_run[_pli][0]=eob_run0; _enc->eob_run[_pli][1]=eob_run1; } /*Final EOB run welding.*/ void oc_enc_tokenize_finish(oc_enc_ctx *_enc){ int pli; int zzi; /*Emit final EOB runs.*/ for(pli=0;pli<3;pli++)for(zzi=0;zzi<64;zzi++){ int eob_run; eob_run=_enc->eob_run[pli][zzi]; if(eob_run>0)oc_enc_eob_log(_enc,pli,zzi,eob_run); } /*Merge the final EOB run of one token list with the start of the next, if possible.*/ for(zzi=0;zzi<64;zzi++)for(pli=0;pli<3;pli++){ int old_tok1; int old_tok2; int old_eb1; int old_eb2; int new_tok; int new_eb; int zzj; int plj; ptrdiff_t ti=ti; int run_count; /*Make sure this coefficient has tokens at all.*/ if(_enc->ndct_tokens[pli][zzi]<=0)continue; /*Ensure the first token is an EOB run.*/ old_tok2=_enc->dct_tokens[pli][zzi][0]; if(old_tok2>=OC_NDCT_EOB_TOKEN_MAX)continue; /*Search for a previous coefficient that has any tokens at all.*/ old_tok1=OC_NDCT_EOB_TOKEN_MAX; for(zzj=zzi,plj=pli;zzj>=0;zzj--){ while(plj-->0){ ti=_enc->ndct_tokens[plj][zzj]-1; if(ti>=_enc->dct_token_offs[plj][zzj]){ old_tok1=_enc->dct_tokens[plj][zzj][ti]; break; } } if(plj>=0)break; plj=3; } /*Ensure its last token was an EOB run.*/ if(old_tok1>=OC_NDCT_EOB_TOKEN_MAX)continue; /*Pull off the associated extra bits, if any, and decode the runs.*/ old_eb1=_enc->extra_bits[plj][zzj][ti]; old_eb2=_enc->extra_bits[pli][zzi][0]; run_count=oc_decode_eob_token(old_tok1,old_eb1) +oc_decode_eob_token(old_tok2,old_eb2); /*We can't possibly combine these into one run. It might be possible to split them more optimally, but we'll just leave them as-is.*/ if(run_count>=4096)continue; /*We CAN combine them into one run.*/ new_tok=oc_make_eob_token_full(run_count,&new_eb); _enc->dct_tokens[plj][zzj][ti]=(unsigned char)new_tok; _enc->extra_bits[plj][zzj][ti]=(ogg_uint16_t)new_eb; _enc->dct_token_offs[pli][zzi]++; } }