• Main Page
  • Related Pages
  • Modules
  • Data Structures
  • Files
  • Examples
  • File List
  • Globals

libavcodec/nellymoserenc.c

Go to the documentation of this file.
00001 /*
00002  * Nellymoser encoder
00003  * This code is developed as part of Google Summer of Code 2008 Program.
00004  *
00005  * Copyright (c) 2008 Bartlomiej Wolowiec
00006  *
00007  * This file is part of Libav.
00008  *
00009  * Libav is free software; you can redistribute it and/or
00010  * modify it under the terms of the GNU Lesser General Public
00011  * License as published by the Free Software Foundation; either
00012  * version 2.1 of the License, or (at your option) any later version.
00013  *
00014  * Libav is distributed in the hope that it will be useful,
00015  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00016  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017  * Lesser General Public License for more details.
00018  *
00019  * You should have received a copy of the GNU Lesser General Public
00020  * License along with Libav; if not, write to the Free Software
00021  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00022  */
00023 
00038 #include "libavutil/mathematics.h"
00039 #include "nellymoser.h"
00040 #include "avcodec.h"
00041 #include "dsputil.h"
00042 #include "fft.h"
00043 #include "sinewin.h"
00044 
00045 #define BITSTREAM_WRITER_LE
00046 #include "put_bits.h"
00047 
/* Number of entries in pow_table[] (one entry per value of power_idx & 0x7FF). */
#define POW_TABLE_SIZE   (1 << 11)
/* Exponent bias folded into pow_table[] and undone by the shift in encode_block(). */
#define POW_TABLE_OFFSET 3
/* Number of power index values tracked per band by the trellis search. */
#define OPT_SIZE         ((1 << 15) + 3000)
00051 
00052 typedef struct NellyMoserEncodeContext {
00053     AVCodecContext  *avctx;
00054     int             last_frame;
00055     int             bufsel;
00056     int             have_saved;
00057     DSPContext      dsp;
00058     FFTContext      mdct_ctx;
00059     DECLARE_ALIGNED(32, float, mdct_out)[NELLY_SAMPLES];
00060     DECLARE_ALIGNED(32, float, in_buff)[NELLY_SAMPLES];
00061     DECLARE_ALIGNED(32, float, buf)[2][3 * NELLY_BUF_LEN];     
00062     float           (*opt )[NELLY_BANDS];
00063     uint8_t         (*path)[NELLY_BANDS];
00064 } NellyMoserEncodeContext;
00065 
00066 static float pow_table[POW_TABLE_SIZE];     
00067 
/*
 * First-guess index into ff_nelly_init_table for the base exponent.
 * get_exponent_greedy() looks it up with (lrintf(val) >> 8) - 20, clipped to
 * [0, 95], and then refines the guess by comparing with the next table entry.
 */
static const uint8_t sf_lut[96] = {
     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
     5,  5,  5,  6,  7,  7,  8,  8,  9, 10, 11, 11, 12, 13, 13, 14,
    15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
    27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
    41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
    54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
};
00076 
/*
 * First-guess index into ff_nelly_delta_table for the per-band exponent delta.
 * get_exponent_greedy() looks it up with (lrintf(val) >> 8) + 37, clipped to
 * [0, 77], and then refines the guess by comparing with the next table entry.
 */
static const uint8_t sf_delta_lut[78] = {
     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
     4,  5,  5,  5,  6,  6,  7,  7,  8,  8,  9, 10, 10, 11, 11, 12,
    13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
    23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
    28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
};
00084 
/*
 * First-guess quantizer index for a scaled coefficient, one segment per bit
 * count. The segment for a given bit count spans
 * [quant_lut_offset[bits], quant_lut_offset[bits + 1] - 1].
 */
static const uint8_t quant_lut[230] = {
    /* segment starting at offset 0 (1 entry) */
     0,

    /* segment starting at offset 1 (3 entries) */
     0,  1,  2,

    /* segment starting at offset 4 (7 entries) */
     0,  1,  2,  3,  4,  5,  6,

    /* segment starting at offset 11 (21 entries) */
     0,  1,  1,  2,  2,  3,  3,  4,  5,  6,  7,  8,  9, 10, 11, 11,
    12, 13, 13, 13, 14,

    /* segment starting at offset 32 (49 entries) */
     0,  1,  1,  2,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  8,
     8,  9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
    22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
    30,

    /* segment starting at offset 81 (149 entries) */
     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  3,  3,  3,  3,
     4,  4,  4,  5,  5,  5,  6,  6,  7,  7,  7,  8,  8,  9,  9,  9,
    10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
    15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
    21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
    33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
    46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
    53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
    58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
    61, 61, 61, 61, 62,
};
00111 
/*
 * Per-bit-count parameters used by encode_block() to index quant_lut:
 * position = coeff * quant_lut_mul[bits] + quant_lut_add[bits], clipped to
 * [quant_lut_offset[bits], quant_lut_offset[bits + 1] - 1].
 */
static const float   quant_lut_mul[7]    = { 0.0, 0.0, 2.0, 2.0,  5.0, 12.0,  36.6 };
static const float   quant_lut_add[7]    = { 0.0, 0.0, 2.0, 7.0, 21.0, 56.0, 157.0 };
static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 };
00115 
00116 static void apply_mdct(NellyMoserEncodeContext *s)
00117 {
00118     s->dsp.vector_fmul(s->in_buff, s->buf[s->bufsel], ff_sine_128, NELLY_BUF_LEN);
00119     s->dsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN, ff_sine_128,
00120                                NELLY_BUF_LEN);
00121     s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);
00122 
00123     s->dsp.vector_fmul(s->buf[s->bufsel] + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN,
00124                        ff_sine_128, NELLY_BUF_LEN);
00125     s->dsp.vector_fmul_reverse(s->buf[s->bufsel] + 2 * NELLY_BUF_LEN, s->buf[1 - s->bufsel], ff_sine_128,
00126                                NELLY_BUF_LEN);
00127     s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN);
00128 }
00129 
00130 static av_cold int encode_init(AVCodecContext *avctx)
00131 {
00132     NellyMoserEncodeContext *s = avctx->priv_data;
00133     int i;
00134 
00135     if (avctx->channels != 1) {
00136         av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
00137         return -1;
00138     }
00139 
00140     if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
00141         avctx->sample_rate != 11025 &&
00142         avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
00143         avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
00144         av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
00145         return -1;
00146     }
00147 
00148     avctx->frame_size = NELLY_SAMPLES;
00149     s->avctx = avctx;
00150     ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0);
00151     dsputil_init(&s->dsp, avctx);
00152 
00153     /* Generate overlap window */
00154     ff_sine_window_init(ff_sine_128, 128);
00155     for (i = 0; i < POW_TABLE_SIZE; i++)
00156         pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET);
00157 
00158     if (s->avctx->trellis) {
00159         s->opt  = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float  ));
00160         s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
00161     }
00162 
00163     return 0;
00164 }
00165 
00166 static av_cold int encode_end(AVCodecContext *avctx)
00167 {
00168     NellyMoserEncodeContext *s = avctx->priv_data;
00169 
00170     ff_mdct_end(&s->mdct_ctx);
00171 
00172     if (s->avctx->trellis) {
00173         av_free(s->opt);
00174         av_free(s->path);
00175     }
00176 
00177     return 0;
00178 }
00179 
/*
 * Pick the entry of `table` closest to `val`.
 *
 * LUT gives a first guess, indexed by (lrintf(val) >> 8) + LUT_add clipped to
 * [0, LUT_size - 1]; the guess is then bumped by one if the next table entry
 * is closer. The result is stored in a variable named best_idx, which must be
 * an int lvalue in scope at the call site. `val` is evaluated several times,
 * so it must not have side effects.
 *
 * Wrapped in do { } while (0) so that it behaves as a single statement.
 */
#define find_best(val, table, LUT, LUT_add, LUT_size) \
    do { \
        best_idx = \
            (LUT)[av_clip((lrintf(val) >> 8) + (LUT_add), 0, (LUT_size) - 1)]; \
        if (fabs((val) - (table)[best_idx]) > fabs((val) - (table)[best_idx + 1])) \
            best_idx++; \
    } while (0)
00185 
00186 static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
00187 {
00188     int band, best_idx, power_idx = 0;
00189     float power_candidate;
00190 
00191     //base exponent
00192     find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
00193     idx_table[0] = best_idx;
00194     power_idx = ff_nelly_init_table[best_idx];
00195 
00196     for (band = 1; band < NELLY_BANDS; band++) {
00197         power_candidate = cand[band] - power_idx;
00198         find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
00199         idx_table[band] = best_idx;
00200         power_idx += ff_nelly_delta_table[best_idx];
00201     }
00202 }
00203 
/**
 * Squared difference between two exponent values.
 *
 * @param x    first value
 * @param y    second value
 * @param band band number (currently not used in the computation)
 * @return (x - y) squared
 */
static inline float distance(float x, float y, int band)
{
    const float diff = x - y;

    return diff * diff;
}
00210 
00211 static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
00212 {
00213     int i, j, band, best_idx;
00214     float power_candidate, best_val;
00215 
00216     float  (*opt )[NELLY_BANDS] = s->opt ;
00217     uint8_t(*path)[NELLY_BANDS] = s->path;
00218 
00219     for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
00220         opt[0][i] = INFINITY;
00221     }
00222 
00223     for (i = 0; i < 64; i++) {
00224         opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
00225         path[0][ff_nelly_init_table[i]] = i;
00226     }
00227 
00228     for (band = 1; band < NELLY_BANDS; band++) {
00229         int q, c = 0;
00230         float tmp;
00231         int idx_min, idx_max, idx;
00232         power_candidate = cand[band];
00233         for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
00234             idx_min = FFMAX(0, cand[band] - q);
00235             idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
00236             for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
00237                 if ( isinf(opt[band - 1][i]) )
00238                     continue;
00239                 for (j = 0; j < 32; j++) {
00240                     idx = i + ff_nelly_delta_table[j];
00241                     if (idx > idx_max)
00242                         break;
00243                     if (idx >= idx_min) {
00244                         tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
00245                         if (opt[band][idx] > tmp) {
00246                             opt[band][idx] = tmp;
00247                             path[band][idx] = j;
00248                             c = 1;
00249                         }
00250                     }
00251                 }
00252             }
00253         }
00254         assert(c); //FIXME
00255     }
00256 
00257     best_val = INFINITY;
00258     best_idx = -1;
00259     band = NELLY_BANDS - 1;
00260     for (i = 0; i < OPT_SIZE; i++) {
00261         if (best_val > opt[band][i]) {
00262             best_val = opt[band][i];
00263             best_idx = i;
00264         }
00265     }
00266     for (band = NELLY_BANDS - 1; band >= 0; band--) {
00267         idx_table[band] = path[band][best_idx];
00268         if (band) {
00269             best_idx -= ff_nelly_delta_table[path[band][best_idx]];
00270         }
00271     }
00272 }
00273 
00280 static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
00281 {
00282     PutBitContext pb;
00283     int i, j, band, block, best_idx, power_idx = 0;
00284     float power_val, coeff, coeff_sum;
00285     float pows[NELLY_FILL_LEN];
00286     int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
00287     float cand[NELLY_BANDS];
00288 
00289     apply_mdct(s);
00290 
00291     init_put_bits(&pb, output, output_size * 8);
00292 
00293     i = 0;
00294     for (band = 0; band < NELLY_BANDS; band++) {
00295         coeff_sum = 0;
00296         for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
00297             coeff_sum += s->mdct_out[i                ] * s->mdct_out[i                ]
00298                        + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
00299         }
00300         cand[band] =
00301             log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2;
00302     }
00303 
00304     if (s->avctx->trellis) {
00305         get_exponent_dynamic(s, cand, idx_table);
00306     } else {
00307         get_exponent_greedy(s, cand, idx_table);
00308     }
00309 
00310     i = 0;
00311     for (band = 0; band < NELLY_BANDS; band++) {
00312         if (band) {
00313             power_idx += ff_nelly_delta_table[idx_table[band]];
00314             put_bits(&pb, 5, idx_table[band]);
00315         } else {
00316             power_idx = ff_nelly_init_table[idx_table[0]];
00317             put_bits(&pb, 6, idx_table[0]);
00318         }
00319         power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
00320         for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
00321             s->mdct_out[i] *= power_val;
00322             s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
00323             pows[i] = power_idx;
00324         }
00325     }
00326 
00327     ff_nelly_get_sample_bits(pows, bits);
00328 
00329     for (block = 0; block < 2; block++) {
00330         for (i = 0; i < NELLY_FILL_LEN; i++) {
00331             if (bits[i] > 0) {
00332                 const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
00333                 coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
00334                 best_idx =
00335                     quant_lut[av_clip (
00336                             coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
00337                             quant_lut_offset[bits[i]],
00338                             quant_lut_offset[bits[i]+1] - 1
00339                             )];
00340                 if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
00341                     best_idx++;
00342 
00343                 put_bits(&pb, bits[i], best_idx);
00344             }
00345         }
00346         if (!block)
00347             put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
00348     }
00349 
00350     flush_put_bits(&pb);
00351 }
00352 
00353 static int encode_frame(AVCodecContext *avctx, uint8_t *frame, int buf_size, void *data)
00354 {
00355     NellyMoserEncodeContext *s = avctx->priv_data;
00356     const float *samples = data;
00357     int i;
00358 
00359     if (s->last_frame)
00360         return 0;
00361 
00362     if (data) {
00363         memcpy(s->buf[s->bufsel], samples, avctx->frame_size * sizeof(*samples));
00364         for (i = avctx->frame_size; i < NELLY_SAMPLES; i++) {
00365             s->buf[s->bufsel][i] = 0;
00366         }
00367         s->bufsel = 1 - s->bufsel;
00368         if (!s->have_saved) {
00369             s->have_saved = 1;
00370             return 0;
00371         }
00372     } else {
00373         memset(s->buf[s->bufsel], 0, sizeof(s->buf[0][0]) * NELLY_BUF_LEN);
00374         s->bufsel = 1 - s->bufsel;
00375         s->last_frame = 1;
00376     }
00377 
00378     if (s->have_saved) {
00379         encode_block(s, frame, buf_size);
00380         return NELLY_BLOCK_LEN;
00381     }
00382     return 0;
00383 }
00384 
00385 AVCodec ff_nellymoser_encoder = {
00386     .name = "nellymoser",
00387     .type = AVMEDIA_TYPE_AUDIO,
00388     .id = CODEC_ID_NELLYMOSER,
00389     .priv_data_size = sizeof(NellyMoserEncodeContext),
00390     .init = encode_init,
00391     .encode = encode_frame,
00392     .close = encode_end,
00393     .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY,
00394     .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
00395     .sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_FLT,AV_SAMPLE_FMT_NONE},
00396 };
Generated on Thu Jan 24 2013 17:08:52 for Libav by doxygen 1.7.1