libs/jpeg6/jfdctflt.cpp

   1 /*
   2
   3  * jfdctflt.c
   4
   5  *
   6
   7  * Copyright (C) 1994, Thomas G. Lane.
   8
   9  * This file is part of the Independent JPEG Group's software.
  10
  11  * For conditions of distribution and use, see the accompanying README file.
  12
  13  *
  14
  15  * This file contains a floating-point implementation of the
  16
  17  * forward DCT (Discrete Cosine Transform).
  18
  19  *
  20
  21  * This implementation should be more accurate than either of the integer
  22
  23  * DCT implementations.  However, it may not give the same results on all
  24
  25  * machines because of differences in roundoff behavior.  Speed will depend
  26
  27  * on the hardware's floating point capacity.
  28
  29  *
  30
  31  * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
  32
  33  * on each column.  Direct algorithms are also available, but they are
  34
  35  * much more complex and seem not to be any faster when reduced to code.
  36
  37  *
  38
  39  * This implementation is based on Arai, Agui, and Nakajima's algorithm for
  40
  41  * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
  42
  43  * Japanese, but the algorithm is described in the Pennebaker & Mitchell
  44
  45  * JPEG textbook (see REFERENCES section in file README).  The following code
  46
  47  * is based directly on figure 4-8 in P&M.
  48
  49  * While an 8-point DCT cannot be done in less than 11 multiplies, it is
  50
  51  * possible to arrange the computation so that many of the multiplies are
  52
  53  * simple scalings of the final outputs.  These multiplies can then be
  54
  55  * folded into the multiplications or divisions by the JPEG quantization
  56
  57  * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
  58
  59  * to be done in the DCT itself.
  60
  61  * The primary disadvantage of this method is that with a fixed-point
  62
  63  * implementation, accuracy is lost due to imprecise representation of the
  64
  65  * scaled quantization values.  However, that problem does not arise if
  66
  67  * we use floating point arithmetic.
  68
  69  */
  70
  71
  72
  73 #define JPEG_INTERNALS
  74
  75 #include "jinclude.h"
  76
  77 #include "radiant_jpeglib.h"
  78
  79 #include "jdct.h"               /* Private declarations for DCT subsystem */
  80
  81
  82
  83 #ifdef DCT_FLOAT_SUPPORTED
  84
  85
  86
  87
  88
  89 /*
  90
  91  * This module is specialized to the case DCTSIZE = 8.
  92
  93  */
  94
  95
  96
  97 #if DCTSIZE != 8
  98
  99   Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
 100
 101 #endif
 102
 103
 104
 105
 106
 107 /*
 108
 109  * Perform the forward DCT on one block of samples.
 110
 111  */
 112
 113
 114
 115 GLOBAL void
 116
 117 jpeg_fdct_float (FAST_FLOAT * data)
 118
 119 {
 120
 121   FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
 122
 123   FAST_FLOAT tmp10, tmp11, tmp12, tmp13;
 124
 125   FAST_FLOAT z1, z2, z3, z4, z5, z11, z13;
 126
 127   FAST_FLOAT *dataptr;
 128
 129   int ctr;
 130
 131
 132
 133   /* Pass 1: process rows. */
 134
 135
 136
 137   dataptr = data;
 138
 139   for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
 140
 141     tmp0 = dataptr[0] + dataptr[7];
 142
 143     tmp7 = dataptr[0] - dataptr[7];
 144
 145     tmp1 = dataptr[1] + dataptr[6];
 146
 147     tmp6 = dataptr[1] - dataptr[6];
 148
 149     tmp2 = dataptr[2] + dataptr[5];
 150
 151     tmp5 = dataptr[2] - dataptr[5];
 152
 153     tmp3 = dataptr[3] + dataptr[4];
 154
 155     tmp4 = dataptr[3] - dataptr[4];
 156
 157
 158
 159     /* Even part */
 160
 161
 162
 163     tmp10 = tmp0 + tmp3;        /* phase 2 */
 164
 165     tmp13 = tmp0 - tmp3;
 166
 167     tmp11 = tmp1 + tmp2;
 168
 169     tmp12 = tmp1 - tmp2;
 170
 171
 172
 173     dataptr[0] = tmp10 + tmp11; /* phase 3 */
 174
 175     dataptr[4] = tmp10 - tmp11;
 176
 177
 178
 179     z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */
 180
 181     dataptr[2] = tmp13 + z1;    /* phase 5 */
 182
 183     dataptr[6] = tmp13 - z1;
 184
 185
 186
 187     /* Odd part */
 188
 189
 190
 191     tmp10 = tmp4 + tmp5;        /* phase 2 */
 192
 193     tmp11 = tmp5 + tmp6;
 194
 195     tmp12 = tmp6 + tmp7;
 196
 197
 198
 199     /* The rotator is modified from fig 4-8 to avoid extra negations. */
 200
 201     z5 = (tmp10 - tmp12) * ((FAST_FLOAT) 0.382683433); /* c6 */
 202
 203     z2 = ((FAST_FLOAT) 0.541196100) * tmp10 + z5; /* c2-c6 */
 204
 205     z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */
 206
 207     z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */
 208
 209
 210
 211     z11 = tmp7 + z3;            /* phase 5 */
 212
 213     z13 = tmp7 - z3;
 214
 215
 216
 217     dataptr[5] = z13 + z2;      /* phase 6 */
 218
 219     dataptr[3] = z13 - z2;
 220
 221     dataptr[1] = z11 + z4;
 222
 223     dataptr[7] = z11 - z4;
 224
 225
 226
 227     dataptr += DCTSIZE;         /* advance pointer to next row */
 228
 229   }
 230
 231
 232
 233   /* Pass 2: process columns. */
 234
 235
 236
 237   dataptr = data;
 238
 239   for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
 240
 241     tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
 242
 243     tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
 244
 245     tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
 246
 247     tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
 248
 249     tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
 250
 251     tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
 252
 253     tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
 254
 255     tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
 256
 257
 258
 259     /* Even part */
 260
 261
 262
 263     tmp10 = tmp0 + tmp3;        /* phase 2 */
 264
 265     tmp13 = tmp0 - tmp3;
 266
 267     tmp11 = tmp1 + tmp2;
 268
 269     tmp12 = tmp1 - tmp2;
 270
 271
 272
 273     dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */
 274
 275     dataptr[DCTSIZE*4] = tmp10 - tmp11;
 276
 277
 278
 279     z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */
 280
 281     dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */
 282
 283     dataptr[DCTSIZE*6] = tmp13 - z1;
 284
 285
 286
 287     /* Odd part */
 288
 289
 290
 291     tmp10 = tmp4 + tmp5;        /* phase 2 */
 292
 293     tmp11 = tmp5 + tmp6;
 294
 295     tmp12 = tmp6 + tmp7;
 296
 297
 298
 299     /* The rotator is modified from fig 4-8 to avoid extra negations. */
 300
 301     z5 = (tmp10 - tmp12) * ((FAST_FLOAT) 0.382683433); /* c6 */
 302
 303     z2 = ((FAST_FLOAT) 0.541196100) * tmp10 + z5; /* c2-c6 */
 304
 305     z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */
 306
 307     z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */
 308
 309
 310
 311     z11 = tmp7 + z3;            /* phase 5 */
 312
 313     z13 = tmp7 - z3;
 314
 315
 316
 317     dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
 318
 319     dataptr[DCTSIZE*3] = z13 - z2;
 320
 321     dataptr[DCTSIZE*1] = z11 + z4;
 322
 323     dataptr[DCTSIZE*7] = z11 - z4;
 324
 325
 326
 327     dataptr++;                  /* advance pointer to next column */
 328
 329   }
 330
 331 }
 332
 333
 334
 335 #endif /* DCT_FLOAT_SUPPORTED */
 336