11 #include <sys/types.h>
13 #define MIN(a,b) ((a)<(b) ? (a) : (b))
14 #define MAX(a,b) ((a)>(b) ? (a) : (b))
16 void doFourier(double *input, int channels, sf_count_t len, fftw_complex *output)
18 static sf_count_t len0;
20 static double *windowedData;
21 static fftw_plan planlos;
22 static int planned = 0;
27 windowedData = fftw_malloc(sizeof(double) * channels * len);
30 fprintf(stderr, "Planning...\n");
31 planlos = fftw_plan_many_dft_r2c(1, &l, channels,
32 windowedData, NULL, channels, 1,
33 output, NULL, channels, 1,
34 FFTW_MEASURE | FFTW_PRESERVE_INPUT | FFTW_UNALIGNED);
38 assert(channels0 == channels);
40 sf_count_t nmax = channels * len;
42 for(i = 0; i < nmax; ++i)
44 double ang = (i/channels) * 2 * M_PI / (len - 1);
45 windowedData[i] = input[i] * (
52 fftw_execute_dft_r2c(planlos, windowedData, output);
55 // return 1.0 for identical, 0.0 for different
58 double vectorSim(fftw_complex *v1, fftw_complex *v2, size_t length)
63 for(i = 0; i < length; ++i)
65 fftw_complex a = v1[i];
66 fftw_complex b = v2[i];
68 div += cabs(a) + cabs(b);
71 // cabs(a - b) # 0 .. div
72 // fabs(cabs(a) - cabs(b)) # 0 .. div
74 // component 1: vector diff
75 sum += VDIFF * cabs(a - b); // can be at most 2*div
77 // component 2: length diff
78 sum += LDIFF * fabs(cabs(a) - cabs(b));
80 sum *= (1.0 / (VDIFF + LDIFF));
83 if(sum > div || sum < 0)
84 fprintf(stderr, "%f %f\n", sum, div);
85 return 1.0 - (sum / div);
88 sf_count_t findMaximumSingle(double (*func) (sf_count_t), sf_count_t x0, sf_count_t x1, sf_count_t step, double *val)
90 sf_count_t bestpos = x1;
91 double best = func(x1);
95 for(i = x0; i < x1; i += step)
110 sf_count_t findMaximum(double (*func) (sf_count_t), sf_count_t x0, sf_count_t xg, sf_count_t xg2, sf_count_t x1, double *val)
112 sf_count_t xg0, xg20;
114 xg0 = xg = MAX(x0, MIN(xg, x1));
115 xg20 = xg2 = MAX(x0, MIN(xg2, x1));
117 fprintf(stderr, "min/max: %d %d\n", (int)xg, (int)xg2);
121 sf_count_t size = xg2 - xg;
124 //fprintf(stderr, "round:\n");
125 sf_count_t bestguess = findMaximumSingle(func, xg, xg2, size / 32 + 1, val);
126 xg = MAX(xg0, bestguess - size / 3);
127 xg2 = MIN(bestguess + size / 3, xg20);
130 fprintf(stderr, "guessed: %d\n", (int)xg);
132 if(xg - xg0 < (xg20 - xg0) / 64)
133 fprintf(stderr, "warning: best match very close to left margin, maybe decrease the guess?\n");
135 if(xg20 - xg < (xg20 - xg0) / 64)
136 fprintf(stderr, "warning: best match very close to right margin, maybe increase the guess?\n");
141 int main(int argc, char **argv)
145 memset(&infile_info, 0, sizeof(infile_info));
146 infile_info.format = 0;
147 SNDFILE *infile = sf_open(argv[1], SFM_READ, &infile_info);
150 fprintf(stderr, "Input file has %ld frames, %d Hz, %d channels, format %08x, %d sections and is %s\n",
151 (long) infile_info.frames,
152 infile_info.samplerate,
153 infile_info.channels,
155 infile_info.sections,
156 infile_info.seekable ? "seekable" : "not seekable");
157 chans = infile_info.channels;
159 sf_count_t size = MIN(infile_info.frames, strtod(argv[3], NULL) * infile_info.samplerate);
160 sf_count_t guess = strtod(argv[4], NULL) * infile_info.samplerate;
161 sf_count_t guess2 = strtod(argv[5], NULL) * infile_info.samplerate;
162 int channels = infile_info.channels;
163 size_t fftsize = atoi(argv[2]);
164 size_t ndata = channels * (fftsize/2 + 1);
167 size_t ndata_lowpass = channels * (ndata / channels / 512); // 43 Hz
168 size_t ndata_highpass = channels * (ndata / channels / 4); // 5512 Hz
169 // simplifies the dot product and makes the target function more smooth
171 size_t ndata_lowpass = 0;
172 //size_t ndata_highpass = channels * (ndata / channels / 4); // 5512 Hz
173 size_t ndata_highpass = ndata;
175 if(size < guess + 2 * (sf_count_t) fftsize)
176 err(1, "sound file too small (maybe reduce fftsize?)");
179 err(1, "guess too close to file start (maybe reduce fftsize?)");
181 fprintf(stderr, "Using %ld frames of %d channels, guess at %ld, %d FFT size -> %d FFT result size\n", (long) size, channels, (long) guess, (int) fftsize, (int) ndata);
183 fftw_complex *data_end = fftw_malloc(sizeof(fftw_complex) * ndata);
184 fftw_complex *data_cur = fftw_malloc(sizeof(fftw_complex) * ndata);
186 double *sbuf = malloc(sizeof(double) * (size * channels));
187 if(size != sf_readf_double(infile, sbuf, size))
188 errx(1, "sf_read_double");
191 doFourier(sbuf + (size - fftsize) * chans, chans, fftsize, data_end);
192 fprintf(stderr, "end transformed.\n");
194 double similarityAt(sf_count_t i)
196 // A trampoline! Wheeeeeeeeeew!
197 doFourier(sbuf + i * channels, channels, fftsize, data_cur);
198 double v = vectorSim(data_end + ndata_lowpass, data_cur + ndata_lowpass, ndata_highpass - ndata_lowpass);
199 //fprintf(stderr, "Evaluated at %.9f: %f\n", i / (double) infile_info.samplerate, v);
206 for(i = guess; i < size - 2 * fftsize; ++i)
208 printf("%.9f %f\n", i / (double) infile_info.samplerate, similarityAt(i));
215 sf_count_t best = findMaximum(similarityAt, 0, guess - fftsize, guess2 - fftsize, size - 2 * fftsize, &val);
216 fprintf(stderr, "Result: %.9f (sample %ld) with quality %.9f\n", (best + fftsize) / (double) infile_info.samplerate, (long) (best + fftsize), val);
220 // 1. Crossfading end to our start sample
221 fprintf(stderr, "Crossfading...\n");
225 for(j = 0; j < channels; ++j)
226 for(i = 0; i < fftsize; ++i)
228 double f1 = pow(fftsize - i, p);
229 double f2 = pow(i, p);
230 sbuf[(size - fftsize + i) * channels + j] =
232 sbuf[(size - fftsize + i) * channels + j] * f1
234 sbuf[(best + i) * channels + j] * f2
238 // 2. Open sound file
239 fprintf(stderr, "Opening...\n");
240 SNDFILE *outfile = sf_open(argv[6], SFM_WRITE, &infile_info);
244 fprintf(stderr, "Writing...\n");
245 // 1. Write samples from start to just before start of our end FFT
246 sf_writef_double(outfile, sbuf, size);
248 fprintf(stderr, "Closing...\n");
251 printf("%ld %.9f\n", (long) (best + fftsize), (best + fftsize) / (double) infile_info.samplerate);