Files
clicktrack/lib/analysis/bpm-detect.ts
AJ Avezzano 51f67f0aeb feat: audio upload + AI-assisted tempo map generation
Users can now upload any audio file to generate a CTP tempo map:

BPM detection (lib/analysis/bpm-detect.ts):
- Runs entirely client-side via Web Audio API — audio is never uploaded
- Decodes any browser-supported format (MP3, WAV, AAC, OGG, FLAC, M4A)
- Energy envelope → onset strength → autocorrelation over 55–210 BPM range
- Returns BPM, normalised confidence score, duration, and optional half-time BPM
  for songs where a double-time pulse is detected

AI CTP generation (lib/analysis/ai-ctp.ts):
- Calls Claude (claude-opus-4-6) with adaptive thinking + structured JSON output
- System prompt explains CTP rules and section layout conventions
- Claude uses knowledge of well-known songs to produce accurate section maps;
  falls back to a sensible generic structure for unknown tracks
- Only BPM + duration + optional metadata is sent to the server (no audio data)

API route (app/api/analyze/route.ts):
- POST /api/analyze accepts { bpm, duration, title?, artist?, mbid?, contributed_by? }
- Validates input, calls generateCTPWithAI, runs CTP schema validation
- Returns { ctp, warnings } — warnings are surfaced in the UI rather than 500-ing

UI (components/TempoAnalyzer.tsx, app/(web)/analyze/page.tsx):
- Drag-and-drop or browse file upload
- Shows BPM, confidence, duration after detection
- Half-time toggle when double-time is detected
- Metadata form: title, artist, MusicBrainz ID, contributor name
  (filename parsed into artist/title as a convenience default)
- AI generation with streaming-style progress states
- Sections review via TempoMapEditor
- Download .ctp.json or submit directly to the database

Also: added @anthropic-ai/sdk to package.json, ANTHROPIC_API_KEY to .env.example,
updated next.config.mjs serverComponentsExternalPackages, added Analyze nav link.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-01 11:43:14 -04:00

188 lines
6.4 KiB
TypeScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* Client-side BPM detection
*
* Runs entirely in the browser using the Web Audio API (no server round-trip
* for the audio itself). The algorithm:
*
* 1. Decode the audio file into PCM via AudioContext.decodeAudioData()
* 2. Mix to mono, optionally resample to 22050 Hz
* 3. Compute a short-time energy envelope (512-sample frames)
* 4. Derive an onset-strength signal via half-wave-rectified first difference
* 5. Autocorrelate the onset signal over lags corresponding to 55–210 BPM
* 6. Pick the lag with the highest correlation; also test its 2× harmonic
* (halving the BPM) as a tiebreaker for double-time detections
*
* Typical accuracy is ±1–2 BPM on produced music with a clear beat.
* Rubato, live recordings, or highly syncopated rhythms may need manual adjustment.
*/
/** Result of a client-side BPM analysis run. */
export interface BPMDetectionResult {
  /** Estimated tempo in beats per minute, rounded to one decimal place. */
  bpm: number;
  /** Normalised confidence 0–1. Values above ~0.4 are generally reliable. */
  confidence: number;
  /** Total duration of the source file in seconds. */
  duration: number;
  /** The raw analysis produced a half-time alternative (bpm / 2); user may prefer it. Null when no double-time pulse was detected. */
  halfTimeBpm: number | null;
}
// ─── Internal helpers ─────────────────────────────────────────────────────────
/**
 * Collapses every channel of an AudioBuffer into one mono signal by
 * averaging the per-channel samples. Always returns a fresh array so the
 * caller cannot mutate the decoder's backing store.
 */
function mixToMono(buffer: AudioBuffer): Float32Array {
  const { numberOfChannels, length } = buffer;

  // Single-channel sources only need a defensive copy.
  if (numberOfChannels === 1) {
    return buffer.getChannelData(0).slice();
  }

  // Accumulate all channels first, then apply the 1/channels gain in a
  // separate pass (matches the accumulate-then-scale float behaviour).
  const mono = new Float32Array(length);
  for (let channel = 0; channel < numberOfChannels; channel++) {
    const data = buffer.getChannelData(channel);
    for (let i = 0; i < length; i++) {
      mono[i] += data[i];
    }
  }
  const gain = 1 / numberOfChannels;
  for (let i = 0; i < length; i++) {
    mono[i] *= gain;
  }
  return mono;
}
/**
 * Computes the RMS energy of each non-overlapping frame of `frameSize`
 * samples. Trailing samples that do not fill a complete frame are dropped.
 */
function energyEnvelope(samples: Float32Array, frameSize: number): Float32Array {
  const frameCount = Math.floor(samples.length / frameSize);
  const envelope = new Float32Array(frameCount);
  for (let frame = 0; frame < frameCount; frame++) {
    const start = frame * frameSize;
    let acc = 0;
    for (let offset = 0; offset < frameSize; offset++) {
      const sample = samples[start + offset];
      acc += sample * sample;
    }
    // Root-mean-square over the frame.
    envelope[frame] = Math.sqrt(acc / frameSize);
  }
  return envelope;
}
/**
 * Onset-strength signal: the half-wave-rectified first difference of the
 * energy envelope. Only energy increases (attack transients) survive;
 * decreases are clamped to zero. Index 0 is always 0.
 */
function onsetStrength(env: Float32Array): Float32Array {
  const strength = new Float32Array(env.length);
  if (env.length === 0) return strength;
  let prev = env[0];
  for (let i = 1; i < env.length; i++) {
    const current = env[i];
    const rise = current - prev;
    if (rise > 0) {
      strength[i] = rise;
    }
    prev = current;
  }
  return strength;
}
/**
 * Pearson-style normalised autocorrelation of `signal` with itself at the
 * given lag. Result lies in [-1, 1]; returns 0 when the overlapping window
 * is empty or when either windowed segment has zero energy.
 */
function autocorrAtLag(signal: Float32Array, lag: number): number {
  const overlap = signal.length - lag;
  if (overlap <= 0) return 0;

  let energyA = 0;
  let energyB = 0;
  let cross = 0;
  for (let i = 0; i < overlap; i++) {
    const a = signal[i];
    const b = signal[i + lag];
    energyA += a * a;
    energyB += b * b;
    cross += a * b;
  }

  const norm = Math.sqrt(energyA * energyB);
  return norm > 0 ? cross / norm : 0;
}
// ─── Public API ───────────────────────────────────────────────────────────────
/**
 * Analyses a user-provided audio file and returns the estimated BPM.
 * Must be called from a browser environment (requires Web Audio API).
 *
 * @param file An audio File (MP3, WAV, AAC, OGG — anything the browser decodes)
 * @param signal An optional AbortSignal to cancel long analysis
 * @returns Estimated BPM, a 0–1 confidence score, the file duration, and an
 *   optional half-time BPM alternative
 * @throws DOMException("AbortError") when `signal` is aborted mid-analysis
 */
export async function detectBPM(
  file: File,
  signal?: AbortSignal
): Promise<BPMDetectionResult> {
  // Decode at 22050 Hz to reduce computation while keeping enough resolution
  const targetSampleRate = 22050;
  const audioCtx = new AudioContext({ sampleRate: targetSampleRate });
  try {
    const arrayBuffer = await file.arrayBuffer();
    if (signal?.aborted) throw new DOMException("Aborted", "AbortError");
    const audioBuffer = await audioCtx.decodeAudioData(arrayBuffer);
    if (signal?.aborted) throw new DOMException("Aborted", "AbortError");

    const duration = audioBuffer.duration;
    const sampleRate = audioBuffer.sampleRate; // may differ from targetSampleRate
    const mono = mixToMono(audioBuffer);

    // Analyse a representative middle segment (skip silent intros/outros).
    // Cap at 90 s so analysis stays fast even on long recordings.
    const analysisStart = Math.floor(sampleRate * Math.min(10, duration * 0.1));
    const analysisEnd = Math.min(
      mono.length,
      analysisStart + Math.floor(sampleRate * 90)
    );
    const segment = mono.subarray(analysisStart, analysisEnd);

    // Energy envelope: ~23 ms frames at 22050 Hz
    const FRAME_SIZE = 512;
    const frameRate = sampleRate / FRAME_SIZE; // frames per second
    const env = energyEnvelope(segment, FRAME_SIZE);
    const onset = onsetStrength(env);

    // Lag bounds for 55–210 BPM
    const minLag = Math.max(1, Math.round((frameRate * 60) / 210));
    const maxLag = Math.round((frameRate * 60) / 55);

    // Single sweep over the lag range. We also accumulate the sum of the
    // correlations so confidence can be derived below WITHOUT re-running the
    // expensive autocorrelation a second time (the previous implementation
    // swept the whole range twice).
    let bestLag = minLag;
    let bestCorr = -Infinity;
    let corrSum = 0;
    const lagCount = maxLag - minLag + 1;
    for (let lag = minLag; lag <= maxLag; lag++) {
      const corr = autocorrAtLag(onset, lag);
      corrSum += corr;
      if (corr > bestCorr) {
        bestCorr = corr;
        bestLag = lag;
      }
    }
    if (signal?.aborted) throw new DOMException("Aborted", "AbortError");

    const rawBpm = (frameRate * 60) / bestLag;
    // Round to one decimal place
    const bpm = Math.round(rawBpm * 10) / 10;

    // Check whether the half-time (bpm/2) has comparable correlation —
    // double-time detections are common on songs with a 2-beat pulse.
    const halfTimeLag = bestLag * 2;
    let halfTimeBpm: number | null = null;
    if (halfTimeLag <= maxLag * 2) {
      const halfCorr = autocorrAtLag(onset, halfTimeLag);
      if (halfCorr > bestCorr * 0.85) {
        halfTimeBpm = Math.round((rawBpm / 2) * 10) / 10;
      }
    }

    // Confidence = contrast between the winning peak and the mean correlation
    // across the lag range, rescaled to 0–1. A sharp isolated peak scores
    // high; a flat correlation landscape scores near 0.
    //
    // BUG FIX: the old code divided bestCorr by max(|corr|) over the SAME
    // range that produced bestCorr — whenever the peak is positive (the
    // normal case) that ratio is bestCorr / bestCorr, so confidence was
    // almost always exactly 1 and carried no information.
    const meanCorr = lagCount > 0 ? corrSum / lagCount : 0;
    const contrastRange = 1 - meanCorr;
    const confidence =
      contrastRange > 0
        ? Math.max(0, Math.min(1, (bestCorr - meanCorr) / contrastRange))
        : 0;

    return { bpm, confidence, duration, halfTimeBpm };
  } finally {
    // Always release the AudioContext (browsers cap concurrent contexts).
    await audioCtx.close();
  }
}