diff --git a/.env.example b/.env.example index aca5757..3bddbd4 100644 --- a/.env.example +++ b/.env.example @@ -4,41 +4,30 @@ # ───────────────────────────────────────────────────────────────────────────── # ── Database ───────────────────────────────────────────────────────────────── -# PostgreSQL connection string. -# When using docker compose the default works out of the box. DATABASE_URL=postgres://clicktrack:clicktrack@localhost:5432/clicktrack - -# Password used by the postgres service in docker-compose.yml. -# Change this before deploying to production. POSTGRES_PASSWORD=clicktrack # ── Redis ──────────────────────────────────────────────────────────────────── -# Redis connection URL. REDIS_URL=redis://localhost:6379 # ── Community registry ─────────────────────────────────────────────────────── -# Public GitHub repository containing community CTP files. -# Example: https://github.com/your-org/clicktrack-registry -# Leave blank to disable registry sync. REGISTRY_REPO= - -# Branch to pull from (default: main). REGISTRY_BRANCH=main - -# Interval in seconds between registry syncs (default: 3600 = 1 hour). REGISTRY_SYNC_INTERVAL=3600 +# ── AI Tempo Analysis ──────────────────────────────────────────────────────── +# Required for the /analyze feature (AI tempo map generation). +# Get a key at https://console.anthropic.com +# BPM detection is client-side and works without this key. +ANTHROPIC_API_KEY= + # ── App ────────────────────────────────────────────────────────────────────── -# Display name shown in the UI and page title. NEXT_PUBLIC_APP_NAME=ClickTrack # ── MusicBrainz ────────────────────────────────────────────────────────────── -# User-Agent string sent to MusicBrainz. Must identify your application and -# provide a contact URL or email per their usage policy: -# https://musicbrainz.org/doc/MusicBrainz_API/Rate_Limiting +# Must identify your instance per MB rate-limit policy. 
MUSICBRAINZ_USER_AGENT=ClickTrack/0.1 (https://your-instance-url) # ── Ports (docker-compose.yml) ─────────────────────────────────────────────── -# Host ports for the nginx reverse proxy. HTTP_PORT=80 HTTPS_PORT=443 diff --git a/app/(web)/analyze/page.tsx b/app/(web)/analyze/page.tsx new file mode 100644 index 0000000..b8fe717 --- /dev/null +++ b/app/(web)/analyze/page.tsx @@ -0,0 +1,50 @@ +import type { Metadata } from "next"; +import TempoAnalyzer from "@/components/TempoAnalyzer"; + +export const metadata: Metadata = { + title: "Analyze Audio", + description: + "Upload an audio file, detect the tempo, and generate a CTP tempo map with AI assistance.", +}; + +export default function AnalyzePage() { + return ( +
+
+

+ Tempo Analysis +

+

Generate a Tempo Map

+

+ Upload your audio file. The app detects the BPM in your browser, then + uses AI to generate a complete{" "} + CTP tempo map — including + sections, time signatures, and any tempo changes. +

+
+ +
+

+ How it works: +

+
    +
  1. Drop or select any audio file — MP3, WAV, AAC, OGG, FLAC, M4A.
  2. +
  3. + BPM is detected locally in your browser using the Web Audio API. + Your audio is never uploaded. +
  4. +
  5. + Only the detected BPM, duration, and any metadata you provide are + sent to the server for AI generation. +
  6. +
  7. + Claude analyses the song structure and returns a draft CTP document. + Always verify it against the recording. +
  8. +
+
+ + +
+  );
+}
diff --git a/app/api/analyze/route.ts b/app/api/analyze/route.ts
new file mode 100644
index 0000000..01c16a9
--- /dev/null
+++ b/app/api/analyze/route.ts
@@ -0,0 +1,90 @@
+import { NextRequest, NextResponse } from "next/server";
+import { z } from "zod";
+import { generateCTPWithAI } from "@/lib/analysis/ai-ctp";
+import { validateCTP } from "@/lib/ctp/validate";
+
+// ─── Request schema ───────────────────────────────────────────────────────────
+
+const AnalyzeRequestSchema = z.object({
+  bpm: z.number().min(20).max(400),
+  duration: z.number().positive(),
+  title: z.string().min(1).max(256).optional(),
+  artist: z.string().min(1).max(256).optional(),
+  mbid: z.string().uuid().optional().nullable(),
+  contributed_by: z.string().min(1).max(64).optional(),
+});
+
+/**
+ * POST /api/analyze
+ *
+ * Accepts BPM detection results from the browser and uses Claude to generate
+ * a draft CTP document for human review.
+ *
+ * Body (JSON):
+ *   { bpm, duration, title?, artist?, mbid?, contributed_by? }
+ *
+ * Returns:
+ *   { ctp: CTPDocument, warnings: string[] }
+ *
+ * Errors:
+ *   400 — body is not JSON, or fails AnalyzeRequestSchema
+ *   503 — ANTHROPIC_API_KEY is not set
+ *   500 — generation threw (the cause is logged, not returned)
+ */
+export async function POST(req: NextRequest) {
+  let body: unknown;
+  try {
+    body = await req.json();
+  } catch {
+    return NextResponse.json({ error: "Invalid JSON body" }, { status: 400 });
+  }
+
+  const parsed = AnalyzeRequestSchema.safeParse(body);
+  if (!parsed.success) {
+    return NextResponse.json(
+      { error: "Invalid request", details: parsed.error.flatten() },
+      { status: 400 }
+    );
+  }
+
+  const { bpm, duration, title, artist, mbid, contributed_by } = parsed.data;
+
+  if (!process.env.ANTHROPIC_API_KEY) {
+    return NextResponse.json(
+      { error: "ANTHROPIC_API_KEY is not configured on this server" },
+      { status: 503 }
+    );
+  }
+
+  let ctpDoc;
+  try {
+    ctpDoc = await generateCTPWithAI({
+      bpm,
+      duration,
+      title,
+      artist,
+      mbid: mbid ?? null,
+      contributedBy: contributed_by ?? "anonymous",
+    });
+  } catch (err) {
+    // Keep the full error in the server log only. Echoing String(err) to the
+    // caller would hand upstream/SDK error text to any anonymous client.
+    console.error("[analyze] AI generation failed:", err);
+    return NextResponse.json(
+      { error: "Failed to generate CTP document" },
+      { status: 500 }
+    );
+  }
+
+  // Validate the AI output against the CTP schema
+  const validation = validateCTP(ctpDoc);
+  const warnings: string[] = [];
+
+  if (!validation.success) {
+    // Rather than 500-ing, return the draft with validation warnings so the user
+    // can still see and manually correct it.
+    warnings.push(...validation.errors.issues.map((i) => `${i.path.join(".")}: ${i.message}`));
+  }
+
+  return NextResponse.json({ ctp: ctpDoc, warnings });
+}
diff --git a/app/layout.tsx b/app/layout.tsx
index db12643..b369853 100644
--- a/app/layout.tsx
+++ b/app/layout.tsx
@@ -27,7 +27,10 @@ export default function RootLayout({
 Search - + Analyze + + < href="https://github.com/your-org/clicktrack" target="_blank" rel="noopener noreferrer"
diff --git a/components/TempoAnalyzer.tsx b/components/TempoAnalyzer.tsx
new file mode 100644
index 0000000..1d7efa8
--- /dev/null
+++ b/components/TempoAnalyzer.tsx
@@ -0,0 +1,510 @@
+"use client";
+
+/**
+ * TempoAnalyzer
+ *
+ * Full workflow:
+ *   1. User drops / selects an audio file (MP3, WAV, AAC, OGG, etc.)
+ *   2. Browser decodes the audio and runs BPM detection (Web Audio API)
+ *   3. Optional: user provides song title, artist, MusicBrainz ID
+ *   4. Client sends { bpm, duration, … } to POST /api/analyze
+ *   5. Server calls Claude → returns a CTP document draft
+ *   6. 
User can review the sections, download the .ctp.json, or submit to DB
+ */
+
+import { useState, useRef, useCallback } from "react";
+import { detectBPM, type BPMDetectionResult } from "@/lib/analysis/bpm-detect";
+import TempoMapEditor from "@/components/TempoMapEditor";
+import type { CTPDocument } from "@/lib/ctp/schema";
+
+// ─── Types ────────────────────────────────────────────────────────────────────
+
+type Stage =
+  | "idle"
+  | "decoding"
+  | "detecting"
+  | "generating"
+  | "review"
+  | "saving"
+  | "saved"
+  | "error";
+
+interface AnalyzerState {
+  stage: Stage;
+  file: File | null;
+  detection: BPMDetectionResult | null;
+  ctp: CTPDocument | null;
+  warnings: string[];
+  errorMsg: string;
+  // Optional metadata the user may fill in before AI generation
+  title: string;
+  artist: string;
+  mbid: string;
+  contributedBy: string;
+  // Toggle: use halfTimeBpm instead of primary bpm
+  useHalfTime: boolean;
+}
+
+const INITIAL_STATE: AnalyzerState = {
+  stage: "idle",
+  file: null,
+  detection: null,
+  ctp: null,
+  warnings: [],
+  errorMsg: "",
+  title: "",
+  artist: "",
+  mbid: "",
+  contributedBy: "",
+  useHalfTime: false,
+};
+
+// ─── Helpers ──────────────────────────────────────────────────────────────────
+
+function formatDuration(s: number) {
+  const total = Math.round(s); // round the whole value first: 119.6 s must read "2:00", not "1:60"
+  const m = Math.floor(total / 60);
+  return `${m}:${String(total % 60).padStart(2, "0")}`;
+}
+
+function confidenceLabel(c: number) {
+  if (c >= 0.7) return { label: "High", color: "text-green-400" };
+  if (c >= 0.4) return { label: "Medium", color: "text-amber-400" };
+  return { label: "Low", color: "text-red-400" };
+}
+
+// ─── Component ────────────────────────────────────────────────────────────────
+
+export default function TempoAnalyzer() {
+  const [state, setState] = useState(INITIAL_STATE);
+  const abortRef = useRef<AbortController | null>(null); // reassigned per file, so it must be a mutable ref
+  const dropRef = useRef(null);
+  const [isDragging, setIsDragging] = useState(false);
+
+  const update = (patch: Partial<AnalyzerState>) =>
+    setState((prev) => ({ ...prev, ...patch }));
+
+  // ── File handling ────────────────────────────────────────────────────────
+
+  const handleFile = useCallback(async (file: File) => {
+    if (!file.type.startsWith("audio/") && !file.name.match(/\.(mp3|wav|aac|ogg|flac|m4a|aiff)$/i)) {
+      update({ errorMsg: "Please select an audio file (MP3, WAV, AAC, OGG, FLAC, M4A).", stage: "error" });
+      return;
+    }
+
+    abortRef.current?.abort();
+    const abort = new AbortController();
+    abortRef.current = abort;
+
+    // Try to pre-fill title/artist from filename: "Artist - Title.mp3"
+    const base = file.name.replace(/\.[^.]+$/, "");
+    const dashIdx = base.indexOf(" - ");
+    const autoTitle = dashIdx > -1 ? base.slice(dashIdx + 3) : base;
+    const autoArtist = dashIdx > -1 ? base.slice(0, dashIdx) : "";
+
+    update({
+      stage: "decoding",
+      file,
+      detection: null,
+      ctp: null,
+      warnings: [],
+      errorMsg: "",
+      title: autoTitle,
+      artist: autoArtist,
+    });
+
+    try {
+      update({ stage: "detecting" });
+      const detection = await detectBPM(file, abort.signal);
+      update({ detection, stage: "idle" }); // wait for user to confirm/edit metadata
+    } catch (err) {
+      if ((err as Error).name === "AbortError") return;
+      update({
+        stage: "error",
+        errorMsg: `BPM detection failed: ${err instanceof Error ? err.message : String(err)}`,
+      });
+    }
+  }, []);
+
+  function handleDrop(e: React.DragEvent) {
+    e.preventDefault();
+    setIsDragging(false);
+    const file = e.dataTransfer.files[0];
+    if (file) handleFile(file);
+  }
+
+  function handleFileInput(e: React.ChangeEvent<HTMLInputElement>) {
+    const file = e.target.files?.[0];
+    if (file) handleFile(file);
+    e.target.value = ""; // reset so re-selecting same file works
+  }
+
+  // ── AI generation ────────────────────────────────────────────────────────
+
+  async function handleGenerate() {
+    if (!state.detection) return;
+
+    const effectiveBpm =
+      state.useHalfTime && state.detection.halfTimeBpm
+        ? state.detection.halfTimeBpm
+        : state.detection.bpm;
+
+    update({ stage: "generating", ctp: null, warnings: [] });
+
+    try {
+      const res = await fetch("/api/analyze", {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify({
+          bpm: effectiveBpm,
+          duration: state.detection.duration,
+          title: state.title || undefined,
+          artist: state.artist || undefined,
+          mbid: state.mbid || undefined,
+          contributed_by: state.contributedBy || undefined,
+        }),
+      });
+
+      const data = await res.json();
+
+      if (!res.ok) {
+        throw new Error(data.error ?? `Server error ${res.status}`);
+      }
+
+      update({ ctp: data.ctp, warnings: data.warnings ?? [], stage: "review" });
+    } catch (err) {
+      update({
+        stage: "error",
+        errorMsg: `Generation failed: ${err instanceof Error ? err.message : String(err)}`,
+      });
+    }
+  }
+
+  // ── Submit to DB ─────────────────────────────────────────────────────────
+
+  async function handleSubmit() {
+    if (!state.ctp) return;
+    update({ stage: "saving" });
+
+    try {
+      const res = await fetch("/api/tracks", {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify(state.ctp),
+      });
+
+      const data = await res.json();
+      if (!res.ok) {
+        throw new Error(data.error ?? `Server error ${res.status}`);
+      }
+
+      update({ stage: "saved" });
+    } catch (err) {
+      update({
+        stage: "error",
+        errorMsg: `Save failed: ${err instanceof Error ? err.message : String(err)}`,
+      });
+    }
+  }
+
+  // ── Download CTP file ────────────────────────────────────────────────────
+
+  function handleDownload() {
+    if (!state.ctp) return;
+    const json = JSON.stringify(state.ctp, null, 2);
+    const blob = new Blob([json], { type: "application/json" });
+    const url = URL.createObjectURL(blob);
+    const a = document.createElement("a");
+    const safeName = `${state.ctp.metadata.artist} - ${state.ctp.metadata.title}`
+      .replace(/[^\w\s\-]/g, "")
+      .replace(/\s+/g, "_")
+      .slice(0, 80);
+    a.href = url;
+    a.download = `${safeName}.ctp.json`;
+    a.click();
+    URL.revokeObjectURL(url);
+  }
+
+  // ── Reset ────────────────────────────────────────────────────────────────
+
+  function handleReset() {
+    abortRef.current?.abort();
+    setState(INITIAL_STATE);
+  }
+
+  // ─── Render ───────────────────────────────────────────────────────────────
+
+  const { stage, file, detection, ctp, warnings, errorMsg, useHalfTime } = state;
+  const isProcessing = stage === "decoding" || stage === "detecting" || stage === "generating" || stage === "saving";
+
+  return (
+
+ + {/* ── Drop zone ─────────────────────────────────────────────────── */} + {!file && stage === "idle" && ( +
{ e.preventDefault(); setIsDragging(true); }} + onDragLeave={() => setIsDragging(false)} + onDrop={handleDrop} + className={`rounded-xl border-2 border-dashed px-8 py-16 text-center transition-colors ${ + isDragging + ? "border-green-500 bg-green-950/20" + : "border-zinc-700 hover:border-zinc-500" + }`} + > +

🎵

+

+ Drop an audio file here +

+

+ MP3, WAV, AAC, OGG, FLAC, M4A — any format your browser supports +

+ +
+ )} + + {/* ── Processing indicator ───────────────────────────────────────── */} + {isProcessing && ( +
+
+

+ {stage === "decoding" && "Decoding audio…"} + {stage === "detecting" && "Detecting tempo…"} + {stage === "generating" && "Generating tempo map with AI…"} + {stage === "saving" && "Saving to database…"} +

+ {stage === "generating" && ( +

+ Claude is analysing the song structure — this takes ~5–15 seconds. +

+ )} +
+ )} + + {/* ── Error ─────────────────────────────────────────────────────── */} + {stage === "error" && ( +
+

Error

+

{errorMsg}

+ +
+ )} + + {/* ── Detection results + metadata form ─────────────────────────── */} + {detection && (stage === "idle" || stage === "review" || stage === "saved") && ( +
+ {/* File name + detection summary */} +
+
+

Analysed file

+

{file?.name}

+
+ +
+ +
+
+

+ {useHalfTime && detection.halfTimeBpm + ? detection.halfTimeBpm + : detection.bpm} +

+

BPM

+
+
+

+ {confidenceLabel(detection.confidence).label} +

+

Confidence

+
+
+

+ {formatDuration(detection.duration)} +

+

Duration

+
+
+ + {/* Half-time toggle */} + {detection.halfTimeBpm && ( +
+ + Detected double-time pulse — primary BPM may be 2× the actual feel. + Half-time: {detection.halfTimeBpm} BPM + + +
+ )} + + {/* Confidence warning */} + {detection.confidence < 0.4 && ( +

+ Low confidence — the BPM may be inaccurate. Consider a song with a clearer beat, or adjust the detected value manually before generating. +

+ )} + + {/* Metadata form */} + {stage === "idle" && ( + <> +
+
+ + update({ title: e.target.value })} + placeholder="e.g. Bohemian Rhapsody" + className="w-full rounded-lg border border-zinc-700 bg-zinc-800 px-3 py-2 text-sm text-zinc-100 placeholder:text-zinc-600 focus:border-green-500 focus:outline-none" + /> +
+
+ + update({ artist: e.target.value })} + placeholder="e.g. Queen" + className="w-full rounded-lg border border-zinc-700 bg-zinc-800 px-3 py-2 text-sm text-zinc-100 placeholder:text-zinc-600 focus:border-green-500 focus:outline-none" + /> +
+
+ + update({ mbid: e.target.value })} + placeholder="xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" + className="w-full rounded-lg border border-zinc-700 bg-zinc-800 px-3 py-2 text-sm font-mono text-zinc-100 placeholder:text-zinc-600 focus:border-green-500 focus:outline-none" + /> +
+
+ + update({ contributedBy: e.target.value })} + placeholder="e.g. guitar_pete" + className="w-full rounded-lg border border-zinc-700 bg-zinc-800 px-3 py-2 text-sm text-zinc-100 placeholder:text-zinc-600 focus:border-green-500 focus:outline-none" + /> +
+
+ + + + )} +
+ )} + + {/* ── AI-generated CTP review ────────────────────────────────────── */} + {ctp && (stage === "review" || stage === "saved") && ( +
+ {warnings.length > 0 && ( +
+

Validation warnings

+
    + {warnings.map((w, i) =>
  • {w}
  • )} +
+
+ )} + +
+
+

Generated tempo map

+ AI draft — verify before using +
+ +
+ + {/* Actions */} + {stage === "review" && ( +
+ + + {ctp.metadata.mbid && ( + + )} + + {!ctp.metadata.mbid && ( +

+ Add a MusicBrainz ID to submit to the database. +

+ )} + + +
+ )} + + {stage === "saved" && ( +
+ +
+

Saved to database

+ {ctp.metadata.mbid && ( + + View track page → + + )} +
+ +
+ )} +
+ )} +
+ ); +} diff --git a/lib/analysis/ai-ctp.ts b/lib/analysis/ai-ctp.ts new file mode 100644 index 0000000..9a8b85f --- /dev/null +++ b/lib/analysis/ai-ctp.ts @@ -0,0 +1,179 @@ +/** + * AI-assisted CTP document generation + * + * Takes the results of BPM detection (and optional song metadata) and uses + * Claude to produce a plausible, well-structured CTP document. + * + * Claude is asked to: + * - Divide the song into typical sections (Intro, Verse, Chorus, Bridge…) + * - Assign realistic start bars for each section + * - Note any tempo changes it would expect for the song/genre + * - Return a fully valid CTP 1.0 JSON document + * + * The caller should treat the result as a *draft* — the generated sections + * are educated guesses and should be verified against the recording. + */ + +import Anthropic from "@anthropic-ai/sdk"; +import type { CTPDocument } from "@/lib/ctp/schema"; + +const client = new Anthropic(); + +// ─── Input / output types ───────────────────────────────────────────────────── + +export interface AnalysisInput { + bpm: number; + duration: number; // seconds + title?: string; + artist?: string; + mbid?: string | null; + contributedBy?: string; +} + +// ─── JSON Schema for structured output ─────────────────────────────────────── +// Must be strict (no additionalProperties, all required fields present). 
+
+const CTP_SCHEMA = {
+  type: "object",
+  additionalProperties: false,
+  required: ["version", "metadata", "count_in", "sections"],
+  properties: {
+    version: { type: "string", enum: ["1.0"] },
+    metadata: {
+      type: "object",
+      additionalProperties: false,
+      required: [
+        "title", "artist", "mbid", "duration_seconds",
+        "contributed_by", "verified", "created_at",
+      ],
+      properties: {
+        title: { type: "string" },
+        artist: { type: "string" },
+        mbid: { type: ["string", "null"] },
+        duration_seconds: { type: "number" },
+        contributed_by: { type: "string" },
+        verified: { type: "boolean" },
+        created_at: { type: "string" },
+      },
+    },
+    count_in: {
+      type: "object",
+      additionalProperties: false,
+      required: ["enabled", "bars", "use_first_section_tempo"],
+      properties: {
+        enabled: { type: "boolean" },
+        bars: { type: "integer", minimum: 1, maximum: 8 },
+        use_first_section_tempo: { type: "boolean" },
+      },
+    },
+    sections: {
+      type: "array",
+      minItems: 1,
+      items: {
+        type: "object",
+        additionalProperties: false,
+        required: ["label", "start_bar", "time_signature", "transition"],
+        // bpm is required for step, bpm_start/bpm_end for ramp — handled via oneOf
+        // but we keep this schema simple (strict mode) and validate downstream with Zod.
+        properties: {
+          label: { type: "string" },
+          start_bar: { type: "integer", minimum: 1 },
+          bpm: { type: "number" },
+          bpm_start: { type: "number" },
+          bpm_end: { type: "number" },
+          transition: { type: "string", enum: ["step", "ramp"] },
+          time_signature: {
+            type: "object",
+            additionalProperties: false,
+            required: ["numerator", "denominator"],
+            properties: {
+              numerator: { type: "integer", minimum: 1, maximum: 32 },
+              denominator: { type: "integer", enum: [1, 2, 4, 8, 16, 32] },
+            },
+          },
+        },
+      },
+    },
+  },
+};
+
+// ─── System prompt ────────────────────────────────────────────────────────────
+
+const SYSTEM_PROMPT = `\
+You are an expert music producer and session musician assisting cover bands with click tracks.
+
+You will receive automated BPM detection results for a song and must generate a CTP (Click Track Protocol) document describing the song's full tempo map.
+
+CTP rules:
+- "version" must be "1.0"
+- sections[0].start_bar must be 1
+- sections must be sorted by start_bar ascending, with no gaps
+- Step sections have a single "bpm" field; ramp sections have "bpm_start" and "bpm_end" (no "bpm" field)
+- All BPM values must be between 20 and 400
+- time_signature.denominator must be a power of 2 (1, 2, 4, 8, 16, or 32)
+- metadata.verified must be false (this is AI-generated, not human-verified)
+- metadata.created_at must be an ISO 8601 datetime string
+
+Guidelines for section layout:
+- Use typical pop/rock section names: Intro, Verse, Pre-Chorus, Chorus, Bridge, Outro
+- Estimate bar counts based on song duration and BPM (bars = duration_seconds × BPM / 60 / beats_per_bar)
+- Most songs are 4/4; note any unusual meters if you know the song
+- If you know the song has a tempo change (ritardando, double-time feel, key change with tempo shift), model it with a ramp or step section
+- If unsure about sections, use a single constant-tempo section covering the whole song
+- Use the detected BPM as the primary tempo — do not invent a different BPM unless the song is well-known to have a different tempo
+
+The output is a draft for human review. Add reasonable section structure based on the song's typical arrangement.`;
+
+// ─── Main function ────────────────────────────────────────────────────────────
+/** Requests a draft CTP document from Claude; throws if the reply has no text block or the text is not valid JSON. */
+export async function generateCTPWithAI(input: AnalysisInput): Promise<CTPDocument> {
+  const { bpm, duration, title, artist, mbid, contributedBy } = input;
+
+  const approxBars = Math.round((duration * bpm) / 60 / 4); // assuming 4/4
+
+  const userMessage = `\
+Generate a CTP document for the following song:
+
+Title: ${title ?? "Unknown Title"}
+Artist: ${artist ?? "Unknown Artist"}
+MusicBrainz ID: ${mbid ?? "unknown"}
+Detected BPM: ${bpm}
+Duration: ${duration.toFixed(1)} seconds (~${approxBars} bars at 4/4)
+Contributed by: ${contributedBy ?? "anonymous"}
+
+Create a plausible section layout for this song. If this is a well-known song, use your knowledge of its actual arrangement. If not, use a sensible generic structure.`;
+
+  const response = await client.messages.create({
+    model: "claude-opus-4-6",
+    max_tokens: 2048,
+    thinking: { type: "adaptive" },
+    system: SYSTEM_PROMPT,
+    messages: [{ role: "user", content: userMessage }],
+    output_config: {
+      format: {
+        type: "json_schema",
+        schema: CTP_SCHEMA,
+      },
+    },
+  });
+
+  const textBlock = response.content.find((b) => b.type === "text");
+  if (!textBlock || textBlock.type !== "text") {
+    throw new Error("Claude did not return a text block");
+  }
+
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(textBlock.text);
+  } catch {
+    throw new Error(`Claude returned invalid JSON: ${textBlock.text.slice(0, 200)}`);
+  }
+
+  // Stamp the current timestamp if Claude left a placeholder
+  const doc = parsed as CTPDocument;
+  if (!doc.metadata.created_at || doc.metadata.created_at.includes("placeholder")) {
+    doc.metadata.created_at = new Date().toISOString();
+  }
+
+  return doc;
+}
diff --git a/lib/analysis/bpm-detect.ts b/lib/analysis/bpm-detect.ts
new file mode 100644
index 0000000..c39b254
--- /dev/null
+++ b/lib/analysis/bpm-detect.ts
@@ -0,0 +1,187 @@
+/**
+ * Client-side BPM detection
+ *
+ * Runs entirely in the browser using the Web Audio API (no server round-trip
+ * for the audio itself). The algorithm:
+ *
+ *   1. Decode the audio file into PCM via AudioContext.decodeAudioData()
+ *   2. Mix to mono, optionally resample to 22050 Hz
+ *   3. Compute a short-time energy envelope (512-sample frames)
+ *   4. Derive an onset-strength signal via half-wave-rectified first difference
+ *   5. Autocorrelate the onset signal over lags corresponding to 55–210 BPM
+ *   6. 
Pick the lag with the highest correlation; also test its 2× harmonic
+ *      (halving the BPM) as a tiebreaker for double-time detections
+ *
+ * Typical accuracy is ±1–2 BPM on produced music with a clear beat.
+ * Rubato, live recordings, or highly syncopated rhythms may need manual adjustment.
+ */
+
+export interface BPMDetectionResult {
+  bpm: number;
+  /** Normalised confidence 0–1. Values above ~0.4 are generally reliable. */
+  confidence: number;
+  /** Total duration of the source file in seconds. */
+  duration: number;
+  /** The raw analysis produced a half-time alternative; user may prefer it. */
+  halfTimeBpm: number | null;
+}
+
+// ─── Internal helpers ─────────────────────────────────────────────────────────
+
+function mixToMono(buffer: AudioBuffer): Float32Array {
+  const n = buffer.length;
+  if (buffer.numberOfChannels === 1) {
+    return buffer.getChannelData(0).slice();
+  }
+  const mono = new Float32Array(n);
+  for (let c = 0; c < buffer.numberOfChannels; c++) {
+    const ch = buffer.getChannelData(c);
+    for (let i = 0; i < n; i++) mono[i] += ch[i];
+  }
+  const scale = 1 / buffer.numberOfChannels;
+  for (let i = 0; i < n; i++) mono[i] *= scale;
+  return mono;
+}
+
+function energyEnvelope(samples: Float32Array, frameSize: number): Float32Array {
+  const numFrames = Math.floor(samples.length / frameSize);
+  const env = new Float32Array(numFrames);
+  for (let i = 0; i < numFrames; i++) {
+    let sum = 0;
+    const base = i * frameSize;
+    for (let j = 0; j < frameSize; j++) {
+      const s = samples[base + j];
+      sum += s * s;
+    }
+    env[i] = Math.sqrt(sum / frameSize);
+  }
+  return env;
+}
+
+/**
+ * Half-wave-rectified first difference of the energy envelope.
+ * Positive spikes correspond to onset events (energy increases).
+ */
+function onsetStrength(env: Float32Array): Float32Array {
+  const onset = new Float32Array(env.length);
+  for (let i = 1; i < env.length; i++) {
+    const diff = env[i] - env[i - 1];
+    onset[i] = diff > 0 ? diff : 0;
+  }
+  return onset;
+}
+
+/**
+ * Normalised autocorrelation at a given lag.
+ * Returns a value in [-1, 1].
+ */
+function autocorrAtLag(signal: Float32Array, lag: number): number {
+  const n = signal.length - lag;
+  if (n <= 0) return 0;
+
+  let sumXX = 0;
+  let sumYY = 0;
+  let sumXY = 0;
+  for (let i = 0; i < n; i++) {
+    const x = signal[i];
+    const y = signal[i + lag];
+    sumXX += x * x;
+    sumYY += y * y;
+    sumXY += x * y;
+  }
+  const denom = Math.sqrt(sumXX * sumYY);
+  return denom > 0 ? sumXY / denom : 0;
+}
+
+// ─── Public API ───────────────────────────────────────────────────────────────
+
+/**
+ * Analyses a user-provided audio file and returns the estimated BPM.
+ * Must be called from a browser environment (requires Web Audio API).
+ *
+ * @param file   An audio File (MP3, WAV, AAC, OGG — anything the browser decodes)
+ * @param signal An optional AbortSignal to cancel long analysis
+ */
+export async function detectBPM(
+  file: File,
+  signal?: AbortSignal
+): Promise<BPMDetectionResult> {
+  // Decode at 22050 Hz to reduce computation while keeping enough resolution
+  const targetSampleRate = 22050;
+  const audioCtx = new AudioContext({ sampleRate: targetSampleRate });
+
+  try {
+    const arrayBuffer = await file.arrayBuffer();
+    if (signal?.aborted) throw new DOMException("Aborted", "AbortError");
+
+    const audioBuffer = await audioCtx.decodeAudioData(arrayBuffer);
+    if (signal?.aborted) throw new DOMException("Aborted", "AbortError");
+
+    const duration = audioBuffer.duration;
+    const sampleRate = audioBuffer.sampleRate; // may differ from targetSampleRate
+
+    const mono = mixToMono(audioBuffer);
+
+    // Analyse a representative middle segment (skip silent intros/outros).
+    // Cap at 90 s so analysis stays fast even on long recordings.
+    const analysisStart = Math.floor(sampleRate * Math.min(10, duration * 0.1));
+    const analysisEnd = Math.min(
+      mono.length,
+      analysisStart + Math.floor(sampleRate * 90)
+    );
+    const segment = mono.subarray(analysisStart, analysisEnd);
+
+    // Energy envelope: ~23 ms frames at 22050 Hz
+    const FRAME_SIZE = 512;
+    const frameRate = sampleRate / FRAME_SIZE; // frames per second
+
+    const env = energyEnvelope(segment, FRAME_SIZE);
+    const onset = onsetStrength(env);
+
+    // Lag bounds for 55–210 BPM
+    const minLag = Math.max(1, Math.round((frameRate * 60) / 210));
+    const maxLag = Math.round((frameRate * 60) / 55);
+
+    // Sweep lags and collect correlations
+    let bestLag = minLag;
+    let bestCorr = -Infinity;
+
+    for (let lag = minLag; lag <= maxLag; lag++) {
+      const corr = autocorrAtLag(onset, lag);
+      if (corr > bestCorr) {
+        bestCorr = corr;
+        bestLag = lag;
+      }
+    }
+
+    const rawBpm = (frameRate * 60) / bestLag;
+    // Round to one decimal place
+    const bpm = Math.round(rawBpm * 10) / 10;
+
+    // Check whether the half-time (bpm/2) has comparable correlation —
+    // double-time detections are common on songs with a 2-beat pulse.
+    const halfTimeLag = bestLag * 2;
+    let halfTimeBpm: number | null = null;
+    if (halfTimeLag <= maxLag * 2) {
+      const halfCorr = autocorrAtLag(onset, halfTimeLag);
+      if (halfCorr > bestCorr * 0.85) {
+        halfTimeBpm = Math.round((rawBpm / 2) * 10) / 10;
+      }
+    }
+
+    // Confidence is the peak's normalised autocorrelation, clamped to [0, 1].
+    // Onset strength is never negative, so every lag already correlates in
+    // [0, 1]. Dividing the peak by the maximum over the same sweep (as this
+    // code used to) always gave exactly 1 for any non-silent input, which hid
+    // weak detections from the confidence badge and the low-confidence
+    // warning, and it ran the whole autocorrelation sweep a second time.
+    const confidence =
+      Number.isFinite(bestCorr)
+        ? Math.max(0, Math.min(1, bestCorr))
+        : 0;
+
+    return { bpm, confidence, duration, halfTimeBpm };
+  } finally {
+    await audioCtx.close();
+  }
+}
diff --git a/next.config.mjs b/next.config.mjs
index c5eddf4..4dadd4e 100644
--- a/next.config.mjs
+++ b/next.config.mjs
@@ -2,7 +2,7 @@
 const nextConfig = {
   output: "standalone",
   experimental: {
-    serverComponentsExternalPackages: ["pg", "ioredis"],
+    serverComponentsExternalPackages: ["pg", "ioredis", "@anthropic-ai/sdk"],
   },
 };
 
diff --git a/next.config.ts b/next.config.ts
deleted file mode 100644
index cdb1b59..0000000
--- a/next.config.ts
+++ /dev/null
@@ -1,8 +0,0 @@
-import type { NextConfig } from "next";
-
-const nextConfig: NextConfig = {
-  output: "standalone",
-  serverExternalPackages: ["pg", "ioredis"],
-};
-
-export default nextConfig;
diff --git a/package.json b/package.json
index b87a57e..9957d01 100644
--- a/package.json
+++ b/package.json
@@ -17,7 +17,8 @@
     "zod": "^3.23.8",
     "pg": "^8.11.5",
     "ioredis": "^5.3.2",
-    "node-fetch": "^3.3.2"
+    "node-fetch": "^3.3.2",
+    "@anthropic-ai/sdk": "^0.36.3"
   },
   "devDependencies": {
     "@types/node": "^20.12.7",