feat: audio upload + AI-assisted tempo map generation
Users can now upload any audio file to generate a CTP tempo map:
BPM detection (lib/analysis/bpm-detect.ts):
- Runs entirely client-side via Web Audio API — audio is never uploaded
- Decodes any browser-supported format (MP3, WAV, AAC, OGG, FLAC, M4A)
- Energy envelope → onset strength → autocorrelation over 55–210 BPM range
- Returns BPM, normalised confidence score, duration, and optional half-time BPM
for songs where a double-time pulse is detected
AI CTP generation (lib/analysis/ai-ctp.ts):
- Calls Claude (claude-opus-4-6) with adaptive thinking + structured JSON output
- System prompt explains CTP rules and section layout conventions
- Claude uses knowledge of well-known songs to produce accurate section maps;
falls back to a sensible generic structure for unknown tracks
- Only BPM + duration + optional metadata is sent to the server (no audio data)
API route (app/api/analyze/route.ts):
- POST /api/analyze accepts { bpm, duration, title?, artist?, mbid?, contributed_by? }
- Validates input, calls generateCTPWithAI, runs CTP schema validation
- Returns { ctp, warnings } — warnings are surfaced in the UI rather than 500-ing
UI (components/TempoAnalyzer.tsx, app/(web)/analyze/page.tsx):
- Drag-and-drop or browse file upload
- Shows BPM, confidence, duration after detection
- Half-time toggle when double-time is detected
- Metadata form: title, artist, MusicBrainz ID, contributor name
(filename parsed into artist/title as a convenience default)
- AI generation with streaming-style progress states
- Sections review via TempoMapEditor
- Download .ctp.json or submit directly to the database
Also: added @anthropic-ai/sdk to package.json, ANTHROPIC_API_KEY to .env.example,
updated next.config.mjs serverComponentsExternalPackages, added Analyze nav link.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
25
.env.example
25
.env.example
@@ -4,41 +4,30 @@
|
|||||||
# ─────────────────────────────────────────────────────────────────────────────
|
# ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
# ── Database ─────────────────────────────────────────────────────────────────
|
# ── Database ─────────────────────────────────────────────────────────────────
|
||||||
# PostgreSQL connection string.
|
|
||||||
# When using docker compose the default works out of the box.
|
|
||||||
DATABASE_URL=postgres://clicktrack:clicktrack@localhost:5432/clicktrack
|
DATABASE_URL=postgres://clicktrack:clicktrack@localhost:5432/clicktrack
|
||||||
|
|
||||||
# Password used by the postgres service in docker-compose.yml.
|
|
||||||
# Change this before deploying to production.
|
|
||||||
POSTGRES_PASSWORD=clicktrack
|
POSTGRES_PASSWORD=clicktrack
|
||||||
|
|
||||||
# ── Redis ────────────────────────────────────────────────────────────────────
|
# ── Redis ────────────────────────────────────────────────────────────────────
|
||||||
# Redis connection URL.
|
|
||||||
REDIS_URL=redis://localhost:6379
|
REDIS_URL=redis://localhost:6379
|
||||||
|
|
||||||
# ── Community registry ───────────────────────────────────────────────────────
|
# ── Community registry ───────────────────────────────────────────────────────
|
||||||
# Public GitHub repository containing community CTP files.
|
|
||||||
# Example: https://github.com/your-org/clicktrack-registry
|
|
||||||
# Leave blank to disable registry sync.
|
|
||||||
REGISTRY_REPO=
|
REGISTRY_REPO=
|
||||||
|
|
||||||
# Branch to pull from (default: main).
|
|
||||||
REGISTRY_BRANCH=main
|
REGISTRY_BRANCH=main
|
||||||
|
|
||||||
# Interval in seconds between registry syncs (default: 3600 = 1 hour).
|
|
||||||
REGISTRY_SYNC_INTERVAL=3600
|
REGISTRY_SYNC_INTERVAL=3600
|
||||||
|
|
||||||
|
# ── AI Tempo Analysis ────────────────────────────────────────────────────────
|
||||||
|
# Required for the /analyze feature (AI tempo map generation).
|
||||||
|
# Get a key at https://console.anthropic.com
|
||||||
|
# BPM detection is client-side and works without this key.
|
||||||
|
ANTHROPIC_API_KEY=
|
||||||
|
|
||||||
# ── App ──────────────────────────────────────────────────────────────────────
|
# ── App ──────────────────────────────────────────────────────────────────────
|
||||||
# Display name shown in the UI and page title.
|
|
||||||
NEXT_PUBLIC_APP_NAME=ClickTrack
|
NEXT_PUBLIC_APP_NAME=ClickTrack
|
||||||
|
|
||||||
# ── MusicBrainz ──────────────────────────────────────────────────────────────
|
# ── MusicBrainz ──────────────────────────────────────────────────────────────
|
||||||
# User-Agent string sent to MusicBrainz. Must identify your application and
|
# Must identify your instance per MB rate-limit policy.
|
||||||
# provide a contact URL or email per their usage policy:
|
|
||||||
# https://musicbrainz.org/doc/MusicBrainz_API/Rate_Limiting
|
|
||||||
MUSICBRAINZ_USER_AGENT=ClickTrack/0.1 (https://your-instance-url)
|
MUSICBRAINZ_USER_AGENT=ClickTrack/0.1 (https://your-instance-url)
|
||||||
|
|
||||||
# ── Ports (docker-compose.yml) ───────────────────────────────────────────────
|
# ── Ports (docker-compose.yml) ───────────────────────────────────────────────
|
||||||
# Host ports for the nginx reverse proxy.
|
|
||||||
HTTP_PORT=80
|
HTTP_PORT=80
|
||||||
HTTPS_PORT=443
|
HTTPS_PORT=443
|
||||||
|
|||||||
50
app/(web)/analyze/page.tsx
Normal file
50
app/(web)/analyze/page.tsx
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
import type { Metadata } from "next";
|
||||||
|
import TempoAnalyzer from "@/components/TempoAnalyzer";
|
||||||
|
|
||||||
|
// Route-level <head> metadata for /analyze (Next.js App Router convention:
// a `metadata` export on the page module).
export const metadata: Metadata = {
  title: "Analyze Audio",
  description:
    "Upload an audio file, detect the tempo, and generate a CTP tempo map with AI assistance.",
};
|
||||||
|
|
||||||
|
/**
 * /analyze page — static header and "How it works" copy.
 * All interactive behavior (upload, detection, AI generation) lives in
 * the client component <TempoAnalyzer />.
 */
export default function AnalyzePage() {
  return (
    <div>
      {/* Page heading */}
      <div className="mb-8">
        <p className="text-sm text-zinc-500 uppercase tracking-widest mb-2">
          Tempo Analysis
        </p>
        <h1 className="text-3xl font-bold">Generate a Tempo Map</h1>
        <p className="mt-2 text-zinc-400 max-w-xl">
          Upload your audio file. The app detects the BPM in your browser, then
          uses AI to generate a complete{" "}
          <abbr title="Click Track Protocol">CTP</abbr> tempo map — including
          sections, time signatures, and any tempo changes.
        </p>
      </div>

      {/* Privacy/process explainer: audio never leaves the browser */}
      <div className="mb-6 rounded-lg border border-zinc-800 bg-zinc-900/40 px-5 py-4 text-sm text-zinc-500 space-y-1">
        <p>
          <span className="text-zinc-300 font-medium">How it works:</span>
        </p>
        <ol className="list-decimal pl-5 space-y-1">
          <li>Drop or select any audio file — MP3, WAV, AAC, OGG, FLAC, M4A.</li>
          <li>
            BPM is detected locally in your browser using the Web Audio API.
            Your audio is <strong className="text-zinc-400">never uploaded</strong>.
          </li>
          <li>
            Only the detected BPM, duration, and any metadata you provide are
            sent to the server for AI generation.
          </li>
          <li>
            Claude analyses the song structure and returns a draft CTP document.
            Always verify it against the recording.
          </li>
        </ol>
      </div>

      <TempoAnalyzer />
    </div>
  );
}
|
||||||
83
app/api/analyze/route.ts
Normal file
83
app/api/analyze/route.ts
Normal file
@@ -0,0 +1,83 @@
|
|||||||
|
import { NextRequest, NextResponse } from "next/server";
|
||||||
|
import { z } from "zod";
|
||||||
|
import { generateCTPWithAI } from "@/lib/analysis/ai-ctp";
|
||||||
|
import { validateCTP } from "@/lib/ctp/validate";
|
||||||
|
|
||||||
|
// ─── Request schema ───────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
// Request body for POST /api/analyze. BPM bounds (20–400) are sanity limits,
// wider than the detector's 55–210 range so manually corrected values pass.
const AnalyzeRequestSchema = z.object({
  bpm: z.number().min(20).max(400),
  duration: z.number().positive(), // seconds
  title: z.string().min(1).max(256).optional(),
  artist: z.string().min(1).max(256).optional(),
  mbid: z.string().uuid().optional().nullable(), // MusicBrainz recording ID
  contributed_by: z.string().min(1).max(64).optional(),
});
|
||||||
|
|
||||||
|
/**
 * POST /api/analyze
 *
 * Accepts BPM detection results from the browser and uses Claude to generate
 * a draft CTP document for human review.
 *
 * Body (JSON):
 *   { bpm, duration, title?, artist?, mbid?, contributed_by? }
 *
 * Returns:
 *   { ctp: CTPDocument, warnings: string[] }
 */
export async function POST(req: NextRequest) {
  // Malformed JSON → 400 (don't let req.json() reject bubble up as a 500).
  let body: unknown;
  try {
    body = await req.json();
  } catch {
    return NextResponse.json({ error: "Invalid JSON body" }, { status: 400 });
  }

  const parsed = AnalyzeRequestSchema.safeParse(body);
  if (!parsed.success) {
    return NextResponse.json(
      { error: "Invalid request", details: parsed.error.flatten() },
      { status: 400 }
    );
  }

  const { bpm, duration, title, artist, mbid, contributed_by } = parsed.data;

  // Fail fast with 503 (service misconfigured) rather than letting the SDK
  // throw mid-request when no key is set.
  if (!process.env.ANTHROPIC_API_KEY) {
    return NextResponse.json(
      { error: "ANTHROPIC_API_KEY is not configured on this server" },
      { status: 503 }
    );
  }

  let ctpDoc;
  try {
    ctpDoc = await generateCTPWithAI({
      bpm,
      duration,
      title,
      artist,
      mbid: mbid ?? null,
      contributedBy: contributed_by ?? "anonymous",
    });
  } catch (err) {
    console.error("[analyze] AI generation failed:", err);
    // NOTE(review): `detail: String(err)` echoes the raw error to the client —
    // may leak internals (SDK/status messages). Confirm this is intended.
    return NextResponse.json(
      { error: "Failed to generate CTP document", detail: String(err) },
      { status: 500 }
    );
  }

  // Validate the AI output against the CTP schema
  const validation = validateCTP(ctpDoc);
  const warnings: string[] = [];

  if (!validation.success) {
    // Rather than 500-ing, return the draft with validation warnings so the user
    // can still see and manually correct it.
    // NOTE(review): assumes a zod-like failure shape (`errors.issues`) from
    // validateCTP — verify against lib/ctp/validate.
    warnings.push(...validation.errors.issues.map((i) => `${i.path.join(".")}: ${i.message}`));
  }

  return NextResponse.json({ ctp: ctpDoc, warnings });
}
|
||||||
@@ -27,7 +27,10 @@ export default function RootLayout({
|
|||||||
<a href="/" className="hover:text-zinc-100 transition-colors">
|
<a href="/" className="hover:text-zinc-100 transition-colors">
|
||||||
Search
|
Search
|
||||||
</a>
|
</a>
|
||||||
<a
|
<a href="/analyze" className="hover:text-zinc-100 transition-colors">
|
||||||
|
Analyze
|
||||||
|
</a>
|
||||||
|
<
|
||||||
href="https://github.com/your-org/clicktrack"
|
href="https://github.com/your-org/clicktrack"
|
||||||
target="_blank"
|
target="_blank"
|
||||||
rel="noopener noreferrer"
|
rel="noopener noreferrer"
|
||||||
|
|||||||
510
components/TempoAnalyzer.tsx
Normal file
510
components/TempoAnalyzer.tsx
Normal file
@@ -0,0 +1,510 @@
|
|||||||
|
"use client";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* TempoAnalyzer
|
||||||
|
*
|
||||||
|
* Full workflow:
|
||||||
|
* 1. User drops / selects an audio file (MP3, WAV, AAC, OGG, etc.)
|
||||||
|
* 2. Browser decodes the audio and runs BPM detection (Web Audio API)
|
||||||
|
* 3. Optional: user provides song title, artist, MusicBrainz ID
|
||||||
|
* 4. Client sends { bpm, duration, … } to POST /api/analyze
|
||||||
|
* 5. Server calls Claude → returns a CTP document draft
|
||||||
|
* 6. User can review the sections, download the .ctp.json, or submit to DB
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { useState, useRef, useCallback } from "react";
|
||||||
|
import { detectBPM, type BPMDetectionResult } from "@/lib/analysis/bpm-detect";
|
||||||
|
import TempoMapEditor from "@/components/TempoMapEditor";
|
||||||
|
import type { CTPDocument } from "@/lib/ctp/schema";
|
||||||
|
|
||||||
|
// ─── Types ────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
// Workflow stages for the analyzer's single-state-machine UI.
type Stage =
  | "idle"       // waiting for a file, or for metadata confirmation after detection
  | "decoding"   // browser is decoding the audio
  | "detecting"  // BPM detection running locally
  | "generating" // POST /api/analyze in flight
  | "review"     // AI draft returned; user reviews sections
  | "saving"     // POST /api/tracks in flight
  | "saved"      // stored successfully
  | "error";     // any failure; errorMsg carries the message

interface AnalyzerState {
  stage: Stage;
  file: File | null;                   // currently selected audio file
  detection: BPMDetectionResult | null; // local detection result, if any
  ctp: CTPDocument | null;             // AI-generated draft, if any
  warnings: string[];                  // server-side validation warnings
  errorMsg: string;                    // shown when stage === "error"
  // Optional metadata the user may fill in before AI generation
  title: string;
  artist: string;
  mbid: string;
  contributedBy: string;
  // Toggle: use halfTimeBpm instead of primary bpm
  useHalfTime: boolean;
}

// Pristine state; also used by handleReset to wipe everything.
const INITIAL_STATE: AnalyzerState = {
  stage: "idle",
  file: null,
  detection: null,
  ctp: null,
  warnings: [],
  errorMsg: "",
  title: "",
  artist: "",
  mbid: "",
  contributedBy: "",
  useHalfTime: false,
};
|
||||||
|
|
||||||
|
// ─── Helpers ──────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
function formatDuration(s: number) {
|
||||||
|
const m = Math.floor(s / 60);
|
||||||
|
const sec = Math.round(s % 60);
|
||||||
|
return `${m}:${String(sec).padStart(2, "0")}`;
|
||||||
|
}
|
||||||
|
|
||||||
|
function confidenceLabel(c: number) {
|
||||||
|
if (c >= 0.7) return { label: "High", color: "text-green-400" };
|
||||||
|
if (c >= 0.4) return { label: "Medium", color: "text-amber-400" };
|
||||||
|
return { label: "Low", color: "text-red-400" };
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Component ────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
export default function TempoAnalyzer() {
|
||||||
|
const [state, setState] = useState<AnalyzerState>(INITIAL_STATE);
|
||||||
|
const abortRef = useRef<AbortController | null>(null);
|
||||||
|
const dropRef = useRef<HTMLDivElement>(null);
|
||||||
|
const [isDragging, setIsDragging] = useState(false);
|
||||||
|
|
||||||
|
const update = (patch: Partial<AnalyzerState>) =>
|
||||||
|
setState((prev) => ({ ...prev, ...patch }));
|
||||||
|
|
||||||
|
// ── File handling ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
// Accept a selected/dropped file, pre-fill metadata from its name, and run
// local BPM detection. Any previous in-flight detection is aborted first.
const handleFile = useCallback(async (file: File) => {
  // Accept by MIME type, falling back to extension — some platforms report
  // an empty `type` for audio files.
  if (!file.type.startsWith("audio/") && !file.name.match(/\.(mp3|wav|aac|ogg|flac|m4a|aiff)$/i)) {
    update({ errorMsg: "Please select an audio file (MP3, WAV, AAC, OGG, FLAC, M4A).", stage: "error" });
    return;
  }

  // Cancel any detection already running, then arm a fresh controller.
  abortRef.current?.abort();
  const abort = new AbortController();
  abortRef.current = abort;

  // Try to pre-fill title/artist from filename: "Artist - Title.mp3"
  const base = file.name.replace(/\.[^.]+$/, "");
  const dashIdx = base.indexOf(" - ");
  const autoTitle = dashIdx > -1 ? base.slice(dashIdx + 3) : base;
  const autoArtist = dashIdx > -1 ? base.slice(0, dashIdx) : "";

  // Reset all per-file state before starting the new run.
  update({
    stage: "decoding",
    file,
    detection: null,
    ctp: null,
    warnings: [],
    errorMsg: "",
    title: autoTitle,
    artist: autoArtist,
  });

  try {
    update({ stage: "detecting" });
    const detection = await detectBPM(file, abort.signal);
    update({ detection, stage: "idle" }); // wait for user to confirm/edit metadata
  } catch (err) {
    // An aborted run was superseded by a newer file — stay silent.
    if ((err as Error).name === "AbortError") return;
    update({
      stage: "error",
      errorMsg: `BPM detection failed: ${err instanceof Error ? err.message : String(err)}`,
    });
  }
}, []);
|
||||||
|
|
||||||
|
function handleDrop(e: React.DragEvent) {
|
||||||
|
e.preventDefault();
|
||||||
|
setIsDragging(false);
|
||||||
|
const file = e.dataTransfer.files[0];
|
||||||
|
if (file) handleFile(file);
|
||||||
|
}
|
||||||
|
|
||||||
|
function handleFileInput(e: React.ChangeEvent<HTMLInputElement>) {
|
||||||
|
const file = e.target.files?.[0];
|
||||||
|
if (file) handleFile(file);
|
||||||
|
e.target.value = ""; // reset so re-selecting same file works
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── AI generation ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
// Send the detection result (plus any user metadata) to the server and
// receive an AI-drafted CTP document. Only numbers/strings are sent — the
// audio itself never leaves the browser.
async function handleGenerate() {
  if (!state.detection) return;

  // Honor the half-time toggle when the detector flagged a double-time pulse.
  const effectiveBpm =
    state.useHalfTime && state.detection.halfTimeBpm
      ? state.detection.halfTimeBpm
      : state.detection.bpm;

  update({ stage: "generating", ctp: null, warnings: [] });

  try {
    const res = await fetch("/api/analyze", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        bpm: effectiveBpm,
        duration: state.detection.duration,
        // Empty strings are dropped so the server's optional fields stay absent.
        title: state.title || undefined,
        artist: state.artist || undefined,
        mbid: state.mbid || undefined,
        contributed_by: state.contributedBy || undefined,
      }),
    });

    const data = await res.json();

    if (!res.ok) {
      throw new Error(data.error ?? `Server error ${res.status}`);
    }

    update({ ctp: data.ctp, warnings: data.warnings ?? [], stage: "review" });
  } catch (err) {
    update({
      stage: "error",
      errorMsg: `Generation failed: ${err instanceof Error ? err.message : String(err)}`,
    });
  }
}
|
||||||
|
|
||||||
|
// ── Submit to DB ─────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
// Submit the reviewed CTP document to the tracks API for storage.
// Only reachable from the review stage (and UI only shows it when an mbid is set).
async function handleSubmit() {
  if (!state.ctp) return;
  update({ stage: "saving" });

  try {
    const res = await fetch("/api/tracks", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(state.ctp),
    });

    const data = await res.json();
    if (!res.ok) {
      throw new Error(data.error ?? `Server error ${res.status}`);
    }

    update({ stage: "saved" });
  } catch (err) {
    update({
      stage: "error",
      errorMsg: `Save failed: ${err instanceof Error ? err.message : String(err)}`,
    });
  }
}
|
||||||
|
|
||||||
|
// ── Download CTP file ────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
// Serialize the current CTP draft and trigger a browser download as
// "<Artist>_-_<Title>.ctp.json" (sanitized, capped at 80 chars).
function handleDownload() {
  if (!state.ctp) return;
  const json = JSON.stringify(state.ctp, null, 2);
  const blob = new Blob([json], { type: "application/json" });
  const url = URL.createObjectURL(blob);
  const a = document.createElement("a");
  // Strip anything outside word chars/whitespace/hyphen, then collapse
  // whitespace to underscores for a filesystem-safe name.
  const safeName = `${state.ctp.metadata.artist} - ${state.ctp.metadata.title}`
    .replace(/[^\w\s\-]/g, "")
    .replace(/\s+/g, "_")
    .slice(0, 80);
  a.href = url;
  a.download = `${safeName}.ctp.json`;
  a.click();
  // NOTE(review): revoking synchronously after click() is generally safe for
  // same-tick downloads, but confirm across target browsers.
  URL.revokeObjectURL(url);
}
|
||||||
|
|
||||||
|
// ── Reset ────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
function handleReset() {
|
||||||
|
abortRef.current?.abort();
|
||||||
|
setState(INITIAL_STATE);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Render ───────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
const { stage, file, detection, ctp, warnings, errorMsg, useHalfTime } = state;
|
||||||
|
const isProcessing = stage === "decoding" || stage === "detecting" || stage === "generating" || stage === "saving";
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div className="space-y-8">
|
||||||
|
|
||||||
|
{/* ── Drop zone ─────────────────────────────────────────────────── */}
|
||||||
|
{!file && stage === "idle" && (
|
||||||
|
<div
|
||||||
|
ref={dropRef}
|
||||||
|
onDragOver={(e) => { e.preventDefault(); setIsDragging(true); }}
|
||||||
|
onDragLeave={() => setIsDragging(false)}
|
||||||
|
onDrop={handleDrop}
|
||||||
|
className={`rounded-xl border-2 border-dashed px-8 py-16 text-center transition-colors ${
|
||||||
|
isDragging
|
||||||
|
? "border-green-500 bg-green-950/20"
|
||||||
|
: "border-zinc-700 hover:border-zinc-500"
|
||||||
|
}`}
|
||||||
|
>
|
||||||
|
<p className="text-4xl mb-4">🎵</p>
|
||||||
|
<p className="text-lg font-medium text-zinc-200 mb-2">
|
||||||
|
Drop an audio file here
|
||||||
|
</p>
|
||||||
|
<p className="text-sm text-zinc-500 mb-6">
|
||||||
|
MP3, WAV, AAC, OGG, FLAC, M4A — any format your browser supports
|
||||||
|
</p>
|
||||||
|
<label className="inline-block cursor-pointer rounded-lg bg-green-700 px-6 py-2.5 text-sm font-semibold text-white hover:bg-green-600 transition-colors">
|
||||||
|
Browse files
|
||||||
|
<input
|
||||||
|
type="file"
|
||||||
|
accept="audio/*,.mp3,.wav,.aac,.ogg,.flac,.m4a,.aiff"
|
||||||
|
className="hidden"
|
||||||
|
onChange={handleFileInput}
|
||||||
|
/>
|
||||||
|
</label>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* ── Processing indicator ───────────────────────────────────────── */}
|
||||||
|
{isProcessing && (
|
||||||
|
<div className="rounded-xl border border-zinc-800 bg-zinc-900/60 px-6 py-8 text-center">
|
||||||
|
<div className="mb-3 text-2xl animate-spin inline-block">⟳</div>
|
||||||
|
<p className="font-medium text-zinc-200">
|
||||||
|
{stage === "decoding" && "Decoding audio…"}
|
||||||
|
{stage === "detecting" && "Detecting tempo…"}
|
||||||
|
{stage === "generating" && "Generating tempo map with AI…"}
|
||||||
|
{stage === "saving" && "Saving to database…"}
|
||||||
|
</p>
|
||||||
|
{stage === "generating" && (
|
||||||
|
<p className="mt-1 text-sm text-zinc-500">
|
||||||
|
Claude is analysing the song structure — this takes ~5–15 seconds.
|
||||||
|
</p>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* ── Error ─────────────────────────────────────────────────────── */}
|
||||||
|
{stage === "error" && (
|
||||||
|
<div className="rounded-xl border border-red-800 bg-red-950/30 px-6 py-5">
|
||||||
|
<p className="text-red-400 font-medium mb-1">Error</p>
|
||||||
|
<p className="text-sm text-red-300">{errorMsg}</p>
|
||||||
|
<button
|
||||||
|
onClick={handleReset}
|
||||||
|
className="mt-4 text-sm text-zinc-400 hover:text-zinc-200 underline"
|
||||||
|
>
|
||||||
|
Try again
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* ── Detection results + metadata form ─────────────────────────── */}
|
||||||
|
{detection && (stage === "idle" || stage === "review" || stage === "saved") && (
|
||||||
|
<div className="rounded-xl border border-zinc-800 bg-zinc-900/60 p-6 space-y-5">
|
||||||
|
{/* File name + detection summary */}
|
||||||
|
<div className="flex flex-wrap items-start justify-between gap-3">
|
||||||
|
<div>
|
||||||
|
<p className="text-xs text-zinc-500 mb-0.5">Analysed file</p>
|
||||||
|
<p className="font-medium text-zinc-200 truncate max-w-sm">{file?.name}</p>
|
||||||
|
</div>
|
||||||
|
<button
|
||||||
|
onClick={handleReset}
|
||||||
|
className="text-xs text-zinc-600 hover:text-zinc-400 underline shrink-0"
|
||||||
|
>
|
||||||
|
Change file
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div className="grid grid-cols-3 gap-4 text-center">
|
||||||
|
<div className="rounded-lg bg-zinc-800/60 p-4">
|
||||||
|
<p className="text-2xl font-bold font-mono text-green-400">
|
||||||
|
{useHalfTime && detection.halfTimeBpm
|
||||||
|
? detection.halfTimeBpm
|
||||||
|
: detection.bpm}
|
||||||
|
</p>
|
||||||
|
<p className="text-xs text-zinc-500 mt-1">BPM</p>
|
||||||
|
</div>
|
||||||
|
<div className="rounded-lg bg-zinc-800/60 p-4">
|
||||||
|
<p className={`text-2xl font-bold ${confidenceLabel(detection.confidence).color}`}>
|
||||||
|
{confidenceLabel(detection.confidence).label}
|
||||||
|
</p>
|
||||||
|
<p className="text-xs text-zinc-500 mt-1">Confidence</p>
|
||||||
|
</div>
|
||||||
|
<div className="rounded-lg bg-zinc-800/60 p-4">
|
||||||
|
<p className="text-2xl font-bold text-zinc-200">
|
||||||
|
{formatDuration(detection.duration)}
|
||||||
|
</p>
|
||||||
|
<p className="text-xs text-zinc-500 mt-1">Duration</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Half-time toggle */}
|
||||||
|
{detection.halfTimeBpm && (
|
||||||
|
<div className="flex items-center gap-3 rounded-lg border border-amber-800/50 bg-amber-950/20 px-4 py-3">
|
||||||
|
<span className="text-sm text-amber-300 flex-1">
|
||||||
|
Detected double-time pulse — primary BPM may be 2× the actual feel.
|
||||||
|
Half-time: <strong>{detection.halfTimeBpm}</strong> BPM
|
||||||
|
</span>
|
||||||
|
<button
|
||||||
|
onClick={() => update({ useHalfTime: !useHalfTime })}
|
||||||
|
className={`rounded px-3 py-1 text-xs font-medium transition-colors ${
|
||||||
|
useHalfTime
|
||||||
|
? "bg-amber-600 text-white"
|
||||||
|
: "border border-amber-700 text-amber-400 hover:bg-amber-900/40"
|
||||||
|
}`}
|
||||||
|
>
|
||||||
|
{useHalfTime ? "Using half-time" : "Use half-time"}
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* Confidence warning */}
|
||||||
|
{detection.confidence < 0.4 && (
|
||||||
|
<p className="text-sm text-amber-400">
|
||||||
|
Low confidence — the BPM may be inaccurate. Consider a song with a clearer beat, or adjust the detected value manually before generating.
|
||||||
|
</p>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* Metadata form */}
|
||||||
|
{stage === "idle" && (
|
||||||
|
<>
|
||||||
|
<div className="grid gap-3 sm:grid-cols-2">
|
||||||
|
<div>
|
||||||
|
<label className="block text-xs text-zinc-500 mb-1">Song title</label>
|
||||||
|
<input
|
||||||
|
value={state.title}
|
||||||
|
onChange={(e) => update({ title: e.target.value })}
|
||||||
|
placeholder="e.g. Bohemian Rhapsody"
|
||||||
|
className="w-full rounded-lg border border-zinc-700 bg-zinc-800 px-3 py-2 text-sm text-zinc-100 placeholder:text-zinc-600 focus:border-green-500 focus:outline-none"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<label className="block text-xs text-zinc-500 mb-1">Artist</label>
|
||||||
|
<input
|
||||||
|
value={state.artist}
|
||||||
|
onChange={(e) => update({ artist: e.target.value })}
|
||||||
|
placeholder="e.g. Queen"
|
||||||
|
className="w-full rounded-lg border border-zinc-700 bg-zinc-800 px-3 py-2 text-sm text-zinc-100 placeholder:text-zinc-600 focus:border-green-500 focus:outline-none"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<label className="block text-xs text-zinc-500 mb-1">
|
||||||
|
MusicBrainz ID{" "}
|
||||||
|
<span className="text-zinc-600">(optional)</span>
|
||||||
|
</label>
|
||||||
|
<input
|
||||||
|
value={state.mbid}
|
||||||
|
onChange={(e) => update({ mbid: e.target.value })}
|
||||||
|
placeholder="xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
|
||||||
|
className="w-full rounded-lg border border-zinc-700 bg-zinc-800 px-3 py-2 text-sm font-mono text-zinc-100 placeholder:text-zinc-600 focus:border-green-500 focus:outline-none"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<label className="block text-xs text-zinc-500 mb-1">Your name / handle</label>
|
||||||
|
<input
|
||||||
|
value={state.contributedBy}
|
||||||
|
onChange={(e) => update({ contributedBy: e.target.value })}
|
||||||
|
placeholder="e.g. guitar_pete"
|
||||||
|
className="w-full rounded-lg border border-zinc-700 bg-zinc-800 px-3 py-2 text-sm text-zinc-100 placeholder:text-zinc-600 focus:border-green-500 focus:outline-none"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<button
|
||||||
|
onClick={handleGenerate}
|
||||||
|
className="w-full rounded-lg bg-green-600 py-3 font-semibold text-white hover:bg-green-500 transition-colors"
|
||||||
|
>
|
||||||
|
Generate tempo map with AI →
|
||||||
|
</button>
|
||||||
|
</>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* ── AI-generated CTP review ────────────────────────────────────── */}
|
||||||
|
{ctp && (stage === "review" || stage === "saved") && (
|
||||||
|
<div className="space-y-6">
|
||||||
|
{warnings.length > 0 && (
|
||||||
|
<div className="rounded-lg border border-amber-800/50 bg-amber-950/20 px-4 py-3">
|
||||||
|
<p className="text-sm font-medium text-amber-300 mb-1">Validation warnings</p>
|
||||||
|
<ul className="text-xs text-amber-400 list-disc pl-4 space-y-0.5">
|
||||||
|
{warnings.map((w, i) => <li key={i}>{w}</li>)}
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
<div>
|
||||||
|
<div className="flex items-center justify-between mb-3">
|
||||||
|
<h2 className="text-lg font-semibold">Generated tempo map</h2>
|
||||||
|
<span className="text-xs text-zinc-600 italic">AI draft — verify before using</span>
|
||||||
|
</div>
|
||||||
|
<TempoMapEditor ctpDoc={ctp} readOnly />
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Actions */}
|
||||||
|
{stage === "review" && (
|
||||||
|
<div className="flex flex-wrap gap-3">
|
||||||
|
<button
|
||||||
|
onClick={handleDownload}
|
||||||
|
className="flex items-center gap-2 rounded-lg border border-zinc-700 px-5 py-2.5 text-sm font-medium text-zinc-300 hover:border-zinc-500 hover:text-zinc-100 transition-colors"
|
||||||
|
>
|
||||||
|
↓ Download .ctp.json
|
||||||
|
</button>
|
||||||
|
|
||||||
|
{ctp.metadata.mbid && (
|
||||||
|
<button
|
||||||
|
onClick={handleSubmit}
|
||||||
|
className="flex items-center gap-2 rounded-lg bg-green-700 px-5 py-2.5 text-sm font-semibold text-white hover:bg-green-600 transition-colors"
|
||||||
|
>
|
||||||
|
Submit to database
|
||||||
|
</button>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{!ctp.metadata.mbid && (
|
||||||
|
<p className="self-center text-xs text-zinc-600">
|
||||||
|
Add a MusicBrainz ID to submit to the database.
|
||||||
|
</p>
|
||||||
|
)}
|
||||||
|
|
||||||
|
<button
|
||||||
|
onClick={() => update({ stage: "idle", ctp: null })}
|
||||||
|
className="text-sm text-zinc-600 hover:text-zinc-400 underline self-center"
|
||||||
|
>
|
||||||
|
Re-generate
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{stage === "saved" && (
|
||||||
|
<div className="rounded-lg border border-green-800/50 bg-green-950/20 px-4 py-3 flex items-center gap-3">
|
||||||
|
<span className="text-green-400">✓</span>
|
||||||
|
<div>
|
||||||
|
<p className="text-sm font-medium text-green-300">Saved to database</p>
|
||||||
|
{ctp.metadata.mbid && (
|
||||||
|
<a
|
||||||
|
href={`/track/${ctp.metadata.mbid}`}
|
||||||
|
className="text-xs text-green-600 hover:underline"
|
||||||
|
>
|
||||||
|
View track page →
|
||||||
|
</a>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
<button
|
||||||
|
onClick={handleDownload}
|
||||||
|
className="ml-auto text-xs text-zinc-500 hover:text-zinc-300 underline"
|
||||||
|
>
|
||||||
|
Download .ctp.json
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
);
|
||||||
|
}
|
||||||
179
lib/analysis/ai-ctp.ts
Normal file
179
lib/analysis/ai-ctp.ts
Normal file
@@ -0,0 +1,179 @@
|
|||||||
|
/**
|
||||||
|
* AI-assisted CTP document generation
|
||||||
|
*
|
||||||
|
* Takes the results of BPM detection (and optional song metadata) and uses
|
||||||
|
* Claude to produce a plausible, well-structured CTP document.
|
||||||
|
*
|
||||||
|
* Claude is asked to:
|
||||||
|
* - Divide the song into typical sections (Intro, Verse, Chorus, Bridge…)
|
||||||
|
* - Assign realistic start bars for each section
|
||||||
|
* - Note any tempo changes it would expect for the song/genre
|
||||||
|
* - Return a fully valid CTP 1.0 JSON document
|
||||||
|
*
|
||||||
|
* The caller should treat the result as a *draft* — the generated sections
|
||||||
|
* are educated guesses and should be verified against the recording.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import Anthropic from "@anthropic-ai/sdk";
|
||||||
|
import type { CTPDocument } from "@/lib/ctp/schema";
|
||||||
|
|
||||||
|
// Shared Anthropic client; the SDK reads ANTHROPIC_API_KEY from the
// environment by default — NOTE(review): confirm the key is set server-side
// (this module must only ever run in a server context).
const client = new Anthropic();
|
||||||
|
|
||||||
|
// ─── Input / output types ─────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Input to `generateCTPWithAI`: client-side BPM detection results plus
 * optional user-supplied song metadata. No audio data is ever included.
 */
export interface AnalysisInput {
  /** Detected tempo in beats per minute. */
  bpm: number;
  duration: number; // seconds
  /** Song title; "Unknown Title" is substituted in the prompt when absent. */
  title?: string;
  /** Artist name; "Unknown Artist" is substituted in the prompt when absent. */
  artist?: string;
  /** MusicBrainz recording ID; null/absent when the track is unidentified. */
  mbid?: string | null;
  /** Contributor display name; "anonymous" is substituted when absent. */
  contributedBy?: string;
}
|
||||||
|
|
||||||
|
// ─── JSON Schema for structured output ───────────────────────────────────────
|
||||||
|
// Must be strict (no additionalProperties, all required fields present).
|
||||||
|
|
||||||
|
/**
 * JSON Schema describing a CTP 1.0 document, passed to Claude as the
 * structured-output format. Kept deliberately loose on the step/ramp
 * BPM-field rules (see comment inside `sections.items`); the authoritative
 * validation happens downstream with Zod.
 */
const CTP_SCHEMA = {
  type: "object",
  additionalProperties: false,
  required: ["version", "metadata", "count_in", "sections"],
  properties: {
    version: { type: "string", enum: ["1.0"] },
    metadata: {
      type: "object",
      additionalProperties: false,
      required: [
        "title", "artist", "mbid", "duration_seconds",
        "contributed_by", "verified", "created_at",
      ],
      properties: {
        title: { type: "string" },
        artist: { type: "string" },
        // null when the recording has no MusicBrainz ID
        mbid: { type: ["string", "null"] },
        duration_seconds: { type: "number" },
        contributed_by: { type: "string" },
        // always false for AI-generated drafts (enforced by the system prompt)
        verified: { type: "boolean" },
        // ISO 8601 datetime; caller re-stamps placeholders after generation
        created_at: { type: "string" },
      },
    },
    count_in: {
      type: "object",
      additionalProperties: false,
      required: ["enabled", "bars", "use_first_section_tempo"],
      properties: {
        enabled: { type: "boolean" },
        bars: { type: "integer", minimum: 1, maximum: 8 },
        use_first_section_tempo: { type: "boolean" },
      },
    },
    sections: {
      type: "array",
      minItems: 1,
      items: {
        type: "object",
        additionalProperties: false,
        required: ["label", "start_bar", "time_signature", "transition"],
        // bpm is required for step, bpm_start/bpm_end for ramp — handled via oneOf
        // but we keep this schema simple (strict mode) and validate downstream with Zod.
        properties: {
          label: { type: "string" },
          start_bar: { type: "integer", minimum: 1 },
          bpm: { type: "number" },
          bpm_start: { type: "number" },
          bpm_end: { type: "number" },
          transition: { type: "string", enum: ["step", "ramp"] },
          time_signature: {
            type: "object",
            additionalProperties: false,
            required: ["numerator", "denominator"],
            properties: {
              numerator: { type: "integer", minimum: 1, maximum: 32 },
              // power of 2 only, matching CTP rules
              denominator: { type: "integer", enum: [1, 2, 4, 8, 16, 32] },
            },
          },
        },
      },
    },
  },
};
|
||||||
|
|
||||||
|
// ─── System prompt ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
// System prompt sent verbatim to Claude. The `\` after the opening backtick
// suppresses the leading newline. Do not edit casually: the CTP rules listed
// here must stay in sync with the Zod schema used for downstream validation.
const SYSTEM_PROMPT = `\
You are an expert music producer and session musician assisting cover bands with click tracks.

You will receive automated BPM detection results for a song and must generate a CTP (Click Track Protocol) document describing the song's full tempo map.

CTP rules:
- "version" must be "1.0"
- sections[0].start_bar must be 1
- sections must be sorted by start_bar ascending, with no gaps
- Step sections have a single "bpm" field; ramp sections have "bpm_start" and "bpm_end" (no "bpm" field)
- All BPM values must be between 20 and 400
- time_signature.denominator must be a power of 2 (1, 2, 4, 8, 16, or 32)
- metadata.verified must be false (this is AI-generated, not human-verified)
- metadata.created_at must be an ISO 8601 datetime string

Guidelines for section layout:
- Use typical pop/rock section names: Intro, Verse, Pre-Chorus, Chorus, Bridge, Outro
- Estimate bar counts based on song duration and BPM (bars = duration_seconds × BPM / 60 / beats_per_bar)
- Most songs are 4/4; note any unusual meters if you know the song
- If you know the song has a tempo change (ritardando, double-time feel, key change with tempo shift), model it with a ramp or step section
- If unsure about sections, use a single constant-tempo section covering the whole song
- Use the detected BPM as the primary tempo — do not invent a different BPM unless the song is well-known to have a different tempo

The output is a draft for human review. Add reasonable section structure based on the song's typical arrangement.`;
|
||||||
|
|
||||||
|
// ─── Main function ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
export async function generateCTPWithAI(input: AnalysisInput): Promise<CTPDocument> {
|
||||||
|
const { bpm, duration, title, artist, mbid, contributedBy } = input;
|
||||||
|
|
||||||
|
const approxBars = Math.round((duration * bpm) / 60 / 4); // assuming 4/4
|
||||||
|
|
||||||
|
const userMessage = `\
|
||||||
|
Generate a CTP document for the following song:
|
||||||
|
|
||||||
|
Title: ${title ?? "Unknown Title"}
|
||||||
|
Artist: ${artist ?? "Unknown Artist"}
|
||||||
|
MusicBrainz ID: ${mbid ?? "unknown"}
|
||||||
|
Detected BPM: ${bpm}
|
||||||
|
Duration: ${duration.toFixed(1)} seconds (~${approxBars} bars at 4/4)
|
||||||
|
Contributed by: ${contributedBy ?? "anonymous"}
|
||||||
|
|
||||||
|
Create a plausible section layout for this song. If this is a well-known song, use your knowledge of its actual arrangement. If not, use a sensible generic structure.`;
|
||||||
|
|
||||||
|
const response = await client.messages.create({
|
||||||
|
model: "claude-opus-4-6",
|
||||||
|
max_tokens: 2048,
|
||||||
|
thinking: { type: "adaptive" },
|
||||||
|
system: SYSTEM_PROMPT,
|
||||||
|
messages: [{ role: "user", content: userMessage }],
|
||||||
|
output_config: {
|
||||||
|
format: {
|
||||||
|
type: "json_schema",
|
||||||
|
schema: CTP_SCHEMA,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
const textBlock = response.content.find((b) => b.type === "text");
|
||||||
|
if (!textBlock || textBlock.type !== "text") {
|
||||||
|
throw new Error("Claude did not return a text block");
|
||||||
|
}
|
||||||
|
|
||||||
|
let parsed: unknown;
|
||||||
|
try {
|
||||||
|
parsed = JSON.parse(textBlock.text);
|
||||||
|
} catch {
|
||||||
|
throw new Error(`Claude returned invalid JSON: ${textBlock.text.slice(0, 200)}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stamp the current timestamp if Claude left a placeholder
|
||||||
|
const doc = parsed as CTPDocument;
|
||||||
|
if (!doc.metadata.created_at || doc.metadata.created_at.includes("placeholder")) {
|
||||||
|
doc.metadata.created_at = new Date().toISOString();
|
||||||
|
}
|
||||||
|
|
||||||
|
return doc;
|
||||||
|
}
|
||||||
187
lib/analysis/bpm-detect.ts
Normal file
187
lib/analysis/bpm-detect.ts
Normal file
@@ -0,0 +1,187 @@
|
|||||||
|
/**
|
||||||
|
* Client-side BPM detection
|
||||||
|
*
|
||||||
|
* Runs entirely in the browser using the Web Audio API (no server round-trip
|
||||||
|
* for the audio itself). The algorithm:
|
||||||
|
*
|
||||||
|
* 1. Decode the audio file into PCM via AudioContext.decodeAudioData()
|
||||||
|
* 2. Mix to mono, optionally resample to 22050 Hz
|
||||||
|
* 3. Compute a short-time energy envelope (512-sample frames)
|
||||||
|
* 4. Derive an onset-strength signal via half-wave-rectified first difference
|
||||||
|
* 5. Autocorrelate the onset signal over lags corresponding to 55–210 BPM
|
||||||
|
* 6. Pick the lag with the highest correlation; also test its 2× harmonic
|
||||||
|
* (halving the BPM) as a tiebreaker for double-time detections
|
||||||
|
*
|
||||||
|
* Typical accuracy is ±1–2 BPM on produced music with a clear beat.
|
||||||
|
* Rubato, live recordings, or highly syncopated rhythms may need manual adjustment.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** Result of client-side BPM analysis; see `detectBPM`. */
export interface BPMDetectionResult {
  /** Estimated tempo in BPM, rounded to one decimal place. */
  bpm: number;
  /** Normalised confidence 0–1. Values above ~0.4 are generally reliable. */
  confidence: number;
  /** Total duration of the source file in seconds. */
  duration: number;
  /** The raw analysis produced a half-time alternative; user may prefer it. */
  halfTimeBpm: number | null;
}
|
||||||
|
|
||||||
|
// ─── Internal helpers ─────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
function mixToMono(buffer: AudioBuffer): Float32Array {
|
||||||
|
const n = buffer.length;
|
||||||
|
if (buffer.numberOfChannels === 1) {
|
||||||
|
return buffer.getChannelData(0).slice();
|
||||||
|
}
|
||||||
|
const mono = new Float32Array(n);
|
||||||
|
for (let c = 0; c < buffer.numberOfChannels; c++) {
|
||||||
|
const ch = buffer.getChannelData(c);
|
||||||
|
for (let i = 0; i < n; i++) mono[i] += ch[i];
|
||||||
|
}
|
||||||
|
const scale = 1 / buffer.numberOfChannels;
|
||||||
|
for (let i = 0; i < n; i++) mono[i] *= scale;
|
||||||
|
return mono;
|
||||||
|
}
|
||||||
|
|
||||||
|
function energyEnvelope(samples: Float32Array, frameSize: number): Float32Array {
|
||||||
|
const numFrames = Math.floor(samples.length / frameSize);
|
||||||
|
const env = new Float32Array(numFrames);
|
||||||
|
for (let i = 0; i < numFrames; i++) {
|
||||||
|
let sum = 0;
|
||||||
|
const base = i * frameSize;
|
||||||
|
for (let j = 0; j < frameSize; j++) {
|
||||||
|
const s = samples[base + j];
|
||||||
|
sum += s * s;
|
||||||
|
}
|
||||||
|
env[i] = Math.sqrt(sum / frameSize);
|
||||||
|
}
|
||||||
|
return env;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Half-wave-rectified first difference of the energy envelope.
|
||||||
|
* Positive spikes correspond to onset events (energy increases).
|
||||||
|
*/
|
||||||
|
function onsetStrength(env: Float32Array): Float32Array {
|
||||||
|
const onset = new Float32Array(env.length);
|
||||||
|
for (let i = 1; i < env.length; i++) {
|
||||||
|
const diff = env[i] - env[i - 1];
|
||||||
|
onset[i] = diff > 0 ? diff : 0;
|
||||||
|
}
|
||||||
|
return onset;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Normalised autocorrelation at a given lag.
|
||||||
|
* Returns a value in [-1, 1].
|
||||||
|
*/
|
||||||
|
function autocorrAtLag(signal: Float32Array, lag: number): number {
|
||||||
|
const n = signal.length - lag;
|
||||||
|
if (n <= 0) return 0;
|
||||||
|
|
||||||
|
let sumXX = 0;
|
||||||
|
let sumYY = 0;
|
||||||
|
let sumXY = 0;
|
||||||
|
for (let i = 0; i < n; i++) {
|
||||||
|
const x = signal[i];
|
||||||
|
const y = signal[i + lag];
|
||||||
|
sumXX += x * x;
|
||||||
|
sumYY += y * y;
|
||||||
|
sumXY += x * y;
|
||||||
|
}
|
||||||
|
const denom = Math.sqrt(sumXX * sumYY);
|
||||||
|
return denom > 0 ? sumXY / denom : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Public API ───────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Analyses a user-provided audio file and returns the estimated BPM.
|
||||||
|
* Must be called from a browser environment (requires Web Audio API).
|
||||||
|
*
|
||||||
|
* @param file An audio File (MP3, WAV, AAC, OGG — anything the browser decodes)
|
||||||
|
* @param signal An optional AbortSignal to cancel long analysis
|
||||||
|
*/
|
||||||
|
export async function detectBPM(
|
||||||
|
file: File,
|
||||||
|
signal?: AbortSignal
|
||||||
|
): Promise<BPMDetectionResult> {
|
||||||
|
// Decode at 22050 Hz to reduce computation while keeping enough resolution
|
||||||
|
const targetSampleRate = 22050;
|
||||||
|
const audioCtx = new AudioContext({ sampleRate: targetSampleRate });
|
||||||
|
|
||||||
|
try {
|
||||||
|
const arrayBuffer = await file.arrayBuffer();
|
||||||
|
if (signal?.aborted) throw new DOMException("Aborted", "AbortError");
|
||||||
|
|
||||||
|
const audioBuffer = await audioCtx.decodeAudioData(arrayBuffer);
|
||||||
|
if (signal?.aborted) throw new DOMException("Aborted", "AbortError");
|
||||||
|
|
||||||
|
const duration = audioBuffer.duration;
|
||||||
|
const sampleRate = audioBuffer.sampleRate; // may differ from targetSampleRate
|
||||||
|
|
||||||
|
const mono = mixToMono(audioBuffer);
|
||||||
|
|
||||||
|
// Analyse a representative middle segment (skip silent intros/outros).
|
||||||
|
// Cap at 90 s so analysis stays fast even on long recordings.
|
||||||
|
const analysisStart = Math.floor(sampleRate * Math.min(10, duration * 0.1));
|
||||||
|
const analysisEnd = Math.min(
|
||||||
|
mono.length,
|
||||||
|
analysisStart + Math.floor(sampleRate * 90)
|
||||||
|
);
|
||||||
|
const segment = mono.subarray(analysisStart, analysisEnd);
|
||||||
|
|
||||||
|
// Energy envelope: ~23 ms frames at 22050 Hz
|
||||||
|
const FRAME_SIZE = 512;
|
||||||
|
const frameRate = sampleRate / FRAME_SIZE; // frames per second
|
||||||
|
|
||||||
|
const env = energyEnvelope(segment, FRAME_SIZE);
|
||||||
|
const onset = onsetStrength(env);
|
||||||
|
|
||||||
|
// Lag bounds for 55–210 BPM
|
||||||
|
const minLag = Math.max(1, Math.round((frameRate * 60) / 210));
|
||||||
|
const maxLag = Math.round((frameRate * 60) / 55);
|
||||||
|
|
||||||
|
// Sweep lags and collect correlations
|
||||||
|
let bestLag = minLag;
|
||||||
|
let bestCorr = -Infinity;
|
||||||
|
|
||||||
|
for (let lag = minLag; lag <= maxLag; lag++) {
|
||||||
|
const corr = autocorrAtLag(onset, lag);
|
||||||
|
if (corr > bestCorr) {
|
||||||
|
bestCorr = corr;
|
||||||
|
bestLag = lag;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const rawBpm = (frameRate * 60) / bestLag;
|
||||||
|
// Round to one decimal place
|
||||||
|
const bpm = Math.round(rawBpm * 10) / 10;
|
||||||
|
|
||||||
|
// Check whether the half-time (bpm/2) has comparable correlation —
|
||||||
|
// double-time detections are common on songs with a 2-beat pulse.
|
||||||
|
const halfTimeLag = bestLag * 2;
|
||||||
|
let halfTimeBpm: number | null = null;
|
||||||
|
if (halfTimeLag <= maxLag * 2) {
|
||||||
|
const halfCorr = autocorrAtLag(onset, halfTimeLag);
|
||||||
|
if (halfCorr > bestCorr * 0.85) {
|
||||||
|
halfTimeBpm = Math.round((rawBpm / 2) * 10) / 10;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Normalise confidence against the best possible correlation in the range
|
||||||
|
const maxPossibleCorr = Math.max(
|
||||||
|
...Array.from({ length: maxLag - minLag + 1 }, (_, i) =>
|
||||||
|
Math.abs(autocorrAtLag(onset, minLag + i))
|
||||||
|
)
|
||||||
|
);
|
||||||
|
const confidence =
|
||||||
|
maxPossibleCorr > 0
|
||||||
|
? Math.max(0, Math.min(1, bestCorr / maxPossibleCorr))
|
||||||
|
: 0;
|
||||||
|
|
||||||
|
return { bpm, confidence, duration, halfTimeBpm };
|
||||||
|
} finally {
|
||||||
|
await audioCtx.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -2,7 +2,7 @@
|
|||||||
const nextConfig = {
|
const nextConfig = {
|
||||||
output: "standalone",
|
output: "standalone",
|
||||||
experimental: {
|
experimental: {
|
||||||
serverComponentsExternalPackages: ["pg", "ioredis"],
|
serverComponentsExternalPackages: ["pg", "ioredis", "@anthropic-ai/sdk"],
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -1,8 +0,0 @@
|
|||||||
import type { NextConfig } from "next";
|
|
||||||
|
|
||||||
const nextConfig: NextConfig = {
|
|
||||||
output: "standalone",
|
|
||||||
serverExternalPackages: ["pg", "ioredis"],
|
|
||||||
};
|
|
||||||
|
|
||||||
export default nextConfig;
|
|
||||||
@@ -17,7 +17,8 @@
|
|||||||
"zod": "^3.23.8",
|
"zod": "^3.23.8",
|
||||||
"pg": "^8.11.5",
|
"pg": "^8.11.5",
|
||||||
"ioredis": "^5.3.2",
|
"ioredis": "^5.3.2",
|
||||||
"node-fetch": "^3.3.2"
|
"node-fetch": "^3.3.2",
|
||||||
|
"@anthropic-ai/sdk": "^0.36.3"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@types/node": "^20.12.7",
|
"@types/node": "^20.12.7",
|
||||||
|
|||||||
Reference in New Issue
Block a user