Users can now select any local audio file to generate a CTP tempo map (the audio itself is analysed in the browser and never leaves it):
BPM detection (lib/analysis/bpm-detect.ts):
- Runs entirely client-side via Web Audio API — audio is never uploaded
- Decodes any browser-supported format (MP3, WAV, AAC, OGG, FLAC, M4A)
- Energy envelope → onset strength → autocorrelation over 55–210 BPM range
- Returns BPM, normalised confidence score, duration, and optional half-time BPM
for songs where a double-time pulse is detected
AI CTP generation (lib/analysis/ai-ctp.ts):
- Calls Claude (claude-opus-4-6) with adaptive thinking + structured JSON output
- System prompt explains CTP rules and section layout conventions
- Claude uses knowledge of well-known songs to produce accurate section maps;
falls back to a sensible generic structure for unknown tracks
- Only BPM + duration + optional metadata is sent to the server (no audio data)
API route (app/api/analyze/route.ts):
- POST /api/analyze accepts { bpm, duration, title?, artist?, mbid?, contributed_by? }
- Validates input, calls generateCTPWithAI, runs CTP schema validation
- Returns { ctp, warnings } — warnings are surfaced in the UI rather than 500-ing
UI (components/TempoAnalyzer.tsx, app/(web)/analyze/page.tsx):
- Drag-and-drop or browse file upload
- Shows BPM, confidence, duration after detection
- Half-time toggle when double-time is detected
- Metadata form: title, artist, MusicBrainz ID, contributor name
(filename parsed into artist/title as a convenience default)
- AI generation with streaming-style progress states
- Sections review via TempoMapEditor
- Download .ctp.json or submit directly to the database
Also: added @anthropic-ai/sdk to package.json, ANTHROPIC_API_KEY to .env.example,
updated next.config.mjs serverComponentsExternalPackages, added Analyze nav link.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
180 lines
7.0 KiB
TypeScript
180 lines
7.0 KiB
TypeScript
/**
 * AI-assisted CTP document generation
 *
 * Takes the results of BPM detection (and optional song metadata) and uses
 * Claude to produce a plausible, well-structured CTP document.
 *
 * Claude is asked to:
 *   - Divide the song into typical sections (Intro, Verse, Chorus, Bridge…)
 *   - Assign realistic start bars for each section
 *   - Note any tempo changes it would expect for the song/genre
 *   - Return a fully valid CTP 1.0 JSON document
 *
 * The caller should treat the result as a *draft* — the generated sections
 * are educated guesses and should be verified against the recording.
 */
|
||
|
||
import Anthropic from "@anthropic-ai/sdk";
|
||
import type { CTPDocument } from "@/lib/ctp/schema";
|
||
|
||
// Module-level Anthropic SDK client used by generateCTPWithAI below.
// It is constructed with no explicit options — presumably the SDK reads
// ANTHROPIC_API_KEY from the environment; confirm against the SDK docs.
const client = new Anthropic();
|
||
|
||
// ─── Input / output types ─────────────────────────────────────────────────────
|
||
|
||
/**
 * Input to {@link generateCTPWithAI}: the BPM-detection results plus whatever
 * song metadata the user supplied. Only `bpm` and `duration` are required.
 */
export interface AnalysisInput {
  /** Detected tempo in beats per minute. */
  bpm: number;
  /** Track length in seconds. */
  duration: number;
  /** Song title; a placeholder is used in the prompt when omitted. */
  title?: string;
  /** Performing artist; a placeholder is used in the prompt when omitted. */
  artist?: string;
  /** MusicBrainz identifier, or `null`/omitted when not known. */
  mbid?: string | null;
  /** Name of the contributor; a placeholder is used in the prompt when omitted. */
  contributedBy?: string;
}
|
||
|
||
// ─── JSON Schema for structured output ───────────────────────────────────────
|
||
/**
 * JSON Schema handed to Claude's structured-output mode.
 *
 * Every object is closed (`additionalProperties: false`). The schema is kept
 * deliberately simple:
 *   - Numeric range keywords (`minimum` / `maximum`) are NOT used, because the
 *     structured-output JSON Schema subset does not accept them. The intended
 *     ranges are stated in `description` hints instead and are enforced by the
 *     downstream Zod validation.
 *   - `bpm`, `bpm_start` and `bpm_end` are all optional here. Which of them
 *     must be present depends on `transition` (step → `bpm`; ramp →
 *     `bpm_start` + `bpm_end`); that rule is also left to the Zod validation
 *     rather than expressed with `oneOf`.
 */
const CTP_SCHEMA = {
  type: "object",
  additionalProperties: false,
  required: ["version", "metadata", "count_in", "sections"],
  properties: {
    version: { type: "string", enum: ["1.0"] },
    metadata: {
      type: "object",
      additionalProperties: false,
      required: [
        "title", "artist", "mbid", "duration_seconds",
        "contributed_by", "verified", "created_at",
      ],
      properties: {
        title: { type: "string" },
        artist: { type: "string" },
        mbid: { type: ["string", "null"] },
        duration_seconds: { type: "number" },
        contributed_by: { type: "string" },
        verified: { type: "boolean" },
        created_at: { type: "string" },
      },
    },
    count_in: {
      type: "object",
      additionalProperties: false,
      required: ["enabled", "bars", "use_first_section_tempo"],
      properties: {
        enabled: { type: "boolean" },
        bars: {
          type: "integer",
          description: "Number of count-in bars; must be between 1 and 8 inclusive.",
        },
        use_first_section_tempo: { type: "boolean" },
      },
    },
    sections: {
      type: "array",
      minItems: 1,
      items: {
        type: "object",
        additionalProperties: false,
        required: ["label", "start_bar", "time_signature", "transition"],
        // bpm is needed for step sections, bpm_start/bpm_end for ramp sections.
        // That could be modelled with oneOf, but the schema is kept simple and
        // the rule is validated downstream with Zod.
        properties: {
          label: { type: "string" },
          start_bar: {
            type: "integer",
            description: "1-based bar number where the section starts; must be at least 1.",
          },
          bpm: { type: "number" },
          bpm_start: { type: "number" },
          bpm_end: { type: "number" },
          transition: { type: "string", enum: ["step", "ramp"] },
          time_signature: {
            type: "object",
            additionalProperties: false,
            required: ["numerator", "denominator"],
            properties: {
              numerator: {
                type: "integer",
                description: "Beats per bar; must be between 1 and 32 inclusive.",
              },
              denominator: { type: "integer", enum: [1, 2, 4, 8, 16, 32] },
            },
          },
        },
      },
    },
  },
};
|
||
|
||
// ─── System prompt ────────────────────────────────────────────────────────────
|
||
|
||
/**
 * System prompt sent with every generation request. It states the CTP
 * structural rules the output must satisfy and gives guidance on how to lay
 * out sections. The leading backslash suppresses the newline after the opening
 * backtick so the prompt starts directly with its first sentence.
 */
const SYSTEM_PROMPT = `\
You are an expert music producer and session musician assisting cover bands with click tracks.

You will receive automated BPM detection results for a song and must generate a CTP (Click Track Protocol) document describing the song's full tempo map.

CTP rules:
- "version" must be "1.0"
- sections[0].start_bar must be 1
- sections must be sorted by start_bar ascending, with no gaps
- Step sections have a single "bpm" field; ramp sections have "bpm_start" and "bpm_end" (no "bpm" field)
- All BPM values must be between 20 and 400
- time_signature.denominator must be a power of 2 (1, 2, 4, 8, 16, or 32)
- metadata.verified must be false (this is AI-generated, not human-verified)
- metadata.created_at must be an ISO 8601 datetime string

Guidelines for section layout:
- Use typical pop/rock section names: Intro, Verse, Pre-Chorus, Chorus, Bridge, Outro
- Estimate bar counts based on song duration and BPM (bars = duration_seconds × BPM / 60 / beats_per_bar)
- Most songs are 4/4; note any unusual meters if you know the song
- If you know the song has a tempo change (ritardando, double-time feel, key change with tempo shift), model it with a ramp or step section
- If unsure about sections, use a single constant-tempo section covering the whole song
- Use the detected BPM as the primary tempo — do not invent a different BPM unless the song is well-known to have a different tempo

The output is a draft for human review. Add reasonable section structure based on the song's typical arrangement.`;
|
||
|
||
// ─── Main function ────────────────────────────────────────────────────────────
|
||
|
||
/**
 * Asks Claude to draft a CTP document for a song from its detected tempo,
 * duration and optional metadata.
 *
 * The response is requested as structured JSON matching `CTP_SCHEMA`. Only a
 * minimal shape check is done here (the result is an object with a `metadata`
 * object); full CTP validation is expected to be performed by the caller.
 *
 * @param input - Detection results and optional song metadata.
 * @returns The generated document, with `metadata.created_at` replaced by the
 *   current time when the model's value is missing, a placeholder, or not
 *   parseable as a date.
 * @throws RangeError if `bpm` or `duration` is not a finite positive number.
 * @throws Error if the model refuses, the output is truncated by the token
 *   limit, no text block is returned, the text is not valid JSON, or the JSON
 *   lacks a `metadata` object. Errors raised by the SDK call itself propagate
 *   unchanged.
 */
export async function generateCTPWithAI(input: AnalysisInput): Promise<CTPDocument> {
  const { bpm, duration, title, artist, mbid, contributedBy } = input;

  // Reject values that would otherwise be interpolated into the prompt as "NaN".
  if (!Number.isFinite(bpm) || bpm <= 0 || !Number.isFinite(duration) || duration <= 0) {
    throw new RangeError(
      `bpm and duration must be finite positive numbers (got bpm=${bpm}, duration=${duration})`,
    );
  }

  // `||` is deliberate: blank or whitespace-only strings (e.g. from an empty
  // form field) should fall back to the placeholder just like undefined/null.
  const orDefault = (value: string | null | undefined, placeholder: string): string =>
    value?.trim() || placeholder;

  const approxBars = Math.round((duration * bpm) / 60 / 4); // assuming 4/4

  const userMessage = `\
Generate a CTP document for the following song:

Title: ${orDefault(title, "Unknown Title")}
Artist: ${orDefault(artist, "Unknown Artist")}
MusicBrainz ID: ${orDefault(mbid, "unknown")}
Detected BPM: ${bpm}
Duration: ${duration.toFixed(1)} seconds (~${approxBars} bars at 4/4)
Contributed by: ${orDefault(contributedBy, "anonymous")}

Create a plausible section layout for this song. If this is a well-known song, use your knowledge of its actual arrangement. If not, use a sensible generic structure.`;

  const response = await client.messages.create({
    model: "claude-opus-4-6",
    // NOTE(review): thinking tokens count against this budget, so long
    // section maps may be truncated — see the stop_reason check below.
    max_tokens: 2048,
    thinking: { type: "adaptive" },
    system: SYSTEM_PROMPT,
    messages: [{ role: "user", content: userMessage }],
    output_config: {
      format: {
        type: "json_schema",
        schema: CTP_SCHEMA,
      },
    },
  });

  // Report truncation and refusals explicitly instead of letting them surface
  // later as a confusing JSON parse failure.
  if (response.stop_reason === "max_tokens") {
    throw new Error("Claude's response was truncated by the max_tokens limit before the CTP document was complete");
  }
  if (response.stop_reason === "refusal") {
    throw new Error("Claude declined to generate a CTP document for this request");
  }

  const textBlock = response.content.find((block) => block.type === "text");
  if (!textBlock || textBlock.type !== "text") {
    throw new Error("Claude did not return a text block");
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(textBlock.text);
  } catch {
    throw new Error(`Claude returned invalid JSON: ${textBlock.text.slice(0, 200)}`);
  }

  // Minimal shape check so the metadata access below cannot throw a TypeError.
  if (typeof parsed !== "object" || parsed === null) {
    throw new Error("Claude returned JSON that is not an object");
  }
  const metadata = (parsed as { metadata?: unknown }).metadata;
  if (typeof metadata !== "object" || metadata === null) {
    throw new Error("Claude returned a CTP document without a metadata object");
  }

  // The remaining structure is trusted here and validated by the caller.
  const doc = parsed as CTPDocument;

  // Stamp the current time when the model's timestamp is missing, is a
  // placeholder, or cannot be parsed as a date.
  const createdAt: unknown = doc.metadata.created_at;
  if (
    typeof createdAt !== "string" ||
    createdAt.includes("placeholder") ||
    Number.isNaN(Date.parse(createdAt))
  ) {
    doc.metadata.created_at = new Date().toISOString();
  }

  return doc;
}
|