Files
clicktrack/lib/analysis/providers/anthropic.ts
AJ Avezzano 5e686fc9c4 feat: MusicBrainz BPM enrichment + improved AI prompts
- lookupRecordingWithTags, extractBpmFromTags, extractTimeSigFromTags, getMusicBrainzRecording added to MB client
- upsertSong preserves existing BPM via COALESCE on conflict
- updateSongBpm helper for async enrichment writes
- AnalysisInput gains confirmedBpm / confirmedTimeSigNum fields
- POST /api/analyze fetches confirmed BPM from DB then MB tags before generation
- All three AI providers use confirmedBpm as authoritative and build enriched userMessage
- POST /api/tracks auto-registration now fetches tags via getMusicBrainzRecording
- Updated User-Agent and MB client fallback URL to Gitea

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-03 19:25:04 -04:00

198 lines
8.0 KiB
TypeScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import 'server-only';
import Anthropic from "@anthropic-ai/sdk";
import type { CTPDocument } from "@/lib/ctp/schema";
import type { AnalysisInput, AnalysisProvider } from "@/lib/analysis/providers";
// Extract the non-streaming Message type from the SDK without relying on internal paths.
// messages.create() can resolve to either a plain Message or a streaming variant;
// only the non-streaming Message carries a `content` array, so Extract selects it.
type AnthropicMessage = Extract<
  Awaited<ReturnType<Anthropic["messages"]["create"]>>,
  { content: unknown[] }
>;
const client = new Anthropic();
// ─── JSON Schema for structured output ───────────────────────────────────────
// Machine-readable counterpart of the CTP v1.0 document (see @/lib/ctp/schema).
// Passed to the Anthropic API as the structured-output format so the response
// is constrained to valid CTP JSON.
export const CTP_SCHEMA = {
  type: "object",
  additionalProperties: false,
  required: ["version", "metadata", "count_in", "sections"],
  properties: {
    version: { type: "string", enum: ["1.0"] },
    metadata: {
      type: "object",
      additionalProperties: false,
      required: [
        "title", "artist", "mbid", "duration_seconds",
        "contributed_by", "verified", "created_at",
      ],
      properties: {
        title: { type: "string" },
        artist: { type: "string" },
        mbid: { type: ["string", "null"] },
        duration_seconds: { type: "number" },
        contributed_by: { type: "string" },
        verified: { type: "boolean" },
        created_at: { type: "string" },
      },
    },
    count_in: {
      type: "object",
      additionalProperties: false,
      required: ["enabled", "bars", "use_first_section_tempo"],
      properties: {
        enabled: { type: "boolean" },
        bars: { type: "integer", minimum: 1, maximum: 8 },
        use_first_section_tempo: { type: "boolean" },
      },
    },
    sections: {
      type: "array",
      minItems: 1,
      items: {
        type: "object",
        additionalProperties: false,
        // bpm (step) vs bpm_start/bpm_end (ramp) are mutually exclusive by
        // convention (see SYSTEM_PROMPT); all three stay optional here rather
        // than encoding that rule as a oneOf.
        required: ["label", "start_bar", "time_signature", "transition"],
        properties: {
          label: { type: "string" },
          start_bar: { type: "integer", minimum: 1 },
          // Enforce the "20 to 400 BPM" rule from SYSTEM_PROMPT at the schema
          // level instead of relying on the model to honour prose instructions.
          bpm: { type: "number", minimum: 20, maximum: 400 },
          bpm_start: { type: "number", minimum: 20, maximum: 400 },
          bpm_end: { type: "number", minimum: 20, maximum: 400 },
          transition: { type: "string", enum: ["step", "ramp"] },
          time_signature: {
            type: "object",
            additionalProperties: false,
            required: ["numerator", "denominator"],
            properties: {
              numerator: { type: "integer", minimum: 1, maximum: 32 },
              denominator: { type: "integer", enum: [1, 2, 4, 8, 16, 32] },
            },
          },
        },
      },
    },
  },
};
// ─── System prompt ────────────────────────────────────────────────────────────
// Sent verbatim as the Anthropic `system` parameter. Encodes the CTP v1.0
// validity rules (the structural ones are mirrored by CTP_SCHEMA above), the
// heuristics for laying out sections, and the instruction that a
// MusicBrainz-confirmed BPM is authoritative over the audio-detected estimate.
export const SYSTEM_PROMPT = `\
You are an expert music producer and session musician assisting cover bands with click tracks.
You will receive information about a song and must generate a CTP (Click Track Protocol) document describing the song's full tempo map.
**Use your training knowledge of the specific song.** If you recognise the title and artist, use what you know about its actual structure: section names, bar counts, time signature, and any tempo changes (ritardando, double-time, key change with tempo shift). Your training data is a valuable source — do not ignore it in favour of generic guesses.
CTP rules:
- "version" must be "1.0"
- sections[0].start_bar must be 1
- sections must be sorted by start_bar ascending, with no gaps
- Step sections have a single "bpm" field; ramp sections have "bpm_start" and "bpm_end" (no "bpm" field)
- All BPM values must be between 20 and 400
- time_signature.denominator must be a power of 2 (1, 2, 4, 8, 16, or 32)
- metadata.verified must be false (this is AI-generated, not human-verified)
- metadata.created_at must be an ISO 8601 datetime string
Guidelines for section layout:
- Use typical pop/rock section names: Intro, Verse, Pre-Chorus, Chorus, Bridge, Outro
- Estimate bar counts based on song duration and BPM (bars = duration_seconds × BPM / 60 / beats_per_bar)
- Most songs are 4/4; use 3/4, 6/8, etc. if you know the song uses that meter
- If the song has a tempo change (ritardando, double-time feel, key change with tempo shift), model it with a ramp or step section
- If unsure about sections, use a single constant-tempo section covering the whole song
**BPM authority:** When a "Confirmed BPM" is provided in the user message, it comes from
MusicBrainz community tags or a reliable reference — treat it as ground truth and use it
for all sections unless you know the song has a significant tempo change. Do not average it
with the detected BPM or discard it. When only a "Detected BPM" is provided, use it as a
starting point but apply your knowledge of the song if you recognise it.
The output is a draft for human review. Add reasonable section structure based on the song's typical arrangement.`;
// ─── Provider implementation ──────────────────────────────────────────────────
export const anthropicProvider: AnalysisProvider = {
  id: "anthropic",
  label: "Claude (Anthropic)",
  type: "cloud-ai",

  /** Available only when ANTHROPIC_API_KEY is set in the environment. */
  async isAvailable() {
    if (process.env.ANTHROPIC_API_KEY) {
      return { available: true };
    }
    return { available: false, reason: "ANTHROPIC_API_KEY not set" };
  },

  /**
   * Ask Claude to draft a CTP tempo map for one song.
   *
   * A MusicBrainz-confirmed BPM / time-signature numerator on `input` is
   * presented to the model as authoritative (see SYSTEM_PROMPT); the
   * audio-detected BPM is included only as supporting context.
   *
   * @throws Error if the response has no text block, is not valid JSON, or
   *         lacks the `metadata` object this function dereferences.
   */
  async generateCTP(input: AnalysisInput): Promise<CTPDocument> {
    const { bpm, duration, title, artist, mbid, contributed_by, confirmedBpm, confirmedTimeSigNum } = input;
    const model = process.env.ANTHROPIC_MODEL ?? "claude-opus-4-6";

    // Confirmed (MusicBrainz) BPM wins over the detected estimate.
    const effectiveBpm = confirmedBpm ?? bpm;
    // Use the confirmed meter for the bar estimate instead of hard-coding 4/4.
    // The denominator is assumed to be 4 (quarter-note beats) — only the
    // numerator is available on AnalysisInput. `||` (not `??`) also guards
    // against a zero numerator causing division by zero.
    const beatsPerBar = confirmedTimeSigNum || 4;
    const approxBars = Math.round((duration * effectiveBpm) / 60 / beatsPerBar);

    const bpmLine = confirmedBpm
      ? `Confirmed BPM (from MusicBrainz community tags — treat as authoritative): ${confirmedBpm}\nDetected BPM (audio analysis): ${bpm}`
      : `Detected BPM (audio analysis): ${bpm}`;
    const timeSigHint = confirmedTimeSigNum
      ? `\nConfirmed time signature numerator: ${confirmedTimeSigNum}`
      : "";

    const userMessage = `\
Generate a CTP document for the following song:
Title: ${title ?? "Unknown Title"}
Artist: ${artist ?? "Unknown Artist"}
MusicBrainz ID: ${mbid ?? "unknown"}
${bpmLine}${timeSigHint}
Duration: ${duration.toFixed(1)} seconds (~${approxBars} bars at ${beatsPerBar}/4)
Contributed by: ${contributed_by}
If you recognise this song, use your training knowledge of its actual arrangement — section names, bar counts, time signature, and any tempo changes. If you do not recognise it, use a sensible generic structure based on the BPM and duration above.`;

    // thinking and output_config are not yet in the SDK type definitions;
    // cast through the base param type to avoid type errors.
    type ExtendedParams = Parameters<typeof client.messages.create>[0] & {
      thinking?: { type: string };
      output_config?: { format: { type: string; schema: unknown } };
    };
    const params: ExtendedParams = {
      model,
      max_tokens: 2048,
      thinking: { type: "adaptive" },
      system: SYSTEM_PROMPT,
      messages: [{ role: "user", content: userMessage }],
      output_config: {
        format: {
          type: "json_schema",
          schema: CTP_SCHEMA,
        },
      },
    };

    const response = (await client.messages.create(
      params as Parameters<typeof client.messages.create>[0]
    )) as AnthropicMessage;

    const textBlock = response.content.find((b) => b.type === "text");
    if (!textBlock || textBlock.type !== "text") {
      throw new Error("Claude did not return a text block");
    }

    let parsed: unknown;
    try {
      parsed = JSON.parse(textBlock.text);
    } catch {
      throw new Error(`Claude returned invalid JSON: ${textBlock.text.slice(0, 200)}`);
    }

    // Guard the one field dereferenced below before trusting the cast; a
    // response without `metadata` would otherwise surface as an opaque
    // TypeError. Full structural validation happens downstream at human review.
    const metadata = (parsed as { metadata?: unknown } | null)?.metadata;
    if (typeof parsed !== "object" || parsed === null || typeof metadata !== "object" || metadata === null) {
      throw new Error(`Claude returned JSON missing "metadata": ${textBlock.text.slice(0, 200)}`);
    }

    const doc = parsed as CTPDocument;
    // Claude sometimes emits a placeholder timestamp; stamp the real one.
    if (!doc.metadata.created_at || doc.metadata.created_at.includes("placeholder")) {
      doc.metadata.created_at = new Date().toISOString();
    }
    return doc;
  },
};