feat: MusicBrainz BPM enrichment + improved AI prompts

- lookupRecordingWithTags, extractBpmFromTags, extractTimeSigFromTags, getMusicBrainzRecording added to MB client
- upsertSong preserves existing BPM via COALESCE on conflict
- updateSongBpm helper for async enrichment writes
- AnalysisInput gains confirmedBpm / confirmedTimeSigNum fields
- POST /api/analyze fetches confirmed BPM from DB then MB tags before generation
- All three AI providers use confirmedBpm as authoritative and build enriched userMessage
- POST /api/tracks auto-registration now fetches tags via getMusicBrainzRecording
- Updated User-Agent and MB client fallback URL to Gitea

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
AJ Avezzano
2026-04-03 19:25:04 -04:00
parent 7ba4381bff
commit 5e686fc9c4
8 changed files with 223 additions and 26 deletions

View File

@@ -2,6 +2,8 @@ import { NextRequest, NextResponse } from "next/server";
import { z } from "zod"; import { z } from "zod";
import { getProvider, getAvailableProviders } from "@/lib/analysis/providers/registry"; import { getProvider, getAvailableProviders } from "@/lib/analysis/providers/registry";
import { validateCTP } from "@/lib/ctp/validate"; import { validateCTP } from "@/lib/ctp/validate";
import { getSongByMbid, updateSongBpm } from "@/lib/db/client";
import { lookupRecordingWithTags, extractBpmFromTags, extractTimeSigFromTags } from "@/lib/musicbrainz/client";
// ─── Request schema ─────────────────────────────────────────────────────────── // ─── Request schema ───────────────────────────────────────────────────────────
@@ -77,6 +79,43 @@ export async function POST(req: NextRequest) {
} }
} }
// ── BPM enrichment ────────────────────────────────────────────────────────
// If the song has an MBID, try to supply the AI with a confirmed BPM from
// MusicBrainz community tags. Best-effort: any failure is silently ignored.
let confirmedBpm: number | null = null;
let confirmedTimeSigNum: number | null = null;
if (mbid) {
// 1. Check the DB first — may already have a stored BPM from a prior lookup
try {
const storedSong = await getSongByMbid(mbid);
if (storedSong?.acousticbrainz_bpm) {
confirmedBpm = storedSong.acousticbrainz_bpm;
confirmedTimeSigNum = storedSong.acousticbrainz_time_sig_num ?? null;
}
} catch {
// ignore DB errors
}
// 2. If no stored BPM, fetch from MusicBrainz tags
if (!confirmedBpm) {
try {
const rec = await lookupRecordingWithTags(mbid);
const tagBpm = rec.tags ? extractBpmFromTags(rec.tags) : null;
const tagTimeSig = rec.tags ? extractTimeSigFromTags(rec.tags) : null;
if (tagBpm) {
confirmedBpm = tagBpm;
confirmedTimeSigNum = tagTimeSig;
// Persist for next time — fire-and-forget
updateSongBpm(mbid, tagBpm, tagTimeSig).catch(() => {});
}
} catch {
// MusicBrainz unavailable or rate-limited — proceed without confirmed BPM
}
}
}
const input = { const input = {
bpm, bpm,
duration, duration,
@@ -85,6 +124,8 @@ export async function POST(req: NextRequest) {
mbid: mbid ?? null, mbid: mbid ?? null,
contributed_by: contributed_by ?? "anonymous", contributed_by: contributed_by ?? "anonymous",
ollamaModel, ollamaModel,
confirmedBpm,
confirmedTimeSigNum,
}; };
let ctpDoc; let ctpDoc;

View File

@@ -2,7 +2,7 @@ import { NextRequest, NextResponse } from "next/server";
import { z } from "zod"; import { z } from "zod";
import { getTempoMapsForSong, getSongByMbid, insertTempoMap, upsertSong } from "@/lib/db/client"; import { getTempoMapsForSong, getSongByMbid, insertTempoMap, upsertSong } from "@/lib/db/client";
import { validateCTP } from "@/lib/ctp/validate"; import { validateCTP } from "@/lib/ctp/validate";
import { lookupRecording, formatArtistCredit, mbDurationToSeconds } from "@/lib/musicbrainz/client"; import { getMusicBrainzRecording } from "@/lib/musicbrainz/client";
// ─── GET /api/tracks?mbid=<uuid> ───────────────────────────────────────────── // ─── GET /api/tracks?mbid=<uuid> ─────────────────────────────────────────────
@@ -61,14 +61,14 @@ export async function POST(req: NextRequest) {
if (!existing) { if (!existing) {
try { try {
const rec = await lookupRecording(doc.metadata.mbid); const mbRecord = await getMusicBrainzRecording(doc.metadata.mbid);
await upsertSong({ await upsertSong({
mbid: doc.metadata.mbid, mbid: doc.metadata.mbid,
title: rec.title, title: mbRecord.title,
artist: formatArtistCredit(rec["artist-credit"]), artist: mbRecord.artist,
duration_seconds: mbDurationToSeconds(rec.length), duration_seconds: mbRecord.duration_seconds,
acousticbrainz_bpm: null, acousticbrainz_bpm: mbRecord.bpm,
acousticbrainz_time_sig_num: null, acousticbrainz_time_sig_num: mbRecord.timeSigNum,
source: "musicbrainz", source: "musicbrainz",
}); });
} catch { } catch {

View File

@@ -8,6 +8,8 @@ export interface AnalysisInput {
mbid?: string | null; mbid?: string | null;
contributed_by: string; contributed_by: string;
ollamaModel?: string; // required when provider id is "ollama" ollamaModel?: string; // required when provider id is "ollama"
confirmedBpm?: number | null; // from MusicBrainz tags or other reliable source
confirmedTimeSigNum?: number | null; // time signature numerator if confirmed
} }
export interface ProviderInfo { export interface ProviderInfo {

View File

@@ -81,7 +81,9 @@ export const CTP_SCHEMA = {
export const SYSTEM_PROMPT = `\ export const SYSTEM_PROMPT = `\
You are an expert music producer and session musician assisting cover bands with click tracks. You are an expert music producer and session musician assisting cover bands with click tracks.
You will receive automated BPM detection results for a song and must generate a CTP (Click Track Protocol) document describing the song's full tempo map. You will receive information about a song and must generate a CTP (Click Track Protocol) document describing the song's full tempo map.
**Use your training knowledge of the specific song.** If you recognise the title and artist, use what you know about its actual structure: section names, bar counts, time signature, and any tempo changes (ritardando, double-time, key change with tempo shift). Your training data is a valuable source — do not ignore it in favour of generic guesses.
CTP rules: CTP rules:
- "version" must be "1.0" - "version" must be "1.0"
@@ -96,10 +98,15 @@ CTP rules:
Guidelines for section layout: Guidelines for section layout:
- Use typical pop/rock section names: Intro, Verse, Pre-Chorus, Chorus, Bridge, Outro - Use typical pop/rock section names: Intro, Verse, Pre-Chorus, Chorus, Bridge, Outro
- Estimate bar counts based on song duration and BPM (bars = duration_seconds × BPM / 60 / beats_per_bar) - Estimate bar counts based on song duration and BPM (bars = duration_seconds × BPM / 60 / beats_per_bar)
- Most songs are 4/4; note any unusual meters if you know the song - Most songs are 4/4; use 3/4, 6/8, etc. if you know the song uses that meter
- If you know the song has a tempo change (ritardando, double-time feel, key change with tempo shift), model it with a ramp or step section - If the song has a tempo change (ritardando, double-time feel, key change with tempo shift), model it with a ramp or step section
- If unsure about sections, use a single constant-tempo section covering the whole song - If unsure about sections, use a single constant-tempo section covering the whole song
- Use the detected BPM as the primary tempo — do not invent a different BPM unless the song is well-known to have a different tempo
**BPM authority:** When a "Confirmed BPM" is provided in the user message, it comes from
MusicBrainz community tags or a reliable reference — treat it as ground truth and use it
for all sections unless you know the song has a significant tempo change. Do not average it
with the detected BPM or discard it. When only a "Detected BPM" is provided, use it as a
starting point but apply your knowledge of the song if you recognise it.
The output is a draft for human review. Add reasonable section structure based on the song's typical arrangement.`; The output is a draft for human review. Add reasonable section structure based on the song's typical arrangement.`;
@@ -118,9 +125,18 @@ export const anthropicProvider: AnalysisProvider = {
}, },
async generateCTP(input: AnalysisInput): Promise<CTPDocument> { async generateCTP(input: AnalysisInput): Promise<CTPDocument> {
const { bpm, duration, title, artist, mbid, contributed_by } = input; const { bpm, duration, title, artist, mbid, contributed_by, confirmedBpm, confirmedTimeSigNum } = input;
const model = process.env.ANTHROPIC_MODEL ?? "claude-opus-4-6"; const model = process.env.ANTHROPIC_MODEL ?? "claude-opus-4-6";
const approxBars = Math.round((duration * bpm) / 60 / 4); const effectiveBpm = confirmedBpm ?? bpm;
const approxBars = Math.round((duration * effectiveBpm) / 60 / 4);
const bpmLine = confirmedBpm
? `Confirmed BPM (from MusicBrainz community tags — treat as authoritative): ${confirmedBpm}\nDetected BPM (audio analysis): ${bpm}`
: `Detected BPM (audio analysis): ${bpm}`;
const timeSigHint = confirmedTimeSigNum
? `\nConfirmed time signature numerator: ${confirmedTimeSigNum}`
: "";
const userMessage = `\ const userMessage = `\
Generate a CTP document for the following song: Generate a CTP document for the following song:
@@ -128,11 +144,11 @@ Generate a CTP document for the following song:
Title: ${title ?? "Unknown Title"} Title: ${title ?? "Unknown Title"}
Artist: ${artist ?? "Unknown Artist"} Artist: ${artist ?? "Unknown Artist"}
MusicBrainz ID: ${mbid ?? "unknown"} MusicBrainz ID: ${mbid ?? "unknown"}
Detected BPM: ${bpm} ${bpmLine}${timeSigHint}
Duration: ${duration.toFixed(1)} seconds (~${approxBars} bars at 4/4) Duration: ${duration.toFixed(1)} seconds (~${approxBars} bars at 4/4)
Contributed by: ${contributed_by} Contributed by: ${contributed_by}
Create a plausible section layout for this song. If this is a well-known song, use your knowledge of its actual arrangement. If not, use a sensible generic structure.`; If you recognise this song, use your training knowledge of its actual arrangement — section names, bar counts, time signature, and any tempo changes. If you do not recognise it, use a sensible generic structure based on the BPM and duration above.`;
// thinking and output_config are not yet in the SDK type definitions; // thinking and output_config are not yet in the SDK type definitions;
// cast through the base param type to avoid type errors. // cast through the base param type to avoid type errors.

View File

@@ -96,13 +96,22 @@ export const ollamaProvider: AnalysisProvider = {
}, },
async generateCTP(input: AnalysisInput): Promise<CTPDocument> { async generateCTP(input: AnalysisInput): Promise<CTPDocument> {
const { ollamaModel, bpm, duration, title, artist, mbid, contributed_by } = input; const { ollamaModel, bpm, duration, title, artist, mbid, contributed_by, confirmedBpm, confirmedTimeSigNum } = input;
if (!ollamaModel) { if (!ollamaModel) {
throw new Error("ollamaModel is required for Ollama provider"); throw new Error("ollamaModel is required for Ollama provider");
} }
const approxBars = Math.round((duration * bpm) / 60 / 4); const effectiveBpm = confirmedBpm ?? bpm;
const approxBars = Math.round((duration * effectiveBpm) / 60 / 4);
const bpmLine = confirmedBpm
? `Confirmed BPM (from MusicBrainz community tags — treat as authoritative): ${confirmedBpm}\nDetected BPM (audio analysis): ${bpm}`
: `Detected BPM (audio analysis): ${bpm}`;
const timeSigHint = confirmedTimeSigNum
? `\nConfirmed time signature numerator: ${confirmedTimeSigNum}`
: "";
const userMessage = `\ const userMessage = `\
Generate a CTP document for the following song: Generate a CTP document for the following song:
@@ -110,11 +119,11 @@ Generate a CTP document for the following song:
Title: ${title ?? "Unknown Title"} Title: ${title ?? "Unknown Title"}
Artist: ${artist ?? "Unknown Artist"} Artist: ${artist ?? "Unknown Artist"}
MusicBrainz ID: ${mbid ?? "unknown"} MusicBrainz ID: ${mbid ?? "unknown"}
Detected BPM: ${bpm} ${bpmLine}${timeSigHint}
Duration: ${duration.toFixed(1)} seconds (~${approxBars} bars at 4/4) Duration: ${duration.toFixed(1)} seconds (~${approxBars} bars at 4/4)
Contributed by: ${contributed_by} Contributed by: ${contributed_by}
Create a plausible section layout for this song. If this is a well-known song, use your knowledge of its actual arrangement. If not, use a sensible generic structure.`; If you recognise this song, use your training knowledge of its actual arrangement — section names, bar counts, time signature, and any tempo changes. If you do not recognise it, use a sensible generic structure based on the BPM and duration above.`;
// Attempt parse with one retry on failure // Attempt parse with one retry on failure
let content: string; let content: string;

View File

@@ -38,8 +38,17 @@ export const openaiProvider: AnalysisProvider = {
const baseUrl = process.env.OPENAI_BASE_URL ?? "https://api.openai.com/v1"; const baseUrl = process.env.OPENAI_BASE_URL ?? "https://api.openai.com/v1";
const model = process.env.OPENAI_MODEL ?? "gpt-4o"; const model = process.env.OPENAI_MODEL ?? "gpt-4o";
const { bpm, duration, title, artist, mbid, contributed_by } = input; const { bpm, duration, title, artist, mbid, contributed_by, confirmedBpm, confirmedTimeSigNum } = input;
const approxBars = Math.round((duration * bpm) / 60 / 4); const effectiveBpm = confirmedBpm ?? bpm;
const approxBars = Math.round((duration * effectiveBpm) / 60 / 4);
const bpmLine = confirmedBpm
? `Confirmed BPM (from MusicBrainz community tags — treat as authoritative): ${confirmedBpm}\nDetected BPM (audio analysis): ${bpm}`
: `Detected BPM (audio analysis): ${bpm}`;
const timeSigHint = confirmedTimeSigNum
? `\nConfirmed time signature numerator: ${confirmedTimeSigNum}`
: "";
const userMessage = `\ const userMessage = `\
Generate a CTP document for the following song: Generate a CTP document for the following song:
@@ -47,11 +56,11 @@ Generate a CTP document for the following song:
Title: ${title ?? "Unknown Title"} Title: ${title ?? "Unknown Title"}
Artist: ${artist ?? "Unknown Artist"} Artist: ${artist ?? "Unknown Artist"}
MusicBrainz ID: ${mbid ?? "unknown"} MusicBrainz ID: ${mbid ?? "unknown"}
Detected BPM: ${bpm} ${bpmLine}${timeSigHint}
Duration: ${duration.toFixed(1)} seconds (~${approxBars} bars at 4/4) Duration: ${duration.toFixed(1)} seconds (~${approxBars} bars at 4/4)
Contributed by: ${contributed_by} Contributed by: ${contributed_by}
Create a plausible section layout for this song. If this is a well-known song, use your knowledge of its actual arrangement. If not, use a sensible generic structure.`; If you recognise this song, use your training knowledge of its actual arrangement — section names, bar counts, time signature, and any tempo changes. If you do not recognise it, use a sensible generic structure based on the BPM and duration above.`;
const response = await fetch(`${baseUrl}/chat/completions`, { const response = await fetch(`${baseUrl}/chat/completions`, {
method: "POST", method: "POST",

View File

@@ -92,8 +92,8 @@ export async function upsertSong(song: Omit<SongRow, "created_at" | "updated_at"
title = EXCLUDED.title, title = EXCLUDED.title,
artist = EXCLUDED.artist, artist = EXCLUDED.artist,
duration_seconds = EXCLUDED.duration_seconds, duration_seconds = EXCLUDED.duration_seconds,
acousticbrainz_bpm = EXCLUDED.acousticbrainz_bpm, acousticbrainz_bpm = COALESCE(EXCLUDED.acousticbrainz_bpm, songs.acousticbrainz_bpm),
acousticbrainz_time_sig_num = EXCLUDED.acousticbrainz_time_sig_num, acousticbrainz_time_sig_num = COALESCE(EXCLUDED.acousticbrainz_time_sig_num, songs.acousticbrainz_time_sig_num),
source = EXCLUDED.source`, source = EXCLUDED.source`,
[ [
song.mbid, song.mbid,
@@ -107,6 +107,26 @@ export async function upsertSong(song: Omit<SongRow, "created_at" | "updated_at"
); );
} }
/**
 * Updates only the BPM and time-signature fields of a song row.
 * No-ops if the song doesn't exist (the UPDATE simply matches zero rows).
 * Used by the async MusicBrainz tag enrichment path.
 *
 * @param mbid       MusicBrainz recording ID identifying the song row
 * @param bpm        new BPM, or null to leave the stored value untouched
 * @param timeSigNum new time-signature numerator, or null to leave it untouched
 */
export async function updateSongBpm(
  mbid: string,
  bpm: number | null,
  timeSigNum: number | null
): Promise<void> {
  // COALESCE keeps the previously stored value whenever the incoming one is
  // null, so a partial enrichment result never erases earlier data.
  await query(
    `UPDATE songs
        SET acousticbrainz_bpm = COALESCE($2, acousticbrainz_bpm),
            acousticbrainz_time_sig_num = COALESCE($3, acousticbrainz_time_sig_num),
            updated_at = NOW()
      WHERE mbid = $1`,
    [mbid, bpm, timeSigNum]
  );
}
// ─── Tempo map queries ──────────────────────────────────────────────────────── // ─── Tempo map queries ────────────────────────────────────────────────────────
export interface TempoMapRow { export interface TempoMapRow {

View File

@@ -10,7 +10,7 @@
const MB_BASE = "https://musicbrainz.org/ws/2"; const MB_BASE = "https://musicbrainz.org/ws/2";
const USER_AGENT = const USER_AGENT =
process.env.MUSICBRAINZ_USER_AGENT ?? process.env.MUSICBRAINZ_USER_AGENT ??
"ClickTrack/0.1 ( https://github.com/your-org/clicktrack )"; "ClickTrack/0.1 ( https://git.avezzano.io/the_og/clicktrack )";
// ─── Rate limiter ───────────────────────────────────────────────────────────── // ─── Rate limiter ─────────────────────────────────────────────────────────────
@@ -60,6 +60,7 @@ export interface MBRecording {
length?: number; // duration in milliseconds length?: number; // duration in milliseconds
"artist-credit": MBArtistCredit[]; "artist-credit": MBArtistCredit[];
releases?: MBRelease[]; releases?: MBRelease[];
tags?: MBTag[];
score?: number; // search relevance (0–100) score?: number; // search relevance (0–100)
} }
@@ -79,6 +80,11 @@ export interface MBRelease {
status?: string; status?: string;
} }
export interface MBTag {
name: string;
count: number;
}
export interface MBSearchResult { export interface MBSearchResult {
created: string; created: string;
count: number; count: number;
@@ -147,6 +153,100 @@ export async function lookupRecording(mbid: string): Promise<MBRecording> {
return response.json() as Promise<MBRecording>; return response.json() as Promise<MBRecording>;
} }
/**
 * Looks up a single recording by MBID, requesting artist-credits, releases,
 * and community tags. Use this when you also want BPM or time-signature tags.
 */
export async function lookupRecordingWithTags(mbid: string): Promise<MBRecording> {
  // Build the query string piecewise; serialization order (inc, fmt) is the
  // same as the object-literal form, so the final URL is unchanged.
  const search = new URLSearchParams();
  search.set("inc", "artist-credits+releases+tags");
  search.set("fmt", "json");
  const endpoint = `${MB_BASE}/recording/${encodeURIComponent(mbid)}?${search}`;
  const res = await rateLimitedFetch(endpoint);
  return res.json() as Promise<MBRecording>;
}
/**
 * Parses a MusicBrainz tag list and extracts a BPM value if present.
 *
 * MusicBrainz users tag recordings with strings like:
 *   "bpm: 174", "174 bpm", "bpm:174", "tempo: 174"
 *
 * Returns the most-voted BPM value (highest tag count), or null if none found.
 */
export function extractBpmFromTags(tags: MBTag[]): number | null {
  // Word boundaries keep the digit run from being carved out of a longer
  // number — e.g. "1100 bpm" must not match its inner "100 bpm".
  const bpmPattern = /\b(?:bpm|tempo)\s*:?\s*(\d{2,3})\b|\b(\d{2,3})\s*bpm\b/i;
  let bestBpm: number | null = null;
  // Start below zero so a matching tag whose net vote count is 0 (possible on
  // MusicBrainz after downvotes) is still eligible when nothing better exists.
  let bestCount = -1;
  for (const tag of tags) {
    const match = bpmPattern.exec(tag.name);
    if (!match) continue;
    const value = parseInt(match[1] ?? match[2], 10);
    // 20–400 covers every plausible musical tempo; reject junk outside it.
    if (value >= 20 && value <= 400 && tag.count > bestCount) {
      bestBpm = value;
      bestCount = tag.count;
    }
  }
  return bestBpm;
}
/**
 * Parses a MusicBrainz tag list and extracts a time signature numerator if present.
 *
 * Users tag recordings like "3/4", "5/4", "6/8", "time signature: 3/4".
 * We only store the numerator since that's what the CTP denominator-agnostic
 * count-in uses.
 *
 * Returns the most-voted numerator, or null if none found.
 */
export function extractTimeSigFromTags(tags: MBTag[]): number | null {
  // Word boundaries stop the fraction from being carved out of longer digit
  // runs (e.g. "123/4" must not match as "23/4").
  const timeSigPattern = /(?:time\s*signature\s*:?\s*)?\b(\d{1,2})\/(\d{1,2})\b/i;
  let bestNum: number | null = null;
  // Start below zero so a matching tag with a net vote count of 0 still counts.
  let bestCount = -1;
  for (const tag of tags) {
    const match = timeSigPattern.exec(tag.name);
    if (!match) continue;
    const numerator = parseInt(match[1], 10);
    const denominator = parseInt(match[2], 10);
    // Real time-signature denominators are powers of two (1, 2, 4, 8, 16, 32);
    // requiring that filters false positives such as "3/7" or date-like "12/31".
    const denomIsPowerOfTwo = denominator >= 1 && (denominator & (denominator - 1)) === 0;
    if (numerator >= 1 && numerator <= 32 && denomIsPowerOfTwo && tag.count > bestCount) {
      bestNum = numerator;
      bestCount = tag.count;
    }
  }
  return bestNum;
}
/**
 * Returns a normalised recording object suitable for passing to `upsertSong`.
 * Includes BPM and time-signature from community tags when available.
 */
export async function getMusicBrainzRecording(mbid: string): Promise<{
  title: string;
  artist: string;
  duration_seconds: number | null;
  bpm: number | null;
  timeSigNum: number | null;
}> {
  const recording = await lookupRecordingWithTags(mbid);
  // Tags are optional on MusicBrainz responses; an empty list makes both
  // extractors return null, matching the no-tags case.
  const tagList = recording.tags ?? [];
  return {
    title: recording.title,
    artist: formatArtistCredit(recording["artist-credit"]),
    duration_seconds: mbDurationToSeconds(recording.length),
    bpm: extractBpmFromTags(tagList),
    timeSigNum: extractTimeSigFromTags(tagList),
  };
}
/** /**
* Convenience function: searches MusicBrainz and returns results normalised * Convenience function: searches MusicBrainz and returns results normalised
* for storage in the `songs` table. * for storage in the `songs` table.