feat: MusicBrainz BPM enrichment + improved AI prompts

- lookupRecordingWithTags, extractBpmFromTags, extractTimeSigFromTags, getMusicBrainzRecording added to MB client
- upsertSong preserves existing BPM via COALESCE on conflict
- updateSongBpm helper for async enrichment writes
- AnalysisInput gains confirmedBpm / confirmedTimeSigNum fields
- POST /api/analyze fetches confirmed BPM from DB then MB tags before generation
- All three AI providers use confirmedBpm as authoritative and build enriched userMessage
- POST /api/tracks auto-registration now fetches tags via getMusicBrainzRecording
- Updated User-Agent and MB client fallback URL to Gitea

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
AJ Avezzano
2026-04-03 19:25:04 -04:00
parent 7ba4381bff
commit 5e686fc9c4
8 changed files with 223 additions and 26 deletions

View File

@@ -2,6 +2,8 @@ import { NextRequest, NextResponse } from "next/server";
import { z } from "zod";
import { getProvider, getAvailableProviders } from "@/lib/analysis/providers/registry";
import { validateCTP } from "@/lib/ctp/validate";
import { getSongByMbid, updateSongBpm } from "@/lib/db/client";
import { lookupRecordingWithTags, extractBpmFromTags, extractTimeSigFromTags } from "@/lib/musicbrainz/client";
// ─── Request schema ───────────────────────────────────────────────────────────
@@ -77,6 +79,43 @@ export async function POST(req: NextRequest) {
}
}
// ── BPM enrichment ────────────────────────────────────────────────────────
// If the song has an MBID, try to supply the AI with a confirmed BPM from
// MusicBrainz community tags. Best-effort: any failure is silently ignored.
let confirmedBpm: number | null = null;
let confirmedTimeSigNum: number | null = null;
if (mbid) {
// 1. Check the DB first — may already have a stored BPM from a prior lookup
try {
const storedSong = await getSongByMbid(mbid);
if (storedSong?.acousticbrainz_bpm) {
confirmedBpm = storedSong.acousticbrainz_bpm;
confirmedTimeSigNum = storedSong.acousticbrainz_time_sig_num ?? null;
}
} catch {
// ignore DB errors
}
// 2. If no stored BPM, fetch from MusicBrainz tags
if (!confirmedBpm) {
try {
const rec = await lookupRecordingWithTags(mbid);
const tagBpm = rec.tags ? extractBpmFromTags(rec.tags) : null;
const tagTimeSig = rec.tags ? extractTimeSigFromTags(rec.tags) : null;
if (tagBpm) {
confirmedBpm = tagBpm;
confirmedTimeSigNum = tagTimeSig;
// Persist for next time — fire-and-forget
updateSongBpm(mbid, tagBpm, tagTimeSig).catch(() => {});
}
} catch {
// MusicBrainz unavailable or rate-limited — proceed without confirmed BPM
}
}
}
const input = {
bpm,
duration,
@@ -85,6 +124,8 @@ export async function POST(req: NextRequest) {
mbid: mbid ?? null,
contributed_by: contributed_by ?? "anonymous",
ollamaModel,
confirmedBpm,
confirmedTimeSigNum,
};
let ctpDoc;

View File

@@ -2,7 +2,7 @@ import { NextRequest, NextResponse } from "next/server";
import { z } from "zod";
import { getTempoMapsForSong, getSongByMbid, insertTempoMap, upsertSong } from "@/lib/db/client";
import { validateCTP } from "@/lib/ctp/validate";
import { lookupRecording, formatArtistCredit, mbDurationToSeconds } from "@/lib/musicbrainz/client";
import { getMusicBrainzRecording } from "@/lib/musicbrainz/client";
// ─── GET /api/tracks?mbid=<uuid> ─────────────────────────────────────────────
@@ -61,14 +61,14 @@ export async function POST(req: NextRequest) {
if (!existing) {
try {
const rec = await lookupRecording(doc.metadata.mbid);
const mbRecord = await getMusicBrainzRecording(doc.metadata.mbid);
await upsertSong({
mbid: doc.metadata.mbid,
title: rec.title,
artist: formatArtistCredit(rec["artist-credit"]),
duration_seconds: mbDurationToSeconds(rec.length),
acousticbrainz_bpm: null,
acousticbrainz_time_sig_num: null,
title: mbRecord.title,
artist: mbRecord.artist,
duration_seconds: mbRecord.duration_seconds,
acousticbrainz_bpm: mbRecord.bpm,
acousticbrainz_time_sig_num: mbRecord.timeSigNum,
source: "musicbrainz",
});
} catch {

View File

@@ -8,6 +8,8 @@ export interface AnalysisInput {
mbid?: string | null;
contributed_by: string;
ollamaModel?: string; // required when provider id is "ollama"
confirmedBpm?: number | null; // from MusicBrainz tags or other reliable source
confirmedTimeSigNum?: number | null; // time signature numerator if confirmed
}
export interface ProviderInfo {

View File

@@ -81,7 +81,9 @@ export const CTP_SCHEMA = {
export const SYSTEM_PROMPT = `\
You are an expert music producer and session musician assisting cover bands with click tracks.
You will receive automated BPM detection results for a song and must generate a CTP (Click Track Protocol) document describing the song's full tempo map.
You will receive information about a song and must generate a CTP (Click Track Protocol) document describing the song's full tempo map.
**Use your training knowledge of the specific song.** If you recognise the title and artist, use what you know about its actual structure: section names, bar counts, time signature, and any tempo changes (ritardando, double-time, key change with tempo shift). Your training data is a valuable source — do not ignore it in favour of generic guesses.
CTP rules:
- "version" must be "1.0"
@@ -96,10 +98,15 @@ CTP rules:
Guidelines for section layout:
- Use typical pop/rock section names: Intro, Verse, Pre-Chorus, Chorus, Bridge, Outro
- Estimate bar counts based on song duration and BPM (bars = duration_seconds × BPM / 60 / beats_per_bar)
- Most songs are 4/4; note any unusual meters if you know the song
- If you know the song has a tempo change (ritardando, double-time feel, key change with tempo shift), model it with a ramp or step section
- Most songs are 4/4; use 3/4, 6/8, etc. if you know the song uses that meter
- If the song has a tempo change (ritardando, double-time feel, key change with tempo shift), model it with a ramp or step section
- If unsure about sections, use a single constant-tempo section covering the whole song
- Use the detected BPM as the primary tempo — do not invent a different BPM unless the song is well-known to have a different tempo
**BPM authority:** When a "Confirmed BPM" is provided in the user message, it comes from
MusicBrainz community tags or a reliable reference — treat it as ground truth and use it
for all sections unless you know the song has a significant tempo change. Do not average it
with the detected BPM or discard it. When only a "Detected BPM" is provided, use it as a
starting point but apply your knowledge of the song if you recognise it.
The output is a draft for human review. Add reasonable section structure based on the song's typical arrangement.`;
@@ -118,9 +125,18 @@ export const anthropicProvider: AnalysisProvider = {
},
async generateCTP(input: AnalysisInput): Promise<CTPDocument> {
const { bpm, duration, title, artist, mbid, contributed_by } = input;
const { bpm, duration, title, artist, mbid, contributed_by, confirmedBpm, confirmedTimeSigNum } = input;
const model = process.env.ANTHROPIC_MODEL ?? "claude-opus-4-6";
const approxBars = Math.round((duration * bpm) / 60 / 4);
const effectiveBpm = confirmedBpm ?? bpm;
const approxBars = Math.round((duration * effectiveBpm) / 60 / 4);
const bpmLine = confirmedBpm
? `Confirmed BPM (from MusicBrainz community tags — treat as authoritative): ${confirmedBpm}\nDetected BPM (audio analysis): ${bpm}`
: `Detected BPM (audio analysis): ${bpm}`;
const timeSigHint = confirmedTimeSigNum
? `\nConfirmed time signature numerator: ${confirmedTimeSigNum}`
: "";
const userMessage = `\
Generate a CTP document for the following song:
@@ -128,11 +144,11 @@ Generate a CTP document for the following song:
Title: ${title ?? "Unknown Title"}
Artist: ${artist ?? "Unknown Artist"}
MusicBrainz ID: ${mbid ?? "unknown"}
Detected BPM: ${bpm}
${bpmLine}${timeSigHint}
Duration: ${duration.toFixed(1)} seconds (~${approxBars} bars at 4/4)
Contributed by: ${contributed_by}
Create a plausible section layout for this song. If this is a well-known song, use your knowledge of its actual arrangement. If not, use a sensible generic structure.`;
If you recognise this song, use your training knowledge of its actual arrangement — section names, bar counts, time signature, and any tempo changes. If you do not recognise it, use a sensible generic structure based on the BPM and duration above.`;
// thinking and output_config are not yet in the SDK type definitions;
// cast through the base param type to avoid type errors.

View File

@@ -96,13 +96,22 @@ export const ollamaProvider: AnalysisProvider = {
},
async generateCTP(input: AnalysisInput): Promise<CTPDocument> {
const { ollamaModel, bpm, duration, title, artist, mbid, contributed_by } = input;
const { ollamaModel, bpm, duration, title, artist, mbid, contributed_by, confirmedBpm, confirmedTimeSigNum } = input;
if (!ollamaModel) {
throw new Error("ollamaModel is required for Ollama provider");
}
const approxBars = Math.round((duration * bpm) / 60 / 4);
const effectiveBpm = confirmedBpm ?? bpm;
const approxBars = Math.round((duration * effectiveBpm) / 60 / 4);
const bpmLine = confirmedBpm
? `Confirmed BPM (from MusicBrainz community tags — treat as authoritative): ${confirmedBpm}\nDetected BPM (audio analysis): ${bpm}`
: `Detected BPM (audio analysis): ${bpm}`;
const timeSigHint = confirmedTimeSigNum
? `\nConfirmed time signature numerator: ${confirmedTimeSigNum}`
: "";
const userMessage = `\
Generate a CTP document for the following song:
@@ -110,11 +119,11 @@ Generate a CTP document for the following song:
Title: ${title ?? "Unknown Title"}
Artist: ${artist ?? "Unknown Artist"}
MusicBrainz ID: ${mbid ?? "unknown"}
Detected BPM: ${bpm}
${bpmLine}${timeSigHint}
Duration: ${duration.toFixed(1)} seconds (~${approxBars} bars at 4/4)
Contributed by: ${contributed_by}
Create a plausible section layout for this song. If this is a well-known song, use your knowledge of its actual arrangement. If not, use a sensible generic structure.`;
If you recognise this song, use your training knowledge of its actual arrangement — section names, bar counts, time signature, and any tempo changes. If you do not recognise it, use a sensible generic structure based on the BPM and duration above.`;
// Attempt parse with one retry on failure
let content: string;

View File

@@ -38,8 +38,17 @@ export const openaiProvider: AnalysisProvider = {
const baseUrl = process.env.OPENAI_BASE_URL ?? "https://api.openai.com/v1";
const model = process.env.OPENAI_MODEL ?? "gpt-4o";
const { bpm, duration, title, artist, mbid, contributed_by } = input;
const approxBars = Math.round((duration * bpm) / 60 / 4);
const { bpm, duration, title, artist, mbid, contributed_by, confirmedBpm, confirmedTimeSigNum } = input;
const effectiveBpm = confirmedBpm ?? bpm;
const approxBars = Math.round((duration * effectiveBpm) / 60 / 4);
const bpmLine = confirmedBpm
? `Confirmed BPM (from MusicBrainz community tags — treat as authoritative): ${confirmedBpm}\nDetected BPM (audio analysis): ${bpm}`
: `Detected BPM (audio analysis): ${bpm}`;
const timeSigHint = confirmedTimeSigNum
? `\nConfirmed time signature numerator: ${confirmedTimeSigNum}`
: "";
const userMessage = `\
Generate a CTP document for the following song:
@@ -47,11 +56,11 @@ Generate a CTP document for the following song:
Title: ${title ?? "Unknown Title"}
Artist: ${artist ?? "Unknown Artist"}
MusicBrainz ID: ${mbid ?? "unknown"}
Detected BPM: ${bpm}
${bpmLine}${timeSigHint}
Duration: ${duration.toFixed(1)} seconds (~${approxBars} bars at 4/4)
Contributed by: ${contributed_by}
Create a plausible section layout for this song. If this is a well-known song, use your knowledge of its actual arrangement. If not, use a sensible generic structure.`;
If you recognise this song, use your training knowledge of its actual arrangement — section names, bar counts, time signature, and any tempo changes. If you do not recognise it, use a sensible generic structure based on the BPM and duration above.`;
const response = await fetch(`${baseUrl}/chat/completions`, {
method: "POST",

View File

@@ -92,8 +92,8 @@ export async function upsertSong(song: Omit<SongRow, "created_at" | "updated_at"
title = EXCLUDED.title,
artist = EXCLUDED.artist,
duration_seconds = EXCLUDED.duration_seconds,
acousticbrainz_bpm = EXCLUDED.acousticbrainz_bpm,
acousticbrainz_time_sig_num = EXCLUDED.acousticbrainz_time_sig_num,
acousticbrainz_bpm = COALESCE(EXCLUDED.acousticbrainz_bpm, songs.acousticbrainz_bpm),
acousticbrainz_time_sig_num = COALESCE(EXCLUDED.acousticbrainz_time_sig_num, songs.acousticbrainz_time_sig_num),
source = EXCLUDED.source`,
[
song.mbid,
@@ -107,6 +107,26 @@ export async function upsertSong(song: Omit<SongRow, "created_at" | "updated_at"
);
}
/**
 * Writes only the BPM / time-signature columns of an existing song row.
 * COALESCE keeps the stored value whenever the incoming one is null, and
 * the UPDATE is a silent no-op if no row matches the MBID.
 * Used by the async MusicBrainz tag enrichment path.
 *
 * @param mbid       MusicBrainz recording id identifying the row
 * @param bpm        new BPM, or null to leave the stored value untouched
 * @param timeSigNum new time-signature numerator, or null to leave as-is
 */
export async function updateSongBpm(
  mbid: string,
  bpm: number | null,
  timeSigNum: number | null
): Promise<void> {
  const sql = `UPDATE songs
     SET acousticbrainz_bpm = COALESCE($2, acousticbrainz_bpm),
         acousticbrainz_time_sig_num = COALESCE($3, acousticbrainz_time_sig_num),
         updated_at = NOW()
     WHERE mbid = $1`;
  const params = [mbid, bpm, timeSigNum];
  await query(sql, params);
}
// ─── Tempo map queries ────────────────────────────────────────────────────────
export interface TempoMapRow {

View File

@@ -10,7 +10,7 @@
const MB_BASE = "https://musicbrainz.org/ws/2";
const USER_AGENT =
process.env.MUSICBRAINZ_USER_AGENT ??
"ClickTrack/0.1 ( https://github.com/your-org/clicktrack )";
"ClickTrack/0.1 ( https://git.avezzano.io/the_og/clicktrack )";
// ─── Rate limiter ─────────────────────────────────────────────────────────────
@@ -60,6 +60,7 @@ export interface MBRecording {
length?: number; // duration in milliseconds
"artist-credit": MBArtistCredit[];
releases?: MBRelease[];
tags?: MBTag[];
score?: number; // search relevance (0–100)
}
@@ -79,6 +80,11 @@ export interface MBRelease {
status?: string;
}
/** A MusicBrainz community tag attached to a recording. */
export interface MBTag {
  name: string; // raw tag text, e.g. "bpm: 174" or "3/4" — parsed by the extract* helpers below
  count: number; // number of users who applied this tag; used as the vote weight when picking a winner
}
export interface MBSearchResult {
created: string;
count: number;
@@ -147,6 +153,100 @@ export async function lookupRecording(mbid: string): Promise<MBRecording> {
return response.json() as Promise<MBRecording>;
}
/**
 * Fetches a single recording by MBID with artist-credits, releases, AND
 * community tags included. Prefer this over lookupRecording when the
 * caller also wants BPM or time-signature tag data.
 *
 * @param mbid MusicBrainz recording id
 * @returns the raw MusicBrainz recording payload (tags included when present)
 */
export async function lookupRecordingWithTags(mbid: string): Promise<MBRecording> {
  const search = new URLSearchParams({
    inc: "artist-credits+releases+tags",
    fmt: "json",
  });
  const response = await rateLimitedFetch(
    `${MB_BASE}/recording/${encodeURIComponent(mbid)}?${search}`
  );
  return response.json() as Promise<MBRecording>;
}
/**
 * Parses a MusicBrainz tag list and extracts a BPM value if present.
 *
 * MusicBrainz users tag recordings with strings like:
 *   "bpm: 174", "174 bpm", "bpm:174", "tempo: 174"
 *
 * The word boundaries (\b) around the digit captures prevent partial
 * matches inside longer numbers — without them "1746 bpm" would yield
 * 746 and "bpm: 1746" would yield 174.
 *
 * @param tags community tag list from a MusicBrainz lookup
 * @returns the most-voted BPM (highest tag count) within [20, 400], or null
 */
export function extractBpmFromTags(tags: MBTag[]): number | null {
  const bpmPattern = /(?:bpm|tempo)\s*:?\s*(\d{2,3})\b|\b(\d{2,3})\s*bpm/i;
  let bestBpm: number | null = null;
  let bestCount = 0;
  for (const tag of tags) {
    const match = bpmPattern.exec(tag.name);
    if (match) {
      // Exactly one of the two capture groups is populated per match.
      const value = parseInt(match[1] ?? match[2], 10);
      // Plausibility window: real songs fall well inside 20-400 BPM.
      if (value >= 20 && value <= 400 && tag.count > bestCount) {
        bestBpm = value;
        bestCount = tag.count;
      }
    }
  }
  return bestBpm;
}
/**
 * Parses a MusicBrainz tag list and extracts a time signature numerator if present.
 *
 * Users tag recordings like "3/4", "5/4", "6/8", "time signature: 3/4".
 * We only store the numerator since that's what the CTP denominator-agnostic
 * count-in uses.
 *
 * The denominator must be a power of two (1, 2, 4, 8, 16, 32) — real meters
 * always are — so date-like tags such as "9/11" or arbitrary fractions are
 * not mistaken for time signatures.
 *
 * @param tags community tag list from a MusicBrainz lookup
 * @returns the most-voted numerator (1-32), or null if none found
 */
export function extractTimeSigFromTags(tags: MBTag[]): number | null {
  const timeSigPattern = /(?:time\s*signature\s*:?\s*)?(\d{1,2})\/(\d{1,2})/i;
  const validDenominators = new Set([1, 2, 4, 8, 16, 32]);
  let bestNum: number | null = null;
  let bestCount = 0;
  for (const tag of tags) {
    const match = timeSigPattern.exec(tag.name);
    if (match) {
      const numerator = parseInt(match[1], 10);
      const denominator = parseInt(match[2], 10);
      if (
        numerator >= 1 &&
        numerator <= 32 &&
        validDenominators.has(denominator) &&
        tag.count > bestCount
      ) {
        bestNum = numerator;
        bestCount = tag.count;
      }
    }
  }
  return bestNum;
}
/**
 * Returns a normalised recording object suitable for passing to `upsertSong`.
 * BPM and time-signature numerator are pulled from community tags when the
 * lookup returns any; both are null otherwise.
 *
 * @param mbid MusicBrainz recording id
 */
export async function getMusicBrainzRecording(mbid: string): Promise<{
  title: string;
  artist: string;
  duration_seconds: number | null;
  bpm: number | null;
  timeSigNum: number | null;
}> {
  const recording = await lookupRecordingWithTags(mbid);
  const tags = recording.tags;
  const bpm = tags ? extractBpmFromTags(tags) : null;
  const timeSigNum = tags ? extractTimeSigFromTags(tags) : null;
  return {
    title: recording.title,
    artist: formatArtistCredit(recording["artist-credit"]),
    duration_seconds: mbDurationToSeconds(recording.length),
    bpm,
    timeSigNum,
  };
}
/**
* Convenience function: searches MusicBrainz and returns results normalised
* for storage in the `songs` table.