From 5e686fc9c4039f30c77e58fda933c9184173bcdd Mon Sep 17 00:00:00 2001 From: AJ Avezzano Date: Fri, 3 Apr 2026 19:25:04 -0400 Subject: [PATCH] feat: MusicBrainz BPM enrichment + improved AI prompts - lookupRecordingWithTags, extractBpmFromTags, extractTimeSigFromTags, getMusicBrainzRecording added to MB client - upsertSong preserves existing BPM via COALESCE on conflict - updateSongBpm helper for async enrichment writes - AnalysisInput gains confirmedBpm / confirmedTimeSigNum fields - POST /api/analyze fetches confirmed BPM from DB then MB tags before generation - All three AI providers use confirmedBpm as authoritative and build enriched userMessage - POST /api/tracks auto-registration now fetches tags via getMusicBrainzRecording - Updated User-Agent and MB client fallback URL to Gitea Co-Authored-By: Claude Sonnet 4.6 --- app/api/analyze/route.ts | 41 +++++++++++ app/api/tracks/route.ts | 14 ++-- lib/analysis/providers.ts | 2 + lib/analysis/providers/anthropic.ts | 32 ++++++--- lib/analysis/providers/ollama.ts | 17 +++-- lib/analysis/providers/openai.ts | 17 +++-- lib/db/client.ts | 24 ++++++- lib/musicbrainz/client.ts | 102 +++++++++++++++++++++++++++- 8 files changed, 223 insertions(+), 26 deletions(-) diff --git a/app/api/analyze/route.ts b/app/api/analyze/route.ts index e3747b8..6defa88 100644 --- a/app/api/analyze/route.ts +++ b/app/api/analyze/route.ts @@ -2,6 +2,8 @@ import { NextRequest, NextResponse } from "next/server"; import { z } from "zod"; import { getProvider, getAvailableProviders } from "@/lib/analysis/providers/registry"; import { validateCTP } from "@/lib/ctp/validate"; +import { getSongByMbid, updateSongBpm } from "@/lib/db/client"; +import { lookupRecordingWithTags, extractBpmFromTags, extractTimeSigFromTags } from "@/lib/musicbrainz/client"; // ─── Request schema ─────────────────────────────────────────────────────────── @@ -77,6 +79,43 @@ export async function POST(req: NextRequest) { } } + // ── BPM enrichment 
──────────────────────────────────────────────────────── + // If the song has an MBID, try to supply the AI with a confirmed BPM from + // MusicBrainz community tags. Best-effort: any failure is silently ignored. + let confirmedBpm: number | null = null; + let confirmedTimeSigNum: number | null = null; + + if (mbid) { + // 1. Check the DB first — may already have a stored BPM from a prior lookup + try { + const storedSong = await getSongByMbid(mbid); + if (storedSong?.acousticbrainz_bpm) { + confirmedBpm = storedSong.acousticbrainz_bpm; + confirmedTimeSigNum = storedSong.acousticbrainz_time_sig_num ?? null; + } + } catch { + // ignore DB errors + } + + // 2. If no stored BPM, fetch from MusicBrainz tags + if (!confirmedBpm) { + try { + const rec = await lookupRecordingWithTags(mbid); + const tagBpm = rec.tags ? extractBpmFromTags(rec.tags) : null; + const tagTimeSig = rec.tags ? extractTimeSigFromTags(rec.tags) : null; + + if (tagBpm) { + confirmedBpm = tagBpm; + confirmedTimeSigNum = tagTimeSig; + // Persist for next time — fire-and-forget + updateSongBpm(mbid, tagBpm, tagTimeSig).catch(() => {}); + } + } catch { + // MusicBrainz unavailable or rate-limited — proceed without confirmed BPM + } + } + } + const input = { bpm, duration, @@ -85,6 +124,8 @@ export async function POST(req: NextRequest) { mbid: mbid ?? null, contributed_by: contributed_by ?? 
"anonymous", ollamaModel, + confirmedBpm, + confirmedTimeSigNum, }; let ctpDoc; diff --git a/app/api/tracks/route.ts b/app/api/tracks/route.ts index 80f46e4..6ed4862 100644 --- a/app/api/tracks/route.ts +++ b/app/api/tracks/route.ts @@ -2,7 +2,7 @@ import { NextRequest, NextResponse } from "next/server"; import { z } from "zod"; import { getTempoMapsForSong, getSongByMbid, insertTempoMap, upsertSong } from "@/lib/db/client"; import { validateCTP } from "@/lib/ctp/validate"; -import { lookupRecording, formatArtistCredit, mbDurationToSeconds } from "@/lib/musicbrainz/client"; +import { getMusicBrainzRecording } from "@/lib/musicbrainz/client"; // ─── GET /api/tracks?mbid= ───────────────────────────────────────────── @@ -61,14 +61,14 @@ export async function POST(req: NextRequest) { if (!existing) { try { - const rec = await lookupRecording(doc.metadata.mbid); + const mbRecord = await getMusicBrainzRecording(doc.metadata.mbid); await upsertSong({ mbid: doc.metadata.mbid, - title: rec.title, - artist: formatArtistCredit(rec["artist-credit"]), - duration_seconds: mbDurationToSeconds(rec.length), - acousticbrainz_bpm: null, - acousticbrainz_time_sig_num: null, + title: mbRecord.title, + artist: mbRecord.artist, + duration_seconds: mbRecord.duration_seconds, + acousticbrainz_bpm: mbRecord.bpm, + acousticbrainz_time_sig_num: mbRecord.timeSigNum, source: "musicbrainz", }); } catch { diff --git a/lib/analysis/providers.ts b/lib/analysis/providers.ts index 7ec9da5..a3ce4c6 100644 --- a/lib/analysis/providers.ts +++ b/lib/analysis/providers.ts @@ -8,6 +8,8 @@ export interface AnalysisInput { mbid?: string | null; contributed_by: string; ollamaModel?: string; // required when provider id is "ollama" + confirmedBpm?: number | null; // from MusicBrainz tags or other reliable source + confirmedTimeSigNum?: number | null; // time signature numerator if confirmed } export interface ProviderInfo { diff --git a/lib/analysis/providers/anthropic.ts b/lib/analysis/providers/anthropic.ts 
index 5f0ecdb..17b9a98 100644 --- a/lib/analysis/providers/anthropic.ts +++ b/lib/analysis/providers/anthropic.ts @@ -81,7 +81,9 @@ export const CTP_SCHEMA = { export const SYSTEM_PROMPT = `\ You are an expert music producer and session musician assisting cover bands with click tracks. -You will receive automated BPM detection results for a song and must generate a CTP (Click Track Protocol) document describing the song's full tempo map. +You will receive information about a song and must generate a CTP (Click Track Protocol) document describing the song's full tempo map. + +**Use your training knowledge of the specific song.** If you recognise the title and artist, use what you know about its actual structure: section names, bar counts, time signature, and any tempo changes (ritardando, double-time, key change with tempo shift). Your training data is a valuable source — do not ignore it in favour of generic guesses. CTP rules: - "version" must be "1.0" @@ -96,10 +98,15 @@ CTP rules: Guidelines for section layout: - Use typical pop/rock section names: Intro, Verse, Pre-Chorus, Chorus, Bridge, Outro - Estimate bar counts based on song duration and BPM (bars = duration_seconds × BPM / 60 / beats_per_bar) -- Most songs are 4/4; note any unusual meters if you know the song -- If you know the song has a tempo change (ritardando, double-time feel, key change with tempo shift), model it with a ramp or step section +- Most songs are 4/4; use 3/4, 6/8, etc. 
if you know the song uses that meter +- If the song has a tempo change (ritardando, double-time feel, key change with tempo shift), model it with a ramp or step section - If unsure about sections, use a single constant-tempo section covering the whole song -- Use the detected BPM as the primary tempo — do not invent a different BPM unless the song is well-known to have a different tempo + +**BPM authority:** When a "Confirmed BPM" is provided in the user message, it comes from +MusicBrainz community tags or a reliable reference — treat it as ground truth and use it +for all sections unless you know the song has a significant tempo change. Do not average it +with the detected BPM or discard it. When only a "Detected BPM" is provided, use it as a +starting point but apply your knowledge of the song if you recognise it. The output is a draft for human review. Add reasonable section structure based on the song's typical arrangement.`; @@ -118,9 +125,18 @@ export const anthropicProvider: AnalysisProvider = { }, async generateCTP(input: AnalysisInput): Promise { - const { bpm, duration, title, artist, mbid, contributed_by } = input; + const { bpm, duration, title, artist, mbid, contributed_by, confirmedBpm, confirmedTimeSigNum } = input; const model = process.env.ANTHROPIC_MODEL ?? "claude-opus-4-6"; - const approxBars = Math.round((duration * bpm) / 60 / 4); + const effectiveBpm = confirmedBpm ?? bpm; + const approxBars = Math.round((duration * effectiveBpm) / 60 / 4); + + const bpmLine = confirmedBpm + ? `Confirmed BPM (from MusicBrainz community tags — treat as authoritative): ${confirmedBpm}\nDetected BPM (audio analysis): ${bpm}` + : `Detected BPM (audio analysis): ${bpm}`; + + const timeSigHint = confirmedTimeSigNum + ? `\nConfirmed time signature numerator: ${confirmedTimeSigNum}` + : ""; const userMessage = `\ Generate a CTP document for the following song: @@ -128,11 +144,11 @@ Generate a CTP document for the following song: Title: ${title ?? 
"Unknown Title"} Artist: ${artist ?? "Unknown Artist"} MusicBrainz ID: ${mbid ?? "unknown"} -Detected BPM: ${bpm} +${bpmLine}${timeSigHint} Duration: ${duration.toFixed(1)} seconds (~${approxBars} bars at 4/4) Contributed by: ${contributed_by} -Create a plausible section layout for this song. If this is a well-known song, use your knowledge of its actual arrangement. If not, use a sensible generic structure.`; +If you recognise this song, use your training knowledge of its actual arrangement — section names, bar counts, time signature, and any tempo changes. If you do not recognise it, use a sensible generic structure based on the BPM and duration above.`; // thinking and output_config are not yet in the SDK type definitions; // cast through the base param type to avoid type errors. diff --git a/lib/analysis/providers/ollama.ts b/lib/analysis/providers/ollama.ts index 364cec2..0701a24 100644 --- a/lib/analysis/providers/ollama.ts +++ b/lib/analysis/providers/ollama.ts @@ -96,13 +96,22 @@ export const ollamaProvider: AnalysisProvider = { }, async generateCTP(input: AnalysisInput): Promise { - const { ollamaModel, bpm, duration, title, artist, mbid, contributed_by } = input; + const { ollamaModel, bpm, duration, title, artist, mbid, contributed_by, confirmedBpm, confirmedTimeSigNum } = input; if (!ollamaModel) { throw new Error("ollamaModel is required for Ollama provider"); } - const approxBars = Math.round((duration * bpm) / 60 / 4); + const effectiveBpm = confirmedBpm ?? bpm; + const approxBars = Math.round((duration * effectiveBpm) / 60 / 4); + + const bpmLine = confirmedBpm + ? `Confirmed BPM (from MusicBrainz community tags — treat as authoritative): ${confirmedBpm}\nDetected BPM (audio analysis): ${bpm}` + : `Detected BPM (audio analysis): ${bpm}`; + + const timeSigHint = confirmedTimeSigNum + ? 
`\nConfirmed time signature numerator: ${confirmedTimeSigNum}` + : ""; const userMessage = `\ Generate a CTP document for the following song: @@ -110,11 +119,11 @@ Generate a CTP document for the following song: Title: ${title ?? "Unknown Title"} Artist: ${artist ?? "Unknown Artist"} MusicBrainz ID: ${mbid ?? "unknown"} -Detected BPM: ${bpm} +${bpmLine}${timeSigHint} Duration: ${duration.toFixed(1)} seconds (~${approxBars} bars at 4/4) Contributed by: ${contributed_by} -Create a plausible section layout for this song. If this is a well-known song, use your knowledge of its actual arrangement. If not, use a sensible generic structure.`; +If you recognise this song, use your training knowledge of its actual arrangement — section names, bar counts, time signature, and any tempo changes. If you do not recognise it, use a sensible generic structure based on the BPM and duration above.`; // Attempt parse with one retry on failure let content: string; diff --git a/lib/analysis/providers/openai.ts b/lib/analysis/providers/openai.ts index 4b120b2..854dd08 100644 --- a/lib/analysis/providers/openai.ts +++ b/lib/analysis/providers/openai.ts @@ -38,8 +38,17 @@ export const openaiProvider: AnalysisProvider = { const baseUrl = process.env.OPENAI_BASE_URL ?? "https://api.openai.com/v1"; const model = process.env.OPENAI_MODEL ?? "gpt-4o"; - const { bpm, duration, title, artist, mbid, contributed_by } = input; - const approxBars = Math.round((duration * bpm) / 60 / 4); + const { bpm, duration, title, artist, mbid, contributed_by, confirmedBpm, confirmedTimeSigNum } = input; + const effectiveBpm = confirmedBpm ?? bpm; + const approxBars = Math.round((duration * effectiveBpm) / 60 / 4); + + const bpmLine = confirmedBpm + ? `Confirmed BPM (from MusicBrainz community tags — treat as authoritative): ${confirmedBpm}\nDetected BPM (audio analysis): ${bpm}` + : `Detected BPM (audio analysis): ${bpm}`; + + const timeSigHint = confirmedTimeSigNum + ? 
`\nConfirmed time signature numerator: ${confirmedTimeSigNum}` + : ""; const userMessage = `\ Generate a CTP document for the following song: @@ -47,11 +56,11 @@ Generate a CTP document for the following song: Title: ${title ?? "Unknown Title"} Artist: ${artist ?? "Unknown Artist"} MusicBrainz ID: ${mbid ?? "unknown"} -Detected BPM: ${bpm} +${bpmLine}${timeSigHint} Duration: ${duration.toFixed(1)} seconds (~${approxBars} bars at 4/4) Contributed by: ${contributed_by} -Create a plausible section layout for this song. If this is a well-known song, use your knowledge of its actual arrangement. If not, use a sensible generic structure.`; +If you recognise this song, use your training knowledge of its actual arrangement — section names, bar counts, time signature, and any tempo changes. If you do not recognise it, use a sensible generic structure based on the BPM and duration above.`; const response = await fetch(`${baseUrl}/chat/completions`, { method: "POST", diff --git a/lib/db/client.ts b/lib/db/client.ts index a5bbb88..193a902 100644 --- a/lib/db/client.ts +++ b/lib/db/client.ts @@ -92,8 +92,8 @@ export async function upsertSong(song: Omit { + await query( + `UPDATE songs + SET acousticbrainz_bpm = COALESCE($2, acousticbrainz_bpm), + acousticbrainz_time_sig_num = COALESCE($3, acousticbrainz_time_sig_num), + updated_at = NOW() + WHERE mbid = $1`, + [mbid, bpm, timeSigNum] + ); +} + // ─── Tempo map queries ──────────────────────────────────────────────────────── export interface TempoMapRow { diff --git a/lib/musicbrainz/client.ts b/lib/musicbrainz/client.ts index 82d135c..26198b1 100644 --- a/lib/musicbrainz/client.ts +++ b/lib/musicbrainz/client.ts @@ -10,7 +10,7 @@ const MB_BASE = "https://musicbrainz.org/ws/2"; const USER_AGENT = process.env.MUSICBRAINZ_USER_AGENT ?? 
-  "ClickTrack/0.1 ( https://github.com/your-org/clicktrack )";
+  "ClickTrack/0.1 ( https://git.avezzano.io/the_og/clicktrack )";
 
 // ─── Rate limiter ─────────────────────────────────────────────────────────────
 
@@ -60,6 +60,7 @@ export interface MBRecording {
   length?: number; // duration in milliseconds
   "artist-credit": MBArtistCredit[];
   releases?: MBRelease[];
+  tags?: MBTag[];
   score?: number; // search relevance (0–100)
 }
 
@@ -79,6 +80,11 @@ export interface MBRelease {
   status?: string;
 }
 
+export interface MBTag {
+  name: string;
+  count: number;
+}
+
 export interface MBSearchResult {
   created: string;
   count: number;
@@ -147,6 +153,130 @@ export async function lookupRecording(mbid: string): Promise<MBRecording> {
   return response.json() as Promise<MBRecording>;
 }
 
+/**
+ * Looks up a single recording by MBID, requesting artist-credits, releases,
+ * and community tags. Use this when you also want BPM or time-signature tags.
+ *
+ * @param mbid MusicBrainz recording ID; URL-encoded before it goes in the path.
+ * @returns The parsed JSON body, typed as `MBRecording` (not validated here).
+ */
+export async function lookupRecordingWithTags(mbid: string): Promise<MBRecording> {
+  const params = new URLSearchParams({
+    inc: "artist-credits+releases+tags",
+    fmt: "json",
+  });
+
+  const url = `${MB_BASE}/recording/${encodeURIComponent(mbid)}?${params}`;
+  const response = await rateLimitedFetch(url);
+  return response.json() as Promise<MBRecording>;
+}
+
+/**
+ * Parses a MusicBrainz tag list and extracts a BPM value if present.
+ *
+ * MusicBrainz users tag recordings with strings like:
+ *   "bpm: 174", "174 bpm", "bpm:174", "tempo: 174"
+ *
+ * The number must be a complete 2–3 digit run: "bpm: 1200" is rejected rather
+ * than being read as 120. Values outside 20–400 are ignored.
+ *
+ * @param tags Community tags from a recording lookup.
+ * @returns The BPM from the matching tag with the highest vote count (first one
+ *   wins a tie; tags with a count of 0 or less are never selected), or null.
+ */
+export function extractBpmFromTags(tags: MBTag[]): number | null {
+  // (?<!\d) / (?!\d) stop the capture from slicing digits out of a longer number.
+  const bpmPattern =
+    /(?:bpm|tempo)\s*:?\s*(\d{2,3})(?!\d)|(?<!\d)(\d{2,3})\s*bpm/i;
+
+  let bestBpm: number | null = null;
+  let bestCount = 0;
+
+  for (const tag of tags) {
+    const match = bpmPattern.exec(tag.name);
+    if (!match) continue;
+
+    // Exactly one of the two capture groups participates in a match.
+    const value = Number.parseInt(match[1] ?? match[2] ?? "", 10);
+    if (value >= 20 && value <= 400 && tag.count > bestCount) {
+      bestBpm = value;
+      bestCount = tag.count;
+    }
+  }
+
+  return bestBpm;
+}
+
+/**
+ * Parses a MusicBrainz tag list and extracts a time signature numerator if present.
+ *
+ * Users tag recordings like "3/4", "5/4", "6/8", "time signature: 3/4".
+ * We only store the numerator since that's what the CTP denominator-agnostic
+ * count-in uses.
+ *
+ * The denominator is still checked: it must be a power-of-two note value, so
+ * slash-separated tags such as "24/7" or "9/11" are not mistaken for a
+ * meter. Dates like "2019/04/08" are skipped as well.
+ *
+ * @param tags Community tags from a recording lookup.
+ * @returns The numerator from the matching tag with the highest vote count (first
+ *   one wins a tie; tags with a count of 0 or less are never selected), or null.
+ */
+export function extractTimeSigFromTags(tags: MBTag[]): number | null {
+  // The look-arounds reject digits or slashes touching the fraction, so the
+  // pattern cannot bite a piece out of "2019/04/08" or "112/4".
+  const timeSigPattern = /(?<![\d/])(\d{1,2})\/(\d{1,2})(?![\d/])/;
+  const noteValues = new Set([1, 2, 4, 8, 16, 32, 64]);
+
+  let bestNum: number | null = null;
+  let bestCount = 0;
+
+  for (const tag of tags) {
+    const match = timeSigPattern.exec(tag.name);
+    if (!match) continue;
+
+    const numerator = Number.parseInt(match[1] ?? "", 10);
+    const denominator = Number.parseInt(match[2] ?? "", 10);
+    if (
+      numerator >= 1 &&
+      numerator <= 32 &&
+      noteValues.has(denominator) &&
+      tag.count > bestCount
+    ) {
+      bestNum = numerator;
+      bestCount = tag.count;
+    }
+  }
+
+  return bestNum;
+}
+
+/**
+ * Returns a normalised recording object suitable for passing to `upsertSong`.
+ * Includes BPM and time-signature from community tags when available.
+ *
+ * @param mbid MusicBrainz recording ID.
+ * @returns `bpm` and `timeSigNum` are null when the recording carries no tags
+ *   or none of them parse.
+ */
+export async function getMusicBrainzRecording(mbid: string): Promise<{
+  title: string;
+  artist: string;
+  duration_seconds: number | null;
+  bpm: number | null;
+  timeSigNum: number | null;
+}> {
+  const rec = await lookupRecordingWithTags(mbid);
+
+  return {
+    title: rec.title,
+    artist: formatArtistCredit(rec["artist-credit"]),
+    duration_seconds: mbDurationToSeconds(rec.length),
+    bpm: rec.tags ? extractBpmFromTags(rec.tags) : null,
+    timeSigNum: rec.tags ? extractTimeSigFromTags(rec.tags) : null,
+  };
+}
+
 /**
  * Convenience function: searches MusicBrainz and returns results normalised
  * for storage in the `songs` table.