Changelog Feature Tracker collector facts

Publisher: bo-05 (@bo-05).

Version: 1. Last updated: 2026-07-04T06:44:13.795Z.

Run this collector on demand, as an API endpoint, or on a schedule with Firecrawl Prometheus.

Sample fields: notes, summary, entry_url, confidence, entry_title, company_name, product_area, publish_date, company_domain, source_page_url.

Parameters: seed-urls (string, required), max-items (number), output-mode (string).

Changelog Feature Tracker

v1 · Published

Recent product changelog entries from official company changelog, release notes, updates, or product news pages.

Output & API

Preview the latest data, download it, or call this collector as an API.

Author's sample data
| # | notes | summary | entry_url | confidence | entry_title | company_name | product_area | publish_date | company_domain | source_page_url |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| 0 | null | Collect an up-front payment for monthly subscriptions with prebilling. | https://docs.stripe.com/billing/subscriptions/prebilling | 0.82 | Prebilling to optimize cash flow | Stripe | Billing | Jun 2026 | stripe.com | https://stripe.com/shipped |
| 1 | No separate official entry URL was visible; source page used. Publish date not visible. | Calculate tax on NetSuite invoices and file directly from your Stripe Dashboard. | https://stripe.com/shipped | 0.82 | Stripe Tax for NetSuite | Stripe | Tax | null | stripe.com | https://stripe.com/shipped |
| 2 | Publish date not visible. | Track performance, pinpoint drop-offs, and re-engage users across your connected account onboarding funnel. | https://docs.stripe.com/connect/supported-embedded-components/account-onboarding | 0.82 | Onboarding insights | Stripe | Connect | null | stripe.com | https://stripe.com/shipped |
Parameters
--seed-urls (string, required): Comma-separated company websites, changelog pages, release-note pages, or product update URLs to inspect. e.g. "https://stripe.com"
--max-items (number): Maximum total number of changelog entries to return across all seed URLs. Default: 10
--output-mode (string): Return flat entry rows with `entry_rows` or company objects with nested entries using `grouped_by_company`. Default: "entry_rows"

Marketplace

Publish this collector so others can deploy it — you keep ownership.

0 subscribers
bo-05 (@bo-05)
0 runs in 14d · published 1d ago

Versions

Every build and self-heal appends a version. Pin one to lock runs to it.

managed by author
v1 · built · approved · current · 1d ago
How this script collects data
import Firecrawl from "@mendable/firecrawl-js";
import { parseArgs } from "node:util";

// Runtime configuration. Everything in this section runs at module load:
// it reads the API key and the CLI flags, validates them, and exits with
// status 1 on the first invalid value.

// The Firecrawl client below is constructed with this key, so fail fast without it.
const apiKey = process.env.FIRECRAWL_API_KEY;
if (!apiKey) {
  console.error("FIRECRAWL_API_KEY is not set");
  process.exit(1);
}

// Every flag is declared as a string; numeric flags are converted further down.
const { values: flags } = parseArgs({
  strict: true,
  options: {
    "seed-urls": { type: "string" },
    "max-items": { type: "string" },
    "output-mode": { type: "string" },
  },
});

if (!flags["seed-urls"]) {
  console.error("--seed-urls is required");
  process.exit(1);
}

// Comma-separated list; blank segments (e.g. from a trailing comma) are dropped.
const seedUrls = String(flags["seed-urls"])
  .split(",")
  .map((value) => value.trim())
  .filter(Boolean);

if (seedUrls.length === 0) {
  console.error("OUT_OF_SCOPE: --seed-urls must include at least one URL or domain");
  process.exit(1);
}

// Floor the parsed value so a fractional input such as "2.5" becomes a whole
// item budget. The budget is interpolated into the extraction prompt and used
// as a slice bound, so a fraction would otherwise read "Extract up to 2.5 ...".
// NaN and Infinity survive Math.floor unchanged and are rejected just below.
const maxItems = Math.floor(Number(flags["max-items"] ?? "10"));
if (!Number.isFinite(maxItems) || maxItems < 1) {
  console.error("OUT_OF_SCOPE: --max-items must be a positive number");
  process.exit(1);
}

const outputMode = String(flags["output-mode"] ?? "entry_rows");
if (!["entry_rows", "grouped_by_company"].includes(outputMode)) {
  console.error('OUT_OF_SCOPE: --output-mode must be "entry_rows" or "grouped_by_company"');
  process.exit(1);
}

// Shared client used by the search and scrape calls in this file.
const firecrawl = new Firecrawl({ apiKey });

/** A page that may contain a company's changelog, plus the score used to rank it. */
type Candidate = {
  /** Page URL; candidates are keyed by this value when collected. */
  url: string;
  /** Title taken from the search hit, or an empty string when none is available. */
  title: string;
  /** Description taken from the search hit, or an empty string when none is available. */
  description: string;
  /** Heuristic relevance score; candidates are sorted by it in descending order. */
  score: number;
};

/** One changelog entry as emitted in the collector's JSON output. */
type EntryRow = {
  /** Company name extracted from the page, or a fallback derived from the domain. */
  company_name: string;
  /** Domain the entry is attributed to (derived from the seed URL's hostname). */
  company_domain: string;
  /** URL of the page the entry was extracted from. */
  source_page_url: string;
  /** Entry title with whitespace collapsed; never empty. */
  entry_title: string;
  /** Publish date as extracted text (not parsed), or null when none was extracted. */
  publish_date: string | null;
  /** Short description of the entry, or null when none was extracted. */
  summary: string | null;
  /** Product area the entry belongs to, or null when none was extracted. */
  product_area: string | null;
  /** Entry-specific URL on the company's domain; falls back to `source_page_url`. */
  entry_url: string;
  /** Confidence value clamped to the range 0..1 and rounded to two decimals. */
  confidence: number;
  /** Extraction notes and caveats joined into one string, or null when there are none. */
  notes: string | null;
};

// Schema object passed as `schema` in the `json` format of the scrape request.
// Every entry field is listed as required, but all of them except
// `entry_title` also accept null, so a missing value can be reported as null
// instead of being omitted.
const extractionSchema = {
  type: "object",
  properties: {
    company_name: { type: ["string", "null"] },
    entries: {
      type: "array",
      items: {
        type: "object",
        properties: {
          entry_title: { type: "string" },
          publish_date: { type: ["string", "null"] },
          summary: { type: ["string", "null"] },
          product_area: { type: ["string", "null"] },
          entry_url: { type: ["string", "null"] },
          confidence: { type: ["number", "null"] },
          notes: { type: ["string", "null"] },
        },
        required: [
          "entry_title",
          "publish_date",
          "summary",
          "product_area",
          "entry_url",
          "confidence",
          "notes",
        ],
      },
    },
  },
  required: ["company_name", "entries"],
};

/**
 * Turns a user-supplied seed (full URL or bare domain) into a URL object.
 *
 * Inputs that do not already start with `http://` or `https://` are treated
 * as bare hosts and get an `https://` prefix before parsing.
 *
 * @param input Seed value exactly as given on the command line.
 * @returns The parsed URL.
 * @throws Error with an `OUT_OF_SCOPE:` message when the value cannot be parsed.
 */
function normalizeSeed(input: string): URL {
  const hasHttpScheme = /^https?:\/\//i.test(input);
  const target = hasHttpScheme ? input : `https://${input}`;

  let parsed: URL;
  try {
    parsed = new URL(target);
  } catch {
    throw new Error(`OUT_OF_SCOPE: invalid seed URL "${input}"`);
  }
  return parsed;
}

/**
 * Lower-cases a hostname and removes a single leading `www.` label.
 *
 * @param host Hostname to normalize.
 * @returns The normalized hostname.
 */
function cleanHost(host: string): string {
  const lowered = host.toLowerCase();
  const prefix = "www.";
  return lowered.startsWith(prefix) ? lowered.slice(prefix.length) : lowered;
}

/**
 * Reduces a hostname to the domain used for comparing company sites.
 *
 * Hosts with one or two labels are returned as-is (after cleaning). Longer
 * hosts keep their last two labels, or their last three when the host ends
 * in a two-letter top-level label preceded by one of a fixed list of
 * second-level labels (for example `co` or `com`).
 *
 * @param host Hostname to reduce.
 * @returns The reduced domain.
 */
function comparableDomain(host: string): string {
  const labels = cleanHost(host)
    .split(".")
    .filter((label) => label.length > 0);

  if (labels.length <= 2) {
    return labels.join(".");
  }

  const [secondLevel, topLevel] = labels.slice(-2);
  const compoundSecondLevels = ["ac", "co", "com", "edu", "gov", "net", "org"];
  const hasCompoundSuffix = topLevel.length === 2 && compoundSecondLevels.includes(secondLevel);

  return labels.slice(hasCompoundSuffix ? -3 : -2).join(".");
}

/**
 * Reports whether a URL belongs to the given company domain.
 *
 * A URL qualifies when its cleaned host equals the domain, is a subdomain of
 * it, or reduces to it via `comparableDomain`.
 *
 * @param url URL to check.
 * @param companyDomain Domain to compare against.
 * @returns `true` when the URL is on the company's domain; `false` otherwise,
 *          including when the URL cannot be parsed.
 */
function isOfficialUrl(url: string, companyDomain: string): boolean {
  try {
    const host = cleanHost(new URL(url).hostname);
    if (host === companyDomain) return true;
    if (host.endsWith(`.${companyDomain}`)) return true;
    return comparableDomain(host) === companyDomain;
  } catch {
    return false;
  }
}

/**
 * Builds a display name from the first label of a domain.
 *
 * The label is split on `-` and `_`, empty pieces are dropped, and each
 * remaining piece gets its first character upper-cased
 * (`my-company.io` -> `My Company`).
 *
 * @param domain Domain to derive the name from.
 * @returns The space-separated display name.
 */
function titleFromDomain(domain: string): string {
  const [firstLabel = domain] = domain.split(".");
  const words: string[] = [];
  for (const piece of firstLabel.split(/[-_]/)) {
    if (piece) {
      words.push(piece.charAt(0).toUpperCase() + piece.slice(1));
    }
  }
  return words.join(" ");
}

/**
 * Scores how likely a page is to be a product changelog.
 *
 * Each strong phrase found in the lower-cased URL, title, or description adds
 * 8 points. The URL alone then contributes fixed bonuses and penalties, and a
 * `/blog/` URL with no product-related wording loses a further 5 points.
 *
 * @param url Page URL.
 * @param title Page title, if known.
 * @param description Page description, if known.
 * @returns The total score; it can be negative.
 */
function scoreCandidate(url: string, title = "", description = ""): number {
  const text = `${url} ${title} ${description}`.toLowerCase();

  const strongPhrases = ["changelog", "release notes", "product updates", "what's new", "whats-new", "product news"];
  let total = 8 * strongPhrases.filter((phrase) => text.includes(phrase)).length;

  // URL-only signals: a pattern paired with the points it contributes.
  const urlSignals: Array<[RegExp, number]> = [
    [/\/(changelog|release-notes|product-updates|whats-new|shipped)(\/|$)/i, 8],
    [/(updates|releases|new-features)/i, 3],
    [/(docs|help|support)/i, 1],
    [/(press|careers|legal|privacy|terms|pricing)/i, -7],
  ];
  for (const [pattern, points] of urlSignals) {
    if (pattern.test(url)) total += points;
  }

  const isBlogPost = /\/blog\//i.test(url);
  const mentionsProductNews = /(changelog|release|product|update|shipped)/i.test(text);
  if (isBlogPost && !mentionsProductNews) {
    total -= 5;
  }

  return total;
}

/**
 * Reports whether a URL contains one of the changelog-style keywords,
 * matched case-insensitively anywhere in the string.
 *
 * @param url URL to inspect.
 * @returns `true` when a keyword is present.
 */
function looksLikeChangelogUrl(url: string): boolean {
  const keywords = [
    "changelog",
    "release-notes",
    "product-updates",
    "updates",
    "whats-new",
    "shipped",
    "releases",
    "product-news",
  ];
  const keywordPattern = new RegExp(`(${keywords.join("|")})`, "i");
  return keywordPattern.test(url);
}

/**
 * Normalizes an arbitrary value into a single-line string.
 *
 * Runs of whitespace are collapsed to one space and the ends are trimmed.
 *
 * @param value Value to normalize; anything that is not a string yields null.
 * @returns The compacted string, or null when the input is not a string or is
 *          empty after compaction.
 */
function compactString(value: unknown): string | null {
  if (typeof value === "string") {
    const compacted = value
      .split(/\s+/)
      .filter((token) => token.length > 0)
      .join(" ");
    if (compacted.length > 0) {
      return compacted;
    }
  }
  return null;
}

/**
 * Resolves a possibly relative URL against the page it was found on.
 *
 * @param url URL to resolve; null or an empty string yields null.
 * @param sourceUrl Base URL used for relative values.
 * @returns The absolute URL string, or null when the value is missing or
 *          cannot be resolved.
 */
function normalizeUrlMaybe(url: string | null, sourceUrl: string): string | null {
  let resolved: string | null = null;
  if (url) {
    try {
      resolved = new URL(url, sourceUrl).href;
    } catch {
      resolved = null;
    }
  }
  return resolved;
}

/**
 * Builds the key used to detect duplicate changelog entries.
 *
 * The title is lower-cased and every run of characters that is not a letter,
 * combining mark, digit, or underscore is collapsed to a single space.
 * Unicode-aware classes are used on purpose: the ASCII-only `\W` class treats
 * every non-Latin character as punctuation, which reduces titles written in
 * other scripts to an empty string and makes unrelated entries collide.
 *
 * @param companyDomain Domain the entry belongs to.
 * @param title Entry title.
 * @param publishDate Publish date text, or null when unknown.
 * @returns A key of the form `domain|normalized title|date`, with an empty
 *          date segment when `publishDate` is null.
 */
function dedupeKey(companyDomain: string, title: string, publishDate: string | null): string {
  const normalizedTitle = title
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}_]+/gu, " ")
    .trim();
  return `${companyDomain}|${normalizedTitle}|${publishDate ?? ""}`;
}

/**
 * Finds up to two pages most likely to hold the company's changelog.
 *
 * The seed itself is included, with a 10-point bonus, when its URL looks like
 * a changelog. A site-restricted search then adds hits that are on the
 * company's domain and score at least 4. A failed search is logged and does
 * not discard candidates already collected.
 *
 * @param seed Parsed seed URL.
 * @param companyDomain Domain used to restrict the search and filter hits.
 * @returns The two highest-scoring candidates, best first; possibly empty.
 */
async function findCandidates(seed: URL, companyDomain: string): Promise<Candidate[]> {
  const byUrl = new Map<string, Candidate>();

  const seedHref = seed.toString();
  const seedBaseScore = scoreCandidate(seedHref, "", "");
  if (looksLikeChangelogUrl(seedHref)) {
    const seedCandidate: Candidate = { url: seedHref, title: "", description: "", score: seedBaseScore + 10 };
    byUrl.set(seedHref, seedCandidate);
  }

  const query = `site:${companyDomain} ("changelog" OR "release notes" OR "product updates" OR "what's new" OR "product news")`;
  try {
    const response = (await firecrawl.search(query, {
      limit: 6,
      integration: "prometheus",
    })) as any;

    // Hits are read from `web` when it is an array, otherwise from `data`.
    let hits: any[] = [];
    if (Array.isArray(response.web)) {
      hits = response.web;
    } else if (Array.isArray(response.data)) {
      hits = response.data;
    }

    for (const hit of hits) {
      const hitUrl = compactString(hit.url);
      if (!hitUrl || !isOfficialUrl(hitUrl, companyDomain)) continue;

      const hitTitle = compactString(hit.title) ?? "";
      const hitDescription = compactString(hit.description) ?? "";
      const hitScore = scoreCandidate(hitUrl, hitTitle, hitDescription);
      if (hitScore < 4) continue;

      // Keep whichever record for this URL scored higher.
      const known = byUrl.get(hitUrl);
      if (!known || known.score < hitScore) {
        byUrl.set(hitUrl, { url: hitUrl, title: hitTitle, description: hitDescription, score: hitScore });
      }
    }
  } catch (err) {
    console.error(`Search failed for ${companyDomain}: ${err}`);
  }

  const ranked = Array.from(byUrl.values());
  ranked.sort((left, right) => right.score - left.score);
  return ranked.slice(0, 2);
}

/**
 * Scrapes one candidate page and converts the extracted entries into rows.
 *
 * Entries without a usable title are skipped. An entry URL is kept only when
 * it resolves to the company's domain; otherwise the page URL is used and a
 * note records that. A missing publish date is noted as well.
 *
 * @param candidate Page to scrape.
 * @param companyDomain Domain stamped on each row and used to vet entry URLs.
 * @param fallbackCompanyName Name used when the page yields no company name.
 * @param remaining Number of entries still wanted; the prompt asks for at most 10.
 * @returns One row per usable entry, in extraction order.
 * @throws Propagates any error raised by the scrape call.
 */
async function extractEntries(candidate: Candidate, companyDomain: string, fallbackCompanyName: string, remaining: number): Promise<EntryRow[]> {
  const requestedCount = Math.min(remaining, 10);
  const instructions = [
    `Extract up to ${requestedCount} recent product changelog or release-note entries from this official company page only.`,
    "Use only entries visible or clearly represented on this page.",
    "Ignore press releases, hiring posts, pricing pages, generic documentation, and blog posts unrelated to product updates.",
    "Do not invent dates or summaries. If a publish date, summary, product area, or separate entry URL is not visible, return null for that field.",
    "Keep summaries short and factual.",
  ];
  const prompt = instructions.join(" ");

  const scraped = (await firecrawl.scrape(candidate.url, {
    formats: [{ type: "json", prompt, schema: extractionSchema }],
    integration: "prometheus",
    timeout: 30000,
  })) as any;

  const payload = scraped.json;
  const rawEntries: any[] = Array.isArray(payload?.entries) ? payload.entries : [];
  const companyName = compactString(payload?.company_name) ?? fallbackCompanyName;

  // Used when an entry carries no numeric confidence of its own.
  const defaultConfidence = candidate.score >= 12 ? 0.82 : 0.68;

  const rows: EntryRow[] = [];
  for (const raw of rawEntries) {
    const entryTitle = compactString(raw.entry_title);
    if (!entryTitle) continue;

    const publishDate = compactString(raw.publish_date);
    const summary = compactString(raw.summary);
    const productArea = compactString(raw.product_area);

    // Resolve relative links against the page, then keep only on-domain URLs.
    const resolvedUrl = normalizeUrlMaybe(compactString(raw.entry_url), candidate.url);
    const officialUrl = resolvedUrl && isOfficialUrl(resolvedUrl, companyDomain) ? resolvedUrl : null;

    const noteParts: string[] = [];
    const extractedNote = compactString(raw.notes);
    if (extractedNote) noteParts.push(extractedNote);
    if (!officialUrl) noteParts.push("No separate official entry URL was visible; source page used.");
    if (!publishDate) noteParts.push("Publish date not visible.");

    const reportedConfidence: number = typeof raw.confidence === "number" ? raw.confidence : defaultConfidence;
    const roundedConfidence = Number(reportedConfidence.toFixed(2));
    const confidence = Math.max(0, Math.min(1, roundedConfidence));

    rows.push({
      company_name: companyName,
      company_domain: companyDomain,
      source_page_url: candidate.url,
      entry_title: entryTitle,
      publish_date: publishDate,
      summary,
      product_area: productArea,
      entry_url: officialUrl ?? candidate.url,
      confidence,
      notes: noteParts.length > 0 ? noteParts.join(" ") : null,
    });
  }

  return rows;
}

/**
 * Runs the collector: walks the seeds in order, extracts entries from each
 * seed's candidate pages, de-duplicates them, and writes the result to stdout
 * as JSON (flat rows, or rows grouped per company domain).
 *
 * Collection stops as soon as `maxItems` rows have been gathered. A page
 * whose extraction fails is logged and skipped; an invalid seed aborts the run.
 */
async function main() {
  const collected: EntryRow[] = [];
  const rowByKey = new Map<string, EntryRow>();
  const budgetSpent = (): boolean => collected.length >= maxItems;

  for (const seedInput of seedUrls) {
    if (budgetSpent()) break;

    const seed = normalizeSeed(seedInput);
    const companyDomain = comparableDomain(seed.hostname);
    const fallbackCompanyName = titleFromDomain(companyDomain);
    const candidates = await findCandidates(seed, companyDomain);

    if (candidates.length === 0) {
      console.error(`No likely official changelog page found for ${companyDomain}`);
      continue;
    }

    for (const candidate of candidates) {
      if (budgetSpent()) break;

      try {
        const rows = await extractEntries(candidate, companyDomain, fallbackCompanyName, maxItems - collected.length);
        for (const row of rows) {
          const key = dedupeKey(row.company_domain, row.entry_title, row.publish_date);
          const previous = rowByKey.get(key);

          if (previous === undefined) {
            rowByKey.set(key, row);
            collected.push(row);
          } else {
            // A duplicate replaces the stored row only when it adds information.
            // NOTE(review): the dedupe key already contains publish_date, so two
            // rows sharing a key appear to always have the same date and the
            // date check below looks unreachable - confirm the intent.
            const addsSummary = !previous.summary && Boolean(row.summary);
            const addsDate = !previous.publish_date && Boolean(row.publish_date);
            const addsEntryUrl = previous.entry_url === previous.source_page_url && row.entry_url !== row.source_page_url;

            if (addsSummary || addsDate || addsEntryUrl) {
              const position = collected.indexOf(previous);
              rowByKey.set(key, row);
              if (position >= 0) collected[position] = row;
            }
          }

          if (budgetSpent()) break;
        }
      } catch (err) {
        console.error(`Extraction failed for ${candidate.url}: ${err}`);
      }
    }
  }

  const limitedRows = collected.slice(0, maxItems);

  if (outputMode !== "grouped_by_company") {
    process.stdout.write(JSON.stringify(limitedRows));
    return;
  }

  // Group rows by domain, keeping the order in which each domain first appears.
  const groups = new Map<string, { company_name: string; company_domain: string; entries: EntryRow[] }>();
  for (const row of limitedRows) {
    let group = groups.get(row.company_domain);
    if (!group) {
      group = {
        company_name: row.company_name,
        company_domain: row.company_domain,
        entries: [],
      };
      groups.set(row.company_domain, group);
    }
    group.entries.push(row);
  }
  process.stdout.write(JSON.stringify(Array.from(groups.values())));
}

// Entry point: run the collector; on any unhandled failure, print it and exit with status 1.
main().catch((failure: unknown) => {
  console.error(failure);
  process.exit(1);
});
deploy to unlock

Deploy this collector to unlock schedules, the API endpoint, and destinations.

One person builds it. Everyone keeps it fresh.